pastebin - collaborative debugging tool
rovema.kpaste.net RSS


x86_rdtscp2_perf.c - compare execution time x86 RDTSCP instruction vs. |sched_getcpu()|
Posted by Anonymous on Sat 26th Nov 2022 18:53
raw | new post
modification of post by Anonymous (view diff)

  1. rmainz@derfwpc5131:~/work/x86_rdtscp/try3_perf$ banner "test 104"
  2.                                            #      ###   #
  3.   #####  ######   ####    #####           ##     #   #  #    #
  4.     #    #       #          #            # #    # #   # #    #
  5.     #    #####    ####      #              #    #  #  # #######
  6.     #    #            #     #              #    #   # #      #
  7.     #    #       #    #     #              #     #   #       #
  8.     #    ######   ####      #            #####    ###        #
  9.  
  10. rmainz@derfwpc5131:~/work/x86_rdtscp/try3_perf$ make clean
  11. rm -f \
  12.         x86_rdtscp2_perf_32 \
  13.         x86_rdtscp2_perf_64
  14. rmainz@derfwpc5131:~/work/x86_rdtscp/try3_perf$ make tests
  15. gcc -std=gnu17 -m32 -Wall -g -O0 x86_rdtscp2_perf.c -lpthread -o x86_rdtscp2_perf_32
  16. gcc -std=gnu17 -m64 -Wall -g -O0 x86_rdtscp2_perf.c -lpthread -o x86_rdtscp2_perf_64
  17. ksh93 -c 'set -o xtrace ; \
  18.         cat x86_rdtscp2_perf.c ; \
  19.         cat Makefile ; \
  20.         time ./x86_rdtscp2_perf_32 ; \
  21.         time ./x86_rdtscp2_perf_64'
  22. + cat x86_rdtscp2_perf.c
  23. /*
  24.  * x86_rdtscp2_perf.c - compare execution time x86 RDTSCP instruction
  25.  * vs. |sched_getcpu()|
  26.  *
  27.  * Compile with:
  28.  * $ gcc -std=gnu17 -m32 -Wall x86_rdtscp2_perf.c -lpthread
  29.  *
  30.  * Written by Roland Mainz <roland.mainz@nrubsig.org>
  31.  *
  32.  */
  33.  
  34. #define _XOPEN_SOURCE 700
  35. #define _GNU_SOURCE 1
  36.  
  37. #include <stdio.h>
  38. #include <stdlib.h>
  39. #include <errno.h>
  40.  
  41. #include <pthread.h>
  42. #include <sched.h>
  43.  
  44. #include <x86intrin.h>
  45.  
  46.  
  47. int main(int ac, char *av[])
  48. {
  49.         (void)puts("#start.");
  50.  
  51.         unsigned int    A = 0;
  52.         long long       li; /* loop iterator */
  53.  
  54. #define USE_FIXED_CPU_NUMBER 1
  55. #if USE_FIXED_CPU_NUMBER
  56.         int             cpu;
  57.         cpu_set_t       cpuset;
  58.         pthread_t       thread;
  59.  
  60.         thread = pthread_self();
  61.         cpu = sched_getcpu();
  62.  
  63.         CPU_ZERO(&cpuset);
  64.         CPU_SET(cpu, &cpuset);
  65.  
  66.         if (pthread_setaffinity_np(thread, sizeof(cpuset), &cpuset) == 0) {
  67.                 (void)printf("# pthread_setaffinity_np(), "
  68.                         "thread fixed to cpu %d\n",
  69.                         cpu);
  70.         }
  71.         else {
  72.                 /*
  73.                  * If you specify a CPU number beyond the maximum number
  74.                  * of CPUs, then |pthread_setaffinity_np()| can have a
  75.                  * |errno == 0|
  76.                  */
  77.                 perror("pthread_setaffinity_np");
  78.         }
  79. #endif /* USE_FIXED_CPU_NUMBER */
  80.  
  81.         struct timespec start, stop;
  82.         long long accum;
  83.         long long dummy = 0LL; /* use dummy variable to defeat optimiser */
  84.  
  85.         if (clock_gettime(CLOCK_REALTIME, &start) == -1) {
  86.                 perror("clock gettime");
  87.                 exit(EXIT_FAILURE);
  88.         }
  89. #define NUMTESTITERATIONS (1000000000LL)
  90.         for (li = 0 ; li < NUMTESTITERATIONS ; li++) {
  91.                 dummy += _rdtscp(&A);
  92.                 dummy += A;
  93.         }
  94.  
  95.         if (clock_gettime(CLOCK_REALTIME, &stop) == -1 ) {
  96.                 perror("clock gettime");
  97.                 exit(EXIT_FAILURE);
  98.         }
  99.  
  100. #define NSECPERSEC (1000000000LL)
  101.         accum = (stop.tv_sec - start.tv_sec)*NSECPERSEC +
  102.                 (stop.tv_nsec - start.tv_nsec);
  103.         (void)printf("# x86 rdtscp instruction:\t"
  104.                 "iterations=%lld\t"
  105.                 "time=%20.20f secs\n",
  106.                 li,
  107.                 (double)(((long double)accum)/((long double)NSECPERSEC)));
  108.  
  109.  
  110.  
  111.         if (clock_gettime(CLOCK_REALTIME, &start) == -1) {
  112.                 perror( "clock gettime" );
  113.                 exit(EXIT_FAILURE);
  114.         }
  115.  
  116.         for (li = 0 ; li < NUMTESTITERATIONS ; li++) {
  117.                 dummy += sched_getcpu();
  118.         }
  119.  
  120.  
  121.         if (clock_gettime(CLOCK_REALTIME, &stop) == -1) {
  122.                 perror("clock gettime");
  123.                 exit(EXIT_FAILURE);
  124.         }
  125.  
  126.         accum = (stop.tv_sec - start.tv_sec)*NSECPERSEC +
  127.                 (stop.tv_nsec - start.tv_nsec);
  128.         (void)printf("# |sched_getcpu()|:      \t"
  129.                 "iterations=%lld\t"
  130.                 "time=%20.20f secs\n",
  131.                 li,
  132.                 (double)(((long double)accum)/((long double)NSECPERSEC)));
  133.  
  134.         (void)dummy;
  135.  
  136.         (void)puts("#done.");
  137.  
  138.         return EXIT_SUCCESS;
  139. }
  140. + cat Makefile
  141. #
  142. # Makefile for x86_rdtscp2_perf.c - compare execution time x86 RDTSCP
  143. # instruction vs. |sched_getcpu()|
  144. #
  145.  
  146. all: \
  147.         x86_rdtscp2_perf_32 \
  148.         x86_rdtscp2_perf_64
  149.  
  150. # explicitly use -O0 to disable peephole optimisations
  151. x86_rdtscp2_perf_32: x86_rdtscp2_perf.c
  152.         gcc -std=gnu17 -m32 -Wall -g -O0 x86_rdtscp2_perf.c -lpthread -o x86_rdtscp2_perf_32
  153.  
  154. x86_rdtscp2_perf_64: x86_rdtscp2_perf.c
  155.         gcc -std=gnu17 -m64 -Wall -g -O0 x86_rdtscp2_perf.c -lpthread -o x86_rdtscp2_perf_64
  156.  
  157.  
  158. tests: \
  159.         x86_rdtscp2_perf_32 \
  160.         x86_rdtscp2_perf_64
  161.         ksh93 -c 'set -o xtrace ; \
  162.                 cat x86_rdtscp2_perf.c ; \
  163.                 cat Makefile ; \
  164.                 time ./x86_rdtscp2_perf_32 ; \
  165.                 time ./x86_rdtscp2_perf_64'
  166.  
  167. clean:
  168.         rm -f \
  169.                 x86_rdtscp2_perf_32 \
  170.                 x86_rdtscp2_perf_64
  171. # EOF.
  172. + ./x86_rdtscp2_perf_32
  173. #start.
  174. # pthread_setaffinity_np(), thread fixed to cpu 5
  175. # x86 rdtscp instruction:       iterations=1000000000   time=14.56760232699999946249 secs
  176. # |sched_getcpu()|:             iterations=1000000000   time=333.57820389099998692473 secs
  177. #done.
  178.  
  179. real    5m48.15s
  180. user    3m42.13s
  181. sys     2m5.94s
  182. + ./x86_rdtscp2_perf_64
  183. #start.
  184. # pthread_setaffinity_np(), thread fixed to cpu 3
  185. # x86 rdtscp instruction:       iterations=1000000000   time=12.15419475899999923740 secs
  186. # |sched_getcpu()|:             iterations=1000000000   time=13.76809202599999970573 secs
  187. #done.
  188.  
  189. real    0m25.94s
  190. user    0m25.92s
  191. sys     0m0.00s

Submit a correction or amendment below (click here to make a fresh posting)
After submitting an amendment, you'll be able to view the differences between the old and new posts easily.

Syntax highlighting:

To highlight particular lines, prefix each line with {%HIGHLIGHT}




All content is user-submitted.
The administrators of this site (kpaste.net) are not responsible for their content.
Abuse reports should be emailed to us at