- rmainz@derfwpc5131:~/work/x86_rdtscp/try3_perf$ cat x86_rdtscp2_perf.c
- /*
- * x86_rdtscp2_perf.c - compare the execution time of the x86 RDTSCP
- * instruction vs. |sched_getcpu()|
- *
- * Compile with:
- * $ gcc -std=gnu17 -m32 -Wall x86_rdtscp2_perf.c -lpthread
- *
- * Written by Roland Mainz <roland.mainz@nrubsig.org>
- *
- */
- #define _XOPEN_SOURCE 700
- #define _GNU_SOURCE 1
- #include <stdio.h>
- #include <stdlib.h>
- #include <errno.h>
- #include <pthread.h>
- #include <sched.h>
- #include <x86intrin.h>
/*
 * Nanoseconds per second.
 *
 * This must be a signed 64-bit constant: with an |unsigned long|
 * constant the elapsed-time arithmetic is carried out in 32 bits on
 * ILP32 (-m32) builds and silently wraps for any interval longer
 * than ~4.29 seconds.
 */
#define NSECPERSEC (1000000000LL)

/*
 * Pin the benchmark thread to the CPU it is currently running on.
 * Build with -DUSE_FIXED_CPU_NUMBER=0 to leave the thread unpinned.
 */
#ifndef USE_FIXED_CPU_NUMBER
#define USE_FIXED_CPU_NUMBER 1
#endif

/* number of calls timed for each of the two candidates */
enum { NUM_ITERATIONS = 100000000 };

#if USE_FIXED_CPU_NUMBER
/*
 * pin_to_current_cpu - restrict the calling thread to its current CPU
 *
 * Failures are reported on stderr but are not fatal; the benchmark
 * then simply runs without a fixed CPU.
 */
static void pin_to_current_cpu(void)
{
	cpu_set_t cpuset;
	int cpu;
	int rc;

	cpu = sched_getcpu();
	if (cpu < 0) {
		/* |sched_getcpu()| returns -1 and sets |errno| on failure */
		perror("sched_getcpu");
		return;
	}

	CPU_ZERO(&cpuset);
	CPU_SET(cpu, &cpuset);

	rc = pthread_setaffinity_np(pthread_self(), sizeof(cpuset), &cpuset);
	if (rc == 0) {
		(void)printf("# pthread_setaffinity_np(), "
			"thread fixed to cpu %d\n",
			cpu);
	}
	else {
		/*
		 * |pthread_setaffinity_np()| returns the error number
		 * and does not set |errno|, so hand the return value
		 * to |perror()| explicitly.
		 */
		errno = rc;
		perror("pthread_setaffinity_np");
	}
}
#endif /* USE_FIXED_CPU_NUMBER */

/*
 * get_time_or_exit - read the interval clock into |*ts|
 *
 * Uses |CLOCK_MONOTONIC| so that wall-clock adjustments during the
 * run cannot distort the measured interval. Prints a message and
 * terminates the process if the clock cannot be read.
 */
static void get_time_or_exit(struct timespec *ts)
{
	if (clock_gettime(CLOCK_MONOTONIC, ts) == -1) {
		perror("clock gettime");
		exit(EXIT_FAILURE);
	}
}

/*
 * elapsed_ns - nanoseconds between |*start| and |*stop|
 *
 * All operands are widened to |long long| before the multiplication
 * so that the result is correct for both -m32 and -m64 builds.
 */
static long long elapsed_ns(const struct timespec *start,
	const struct timespec *stop)
{
	long long sec = (long long)stop->tv_sec - (long long)start->tv_sec;
	long long nsec = (long long)stop->tv_nsec - (long long)start->tv_nsec;

	return sec * NSECPERSEC + nsec;
}

/*
 * main - time NUM_ITERATIONS executions of the x86 RDTSCP instruction
 * against the same number of |sched_getcpu()| calls and print the
 * elapsed time of each loop in seconds.
 *
 * The command-line arguments are not used.
 * Returns EXIT_SUCCESS, or exits with EXIT_FAILURE if the clock
 * cannot be read.
 */
int main(int ac, char *av[])
{
	struct timespec start, stop;
	long long accum;
	unsigned int A = 0; /* auxiliary value stored by |_rdtscp()| */
	/*
	 * Accumulate the results in a dummy variable to defeat the
	 * optimiser; unsigned so that wrap-around is well-defined.
	 */
	unsigned long long dummy = 0ULL;

	(void)ac;
	(void)av;

	(void)puts("#start.");

#if USE_FIXED_CPU_NUMBER
	pin_to_current_cpu();
#endif /* USE_FIXED_CPU_NUMBER */

	/* candidate 1: x86 RDTSCP instruction */
	get_time_or_exit(&start);
	for (long l = 0 ; l < NUM_ITERATIONS ; l++) {
		dummy += _rdtscp(&A);
		dummy += A;
	}
	get_time_or_exit(&stop);

	accum = elapsed_ns(&start, &stop);
	(void)printf("# time needed for x86 rdtscp instruction:\t%20.20f\n",
		(double)((long double)accum/(long double)NSECPERSEC));

	/* candidate 2: |sched_getcpu()| */
	get_time_or_exit(&start);
	for (long l = 0 ; l < NUM_ITERATIONS ; l++) {
		dummy += (unsigned long long)sched_getcpu();
	}
	get_time_or_exit(&stop);

	accum = elapsed_ns(&start, &stop);
	(void)printf("# time needed for |sched_getcpu()|: \t%20.20f\n",
		(double)((long double)accum/(long double)NSECPERSEC));

	(void)dummy;
	(void)puts("#done.");

	return EXIT_SUCCESS;
}
- rmainz@derfwpc5131:~/work/x86_rdtscp/try3_perf$ cat Makefile
#
# Makefile for x86_rdtscp2_perf.c - compare the execution time of the
# x86 RDTSCP instruction vs. |sched_getcpu()|
#

# These targets name actions, not files; declaring them phony keeps a
# stray file called "all", "tests" or "clean" from disabling them.
.PHONY: all tests clean

# default target: build both the 32-bit and the 64-bit benchmark
all: \
	x86_rdtscp2_perf_32 \
	x86_rdtscp2_perf_64

# explicitly use -O0 to disable peephole optimisations
x86_rdtscp2_perf_32: x86_rdtscp2_perf.c
	gcc -std=gnu17 -m32 -Wall -g -O0 x86_rdtscp2_perf.c -lpthread -o x86_rdtscp2_perf_32

x86_rdtscp2_perf_64: x86_rdtscp2_perf.c
	gcc -std=gnu17 -m64 -Wall -g -O0 x86_rdtscp2_perf.c -lpthread -o x86_rdtscp2_perf_64

# run both binaries under ksh93's |time| so that the wall-clock time can
# be compared with the intervals the program itself reports
tests: \
	x86_rdtscp2_perf_32 \
	x86_rdtscp2_perf_64
	ksh93 -c 'set -o xtrace ; time ./x86_rdtscp2_perf_32 ; time ./x86_rdtscp2_perf_64'

# remove the generated binaries
clean:
	rm -f \
		x86_rdtscp2_perf_32 \
		x86_rdtscp2_perf_64

# EOF.
- rmainz@derfwpc5131:~/work/x86_rdtscp/try3_perf$ make tests
- ksh93 -c 'set -o xtrace ; time ./x86_rdtscp2_perf_32 ; time ./x86_rdtscp2_perf_64'
- + ./x86_rdtscp2_perf_32
- #start.
- # pthread_setaffinity_np(), thread fixed to cpu 0
- # time needed for x86 rdtscp instruction: 1.29958247999999998434
- # time needed for |sched_getcpu()|: 3.39503941599999992107
- #done.
- real 0m34.77s
- user 0m21.45s
- sys 0m13.29s
- + ./x86_rdtscp2_perf_64
- #start.
- # pthread_setaffinity_np(), thread fixed to cpu 6
- # time needed for x86 rdtscp instruction: 1.20903293999999994490
- # time needed for |sched_getcpu()|: 1.38745017499999989674
- #done.
- real 0m2.60s
- user 0m2.60s
- sys 0m0.00s
x86_rdtscp2_perf.c - compare execution time x86 RDTSCP instruction vs. |sched_getcpu()|
Posted by Anonymous on Sat 26th Nov 2022 15:28
raw | new post
view followups (newest first): x86_rdtscp2_perf.c - compare execution time x86 RDTSCP instruction vs. |sched_getcpu()| by Anonymous
Submit a correction or amendment below (click here to make a fresh posting)
After submitting an amendment, you'll be able to view the differences between the old and new posts easily.