- rmainz@DERFWNB4966:~/tmp/sse_vs_387_perf1$ cat sse_vs_387_perf1.c
- #include <stdlib.h>
- #include <stdio.h>
- #include <math.h>
/* Element type used for the benchmark array and all helper arguments. */
typedef double myartype;

/* Number of elements in the benchmark array: 2048 * 65536 = 2^27. */
#define AR_SIZE (2048UL * 65536UL)

/*
 * Global work buffer; calculate() zeroes it and then transforms it in place.
 * As a file-scope object it starts out zero-initialised.
 */
myartype ar[AR_SIZE];
- void ar_mul(myartype *restrict myar, size_t size, myartype value)
- {
- size_t i;
- for(i=0 ; i < size ; i++) {
- myar[i] *= value;
- }
- }
- void ar_div(myartype *restrict myar, size_t size, myartype value)
- {
- size_t i;
- for(i=0 ; i < size ; i++) {
- myar[i] /= value;
- }
- }
- void ar_add(myartype *restrict myar, size_t size, myartype value)
- {
- size_t i;
- for(i=0 ; i < size ; i++) {
- myar[i] += value;
- }
- }
- void ar_nextafter(myartype *restrict myar, size_t size, myartype value)
- {
- size_t i;
- for(i=0 ; i < size ; i++) {
- myar[i] = nextafter(myar[i], value);
- }
- }
- void calculate(size_t ar_size, int j_max)
- {
- size_t i;
- int j;
- for(i=0 ; i < ar_size ; i++) {
- ar[i] = 0.0;
- }
- for(j=0 ; j < j_max ; j++) {
- for(i=0 ; i < ar_size ; i++) {
- ar[i] += ((myartype)i) / ((myartype)j_max);
- }
- }
- ar_add(ar, ar_size, +39.3);
- ar_add(ar, ar_size, -13.1);
- ar_add(ar, ar_size, -13.1);
- ar_add(ar, ar_size, -13.1);
- ar_nextafter(ar, ar_size, INFINITY);
- ar_nextafter(ar, ar_size, INFINITY);
- ar_nextafter(ar, ar_size, -INFINITY);
- ar_nextafter(ar, ar_size, -INFINITY);
- ar_mul(ar, ar_size, 7.1);
- ar_mul(ar, ar_size, 7.1);
- ar_mul(ar, ar_size, 7.1);
- ar_mul(ar, ar_size, 7.1);
- ar_mul(ar, ar_size, 7.1);
- ar_mul(ar, ar_size, 7.1);
- ar_div(ar, ar_size, 7.1);
- ar_div(ar, ar_size, 7.1);
- ar_div(ar, ar_size, 7.1);
- ar_div(ar, ar_size, 7.1);
- ar_div(ar, ar_size, 7.1);
- ar_div(ar, ar_size, 7.1);
- }
- int main(int ac, char *av[])
- {
- calculate(AR_SIZE, 7);
- return EXIT_SUCCESS;
- }
- rmainz@DERFWNB4966:~/tmp/sse_vs_387_perf1$ cat Makefile
#
# sse_vs_387_perf1/Makefile
#
# Builds sse_vs_387_perf1.c twice as a 32-bit binary -- once with
# $(MATHSSEFLAGS) and once with $(MATH387FLAGS) -- and times both.
#

CSTD=-std=c17
ARCHFLAGS=-m32 -mtune=generic
MATHSSEFLAGS=-mfpmath=sse -msse2
# Left disabled: MATH387FLAGS therefore expands to nothing and the "nosse"
# binary is built with the compiler's default floating-point code generation.
#MATH387FLAGS=-mfpmath=387
OPTFLAGS=-O2 -fstrict-aliasing
CC=gcc

# These targets name actions, not files, so they must always be run even if
# a file called "all", "tests" or "clean" happens to exist.
.PHONY: all tests clean

all: tests

# Variant compiled with SSE2 floating-point math.
sse_vs_387_perf1.sse_$(CC): sse_vs_387_perf1.c
	$(CC) $(CSTD) $(ARCHFLAGS) $(MATHSSEFLAGS) -g -Wall $(OPTFLAGS) $< -lm -o $@

# Variant compiled with $(MATH387FLAGS) (see the note above).
sse_vs_387_perf1.nosse_$(CC): sse_vs_387_perf1.c
	$(CC) $(CSTD) $(ARCHFLAGS) $(MATH387FLAGS) -g -Wall $(OPTFLAGS) $< -lm -o $@

# Build both variants, then time each one in turn under ksh93.
tests: \
	sse_vs_387_perf1.sse_$(CC) \
	sse_vs_387_perf1.nosse_$(CC)
	ksh93 -c 'set -o xtrace ; for i in $^ ; do time "./$$i" ; done'

# Remove both generated binaries.
clean:
	rm -f \
		sse_vs_387_perf1.sse_$(CC) \
		sse_vs_387_perf1.nosse_$(CC)

# EOF.
sse_vs_387_perf1
Posted by Anonymous on Mon 19th Sep 2022 16:52
raw | new post
Submit a correction or amendment below (click here to make a fresh posting)
After submitting an amendment, you'll be able to view the differences between the old and new posts easily.