How can I benchmark C code easily?
Use the function clock(), defined in time.h:
/*
 * Time a region of code with clock().
 *
 * The raw tick counts are kept as clock_t and only their difference is
 * converted to seconds: casting each clock() reading to float first would
 * drop low-order ticks once the counter no longer fits in float's mantissa.
 */
clock_t startTime = clock();
/* Do work */
clock_t endTime = clock();
double timeElapsed = (double)(endTime - startTime) / CLOCKS_PER_SEC;
Basically, all you want is a high-resolution timer. The elapsed time is of course just the difference between two readings, and the speedup is calculated by dividing the elapsed times of the two tasks. I have included the code for a high-resolution timer that should work on at least Windows and Unix.
#ifdef WIN32#include <windows.h>double get_time(){ LARGE_INTEGER t, f; QueryPerformanceCounter(&t); QueryPerformanceFrequency(&f); return (double)t.QuadPart/(double)f.QuadPart;}#else#include <sys/time.h>#include <sys/resource.h>double get_time(){ struct timeval t; struct timezone tzp; gettimeofday(&t, &tzp); return t.tv_sec + t.tv_usec*1e-6;}#endif
Benchmark C code easily
#include <stdio.h>  /* printf */
#include <time.h>   /* clock, clock_t, CLOCKS_PER_SEC */

/*
 * Minimal benchmark skeleton: measures the processor time consumed between
 * the two clock() calls and prints it in seconds.
 */
int main(void)
{
    clock_t start_time = clock();

    /* code or function to benchmark */

    /* Subtract the raw tick counts first, then convert to seconds. */
    double elapsed_time = (double)(clock() - start_time) / CLOCKS_PER_SEC;
    printf("Done in %f seconds\n", elapsed_time);
    return 0;
}
Easy benchmark of multi-threaded C code
If you want to benchmark a multithreaded program, you first need to take a closer look at clock():
Description
The clock() function returns an approximation of processor time used by the program.
Return value
The value returned is the CPU time used so far as a clock_t; to get the number of seconds used, divide by CLOCKS_PER_SEC. If the processor time used is not available or its value cannot be represented, the function returns the value (clock_t)(-1).
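Because clock() reports the processor time used by the whole program, the time spent by every thread is added together. Hence it is very important to divide your elapsed_time by the number of threads in order to estimate the execution time of your function (the estimate assumes that all threads were busy for the whole run):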
#include <time.h>#include <omp.h>#define THREADS_NB omp_get_max_threads()#pragma omp parallel for private(i) num_threads(THREADS_NB)clock_t start_time = clock();// code or function to benchmarkdouble elapsed_time = (double)(clock() - start_time) / CLOCKS_PER_SEC;printf("Done in %f seconds\n", elapsed_time / THREADS_NB); // divide by THREADS_NB!
Example
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <time.h>
#include <omp.h>

#define N 20000
#define THREADS_NB omp_get_max_threads()

/*
 * Zero both N-element arrays, then add 1.0 to every element.
 *
 * a, b: arrays of at least N doubles each.
 */
void init_arrays(double *a, double *b)
{
    /* a and b are pointers here, so sizeof(a) would only be the pointer
     * size; spell out the full array size instead. */
    memset(a, 0, N * sizeof *a);
    memset(b, 0, N * sizeof *b);
    for (int i = 0; i < N; i++) {
        a[i] += 1.0;
        b[i] += 1.0;
    }
}

/*
 * Artificial workload: accumulate i / j while the quotient stays positive,
 * decreasing i by 0.1 and j by 0.000003 on every iteration.
 */
double func2(double i, double j)
{
    double res = 0.0;

    while (i / j > 0.0) {
        res += i / j;
        i -= 0.1;
        j -= 0.000003;
    }
    return res;
}

/* Sum func2(a[i], b[j]) over all pairs with i != j, on a single thread. */
double single_thread(double *a, double *b)
{
    double res = 0;
    int i, j;

    for (i = 0; i < N; i++) {
        for (j = 0; j < N; j++) {
            if (i == j)
                continue;
            res += func2(a[i], b[j]);
        }
    }
    return res;
}

/*
 * Same sum as single_thread(), with the outer loop split across
 * THREADS_NB OpenMP threads; the per-thread partial sums are combined by
 * the reduction clause.
 */
double multi_threads(double *a, double *b)
{
    double res = 0;
    int i, j;

    #pragma omp parallel for private(j) num_threads(THREADS_NB) reduction(+:res)
    for (i = 0; i < N; i++) {
        for (j = 0; j < N; j++) {
            if (i == j)
                continue;
            res += func2(a[i], b[j]);
        }
    }
    return res;
}

/*
 * Run the workload once single-threaded and once with OpenMP, printing the
 * result and the measured time of each run.
 */
int main(void)
{
    double *a = calloc(N, sizeof *a);
    double *b = calloc(N, sizeof *b);

    if (a == NULL || b == NULL) {
        fprintf(stderr, "out of memory\n");
        free(a);
        free(b);
        return EXIT_FAILURE;
    }

    init_arrays(a, b);

    clock_t start_time = clock();
    double res = single_thread(a, b);
    double elapsed_time = (double)(clock() - start_time) / CLOCKS_PER_SEC;
    printf("Default:  Done with %f in %f sd\n", res, elapsed_time);

    start_time = clock();
    res = multi_threads(a, b);
    elapsed_time = (double)(clock() - start_time) / CLOCKS_PER_SEC;
    /* clock() measures processor time used by the program, so the figure is
     * divided by the thread count to estimate the elapsed time. */
    printf("With OMP: Done with %f in %f sd\n", res, elapsed_time / THREADS_NB);

    free(a);
    free(b);
    return EXIT_SUCCESS;
}
Compile with:
gcc -O3 multithread_benchmark.c -fopenmp && time ./a.out
Output:
Default:  Done with 2199909813.614555 in 4.909633 sd
With OMP: Done with 2199909799.377532 in 1.708831 sd
real 0m6.703s (from time function)