Your prime number enumeration code is flawed:
in the code initially posted, the for
loop in the benchmark
function had no side effect, so efficient compilers were able to optimise it and generate essentially no code. This explains the great disparity from one system to another.
in the last update, your algorithm does not compute the count of prime numbers; it merely performs a huge number of divisions and counts the number of times the remainder is zero. This is much more costly than an actual primality test, which is itself much less efficient than a Sieve of Eratosthenes.
For the purpose of measuring and comparing system performance, this method focuses exaggeratedly on the speed of the division opcode, and it shows a great variation between Linux, OS/X and Windows probably because of the size of type unsigned long
which is 64-bit on Linux and OS/X vs 32-bit on Windows, making the modulo operation faster on Windows, even for the same set of numbers. Furthermore this type of benchmark uses a single core, so it does not measure total system performance by a long shot.
Relative performance of the different systems should be measured using a more diversified set of operations, stressing the CPU, memory, storage and communications systems.
Regarding the prime number enumeration, here is a modified version with a prime test:
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
/*
 * Count the prime numbers in the range 2..x using trial division.
 *
 * x is truncated toward zero to obtain the inclusive upper bound.
 * Returns the number of primes not exceeding that bound.  If x is
 * negative, NaN, or too large to convert to unsigned long long, a
 * diagnostic is printed on stdout and 0 is returned.
 */
unsigned long long bench(double x) {
    /* The range test is written in the positive so that NaN, for which
       every comparison is false, is rejected as well: converting NaN to
       an integer type has undefined behavior. */
    if (!(x >= 0 && x < ULLONG_MAX)) {
        printf("invalid benchmark range\n");
        return 0;
    }
    unsigned long long n = (unsigned long long)x;
    unsigned long long count = 0;
    if (n >= 2) {
        count++;    /* 2 is the only even prime */
    }
    /* only odd candidates need to be tested */
    for (unsigned long long p = 3; p <= n; p += 2) {
        count++;    /* assume p is prime until a divisor is found */
        /* p is odd, so only odd divisors up to sqrt(p) are tried;
           NOTE: with a 64-bit unsigned long long, i * i can only wrap
           for p >= (2^32 - 1)^2, far beyond any practical load. */
        for (unsigned long long i = 3; i * i <= p; i += 2) {
            if (p % i == 0) {
                count--;    /* p is composite: undo the increment */
                break;
            }
        }
    }
    return count;
}
/*
 * Interactive driver: reads a load value and a repeat count, runs
 * bench() that many times on load * 100000, and prints the processor
 * time reported by clock() for each run followed by the average.
 *
 * Returns 0 on success, 1 if either input cannot be parsed or the
 * repeat count is not positive.
 */
int main(void) {
    double x;
    int y;
    int c;
    clock_t total = 0;
    unsigned long long count;
    double time_taken;

    printf("\nPCB v0.1\nOpen-source Tool for Benchmarking System Speed.\n\nRecommended Load Value 1 - 3\n");
    printf("\nEnter load value: ");
    if (scanf("%lf", &x) != 1)
        return 1;
    printf("\nEnter repeat count: ");
    if (scanf("%d", &y) != 1)
        return 1;
    if (y <= 0) {
        /* nothing would be run and the average would be meaningless */
        printf("invalid repeat count\n");
        return 1;
    }
    x = x * 100000;

    printf("\nPress Enter to Run ");
    /* discard whatever follows the repeat count on its input line,
       including the newline left behind by scanf */
    while ((c = getchar()) != EOF && c != '\n')
        continue;
    /* then wait for the user to press Enter */
    while ((c = getchar()) != EOF && c != '\n')
        continue;

    printf("\n(...Running...)\n");
    for (int z = 0; z < y; z++) {
        clock_t t;
        t = clock();
        count = bench(x);
        t = clock() - t;
        total += t;
        time_taken = ((double)t) / CLOCKS_PER_SEC; // in seconds
        printf("\n%llu primes, time taken #%d = %.4f seconds\n", count, z, time_taken);
    }
    time_taken = ((double)total) / CLOCKS_PER_SEC; // in seconds
    printf("\nAverage time taken = %.4f seconds\n", time_taken / y);

    printf("\nPress Enter to Exit ");
    getchar();
    return 0;
}
Output:
PCB v0.1
Open-source Tool for Benchmarking System Speed.
Recommended Load Value 1 - 3
Enter load value: 1
Enter repeat count: 5
Press Enter to Run
(...Running...)
9592 primes, time taken #0 = 0.0126 seconds
9592 primes, time taken #1 = 0.0117 seconds
9592 primes, time taken #2 = 0.0133 seconds
9592 primes, time taken #3 = 0.0136 seconds
9592 primes, time taken #4 = 0.0137 seconds
Average time taken = 0.0130 seconds
Press Enter to Exit
This is almost 2000x faster than the initial code on my laptop.
Running a load of 100 gives this output:
PCB v0.1
Open-source Tool for Benchmarking System Speed.
Recommended Load Value 1 - 3
Enter load value: 100
Enter repeat count: 5
Press Enter to Run
(...Running...)
664579 primes, time taken #0 = 7.4249 seconds
664579 primes, time taken #1 = 7.3742 seconds
664579 primes, time taken #2 = 7.4119 seconds
664579 primes, time taken #3 = 7.3887 seconds
664579 primes, time taken #4 = 7.6725 seconds
Average time taken = 7.4544 seconds
Press Enter to Exit
Which is still much slower than a sieve:
$ chqrlie > time prime -c 1..10000000
664579
real 0m0.009s
user 0m0.006s
sys 0m0.001s
Here is a simplistic implementation using the Sieve approach. It is not quite as fast as the optimised one used in my primes
utility, but it still achieves an average time of 0.0773 seconds
for a load of 100, a 100x improvement over the prime test loop:
/*
 * Count the prime numbers in the range 2..x with a simplistic
 * Sieve of Eratosthenes.
 *
 * x is truncated toward zero to obtain the inclusive upper bound; one
 * byte of heap memory is used per number in 0..x.
 * Returns the number of primes not exceeding that bound.  If x is
 * negative, NaN, or too large for size_t, or if the sieve array cannot
 * be allocated, a diagnostic is printed on stdout and 0 is returned.
 *
 * Needs <stdint.h> for SIZE_MAX and <stdlib.h> for calloc() and free().
 */
unsigned long long bench(double x) {
    /* The range test is written in the positive so that NaN, for which
       every comparison is false, is rejected as well: converting NaN to
       an integer type has undefined behavior. */
    if (!(x >= 0 && x < SIZE_MAX)) {
        printf("invalid benchmark range\n");
        return 0;
    }
    size_t count = 0;
    size_t n = (size_t)x + 1;   // array size: one flag per number in 0..x
    if (n > 1) {
        // a[i] == 0 means i is still a prime candidate, 1 means composite
        unsigned char *a = calloc(n, 1);
        if (a == NULL) {
            printf("cannot allocate memory\n");
            return 0;
        }
        // 0 and 1 are considered composite
        a[0] = a[1] = 1;
        // flag all multiples of 2 as composite
        for (size_t i = 4; i < n; i += 2) {
            a[i] = 1;
        }
        // only odd candidates up to sqrt(n) need to be sieved
        for (size_t p = 3; p * p < n; p += 2) {
            if (a[p] == 0) {
                // p is prime: flag its odd multiples as composite, starting
                // at p * p since smaller ones have a smaller prime factor;
                // even multiples were flagged by the pass above
                for (size_t i = p * p; i < n; i += 2 * p) {
                    a[i] = 1;
                }
            }
        }
        // primes = total entries minus the number of composite flags
        count = n;
        for (size_t i = 0; i < n; i++) {
            count -= a[i];
        }
        free(a);
    }
    return count;
}