K
Kenneth P. Turvey
I saw the monster thread in this group comparing the performance of
a single library function between C++ and Java and it gave me an idea.
In an earlier discussion in this newsgroup we discussed the fact that
Intel hardware might cause Java to perform poorly on transcendental
functions due to the poor accuracy of its implementation of the functions
in its processor.
I had read some time ago (when I was pulling my hair out working on
Fourier transforms of images) that this was the reason I was getting
such poor performance out of Java on these problems.
I had never tested it. So today I wrote two similar programs, one in C
and one in Java, that just produced random numbers and then calculated
the sine and cosine of them. (Please don't tell anyone in comp.lang.c,
one pointless flame war is enough).
These numbers showed Java to be much less efficient in doing sines
and cosines, but it still left the question open as to whether this
performance lag was due to the math involved itself or just some
inefficiency in the way Java was processing the loop or the array
lookups. So I ran another test to see how the performance compared on
the same program when the transcendental functions were replaced with
simple floating-point arithmetic. In this case Java outperformed C. So
it looks as if the Java compilers are, in fact, having to work around an
Intel bug on this platform. This workaround is quite costly, as can be
seen in the data below. I had hoped that Intel would have fixed this
problem by now, but I guess not.
For the purposes of comparison would someone please run the test
programs on a platform that does not use an Intel processor? I would
like to know if Java performs better on these platforms when compared to
the C implementation.
Thanks!
The results:
Sine/Cosine
Environment Calculations* Seconds
--------------------------------------------
C 40,000,000 3.118970
sun java 6.0 40,000,000 6.543
ibm java 6.0 40,000,000 12.669
ibm java 5.0 40,000,000 9.82
C 80,000,000 6.155521
sun java 6.0 80,000,000 13.038
ibm java 6.0 80,000,000 25.404
ibm java 5.0 80,000,000 12.351
Multiplication/Division
Environment Calculations* Seconds
--------------------------------------------
C 40,000,000 0.502109
sun java 6.0 40,000,000 0.432
ibm java 6.0 40,000,000 0.424
ibm java 5.0 40,000,000 0.427
C 80,000,000 1.005564
sun java 6.0 80,000,000 0.86
ibm java 6.0 80,000,000 0.855
ibm java 5.0 80,000,000 0.855
* Note that this is double the argument to the program
** These results are all based on the best of 10 runs.
--Java Program---------------------------------------
/**
 * Micro-benchmark that fills an array with random values in [0, 2*pi) and
 * times a per-element floating-point loop over it.
 *
 * As written, the timed loop performs one multiplication and one division per
 * element; the commented-out lines switch it to the sine/cosine variant.
 */
public class Main {
    /**
     * Parses the command line, then runs and reports the benchmark.
     *
     * @param args {@code <nums> [runs] [seed]}: the number of values per run
     *             (must be at least 1), the number of timed runs (default 1),
     *             and an optional seed for the random number generator.
     *             Any malformed or out-of-range argument list prints the
     *             usage message and exits with status -1.
     */
    public static void main(String[] args) {
        int nums = 10;
        int runs = 1;
        // Fully qualified so the class compiles without a separate import line.
        java.util.Random random = new java.util.Random();
        try {
            // Must be ||: "fewer than 1 AND more than 3" can never be true,
            // which let surplus arguments slip through unnoticed.
            if (args.length < 1 || args.length > 3) {
                throw new IllegalArgumentException();
            }
            nums = Integer.parseInt(args[0]);
            // A non-positive count would fail later (array allocation or
            // nextInt(nums)) with an uncaught exception; reject it here.
            if (nums < 1) {
                throw new IllegalArgumentException();
            }
            if (args.length >= 2) {
                runs = Integer.parseInt(args[1]);
            }
            if (args.length == 3) {
                random = new java.util.Random(Integer.parseInt(args[2]));
            }
        }
        catch (Exception e) {
            System.err.println("Usage: Transcendental.jar <nums> [runs] [seed]");
            System.exit(-1);
        }
        double[] values = new double[nums];
        double[] sins = new double[nums];
        double[] coss = new double[nums];
        for (int run = 0; run < runs; run++) {
            // Input generation is deliberately kept outside the timed region.
            for (int index = 0; index < nums; index++) {
                values[index] = random.nextDouble() * 2.0 * Math.PI;
            }
            long startMillis = System.currentTimeMillis();
            for (int index = 0; index < nums; index++) {
                // Swap the two pairs of lines below to time sin/cos instead
                // of plain multiplication/division.
                //sins[index] = Math.sin(values[index]);
                //coss[index] = Math.cos(values[index]);
                sins[index] = values[index] * 17.0;
                coss[index] = values[index] / 23.0;
            }
            long endMillis = System.currentTimeMillis();
            System.out.println("Runtime: " + (endMillis - startMillis) / 1000.0
                    + " seconds");
            // Print one randomly chosen element so the results are observably used.
            int index = random.nextInt(nums);
            System.out.println("Example: value: " + values[index] + " Sine: "
                    + sins[index] + " Cosine: " + coss[index]);
        }
    }
}
------------------------------
---C Program------------------
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
#include <time.h>
#define PI 3.14159265358979323846
double get_seconds(struct timeval end_time, struct timeval start_time);
/*
 * Benchmark driver.
 *
 * Usage: transcendental <num> [runs] [seed]
 *   num   number of values processed per run (must be at least 1)
 *   runs  number of timed runs (default 10)
 *   seed  seed for rand(); the current time is used when omitted
 *
 * Each run fills an array with random values in [0, 2*PI], times a loop that
 * performs one multiplication and one division per element (or sin/cos when
 * the commented-out lines are swapped in), and prints the elapsed time plus
 * one randomly chosen result.
 *
 * Returns 0 on success and EXIT_FAILURE on a bad command line or when the
 * buffers cannot be allocated.
 */
int main(int argc, char* argv[]) {
    int nums = 10;
    int runs = 10;
    int seed = 0;

    /* Validate the argument count and every numeric argument that is present. */
    int args_ok = (argc >= 2 && argc <= 4);
    if (args_ok) {
        args_ok = (sscanf(argv[1], "%d", &nums) == 1 && nums > 0);
    }
    if (args_ok && argc >= 3) {
        args_ok = (sscanf(argv[2], "%d", &runs) == 1);
    }
    if (args_ok && argc == 4) {
        args_ok = (sscanf(argv[3], "%d", &seed) == 1);
    }
    if (!args_ok) {
        fprintf(stderr, "Usage: transcendental <num> [runs] [seed]\n");
        return EXIT_FAILURE;
    }

    /* time(), not times(): seed from the wall clock unless a seed was given. */
    srand(argc == 4 ? (unsigned int) seed : (unsigned int) time(NULL));

    double *values = malloc((size_t) nums * sizeof *values);
    double *sins = malloc((size_t) nums * sizeof *sins);
    double *coss = malloc((size_t) nums * sizeof *coss);
    if (values == NULL || sins == NULL || coss == NULL) {
        fprintf(stderr, "transcendental: out of memory\n");
        free(values);
        free(sins);
        free(coss);
        return EXIT_FAILURE;
    }

    int run;
    for (run = 0; run < runs; run++) {
        int index;
        /* Input generation is deliberately kept outside the timed region. */
        for (index = 0; index < nums; index++) {
            values[index] = rand() * 2.0 * PI / RAND_MAX;
        }
        struct timeval start;
        struct timeval end;
        gettimeofday(&start, NULL);
        for (index = 0; index < nums; index++) {
            // Swap the two pairs of lines below to time sin/cos instead of
            // plain multiplication/division.
            //sins[index] = sin(values[index]);
            //coss[index] = cos(values[index]);
            sins[index] = values[index] * 17.0;
            coss[index] = values[index] / 23.0;
        }
        gettimeofday(&end, NULL);
        printf("Runtime: %lf seconds\n", get_seconds(end, start));
        // Print one out at random so the compiler can't optimize it away.
        // Dividing by RAND_MAX + 1.0 keeps the index strictly below nums even
        // when rand() returns RAND_MAX.
        index = (int) (rand() / (RAND_MAX + 1.0) * nums);
        printf("Example: value: %lf sine: %lf cos: %lf\n",
               values[index], sins[index], coss[index]);
    }

    free(values);
    free(sins);
    free(coss);
    return 0;
}
/*
 * Returns the elapsed time in seconds from start_time to end_time.
 *
 * The seconds and microseconds fields are subtracted separately and only then
 * combined. Converting each full timestamp to a double first leaves too few
 * mantissa bits for the fraction when tv_sec holds a large epoch value, so
 * microsecond-level differences would be rounded.
 *
 * The result is negative when end_time precedes start_time.
 */
double get_seconds(struct timeval end_time, struct timeval start_time) {
    double whole_seconds = (double) (end_time.tv_sec - start_time.tv_sec);
    double micro_seconds = (double) (end_time.tv_usec - start_time.tv_usec);
    return whole_seconds + micro_seconds / 1000000.0;
}
a single library function between C++ and Java and it gave me an idea.
In an earlier discussion in this newsgroup we discussed the fact that
Intel hardware might cause Java to perform poorly on transcendental
functions due to the poor accuracy of its implementation of the functions
in its processor.
I had read some time ago (when I was pulling my hair out working on
Fourier transforms of images) that this was the reason I was getting
such poor performance out of Java on these problems.
I had never tested it. So today I wrote two similar programs, one in C
and one in Java, that just produced random numbers and then calculated
the sine and cosine of them. (Please don't tell anyone in comp.lang.c,
one pointless flame war is enough).
These numbers showed Java to be much less efficient in doing sines
and cosines, but it still left the question open as to whether this
performance lag was due to the math involved itself or just some
inefficiency in the way Java was processing the loop or the array
lookups. So I ran another test to see how the performance compared on
the same program when the transcendental functions were replaced with
simple floating-point arithmetic. In this case Java outperformed C. So
it looks as if the Java compilers are, in fact, having to work around an
Intel bug on this platform. This workaround is quite costly, as can be
seen in the data below. I had hoped that Intel would have fixed this
problem by now, but I guess not.
For the purposes of comparison would someone please run the test
programs on a platform that does not use an Intel processor? I would
like to know if Java performs better on these platforms when compared to
the C implementation.
Thanks!
The results:
Sine/Cosine
Environment Calculations* Seconds
--------------------------------------------
C 40,000,000 3.118970
sun java 6.0 40,000,000 6.543
ibm java 6.0 40,000,000 12.669
ibm java 5.0 40,000,000 9.82
C 80,000,000 6.155521
sun java 6.0 80,000,000 13.038
ibm java 6.0 80,000,000 25.404
ibm java 5.0 80,000,000 12.351
Multiplication/Division
Environment Calculations* Seconds
--------------------------------------------
C 40,000,000 0.502109
sun java 6.0 40,000,000 0.432
ibm java 6.0 40,000,000 0.424
ibm java 5.0 40,000,000 0.427
C 80,000,000 1.005564
sun java 6.0 80,000,000 0.86
ibm java 6.0 80,000,000 0.855
ibm java 5.0 80,000,000 0.855
* Note that this is double the argument to the program
** These results are all based on the best of 10 runs.
--Java Program---------------------------------------
/**
 * Micro-benchmark that fills an array with random values in [0, 2*pi) and
 * times a per-element floating-point loop over it.
 *
 * As written, the timed loop performs one multiplication and one division per
 * element; the commented-out lines switch it to the sine/cosine variant.
 */
public class Main {
    /**
     * Parses the command line, then runs and reports the benchmark.
     *
     * @param args {@code <nums> [runs] [seed]}: the number of values per run
     *             (must be at least 1), the number of timed runs (default 1),
     *             and an optional seed for the random number generator.
     *             Any malformed or out-of-range argument list prints the
     *             usage message and exits with status -1.
     */
    public static void main(String[] args) {
        int nums = 10;
        int runs = 1;
        // Fully qualified so the class compiles without a separate import line.
        java.util.Random random = new java.util.Random();
        try {
            // Must be ||: "fewer than 1 AND more than 3" can never be true,
            // which let surplus arguments slip through unnoticed.
            if (args.length < 1 || args.length > 3) {
                throw new IllegalArgumentException();
            }
            nums = Integer.parseInt(args[0]);
            // A non-positive count would fail later (array allocation or
            // nextInt(nums)) with an uncaught exception; reject it here.
            if (nums < 1) {
                throw new IllegalArgumentException();
            }
            if (args.length >= 2) {
                runs = Integer.parseInt(args[1]);
            }
            if (args.length == 3) {
                random = new java.util.Random(Integer.parseInt(args[2]));
            }
        }
        catch (Exception e) {
            System.err.println("Usage: Transcendental.jar <nums> [runs] [seed]");
            System.exit(-1);
        }
        double[] values = new double[nums];
        double[] sins = new double[nums];
        double[] coss = new double[nums];
        for (int run = 0; run < runs; run++) {
            // Input generation is deliberately kept outside the timed region.
            for (int index = 0; index < nums; index++) {
                values[index] = random.nextDouble() * 2.0 * Math.PI;
            }
            long startMillis = System.currentTimeMillis();
            for (int index = 0; index < nums; index++) {
                // Swap the two pairs of lines below to time sin/cos instead
                // of plain multiplication/division.
                //sins[index] = Math.sin(values[index]);
                //coss[index] = Math.cos(values[index]);
                sins[index] = values[index] * 17.0;
                coss[index] = values[index] / 23.0;
            }
            long endMillis = System.currentTimeMillis();
            System.out.println("Runtime: " + (endMillis - startMillis) / 1000.0
                    + " seconds");
            // Print one randomly chosen element so the results are observably used.
            int index = random.nextInt(nums);
            System.out.println("Example: value: " + values[index] + " Sine: "
                    + sins[index] + " Cosine: " + coss[index]);
        }
    }
}
------------------------------
---C Program------------------
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
#include <time.h>
#define PI 3.14159265358979323846
double get_seconds(struct timeval end_time, struct timeval start_time);
/*
 * Benchmark driver.
 *
 * Usage: transcendental <num> [runs] [seed]
 *   num   number of values processed per run (must be at least 1)
 *   runs  number of timed runs (default 10)
 *   seed  seed for rand(); the current time is used when omitted
 *
 * Each run fills an array with random values in [0, 2*PI], times a loop that
 * performs one multiplication and one division per element (or sin/cos when
 * the commented-out lines are swapped in), and prints the elapsed time plus
 * one randomly chosen result.
 *
 * Returns 0 on success and EXIT_FAILURE on a bad command line or when the
 * buffers cannot be allocated.
 */
int main(int argc, char* argv[]) {
    int nums = 10;
    int runs = 10;
    int seed = 0;

    /* Validate the argument count and every numeric argument that is present. */
    int args_ok = (argc >= 2 && argc <= 4);
    if (args_ok) {
        args_ok = (sscanf(argv[1], "%d", &nums) == 1 && nums > 0);
    }
    if (args_ok && argc >= 3) {
        args_ok = (sscanf(argv[2], "%d", &runs) == 1);
    }
    if (args_ok && argc == 4) {
        args_ok = (sscanf(argv[3], "%d", &seed) == 1);
    }
    if (!args_ok) {
        fprintf(stderr, "Usage: transcendental <num> [runs] [seed]\n");
        return EXIT_FAILURE;
    }

    /* time(), not times(): seed from the wall clock unless a seed was given. */
    srand(argc == 4 ? (unsigned int) seed : (unsigned int) time(NULL));

    double *values = malloc((size_t) nums * sizeof *values);
    double *sins = malloc((size_t) nums * sizeof *sins);
    double *coss = malloc((size_t) nums * sizeof *coss);
    if (values == NULL || sins == NULL || coss == NULL) {
        fprintf(stderr, "transcendental: out of memory\n");
        free(values);
        free(sins);
        free(coss);
        return EXIT_FAILURE;
    }

    int run;
    for (run = 0; run < runs; run++) {
        int index;
        /* Input generation is deliberately kept outside the timed region. */
        for (index = 0; index < nums; index++) {
            values[index] = rand() * 2.0 * PI / RAND_MAX;
        }
        struct timeval start;
        struct timeval end;
        gettimeofday(&start, NULL);
        for (index = 0; index < nums; index++) {
            // Swap the two pairs of lines below to time sin/cos instead of
            // plain multiplication/division.
            //sins[index] = sin(values[index]);
            //coss[index] = cos(values[index]);
            sins[index] = values[index] * 17.0;
            coss[index] = values[index] / 23.0;
        }
        gettimeofday(&end, NULL);
        printf("Runtime: %lf seconds\n", get_seconds(end, start));
        // Print one out at random so the compiler can't optimize it away.
        // Dividing by RAND_MAX + 1.0 keeps the index strictly below nums even
        // when rand() returns RAND_MAX.
        index = (int) (rand() / (RAND_MAX + 1.0) * nums);
        printf("Example: value: %lf sine: %lf cos: %lf\n",
               values[index], sins[index], coss[index]);
    }

    free(values);
    free(sins);
    free(coss);
    return 0;
}
/*
 * Returns the elapsed time in seconds from start_time to end_time.
 *
 * The seconds and microseconds fields are subtracted separately and only then
 * combined. Converting each full timestamp to a double first leaves too few
 * mantissa bits for the fraction when tv_sec holds a large epoch value, so
 * microsecond-level differences would be rounded.
 *
 * The result is negative when end_time precedes start_time.
 */
double get_seconds(struct timeval end_time, struct timeval start_time) {
    double whole_seconds = (double) (end_time.tv_sec - start_time.tv_sec);
    double micro_seconds = (double) (end_time.tv_usec - start_time.tv_usec);
    return whole_seconds + micro_seconds / 1000000.0;
}