A
anhadikal
Hello,
I am getting strange results from a simple "benchmark" ...
On my AMD Athlon 4800+ X2:
on linux:
-----------------------------------------------------------------------------------------------------------------
cmp_i686
--------------
C strcpy: 3764.7 MB/second (2560.0 MB in
680000 clocks)
our strcpy1: 2976.7 MB/second (2560.0 MB in
860000 clocks)
our strcpy2: 3084.3 MB/second (2560.0 MB in
830000 clocks)
C memcpy: 3084.3 MB/second (2560.0 MB in
830000 clocks)
our memcpy: 1969.2 MB/second (2560.0 MB in
1300000 clocks)
cmp_athlon
-----------------
C strcpy: 3764.7 MB/second (2560.0 MB in
680000 clocks)
our strcpy1: 4000.0 MB/second (2560.0 MB in
640000 clocks)
our strcpy2: 3122.0 MB/second (2560.0 MB in
820000 clocks)
C memcpy: 3122.0 MB/second (2560.0 MB in
820000 clocks)
our memcpy: 1984.5 MB/second (2560.0 MB in
1290000 clocks)
-----------------------------------------------------------------------------------------------------------------
on windows:
-----------------------------------------------------------------------------------------------------------------
C:\DundE\anh\Eigene Dateien\downloads>cmp_athlon.exe
C strcpy: 4821.1 MB/second (2560.0 MB in 531 clocks)
our strcpy1: 4196.7 MB/second (2560.0 MB in 610 clocks)
our strcpy2: 3148.8 MB/second (2560.0 MB in 813 clocks)
C memcpy: 2873.2 MB/second (2560.0 MB in 891 clocks)
our memcpy: 1973.8 MB/second (2560.0 MB in 1297 clocks)
C:\DundE\anh\Eigene Dateien\downloads>cmp_i686.exe
C strcpy: 4812.0 MB/second (2560.0 MB in 532 clocks)
our strcpy1: 2976.7 MB/second (2560.0 MB in 860 clocks)
our strcpy2: 3091.8 MB/second (2560.0 MB in 828 clocks)
C memcpy: 2825.6 MB/second (2560.0 MB in 906 clocks)
our memcpy: 1949.7 MB/second (2560.0 MB in 1313 clocks)
our memcpy: 1949.7 MB/second (2560.0 MB in 1313 clocks)
-----------------------------------------------------------------------------------------------------------------
and now on i5 430m ...
linux:
-----------------------------------------------------------------------------------------------------------------
[dd@lappy Downloads]$ ./cmp_686
C strcpy: 1113.0 MB/second (2560.0 MB in 2300000
clocks)
our strcpy1: 1497.1 MB/second (2560.0 MB in 1710000
clocks)
our strcpy2: 1523.8 MB/second (2560.0 MB in 1680000
clocks)
C memcpy: 1630.6 MB/second (2560.0 MB in 1570000
clocks)
our memcpy: 1207.5 MB/second (2560.0 MB in 2120000
clocks)
[dd@lappy Downloads]$ ./cmp_core2
C strcpy: 1075.6 MB/second (2560.0 MB in 2380000
clocks)
our strcpy1: 1741.5 MB/second (2560.0 MB in 1470000
clocks)
our strcpy2: 1706.7 MB/second (2560.0 MB in 1500000
clocks)
C memcpy: 1600.0 MB/second (2560.0 MB in 1600000
clocks)
our memcpy: 1213.3 MB/second (2560.0 MB in 2110000
clocks)
-----------------------------------------------------------------------------------------------------------------
windows:
-----------------------------------------------------------------------------------------------------------------
C:\Users\Lappi\Desktop\xcp>cmp_686.exe
C strcpy: 3731.8 MB/second (2560.0 MB in 686 clocks)
our strcpy1: 3417.9 MB/second (2560.0 MB in 749 clocks)
our strcpy2: 3417.9 MB/second (2560.0 MB in 749 clocks)
C memcpy: 2562.6 MB/second (2560.0 MB in 999 clocks)
our memcpy: 1823.4 MB/second (2560.0 MB in 1404 clocks)
C:\Users\Lappi\Desktop\xcp>cmp_core2.exe
C strcpy: 4238.4 MB/second (2560.0 MB in 604 clocks)
our strcpy1: 3699.4 MB/second (2560.0 MB in 692 clocks)
our strcpy2: 3459.5 MB/second (2560.0 MB in 740 clocks)
C memcpy: 2552.3 MB/second (2560.0 MB in 1003 clocks)
our memcpy: 2051.3 MB/second (2560.0 MB in 1248 clocks)
-----------------------------------------------------------------------------------------------------------------
I always compiled with gcc 4.5.1 using -O2 -s ... Why are the results so
different?
Here is the benchmark code:
--------------------------------------------------------------------------------------------------------------------
/****
*
* modified version from Preston L. Bannister
*
**/
#include <stdlib.h>
#include <stdio.h>
#include <time.h>
#include <string.h>
/*******************************************************************************
*******************************************************************************
* configs
*******************************************************************************
******************************************************************************/
// Number of iterations time_function() runs each benchmark body for.
#define LOOPS 10000000
// Two fixed source strings; time_function() hands offsets into them to the
// copy routines under test.
static const char sOut1[] =
"QBTnetfnh8TpTWvPzARBNWr2gMFofe3AzwMXVOGbdL2xOOACwMefrMxpxZ62qakW";
static const char sOut2[] =
"ct6V7lZ42RoryDlvM1EzT54T5qV3DGUA4UIIhVv0TSK0lTx0TKIFc4E4YIdfjfKp";
/*******************************************************************************
*******************************************************************************
* engine
*******************************************************************************
******************************************************************************/
// Length of sOut1 (terminator excluded), computed once during static init.
unsigned int nLength = ::strlen(sOut1);
// Clock ticks that time_function() subtracts from every measurement; main()
// sets it from the do_total run.
unsigned int dtLoop = 0;
// Byte counter advanced by do_total(); report_times() prints it as MB.
unsigned int nTotal = 0;
// Shared destination buffer that every copy routine writes into.
char sWork[256];
// Signature of a benchmark body: receives the two source strings.
typedef void (*doit)(const char*, const char*);
// Print one result line for the benchmark labelled `s`.
//   s  - label shown at the start of the line
//   dt - elapsed time in clock() ticks
// The throughput is nTotal (in units of 1000000 bytes) divided by the
// elapsed time converted to seconds via CLOCKS_PER_SEC.
void report_times(const char* s, unsigned int dt)
{
    const double seconds   = static_cast<double>(dt) / CLOCKS_PER_SEC;
    const double megabytes = static_cast<double>(nTotal) / 1000000;
    printf("%s:\t\t %0.1f MB/second (%0.1f MB in %u clocks)\n",
           s, megabytes / seconds, megabytes, dt);
}
// Run `fn` LOOPS times and return the elapsed clock() ticks minus dtLoop.
//   fn - benchmark body; called with a pointer into sOut1 and one into sOut2
// Returns the tick count as an int. While dtLoop is still 0 (the first call
// made by main) the raw elapsed ticks are returned.
int time_function(doit fn)
{
    const clock_t t0 = ::clock();
    for (int i = 0; i < LOOPS; ++i) {
        // Vary the start offset (0..15) so the sources are not always
        // aligned the same way: s1 skips the first bytes of sOut1, s2 is
        // the last (15 & i) bytes of sOut2.
        const char* s1 = sOut1 + (15 & i);
        const char* s2 = sOut2 + nLength - (15 & i);
        (*fn)(s1, s2);
    }
    // Subtract the previously measured loop overhead.
    return static_cast<int>(::clock() - t0) - static_cast<int>(dtLoop);
}
// Benchmark body that copies nothing: it only advances the global byte
// counter by 4 * nLength per call. Both arguments are ignored.
void do_total(const char* s1,const char* s2)
{
    (void)s1;
    (void)s2;
    nTotal = nTotal + 4 * nLength;
}
/*******************************************************************************
* end engine
******************************************************************************/
/*******************************************************************************
*******************************************************************************
* benchmark
*******************************************************************************
******************************************************************************/
// Benchmark body: copy s1, then s2, into sWork with the C library strcpy.
void do_c_strcpy(const char* s1,const char* s2)
{
    char* const dst = sWork;
    ::strcpy(dst, s1);
    ::strcpy(dst, s2);
}
// Pointer-walking string copy: copies characters from s2 to s1 up to and
// including the terminating NUL.
//   s1 - destination buffer (must be large enough for s2 plus terminator)
//   s2 - NUL-terminated source string
void our_strcpy1(char* s1,const char* s2)
{
    char copied;
    do {
        copied = *s2++;
        *s1++ = copied;
    } while (copied != '\0');
}
// Index-based string copy: copies characters from s2 to s1 up to and
// including the terminating NUL.
//   s1 - destination buffer (must be large enough for s2 plus terminator)
//   s2 - NUL-terminated source string
void our_strcpy2(char* s1,const char* s2)
{
    unsigned int i;
    // Copy element by element until the source terminator is reached.
    for (i = 0; s2[i] != 0; ++i)
        s1[i] = s2[i];
    // The loop stops before the terminator, so write it explicitly.
    s1[i] = 0;
}
// Benchmark body: copy s1, then s2, into sWork using our_strcpy1.
void do_our_strcpy1(const char* s1,const char* s2)
{
    char* const dst = sWork;
    our_strcpy1(dst, s1);
    our_strcpy1(dst, s2);
}
// Benchmark body: copy s1, then s2, into sWork using our_strcpy2.
void do_our_strcpy2(const char* s1,const char* s2)
{
    char* const dst = sWork;
    our_strcpy2(dst, s1);
    our_strcpy2(dst, s2);
}
// Benchmark body: measure each source with strlen, then copy that many
// bytes into sWork with the C library memcpy. The terminating NUL is not
// copied because only strlen() bytes are requested.
void do_c_memcpy(const char* s1, const char* s2)
{
    const int firstLen  = strlen(s1);
    const int secondLen = strlen(s2);
    char* const dst = sWork;
    ::memcpy(dst, s1, firstLen);
    ::memcpy(dst, s2, secondLen);
}
// Byte-by-byte copy of `size` bytes from src to dest.
//   dest - destination buffer with room for at least `size` bytes
//   src  - source buffer with at least `size` readable bytes
//   size - number of bytes to copy; zero or negative copies nothing
void our_memcpy(char* dest, const char* src, int size)
{
    for (int i = 0; i < size; ++i)
        dest[i] = src[i];
}
// Benchmark body: measure each source with strlen, then copy that many
// bytes into sWork with our_memcpy. The terminating NUL is not copied.
void do_our_memcpy(const char* s1, const char* s2)
{
    const int firstLen  = strlen(s1);
    const int secondLen = strlen(s2);
    char* const dst = sWork;
    our_memcpy(dst, s1, firstLen);
    our_memcpy(dst, s2, secondLen);
}
/*******************************************************************************
* end benchmark
******************************************************************************/
/*******************************************************************************
*******************************************************************************
* main program
*******************************************************************************
******************************************************************************/
int main(int ac,char** av)
{
dtLoop = time_function(do_total);
report_times("C strcpy", time_function(do_c_strcpy));
report_times("our strcpy1", time_function(do_our_strcpy1));
report_times("our strcpy2", time_function(do_our_strcpy2));
report_times("C memcpy", time_function(do_c_memcpy));
report_times("our memcpy", time_function(do_our_memcpy));
return 0;
}
I am getting strange results from a simple "benchmark" ...
On my AMD Athlon 4800+ X2:
on linux:
-----------------------------------------------------------------------------------------------------------------
cmp_i686
--------------
C strcpy: 3764.7 MB/second (2560.0 MB in
680000 clocks)
our strcpy1: 2976.7 MB/second (2560.0 MB in
860000 clocks)
our strcpy2: 3084.3 MB/second (2560.0 MB in
830000 clocks)
C memcpy: 3084.3 MB/second (2560.0 MB in
830000 clocks)
our memcpy: 1969.2 MB/second (2560.0 MB in
1300000 clocks)
cmp_athlon
-----------------
C strcpy: 3764.7 MB/second (2560.0 MB in
680000 clocks)
our strcpy1: 4000.0 MB/second (2560.0 MB in
640000 clocks)
our strcpy2: 3122.0 MB/second (2560.0 MB in
820000 clocks)
C memcpy: 3122.0 MB/second (2560.0 MB in
820000 clocks)
our memcpy: 1984.5 MB/second (2560.0 MB in
1290000 clocks)
-----------------------------------------------------------------------------------------------------------------
on windows:
-----------------------------------------------------------------------------------------------------------------
C:\DundE\anh\Eigene Dateien\downloads>cmp_athlon.exe
C strcpy: 4821.1 MB/second (2560.0 MB in 531 clocks)
our strcpy1: 4196.7 MB/second (2560.0 MB in 610 clocks)
our strcpy2: 3148.8 MB/second (2560.0 MB in 813 clocks)
C memcpy: 2873.2 MB/second (2560.0 MB in 891 clocks)
our memcpy: 1973.8 MB/second (2560.0 MB in 1297 clocks)
C:\DundE\anh\Eigene Dateien\downloads>cmp_i686.exe
C strcpy: 4812.0 MB/second (2560.0 MB in 532 clocks)
our strcpy1: 2976.7 MB/second (2560.0 MB in 860 clocks)
our strcpy2: 3091.8 MB/second (2560.0 MB in 828 clocks)
C memcpy: 2825.6 MB/second (2560.0 MB in 906 clocks)
our memcpy: 1949.7 MB/second (2560.0 MB in 1313 clocks)
our memcpy: 1949.7 MB/second (2560.0 MB in 1313 clocks)
-----------------------------------------------------------------------------------------------------------------
and now on i5 430m ...
linux:
-----------------------------------------------------------------------------------------------------------------
[dd@lappy Downloads]$ ./cmp_686
C strcpy: 1113.0 MB/second (2560.0 MB in 2300000
clocks)
our strcpy1: 1497.1 MB/second (2560.0 MB in 1710000
clocks)
our strcpy2: 1523.8 MB/second (2560.0 MB in 1680000
clocks)
C memcpy: 1630.6 MB/second (2560.0 MB in 1570000
clocks)
our memcpy: 1207.5 MB/second (2560.0 MB in 2120000
clocks)
[dd@lappy Downloads]$ ./cmp_core2
C strcpy: 1075.6 MB/second (2560.0 MB in 2380000
clocks)
our strcpy1: 1741.5 MB/second (2560.0 MB in 1470000
clocks)
our strcpy2: 1706.7 MB/second (2560.0 MB in 1500000
clocks)
C memcpy: 1600.0 MB/second (2560.0 MB in 1600000
clocks)
our memcpy: 1213.3 MB/second (2560.0 MB in 2110000
clocks)
-----------------------------------------------------------------------------------------------------------------
windows:
-----------------------------------------------------------------------------------------------------------------
C:\Users\Lappi\Desktop\xcp>cmp_686.exe
C strcpy: 3731.8 MB/second (2560.0 MB in 686 clocks)
our strcpy1: 3417.9 MB/second (2560.0 MB in 749 clocks)
our strcpy2: 3417.9 MB/second (2560.0 MB in 749 clocks)
C memcpy: 2562.6 MB/second (2560.0 MB in 999 clocks)
our memcpy: 1823.4 MB/second (2560.0 MB in 1404 clocks)
C:\Users\Lappi\Desktop\xcp>cmp_core2.exe
C strcpy: 4238.4 MB/second (2560.0 MB in 604 clocks)
our strcpy1: 3699.4 MB/second (2560.0 MB in 692 clocks)
our strcpy2: 3459.5 MB/second (2560.0 MB in 740 clocks)
C memcpy: 2552.3 MB/second (2560.0 MB in 1003 clocks)
our memcpy: 2051.3 MB/second (2560.0 MB in 1248 clocks)
-----------------------------------------------------------------------------------------------------------------
I always compiled with gcc 4.5.1 using -O2 -s ... Why are the results so
different?
Here is the benchmark code:
--------------------------------------------------------------------------------------------------------------------
/****
*
* modified version from Preston L. Bannister
*
**/
#include <stdlib.h>
#include <stdio.h>
#include <time.h>
#include <string.h>
/*******************************************************************************
*******************************************************************************
* configs
*******************************************************************************
******************************************************************************/
// Number of iterations time_function() runs each benchmark body for.
#define LOOPS 10000000
// Two fixed source strings; time_function() hands offsets into them to the
// copy routines under test.
static const char sOut1[] =
"QBTnetfnh8TpTWvPzARBNWr2gMFofe3AzwMXVOGbdL2xOOACwMefrMxpxZ62qakW";
static const char sOut2[] =
"ct6V7lZ42RoryDlvM1EzT54T5qV3DGUA4UIIhVv0TSK0lTx0TKIFc4E4YIdfjfKp";
/*******************************************************************************
*******************************************************************************
* engine
*******************************************************************************
******************************************************************************/
// Length of sOut1 (terminator excluded), computed once during static init.
unsigned int nLength = ::strlen(sOut1);
// Clock ticks that time_function() subtracts from every measurement; main()
// sets it from the do_total run.
unsigned int dtLoop = 0;
// Byte counter advanced by do_total(); report_times() prints it as MB.
unsigned int nTotal = 0;
// Shared destination buffer that every copy routine writes into.
char sWork[256];
// Signature of a benchmark body: receives the two source strings.
typedef void (*doit)(const char*, const char*);
// Print one result line for the benchmark labelled `s`.
//   s  - label shown at the start of the line
//   dt - elapsed time in clock() ticks
// The throughput is nTotal (in units of 1000000 bytes) divided by the
// elapsed time converted to seconds via CLOCKS_PER_SEC.
void report_times(const char* s, unsigned int dt)
{
    const double seconds   = static_cast<double>(dt) / CLOCKS_PER_SEC;
    const double megabytes = static_cast<double>(nTotal) / 1000000;
    printf("%s:\t\t %0.1f MB/second (%0.1f MB in %u clocks)\n",
           s, megabytes / seconds, megabytes, dt);
}
// Run `fn` LOOPS times and return the elapsed clock() ticks minus dtLoop.
//   fn - benchmark body; called with a pointer into sOut1 and one into sOut2
// Returns the tick count as an int. While dtLoop is still 0 (the first call
// made by main) the raw elapsed ticks are returned.
int time_function(doit fn)
{
    const clock_t t0 = ::clock();
    for (int i = 0; i < LOOPS; ++i) {
        // Vary the start offset (0..15) so the sources are not always
        // aligned the same way: s1 skips the first bytes of sOut1, s2 is
        // the last (15 & i) bytes of sOut2.
        const char* s1 = sOut1 + (15 & i);
        const char* s2 = sOut2 + nLength - (15 & i);
        (*fn)(s1, s2);
    }
    // Subtract the previously measured loop overhead.
    return static_cast<int>(::clock() - t0) - static_cast<int>(dtLoop);
}
// Benchmark body that copies nothing: it only advances the global byte
// counter by 4 * nLength per call. Both arguments are ignored.
void do_total(const char* s1,const char* s2)
{
    (void)s1;
    (void)s2;
    nTotal = nTotal + 4 * nLength;
}
/*******************************************************************************
* end engine
******************************************************************************/
/*******************************************************************************
*******************************************************************************
* benchmark
*******************************************************************************
******************************************************************************/
// Benchmark body: copy s1, then s2, into sWork with the C library strcpy.
void do_c_strcpy(const char* s1,const char* s2)
{
    char* const dst = sWork;
    ::strcpy(dst, s1);
    ::strcpy(dst, s2);
}
// Pointer-walking string copy: copies characters from s2 to s1 up to and
// including the terminating NUL.
//   s1 - destination buffer (must be large enough for s2 plus terminator)
//   s2 - NUL-terminated source string
void our_strcpy1(char* s1,const char* s2)
{
    char copied;
    do {
        copied = *s2++;
        *s1++ = copied;
    } while (copied != '\0');
}
// Index-based string copy: copies characters from s2 to s1 up to and
// including the terminating NUL.
//   s1 - destination buffer (must be large enough for s2 plus terminator)
//   s2 - NUL-terminated source string
void our_strcpy2(char* s1,const char* s2)
{
    unsigned int i;
    // Copy element by element until the source terminator is reached.
    for (i = 0; s2[i] != 0; ++i)
        s1[i] = s2[i];
    // The loop stops before the terminator, so write it explicitly.
    s1[i] = 0;
}
// Benchmark body: copy s1, then s2, into sWork using our_strcpy1.
void do_our_strcpy1(const char* s1,const char* s2)
{
    char* const dst = sWork;
    our_strcpy1(dst, s1);
    our_strcpy1(dst, s2);
}
// Benchmark body: copy s1, then s2, into sWork using our_strcpy2.
void do_our_strcpy2(const char* s1,const char* s2)
{
    char* const dst = sWork;
    our_strcpy2(dst, s1);
    our_strcpy2(dst, s2);
}
// Benchmark body: measure each source with strlen, then copy that many
// bytes into sWork with the C library memcpy. The terminating NUL is not
// copied because only strlen() bytes are requested.
void do_c_memcpy(const char* s1, const char* s2)
{
    const int firstLen  = strlen(s1);
    const int secondLen = strlen(s2);
    char* const dst = sWork;
    ::memcpy(dst, s1, firstLen);
    ::memcpy(dst, s2, secondLen);
}
// Byte-by-byte copy of `size` bytes from src to dest.
//   dest - destination buffer with room for at least `size` bytes
//   src  - source buffer with at least `size` readable bytes
//   size - number of bytes to copy; zero or negative copies nothing
void our_memcpy(char* dest, const char* src, int size)
{
    for (int i = 0; i < size; ++i)
        dest[i] = src[i];
}
// Benchmark body: measure each source with strlen, then copy that many
// bytes into sWork with our_memcpy. The terminating NUL is not copied.
void do_our_memcpy(const char* s1, const char* s2)
{
    const int firstLen  = strlen(s1);
    const int secondLen = strlen(s2);
    char* const dst = sWork;
    our_memcpy(dst, s1, firstLen);
    our_memcpy(dst, s2, secondLen);
}
/*******************************************************************************
* end benchmark
******************************************************************************/
/*******************************************************************************
*******************************************************************************
* main program
*******************************************************************************
******************************************************************************/
int main(int ac,char** av)
{
dtLoop = time_function(do_total);
report_times("C strcpy", time_function(do_c_strcpy));
report_times("our strcpy1", time_function(do_our_strcpy1));
report_times("our strcpy2", time_function(do_our_strcpy2));
report_times("C memcpy", time_function(do_c_memcpy));
report_times("our memcpy", time_function(do_our_memcpy));
return 0;
}