On Thu, May 19, 2011 at 03:22:18AM +0100, Ben Bacarisse wrote:
[snip]
Ok, here's a quick and dirty hack that measures a highly contrived
scenario. This code first allocates two arenas of different size, and
then ping-pongs the allocations between the two arenas for a defined
number of iterations. Then, I do more or less the same thing with
malloc.y
The preliminary results show that my special-purpose allocator is 2-3
times faster than glibc malloc. Not quite and order-of-magnitude
difference in performance (unless you think in base-2), but very
acceptable. In some programs there may be a larger difference in
performance becuase of reduceed memory fragmentation or reduced
overall memory use. I do not plan to isolate those factors at this
time to measure their effect.
Code is compiled with "gcc -O0 -o arena_test arena_test.c -lrt"
The test platform here is an Intel Atom netbook running at 1.333GHz,
1G RAM, OpenSuSE 11.4, libc-2.11.3, gcc 4.5.1 20101208.
Typical result:
[31/22:32] nb
ts/10 stevet ~/stuff/src/libs/tools/test: ./arena_test
Arena: Iterations: 100000; elapsed CPU time (msec): 39181
Malloc: Iterations: 100000; elapsed CPU time (msec): 107265
Code follows:
#include <stdio.h>
#include <malloc.h>
#include <stdlib.h>
#include <time.h>
#include <sys/time.h>
#define TEST_SIZE 1024 * 1024
#define ITERATIONS 100000
#define DIFFERENCE 5
struct arena_head_s {
int obj_size;
int arena_size;
unsigned char *
arena;
int free;
int free_list;
};
typedef struct arena_head_s arena_head;
#define arena_obj_addr(x, n) ((void *) &x->arena[n * x->obj_size])
arena_qa(arena_head *x)
{
int n;
n = x->free_list;
if(n != -1) {
x->free_list = *((int *) arena_obj_addr(x, n));
x->free--;
}
return(n);
}
void arena_free(arena_head *p, int n)
{
*((int *) arena_obj_addr(p, n)) = p->free_list;
p->free_list = n;
p->free++;
return;
}
arena_head * arena_creat(size_t s, int n)
{
arena_head * h;
int i;
h = malloc(sizeof(arena_head));
if(h == NULL) {
perror("malloc()");
exit(1);
}
h->obj_size = s;
h->arena_size = n;
h->free = n;
h->arena = malloc(s * n);
if(h->arena == NULL) {
perror("malloc()");
exit(1);
}
h->free_list = 0;
for(i = 0; i < n; i++)
*((int *) arena_obj_addr(h, i)) = i + 1;
*((int *) arena_obj_addr(h, i)) = -1;
return(h);
}
typedef unsigned long long int T64;
#define processcputime(p) \
{ \
T64 * t = p; \
struct timespec T; \
\
if (clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &T) == -1) { \
perror("clock_gettime()"); \
exit(1); \
} \
\
*t = ((T64)T.tv_sec) * 1000000LL + ((T64) T.tv_nsec / 1000); \
}
struct test_load_s {
int ref;
int data;
};
typedef struct test_load_s test_load;
void work(test_load *p)
{
if(!p) {
perror("null ptr");
exit(1);
}
p->ref++;
p->data = (int) p % 4 * rand();
return;
}
int main(int argc, char ** argv)
{
arena_head *h1, *h2;
T64 start, end;
int i, n;
test_load *p;
processcputime(&start);
h1 = arena_creat(sizeof(test_load), TEST_SIZE);
h2 = arena_creat(sizeof(test_load) + DIFFERENCE, TEST_SIZE);
if(!h1 || !h2) {
perror("arenas");
exit(1);
}
processcputime(&start);
for(i = 0; i < ITERATIONS; i++) {
n = arena_qa(h1);
if(n == -1) {
perror("Ooops.");
exit(1);
}
work(arena_obj_addr(h1, n));
n = arena_qa(h2);
if(n == -1) {
perror("Ooops 2.");
exit(1);
}
work(arena_obj_addr(h2, n));
}
processcputime(&end);
fprintf(stdout, "Arena: Iterations: %ld; elapsed CPU time (msec): %ld\n", ITERATIONS, (end - start));
processcputime(&start);
for(i = 0; i < ITERATIONS; i++) {
p = malloc(sizeof(test_load));
if(!p) {
perror("malloc()");
exit(1);
}
work(p);
p = malloc(sizeof(test_load) + DIFFERENCE);
if(!p) {
perror("malloc2()");
exit(1);
}
work(p);
}
processcputime(&end);
fprintf(stdout, "Malloc: Iterations: %ld; elapsed CPU time (msec): %ld\n", ITERATIONS, (end - start));
exit(0);
}
Regards,
Uncle Steve