J
jacob navia
When building a software project, one of the many pitfalls you can
encounter is releasing the software too early. When released, software
gets used, and the way it is designed gets frozen for fears of not being
backwards compatible and leaving the few users with a lot of work of
changing their code.
Luckily, I haven't released any code for my container library, so I can
rewrite completely the way it presents itself to the user without any
problems.
I did just that.
I decided to
(1) Hide all definitions of the data types used, leaving abstract data
types instead:
typedef struct List List;
(2) leave only a public list of functions in a single object:
typedef struct tagListInterface {
List *Create(size_t elementSize);
// .. many other functions
} ListInterface;
(3) Declare as extern a single global object that is the ONLY exported
name from all the interface:
extern ListInterface iList;
extern HastTableInterface iHashTable;
etc.
(4)
All user code now uses the global interface, what makes disappear that
horrible:
list_object->VTable->Find(list_object, data);
replacing it with the much simpler:
iList.Find(list_object,data);
(5)
This way of interfacing containers has the added advantage that the
creation function can be replaced by your own one, without having the
need to instantiate an object to be able to get it.
(6)
The impact in the linker symbol table is reduced to just ONE symbol for
each container.
----------------------------------------------------------------------
As an example of how this looks like, here is an example of a container
that I added recently: Bloom filters. This filters apply several times
hash functions to a datum, and the hash result index a bitstring,
setting each bit n times to 1. More can be found in the wikipedia.
Here is the header file
typedef struct tagBloomFilter BloomFilter;
typedef struct tagBloomFilterInterface {
BloomFilter *(*Create)(size_t MaxElements,double probability);
size_t (*SetElement)(BloomFilter *b,
const void *key,size_t keylen);
int (*GetElement)(BloomFilter *b,const void *key,size_t keylen);
int (*Clear)(BloomFilter *b);
int (*Finalize)(BloomFilter *b);
} BloomFilterInterface;
extern BloomFilterInterface iBloomFilter;
Here is the implementation
/*
The bloom filter uses k hash functions to store k bits in a bit string
that represent a stored value. It can return false positives, but if it
says that an element is NOT there, it is definitely not there.
The size of the bloom filter and the number of hash functions you should
be using depending on your application can be calculated using the
formulas on the Wikipedia page:
m = -n*ln(p)/(ln(2)^2)
This will tell you the number of bits m to use for your filter, given
the number n of elements in your filter and the false positive
probability p you want to achieve. All that for the ideal number of hash
functions k which you can calculate like this:
k = 0.7*m/n
*/
#include <math.h>
#include <stdlib.h>
#include <stdio.h>
#include "containers.h"
struct tagBloomFilter {
size_t count; // Elements stored already
size_t MaxNbOfElements;
double Probability;
size_t HashFunctions;
size_t nbOfBits;
unsigned char *bits;
unsigned Seeds[1];
ContainerMemoryManager *Allocator;
};
//-----------------------------------------------------------------------------
// MurmurHash2, by Austin Appleby
// Note - This code makes a few assumptions about how your machine behaves -
// 1. We can read a 4-byte value from any address without crashing
// 2. sizeof(int) == 4
// And it has a few limitations -
// 1. It will not work incrementally.
// 2. It will not produce the same results on little-endian and big-endian
// machines.
static unsigned int Hash(const void * key, int len, unsigned int seed )
{
// 'm' and 'r' are mixing constants generated offline.
// They're not really 'magic', they just happen to work well.
const unsigned int m = 0x5bd1e995;
const int r = 24;
// Initialize the hash to a 'random' value
unsigned int h = seed ^ len;
// Mix 4 bytes at a time into the hash
const unsigned char * data = (const unsigned char *)key;
while(len >= 4)
{
unsigned int k = *(unsigned int *)data;
k *= m;
k ^= k >> r;
k *= m;
h *= m;
h ^= k;
data += 4; len -= 4;
}
// Handle the last few bytes of the input array
switch(len)
{
case 3: h ^= data[2] << 16;
case 2: h ^= data[1] << 8;
case 1: h ^= data[0];
h *= m;
};
// Do a few final mixes of the hash to ensure the last few
// bytes are well-incorporated.
h ^= h >> 13; h *= m; h ^= h >> 15;
return h;
}
static BloomFilter *Create(size_t nbOfElements,double Probability)
{
int nbOfBits;
int k;
BloomFilter *result;
if (Probability >= 1.0) {
iError.RaiseError("BloomFilter.Create",CONTAINER_ERROR_BADARG);
return NULL;
}
nbOfBits = -round(nbOfElements*log(Probability)/(log(2.0)*log(2.0)));
k = round(0.7*nbOfBits/nbOfElements);
result = CurrentMemoryManager->malloc(sizeof(BloomFilter) + k*sizeof(int));
if (result == NULL) {
iError.RaiseError("BloomFilter.Create",CONTAINER_ERROR_NOMEMORY);
return NULL;
}
result->bits = CurrentMemoryManager->malloc(1+nbOfBits/8);
if (result->bits == NULL) {
CurrentMemoryManager->free(result);
iError.RaiseError("BloomFilter.Create",CONTAINER_ERROR_NOMEMORY);
return NULL;
}
result->nbOfBits = nbOfBits;
result->MaxNbOfElements = nbOfElements;
result->Probability = Probability;
result->HashFunctions = k;
while (k > 0) {
k--;
result->Seeds[k] = rand();
}
result->Allocator = CurrentMemoryManager;
return result;
}
static size_t Add(BloomFilter *b, const void *key, size_t keylen)
{
unsigned hash;
int i;
for (i=0; i<b->HashFunctions;i++) {
hash = Hash(key,keylen,b->Seeds);
hash %= b->nbOfBits;
b->bits[hash >> 3] |= 1 << (hash&7);
}
return ++b->count;
}
static int GetElement(BloomFilter *b, const void *key, size_t keylen)
{
unsigned hash;
int i;
for (i=0; i<b->HashFunctions;i++) {
hash = Hash(key,keylen,b->Seeds);
hash %= b->nbOfBits;
if ((b->bits[hash >> 3] & (1 << (hash&7))) == 0)
return 0;
}
return 1;
}
static int Clear(BloomFilter *b)
{
memset(b->bits,0,1+b->nbOfBits/8);
return 1;
}
static int Finalize(BloomFilter *b)
{
b->Allocator->free(b->bits);
b->Allocator->free(b);
return 1;
}
BloomFilterInterface iBloomFilter = {
Create,
Add,
GetElement,
Clear,
Finalize,
};
#ifdef TEST
int main(void)
{
BloomFilter *b = iBloomFilter.Create(10,0.00001);
int i,errors=0;
i = 4734;
iBloomFilter.SetElement(b,&i,sizeof(int));
i = 9457;
iBloomFilter.SetElement(b,&i,sizeof(int));
i = 458223;
iBloomFilter.SetElement(b,&i,sizeof(int));
i = 40774;
iBloomFilter.SetElement(b,&i,sizeof(int));
i = 9334422;
iBloomFilter.SetElement(b,&i,sizeof(int));
i = 9334422;
if (iBloomFilter.GetElement(b,&i,sizeof(int))) {
printf("9334422 found as it should\n");
}
else printf("9334422 not found and is present\n");
i = 4734;
if (iBloomFilter.GetElement(b,&i,sizeof(int))) {
printf("4734 found as it should\n");
}
else {
printf("4734 not found and is present\n");
errors++;
}
i = 9;
if (iBloomFilter.GetElement(b,&i,sizeof(int))) {
printf("9 found as it should not\n");
errors++;
}
else printf("9 not found and is not present\n");
iBloomFilter.Finalize(b);
return errors;
}
#endif
encounter is releasing the software too early. When released, software
gets used, and the way it is designed gets frozen for fears of not being
backwards compatible and leaving the few users with a lot of work of
changing their code.
Luckily, I haven't released any code for my container library, so I can
rewrite completely the way it presents itself to the user without any
problems.
I did just that.
I decided to
(1) Hide all definitions of the data types used, leaving abstract data
types instead:
typedef struct List List;
(2) leave only a public list of functions in a single object:
typedef struct tagListInterface {
List *Create(size_t elementSize);
// .. many other functions
} ListInterface;
(3) Declare as extern a single global object that is the ONLY exported
name from all the interface:
extern ListInterface iList;
extern HastTableInterface iHashTable;
etc.
(4)
All user code now uses the global interface, what makes disappear that
horrible:
list_object->VTable->Find(list_object, data);
replacing it with the much simpler:
iList.Find(list_object,data);
(5)
This way of interfacing containers has the added advantage that the
creation function can be replaced by your own one, without having the
need to instantiate an object to be able to get it.
(6)
The impact in the linker symbol table is reduced to just ONE symbol for
each container.
----------------------------------------------------------------------
As an example of how this looks like, here is an example of a container
that I added recently: Bloom filters. This filters apply several times
hash functions to a datum, and the hash result index a bitstring,
setting each bit n times to 1. More can be found in the wikipedia.
Here is the header file
typedef struct tagBloomFilter BloomFilter;
typedef struct tagBloomFilterInterface {
BloomFilter *(*Create)(size_t MaxElements,double probability);
size_t (*SetElement)(BloomFilter *b,
const void *key,size_t keylen);
int (*GetElement)(BloomFilter *b,const void *key,size_t keylen);
int (*Clear)(BloomFilter *b);
int (*Finalize)(BloomFilter *b);
} BloomFilterInterface;
extern BloomFilterInterface iBloomFilter;
Here is the implementation
/*
The bloom filter uses k hash functions to store k bits in a bit string
that represent a stored value. It can return false positives, but if it
says that an element is NOT there, it is definitely not there.
The size of the bloom filter and the number of hash functions you should
be using depending on your application can be calculated using the
formulas on the Wikipedia page:
m = -n*ln(p)/(ln(2)^2)
This will tell you the number of bits m to use for your filter, given
the number n of elements in your filter and the false positive
probability p you want to achieve. All that for the ideal number of hash
functions k which you can calculate like this:
k = 0.7*m/n
*/
#include <math.h>
#include <stdlib.h>
#include <stdio.h>
#include "containers.h"
struct tagBloomFilter {
size_t count; // Elements stored already
size_t MaxNbOfElements;
double Probability;
size_t HashFunctions;
size_t nbOfBits;
unsigned char *bits;
unsigned Seeds[1];
ContainerMemoryManager *Allocator;
};
//-----------------------------------------------------------------------------
// MurmurHash2, by Austin Appleby
// Note - This code makes a few assumptions about how your machine behaves -
// 1. We can read a 4-byte value from any address without crashing
// 2. sizeof(int) == 4
// And it has a few limitations -
// 1. It will not work incrementally.
// 2. It will not produce the same results on little-endian and big-endian
// machines.
static unsigned int Hash(const void * key, int len, unsigned int seed )
{
// 'm' and 'r' are mixing constants generated offline.
// They're not really 'magic', they just happen to work well.
const unsigned int m = 0x5bd1e995;
const int r = 24;
// Initialize the hash to a 'random' value
unsigned int h = seed ^ len;
// Mix 4 bytes at a time into the hash
const unsigned char * data = (const unsigned char *)key;
while(len >= 4)
{
unsigned int k = *(unsigned int *)data;
k *= m;
k ^= k >> r;
k *= m;
h *= m;
h ^= k;
data += 4; len -= 4;
}
// Handle the last few bytes of the input array
switch(len)
{
case 3: h ^= data[2] << 16;
case 2: h ^= data[1] << 8;
case 1: h ^= data[0];
h *= m;
};
// Do a few final mixes of the hash to ensure the last few
// bytes are well-incorporated.
h ^= h >> 13; h *= m; h ^= h >> 15;
return h;
}
static BloomFilter *Create(size_t nbOfElements,double Probability)
{
int nbOfBits;
int k;
BloomFilter *result;
if (Probability >= 1.0) {
iError.RaiseError("BloomFilter.Create",CONTAINER_ERROR_BADARG);
return NULL;
}
nbOfBits = -round(nbOfElements*log(Probability)/(log(2.0)*log(2.0)));
k = round(0.7*nbOfBits/nbOfElements);
result = CurrentMemoryManager->malloc(sizeof(BloomFilter) + k*sizeof(int));
if (result == NULL) {
iError.RaiseError("BloomFilter.Create",CONTAINER_ERROR_NOMEMORY);
return NULL;
}
result->bits = CurrentMemoryManager->malloc(1+nbOfBits/8);
if (result->bits == NULL) {
CurrentMemoryManager->free(result);
iError.RaiseError("BloomFilter.Create",CONTAINER_ERROR_NOMEMORY);
return NULL;
}
result->nbOfBits = nbOfBits;
result->MaxNbOfElements = nbOfElements;
result->Probability = Probability;
result->HashFunctions = k;
while (k > 0) {
k--;
result->Seeds[k] = rand();
}
result->Allocator = CurrentMemoryManager;
return result;
}
static size_t Add(BloomFilter *b, const void *key, size_t keylen)
{
unsigned hash;
int i;
for (i=0; i<b->HashFunctions;i++) {
hash = Hash(key,keylen,b->Seeds);
hash %= b->nbOfBits;
b->bits[hash >> 3] |= 1 << (hash&7);
}
return ++b->count;
}
static int GetElement(BloomFilter *b, const void *key, size_t keylen)
{
unsigned hash;
int i;
for (i=0; i<b->HashFunctions;i++) {
hash = Hash(key,keylen,b->Seeds);
hash %= b->nbOfBits;
if ((b->bits[hash >> 3] & (1 << (hash&7))) == 0)
return 0;
}
return 1;
}
static int Clear(BloomFilter *b)
{
memset(b->bits,0,1+b->nbOfBits/8);
return 1;
}
static int Finalize(BloomFilter *b)
{
b->Allocator->free(b->bits);
b->Allocator->free(b);
return 1;
}
BloomFilterInterface iBloomFilter = {
Create,
Add,
GetElement,
Clear,
Finalize,
};
#ifdef TEST
int main(void)
{
BloomFilter *b = iBloomFilter.Create(10,0.00001);
int i,errors=0;
i = 4734;
iBloomFilter.SetElement(b,&i,sizeof(int));
i = 9457;
iBloomFilter.SetElement(b,&i,sizeof(int));
i = 458223;
iBloomFilter.SetElement(b,&i,sizeof(int));
i = 40774;
iBloomFilter.SetElement(b,&i,sizeof(int));
i = 9334422;
iBloomFilter.SetElement(b,&i,sizeof(int));
i = 9334422;
if (iBloomFilter.GetElement(b,&i,sizeof(int))) {
printf("9334422 found as it should\n");
}
else printf("9334422 not found and is present\n");
i = 4734;
if (iBloomFilter.GetElement(b,&i,sizeof(int))) {
printf("4734 found as it should\n");
}
else {
printf("4734 not found and is present\n");
errors++;
}
i = 9;
if (iBloomFilter.GetElement(b,&i,sizeof(int))) {
printf("9 found as it should not\n");
errors++;
}
else printf("9 not found and is not present\n");
iBloomFilter.Finalize(b);
return errors;
}
#endif