V
virtualadepts
This contest is open to everyone who knows C++. To enter all you need
to do is take the Boyer-Moore String Search Algorithm, as it is
written in C, and make the fastest running C++ program you can out of
it. I'm going to try to write code myself, and if you can beet me,
and your code executes faster, then you win a free I-Pod. My I-Pod is
used, but it comes jam packed with songs I've been listening to. So
take a crack at this problem and post your solution to the group.
http://en.wikipedia.org/wiki/Boyer-Moore_string_search_algorithm
#include <string.h>
#include <limits.h>
/* This helper function checks, whether the last "portion" bytes
* of "needle" (which is "nlen" bytes long) exist within the "needle"
* at offset "offset" (counted from the end of the string),
* and whether the character preceding "offset" is not a match.
* Notice that the range being checked may reach beyond the
* beginning of the string. Such range is ignored.
*/
static int boyermoore_needlematch
(const unsigned char* needle, size_t nlen, size_t portion, size_t
offset)
{
ssize_t virtual_begin = nlen-offset-portion;
ssize_t ignore = 0;
if(virtual_begin < 0) { ignore = -virtual_begin; virtual_begin =
0; }
if(virtual_begin > 0 && needle[virtual_begin-1] == needle[nlen-
portion-1])
return 0;
return
memcmp(needle + nlen - portion + ignore,
needle + virtual_begin,
portion - ignore) == 0;
}
static size_t max(ssize_t a, ssize_t b) { return a>b ? a : b; }
/* Returns a pointer to the first occurrence of "needle"
* within "haystack", or NULL if not found.
*/
const unsigned char* memmem_boyermoore
(const unsigned char* haystack, size_t hlen,
const unsigned char* needle, size_t nlen)
{
size_t skip[nlen]; /* Array of shifts with self-substring match
check */
ssize_t occ[UCHAR_MAX+1]; /* Array of last occurrence of each
character */
size_t a, hpos;
if(nlen > hlen || nlen <= 0 || !haystack || !needle) return NULL;
/* Preprocess #1: init occ[]*/
/* Initialize the table to default value */
for(a=0; a<UCHAR_MAX+1; ++a) occ[a] = -1;
/* Then populate it with the analysis of the needle */
/* But ignoring the last letter */
for(a=0; a<nlen-1; ++a) occ[needle[a]] = a;
/* Preprocess #2: init skip[] */
/* Note: This step could be made a lot faster.
* A simple implementation is shown here. */
for(a=0; a<nlen; ++a)
{
size_t value = 0;
while(value < nlen && !boyermoore_needlematch(needle, nlen, a,
value))
++value;
skip[nlen-a-1] = value;
}
/* Search: */
for(hpos=0; hpos <= hlen-nlen; )
{
size_t npos=nlen-1;
while(needle[npos] == haystack[npos+hpos])
{
if(npos == 0) return haystack + hpos;
--npos;
}
hpos += max(skip[npos], npos - occ[haystack[npos+hpos]]);
}
return NULL;
}
to do is take the Boyer-Moore String Search Algorithm, as it is
written in C, and make the fastest running C++ program you can out of
it. I'm going to try to write code myself, and if you can beet me,
and your code executes faster, then you win a free I-Pod. My I-Pod is
used, but it comes jam packed with songs I've been listening to. So
take a crack at this problem and post your solution to the group.
http://en.wikipedia.org/wiki/Boyer-Moore_string_search_algorithm
#include <string.h>
#include <limits.h>
/* This helper function checks, whether the last "portion" bytes
* of "needle" (which is "nlen" bytes long) exist within the "needle"
* at offset "offset" (counted from the end of the string),
* and whether the character preceding "offset" is not a match.
* Notice that the range being checked may reach beyond the
* beginning of the string. Such range is ignored.
*/
static int boyermoore_needlematch
(const unsigned char* needle, size_t nlen, size_t portion, size_t
offset)
{
ssize_t virtual_begin = nlen-offset-portion;
ssize_t ignore = 0;
if(virtual_begin < 0) { ignore = -virtual_begin; virtual_begin =
0; }
if(virtual_begin > 0 && needle[virtual_begin-1] == needle[nlen-
portion-1])
return 0;
return
memcmp(needle + nlen - portion + ignore,
needle + virtual_begin,
portion - ignore) == 0;
}
static size_t max(ssize_t a, ssize_t b) { return a>b ? a : b; }
/* Returns a pointer to the first occurrence of "needle"
* within "haystack", or NULL if not found.
*/
const unsigned char* memmem_boyermoore
(const unsigned char* haystack, size_t hlen,
const unsigned char* needle, size_t nlen)
{
size_t skip[nlen]; /* Array of shifts with self-substring match
check */
ssize_t occ[UCHAR_MAX+1]; /* Array of last occurrence of each
character */
size_t a, hpos;
if(nlen > hlen || nlen <= 0 || !haystack || !needle) return NULL;
/* Preprocess #1: init occ[]*/
/* Initialize the table to default value */
for(a=0; a<UCHAR_MAX+1; ++a) occ[a] = -1;
/* Then populate it with the analysis of the needle */
/* But ignoring the last letter */
for(a=0; a<nlen-1; ++a) occ[needle[a]] = a;
/* Preprocess #2: init skip[] */
/* Note: This step could be made a lot faster.
* A simple implementation is shown here. */
for(a=0; a<nlen; ++a)
{
size_t value = 0;
while(value < nlen && !boyermoore_needlematch(needle, nlen, a,
value))
++value;
skip[nlen-a-1] = value;
}
/* Search: */
for(hpos=0; hpos <= hlen-nlen; )
{
size_t npos=nlen-1;
while(needle[npos] == haystack[npos+hpos])
{
if(npos == 0) return haystack + hpos;
--npos;
}
hpos += max(skip[npos], npos - occ[haystack[npos+hpos]]);
}
return NULL;
}