fslurp() critique

R

rihad

Hi, here's an implementation of file slurping in C, like @lines = <FILE> in perl
or $lines = file("/path/to/file") in php. There's also a main() testing it out.
All suggestions are very welcome! Thank you.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define TEST_MAIN

#define CHUNK 4096

#define SLURP_TEXT 1
#define SLURP_BINARY 2

/* returns NULL terminated array of strings from text file, and array's size */
char **fslurp(const char *fname, int *);

/* copies file to memory, returns size in memory and pointer to beginning */
unsigned char *fslurpb(const char *fname, size_t *size);

/* frees whatever fslurp() returned */
void ffree(char **list);

/* frees whatever fslurpb() returned */
void ffreeb(unsigned char *mem);


static void *slurp(const char *, int, size_t *);


char **fslurp(const char *fname, int *numread)
{
char **retval = 0;
char *mem = 0, *p;
size_t offt = 0;
size_t num_lines = 0, softlim = 1;

mem = slurp(fname, SLURP_TEXT, &offt);

/* got the file, now set up the pointers */
for (p = mem; p < mem + offt; ) {
char *q;

if (num_lines + 1 == softlim) {
char **tmp = realloc(retval, (softlim += 5) * sizeof *retval);
if (!tmp)
goto nomem;
retval = tmp;
}

retval[num_lines++] = p;
if (!(q = memchr(p, '\n', mem + offt - p - 1)))
q = mem + offt;
*q = 0;
p = q + 1;
}

retval[num_lines] = 0; /* we got room for an extra 0 */
goto exit;

nomem:
free(retval);
free(mem);
retval = 0;
exit:
if (numread)
*numread = num_lines;
return retval;
}

unsigned char *fslurpb(const char *fname, size_t *size)
{
return slurp(fname, SLURP_BINARY, size);
}

void ffree(char **list)
{
if (list) {
free(list[0]);
free(list);
}
}

void ffreeb(unsigned char *mem)
{
free(mem);
}

static void *slurp(const char *fname, int mode, size_t *numread)
{
unsigned char *mem = 0;
size_t offt = 0, size = 0;
const char *realmode;
FILE *fp;

switch (mode) {
case SLURP_TEXT:
realmode = "r";
break;
case SLURP_BINARY:
default:
realmode = "rb";
break;
}

if ((fp = fopen(fname, realmode)) == 0)
goto exit;

/* slurp the file in */
for (;;) {
void *p;
size_t read;

if (!(p = realloc(mem, size += CHUNK)))
goto nomem;
mem = p;
read = fread(mem + offt, 1, CHUNK, fp);
offt += read;
if (read < CHUNK)
goto exit;
}

nomem:
free(mem);
mem = 0;
exit:
if (fp)
fclose(fp);
*numread = offt;
return mem;
}

#ifdef TEST_MAIN

/* really lame */
static int is_unix(void)
{
int retval = 0;
FILE *fp = fopen("/bin/sh", "r");
if (fp) {
retval = 1;
fclose(fp);
} else {
retval = (getenv("LOGNAME") != 0);
}
return retval;
}

static void list_dump(char **list)
{
if (!list) {
fputs("(null)", stderr);
} else {
int n;
for (n = 1 ; *list; list++, n++) {
fprintf(stderr, "%3d) %s\n", n, *list);
}
}
}

int main(int argc, char **argv)
{
/* test text slurp */
{
char **lines = fslurp(is_unix() ? "/etc/services" : "c:/boot.ini", 0);
list_dump(lines);
ffree(lines);
}

/* test binary slurp */
{
const char *fname = argv[0];
size_t num;
void *mem = fslurpb(fname, &num);
if (!mem) {
char buf[FILENAME_MAX + 32];
sprintf(buf, "%s could not be read", fname);
perror(buf);
} else {
fprintf(stderr, "Read %s into memory, size: %lu\n", fname,
(unsigned long) num);
ffreeb(mem);
}
}

getchar();
return 0;
}
#endif /* TEST_MAIN */
 
M

Martijn

rihad said:
Hi, here's an implementation of file slurping in C, like @lines = <FILE> in perl
or $lines = file("/path/to/file") in php. There's also a main() testing it out.
All suggestions are very welcome! Thank you.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define TEST_MAIN

#define CHUNK 4096

#define SLURP_TEXT 1
#define SLURP_BINARY 2

/* returns NULL terminated array of strings from text file, and array's size */
char **fslurp(const char *fname, int *);

/* copies file to memory, returns size in memory and pointer to beginning */
unsigned char *fslurpb(const char *fname, size_t *size);

Why not work with file handles, this will also clean up your code a bit and
is more consistent with likewise functions.
static void *slurp(const char *, int, size_t *);

Prototyping all functions could be nicer, even if you are not
forward-referencing them. This may also be a good place to put your
comments. Of course eventually the other function prototypes will be put in
an .h file.
char **fslurp(const char *fname, int *numread)
{

[code snipped]
if (num_lines + 1 == softlim) {
char **tmp = realloc(retval, (softlim += 5) * sizeof *retval);
if (!tmp)
goto nomem;
retval = tmp;
}

This does appear as unstructured code to me. Altough I have nothing against
goto, IMHO it's unproperly used here.

if (!tmp) {
free(retval);
retval = NULL; /* or (void*)0, or just 0 as you use */
free(mem);
/* *numread = 0; <- if you really want to, but on error
nothing has to be defined as long as
you properly document this */
break;
}

In this case the labels are dropped, as well as the "goto exit;".
retval[num_lines] = 0; /* we got room for an extra 0 */

You would have to precede this with "if ( retval != NULL )" or something
similar.
static void *slurp(const char *fname, int mode, size_t *numread)
{
unsigned char *mem = 0;
size_t offt = 0, size = 0;
const char *realmode;
FILE *fp;

switch (mode) {
case SLURP_TEXT:
realmode = "r";
break;
case SLURP_BINARY:
default:
realmode = "rb";
break;
}

This may be more scalable, but this may be more readable:

if ( mode == SLURP_TEXT )
realmode = "r";
else
realmode = "rb";

Then again, if you'd to work with filehandles, this wouldn't be an issue, as
well as all the error checking and having to close the file (and checking
whether it should be closed).
if ((fp = fopen(fname, realmode)) == 0)
goto exit;

why not:

if ( ... )
return ( NULL );
/* slurp the file in */
for (;;) {

I would use a while loop here, which reads until EOF, saves you another goto
void *p;
size_t read;

if (!(p = realloc(mem, size += CHUNK)))
goto nomem;

Why don't you check the filesize (fstat is available for quite some
compilers), so you can make an educated guess on the size and always enlarge
it if need be.
mem = p;
read = fread(mem + offt, 1, CHUNK, fp);
offt += read;
if (read < CHUNK)
goto exit;

Same thing on the goto as before
}

nomem:
free(mem);
mem = 0;
exit:
if (fp)
fclose(fp);
*numread = offt;
return mem;
}

[code snipped]
int main(int argc, char **argv)
{
/* test text slurp */
{
char **lines = fslurp(is_unix() ? "/etc/services" : "c:/boot.ini",
0);

I don't have a boot.ini - MSDOS.SYS should be available on all Windows
systems (in text-format, it's binary on pre Windows, try autoexec.bat on
true DOS systems, it will almost always have some content) - but what about
Mac (pre OS X) users? Why not take it from the command line, like for the
binary slurp?

I think it's a nice initiative. Good luck,
 
R

rihad

Why not work with file handles, this will also clean up your code a bit and
is more consistent with likewise functions.

OTOH it makes usage a bit simpler, though not as flexible. It's just a matter of
moving things around and extending the interface to allow both options.
char **fslurp(const char *fname, int *numread)
{

[code snipped]
if (num_lines + 1 == softlim) {
char **tmp = realloc(retval, (softlim += 5) * sizeof *retval);
if (!tmp)
goto nomem;
retval = tmp;
}

This does appear as unstructured code to me. Altough I have nothing against
goto, IMHO it's unproperly used here.

I'd have to disagree: IMO when goto labels are constrained to this special usage
(as a function "trailer") they are quite useful and almost always make the code
simpler. Without such "generic" trailers, you would inevitably either have to
duplicate code (or hide it behind a macro) or scatter "special cases" all over
the place and thus making it a tiny bit less scalable, two alternatives I'm
trying to avoid. Besides, I like the idea of a function having a single exit
point.

if (!tmp) {
free(retval);
retval = NULL; /* or (void*)0, or just 0 as you use */
free(mem);
/* *numread = 0; <- if you really want to, but on error
nothing has to be defined as long as
you properly document this */
break;
}

In this case the labels are dropped, as well as the "goto exit;".

Why not make this special treatment generic for the long run? If there were
another error condition, you would still have to do proper cleanup yet again.
retval[num_lines] = 0; /* we got room for an extra 0 */

You would have to precede this with "if ( retval != NULL )" or something
similar.

Ah yes, to protect against an empty file.
I would use a while loop here, which reads until EOF, saves you another goto

Yeah, but that would force me move size_t read outside the loop and away into
the outer block. Besides, gotos are strictly following the already-seen usage
pattern.
Why don't you check the filesize (fstat is available for quite some
compilers), so you can make an educated guess on the size and always enlarge
it if need be.

I can't find fstat() in my copy of n869.txt. Is it nonstandard?
 
M

Martijn

rihad said:
I can't find fstat() in my copy of n869.txt. Is it nonstandard?

As far as I know, it's not - it's POSIX if I'm correct, but I have seen
several compilers that support it. Maybe stat is available. Anyway, it is
just a suggestion. You could always use a macro, that is defined as a
function that return the proper value if such a function is available, or as
0 (or CHUNK) if none is available.
 
M

Martijn

I can't find fstat() in my copy of n869.txt. Is it nonstandard? ^^^^^^^^^^^^^^^^^^^^^^^^^^
On clc, "standard" means "the C standard".
POSIX is great, but it's off topic here.

I know - I concurred it's not part of the standard, thus rendering it off
topic -, in both mails the bit concerning fstat was only minor. In the
remainder of the last post I tried to offer a portable (and standard
compliant) way.

Well, hopefully _somebody_ benefits from this all...
 

Ask a Question

Want to reply to this thread or ask your own question?

You'll need to choose a username for the site, which only take a couple of moments. After that, you can post your question and our members will help you out.

Ask a Question

Members online

No members online now.

Forum statistics

Threads
473,995
Messages
2,570,230
Members
46,817
Latest member
DicWeils

Latest Threads

Top