Help with program crashing...

S

santosh

Hello all,

I've put together a small program to count the number of characters and
'words' in a text file. The minimum length of a word, (in terms of no.
of characters), as well as word delimiting characters can be specified
on the command line. The default delimiting characters built into the
program are space, newline, tab, carriage return, form feed, vertical
tab, comma and null. If a 'u' or 'U' is specified as the last command
line argument, this default set is ignored and the characters in the
penultimate command line argument are used.

Now, the problem is that the program is terminated by the OS when the
'minimum word length' argument, (the second command line argument after
the filename), exceeds a certain value which seems to vary from file to
file. As far as I can figure it out, the word counting code, in
function words() shouldn't simply crash.

Can anyone spot any logical mistake or other dubious calculation which
might cause this behaviour?

I compiled it with gcc -Wall -ansi -pedantic and there were three
warnings, none of which seem to me to be able to affect the code.

Thanks in advance.

The code follows:

/*
* Usage = words filename [mwl] [delchars] [uddc]
* Options in square brackets are optional.
* filename - Path name of file to be scanned.
* mwl - Specifies the minimum length, (in characters), a word must
have
* to be counted as such.
* delchars - One or more characters which will be added to the set of

* default word delimiting characters unless 'uddc' is specified
* as 'u'/'U', in which case, the default delimiters will be
* ignored.
*
* Default 'mwl' - 1 character.
* Default 'delchars' - space, tab, newline, carriage return, form
feed,
* vertical tab
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

/* A pointer to this structure is passed to the 'words()' function */
struct words_args {
FILE *fp; /* Pointer to a file opened in text mode for reading */
size_t mwl; /* Minimum length (in chars) a 'word' must have */
char *delchars; /* Null terminated array of custom delimiting chars */
char uddc; /* If 'u' or 'U', ignores default delimiting chars */
size_t *nwords; /* Upon return: No. of words in given file */
size_t *tchars; /* Upon return: Total no. of characters in file */
size_t *wchars; /* Upon return: Total chars making up 'words' */
size_t *awl; /* Upon return: Average length of words (in chars) */
};

int words( struct words_args *args );
/* Returns true if was file opened successfully */
unsigned short int words_open_file
( char *clarg_filename, const char *mode, struct words_args *pf );
/* Returns true if 'mwl' command line parameter is valid and
convertable */
unsigned short int words_conv_mwl( char *clarg_mwl, struct words_args
*pm );
/* Returns true if 'uddc' command line parameter is valid */
unsigned short int words_check_uddc( char *clarg_uddc, struct
words_args *pu );
/* Returns true if file was closed successfully */
unsigned short int words_close_file( FILE *fp );
/* Prints to file stream, the values returned by 'words()' */
void words_print_results( char *fn, struct words_args *ps, FILE *stream
);

void words_print_results( char *fn, struct words_args *ps, FILE *stream
) {
fprintf( stream, "\n\nFile: %s\n\tTotal characters = %u\n\tTotal "
"characters making up 'words' = %u\n\tTotal words = %u\n\t"
"Average word length, (in characters) = %u\nFile size = "
"%u characters x %u bytes per character = %u bytes.\n", fn,
*ps->tchars, *ps->wchars, *ps->nwords, *ps->awl, *ps->tchars,
sizeof(char), ((*ps->tchars) * sizeof(char)) );
return;
}

unsigned short int words_open_file
( char *clarg_filename, const char *mode, struct words_args *pf ) {
if( (pf->fp = fopen(clarg_filename, mode)) == NULL ) {
fprintf( stderr, "\nFile: %s\n\tOpen failed. (Mode "
"'%s').\n", clarg_filename, mode );
return 0;
}
else
return 1;
}

unsigned short int words_close_file( FILE *fp ) {
int rv;

rv = fclose(fp);
if( rv == 0 )
return 1;
if( rv == EOF ) {
fprintf( stderr, "\nwords_close_file(): Attempt to close file"
" failed.\n" );
return 0;
}
else {
fprintf( stderr, "\nwords_close_file(): Attempt to close file"
" failed.\n" );
return 0;
}
}

unsigned short int words_conv_mwl( char *clarg_mwl, struct words_args
*pm ) {
int sscanf_rv;
char *str = NULL;

/* Check if string contains only digit characters */
for(str = clarg_mwl; *str != '\0'; ++str) {
/* If not space and digit char signal error */
if( !isdigit((int) *str) && !isspace((int) *str) ){
fprintf( stderr, "\nwords_conv_mwl(): Bad parameter"
".\n\tParameter is: %s\n.", clarg_mwl );
return 0;
}
}

/* Attempt to convert string and assign to structure member */
sscanf_rv = sscanf(clarg_mwl, "%u", &pm->mwl);
if( (sscanf_rv == 0) || (sscanf_rv == EOF) ) {
fprintf( stderr, "\nwords_conv_mwl(): sscanf() returned 0 or"
" EOF.\n\tParameter is: %s\n", clarg_mwl );
return 0;
}
else {
if( sscanf_rv == 1 )
return 1;
else {
fprintf( stderr, "\nwords_conv_mwl(): sscanf() "
"returned %d.\n\tParameter is: %s\n.",
sscanf_rv, clarg_mwl );
return 0;
}
}
}

unsigned short int words_check_uddc( char *clarg_uddc, struct
words_args *pu ) {
char *cla_uddc = clarg_uddc, ch;
size_t ctr;

for(ctr = 0; *(cla_uddc + ctr) != '\0'; ++ctr) {
if( isspace((int) *(cla_uddc + ctr)) )
continue;
else {
ch = *(cla_uddc + ctr);
if( (ch == 'u') || (ch == 'U') ) {
pu->uddc = 'u';
return 1;
}
else {
fprintf( stderr, "\nwords_check_uddc(): "
"Invalid parameter\n\t: %s\n",
clarg_uddc );
return 0;
}
}
}
return 0;
}



int main( int argc, char *argv[] ) {
int words_rv = 0;
struct words_args args, *pargs = NULL, *pmemb = NULL;
size_t no_words = 0, no_tchars = 0, no_wchars = 0, avgwl = 0;
const char *words_usage = "\nwords - Data about words and characters "
"in a text file.\n\nUsage - words filename [mwl] [delchars] "
"[uddc]\nOptions within square brackets are optional.\n\n"
"filename - Relative or absolute path name of file to be "
"examined.\nmwl - Minimum length, (in characters), a 'word' "
"must have to be counted.\ndelchars - One or more characters "
"to be added to the default set of\nword delimiting characters"
".\nuddc - If this is 'u' or 'U', the default, builtin set of"
" delimiting characters\nwill be ignored, and the one "
"specified on the command line will be used.\n";

if( argc < 2 ) {
fprintf( stderr, words_usage ); /* Print usage message */
exit(EXIT_FAILURE);
}

pargs = &args; /* Initialise pointer to struct and member pointers */
args.nwords = &no_words;
args.tchars = &no_tchars;
args.wchars = &no_wchars;
args.awl = &avgwl;

if( argc == 2 ) { /* Only 'filename' given */
pmemb = &args;
if( !words_open_file(argv[1], "rt", pmemb) )
exit(EXIT_FAILURE);
else {
args.mwl = 1; /* Default value */
args.delchars = NULL; /* Default value */
args.uddc = 0; /* Default value */

words_rv = words( pargs );

if( words_rv == 1 ) {
words_print_results( argv[1], pargs, stdout);
if( !words_close_file( pargs->fp ) )
exit(EXIT_FAILURE);
else
exit(EXIT_SUCCESS);
}
else {
fprintf( stderr, "\nwords() returned %d.\n",
words_rv );
if( !words_close_file( pargs->fp ) )
exit(EXIT_FAILURE);
else
exit(EXIT_SUCCESS);
}
}
}

if( argc == 3 ) { /* 'filename' and 'mwl' given */
pmemb = &args;
if( !words_open_file(argv[1], "rt", pmemb) )
exit(EXIT_FAILURE);
if( !words_conv_mwl(argv[2], pmemb) )
exit(EXIT_FAILURE);

args.delchars = NULL; /* Default value */
args.uddc = 0; /* Default value */

words_rv = words( pargs );

if( words_rv == 1 ) {
words_print_results( argv[1], pargs, stdout);
if( !words_close_file( pargs->fp ) )
exit(EXIT_FAILURE);
else
exit(EXIT_SUCCESS);
}
else {
fprintf( stderr, "\nwords() returned %d.\n",
words_rv );
if( !words_close_file( pargs->fp ) )
exit(EXIT_FAILURE);
else
exit(EXIT_SUCCESS);
}
}

if( argc == 4 ) { /* 'filename', 'mwl' and 'delchars' are specified */
pmemb = &args;
if( !words_open_file(argv[1], "rt", pmemb) )
exit(EXIT_FAILURE);
if( !words_conv_mwl(argv[2], pmemb) )
exit(EXIT_FAILURE);

/* Assign 'delchars' CL argument string to corresp. pointer */
args.delchars = argv[3];
args.uddc = 0; /* Default value */

words_rv = words( pargs );

if( words_rv == 1 ) {
words_print_results( argv[1], pargs, stdout);
if( !words_close_file( pargs->fp ) )
exit(EXIT_FAILURE);
else
exit(EXIT_SUCCESS);
}
else {
fprintf( stderr, "\nwords() returned %d.\n",
words_rv );
if( !words_close_file( pargs->fp ) )
exit(EXIT_FAILURE);
else
exit(EXIT_SUCCESS);
}
}

if( argc == 5 ) { /* 'filename', 'mwl', 'delchars' & 'uddc' given */
pmemb = &args;
if( !words_open_file(argv[1], "rt", pmemb) )
exit(EXIT_FAILURE);
if( !words_conv_mwl(argv[2], pmemb) )
exit(EXIT_FAILURE);
/* Assign 'delchars' CL argument string to corresp. pointer */
args.delchars = argv[3];
/* Verify the last, i.e. the 'uddc', CL argument */
if( !words_check_uddc(argv[4], pmemb) )
exit(EXIT_FAILURE);

words_rv = words( pargs );

if( words_rv == 1 ) {
words_print_results( argv[1], pargs, stdout);
if( !words_close_file( pargs->fp ) )
exit(EXIT_FAILURE);
else
exit(EXIT_SUCCESS);
}
else {
fprintf( stderr, "\nwords() returned %d.\n",
words_rv );
if( !words_close_file( pargs->fp ) )
exit(EXIT_FAILURE);
else
exit(EXIT_SUCCESS);
}
}

/* Execution will reach here only if 'argc' and thus the command line
* contains excess arguments. 'argc' should be at most 5. For now we
* print a message and exit. Later we can modify the above if
statement
* to accept 5 arguments and ignore the rest.
*/
if( argc > 5 ) {
fprintf( stderr, "\nWarning: Only four command line arguments "
"are supported.\n" );
fprintf( stderr, words_usage );
exit(EXIT_FAILURE);
}
}
/*
----------------------------------------------------------------------------
* END OF main()
*
----------------------------------------------------------------------------
*/

int words( struct words_args *args ) {
int nc;
size_t dss = 0, cwl = 0;
const char *ddcs = " \n\t\r\f\v,"; /* Default delimiting characters */
char *ds = NULL;

/* Arguments verification */
if( (args->delchars == NULL) && (args->uddc != 0) )
return 0;
if( args->fp == NULL )
return 0;
if( (args->nwords == NULL) || (args->tchars == NULL) )
return 0;
if( (args->wchars == NULL) || (args->awl == NULL) )
return 0;

/* If the custom delimiters string is not null, we verify that it ends

* with a null character, and if so, based on whether 'args->uddc' is
* 'U'/'u' or 0, we set the delimiters string 'ds' to point to the
* custom delimiters string exclusively in the former case or allocate
* space on the heap and by means of strcpy() and strcat(), build a
* unified delimiters string, consisting of both the default
delimiters
* 'ddcs' and custom delimiters, in the latter case.
*/
if( args->delchars != NULL ) {
if( strchr(args->delchars, '\0') != NULL ) {
if( args->uddc == 0 ) {
dss = (strlen(ddcs)+strlen(args->delchars)+1);
ds = malloc(dss);
if( ds == NULL )
return 0;
else {
if( strcpy(ds, ddcs) != ds )
return 0;
if( strcat(ds, args->delchars) != ds )
return 0;
}
}
else {
if((args->uddc == 'u') || (args->uddc == 'U'))
ds = args->delchars;
else
return 0;
}
}
else
return 0;
}
else {
if( args->delchars == NULL )
ds = ddcs;
else
return 0;
}

/* If 'mwl' is zero, replace with one. */
if( args->mwl == 0 )
args->mwl = 1;

/* The actual word counting code... */
while( (nc = fgetc(args->fp)) != EOF ) {
if( strchr(ds, nc) != NULL ) {
if( cwl >= args->mwl ) {
++(*args->nwords); /* Increment word count */
++(*args->tchars); /* Inc. total char count */
cwl = 0; /* Reset current word length count */
continue;
}
else {
++(*args->tchars);
cwl = 0;
continue;
}
}
else {
++cwl; /* Increment the current world length counter */
++(*args->tchars); /* Increment total char count */
++(*args->wchars); /* Inc. the words chars count */
continue;
}
}

/* File read returned EOF. If it's file's end, then calculate
* average word length and return successfully. If it's due to a read
* error or otherwise, return failure.
*/
if( feof(args->fp) ) {
/* Average word length is total characters making up words
* divided by the number of words
*/
*args->awl = *args->wchars / *args->nwords;
return 1;
}
else {
if( ferror(args->fp) )
return 0;
else
return 0;
}
}
/* ------------------------------------ */
/* END OF words.c */
/* ------------------------------------ */
 
B

boa

santosh said:
Hello all,

I've put together a small program to count the number of characters and
'words' in a text file. The minimum length of a word, (in terms of no.
of characters), as well as word delimiting characters can be specified
on the command line. The default delimiting characters built into the
program are space, newline, tab, carriage return, form feed, vertical
tab, comma and null. If a 'u' or 'U' is specified as the last command
line argument, this default set is ignored and the characters in the
penultimate command line argument are used.

Now, the problem is that the program is terminated by the OS when the
'minimum word length' argument, (the second command line argument after
the filename), exceeds a certain value which seems to vary from file to
file. As far as I can figure it out, the word counting code, in
function words() shouldn't simply crash.

Can anyone spot any logical mistake or other dubious calculation which
might cause this behaviour?

If 'minimum word length' is set very high, higher than the max length of
any word in the input, you get a divide by zero in words(), approx at
line 411.

/* Average word length is total characters making up words
* divided by the number of words
*/
*args->awl = *args->wchars / *args->nwords;


HTH
boa
 
R

Richard Heathfield

santosh said:
Can anyone spot any logical mistake or other dubious calculation which
might cause this behaviour?

*args->awl = *args->wchars / *args->nwords;

*args->nwords is 0. Oops.

Your program is very complicated, considering its simple task. Why, for
example, are these struct members pointers?

size_t *nwords;
size_t *tchars;
size_t *wchars;
size_t *awl;
 
S

santosh

Richard said:
santosh said:


*args->awl = *args->wchars / *args->nwords;

*args->nwords is 0. Oops.

Yes, should've spotted that. Thanks.
Your program is very complicated, considering its simple task. Why, for
example, are these struct members pointers?

size_t *nwords;
size_t *tchars;
size_t *wchars;
size_t *awl;

Well, originally I planned on passing the structure by value with the
above pointers pointing to the appropriate variables in the caller, but
later changed words() to accept a pointer to struct instead and forgot
to change the above pointers to variables. I will do that now.

Most of the code in the program is error checking at every turn,
otherwise bad CL arguments will cause further misbehaviour. The actual
word counting function seems simple enough to me, though as you point
out above, I can avoid the constant indirections.
 
R

Richard Heathfield

santosh said:
Well, originally I planned on passing the structure by value with the
above pointers pointing to the appropriate variables in the caller,

Just as a rule of thumb, it's generally (i.e. practically always!) best to
pass the address of the structure (as you have now chosen to do). If you
don't want the called function to modify the structure whose address is
passed, pass it as const struct T *p rather than struct T *p.
 
S

santosh

Richard said:
Just as a rule of thumb, it's generally (i.e. practically always!) best to
pass the address of the structure (as you have now chosen to do).

Yes, I guess passing a copy of a structure is required far less often
than by reference.
If you don't want the called function to modify the structure whose address is
passed, pass it as const struct T *p rather than struct T *p.

Okay, but the callee can make a copy of the const struct T *p and use
that.

An array name is also a const pointer, but modification is allowed in
that case...
 
R

Richard Heathfield

santosh said:
Okay, but the callee can make a copy of the const struct T *p and use
that.

Sure, but adding const ensures that the callee can't change the struct's
contents itself.
An array name is also a const pointer,

No, it isn't.
 
R

Richard Heathfield

santosh said:
Sorry about that. I'm still quite new to C.
I should have said it's a pointer constant, not a const pointer.

It isn't even a pointer constant. It's an array name. When used in a value
context, however, we must apply what Chris Torek calls "The Rule":

A = *(A + i) by definition, from the Standard

=> &A = &*(A + i)

=> &A = (A + i)

=> &A[0] = (A + 0)

=> &A[0] = A

(i.e. the name of an array, used in a value context, decays to a pointer to
the array's first element).

If this is what you meant, you are correct. Otherwise, you are not correct.
 
C

Chris Torek

santosh said:
Sorry about that. I'm still quite new to C.
I should have said it's a pointer constant, not a const pointer.

Even this is not quite right, for two reasons. One is somewhat
minor, but still significant: it is not (or at least not necessarily)
a *constant*:

#include <stdio.h>
void f(void) {
char a[10];
printf("%p\n", (void *)a);
}
void g(void) {
f();
}
int main(void) {
f();
g();
return 0;
}

This will often print two different numbers (although some systems
will produce just one, in which case "a" does appear to be a constant
after all). Note, however, that if we make "a" static, it is quite
likely to produce the same number for both printf()s (since the
output from %p is implementation defined, we cannot predict with
100% certainty that it *will* be the same number, though).

The more important reason not to say that an array "is" a pointer
is because an array is *not* a pointer. An array is an array, and
a pointer is a pointer. An array name used where a value is needed
*becomes* a pointer, but this pointer is *computed* (at compile
and/or run time as appropriate) by the compiler, in exactly the
same way that the address of any other object is computed.

See also <http://c-faq.com/aryptr/index.html> (all of section 6!).
 

Ask a Question

Want to reply to this thread or ask your own question?

You'll need to choose a username for the site, which only take a couple of moments. After that, you can post your question and our members will help you out.

Ask a Question

Members online

No members online now.

Forum statistics

Threads
473,954
Messages
2,570,116
Members
46,704
Latest member
BernadineF

Latest Threads

Top