reading text files

E

EkteGjetost

I would like to first apologize to those of you who read my last post
"desperately need help". As a regular on other forums i can understand how
aggravating it would be to have someone come on who obviously doesn't know
the community and asks for people to do their work for them.

So i've come much more prepared this time.

What my problem is, is that i need to write a program that will count the
number of alphabetic characters, numbers, punctuation marks, and spaces
from a text file.

Here's what i've done so far.

#include <stdio.h>
#include <ctype.h>

void countAlpha (FILE *infile, FILE *outfile, char alphabet);
void countDigit (FILE *infile, FILE *outfile, char numbers);
void countPunct (FILE *infile, FILE *outfile, char punctuation;
void countSpace (FILE *infile, FILE *outfile, char spaces);

int main()
{
FILE *infile;
FILE *outfile;
char alphabet = 0;
char numbers = 0;
char punctuation = 0;
char spaces = 0;

infile = fopen( "input.txt", "r");
if(infile == NULL)
{
printf("Cannot read input file: input.txt\n");
return 100;
}
outfile = fopen( "output.txt", "w");
if(outfile == NULL)
{
printf("Cannot open outputfile: output.txt\n");
return 100;
}

countAlpha(infile, outfile, alphabet);
countDigit(infile, outfile, numbers);
countPunct(infile, outfile, punctuation);
countSpace(infile, outfile, spaces);
return 0;
}

void countAlpha (FILE *infile, FILE *outfile, char alphabet)
{
fscanf(infile, "%c", alphabet);

while(isalpha(alphabet));
alphabet=getchar();
// i'm pretty sure this while loop is where the problem is

fprintf(outfile, "Alphabetic Characters: %c\n", alphabet);

and i just repeated the same things basically for each function after
that.

When i try to run this i get an error before what seems like anything else
happens.
 
R

Ravi Uday

EkteGjetost said:
I would like to first apologize to those of you who read my last post
"desperately need help". As a regular on other forums i can understand how
aggravating it would be to have someone come on who obviously doesn't know
the community and asks for people to do their work for them.

So i've come much more prepared this time.

What my problem is, is that i need to write a program that will count the
number of alphabetic characters, numbers, punctuation marks, and spaces
from a text file.

Here's what i've done so far.

#include <stdio.h>
#include <ctype.h>

void countAlpha (FILE *infile, FILE *outfile, char alphabet);
void countDigit (FILE *infile, FILE *outfile, char numbers);
void countPunct (FILE *infile, FILE *outfile, char punctuation;
void countSpace (FILE *infile, FILE *outfile, char spaces);

int main()
{
FILE *infile;
FILE *outfile;
char alphabet = 0;
char numbers = 0;
char punctuation = 0;
char spaces = 0;

infile = fopen( "input.txt", "r");
if(infile == NULL)
{
printf("Cannot read input file: input.txt\n");
return 100;
}
outfile = fopen( "output.txt", "w");
if(outfile == NULL)
{
printf("Cannot open outputfile: output.txt\n");
return 100;
}

countAlpha(infile, outfile, alphabet);
countDigit(infile, outfile, numbers);
countPunct(infile, outfile, punctuation);
countSpace(infile, outfile, spaces);
return 0;
}

void countAlpha (FILE *infile, FILE *outfile, char alphabet)
{
fscanf(infile, "%c", alphabet);

while(isalpha(alphabet));

Why are you having a semi-colon at the end of while ???
alphabet=getchar();
// i'm pretty sure this while loop is where the problem is

you got it :)
 
E

EkteGjetost

Alright so here's what it looks like "complete" without the pair of
semicolons in the while loops.

#include <stdio.h>
#include <ctype.h>

void countAlpha (FILE *infile, FILE *outfile, char alphabet);
void countDigit (FILE *infile, FILE *outfile, char numbers);
void countPunct (FILE *infile, FILE *outfile, char punctuation);
void countSpace (FILE *infile, FILE *outfile, char spaces);

int main()
{
FILE *infile;
FILE *outfile;
char alphabet = 0;
char numbers = 0;
char punctuation = 0;
char spaces = 0;

infile = fopen( "input.txt", "r");
if(infile == NULL)
{
printf("Cannot read input file: input.txt\n");
return 100;
}
outfile = fopen( "output.txt", "w");
if(outfile == NULL)
{
printf("Cannot open outputfile: output.txt\n");
return 100;
}

countAlpha(infile, outfile, alphabet);
countDigit(infile, outfile, numbers);
countPunct(infile, outfile, punctuation);
countSpace(infile, outfile, spaces);
return 0;
}

void countAlpha (FILE *infile, FILE *outfile, char alphabet)
{
fscanf(infile, "%c", alphabet);

while(isalpha(alphabet))
alphabet=getchar();
// i'm pretty sure this while loop is where the problem is

fprintf(outfile, "Alphabetic Characters: %c\n", alphabet);
}

void countDigit (FILE *infile, FILE *outfile, char numbers)
{
fscanf(infile, "%c", numbers);

while(isdigit(numbers))
numbers=getchar();

fprintf(outfile, "Numerical Characters: %c\n", numbers);
}

void countPunct (FILE *infile, FILE *outfile, char punctuation)
{
fscanf(infile, "%c", punctuation);

while(ispunct(punctuation))
punctuation=getchar();

fprintf(outfile, "Punctuation Characters: %c\n", punctuation);
}

void countSpace (FILE *infile, FILE *outfile, char spaces)
{
fscanf(infile, "%c", spaces);

while(isspace(spaces))
spaces=getchar();

fprintf(outfile, "White Spaces: %c\n", spaces);
}

unfortunately i still get an error...
am i not allowed to do 4 different scans of the same file like that or
something?
 
E

EkteGjetost

Alright so here's what it looks like "complete" without the pair of
semicolons in the while loops.

#include <stdio.h>
#include <ctype.h>

void countAlpha (FILE *infile, FILE *outfile, char alphabet);
void countDigit (FILE *infile, FILE *outfile, char numbers);
void countPunct (FILE *infile, FILE *outfile, char punctuation);
void countSpace (FILE *infile, FILE *outfile, char spaces);

int main()
{
FILE *infile;
FILE *outfile;
char alphabet = 0;
char numbers = 0;
char punctuation = 0;
char spaces = 0;

infile = fopen( "input.txt", "r");
if(infile == NULL)
{
printf("Cannot read input file: input.txt\n");
return 100;
}
outfile = fopen( "output.txt", "w");
if(outfile == NULL)
{
printf("Cannot open outputfile: output.txt\n");
return 100;
}

countAlpha(infile, outfile, alphabet);
countDigit(infile, outfile, numbers);
countPunct(infile, outfile, punctuation);
countSpace(infile, outfile, spaces);
return 0;
}

void countAlpha (FILE *infile, FILE *outfile, char alphabet)
{
fscanf(infile, "%c", alphabet);

while(isalpha(alphabet))
alphabet=getchar();
// i'm pretty sure this while loop is where the problem is

fprintf(outfile, "Alphabetic Characters: %c\n", alphabet);
}

void countDigit (FILE *infile, FILE *outfile, char numbers)
{
fscanf(infile, "%c", numbers);

while(isdigit(numbers))
numbers=getchar();

fprintf(outfile, "Numerical Characters: %c\n", numbers);
}

void countPunct (FILE *infile, FILE *outfile, char punctuation)
{
fscanf(infile, "%c", punctuation);

while(ispunct(punctuation))
punctuation=getchar();

fprintf(outfile, "Punctuation Characters: %c\n", punctuation);
}

void countSpace (FILE *infile, FILE *outfile, char spaces)
{
fscanf(infile, "%c", spaces);

while(isspace(spaces))
spaces=getchar();

fprintf(outfile, "White Spaces: %c\n", spaces);
}

unfortunately i still get an error...
am i not allowed to do 4 different scans of the same file like that or
something?
 
C

CBFalconer

EkteGjetost said:
.... snip ...

What my problem is, is that i need to write a program that will
count the number of alphabetic characters, numbers, punctuation
marks, and spaces from a text file.

Here's what i've done so far.

#include <stdio.h>
#include <ctype.h>

void countAlpha (FILE *infile, FILE *outfile, char alphabet);
.... snip ...

What possible use are these? What do you expect the functions to
do?
int main()

Use either "int main(void)" or "int main(int argc, char **argv)".
{
FILE *infile;
FILE *outfile;

What are these files for? You already have stdin available for
input, and stdout for output. Use them.
char alphabet = 0;
char numbers = 0;
char punctuation = 0;
char spaces = 0;

infile = fopen( "input.txt", "r");
if(infile == NULL)
{
printf("Cannot read input file: input.txt\n");
return 100;
}
outfile = fopen( "output.txt", "w");
if(outfile == NULL)
{
printf("Cannot open outputfile: output.txt\n");
return 100;
}

Using stdin and stdout you don't need any of the above. Besides
countAlpha(infile, outfile, alphabet);
countDigit(infile, outfile, numbers);
countPunct(infile, outfile, punctuation);
countSpace(infile, outfile, spaces);
return 0;
}

void countAlpha (FILE *infile, FILE *outfile, char alphabet)
{
fscanf(infile, "%c", alphabet);
.... snip rest ...

Never use fscanf if you don't understand it.

You need nothing but a loop in main which reads characters into an
int (not a char) until EOF, and classifies them. Something like:

while (EOF != (ch = getc(stdin)) {
/* classify whatever is in ch and increment something */
}
/* spit out the totals in the counters */

You should be able to run and verify the above loop, even though it
produces no output. Then you can add the "spit out" code, which
will probably show zeroes since you have never incremented
anything. Then you can build some sort of if / else if / else loop
to handle the classification.
 
K

Keith Thompson

EkteGjetost said:
unfortunately i still get an error...

What error do you get (or should we guess)?
am i not allowed to do 4 different scans of the same file like that or
something?

Why would you want to scan the same file 4 times?
 
M

Malcolm

EkteGjetost said:
#include <stdio.h>
#include <ctype.h>

void countAlpha (FILE *infile, FILE *outfile, char alphabet);
void countDigit (FILE *infile, FILE *outfile, char numbers);
void countPunct (FILE *infile, FILE *outfile, char punctuation;
void countSpace (FILE *infile, FILE *outfile, char spaces);

int main()
Why not pass in the file as a parameter?

int main(int argc, char **argv)
{
FILE *fpin;
if(argc != 2)
{
/* write a function that prints out a message about how to use the
program*/
usage();
exit(EXIT_FAILURE);
}

/* open the file */
fpin = fopen(argv[1], "r");

/* rest of program here */
}
{
FILE *infile;
FILE *outfile;
char alphabet = 0;
char numbers = 0;
char punctuation = 0;
char spaces = 0;

infile = fopen( "input.txt", "r");
if(infile == NULL)
{
printf("Cannot read input file: input.txt\n");
return 100;
}
outfile = fopen( "output.txt", "w");
if(outfile == NULL)
{
printf("Cannot open outputfile: output.txt\n");
return 100;
}
countAlpha(infile, outfile, alphabet);
countDigit(infile, outfile, numbers);
countPunct(infile, outfile, punctuation);
countSpace(infile, outfile, spaces);

The problem with this is that you are trying to parse the same stream four
times.

A better way is to declare a buffer of 1024 bytes. Then call fgets(). If the
line is longer than 1024 characters there will be no newline in the end.
This probably indicates a corrupt file, so you can reject it (you might need
to check max line length with whoever wrote the spec).

Then write four functions
int countAlpha(const char *line);
int countDigit(const char *line);

etc

call each function on the lines you input, and keep four running totals.
Then output at the end.
 
R

Richard Bos

Malcolm said:
The problem with this is that you are trying to parse the same stream four
times.

A better way is to declare a buffer of 1024 bytes. Then call fgets(). If the
line is longer than 1024 characters there will be no newline in the end.
This probably indicates a corrupt file, so you can reject it (you might need
to check max line length with whoever wrote the spec).

Then write four functions
int countAlpha(const char *line);
int countDigit(const char *line);

etc

call each function on the lines you input, and keep four running totals.
Then output at the end.

What a dreadful solution! This means that the file will have to be split
into bunches of 1024 characters, essentially a random number; you need
more memory than necessary; and you keep calling these functions over
and over, for no good reason. They need only be called once.

I suggest the OP solve his homework(!) problem by looking up the
Standard function rewind(), from <stdio.h>.

Richard
 
D

Dave Thompson

I would like to first apologize to those of you who read my last post
"desperately need help". As a regular on other forums i can understand how
aggravating it would be to have someone come on who obviously doesn't know
the community and asks for people to do their work for them.

So i've come much more prepared this time.

What my problem is, is that i need to write a program that will count the
number of alphabetic characters, numbers, punctuation marks, and spaces
from a text file.
As others have noted, the "best" solution, for common values of
"best", is to process the file once, keeping all four counts at the
same time. But even for the one-at-a-time approach you have, which may
be preferable or at least reasonable in some more complicated
situations, you have some pretty basic problems.

Since enough time has passed that this probably can't be homework --
and you're unusually polite -- I'll explain more completely.
Here's what i've done so far.

#include <stdio.h>
#include <ctype.h>

void countAlpha (FILE *infile, FILE *outfile, char alphabet);
void countDigit (FILE *infile, FILE *outfile, char numbers);
void countPunct (FILE *infile, FILE *outfile, char punctuation;
void countSpace (FILE *infile, FILE *outfile, char spaces);
See below about the third parameter to these functions ...
int main()
{
FILE *infile;
FILE *outfile;
char alphabet = 0;
char numbers = 0;
char punctuation = 0;
char spaces = 0;
.... and these variables.
infile = fopen( "input.txt", "r");
if(infile == NULL)
{
printf("Cannot read input file: input.txt\n");
return 100;
}
outfile = fopen( "output.txt", "w");
if(outfile == NULL)
{
printf("Cannot open outputfile: output.txt\n");
return 100;
}
A process exit status of 100 is not portable; standard C provides only
zero, and EXIT_SUCCESS and EXIT_FAILURE from stdlib.h. Even on the
many systems where 0 to 255 works, 100 is an unusual value to choose.
I would suggest you use EXIT_FAILURE when posting here, just to avoid
unnecessarily repeated discussion of the issue, and if you want change
it to some other value on your own system(s).
countAlpha(infile, outfile, alphabet);
countDigit(infile, outfile, numbers);
countPunct(infile, outfile, punctuation);
countSpace(infile, outfile, spaces);
return 0;

While the C runtime will fclose() all fopen'ed files for you, some
people, including me, consider it better to do so explicitly. This
also allows you to check for some errors, which for output files
especially don't "appear" until close, although in this case there
isn't much you could reasonably do if you do detect an error.
}

void countAlpha (FILE *infile, FILE *outfile, char alphabet)
{

It is not necessary for 'alphabet' to be a parameter passed from the
caller -- the caller's value is not used, nor needed, for anything --
and is actively misleading. A local variable is better.
fscanf(infile, "%c", alphabet);
The 3rd-and-up arguments to fscanf (and sscanf, and 2nd-and-up to
scanf) must be pointers; this passes and uses at best a completely
wrong pointer and quite possibly isn't even a working call.

If you made it fscanf (infile, "%c", &alphabet) it would be legal, but
except for errors, which you don't handle anyway, equivalent to
alphabet = fgetc /* or getc */ (infile);
which is more specific and thus I think clearer.
while(isalpha(alphabet));
alphabet=getchar();
// i'm pretty sure this while loop is where the problem is
It sure is. First, you've already been told that
while (condition) ; /* dubious semicolon here */
is an empty loop -- it evaluates the condition; if true, it executes
an empty body and evaluates the condition again; et cetera. If as in
this case the condition has no side effects, if true the first time it
is still true every subsequent time and this is an infinite loop.

Even if you changed it to:
while( isalpha(alphabet) ) /* no semicolon! */
alphabet = getchar();
it tries to read from stdin not your selected input file; fix that and
while( isalpha(alphabet) )
alphabet = fgetc (infile);
is wrong logic: this counts the number of _consecutive_ alphabetic
characters at the beginning of the input (file). Plus, depending on
whether 'plain' char is signed on your system, it may malfunction when
it reaches end-of-file, (only) if the input is entirely alphabetic.
fprintf(outfile, "Alphabetic Characters: %c\n", alphabet);
Even if your loop above was correct, this would simply print the first
character encountered that is not alphabetic.

What you want is to read _every_ character from the file; count how
many are of the particular type(s) you are looking for; and then print
that _count_ (or those counts).

int c = fgetc (infile);
/* note not char; the return value of fgetc, getc, or getchar has
an "extended" range: EITHER an unsigned char value, OR
the value EOF which is a negative int usually -1 */
int n = 0;
/* or unsigned, or maybe long or unsigned long depending on
how much input you want/need to handle */
while( c != EOF ) {
if( isalpha(c) )
++n; /* or n += 1 or n = n + 1 if you prefer */
/* could do other types in parallel here */
c = fgetc (infile);
}
fprintf (outfile, "count is %d\n", n); /* or %u %ld %lu */

or you can put the getchar() call (once) within the condition:
int c;
int /* or whatever */ n = 0;
while( (c = fgetc (infile)) != EOF )
if( isalpha(c) ) ++n;

or if you really want you can use fscanf, but check the result:
char c; /* not int; now the exception case is handled differently */
int /* or whatever */ n = 0;
while( fscanf (infile, "%c", &c) == 1 )
if( isalpha(c) ) ++n;
and i just repeated the same things basically for each function after
that.

When i try to run this i get an error before what seems like anything else
happens.

- David.Thompson1 at worldnet.att.net
 
D

Dave Thompson

int c = fgetc (infile);
/* note not char; the return value of fgetc, getc, or getchar has
an "extended" range: EITHER an unsigned char value, OR
the value EOF which is a negative int usually -1 */
if( isalpha(c) )
++n; /* or n += 1 or n = n + 1 if you prefer */

Safe; getchar/fgetc/getc value != EOF is valid unsigned char.

or if you really want you can use fscanf, but check the result:
char c; /* not int; now the exception case is handled differently */
int /* or whatever */ n = 0;
while( fscanf (infile, "%c", &c) == 1 )
if( isalpha(c) ) ++n;
Unsafe: plain char may be signed and input values might be negative;
use isalpha( (unsigned char)c ), or just make c unsigned char to start
with. Sorry.

<snip>

- David.Thompson1 at worldnet.att.net
 

Ask a Question

Want to reply to this thread or ask your own question?

You'll need to choose a username for the site, which only take a couple of moments. After that, you can post your question and our members will help you out.

Ask a Question

Members online

Forum statistics

Threads
473,995
Messages
2,570,225
Members
46,815
Latest member
treekmostly22

Latest Threads

Top