Wrap rev 2.

N

name

Back again for more critique... <grin>

------------------------------------------------
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h>

#define MAX 10000

/*
wrap.c inserts newlines in place of spaces according to specified
line length. Output filename is {filename}.wrap. Takes two arguments,
filename and line length.

Done: Checks file handling, argument parameters, file type,
and word/line length comparison.

Todo: Need to figure out what sort of memory the larger files might need.
*/

void close(FILE *fp1, FILE *fp2)
{
fclose(fp1);
fclose(fp2);
}

int wordwrap(FILE *ifp, FILE *ofp, char *wl)
{
char buf[MAX];
int c, i, j, space, count, length;

length = atoi(wl);

for (j = 0; j < MAX && ((c=getc(ifp)) != EOF); ++j)
buf[j] = c;

for (i = 0; i < j; ++i)
{
if ((int)buf < 0)
return 1;
if (buf == '\n')
count = space = 0;
if (buf == '\t')
count = count + 8;
if (buf == ' ')
space = i;

++count;

if ((count == length + 1) && (space == 0))
return 2;
else if ((count == length) && (space != 0))
{
buf[space] = '\n';
count = i - space;
}
}

for (i = 0; i < j; ++i)
putc(buf, ofp);
return 0;
}

int main(int argc, char *argv[])
{
FILE *fp1;
FILE *fp2;

char *prog = argv[0];
char *filename1 = argv[1];
char filename2[80];
char *wl = argv[2];

if (argc != 3)
{
printf("Usage: %s: filename, wrap length\n", prog);
return EXIT_FAILURE;
}

if (strlen(argv[1]) > 32)
{
printf("Filename limited to 32 characters. Sorry...\n");
return EXIT_FAILURE;
}

strcpy(filename2, argv[1]);
strcat(filename2, ".wrap");

if (atoi(wl) > 80)
{
printf("Line length limit: 80. Better is < 75.\n");
return EXIT_FAILURE;
}
if (atoi(wl) < 0)
{
printf("Line length must be a positive number.\n");
return EXIT_FAILURE;
}
if ((fp1 = fopen(filename1, "r")) == NULL)
{
fprintf(stderr, "%s: can't open %s\n", prog, filename1);
return EXIT_FAILURE;
}
else if ((fp2 = fopen(filename2, "w")) == NULL)
{
fprintf(stderr, "%s: can't open %s\n", prog, filename2);
return EXIT_FAILURE;
}

switch(wordwrap(fp1, fp2, wl))
{
case 0:
{
printf("Wrapping %s at %s\n", filename1, wl);
printf("Output file adds .wrap to input filename.\n");
close(fp1, fp2);
if (ferror(fp2))
{
fprintf(stderr, "%s: error writing %s\n", prog, filename2);
return EXIT_FAILURE;
}
return EXIT_SUCCESS;
}
case 1:
{
printf("Not an ASCII file.\n");
close(fp1, fp2);
return EXIT_FAILURE;
}
case 2:
{
printf("Word length exceeds specified line length.\n");
close(fp1, fp2);
return EXIT_FAILURE;
}
default:
{
printf("Unexplained program failure.\n");
close(fp1, fp2);
return EXIT_FAILURE;
}
}
}

----------------------------------------------------

I went with switch/case to avoid having to rewind the file for each check.
Is there a better approach to this?

Thanks for reading and reviewing!
 
A

Arthur J. O'Dwyer

Back again for more critique... <grin>

Apparently you still have some bugs to work out.

% cat test.txt
12345678901234567890 a
12345678901234567890 ab c defghijklmn abcedfhgijk
% ./a.out test.txt 21 ; cat test.txt.wrap
Wrapping test.txt at 21
Output file adds .wrap to input filename.
12345678901234567890
a
12345678901234567890 ab c defghijklmn abcedfhgijk
%

[...]
Todo: Need to figure out what sort of memory the larger files might need.

Watch those extra-long lines and hard tabs in Usenet posts.
Seventy-five characters, please!
http://www.contrib.andrew.cmu.edu/~ajo/free-software/usenetify2.c

void close(FILE *fp1, FILE *fp2)
{
fclose(fp1);
fclose(fp2);
}

This function seems superfluous. And you should be aware that
some Unixy platforms provide a non-standard 'close' function that
might cause some compilers to complain about the name conflict in
non-conforming mode. Doesn't mean you have to change the name, but
at least you'll know what to tell the guys who complain that your
code doesn't compile with their compiler: "use ANSI-standard mode!"

int wordwrap(FILE *ifp, FILE *ofp, char *wl)
{
char buf[MAX];
int c, i, j, space, count, length;

Inconsistent indentation here. And I don't see you initializing
'space' or 'count' anywhere. (gcc told me about this problem, BTW;
are you really compiling with the highest warning levels?)
length = atoi(wl);

Wouldn't it make a lot more sense to perform this conversion
in 'main', rather than delaying it until 'wordwrap'? What does the
delay accomplish? (And 'wl' should be a 'const char *' even if you
decide to keep the conversion here; you don't modify its target.)
for (j = 0; j < MAX && ((c=getc(ifp)) != EOF); ++j)
buf[j] = c;

for (i = 0; i < j; ++i)
{
if ((int)buf < 0)
return 1;


(This is the "not an ASCII file" return code.) What about files
containing, say, bytes with values greater than 126? Aren't they
non-ASCII too? (Consider both signed and unsigned 'char' platforms.)
I suggest you get rid of this check altogether; let the user worry
about file types. Most binary files won't pass your "word length"
check anyway; and even if you do accidentally wrap a binary file,
there's no harm done. So let the user worry about it.
if (buf == '\n')
count = space = 0;
if (buf == '\t')
count = count + 8;


This is not the traditional meaning of '\t'. Traditionally,
a tab code means: "Advance the cursor to the next tab stop." Tab
stops often come every 8 characters, but if the cursor is currently
at position 3, and we see a tab, we ought to be advancing to
position 8: five spaces, not eight. Get pencil and paper and work
out the proper expression; it's not hard.
Consider letting the user specify the tab width; what if he uses
four-space tabs?
Why don't you have 'space = i;' here as well? Isn't a tab a
reasonable place to break a line?
if (buf == ' ')
space = i;

++count;

if ((count == length + 1) && (space == 0))


Consider the following test case. What goes wrong, and how can
you fix it? (The ^I symbols in the output from 'cat' represent
horizontal tabs.)

% cat -T test.txt
^I^I^Iabcdef ghijkl mnopqr stuvwx yzyzyz
^I^I^Iabcde ghijk lmnop qrstu wxyzy zxyz
% ./a.out test.txt 21 ; cat -T test.txt.wrap
Wrapping test.txt at 21
Output file adds .wrap to input filename.
^I^I^Iabcdef ghijkl mnopqr stuvwx yzyzyz
^I^I^Iabcde ghijk lmnop qrstu wxyzy zxyz
%
return 2;
else if ((count == length) && (space != 0))
{
buf[space] = '\n';
count = i - space;
}
}

for (i = 0; i < j; ++i)
putc(buf, ofp);
return 0;
}

int main(int argc, char *argv[])
{
FILE *fp1;
FILE *fp2;

char *prog = argv[0];
char *filename1 = argv[1];


If 'argv' is 0, you've just crashed the program.
char filename2[80];
char *wl = argv[2];

If 'argv' is 0 or 1, you've just crashed the program.
if (argc != 3)
{
printf("Usage: %s: filename, wrap length\n", prog);
return EXIT_FAILURE;
}

if (strlen(argv[1]) > 32)
{
printf("Filename limited to 32 characters. Sorry...\n");
return EXIT_FAILURE;
}

You may know this already, but the above is a ridiculous restriction.
Do you know about 'malloc' and 'free'? Can you see how dynamic memory
allocation could be applied here?
strcpy(filename2, argv[1]);
strcat(filename2, ".wrap");

if (atoi(wl) > 80)
{
printf("Line length limit: 80. Better is < 75.\n");
return EXIT_FAILURE;
}

But your 'wordwrap' function can handle up to 'MAX' characters in
a single line, can't it? Why restrict yourself? (Again, dynamic
memory allocation could be useful here. You only need on the order
of 'atoi(wl)' bytes in 'buf', after all.)
if (atoi(wl) < 0)

Over and over you repeat the same call to 'atoi', the same call
you make again inside 'wordwrap'. This is crying out for a local
'int' variable!
{
printf("Line length must be a positive number.\n");
return EXIT_FAILURE;
}
if ((fp1 = fopen(filename1, "r")) == NULL)
{
fprintf(stderr, "%s: can't open %s\n", prog, filename1);
return EXIT_FAILURE;
}
else if ((fp2 = fopen(filename2, "w")) == NULL)
{
fprintf(stderr, "%s: can't open %s\n", prog, filename2);
return EXIT_FAILURE;
}

switch(wordwrap(fp1, fp2, wl))
{
case 0:
{
printf("Wrapping %s at %s\n", filename1, wl);
printf("Output file adds .wrap to input filename.\n");
close(fp1, fp2);
if (ferror(fp2))

I don't think you can portably query 'ferror' once you've already
closed the file in question. But I'm not sure; check the manual pages
or the Standard if you want to make sure.
{
fprintf(stderr, "%s: error writing %s\n", prog, filename2);

Line too long for Usenet again.
return EXIT_FAILURE;
}
return EXIT_SUCCESS;
}
case 1:
{
printf("Not an ASCII file.\n");
close(fp1, fp2);
return EXIT_FAILURE;
}
case 2:
{
printf("Word length exceeds specified line length.\n");
close(fp1, fp2);
return EXIT_FAILURE;
}
default:
{
printf("Unexplained program failure.\n");

Can this ever happen? If not, I suggest it would be good practice
to put a comment to that effect; e.g.,

puts("Must be a bug! Contact the author!");
close(fp1, fp2);
return EXIT_FAILURE;
}
}
}

You could have written

int rc;
[...]
rc = wordwrap(...);
if (rc == 1) { ... }
else if (rc == 2) { ... }
else { ... }

but the way you did it is very good. This is the sort of thing
'switch' was made for.

Regardless, you certainly don't want to be "rewinding the file" (i.e.,
re-calling 'wordwrap' and processing the whole file several times) in
your program. Do the wrapping once, and then check to see whether it
succeeded or not. You don't have to do the wrapping twice in order to
make two checks on the return value!

Okay. Fix the bugs, lose the hard tabs, and try again. :)

HTH,
-Arthur
 
C

CBFalconer

name said:
Back again for more critique... <grin>

.... snip all ...

Don't keep starting new threads for the same subject. Generate
replies to articles already in the thread.
 
N

name

... snip all ...

Don't keep starting new threads for the same subject. Generate
replies to articles already in the thread.

Not the same subject. The first thread was for the first attempt, the
second thread for the first revision, and this third thread for the second
revision. In each thread, if you have been following, you will note that I
thank the contributors and assert that I shall return at some point in the
future with another offering. If the time between offerings is not
immediate, a new thread should be started; this is in fact the practice with
thread continuations that are not timely.

I'm surprised at your admonishment, as you are obviously more than
adequately knowledgable about Usenet practices. But then, perhaps you have
not been following the threads themselves...
 
N

name

Apparently you still have some bugs to work out.

% cat test.txt
12345678901234567890 a
12345678901234567890 ab c defghijklmn abcedfhgijk
% ./a.out test.txt 21 ; cat test.txt.wrap
Wrapping test.txt at 21
Output file adds .wrap to input filename.
12345678901234567890
a
12345678901234567890 ab c defghijklmn abcedfhgijk
%

Interesting!! I'll take a look at that.
[...]
Todo: Need to figure out what sort of memory the larger files might need.

Watch those extra-long lines and hard tabs in Usenet posts.
Seventy-five characters, please!
http://www.contrib.andrew.cmu.edu/~ajo/free-software/usenetify2.c

Ummmm... I got no warning from my client editor. But then perhaps I should
not have expected one as this was a file insertion... but my code editor is
set to 75 as well... a mystery! LOL!!!
This function seems superfluous. And you should be aware that
some Unixy platforms provide a non-standard 'close' function that
might cause some compilers to complain about the name conflict in
non-conforming mode. Doesn't mean you have to change the name, but
at least you'll know what to tell the guys who complain that your
code doesn't compile with their compiler: "use ANSI-standard mode!"

Well, it was sort of an afterthought, actually. I was looking at the switch
case section and noted the repetitive fclose statements. So I thought I'd
try to extract them to a function, although I noted I didn't save any lines.
In the future, however, if more checks are implemented, there would be some.

My compile routine is: gcc -g -Wall -ansi -pedantic -o foo foo.c. And I
have had no thought that anyone might even be interested in this almost
certainly kindergarten level C code! It's only my own learning exercise; I
must believe that better versions than this are part of standard utilities
somewhere....
int wordwrap(FILE *ifp, FILE *ofp, char *wl)
{
char buf[MAX];
int c, i, j, space, count, length;

Inconsistent indentation here. And I don't see you initializing
'space' or 'count' anywhere. (gcc told me about this problem, BTW;
are you really compiling with the highest warning levels?)

Ummmm.... WTH?!? I was sure I had corrected this before posting!?

Wouldn't it make a lot more sense to perform this conversion
in 'main', rather than delaying it until 'wordwrap'? What does the
delay accomplish? (And 'wl' should be a 'const char *' even if you
decide to keep the conversion here; you don't modify its target.)

Well, I could have converted it at any point, but don't see that it makes a
difference. Except, if it should be 'const char *', then it should probably
be converted in main. I'll look at this.
for (j = 0; j < MAX && ((c=getc(ifp)) != EOF); ++j)
buf[j] = c;

for (i = 0; i < j; ++i)
{
if ((int)buf < 0)
return 1;


(This is the "not an ASCII file" return code.) What about files
containing, say, bytes with values greater than 126? Aren't they
non-ASCII too? (Consider both signed and unsigned 'char' platforms.)


Oh.... I knew there was something going on here. I tried all the wrong
statements. The latest change was from (!(0 < (int)buf < 127)). What
should have been, and not realizing the difference was between signed and
unsigned 'char', I didn't do the obvious: (((int)buf < 0) ||
((int)buf > 127)). I should have done that first, even if I didn't see
the cause. I have been instructed!!
I suggest you get rid of this check altogether; let the user worry
about file types. Most binary files won't pass your "word length"
check anyway; and even if you do accidentally wrap a binary file,
there's no harm done. So let the user worry about it.

Yeah, I noticed that. Again, I've no thought that anyone else would want to
use this code, so my addition of checks is for "full generality" (got that
from K&R2... <grin>). In reality, this check has no value in this code.

My use would be within a shell script wrapper that does some of these
checks, though which ones aren't immediately obvious.
if (buf == '\n')
count = space = 0;
if (buf == '\t')
count = count + 8;


This is not the traditional meaning of '\t'. Traditionally,
a tab code means: "Advance the cursor to the next tab stop." Tab
stops often come every 8 characters, but if the cursor is currently
at position 3, and we see a tab, we ought to be advancing to
position 8: five spaces, not eight. Get pencil and paper and work
out the proper expression; it's not hard.


I know that tab is not intended for indentation, but for columnation.
Nevertheless, I did it because it seemed a reasonable expectation for the
"mythical (l)user". Just an arbitrary choice, I guess.
Consider letting the user specify the tab width; what if he uses
four-space tabs?
Why don't you have 'space = i;' here as well? Isn't a tab a
reasonable place to break a line?

Ummm... well, I thought about that, and decided that if fewer spaces were
used, the line wraps would be short rather than too long. "space = i"?
Don't understand this. And a tab doesn't seem a reasonable place to expect
to wrap... <grin>

Problem is, full generality should handle all this and do so gracefully.
More work to be done.
if (buf == ' ')
space = i;

++count;

if ((count == length + 1) && (space == 0))


Consider the following test case. What goes wrong, and how can
you fix it? (The ^I symbols in the output from 'cat' represent
horizontal tabs.)

% cat -T test.txt
^I^I^Iabcdef ghijkl mnopqr stuvwx yzyzyz
^I^I^Iabcde ghijk lmnop qrstu wxyzy zxyz
% ./a.out test.txt 21 ; cat -T test.txt.wrap
Wrapping test.txt at 21
Output file adds .wrap to input filename.
^I^I^Iabcdef ghijkl mnopqr stuvwx yzyzyz
^I^I^Iabcde ghijk lmnop qrstu wxyzy zxyz
%


Ah, okay. I'll check this out. Thanks!
return 2;
else if ((count == length) && (space != 0))
{
buf[space] = '\n';
count = i - space;
}
}

for (i = 0; i < j; ++i)
putc(buf, ofp);
return 0;
}

int main(int argc, char *argv[])
{
FILE *fp1;
FILE *fp2;

char *prog = argv[0];
char *filename1 = argv[1];


If 'argv' is 0, you've just crashed the program.


Yeah.. <grin> argv[0] is the program name for warnings.

On the other hand, if argv[0] == '0', more than just the program has
problems!!
char filename2[80];
char *wl = argv[2];

If 'argv' is 0 or 1, you've just crashed the program.

As above...
if (argc != 3)
{
printf("Usage: %s: filename, wrap length\n", prog);
return EXIT_FAILURE;
}

if (strlen(argv[1]) > 32)
{
printf("Filename limited to 32 characters. Sorry...\n");
return EXIT_FAILURE;
}

You may know this already, but the above is a ridiculous restriction.
Do you know about 'malloc' and 'free'? Can you see how dynamic memory
allocation could be applied here?

Oh sure. I think the tradition is 256. I was just implementing a check
with an arbitrary value.

Note that memory management is slated for the next revision. I figured I
should get the particulars running right before I addressed this.

Note also that this is a learning exercise, and that memory management is
the next phase of the curriculum... said:
strcpy(filename2, argv[1]);
strcat(filename2, ".wrap");

if (atoi(wl) > 80)
{
printf("Line length limit: 80. Better is < 75.\n");
return EXIT_FAILURE;
}

But your 'wordwrap' function can handle up to 'MAX' characters in
a single line, can't it? Why restrict yourself? (Again, dynamic
memory allocation could be useful here. You only need on the order
of 'atoi(wl)' bytes in 'buf', after all.)
if (atoi(wl) < 0)
Over and over you repeat the same call to 'atoi', the same call
you make again inside 'wordwrap'. This is crying out for a local
'int' variable!

Make three calls, two for error checking and one for conversion in the wrap
functions. If I was making an iterative call, that would be a real
consideration. However, in the interest of elegance, you're right: there
should be a single call.

I don't think you can portably query 'ferror' once you've already
closed the file in question. But I'm not sure; check the manual pages
or the Standard if you want to make sure.

Ah, okay. The prototype of this routine was from K&R2, but they check
stdout, not fp. Probably not kosher, then.
Line too long for Usenet again.


Can this ever happen? If not, I suggest it would be good practice
to put a comment to that effect; e.g.,

Probably can't happen. OTOH, it is precisely those kinds of things that
happen anyway!
puts("Must be a bug! Contact the author!");

How about: puts("Remediate the link between the keyboard and chair!\n");
close(fp1, fp2);
return EXIT_FAILURE;
}
}
}

You could have written

int rc;
[...]
rc = wordwrap(...);
if (rc == 1) { ... }
else if (rc == 2) { ... }
else { ... }

Yep. Did that but it looked ugly. Too much preparation, etc.
but the way you did it is very good. This is the sort of thing
'switch' was made for.

Regardless, you certainly don't want to be "rewinding the file" (i.e.,
re-calling 'wordwrap' and processing the whole file several times) in
your program. Do the wrapping once, and then check to see whether it
succeeded or not. You don't have to do the wrapping twice in order to
make two checks on the return value!

Well, if I'm going to use this on really long files, perhaps I want it to
bail out as soon as it discovers a problem. Don't want to rewind the file
at all! This way, it bails on the iteration during which the problem occurs.
Okay. Fix the bugs, lose the hard tabs, and try again. :)

Will do.
HTH,
-Arthur

Thanks for the critique, Arthur!! I'll be adding memory management for the
next version, as well as bug/design fixes. AbB!!
 
R

Randy Howard

Well, it was sort of an afterthought, actually. I was looking at the switch
case section and noted the repetitive fclose statements. So I thought I'd
try to extract them to a function, although I noted I didn't save any lines.
In the future, however, if more checks are implemented, there would be some.

The point is, the name "close" is a common system call on several OS platforms.
Use a unique name, such as "my_close()" or "close_open_files()" or something
instead.
 
N

name

The point is, the name "close" is a common system call on several OS platforms.
Use a unique name, such as "my_close()" or "close_open_files()" or something
instead.

Aha, thanks.

Ummm.. I presume that this sort of thing is part of doing "full generality"?
If so, it's almost certainly beyond my skill and knowledge level; I have no
reach to this sort of data short of being informed here. Certainly the code
itself will never see actual use perhaps even by me, much less by others.

But thanks for the explanation and thanks for reading!
 
R

Randy Howard

Aha, thanks.

Ummm.. I presume that this sort of thing is part of doing "full generality"?

I don't know what you mean by "full generality". You can assume that a lot of
the short, simple function names have been taken by somebody though, so all
the good names being gone, my_xyz is usually a decent approach if you insist
on something short and care about portability.
If so, it's almost certainly beyond my skill and knowledge level; I have no
reach to this sort of data short of being informed here.

Google for "close()". I'm sure you'll find a few hits. :)
 
N

name

I don't know what you mean by "full generality". You can assume that a lot of
the short, simple function names have been taken by somebody though, so all
the good names being gone, my_xyz is usually a decent approach if you insist
on something short and care about portability.

Damned if I know either! It's a concept I got from one of the exercises in
K&R2, and I've seen it used elsewhere as well. I took it to mean fully
developed, especially in terms of being able to handle anything thrown at it
with some amount of grace. But then, the question arises: just what does
one imagine "anything" to comprise?

At some point, it seems to me that one needs to closely define what is
expected from an application, such that it will either act correctly in
response to, or reject explicitly, any given input. But I didn't realize
that included deployment as well! Nevertheless, your observation is valid,
and I've renamed the function to 'file_close()'.
Google for "close()". I'm sure you'll find a few hits. :)

Something like clrscrn()? LOL!!!

Thanks!
 

Ask a Question

Want to reply to this thread or ask your own question?

You'll need to choose a username for the site, which only take a couple of moments. After that, you can post your question and our members will help you out.

Ask a Question

Similar Threads

Line/word wrap program. 11
Wrap program revised. 5
Wrap up Text 0
Wrap up Text 8
error 28
Fibonacci 0
Communicating between processes 0
How to accept text and put each letter into a 2d matrix? 0

Members online

No members online now.

Forum statistics

Threads
473,982
Messages
2,570,186
Members
46,740
Latest member
JudsonFrie

Latest Threads

Top