Line/word wrap program.

N

name

Here is a first attempt at a line/word wrapping utility. Seems to work
okay, but lacks some checking stuff, etc.

---------------------------------------------------------

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h>

#define MAX 10000

/* wrap.c inserts newlines in place of spaces according to specified
line length. Output filename is {filename}.wrap. Takes two arguments,
filename and line length. */

/* Todo: Need to figure out what sort of memory
the larger files might need. File type checking? */

void wordwrap(FILE *ifp, FILE *ofp, char *wl)
{
int c;

char buf[MAX];

int i, space, count, length;

i = space = count = 0;

length = atoi(wl);

for(i = 0; i < MAX && ((c=getc(ifp)) != EOF); ++i)
{
buf = c;
}

for ( i = 0; buf != '\0'; ++i)
{
if ((buf == '\n' || buf == '\t') && buf[i-1] == '\n')
count = space = 0;

if ( buf == ' ')
space = i;

++count;

if ( count == length )
{
buf[space] = '\n';
count = i - space;
}
}

for ( i = 0; buf != EOF; ++i)
{
c = buf;
putc(c, ofp);
}
}

int main(int argc, char *argv[])
{
FILE *fp1;
FILE *fp2;

char *prog = argv[0];
char *filename1 = argv[1];
char filename2[80];
char *wl = argv[2];
int i, l;

l = strlen(argv[1]);
for ( i = 0; i < 75 || i < l; ++i)
filename2 = argv[1];
strcat(filename2, ".wrap");

printf("Wrapping %s at %s\n", filename1, wl);
printf("Output file adds .wrap to input filename.\n");

if (argc != 3)
{
printf("Usage: %s: filename, wrap length\n", prog);
return EXIT_FAILURE;
}
else if ((fp1 = fopen(filename1, "r")) == NULL)
{
fprintf(stderr, "%s: can't open %s\n", prog, filename1);
return EXIT_FAILURE;
}
else if ((fp2 = fopen(filename2, "w")) == NULL)
{
fprintf(stderr, "%s: can't open %s\n", prog, filename2);
return EXIT_FAILURE;
}
else
{
wordwrap(fp1, fp2, wl);
fclose(fp1);
fclose(fp2);
}
if (ferror(fp2))
{
fprintf(stderr, "%s: error writing %s\n", prog, argv[3]);
return EXIT_FAILURE;
}

return EXIT_SUCCESS;
}
 
A

Arthur J. O'Dwyer

Here is a first attempt at a line/word wrapping utility. Seems to work
okay, but lacks some checking stuff, etc.

When run with no command-line arguments, it segfaults.
When run with a filename and a numeric argument, it produces
way-incorrect results. Try each of the following invocations

% ./a.out test.txt
% ./a.out test.txt 10
% ./a.out test.txt 20
% ./a.out test.txt 21
% ./a.out test.txt 100

on a file 'test.txt' containing two lines of 20 'x's each.

xxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxxxx

I don't get the right answer in /any/ of those cases!

The code you've posted is indeed incredibly ugly. Remove some of
those blank lines, reduce your tabstop to something sensible (e.g., 4),
fix the obvious bug and any more you find along the way, and then
try posting again.

l = strlen(argv[1]);

This is the obvious bug.
for ( i = 0; i < 75 || i < l; ++i)
filename2 = argv[1];
strcat(filename2, ".wrap");


This is one you should find along the way.
fprintf(stderr, "%s: error writing %s\n", prog, argv[3]);

So is this.


-Arthur,
http://www.contrib.andrew.cmu.edu/~ajo/free-software/usenetify2.c
 
N

name

When run with no command-line arguments, it segfaults.

Yep, I got that.
When run with a filename and a numeric argument, it produces
way-incorrect results. Try each of the following invocations

% ./a.out test.txt

That should give you the usage response. Did for me.
% ./a.out test.txt 10
% ./a.out test.txt 20
% ./a.out test.txt 21

Hmmmmmm..... Didn't encounter any spaces, just 'x' and '\n'. Okay, I think
this gives me a clue about checking for binary files as well. I'll think on
it.
% ./a.out test.txt 100

Got that and fixed it. Thanks.
on a file 'test.txt' containing two lines of 20 'x's each.

xxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxxxx

I don't get the right answer in /any/ of those cases!

You should have gotten the usage response for only one argument.
The code you've posted is indeed incredibly ugly. Remove some of
those blank lines, reduce your tabstop to something sensible (e.g., 4),
fix the obvious bug and any more you find along the way, and then
try posting again.

Not talking about style of presentation, but the code itself. Thanks for
your input, though.
l = strlen(argv[1]);

This is the obvious bug.

Okay....
for ( i = 0; i < 75 || i < l; ++i)
filename2 = argv[1];
strcat(filename2, ".wrap");

This is one you should find along the way.


Don't see the problem here, except that this four line group should follow
the argc check. I suppose that with no argv[1], 'strlen' tries to access
something that doesn't exist, and that would cause a segfault. So the
argument does have to exist. If it is an emply array, however... have to
think on this. Thanks.

fprintf(stderr, "%s: error writing %s\n", prog, argv[3]);

So is this.

Yep, that should be 'filename2' instead of 'argv[3]'. Got it fixed. Thanks.

As I said, there needs to be a lot of different kinds of checking going on,
which is the next step, I guess.

Thanks for reading.
 
A

Arthur J. O'Dwyer

Here is a first attempt at a line/word wrapping utility. Seems to work
okay, but lacks some checking stuff, etc.
[...]
% ./a.out test.txt

That should give you the usage response. Did for me.

Nope; it dereferences the null pointer 'wl' in 'main'.
Hmmmmmm..... Didn't encounter any spaces, just 'x' and '\n'. Okay, I think
this gives me a clue about checking for binary files as well. I'll think on
it.

I have no idea what you're thinking of, but on my system each of these
dumps a bunch of zero bytes after the output data. One major problem
I noticed while idly looking for the bug:
for (i = 0; buf != EOF; ++i) {


The loop condition may never evaluate to false, here. This is not
the bug that is causing the incorrect output, but it's a bug which
might cause an infinite loop on some platforms.
Got that and fixed it. Thanks.

Gee, that was a /different/ bug? Crazy! Keep looking; I'm sure
you'll find plenty more bugs...

Not talking about style of presentation, but the code itself.

The code itself /is incredibly ugly/. The way you can fix this
problem is to remove the extra blank lines /from your code/. And
use a consistent rule to place whitespace /in your code/. Presentation
is very important in communication, and source code is nothing if not
communication. If I just wanted a program to /do/ something, I'd
use the binary executable. When I see source code, I expect it to
be readable. Google up D.E. Knuth's opinions on the subject; I pretty
much agree with him on this topic. :)

for ( i = 0; i < 75 || i < l; ++i)
filename2 = argv[1];
strcat(filename2, ".wrap");

This is one you should find along the way.


Don't see the problem here [...]


Suppose 'strlen(argv[1]) > 75' and work from there. Pay particular
attention to the size of 'filename2' and the behavior of 'strcat'.

Thanks for reading.

You're welcome.

-Arthur,
three-day weekend
 
B

Barry Schwarz

Here is a first attempt at a line/word wrapping utility. Seems to work
okay, but lacks some checking stuff, etc.

---------------------------------------------------------

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h>

#define MAX 10000

/* wrap.c inserts newlines in place of spaces according to specified
line length. Output filename is {filename}.wrap. Takes two arguments,
filename and line length. */

/* Todo: Need to figure out what sort of memory
the larger files might need. File type checking? */

void wordwrap(FILE *ifp, FILE *ofp, char *wl)
{
int c;

char buf[MAX];

int i, space, count, length;

i = space = count = 0;

length = atoi(wl);

for(i = 0; i < MAX && ((c=getc(ifp)) != EOF); ++i)
{
buf = c;
}

for ( i = 0; buf != '\0'; ++i)


Here you stop the loop when buf is '\0'.
{
if ((buf == '\n' || buf == '\t') && buf[i-1] == '\n')


If the first character is '\t', buf[i-1] does not exist. This will
invoke undefined behavior.
count = space = 0;

if ( buf == ' ')
space = i;

++count;

if ( count == length )
{
buf[space] = '\n';
count = i - space;
}
}

for ( i = 0; buf != EOF; ++i)


How did buf ever get to be EOF? You do not store c in buf when c
is EOF.

Why does this loop stop at a different place than the previous one?
{
c = buf;
putc(c, ofp);
}
}

int main(int argc, char *argv[])
{
FILE *fp1;
FILE *fp2;

char *prog = argv[0];
char *filename1 = argv[1];


You never do anything with filename1 that you couldn't do with argv[1]
directly.
char filename2[80];
char *wl = argv[2];
Ditto.

int i, l;

l = strlen(argv[1]);
for ( i = 0; i < 75 || i < l; ++i)

If l (a really lousy choice for a one character name; for a while I
thought it was one, not ell) is greater than 75, you will not copy the
terminating '\0'. This will cause the following strcat to invoke
undefined behavior.

What happens when l > 80?
filename2 = argv[1];


Is there a reason you didn't use strncpy?
strcat(filename2, ".wrap");

printf("Wrapping %s at %s\n", filename1, wl);
printf("Output file adds .wrap to input filename.\n");

if (argc != 3)

This is too late. You have already played with argv[1] and [2]
without first checking to see if they exist.
{
printf("Usage: %s: filename, wrap length\n", prog);
return EXIT_FAILURE;
}
else if ((fp1 = fopen(filename1, "r")) == NULL)
{
fprintf(stderr, "%s: can't open %s\n", prog, filename1);
return EXIT_FAILURE;
}
else if ((fp2 = fopen(filename2, "w")) == NULL)
{
fprintf(stderr, "%s: can't open %s\n", prog, filename2);
return EXIT_FAILURE;
}
else
{
wordwrap(fp1, fp2, wl);
fclose(fp1);
fclose(fp2);
}
if (ferror(fp2))
{
fprintf(stderr, "%s: error writing %s\n", prog, argv[3]);
return EXIT_FAILURE;
}

return EXIT_SUCCESS;
}

-----------------------------------------------------------

I presume it's ugly, so comments and criticisms welcome.

Thanks for reading.



<<Remove the del for email>>
 
N

name

On Thu, 19 Aug 2004, name wrote:

Here is a first attempt at a line/word wrapping utility. Seems to work
okay, but lacks some checking stuff, etc. [...]
% ./a.out test.txt

That should give you the usage response. Did for me.

Nope; it dereferences the null pointer 'wl' in 'main'.

Well, looks like I've not got a clue here, then. No problem, this is all
about learning anyway... said:
Hmmmmmm..... Didn't encounter any spaces, just 'x' and '\n'. Okay, I think
this gives me a clue about checking for binary files as well. I'll think on
it.

I have no idea what you're thinking of, but on my system each of these
dumps a bunch of zero bytes after the output data. One major problem
I noticed while idly looking for the bug:
for (i = 0; buf != EOF; ++i) {


The loop condition may never evaluate to false, here. This is not
the bug that is causing the incorrect output, but it's a bug which
might cause an infinite loop on some platforms.


Ah yes, other platforms. Which means I'm not writing portable code then.
And that's a very big problem!
Gee, that was a /different/ bug? Crazy! Keep looking; I'm sure
you'll find plenty more bugs...


The code itself /is incredibly ugly/. The way you can fix this
problem is to remove the extra blank lines /from your code/. And
use a consistent rule to place whitespace /in your code/. Presentation
is very important in communication, and source code is nothing if not
communication. If I just wanted a program to /do/ something, I'd
use the binary executable. When I see source code, I expect it to
be readable. Google up D.E. Knuth's opinions on the subject; I pretty
much agree with him on this topic. :)

Oh okay. Sorry. I didn't understand that the way code is printed out with
spacing and so forth is considered as important as the code itself. I had
tried to make enough white space so that it was easy to read. I guess I
could get rid of all the white space and do K&R braces, but as I understand
it, K&R is a "hallowed format" that must always have 8 space indentations,
and you say that's not acceptable.

I'll read Knuth, of course. Thanks for the suggestion.
for ( i = 0; i < 75 || i < l; ++i)
filename2 = argv[1];
strcat(filename2, ".wrap");

This is one you should find along the way.


Don't see the problem here [...]


Suppose 'strlen(argv[1]) > 75' and work from there. Pay particular
attention to the size of 'filename2' and the behavior of 'strcat'.


Well, I could give it 256, I suppose. I'll check this out and see what
happens. The idea with 75 was that the extension would make it 80
characters, but that was arbitrary in any case. More to it than that?
Guess I'll find out!
You're welcome.

-Arthur,
three-day weekend

Thanks for the critique! I'll take it away and study on it.
 
N

name

for ( i = 0; buf != '\0'; ++i)


Here you stop the loop when buf is '\0'.
{
if ((buf == '\n' || buf == '\t') && buf[i-1] == '\n')


If the first character is '\t', buf[i-1] does not exist. This will
invoke undefined behavior.


Uh oh, incipient demonic rhinology! That means I need to check 'i' itself
to make sure it's not negative. Thanks!
count = space = 0;

if ( buf == ' ')
space = i;

++count;

if ( count == length )
{
buf[space] = '\n';
count = i - space;
}
}

for ( i = 0; buf != EOF; ++i)


How did buf ever get to be EOF? You do not store c in buf when c
is EOF.


Ummm, right. Should also have been '\0'? But then I don't store c when
buf is '\0' either. Maybe, having already set the buffer up, I really
don't need a conditional, so "for(i=0;i!=strlen(buf[]);++i) or something
like that. Probably not, though... the conditional looks wrong, so
probably is.
Why does this loop stop at a different place than the previous one?

Just what I was going to ask! LOL!!!
{
c = buf;
putc(c, ofp);
}
}

int main(int argc, char *argv[])
{
FILE *fp1;
FILE *fp2;

char *prog = argv[0];
char *filename1 = argv[1];


You never do anything with filename1 that you couldn't do with argv[1]
directly.


True, but I decided it was easier to follow if I used the same assignment
name style for both file names.
char filename2[80];
char *wl = argv[2];
Ditto.

Yep.
int i, l;

l = strlen(argv[1]);
for ( i = 0; i < 75 || i < l; ++i)

If l (a really lousy choice for a one character name; for a while I
thought it was one, not ell) is greater than 75, you will not copy the
terminating '\0'. This will cause the following strcat to invoke
undefined behavior.

Yeah, I see what you mean. l for length, but the visual confusion makes it
inappropriate. I'll remember that. Thanks!
What happens when l > 80?
filename2 = argv[1];


Is there a reason you didn't use strncpy?


Hmmm.. in fact I thought about it when I was going to use strcat, but forgot
to look into that. Better choice, it would seem.
strcat(filename2, ".wrap");

printf("Wrapping %s at %s\n", filename1, wl);
printf("Output file adds .wrap to input filename.\n");

if (argc != 3)

This is too late. You have already played with argv[1] and [2]
without first checking to see if they exist.

Yep, I figured that out. Check the argc value first to see what was given
before playing with stuff that may not exist. If they don't exist...
segfault!! Got that from Arthur.

Hmmmm... lclint is my friend!!!! I think I'll have an extended visit there
before bringing more code here! Problem is, lclint is a bit laconic at
times...

Thanks for the critique, Barry!
 
B

Barry Schwarz

for ( i = 0; buf != '\0'; ++i)


Here you stop the loop when buf is '\0'.
{
if ((buf == '\n' || buf == '\t') && buf[i-1] == '\n')


If the first character is '\t', buf[i-1] does not exist. This will
invoke undefined behavior.


Uh oh, incipient demonic rhinology! That means I need to check 'i' itself
to make sure it's not negative. Thanks!
count = space = 0;

if ( buf == ' ')
space = i;

++count;

if ( count == length )
{
buf[space] = '\n';
count = i - space;
}
}

for ( i = 0; buf != EOF; ++i)


How did buf ever get to be EOF? You do not store c in buf when c
is EOF.


Ummm, right. Should also have been '\0'? But then I don't store c when
buf is '\0' either. Maybe, having already set the buffer up, I really


Your first loop which calls getc (trimmed from your quote) never
checks for the character value '\0' so it will store that in buf.

snip


<<Remove the del for email>>
 
N

name

How did buf ever get to be EOF? You do not store c in buf when c
is EOF.


Ummm, right. Should also have been '\0'? But then I don't store c when
buf is '\0' either. Maybe, having already set the buffer up, I really


Your first loop which calls getc (trimmed from your quote) never
checks for the character value '\0' so it will store that in buf.


Yeah, I got that after looking at it. It does now. I'm going to repost the
results of addressing some of the points raised in this thread.

Thanks,
 
B

bayxarea-usenet

Was this utility ever completed? I have a use for it actually.

Thanks,

John
 
A

Arthur J. O'Dwyer

Was this [line-wrapping] utility ever completed? I have a use for it
actually.

I'd be willing to bet it never was, but there are certainly plenty of
other utilities like that around. I've got a couple specifically designed
for C programs on my website, www.contrib.andrew.cmu.edu/~ajo . You might
be more interested in the standard UNIX text formatter 'fmt', depending
on what "use" you're thinking of. Try Google, or comp.sources.wanted if
that fails.

-Arthur
 
C

CBFalconer

Was this utility ever completed? I have a use for it actually.
.... snip much code that followed the evil top-posting...

I have no idea about that, but the following is lying about. A
negative line length causes ragged right operation.

/* ----- justify.c -----
Filter text file, right justifying by inserting
spaces between words. Words are anything separated
by blanks, tabs, newlines, formfeeds, bell, etc.

The single (optional) parameter is the output line
length, and defaults to 65. Execution without any
input redirections causes a help message.

This is a quick and dirty utility.
Released to public domain by:
<mailto:[email protected]>
*/

#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>

#define RHDEFAULT 65
#define RHMIN 20

static int rhcol; /* right hand column limit */
static int ragged; /* No rh justification, 0 init */

/* ------------------- */

/* This is very likely to be non-portable */
/* DOES NOT check fp open for reading */
/* NULL fp is considered a keyboard here! */
static int akeyboard(FILE *fp)
{
#ifndef __TURBOC__
# ifdef __STDC__
/* This dirty operation allows gcc -ansi -pedantic */
extern int fileno(FILE *fp);
extern int isatty(int fn);
# endif
#endif
return ((fp != NULL) && isatty(fileno(fp)));
} /* akeyboard */

/* ------------------- */

static void help(char *phrase1, char *phrase2)
{
if (phrase1) fprintf(stderr, "%s", phrase1);
if (phrase2) fprintf(stderr, "%s", phrase2);
fprintf(stderr, "\n"
"Usage: justify [rightmargin] <infile >outfile\n"
" The default rightmargin is 65\n"
" and values less than 20 are rejected\n"
"\n"
"A large value of rightmargin will effectively\n"
"convert all paragraphs into single lines\n"
"\n"
"A negative rightmargin causes ragged right\n"
"\n"
"A blank line delimits paragraphs\n");
} /* help */

/* ------------------- */

static int initialize(int argc, char *argv[])
{
long rightcol;
char *err;

if (akeyboard(stdin) || (argc > 2)) {
help(NULL, NULL);
return 0;
}
rhcol = RHDEFAULT;
if (2 == argc) {
rightcol = strtol(argv[1], &err, 10);
if (rightcol < 0) {
rightcol = -rightcol;
ragged = 1;
}
if ((err == argv[1]) || (rightcol < RHMIN)) {
help("Bad argument: ", argv[1]);
return 0;
}
else rhcol = rightcol;
}
return 1;
} /* initialize */

/* ------------------- */

static void cleanup(void)
{
} /* cleanup */

/* ------------------- */

/* ================================== */
/* Routines for text input and output */
/* ================================== */

static void skipblanks(FILE *f)
{
int ch;

while ( (' ' == (ch = getc(f))) || ('\t' == ch) ||
('\v' == ch) || ('\f' == ch) || ('\a' == ch) )
continue;
ungetc(ch, f);
} /* skipblanks */

/* ------------------- */

/* The file is assumed to hold no control chars */
/* other than \n \t \v \a and \f. A blank line */
/* marks a paragraph ending word */
static int nextword(FILE *f, char *buffer, int max)
{
int i, ch;

skipblanks(f);
if (EOF == (ch = getc(f))) return 0;

/* Detect paragraph endings as \n\n */
if ('\n' == ch) {
skipblanks(f); ch = getc(f);
if ('\n' == ch) { /* paragraph ending */
buffer[0] = buffer[1] = ch; /* wd = "\n\n" */
buffer[2] = '\0';
/* now we have to absorb any more blank lines */
do {
skipblanks(f); ch = getc(f);
} while ('\n' == ch);
ungetc(ch, f);
return 1;
}
}
/* now ch holds the first non-blank. Use all printable */
if (EOF == ch) return 0;
if (!isgraph(ch)) {
fprintf(stderr, "'%c', 0x%x WARN: Invalid character\n",
ch, (unsigned)ch);
}

i = 0;
do {
buffer[i++] = ch;
if (i >= max) { /* truncate over long words */
i--;
break; /* leaving ch for next word */
}
ch = getc(f);
} while (isgraph(ch));

ungetc(ch, f); /* save for next word, may be \n */
buffer = '\0'; /* terminate string */
return 1;
} /* nextword */

/* ------------------- */

static void justify(char *ln, int wdgaps, int xtra, FILE *out)
{
int insert, i;
static int oddln = 0; /* for rt left blank insertion */
char ch;

#ifdef DEBUG
fprintf(out, "%2d %2d ", wdgaps, xtra);
#endif
insert = 0; oddln = !oddln;
if (wdgaps)
while (xtra > wdgaps) {
insert++; xtra -= wdgaps;
}
while ((ch = *ln++)) {
putc(ch, out);
if (' ' == ch) {
if (xtra) {
xtra--;
putc(' ', out);
}
for (i = insert; i; i--) putc(' ', out);
}
}
putc('\n', out);
} /* justify */

/* ------------------- */

static int filter(FILE *in, FILE *out)
{
char *buf;
char *ln;
int wdcount, lnlgh, wdlgh;
char *eop = "\n\n"; /* end of paragraph */
int done, endpar;

if (!(buf = malloc(rhcol+1))) exit(EXIT_FAILURE);
if (!(ln = malloc(rhcol+1))) exit(EXIT_FAILURE);

done = !nextword(in, buf, rhcol + 1);
endpar = !strcmp(buf, eop);

while (!endpar && !done) {
/* form paragraph */
wdlgh = strlen(buf);
wdcount = 0;
*ln = '\0'; lnlgh = 0;

while ((((lnlgh + wdlgh) < rhcol) || !lnlgh)
&& !done && !endpar) {
/* form a line */
if (lnlgh) ln[lnlgh++] = ' ';
strcpy(ln + lnlgh, buf);
lnlgh += wdlgh;
wdcount++;

done = !nextword(in, buf, rhcol + 1);
endpar = !strcmp(buf, eop);
wdlgh = strlen(buf);
}

/* dump the line, wdcount words */
if (endpar || done) lnlgh = rhcol;
if (ragged) fprintf(out, "%s\n", ln);
else justify(ln, wdcount-1, rhcol-lnlgh, out);

if (endpar) {
fputc('\n', out);
done = !nextword(in, buf, rhcol + 1);
endpar = !strcmp(buf, eop);
}
}
return 0;
} /* filter */

/* ------------------- */

int main(int argc, char *argv[])
{
if (!initialize(argc, argv)) return EXIT_FAILURE;
else {
(void)filter(stdin, stdout);
cleanup();
}
return 0;
} /* main */
 

Ask a Question

Want to reply to this thread or ask your own question?

You'll need to choose a username for the site, which only take a couple of moments. After that, you can post your question and our members will help you out.

Ask a Question

Similar Threads


Members online

Forum statistics

Threads
473,982
Messages
2,570,186
Members
46,740
Latest member
JudsonFrie

Latest Threads

Top