Read file

P

Profetas

I have the following code that detects a
<c>
and
</c>

#include <stdio.h>
/* Scans the file named by argv[1] and prints "start" when the characters
   right after a '<' begin with "c>", and "end" when they begin with "/c>".
   The first character of the buffer is echoed on every pass of the loop.
   NOTE(review): strncmp() is used below but <string.h> is not included. */
main(int argc, char *argv[])
{
FILE* fp;
char data[4];

/* NOTE(review): the fopen() result is not checked for NULL before use. */
fp =fopen(argv[1], "r");
/* NOTE(review): feof() only turns true after a read has already failed,
   so the final pass works on whatever was left in data. */
while (!feof(fp))
{
/* A size of 2 leaves room for one character plus the terminating '\0'. */
fgets(data ,2,fp);
if (data[0] == '<')
{
/* Reads up to three more characters (fewer if a newline comes first),
   overwriting the '<' that was in data[0]. */
fgets(data ,4,fp);
if (strncmp(data,"c>",2)==0)
{printf("start \n");}
if (strncmp(data,"/c>",3)==0)
{printf("end \n");}
/* NOTE(review): when neither comparison matches, the characters just
   read are not examined again, so a '<' among them is missed. */
}
/* After the tag check this is the first character read after the '<',
   not the '<' itself. */
printf("%c",data[0]);
}

fclose (fp);
}

and I was wondering whether it is possible to push back n characters
after n characters have been read in the while loop

because this code would detect

<c>
asd
sd
as
d
asd
</c>

but it would fail to detect

asd<c>asdasd
asd
as
asd
asdasd<a</c>asd,</asd<casd

because when it reads 3 chars it skips past the "</". Is there any
way to push back 2 chars?

Thanks
 
E

Eric Sosman

Profetas said:
I have the following code that detects a
<c>
and
</c>

#include <stdio.h>
main(int argc, char *argv[])
{
FILE* fp;
char data[4];

fp =fopen(argv[1], "r");
while (!feof(fp))

Question 12.2 in the comp.lang.c Frequently Asked
Questions (FAQ) list

http://www.eskimo.com/~scs/C-faq/top.html

explains why this use of feof() won't work. Also,
you should check whether `fp' is NULL to see whether
the fopen() succeeded or failed.
{
fgets(data ,2,fp);
if (data[0] == '<')
{
fgets(data ,4,fp);
if (strncmp(data,"c>",2)==0)
{printf("start \n");}
if (strncmp(data,"/c>",3)==0)
{printf("end \n");}
}
printf("%c",data[0]);
}

fclose (fp);
}

and I was wondering whether it is possible to push back n characters
after n characters have been read in the while loop

because this code would detect

<c>
asd
sd
as
d
asd
</c>

but it would fail to detect

asd<c>asdasd
asd
as
asd
asdasd<a</c>asd,</asd<casd

because when it reads 3 chars it skips past the "</". Is there any
way to push back 2 chars?

You don't want it to return two characters: once you
have found the '<' you need to examine three more characters
to find out if they are "/c>" or something else.

Your problem is in what happens after you discover that
the subsequent three characters are something other than
"/c>". At the moment, you restart your scan for the '<'
after the three non-matching characters -- but, as you've
discovered, those three characters might have included the
'<' that you're looking for. What's the answer? You must
reconsider the three characters in a new light, as though
the original '<' had not been there at all.

Imagine walking down a line of people, looking for the
President and Vice President of the United States -- and
because of diplomatic protocol, you know that the President
always stands ahead of the Vice President in any line. So
you're looking for George followed by Richard.

"George?" you ask the first person in line. "Kofi," he
replies, so you move to the second person. "George?" you
ask again, and he says "John Paul." On to the third person:
"George?" "That's me," he replies, ungrammatically. Aha!
You've located George, so if the next person is Richard you've
located the executive team. "Richard?" you ask of the fourth
person, and he says "No." False alarm; those two are not the
Prez and the Veep. Onward to the fifth person: "George?" "No,
I'm Richard." Drat. Sixth person: "George?"

I'm sure you can spot the mistake: When you found George
in third position but the fourth was not Richard, you should
*not* have moved immediately to the fifth spot. You should
have restarted your George search with the non-Richard in the
fourth position. The same applies to your character-searching
problem.
 
C

CBFalconer

Eric said:
Profetas wrote:
.... snip ...

You don't want it to return two characters: once you
have found the '<' you need to examine three more characters
to find out if they are "/c>" or something else.

Your problem is in what happens after you discover that
the subsequent three characters are something other than
"/c>". At the moment, you restart your scan for the '<'
after the three non-matching characters -- but, as you've
discovered, those three characters might have included the
'<' that you're looking for. What's the answer? You must
reconsider the three characters in a new light, as though
the original '<' had not been there at all.

Imagine walking down a line of people, looking for the
President and Vice President of the United States -- and
because of diplomatic protocol, you know that the President
always stands ahead of the Vice President in any line. So
you're looking for George followed by Richard.

"George?" you ask the first person in line. "Kofi," he
replies, so you move to the second person. "George?" you
ask again, and he says "John Paul." On to the third person:
"George?" "That's me," he replies, ungrammatically. Aha!
You've located George, so if the next person is Richard you've
located the executive team. "Richard?" you ask of the fourth
person, and he says "No." False alarm; those two are not the
Prez and the Veep. Onward to the fifth person: "George?" "No,
I'm Richard." Drat. Sixth person: "George?"

I'm sure you can spot the mistake: When you found George
in third position but the fourth was not Richard, you should
*not* have moved immediately to the fifth spot. You should
have restarted your George search with the non-Richard in the
fourth position. The same applies to your character-searching
problem.

To avoid backtracking (as from a stream input) he wants the
Knuth-Morris-Pratt algorithm, an example of which follows:

/*
Leor said:
I think so. Here's a version I just threw together:
*/

/* And here's another throw -- binfsrch.c by CBF */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <assert.h>

/* The difference between a binary and a text file, on read,
is the conversion of end-of-line delimiters. What those
delimiters are does not affect the action. In some cases
the presence of 0x1a EOF markers (MsDos) does.

This is a version of Knuth-Morris-Pratt algorithm. The
point of using this is to avoid any backtracking in file
reading, and thus avoiding any use of buffer arrays.
*/

size_t chrcount; /* debuggery, count of input chars, zeroed */

/* --------------------- */

/* Almost straight out of Sedgewick */
/* Build the Knuth-Morris-Pratt "next" table for the pattern id of
   length lgh.  next[k] is the index in id that should be compared
   against the current input char after a mismatch at index k; the
   value -1 means "give up on this char and restart at index 0 with
   the following one".  Once the (lgh - 1)th char has been
   successfully compared, the id has been found.  The table is
   formed by comparing id to itself.

   next must have room for lgh ints; exactly next[0] .. next[lgh - 1]
   are written.  lgh must be positive (asserted). */
void initnext(int *next, const char *id, int lgh)
{
    int i, j;

    assert(lgh > 0);
    next[0] = -1; i = 0; j = -1;
    /* Stop one step short of lgh: the last textbook iteration would
       only store next[lgh], which lies one past the end of an
       lgh-element table. */
    while (i < lgh - 1) {
        /* Fall back through shorter prefixes until id[i] extends one. */
        while ((j >= 0) && (id[i] != id[j])) j = next[j];
        i++; j++;
        next[i] = j;
    }
#if (0)
    for (i = 0; i < lgh; i++)
        printf("id[%d] = '%c' next[%d] = %d\n",
               i, id[i], i, next[i]);
#endif
} /* initnext */

/* --------------------- */

/* Reads f without rewinding until either EOF or *marker
   has been found.  Returns EOF if not found; otherwise returns
   the last char read, which is the final char of marker.  At exit
   the last matching char has been read, and no further.

   marker  pattern to look for, lgh chars long (lgh > 0, asserted)
   next    table of lgh entries indexed by marker position, giving
           the position to retry after a mismatch (-1 = none)
   f       stream to read from

   Every char consumed is added to the file-level chrcount. */
int kmpffind(const char *marker, int lgh, int *next, FILE *f)
{
    int j = 0;    /* char position in marker to check */
    int ch = EOF; /* current char; EOF until something is read */

    assert(lgh > 0);
    while ((j < lgh) && (EOF != (ch = getc(f)))) {
        chrcount++;
        /* getc() hands back the byte as an unsigned char value, so
           the marker byte is viewed the same way; with a signed
           plain char, bytes >= 0x80 would otherwise never match. */
        while ((j >= 0) && (ch != (unsigned char)marker[j])) j = next[j];
        j++;
    }
    return ch;
} /* kmpffind */

/* --------------------- */

/* Find marker in f, display following printing chars
   up to some non printing character or EOF.

   For every occurrence a line of the form
       <n> <marker> : "<printable chars that follow>"
   is written to stdout.  Returns the count of markers found; an
   empty marker is reported as 0 occurrences.  Terminates the
   program with EXIT_FAILURE if the work table cannot be allocated.
   Every char consumed is added to the file-level chrcount. */
int binfsrch(const char *marker, FILE *f)
{
    int *next;
    int lgh;
    int ch;
    int items = 0; /* count of markers found */

    lgh = (int)strlen(marker);
    if (lgh <= 0) return 0; /* nothing to search for */

    /* One slot more than the marker length: the textbook table
       construction also stores an entry at index lgh. */
    next = malloc(((size_t)lgh + 1) * sizeof *next);
    if (!next) {
        puts("No memory");
        exit(EXIT_FAILURE);
    }

    initnext(next, marker, lgh);
    while (EOF != kmpffind(marker, lgh, next, f)) {
        /* found, take appropriate action */
        items++;
        printf("%d %s : \"", items, marker);
        while (isprint(ch = getc(f))) {
            chrcount++;
            putchar(ch);
        }
        puts("\"");
        if (EOF == ch) break;
        else chrcount++; /* the non-printing terminator was consumed too */
    }
    free(next);
    return items;
} /* binfsrch */

/* --------------------- */

/* Usage: binfsrch name [binaryfile]
   Searches for the string argv[1] in the file argv[2], opened in
   binary mode, or in stdin when no file is given.  Reports whether
   the string was found and how many chars were read. */
int main(int argc, char **argv)
{
    FILE *f = stdin;

    if (3 == argc) {
        if (!(f = fopen(argv[2], "rb"))) {
            printf("Can't open %s\n", argv[2]);
            exit(EXIT_FAILURE);
        }
        argc--; /* the file name is dealt with; fall into the 2-arg case */
    }
    if (2 != argc) {
        puts("Usage: binfsrch name [binaryfile]");
        puts(" (file defaults to stdin text mode)");
    }
    else if (binfsrch(argv[1], f)) {
        printf("\"%s\" : found\n", argv[1]);
    }
    else printf("\"%s\" : not found\n", argv[1]);
    printf("%lu chars\n", (unsigned long)chrcount);
    /* Release the file opened above; stdin is left alone. */
    if (f != stdin) fclose(f);
    return 0;
} /* main binfsrch */
 

Ask a Question

Want to reply to this thread or ask your own question?

You'll need to choose a username for the site, which only takes a couple of moments. After that, you can post your question and our members will help you out.

Ask a Question

Members online

Forum statistics

Threads
473,998
Messages
2,570,242
Members
46,835
Latest member
lila30

Latest Threads

Top