String data input and storage

C

Christoph Scholtes

Hi,

I have two questions about the following code snippet. I am trying to
read in a series of strings and save them to character arrays. Since I
dont know how long my string is going to be (and I dont want to waste
memory by allocating an array of 1000 for all the input strings) I
allocate the memory myself according to the length of the string.

char *inputData;
char line[200];

printf("Input: ");
fgets(line, sizeof(line), stdin);

line[strlen(line)-1] = '\0';

inputData = malloc(strlen(line)*sizeof(inputData));

if (inputData == NULL){
fprintf(stderr, "malloc failed!\n");
exit(EXIT_FAILURE);
}

strcpy(inputData, line);
/* do whatever i do with the strings*/
free(inputData);

1. Is this the right way to do this or is there a better/an easier way?

2. I am removing the '\0' from the string. Valid or invalid?

Thanks,
Chris
 
M

Morris Dovey

Christoph Scholtes (in [email protected])
said:

| I have two questions about the following code snippet. I am trying
| to read in a series of strings and save them to character arrays.
| Since I dont know how long my string is going to be (and I dont
| want to waste memory by allocating an array of 1000 for all the
| input strings) I allocate the memory myself according to the length
| of the string.
|
| char *inputData;
| char line[200];
|
| printf("Input: ");
| fgets(line, sizeof(line), stdin);
|
| line[strlen(line)-1] = '\0';
|
| inputData = malloc(strlen(line)*sizeof(inputData));

/* How about: */ inputData = malloc(1+strlen(line)); /* ? */

|
| if (inputData == NULL){
| fprintf(stderr, "malloc failed!\n");
| exit(EXIT_FAILURE);
| }
|
| strcpy(inputData, line);
| /* do whatever i do with the strings*/
| free(inputData);
|
| 1. Is this the right way to do this or is there a better/an easier
| way?

Better/easier depends on the problem context and your skills. Chuck
Falconer, Richard Heathfield, and I have all implemented functions
that input variable-length strings (I like mine best, of course :) -
my code is at the link below.)

| 2. I am removing the '\0' from the string. Valid or invalid?

It's valid; but it turns the 'string' into a simple 'array of char'.
If that's what you want then it's fine.
 
P

pete

Christoph said:
Hi,

I have two questions about the following code snippet. I am trying to
read in a series of strings and save them to character arrays. Since I
dont know how long my string is going to be (and I dont want to waste
memory by allocating an array of 1000 for all the input strings) I
allocate the memory myself according to the length of the string.

I use a linked list for that purpose.

/* BEGIN line_to_string.c */

#include <stdio.h>
#include <stdlib.h>
#include <limits.h>
#include <string.h>

struct list_node {
struct list_node *next;
void *data;
};

int line_to_string(FILE *fp, char **line, size_t *size);
void list_free(struct list_node *node, void (*free_data)(void *));
void list_fprint(FILE *stream, struct list_node *node);
struct list_node *string_node(struct list_node **head,
struct list_node *tail,
char *data);

int main(void)
{
struct list_node *head, *tail;
int rc;
char *buff_ptr;
size_t buff_size;
long unsigned line_count;

puts(
"\nThis program makes and prints a list of all the lines\n"
"of text entered from the standard input stream.\n"
"Just hit the Enter key to end,\n"
"or enter any line of characters to continue."
);
tail = head = NULL;
line_count = 0;
buff_size = 0;
buff_ptr = NULL;
rc = line_to_string(stdin, &buff_ptr, &buff_size);
while (rc > 1) {
++line_count;
tail = string_node(&head, tail, buff_ptr);
if (tail == NULL) {
break;
}
puts(
"\nJust hit the Enter key to end,\n"
"or enter any other line of characters to continue."
);
rc = line_to_string(stdin, &buff_ptr, &buff_size);
}
switch (rc) {
case EOF:
if (buff_ptr != NULL && strlen(buff_ptr) > 0) {
puts("rc equals EOF\nThe string in buff_ptr is:");
puts(buff_ptr);
++line_count;
tail = string_node(&head, tail, buff_ptr);
}
break;
case 0:
puts("realloc returned a null pointer value");
if (buff_size > 1) {
puts("rc equals 0\nThe string in buff_ptr is:");
puts(buff_ptr);
++line_count;
tail = string_node(&head, tail, buff_ptr);
}
break;
default:
break;
}
if (line_count != 0 && tail == NULL) {
puts("Node allocation failed.");
puts("The last line entered didnt't make it onto the list:");
puts(buff_ptr);
}
free(buff_ptr);
puts("\nThe line buffer has been freed.\n");
printf("%lu lines of text were entered.\n", line_count);
puts("They are:\n");
list_fprint(stdout, head);
list_free(head, free);
puts("\nThe list has been freed.\n");
return 0;
}

int line_to_string(FILE *fp, char **line, size_t *size)
{
int rc;
void *p;
size_t count;

count = 0;
for (rc = getc(fp); rc != EOF; rc = getc(fp)) {
++count;
if (count + 2 > *size) {
p = realloc(*line, count + 2);
if (p == NULL) {
if (*size > count) {
(*line)[count] = '\0';
(*line)[count - 1] = (char)rc;
} else {
ungetc(rc, fp);
}
count = 0;
break;
}
*line = p;
*size = count + 2;
}
if (rc == '\n') {
(*line)[count - 1] = '\0';
break;
}
(*line)[count - 1] = (char)rc;
}
if (rc != EOF) {
rc = count > INT_MAX ? INT_MAX : count;
} else {
if (*size > count) {
(*line)[count] = '\0';
}
}
return rc;
}

void list_free(struct list_node *node, void (*free_data)(void *))
{
struct list_node *next_node;

while (node != NULL) {
next_node = node -> next;
free_data(node -> data);
free(node);
node = next_node;
}
}

void list_fprint(FILE *stream, struct list_node *node)
{
while (node != NULL) {
fputs(node -> data, stream);
putc('\n', stream);
node = node -> next;
}
}

struct list_node *string_node(struct list_node **head,
struct list_node *tail,
char *data)
{
struct list_node *node;

node = malloc(sizeof *node);
if (node != NULL) {
node -> next = NULL;
node -> data = malloc(strlen(data) + 1);
if (node -> data != NULL) {
if (*head == NULL) {
*head = node;
} else {
tail -> next = node;
}
strcpy(node -> data, data);
} else {
free(node);
node = NULL;
}
}
return node;
}

/* END line_to_string.c */
 
D

Digital Logic

Chris,
Two questions/assumptions. First, you say you're going to read a
serious of strings, but this code snippet only reads once. I'm going
to assume that there's a loop surronding the snippet. Second,
depending on what "/* do whatever i do with the strings*/" actually
does your implementation method could change. After you "do whatever"
are you retaining the string and reading another?
From what I can tell your using redundant string buffers. You read
into "line", copy to "inputData", but don't use "line" again. If
you're not using this buffer again you have not need to copy out of it.

-Mark


Christoph said:
Hi,

I have two questions about the following code snippet. I am trying to
read in a series of strings and save them to character arrays. Since I
dont know how long my string is going to be (and I dont want to waste
memory by allocating an array of 1000 for all the input strings) I
allocate the memory myself according to the length of the string.

char *inputData;
char line[200];

printf("Input: ");
fgets(line, sizeof(line), stdin);

line[strlen(line)-1] = '\0';

inputData = malloc(strlen(line)*sizeof(inputData));

if (inputData == NULL){
fprintf(stderr, "malloc failed!\n");
exit(EXIT_FAILURE);
}

strcpy(inputData, line);
/* do whatever i do with the strings*/
free(inputData);

1. Is this the right way to do this or is there a better/an easier way?

2. I am removing the '\0' from the string. Valid or invalid?

Thanks,
Chris
 
C

Christoph Scholtes

Mark,
Chris,
Two questions/assumptions. First, you say you're going to read a
serious of strings, but this code snippet only reads once. I'm going
to assume that there's a loop surronding the snippet. Second,
depending on what "/* do whatever i do with the strings*/" actually
does your implementation method could change. After you "do whatever"
are you retaining the string and reading another?

Yes, this is just a part of the code. Sorry. What I do is read 4 strings
all of them using the same code I posted. No loop, just 4 times the code
with the according variables to store the strings in. I keep all of the
4 strings.
From what I can tell your using redundant string buffers. You read
into "line", copy to "inputData", but don't use "line" again. If
you're not using this buffer again you have not need to copy out of it.

Well, I actually re-use the buffer "line" to read in the three other
strings.

My question was pointed at the structure with fgets->malloc->strcpy to
read in the strings. I was just wondering if there was a smarter/better
way to do this.

Thanks,
Chris
 
C

Christoph Scholtes

Morris said:
| inputData = malloc(strlen(line)*sizeof(inputData));

/* How about: */ inputData = malloc(1+strlen(line)); /* ? */

OK, that makes sense.
Better/easier depends on the problem context and your skills. Chuck
Falconer, Richard Heathfield, and I have all implemented functions
that input variable-length strings (I like mine best, of course :) -
my code is at the link below.)

:) Ok, got that point, too.
| 2. I am removing the '\0' from the string. Valid or invalid?

It's valid; but it turns the 'string' into a simple 'array of char'.
If that's what you want then it's fine.

Never mind that part. I confused the '\n' (which I remove) and the '\0'.

However, in my original code the amount of memory allocated is
apparently too little. For the string "test" the buffer 'line' holds "t
e s t \n \0" right? After replacing the '\n' with '\0' I allocated
strlen(line)=4 bytes to hold the 4-char-string and copy it to the memory
location with strcpy. What happens in this case with the trailing '\0'?
Obviously there is not enough memory allocated to hold it. Is it still
copied or omitted?

Chris
 
P

pete

Christoph said:
What happens in this case with the trailing '\0'?
Obviously there is not enough memory allocated to hold it. Is it still
copied or omitted?

That's what they call "undefined behavior".
strcpy will attempt to copy the '\0' also,
and the result of that attempt is undefined.
It actually renders the behavior
of the entire program undefined,
according to the rules of the language.
 
D

Digital Logic

Chris,
I would recommend one of two things depending on implementation. If
you are doing something different to each of these strings you are
reading I suggest that you take the code you have repeated 4 times and
place it in a function. Its good practice to not allow the same code
to exist multiple times.

If you are doing the same thing to each of these strings I suggest you
put this snippet into a for loop, and use your index your looping
"for(i = 0; i < 4; i++)" as the index into an array of string buffers
"char *buf[4]". Then "buf = malloc(...)".

I would also suggest you consider a slightly different strategy. I
know you are trying to avoid allocating large chunks upfront, but I
don't think that that would actually be to consuming. You may be
saving some by not allocating memory that you will not be using.
However, you are sacrificing processing at runtime because you a
preforming an extra copy. scanf performs a copy of the data from the
stdin kernel buffer into your process's buffer, the "line" variable.
Then you use strcpy to copy from the "line" variable into a seperate
buffer. If you ran scanf directly to the string buffer you were going
to manipulate then you would not have the additional copy. I don't
neccisarily think that there is anything wrong with your
implementation. I just think you might want to give it a second
thought. If you're set on performing the individual allocations then I
think you are set with that snippet, except you should place the code
in a function.

I hope I could be of some help.

-Mark
 
M

Morris Dovey

Christoph Scholtes (in [email protected]) said:

| Morris Dovey wrote:
|
||| inputData = malloc(strlen(line)*sizeof(inputData));
||
|| /* How about: */ inputData = malloc(1+strlen(line)); /* ? */
|
| OK, that makes sense.
|
|| Better/easier depends on the problem context and your skills. Chuck
|| Falconer, Richard Heathfield, and I have all implemented functions
|| that input variable-length strings (I like mine best, of course
|| :) - my code is at the link below.)
|
| :) Ok, got that point, too.
|
||| 2. I am removing the '\0' from the string. Valid or invalid?
||
|| It's valid; but it turns the 'string' into a simple 'array of
|| char'. If that's what you want then it's fine.
|
| Never mind that part. I confused the '\n' (which I remove) and the
| '\0'.
|
| However, in my original code the amount of memory allocated is
| apparently too little. For the string "test" the buffer 'line'
| holds "t e s t \n \0" right? After replacing the '\n' with '\0' I
| allocated strlen(line)=4 bytes to hold the 4-char-string and copy
| it to the memory location with strcpy. What happens in this case
| with the trailing '\0'? Obviously there is not enough memory
| allocated to hold it. Is it still copied or omitted?

Big trouble. The strxxx() finctions only work with strings. An
unterminated sequence of chars in an array or memory area is _not_ a
string. strcpy() will copy chars from src to dst until the first of:
[1] a '\0' is found, or [2] a memory fault of some kind is generated.
Generally [2] is preferable, since you have no way of knowing what got
clobbered in [1].

When you allocate memory to hold a string, you must allocate enough to
hold the '\0' terminator: p = malloc(1+strlen(line)); is probably what
you want.

As you would hope, strcpy() copies the whole string (meaning that it
copies the '\0' terminator).

Here's some essential info:

char string[] = "test"; produces an array of 5 chars that is a string.
char simple[] = 't','e','s','t'; produces an array of 4 chars that is
not a string.

The answer to your last question is that strcpy() will try to copy all
five chars - and whatever follows the four-char space that you
allocated will be clobbered.
 
C

Chris Torek

char *inputData;
char line[200];

printf("Input: ");
fgets(line, sizeof(line), stdin);

This part is OK, except that you should test the return value
from fgets(). If I run:

% ./yourprog < /dev/null

the very first fgets() will return NULL, and line, for all
valid values of i, is still uninitialized (unless line[] has
static duration of course).
line[strlen(line)-1] = '\0';

It is a little safer to find the '\n' and quash it only if it
is present:

char *p;
...
p = strchr(line, '\n');
if (p != NULL)
*p = '\0';

If the newline is *not* present, something odd has occurred.
Perhaps, for instance, I ran:

% ./yourprog < binaryfile

where "binaryfile" contains no newlines, or sequences of more
than 200 chars between newlines, or even embedded '\0' characters.
(Of course, if I did any of those things, it is my fault, but
you may wish to check for it.)
inputData = malloc(strlen(line)*sizeof(inputData));

There are two things wrong with this line, one of which someone
caught elsethread. The first is that you need strlen()+1 bytes to
hold a C string, since strlen() does not count the terminating
'\0'. The second should be obvious: malloc() calls should look
like this:

ptr = malloc(number * sizeof *ptr);

The call above is missing the "*" in front of "inputData" in the
argument to sizeof(). Note that this rule holds even for pointers
to pointers:

*pp = malloc(N * sizeof **pp); /* correct */
*pq = malloc(N * sizeof *pq); /* wrong number of "*"s */
*pr = malloc(N * sizeof ***pr); /* wrong number of "*"s */

(Since subscripts like a are syntactic shortcuts for unary "*"
operators, one can apply the same general rule:

char **pp;
pp = malloc(N1 * sizeof pp[0]);
if (pp == NULL) ...
pp[0] = malloc(N2 * sizeof pp[0][0]);
...

However, I often find this kind of code clearer if I use temporary
variables, and stick with the "p = malloc(N * sizeof *p)" form:

char **pp;
char *p0space;
pp = malloc(N1 * sizeof *pp);
p0space = malloc(N2 * sizeof *p0space);
if (pp == NULL || p0space == NULL) ...
pp[0] = p0space;
...

Of course, N can be omitted if N is just a constant 1, and the
sizeof() can be omitted if the type of *p is sure to be some variant
of "char". But the general form is always safe.)
 
C

Christoph Scholtes

Mark,

Thanks for your input on this. As you can probably tell I dont have much
coding practice, but I am trying to get there. :)
I would recommend one of two things depending on implementation. If
you are doing something different to each of these strings you are
reading I suggest that you take the code you have repeated 4 times and
place it in a function. Its good practice to not allow the same code
to exist multiple times.

Alright. I implemented the whole thing as a function: char*
get_input(void). It returns a pointer to the string, which resides in
the memory allocated within the function. Works.
If you are doing the same thing to each of these strings I suggest you
put this snippet into a for loop, and use your index your looping
"for(i = 0; i < 4; i++)" as the index into an array of string buffers
"char *buf[4]". Then "buf = malloc(...)".


Which would also make sense. The advantage would be that I dont have the
overhead of the function calls right? Not that it really matters for my
application, but just out of curiosity. This is only a little project to
get me some coding practice.
I would also suggest you consider a slightly different strategy. I
know you are trying to avoid allocating large chunks upfront, but I
don't think that that would actually be to consuming. You may be
saving some by not allocating memory that you will not be using.
However, you are sacrificing processing at runtime because you a
preforming an extra copy. scanf performs a copy of the data from the
stdin kernel buffer into your process's buffer, the "line" variable.
Then you use strcpy to copy from the "line" variable into a seperate
buffer. If you ran scanf directly to the string buffer you were going
to manipulate then you would not have the additional copy.

scanf? You mean fgets? Thats what I use. Accessing the buffer directly
would only work if I allocate the memory as a static array up front right?

Thanks Mark and everybody for your input so far.

Christoph
 
P

pete

Christoph Scholtes wrote:
scanf? You mean fgets? Thats what I use.

I think scanf is easier to use than fgets
for the case when you're willing to discard
any characters in a line that's longer than your buffer.
If you see fscanf,
you can figure the scanf equivalent pretty easy.
Try this program with lines that are too long
or lines that aren't even numbers:

/* BEGIN grade.c */

#include <stdlib.h>
#include <stdio.h>

#define LENGTH 3
#define str(x) # x
#define xstr(x) str(x)

int main(void)
{
int rc;
char array[LENGTH + 1];
long number;
const char letter[4] = "DCBA";

fputs("Enter the Numeric grade: ", stdout);
fflush(stdout);
rc = fscanf(stdin, "%" xstr(LENGTH) "[^\n]%*[^\n]", array);
if (!feof(stdin)) {
getchar();
}
while (rc == 1) {
number = strtol(array, NULL, 10);
if (number > 59) {
if (number > 99) {
number = 99;
}
array[0] = letter[(number - 60) / 10];
switch (number % 10) {
case 0:
case 1:
array[1] = '-';
array[2] = '\0';
break;
case 8:
case 9:
array[1] = '+';
array[2] = '\0';
break;
default:
array[1] = '\0';
break;
}
} else {
array[0] = 'F';
array[1] = '\0';
}
printf("The Letter grade is: %s\n", array);
fputs("Enter the Numeric grade: ", stdout);
fflush(stdout);
rc = fscanf(stdin, "%" xstr(LENGTH) "[^\n]%*[^\n]", array);
if (!feof(stdin)) {
getchar();
}
}
return 0;
}

/* END grade.c */
 
D

Default User

Digital said:
Chris,
Two questions/assumptions.


Please don't top-post. Your replies belong following or interspersed
with properly trimmed quotes. See the other 99% of the posts in this
group.




Brian
 
B

Barry Schwarz

OK, that makes sense.


:) Ok, got that point, too.


Never mind that part. I confused the '\n' (which I remove) and the '\0'.

However, in my original code the amount of memory allocated is
apparently too little. For the string "test" the buffer 'line' holds "t
e s t \n \0" right? After replacing the '\n' with '\0' I allocated
strlen(line)=4 bytes to hold the 4-char-string and copy it to the memory
location with strcpy. What happens in this case with the trailing '\0'?
Obviously there is not enough memory allocated to hold it. Is it still
copied or omitted?

Due to an "error" in the call to malloc, you actually allocate too
much space rather than too little so the situation you describe cannot
occur (at least on most desktop systems in common use).


Remove del for email
 
C

Christoph Scholtes

Hi Barry,

Barry said:
Due to an "error" in the call to malloc, you actually allocate too
much space rather than too little so the situation you describe cannot
occur (at least on most desktop systems in common use).

I dont understand. Which "error" are you referring to? Could you explain
this please.

Thanks,
Chris
 
B

Barry Schwarz

Hi Barry,



I dont understand. Which "error" are you referring to? Could you explain
this please.
Since you elected to snip the code in question and the comment I was
responding to, you will have to settle for a paraphrase.

The original code had something like
char *ptr = malloc(N*sizeof ptr);

Since sizeof ptr is probably >= 4, this allocates almost four times as
much space as intended.

Since the correct amount of space should have been N+1 and the comment
I was responding to talked about allocating only N, I pointed out that
it was unlikely he was only allocating N.


Remove del for email
 
C

Christoph Scholtes

Barry said:
Since you elected to snip the code in question and the comment I was
responding to, you will have to settle for a paraphrase.

The original code had something like
char *ptr = malloc(N*sizeof ptr);

Since sizeof ptr is probably >= 4, this allocates almost four times as
much space as intended.

Since the correct amount of space should have been N+1 and the comment
I was responding to talked about allocating only N, I pointed out that
it was unlikely he was only allocating N.

Thanks Barry. You are right. In fact, I tried to be smart about using
malloc and use the variable with sizeof instead of the variable type but
omitting the * ruined it of course... A mistake that probably wont
happen again. :)

Chris
 
J

Joe Wright

Christoph said:
OK, that makes sense.


:) Ok, got that point, too.


Never mind that part. I confused the '\n' (which I remove) and the '\0'.

However, in my original code the amount of memory allocated is
apparently too little. For the string "test" the buffer 'line' holds "t
e s t \n \0" right? After replacing the '\n' with '\0' I allocated
strlen(line)=4 bytes to hold the 4-char-string and copy it to the memory
location with strcpy. What happens in this case with the trailing '\0'?
Obviously there is not enough memory allocated to hold it. Is it still
copied or omitted?

Chris

First, fgets reads lines from text streams. It reads them into memory as
strings by adding the '\0' at the end of the line. It is the trailing
'\n' that defines the line on the stream and it is the trailing '\0'
that defines the string in memory.

The text stream consists of zero or more lines of zero or more
characters. The lines are terminated with '\n'. There is no rational
case for '\0' occurring in a text stream.
 

Ask a Question

Want to reply to this thread or ask your own question?

You'll need to choose a username for the site, which only take a couple of moments. After that, you can post your question and our members will help you out.

Ask a Question

Members online

No members online now.

Forum statistics

Threads
474,184
Messages
2,570,976
Members
47,536
Latest member
MistyLough

Latest Threads

Top