comparing binary trees in C

F

Franken Sense

I've been trying to develop more savvy with simple abstract data
structures, in particular with binary trees as I see them in K&R, C
Unleashed, and wiki. I've been able to implement binary trees in C in a
manner that is close to chp. 6 of K&R. With forum comments, I think we
obtained a good result.

I'm now presented with a more abstract implementation and need a lot help
digesting it:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/******************************************************************
* linetree.c, AvK 20090429.
* Create a binary tree for a (text) file.
* Textfile is read from stdin.
* Duplicate lines are detected (and suppressed).
* The final output contains every unique input line
* (in the original order), prefixed by its line number(s)
******************************************************************/
struct node {
struct node *prev;
struct node *next;
unsigned num;
size_t size;
union {
char data[1];
struct node *list;
} uni;
};

int diff_mem(void *lp, size_t llen,void *rp, size_t rlen);
void dump_tree(FILE *fp, struct node *ptr);
int node_add(struct node **hnd, unsigned num, char *data, size_t size);
struct node ** node_hnd(struct node **hnd, char * data, size_t size);
struct node *node_eat(struct node **hnd );
struct node *node_sort(struct node *ptr, int (*cmp)(struct node *l,struct
node *r) );
struct node *node_eat(struct node **hnd );
int node_cmp_num(struct node *l,struct node *r);

struct node *root = NULL;
unsigned lineno = 0;
/**
* strcmp for non-strings (unterminated)
*/
int diff_mem(void *lp, size_t lsiz,void *rp, size_t rsiz)
{
int diff=0;
size_t siz;

siz = (lsiz < rsiz) ? lsiz : rsiz;
if (!siz) diff = 0;
else {
diff = memcmp(lp,rp, siz);
if (!diff && lsiz != rsiz) diff = (lsiz < rsiz) ? -1 : 1;
}
return diff;
}

struct node ** node_hnd(struct node **hnd, char * data, size_t size)
{
struct node * tail;
int diff;

while (*hnd) {
for (tail = *hnd; tail->size == 0 ; tail = tail->uni.list) {;}
diff = diff_mem(tail->uni.data, tail->size, data,size);
if (diff < 0) hnd = &(*hnd)->prev ;
else if (diff > 0) hnd = &(*hnd)->next ;
else break;
}
return hnd;
}

int node_add(struct node **hnd, unsigned num, char *data, size_t size)
{
struct node *temp;

hnd = node_hnd(hnd, data, size);

if (!*hnd) {
*hnd = malloc (sizeof **hnd + size);
(*hnd)->num = num;
(*hnd)->next = NULL;
(*hnd)->prev = NULL;
(*hnd)->size = size;
memcpy((*hnd)->uni.data , data, size);
(*hnd)->uni.data[size] = 0; /* for strings ... */
return 0;
}
/* For duplicates, we only need to store one copy of the data.
* The node with the data is at the tail of the list,
* the others use the .list part of the union
* for the linked list of duplicates.
*/
for ( ;(*hnd)->size == 0; hnd = &(*hnd)->uni.list) {;}

/* overwrite tail node and append original tail to it */
temp = *hnd;
*hnd = malloc (sizeof **hnd );
memcpy(*hnd, temp, sizeof *temp);
(*hnd)->size = 0;
(*hnd)->uni.list = temp;
temp->prev = NULL;
temp->next = NULL;
temp->num = num;
temp->size = size;

return 1;
}

struct node *node_eat(struct node **hnd )
{
struct node *tmp;
tmp= *hnd;

if (!tmp) return tmp;
if (tmp->prev) return node_eat(&tmp->prev) ;
*hnd = tmp->next;
tmp->next = NULL;
tmp->prev = NULL;
return tmp;
}

int node_cmp_num(struct node *l,struct node *r)
{
if (!l) return -1;
if (!r) return 1;
if (l->num > r->num ) return 1;
if (l->num < r->num ) return -1;
return 0;
}

struct node *node_sort(struct node *ptr
, int (*cmp)( struct node *l,struct node *r) )
{
struct node *new, *tmp, **hnd;
int diff;

for (new=NULL; (tmp = node_eat( &ptr)) ; ) {
for (hnd= &new; *hnd; ) {
diff = (*cmp)(*hnd,tmp);
hnd= (diff>0) ? &(*hnd)->prev : &(*hnd)->next;
}
*hnd = tmp;
}
return new;
}

void dump_tree(FILE *fp, struct node *ptr)
{
struct node *tmp;

if (!ptr) return;
dump_tree(fp, ptr->prev);

for (tmp=ptr; ; tmp=tmp->uni.list ) {
fprintf(fp, "%u " , tmp->num );
if (tmp->size) break;
}


fprintf(fp, ":%*.*s"
, (int) tmp->size
, (int) tmp->size
, tmp->uni.data
);
dump_tree(fp, ptr->next);
}

#define my_file "text44.txt"

int main(int argc, char *argv[] )
{
size_t len;
char buff[200]; /* no check for length ... */
FILE *fp;

if ((fp = fopen(my_file, "r")) == NULL )
{
fprintf(stderr, "can't open file\n");
exit(EXIT_FAILURE);
}

while (fgets(buff, sizeof buff, fp)) {
len = strlen(buff);
node_add(&root, ++lineno, buff, len);
}
root = node_sort(root, node_cmp_num);

dump_tree(stdout, root);
exit(0);
}

// gcc bt2.c -Wall -o out

I get output right off the bat:

input:

The quick brown
fox jumps over the lazy
dog.
dog.
fox

output:

E:\gfortran\dan>out
1 :The quick brown
2 :fox jumps over the lazy
3 4 :dog.
5 :fox
E:\gfortran\dan>

As far as I can tell, this compiles and behaves. Questions:

q1) In this node, is there any difference between prev and next as opposed
to right and left?

struct node {
struct node *prev;
struct node *next;
unsigned num;
size_t size;
union {
char data[1];
struct node *list;
} uni;
};

q2) What's going on in this union?

Thanks for your comment and cheers,
--
Frank

Mistakes are a part of being human. Appreciate your mistakes for what they
are: precious life lessons that can only be learned the hard way. Unless
it's a fatal mistake, which, at least, others can learn from.
~~ Al Franken,
 
F

Franken Sense

In Dread Ink, the Grave Hand of Franken Sense Did Inscribe:
struct node {
struct node *prev;
struct node *next;
unsigned num;
size_t size;
union {
char data[1];
struct node *list;
} uni;
};

q2) What's going on in this union?

I guess no takers on this question. When faced with code I don't
understand, I usually start by printing out what the passed values are.
Here I've added statements that print the args passed to node_add:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct node {
struct node *prev;
struct node *next;
unsigned num;
size_t size;
union {
char data[1];
struct node *list;
} uni;
};

int diff_mem(void *lp, size_t llen,void *rp, size_t rlen);
void dump_tree(FILE *fp, struct node *ptr);
int node_add(struct node **hnd, unsigned num, char *data, size_t size);
struct node ** node_hnd(struct node **hnd, char * data, size_t size);
struct node *node_eat(struct node **hnd );
struct node *node_sort(struct node *ptr, int (*cmp)(struct node *l,struct
node *r) );
struct node *node_eat(struct node **hnd );
int node_cmp_num(struct node *l,struct node *r);

struct node *root = NULL;
unsigned lineno = 0;
/**
* strcmp for non-strings (unterminated)
*/
int diff_mem(void *lp, size_t lsiz,void *rp, size_t rsiz)
{
int diff=0;
size_t siz;

siz = (lsiz < rsiz) ? lsiz : rsiz;
if (!siz) diff = 0;
else {
diff = memcmp(lp,rp, siz);
if (!diff && lsiz != rsiz) diff = (lsiz < rsiz) ? -1 : 1;
}
return diff;
}

struct node ** node_hnd(struct node **hnd, char * data, size_t size)
{
struct node * tail;
int diff;

while (*hnd) {
for (tail = *hnd; tail->size == 0 ; tail = tail->uni.list) {;}
diff = diff_mem(tail->uni.data, tail->size, data,size);
if (diff < 0) hnd = &(*hnd)->prev ;
else if (diff > 0) hnd = &(*hnd)->next ;
else break;
}
return hnd;
}

int node_add(struct node **hnd, unsigned num, char *data, size_t size)
{

printf ("in node_add\n");
printf ("passed args are %p %ud %s %d \n",\
(void *) hnd, num, data, size);
struct node *temp;

hnd = node_hnd(hnd, data, size);

if (!*hnd) {
*hnd = malloc (sizeof **hnd + size);
(*hnd)->num = num;
(*hnd)->next = NULL;
(*hnd)->prev = NULL;
(*hnd)->size = size;
memcpy((*hnd)->uni.data , data, size);
(*hnd)->uni.data[size] = 0; /* for strings ... */
return 0;
}
/* For duplicates, we only need to store one copy of the data.
* The node with the data is at the tail of the list,
* the others use the .list part of the union
* for the linked list of duplicates.
*/
for ( ;(*hnd)->size == 0; hnd = &(*hnd)->uni.list) {;}

/* overwrite tail node and append original tail to it */
temp = *hnd;
*hnd = malloc (sizeof **hnd );
memcpy(*hnd, temp, sizeof *temp);
(*hnd)->size = 0;
(*hnd)->uni.list = temp;
temp->prev = NULL;
temp->next = NULL;
temp->num = num;
temp->size = size;

return 1;
}

struct node *node_eat(struct node **hnd )
{
struct node *tmp;
tmp= *hnd;

if (!tmp) return tmp;
if (tmp->prev) return node_eat(&tmp->prev) ;
*hnd = tmp->next;
tmp->next = NULL;
tmp->prev = NULL;
return tmp;
}

int node_cmp_num(struct node *l,struct node *r)
{
if (!l) return -1;
if (!r) return 1;
if (l->num > r->num ) return 1;
if (l->num < r->num ) return -1;
return 0;
}

struct node *node_sort(struct node *ptr
, int (*cmp)( struct node *l,struct node *r) )
{
struct node *new, *tmp, **hnd;
int diff;

for (new=NULL; (tmp = node_eat( &ptr)) ; ) {
for (hnd= &new; *hnd; ) {
diff = (*cmp)(*hnd,tmp);
hnd= (diff>0) ? &(*hnd)->prev : &(*hnd)->next;
}
*hnd = tmp;
}
return new;
}

void dump_tree(FILE *fp, struct node *ptr)
{
struct node *tmp;

if (!ptr) return;
dump_tree(fp, ptr->prev);

for (tmp=ptr; ; tmp=tmp->uni.list ) {
fprintf(fp, "%u " , tmp->num );
if (tmp->size) break;
}


fprintf(fp, ":%*.*s"
, (int) tmp->size
, (int) tmp->size
, tmp->uni.data
);
dump_tree(fp, ptr->next);
}

#define my_file "text44.txt"

int main(int argc, char *argv[] )
{
size_t len;
char buff[200]; /* no check for length ... */
FILE *fp;

if ((fp = fopen(my_file, "r")) == NULL )
{
fprintf(stderr, "can't open file\n");
exit(EXIT_FAILURE);
}

while (fgets(buff, sizeof buff, fp)) {
len = strlen(buff);
node_add(&root, ++lineno, buff, len);
}
root = node_sort(root, node_cmp_num);

dump_tree(stdout, root);
exit(0);
}

// gcc bt3.c -Wall -o out

output:


E:\gfortran\dan>gcc bt3.c -Wall -o out

E:\gfortran\dan>out
in node_add
passed args are 00404010 1d The quick brown
16
in node_add
passed args are 00404010 2d fox jumps over the lazy
24
in node_add
passed args are 00404010 3d dog.
5
in node_add
passed args are 00404010 4d dog.
5
in node_add
passed args are 00404010 5d fox 3
1 :The quick brown
2 :fox jumps over the lazy
3 4 :dog.
5 :fox
E:\gfortran\dan>

I wouldn't have expected the first argument passed to be the same on
successive calls. Does it vary, and if so, how do I print it properly?

I've been using
http://www.dinkum.com/manuals/?manual=compleat&page=lib_prin.html#Print Functions
as a reference.
--
Frank

The biases the media has are much bigger than conservative or liberal.
They're about getting ratings, about making money, about doing stories that
are easy to cover.
~~ Al Franken,
 
B

Barry Schwarz

I've been trying to develop more savvy with simple abstract data
structures, in particular with binary trees as I see them in K&R, C
Unleashed, and wiki. I've been able to implement binary trees in C in a
manner that is close to chp. 6 of K&R. With forum comments, I think we
obtained a good result.

I'm now presented with a more abstract implementation and need a lot help
digesting it:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/******************************************************************
* linetree.c, AvK 20090429.
* Create a binary tree for a (text) file.
* Textfile is read from stdin.
* Duplicate lines are detected (and suppressed).
* The final output contains every unique input line
* (in the original order), prefixed by its line number(s)
******************************************************************/
snip

struct node {
struct node *prev;
struct node *next;
unsigned num;
size_t size;
union {
char data[1];
struct node *list;
} uni;
};

q2) What's going on in this union?

Exactly what the comment says. If you have multiple nodes (lines)
with the same text, instead of copying the data into each node, the
designer has taken a shortcut to save memory. The first node with the
data will contain the data starting at &data[0] and extending as far
beyond the end of the structure as necessary (legal because malloc
allocated the extra space). Subsequent nodes with the same data do
not contain the data. Instead, a pointer to the first node with the
data is stored in list.

This poses an interesting problem if a "first" node is ever deleted
from the list. You have to find a node that points to the first, move
the data into this node, and change every other pointer to the first
node to point to the new "primary" node.
 
F

Franken Sense

In Dread Ink, the Grave Hand of Barry Schwarz Did Inscribe:
I've been trying to develop more savvy with simple abstract data
structures, in particular with binary trees as I see them in K&R, C
Unleashed, and wiki. I've been able to implement binary trees in C in a
manner that is close to chp. 6 of K&R. With forum comments, I think we
obtained a good result.

I'm now presented with a more abstract implementation and need a lot help
digesting it:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/******************************************************************
* linetree.c, AvK 20090429.
* Create a binary tree for a (text) file.
* Textfile is read from stdin.
* Duplicate lines are detected (and suppressed).
* The final output contains every unique input line
* (in the original order), prefixed by its line number(s)
******************************************************************/
snip

struct node {
struct node *prev;
struct node *next;
unsigned num;
size_t size;
union {
char data[1];
struct node *list;
} uni;
};

q2) What's going on in this union?

Exactly what the comment says. If you have multiple nodes (lines)
with the same text, instead of copying the data into each node, the
designer has taken a shortcut to save memory. The first node with the
data will contain the data starting at &data[0] and extending as far
beyond the end of the structure as necessary (legal because malloc
allocated the extra space). Subsequent nodes with the same data do
not contain the data. Instead, a pointer to the first node with the
data is stored in list.

This poses an interesting problem if a "first" node is ever deleted
from the list. You have to find a node that points to the first, move
the data into this node, and change every other pointer to the first
node to point to the new "primary" node.

I don't think I'm so wild about this shortcut.

So this union consists of alternatively: a pointer to the first character
in data or a pointer to what?
--
Frank

When you encounter seemingly good advice that contradicts other seemingly
good advice, ignore them both.
~~ Al Franken,
 
J

James Kuyper

Franken said:
In Dread Ink, the Grave Hand of Barry Schwarz Did Inscribe:
On Fri, 1 May 2009 01:51:37 -0700, Franken Sense
I'm now presented with a more abstract implementation and need a lot help
digesting it:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/******************************************************************
* linetree.c, AvK 20090429.
* Create a binary tree for a (text) file.
* Textfile is read from stdin.
* Duplicate lines are detected (and suppressed).
* The final output contains every unique input line
* (in the original order), prefixed by its line number(s)
******************************************************************/ snip

struct node {
struct node *prev;
struct node *next;
unsigned num;
size_t size;
union {
char data[1];
struct node *list;
} uni;
};

q2) What's going on in this union?
Exactly what the comment says. If you have multiple nodes (lines)
with the same text, instead of copying the data into each node, the
designer has taken a shortcut to save memory. The first node with the
data will contain the data starting at &data[0] and extending as far
beyond the end of the structure as necessary (legal because malloc
allocated the extra space). ...

That works on many implementations, for the same reason the struct hack
works on those same implementations, but it has undefined behavior
because you're accessing an array beyond it's declared length. In C99,
flexible array members were added, which provide much the same
functionality, with the added advantage that they're actually guaranteed
to work by the standard. However, a flexible array member must have no
declared length, and can only occur as the last member of a structure.
It can't be a member of the union which is the last member; the flexible
array member itself has to be the last member.
... Subsequent nodes with the same data do

I don't think I'm so wild about this shortcut.

So this union consists of alternatively: a pointer to the first character
in data or a pointer to what?

No, this consists either of

a) an array named data which is declared as if it were only one byte in
length, but is treated as if it had the full length allowed to it by the
amount of memory allocated by the call to malloc() that created the node.

ob b) a pointer to a struct node.
 
C

CBFalconer

James said:
Franken said:
Barry Schwarz Did Inscribe:
.... snip ...

struct node {
struct node *prev;
struct node *next;
unsigned num;
size_t size;
union {
char data[1];
struct node *list;
} uni;
};

q2) What's going on in this union?

Exactly what the comment says. If you have multiple nodes
(lines) with the same text, instead of copying the data into
each node, the designer has taken a shortcut to save memory.
The first node with the data will contain the data starting at
&data[0] and extending as far beyond the end of the structure
as necessary (legal because malloc allocated the extra space).

That works on many implementations, for the same reason the
struct hack works on those same implementations, but it has
undefined behavior because you're accessing an array beyond it's
declared length. In C99, flexible array members were added,
which provide much the same functionality, with the added
advantage that they're actually guaranteed to work by the
standard. However, a flexible array member must have no declared
length, and can only occur as the last member of a structure. It
can't be a member of the union which is the last member; the
flexible array member itself has to be the last member.

However making that structure legal under C99 gives up the ability
to compile under C90. I believe that ALL C90 compilers have been
found, so far, to accept the struct hack. This isn't a reason to
avoid flexible arrays, but is something to keep in mind, and
possibly comment on in the source.
 
J

James Kuyper

CBFalconer said:
James Kuyper wrote: ....

However making that structure legal under C99 gives up the ability
to compile under C90. I believe that ALL C90 compilers have been
found, so far, to accept the struct hack. This isn't a reason to
avoid flexible arrays, but is something to keep in mind, and
possibly comment on in the source.

In my entire career I've only used the struct hack a handful of times.
So far I've only actually used flexible arrays once in code that was
intended for any purpose other than learning about C99. When I did so, I
used a test on __STDC_VERSION__ to determine whether or not flexible
arrays could be counted on. If not, the macro that I used for the array
length was defined as 1. However, if they were, it was simply defined
without being given a value. That's pretty much all that was needed.
 
L

luserXtrog

In Dread Ink, the Grave Hand of James Kuyper Did Inscribe:
No, this consists either of
a) an array named data which is declared as if it were only one byte in
length, but is treated as if it had the full length allowed to it by the
amount of memory allocated by the call to malloc() that created the node.
ob b) a pointer to a struct node.

I'd certainly like to know more about this thing called a struct hack, but
I need to know what happens in other parts of this program before I can put
the picture together.

I would think that the first argument to add_node would vary between calls,
but I'm not seeing it:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct node {
        struct node *prev;
        struct node *next;
        unsigned num;
        size_t size;
        union   {
                char data[1];

The struct hack is this data[1] mumbo jumbo.
You and I both know that it's gonna hold more than one
character. It works because extra bytes are requested
from malloc.
It's similar, but safer, than this sort of thing:

char a[2] = "a", b[3] = "cd";
a[1] = 'b';
printf("%s\n", a);

Can you see how this would probably work?
Now forget you ever saw this and don't tell anybody about it.
Don't ever do something like this!
                struct node *list;
                } uni;
        };

int diff_mem(void *lp, size_t llen,void *rp, size_t rlen);
void dump_tree(FILE *fp, struct node *ptr);
int node_add(struct node **hnd, unsigned num, char *data, size_t size);
struct node ** node_hnd(struct node **hnd, char * data, size_t size);
struct node *node_eat(struct node **hnd );
struct node *node_sort(struct node *ptr, int (*cmp)(struct node *l,struct
node *r) );
struct node *node_eat(struct node **hnd );
int node_cmp_num(struct node *l,struct node *r);

struct node *root = NULL;
unsigned lineno = 0;

I really don't like the placement of these globals.
I can't seem to find them when I'm looking for them.
In my opinion these deserve a far more prominent position.
/**
 * strcmp for non-strings (unterminated)
 */
int diff_mem(void *lp, size_t lsiz,void *rp, size_t rsiz)
{
int diff=0;
size_t siz;

siz = (lsiz < rsiz) ? lsiz : rsiz;
if (!siz) diff = 0;
else    {
        diff = memcmp(lp,rp, siz);
        if (!diff && lsiz != rsiz) diff = (lsiz < rsiz) ? -1 : 1;
        }
return diff;

}

struct node ** node_hnd(struct node **hnd, char * data, size_t size)
{
struct node * tail;
int diff;

while (*hnd) {
        for (tail = *hnd; tail->size == 0 ; tail = tail->uni.list) {;}
        diff = diff_mem(tail->uni.data, tail->size, data,size);
        if (diff < 0) hnd = &(*hnd)->prev ;
        else if (diff > 0) hnd = &(*hnd)->next ;
        else break;
        }
return hnd;

}

int node_add(struct node **hnd, unsigned num, char *data, size_t size)
{

printf ("in node_add\n");
printf ("passed args are %p %ud %s %d \n",\
   (void *) hnd, num, data, size);
struct node *temp;

hnd = node_hnd(hnd, data, size);

if (!*hnd) {
        *hnd = malloc (sizeof **hnd + size);
        (*hnd)->num = num;
        (*hnd)->next = NULL;
        (*hnd)->prev = NULL;
        (*hnd)->size = size;
        memcpy((*hnd)->uni.data , data, size);
        (*hnd)->uni.data[size] = 0; /* for strings ... */
        return 0;
        }
/* For duplicates, we only need to store one copy of the data.
 * The node with the data is at the tail of the list,
 * the others use the .list part of the union
 * for the linked list of duplicates.
 */
for ( ;(*hnd)->size == 0; hnd = &(*hnd)->uni.list) {;}

/* overwrite tail node and append original tail to it */
temp = *hnd;
*hnd = malloc (sizeof **hnd );
memcpy(*hnd, temp, sizeof *temp);
(*hnd)->size = 0;
(*hnd)->uni.list = temp;
temp->prev = NULL;
temp->next = NULL;
temp->num = num;
temp->size = size;

return 1;

}

struct node *node_eat(struct node **hnd )
{
struct node *tmp;
tmp= *hnd;

if (!tmp) return tmp;
if (tmp->prev) return node_eat(&tmp->prev) ;
*hnd = tmp->next;
tmp->next = NULL;
tmp->prev = NULL;
return tmp;

}

int node_cmp_num(struct node *l,struct node *r)
{
if (!l) return -1;
if (!r) return 1;
if (l->num > r->num ) return 1;
if (l->num < r->num ) return -1;
return 0;

}

struct node *node_sort(struct node *ptr
        , int (*cmp)( struct node *l,struct node *r) )
{
struct node *new, *tmp, **hnd;
int diff;

for (new=NULL; (tmp = node_eat( &ptr)) ;    ) {
        for (hnd= &new; *hnd;       ) {
                diff = (*cmp)(*hnd,tmp);
                hnd= (diff>0) ?  &(*hnd)->prev : &(*hnd)->next;
                }
        *hnd = tmp;
        }
return new;

}

void dump_tree(FILE *fp, struct node *ptr)
{
struct node *tmp;

if (!ptr) return;
dump_tree(fp, ptr->prev);

for (tmp=ptr; ; tmp=tmp->uni.list ) {
        fprintf(fp, "%u " , tmp->num );
        if (tmp->size) break;
        }

fprintf(fp, ":%*.*s"
        , (int) tmp->size
        , (int) tmp->size
        , tmp->uni.data
        );
dump_tree(fp, ptr->next);

}

#define my_file "text44.txt"

int main(int argc, char *argv[] )
{
size_t len;
char buff[200]; /* no check for length ... */
FILE *fp;

if ((fp = fopen(my_file, "r")) == NULL )  
    {
      fprintf(stderr, "can't open file\n");
      exit(EXIT_FAILURE);
    }

while (fgets(buff, sizeof buff, fp)) {
        len = strlen(buff);
        node_add(&root, ++lineno, buff, len);

&root is different from (void *)root. But why would you
expect it to change? It's just a pointer. It changes
on the first call when it gets allocated, but after
that the changes appear to be occuring in the leaves
and branches. Uprooting a tree is a drastic step in
any universe of discourse.
 
F

Franken Sense

In Dread Ink, the Grave Hand of James Kuyper Did Inscribe:

No, this consists either of

a) an array named data which is declared as if it were only one byte in
length, but is treated as if it had the full length allowed to it by the
amount of memory allocated by the call to malloc() that created the node.

ob b) a pointer to a struct node.

I'd certainly like to know more about this thing called a struct hack, but
I need to know what happens in other parts of this program before I can put
the picture together.

I would think that the first argument to add_node would vary between calls,
but I'm not seeing it:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct node {
struct node *prev;
struct node *next;
unsigned num;
size_t size;
union {
char data[1];
struct node *list;
} uni;
};

int diff_mem(void *lp, size_t llen,void *rp, size_t rlen);
void dump_tree(FILE *fp, struct node *ptr);
int node_add(struct node **hnd, unsigned num, char *data, size_t size);
struct node ** node_hnd(struct node **hnd, char * data, size_t size);
struct node *node_eat(struct node **hnd );
struct node *node_sort(struct node *ptr, int (*cmp)(struct node *l,struct
node *r) );
struct node *node_eat(struct node **hnd );
int node_cmp_num(struct node *l,struct node *r);

struct node *root = NULL;
unsigned lineno = 0;
/**
* strcmp for non-strings (unterminated)
*/
int diff_mem(void *lp, size_t lsiz,void *rp, size_t rsiz)
{
int diff=0;
size_t siz;

siz = (lsiz < rsiz) ? lsiz : rsiz;
if (!siz) diff = 0;
else {
diff = memcmp(lp,rp, siz);
if (!diff && lsiz != rsiz) diff = (lsiz < rsiz) ? -1 : 1;
}
return diff;
}

struct node ** node_hnd(struct node **hnd, char * data, size_t size)
{
struct node * tail;
int diff;

while (*hnd) {
for (tail = *hnd; tail->size == 0 ; tail = tail->uni.list) {;}
diff = diff_mem(tail->uni.data, tail->size, data,size);
if (diff < 0) hnd = &(*hnd)->prev ;
else if (diff > 0) hnd = &(*hnd)->next ;
else break;
}
return hnd;
}

int node_add(struct node **hnd, unsigned num, char *data, size_t size)
{

printf ("in node_add\n");
printf ("passed args are %p %ud %s %d \n",\
(void *) hnd, num, data, size);
struct node *temp;

hnd = node_hnd(hnd, data, size);

if (!*hnd) {
*hnd = malloc (sizeof **hnd + size);
(*hnd)->num = num;
(*hnd)->next = NULL;
(*hnd)->prev = NULL;
(*hnd)->size = size;
memcpy((*hnd)->uni.data , data, size);
(*hnd)->uni.data[size] = 0; /* for strings ... */
return 0;
}
/* For duplicates, we only need to store one copy of the data.
* The node with the data is at the tail of the list,
* the others use the .list part of the union
* for the linked list of duplicates.
*/
for ( ;(*hnd)->size == 0; hnd = &(*hnd)->uni.list) {;}

/* overwrite tail node and append original tail to it */
temp = *hnd;
*hnd = malloc (sizeof **hnd );
memcpy(*hnd, temp, sizeof *temp);
(*hnd)->size = 0;
(*hnd)->uni.list = temp;
temp->prev = NULL;
temp->next = NULL;
temp->num = num;
temp->size = size;

return 1;
}

struct node *node_eat(struct node **hnd )
{
struct node *tmp;
tmp= *hnd;

if (!tmp) return tmp;
if (tmp->prev) return node_eat(&tmp->prev) ;
*hnd = tmp->next;
tmp->next = NULL;
tmp->prev = NULL;
return tmp;
}

int node_cmp_num(struct node *l,struct node *r)
{
if (!l) return -1;
if (!r) return 1;
if (l->num > r->num ) return 1;
if (l->num < r->num ) return -1;
return 0;
}

struct node *node_sort(struct node *ptr
, int (*cmp)( struct node *l,struct node *r) )
{
struct node *new, *tmp, **hnd;
int diff;

for (new=NULL; (tmp = node_eat( &ptr)) ; ) {
for (hnd= &new; *hnd; ) {
diff = (*cmp)(*hnd,tmp);
hnd= (diff>0) ? &(*hnd)->prev : &(*hnd)->next;
}
*hnd = tmp;
}
return new;
}

void dump_tree(FILE *fp, struct node *ptr)
{
struct node *tmp;

if (!ptr) return;
dump_tree(fp, ptr->prev);

for (tmp=ptr; ; tmp=tmp->uni.list ) {
fprintf(fp, "%u " , tmp->num );
if (tmp->size) break;
}


fprintf(fp, ":%*.*s"
, (int) tmp->size
, (int) tmp->size
, tmp->uni.data
);
dump_tree(fp, ptr->next);
}

#define my_file "text44.txt"

int main(int argc, char *argv[] )
{
size_t len;
char buff[200]; /* no check for length ... */
FILE *fp;

if ((fp = fopen(my_file, "r")) == NULL )
{
fprintf(stderr, "can't open file\n");
exit(EXIT_FAILURE);
}

while (fgets(buff, sizeof buff, fp)) {
len = strlen(buff);
node_add(&root, ++lineno, buff, len);
printf ("in main \n");
printf ("passed args are %p %ud %s %d \n",\
(void *)root, lineno, buff, len);
}
root = node_sort(root, node_cmp_num);

dump_tree(stdout, root);
exit(0);
}

// gcc bt4.c -Wall -o out


E:\gfortran\dan>gcc bt4.c -Wall -o out

E:\gfortran\dan>out
in node_add
passed args are 00404010 1d The quick brown
16
in main
passed args are 003D2508 1d The quick brown
16
in node_add
passed args are 00404010 2d fox jumps over the lazy
24
in main
passed args are 003D2508 2d fox jumps over the lazy
24
in node_add
passed args are 00404010 3d dog.
5
in main
passed args are 003D2508 3d dog.
5
in node_add
passed args are 00404010 4d dog.
5
in main
passed args are 003D2508 4d dog.
5
in node_add
passed args are 00404010 5d fox 3
in main
passed args are 003D2508 5d fox 3
1 :The quick brown
2 :fox jumps over the lazy
3 4 :dog.
5 :fox
E:\gfortran\dan>

(?)
--
Frank

And by the way, a few months ago, I trademarked the word 'funny.' So when
Fox calls me 'unfunny,' they're violating my trademark. I am seriously
considering a countersuit.
~~ Al Franken, in response to Fox's copyright infringement lawsuit
 
B

Barry Schwarz

On Mon, 4 May 2009 22:26:26 -0700, Franken Sense

snip
I would think that the first argument to add_node would vary between calls,
but I'm not seeing it:

There is no function add_node in your code.

If you meant node_add, you call it in only one place and the argument
is &root. The address of root never changes so why would you think
the argument should?

Perhaps you meant the contents of root should change. In that case,
you printf argument should be (void*)*hnd.

All of which begs the question of why root is at file scope.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct node {
struct node *prev;
struct node *next;
unsigned num;
size_t size;
union {
char data[1];
struct node *list;
} uni;
};

snip prototypes
struct node *root = NULL;
unsigned lineno = 0;
snip

int node_add(struct node **hnd, unsigned num, char *data, size_t size)
{

printf ("in node_add\n");
printf ("passed args are %p %ud %s %d \n",\
(void *) hnd, num, data, size);
struct node *temp;

If you want help from people with C89 compilers, you would do well to
put your declarations before your statements.


snip rest of function

snip other functions
int main(int argc, char *argv[] )
{
size_t len;
char buff[200]; /* no check for length ... */
FILE *fp;

if ((fp = fopen(my_file, "r")) == NULL )
{
fprintf(stderr, "can't open file\n");
exit(EXIT_FAILURE);
}

while (fgets(buff, sizeof buff, fp)) {
len = strlen(buff);
node_add(&root, ++lineno, buff, len);
printf ("in main \n");
printf ("passed args are %p %ud %s %d \n",\
(void *)root, lineno, buff, len);
}

This invokes undefined behavior is size_t has rank greater than int.
root = node_sort(root, node_cmp_num);

dump_tree(stdout, root);
exit(0);
}

snip
 
F

Franken Sense

In Dread Ink, the Grave Hand of Barry Schwarz Did Inscribe:
If you want help from people with C89 compilers, you would do well to
put your declarations before your statements.

Ok. This version was too flawed for me to want to continue with it. I'm
left to wonder how to turn the node into a linked list.
 
F

Franken Sense

In Dread Ink, the Grave Hand of Barry Schwarz Did Inscribe:
struct node {
struct node *prev;
struct node *next;
unsigned num;
size_t size;
union {
char data[1];
struct node *list;
} uni;
};

q2) What's going on in this union?

Exactly what the comment says. If you have multiple nodes (lines)
with the same text, instead of copying the data into each node, the
designer has taken a shortcut to save memory. The first node with the
data will contain the data starting at &data[0] and extending as far
beyond the end of the structure as necessary (legal because malloc
allocated the extra space). Subsequent nodes with the same data do
not contain the data. Instead, a pointer to the first node with the
data is stored in list.

This poses an interesting problem if a "first" node is ever deleted
from the list. You have to find a node that points to the first, move
the data into this node, and change every other pointer to the first
node to point to the new "primary" node.

struct node ** node_hnd(struct node **hnd, char * data, size_t size)
{
struct node * tail;
int diff;

while (*hnd) {
for (tail = *hnd; tail->size == 0 ; tail = tail->uni.list) {;}
diff = diff_mem(tail->uni.data, tail->size, data,size);
if (diff < 0) hnd = &(*hnd)->prev ;
else if (diff > 0) hnd = &(*hnd)->next ;
else break;
}
return hnd;

What happens in this function?
--
Frank

Mistakes are a part of being human. Appreciate your mistakes for what they
are: precious life lessons that can only be learned the hard way. Unless
it's a fatal mistake, which, at least, others can learn from.
~~ Al Franken,
 

Ask a Question

Want to reply to this thread or ask your own question?

You'll need to choose a username for the site, which only take a couple of moments. After that, you can post your question and our members will help you out.

Ask a Question

Similar Threads

Queue in C 25
bench: binary trees 10
Infinite loop problem 1
BST insertion 1
Adding adressing of IPv6 to program 1
A Binary Tree in C 18
URGENT 1
Is there in memory tree structure faster than std::map? 11

Members online

No members online now.

Forum statistics

Threads
473,954
Messages
2,570,116
Members
46,704
Latest member
BernadineF

Latest Threads

Top