F
Franken Sense
I've been trying to develop more savvy with simple abstract data
structures, in particular with binary trees as I see them in K&R, C
Unleashed, and wiki. I've been able to implement binary trees in C in a
manner that is close to chp. 6 of K&R. With forum comments, I think we
obtained a good result.
I'm now presented with a more abstract implementation and need a lot help
digesting it:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/******************************************************************
* linetree.c, AvK 20090429.
* Create a binary tree for a (text) file.
* Textfile is read from stdin.
* Duplicate lines are detected (and suppressed).
* The final output contains every unique input line
* (in the original order), prefixed by its line number(s)
******************************************************************/
struct node {
struct node *prev;
struct node *next;
unsigned num;
size_t size;
union {
char data[1];
struct node *list;
} uni;
};
int diff_mem(void *lp, size_t llen,void *rp, size_t rlen);
void dump_tree(FILE *fp, struct node *ptr);
int node_add(struct node **hnd, unsigned num, char *data, size_t size);
struct node ** node_hnd(struct node **hnd, char * data, size_t size);
struct node *node_eat(struct node **hnd );
struct node *node_sort(struct node *ptr, int (*cmp)(struct node *l,struct
node *r) );
struct node *node_eat(struct node **hnd );
int node_cmp_num(struct node *l,struct node *r);
struct node *root = NULL;
unsigned lineno = 0;
/**
* strcmp for non-strings (unterminated)
*/
int diff_mem(void *lp, size_t lsiz,void *rp, size_t rsiz)
{
int diff=0;
size_t siz;
siz = (lsiz < rsiz) ? lsiz : rsiz;
if (!siz) diff = 0;
else {
diff = memcmp(lp,rp, siz);
if (!diff && lsiz != rsiz) diff = (lsiz < rsiz) ? -1 : 1;
}
return diff;
}
struct node ** node_hnd(struct node **hnd, char * data, size_t size)
{
struct node * tail;
int diff;
while (*hnd) {
for (tail = *hnd; tail->size == 0 ; tail = tail->uni.list) {;}
diff = diff_mem(tail->uni.data, tail->size, data,size);
if (diff < 0) hnd = &(*hnd)->prev ;
else if (diff > 0) hnd = &(*hnd)->next ;
else break;
}
return hnd;
}
int node_add(struct node **hnd, unsigned num, char *data, size_t size)
{
struct node *temp;
hnd = node_hnd(hnd, data, size);
if (!*hnd) {
*hnd = malloc (sizeof **hnd + size);
(*hnd)->num = num;
(*hnd)->next = NULL;
(*hnd)->prev = NULL;
(*hnd)->size = size;
memcpy((*hnd)->uni.data , data, size);
(*hnd)->uni.data[size] = 0; /* for strings ... */
return 0;
}
/* For duplicates, we only need to store one copy of the data.
* The node with the data is at the tail of the list,
* the others use the .list part of the union
* for the linked list of duplicates.
*/
for ( ;(*hnd)->size == 0; hnd = &(*hnd)->uni.list) {;}
/* overwrite tail node and append original tail to it */
temp = *hnd;
*hnd = malloc (sizeof **hnd );
memcpy(*hnd, temp, sizeof *temp);
(*hnd)->size = 0;
(*hnd)->uni.list = temp;
temp->prev = NULL;
temp->next = NULL;
temp->num = num;
temp->size = size;
return 1;
}
struct node *node_eat(struct node **hnd )
{
struct node *tmp;
tmp= *hnd;
if (!tmp) return tmp;
if (tmp->prev) return node_eat(&tmp->prev) ;
*hnd = tmp->next;
tmp->next = NULL;
tmp->prev = NULL;
return tmp;
}
int node_cmp_num(struct node *l,struct node *r)
{
if (!l) return -1;
if (!r) return 1;
if (l->num > r->num ) return 1;
if (l->num < r->num ) return -1;
return 0;
}
struct node *node_sort(struct node *ptr
, int (*cmp)( struct node *l,struct node *r) )
{
struct node *new, *tmp, **hnd;
int diff;
for (new=NULL; (tmp = node_eat( &ptr)) ; ) {
for (hnd= &new; *hnd; ) {
diff = (*cmp)(*hnd,tmp);
hnd= (diff>0) ? &(*hnd)->prev : &(*hnd)->next;
}
*hnd = tmp;
}
return new;
}
void dump_tree(FILE *fp, struct node *ptr)
{
struct node *tmp;
if (!ptr) return;
dump_tree(fp, ptr->prev);
for (tmp=ptr; ; tmp=tmp->uni.list ) {
fprintf(fp, "%u " , tmp->num );
if (tmp->size) break;
}
fprintf(fp, ":%*.*s"
, (int) tmp->size
, (int) tmp->size
, tmp->uni.data
);
dump_tree(fp, ptr->next);
}
#define my_file "text44.txt"
int main(int argc, char *argv[] )
{
size_t len;
char buff[200]; /* no check for length ... */
FILE *fp;
if ((fp = fopen(my_file, "r")) == NULL )
{
fprintf(stderr, "can't open file\n");
exit(EXIT_FAILURE);
}
while (fgets(buff, sizeof buff, fp)) {
len = strlen(buff);
node_add(&root, ++lineno, buff, len);
}
root = node_sort(root, node_cmp_num);
dump_tree(stdout, root);
exit(0);
}
// gcc bt2.c -Wall -o out
I get output right off the bat:
input:
The quick brown
fox jumps over the lazy
dog.
dog.
fox
output:
E:\gfortran\dan>out
1 :The quick brown
2 :fox jumps over the lazy
3 4 :dog.
5 :fox
E:\gfortran\dan>
As far as I can tell, this compiles and behaves. Questions:
q1) In this node, is there any difference between prev and next as opposed
to right and left?
struct node {
struct node *prev;
struct node *next;
unsigned num;
size_t size;
union {
char data[1];
struct node *list;
} uni;
};
q2) What's going on in this union?
Thanks for your comment and cheers,
--
Frank
Mistakes are a part of being human. Appreciate your mistakes for what they
are: precious life lessons that can only be learned the hard way. Unless
it's a fatal mistake, which, at least, others can learn from.
~~ Al Franken,
structures, in particular with binary trees as I see them in K&R, C
Unleashed, and wiki. I've been able to implement binary trees in C in a
manner that is close to chp. 6 of K&R. With forum comments, I think we
obtained a good result.
I'm now presented with a more abstract implementation and need a lot help
digesting it:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/******************************************************************
* linetree.c, AvK 20090429.
* Create a binary tree for a (text) file.
* Textfile is read from stdin.
* Duplicate lines are detected (and suppressed).
* The final output contains every unique input line
* (in the original order), prefixed by its line number(s)
******************************************************************/
struct node {
struct node *prev;
struct node *next;
unsigned num;
size_t size;
union {
char data[1];
struct node *list;
} uni;
};
int diff_mem(void *lp, size_t llen,void *rp, size_t rlen);
void dump_tree(FILE *fp, struct node *ptr);
int node_add(struct node **hnd, unsigned num, char *data, size_t size);
struct node ** node_hnd(struct node **hnd, char * data, size_t size);
struct node *node_eat(struct node **hnd );
struct node *node_sort(struct node *ptr, int (*cmp)(struct node *l,struct
node *r) );
struct node *node_eat(struct node **hnd );
int node_cmp_num(struct node *l,struct node *r);
struct node *root = NULL;
unsigned lineno = 0;
/**
* strcmp for non-strings (unterminated)
*/
int diff_mem(void *lp, size_t lsiz,void *rp, size_t rsiz)
{
int diff=0;
size_t siz;
siz = (lsiz < rsiz) ? lsiz : rsiz;
if (!siz) diff = 0;
else {
diff = memcmp(lp,rp, siz);
if (!diff && lsiz != rsiz) diff = (lsiz < rsiz) ? -1 : 1;
}
return diff;
}
struct node ** node_hnd(struct node **hnd, char * data, size_t size)
{
struct node * tail;
int diff;
while (*hnd) {
for (tail = *hnd; tail->size == 0 ; tail = tail->uni.list) {;}
diff = diff_mem(tail->uni.data, tail->size, data,size);
if (diff < 0) hnd = &(*hnd)->prev ;
else if (diff > 0) hnd = &(*hnd)->next ;
else break;
}
return hnd;
}
int node_add(struct node **hnd, unsigned num, char *data, size_t size)
{
struct node *temp;
hnd = node_hnd(hnd, data, size);
if (!*hnd) {
*hnd = malloc (sizeof **hnd + size);
(*hnd)->num = num;
(*hnd)->next = NULL;
(*hnd)->prev = NULL;
(*hnd)->size = size;
memcpy((*hnd)->uni.data , data, size);
(*hnd)->uni.data[size] = 0; /* for strings ... */
return 0;
}
/* For duplicates, we only need to store one copy of the data.
* The node with the data is at the tail of the list,
* the others use the .list part of the union
* for the linked list of duplicates.
*/
for ( ;(*hnd)->size == 0; hnd = &(*hnd)->uni.list) {;}
/* overwrite tail node and append original tail to it */
temp = *hnd;
*hnd = malloc (sizeof **hnd );
memcpy(*hnd, temp, sizeof *temp);
(*hnd)->size = 0;
(*hnd)->uni.list = temp;
temp->prev = NULL;
temp->next = NULL;
temp->num = num;
temp->size = size;
return 1;
}
struct node *node_eat(struct node **hnd )
{
struct node *tmp;
tmp= *hnd;
if (!tmp) return tmp;
if (tmp->prev) return node_eat(&tmp->prev) ;
*hnd = tmp->next;
tmp->next = NULL;
tmp->prev = NULL;
return tmp;
}
int node_cmp_num(struct node *l,struct node *r)
{
if (!l) return -1;
if (!r) return 1;
if (l->num > r->num ) return 1;
if (l->num < r->num ) return -1;
return 0;
}
struct node *node_sort(struct node *ptr
, int (*cmp)( struct node *l,struct node *r) )
{
struct node *new, *tmp, **hnd;
int diff;
for (new=NULL; (tmp = node_eat( &ptr)) ; ) {
for (hnd= &new; *hnd; ) {
diff = (*cmp)(*hnd,tmp);
hnd= (diff>0) ? &(*hnd)->prev : &(*hnd)->next;
}
*hnd = tmp;
}
return new;
}
void dump_tree(FILE *fp, struct node *ptr)
{
struct node *tmp;
if (!ptr) return;
dump_tree(fp, ptr->prev);
for (tmp=ptr; ; tmp=tmp->uni.list ) {
fprintf(fp, "%u " , tmp->num );
if (tmp->size) break;
}
fprintf(fp, ":%*.*s"
, (int) tmp->size
, (int) tmp->size
, tmp->uni.data
);
dump_tree(fp, ptr->next);
}
#define my_file "text44.txt"
int main(int argc, char *argv[] )
{
size_t len;
char buff[200]; /* no check for length ... */
FILE *fp;
if ((fp = fopen(my_file, "r")) == NULL )
{
fprintf(stderr, "can't open file\n");
exit(EXIT_FAILURE);
}
while (fgets(buff, sizeof buff, fp)) {
len = strlen(buff);
node_add(&root, ++lineno, buff, len);
}
root = node_sort(root, node_cmp_num);
dump_tree(stdout, root);
exit(0);
}
// gcc bt2.c -Wall -o out
I get output right off the bat:
input:
The quick brown
fox jumps over the lazy
dog.
dog.
fox
output:
E:\gfortran\dan>out
1 :The quick brown
2 :fox jumps over the lazy
3 4 :dog.
5 :fox
E:\gfortran\dan>
As far as I can tell, this compiles and behaves. Questions:
q1) In this node, is there any difference between prev and next as opposed
to right and left?
struct node {
struct node *prev;
struct node *next;
unsigned num;
size_t size;
union {
char data[1];
struct node *list;
} uni;
};
q2) What's going on in this union?
Thanks for your comment and cheers,
--
Frank
Mistakes are a part of being human. Appreciate your mistakes for what they
are: precious life lessons that can only be learned the hard way. Unless
it's a fatal mistake, which, at least, others can learn from.
~~ Al Franken,