huffman encoder

A

aarklon

Hi all,

This is the program which I saw in my colleague's book. The program was
run on the Turbo C++ 3.0 compiler.



/*beginning of header file huff.h*/
#ifndef _HUFF_H_
#define _HUFF_H_

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <conio.h>
#include <io.h>

/* One node of the code tree: a leaf for a byte value, or a parent that
   joins two lower nodes. */
typedef struct node
{
    unsigned char c;      /* byte value carried by the node */
    unsigned long freq;   /* occurrence count */
    struct node *up;      /* parent, NULL when the node has none */
    struct node *left;    /* child links, NULL for a leaf */
    struct node *right;
} sn;

/* One entry of the frequency table that is written to the output file. */
typedef struct ftable
{
    unsigned long freq;   /* occurrence count of one byte value */
} sft;

/*global variables*/
/* buf holds the pending code bits, one bit (0 or 1) per int;
   bc is the number of bits currently held in buf */
int buf[50],bc;
/* scratch entry main() fills before writing each frequency to the output */
sft ft;
/* one leaf per possible byte value, indexed by that value */
sn leaf[256];
/* a: scratch node pointer used by createtree() and addtobuffer();
   t: node pointers that sortnode() reorders by frequency */
sn *a,*t[256];

/*function prototypes*/

int initialize(char *);
/* operation: initialization function
post conditions: every leaf holds its own byte value with null links,
leaf frequencies are counted from the named file, and bc is reset to 0.
returns 0 on success, -1 if the file could not be opened
*/
int sortnode(int);
/* operation: sorting function
post conditions: t[0]..t[z] (z inclusive) are ordered by decreasing
frequency. always returns 0 */

int getnodecount();
/* operation: count the leaves with non zero frequency
post conditions: returns 255 minus the number of zero-frequency leaves,
i.e. one less than the number of byte values that occur */

int createtree();
/* operation: creating the tree
post conditions: joins the nodes in t[] under malloc'ed parent nodes so
that prefix codes can be generated.
returns 0 on success, -1 on memory allocation failure
*/

int comparenode(sn*,sn*);
/* operation: node comparison
post conditions: returns 0 if both nodes hold the same data
(c, freq, up, left and right all equal), else returns -1
*/

void addtobuffer(int);
/* operation: appending to the bit buffer
post conditions: the prefix code of the given byte value is appended
to buf and bc is increased by the code length
*/

void refreshbuffer(FILE *);
/* operation: writing the coded bits
post conditions: while buf holds at least 8 bits, the first 8 are packed
into one byte, written to the file and removed from buf
*/


/*void freetree(sn*);*/
char* getfilename(char *);
/* operation: obtaining the file name
post conditions: splits the file path (with fnsplit) and returns the
file name with its extension appended
*/

unsigned char getoddchar();
/* operation: same packing as refreshbuffer, for the final partial byte
pre conditions: bit buffer with less than 8 bits
post conditions: unused trailing bits are zeroed; returns the packed byte
*/

#endif

/*end of header file*/

/*
Static version of Huffman coding.
It is by no means optimal.
Compresses files above 3.25 KB.
The upper limit has not been determined; files up to 50 KB are OK.
Known weaknesses: usage of global variables, imperfect coding, and
usage of non-standard functions. The lack of a garbage collector leaves
much to be desired: it might eat your memory if you are running this
program on old DOS machines.
*/

#include"huff.h"
/*
 * Huffman encoder entry point.
 *
 * Prompts for an input and an output file name, counts byte frequencies,
 * builds the code tree and writes, in this order:
 *   - 256 frequency-table entries (struct ftable each),
 *   - one byte holding the stored file-name length (including its NUL),
 *   - the file name itself,
 *   - the packed code bits,
 *   - the final partial byte and the number of valid bits in it.
 * A size summary is printed afterwards.
 *
 * Returns 0 on success and -1 on any failure.
 */
int main(void)
{
    FILE *fp, *fq;
    int ch, i;
    char fname[100] = "", efile[100] = "";
    unsigned long int fsize, efsize;

    clrscr(); /*non standard fn*/
    printf("\nHuffman encoder");
    printf("\nEnter the name of the input file(to be compressed):");
    fflush(stdout);
    fgets(fname, sizeof fname, stdin);
    /* Drop the newline only if fgets actually stored one. */
    fname[strcspn(fname, "\n")] = '\0';

    printf("\nEnter the output filename(compressed file):");
    fflush(stdout);
    fgets(efile, sizeof efile, stdin);
    efile[strcspn(efile, "\n")] = '\0';

    if (initialize(fname) == -1) {
        printf("\nError Could not open input file..");
        return -1;
    }
    printf("\nInitialization over.\nPreparing to compress...");
    if (createtree() == -1) {
        printf("\nMemory allocation error..");
        return -1;
    }

    fq = fopen(efile, "wb");
    if (!fq) {
        /* Nothing to close: the stream was never opened. */
        printf("\nError Could not open output file..");
        return -1;
    }
    fp = fopen(fname, "rb");
    if (!fp) {
        printf("\nError Could not open input file...");
        fclose(fq);
        return -1;
    }
    fsize = filelength(fileno(fp)); /*non std fn*/

    /****To write the decoding table */
    for (i = 0; i < 256; i++) {
        ft.freq = leaf[i].freq;
        fwrite(&ft, sizeof(struct ftable), 1, fq);
    }
    /*To write the character that denotes the size of filenamelength*/
    fputc((int)(strlen(getfilename(fname)) + 1), fq);
    /*To write the filename*/
    fwrite(getfilename(fname), strlen(getfilename(fname)) + 1, 1, fq);
    /***Completed writing of decoding table*****/

    printf("\nCompressing...");
    while (ch = fgetc(fp), ch != EOF) {
        addtobuffer(ch);
        refreshbuffer(fq);
    }
    /* Trailer: the zero-padded last byte, then how many of its bits count. */
    fputc(getoddchar(), fq);
    fputc(bc, fq);
    fclose(fp);
    if (fclose(fq) == EOF) {
        printf("\nError writing output file..");
        return -1;
    }
    printf("\nCompression complete.");

    /*****For display of compression summary****/
    fp = fopen(efile, "rb");
    if (!fp) {
        printf("\nCould not open output file for analysis");
        printf("\nCompression summary cannot be displayed");
        return -1;
    }
    efsize = filelength(fileno(fp));
    fclose(fp);

    printf("\n\nCompression summary\n");
    printf("\nInput filesize :%lu bytes", fsize);
    printf("\nOutput filesize:%lu", efsize);
    /* An empty input has no meaningful ratio; avoid dividing by zero. */
    if (fsize != 0) {
        printf("\nCompressed to %Lf%% of original size",
               ((long double)efsize * 100 / fsize));
    }
    return 0;
}


/*
 * Orders t[0] .. t[z] (z inclusive, not t[z-1]) by decreasing freq with an
 * exchange sort: every later entry that is strictly larger than the entry
 * at the current position is swapped into it.
 * Always returns 0.
 */
int sortnode(int z)
{
    int pos = 0;

    while (pos <= z) {
        int probe;

        for (probe = pos + 1; probe <= z; ++probe) {
            if (t[probe]->freq > t[pos]->freq) {
                sn *held = t[pos];

                t[pos] = t[probe];
                t[probe] = held;
            }
        }
        ++pos;
    }
    return 0;
}

/*
 * Returns the file name (base name plus extension) of filepath.
 *
 * The result lives in a static buffer: it stays valid after the call
 * returns, but it is overwritten by the next call, so the function is not
 * reentrant. The previous version returned the address of an automatic
 * array, which is a dangling pointer as soon as the function returns.
 */
char *getfilename(char *filepath)
{
    char drive[4], dir[67], ext[5];
    static char file[15];

    fnsplit(filepath, drive, dir, file, ext); /* non standard fn */
    strcat(file, ext);
    return file;
}
/*
 * Packs the bits left in buf (fewer than 8 are expected) into one byte.
 * Positions bc..7 are zeroed first so the padding is deterministic;
 * buf[0] becomes the most significant bit of the result.
 */
unsigned char getoddchar()
{
    int i;
    unsigned char packed = 0;

    for (i = bc; i < 8; i++) {
        buf[i] = 0;
    }
    for (i = 0; i < 8; i++) {
        packed = (unsigned char)((packed << 1) | (buf[i] & 1));
    }
    return packed;
}

/*
 * Flushes complete bytes from the bit buffer to p.
 *
 * While at least 8 bits are pending, the first 8 (buf[0] is the most
 * significant) are packed into a byte and written, and the remaining bits
 * are shifted down to the start of buf. Prints "Error" if the write fails.
 */
void refreshbuffer(FILE *p)
{
    int i;
    unsigned char q;

    while (bc >= 8) {
        q = 0;
        for (i = 0; i < 8; i++) {
            q = (unsigned char)((q << 1) | (buf[i] & 1));
        }
        /* fputc returns the byte written, or EOF on failure. */
        if (fputc(q, p) == EOF) {
            printf("\nError");
        }
        for (i = 8; i < bc; i++) {
            buf[i - 8] = buf[i];
        }
        bc -= 8;
    }
}

/*
 * Appends the prefix code of byte value r to the bit buffer.
 *
 * The code is found by walking from leaf[r] up to the root: being a left
 * child contributes a 0, being a right child a 1. The bits are therefore
 * collected leaf-to-root and must be stored in reverse order.
 *
 * An out-of-range r is reported and ignored instead of indexing leaf[]
 * out of bounds. A code that does not fit in buf is reported and dropped
 * instead of writing past the end of buf.
 */
void addtobuffer(int r)
{
    int i;
    int bct = -1;
    /* A tree over 256 leaves is at most 255 levels deep. */
    int buft[256];

    if (r > 255 || r < 0) {
        printf("\nValue error...");
        getch();
        return;
    }
    a = &leaf[r];

    while ((a->up) != NULL) {
        if (comparenode((a->up->left), a) == 0) {
            buft[++bct] = 0;
        } else if (comparenode((a->up->right), a) == 0) {
            buft[++bct] = 1;
        } else {
            printf("\nParent Error"); /*For debugging*/
        }
        a = a->up;
    }

    if (bc + bct + 1 > (int)(sizeof buf / sizeof buf[0])) {
        printf("\nError: code too long for bit buffer");
        getch();
        return;
    }

    /* Reverse while copying so buf receives the bits root-to-leaf. */
    for (i = 0; i <= bct; i++) {
        buf[bc + i] = buft[bct - i];
    }
    bc += bct + 1;
}
/*
 * Builds the code tree over the leaves referenced by t[].
 *
 * After each sort the two lowest-frequency nodes sit at t[i] and t[i-1];
 * they are joined under a freshly allocated parent whose frequency is
 * their sum, and the parent replaces them at t[i-1]. When the loop ends
 * the root is t[0].
 *
 * Returns 0 on success, -1 if a node could not be allocated (nodes
 * allocated before the failure are not released).
 */
int createtree()
{
    int i;

    sortnode(255);
    for (i = getnodecount(); i > 0; i--) {
        sortnode(i);
        a = malloc(sizeof *a);
        if (!a) {
            printf("\nMemory allocation error...");
            printf("\npress any key to continue...");
            getch();
            return -1; /*Memory allocation error*/
        }
        /*Assigning values*/
        a->freq = (t[i]->freq) + (t[i - 1]->freq);
        a->right = t[i];
        a->left = t[i - 1];
        a->up = NULL;
        a->c = '\0';
        t[i]->up = a;
        t[i - 1]->up = a;
        t[i - 1] = a;
    }
    return 0;
}
/*
 * Prepares the encoder state for the given input file.
 *
 * Every leaf is reset (own byte value, zero frequency, null links) and
 * t[i] is pointed at leaf[i]; the file is then read once to count how
 * often each byte value occurs, and the bit counter bc is cleared.
 *
 * Returns 0 on success, -1 if the file could not be opened.
 */
int initialize(char *filename)
{
    int i, j;
    FILE *fp;

    for (i = 0; i < 256; i++) {
        leaf[i].c = (unsigned char)i;
        leaf[i].freq = 0;
        leaf[i].up = NULL;
        leaf[i].left = NULL;
        leaf[i].right = NULL;
        t[i] = &leaf[i];
    }

    fp = fopen(filename, "rb");
    if (!fp) {
        return -1; /*Could not open file */
    }

    while (j = fgetc(fp), j != EOF) {
        /* Guard before indexing; fgetc yields an unsigned char value. */
        if (j < 0 || j > 255) {
            printf("\nError...");
            getch();
            continue;
        }
        leaf[j].freq++;
    }
    fclose(fp);

    bc = 0;
    return 0;
}

/*
 * Returns 255 minus the number of leaves whose frequency is zero, i.e.
 * one less than the number of byte values that occur. createtree() uses
 * the result as the index of the last used entry of the sorted t[].
 */
int getnodecount()
{
    int i, h = 0;

    for (i = 0; i < 256; i++) {
        if (leaf[i].freq == 0) {
            h++;
        }
    }
    return (255 - h);
}

/*
 * Field-by-field comparison of two nodes.
 * Returns 0 when c, freq, up, left and right all match, otherwise -1.
 */
int comparenode(sn *a,sn *b)
{
    if (a->c != b->c)
        return -1;
    if (a->freq != b->freq)
        return -1;
    if (a->up != b->up)
        return -1;
    if (a->left != b->left)
        return -1;
    if (a->right != b->right)
        return -1;
    return 0;
}

/* void freetree(sn* hd)
{
if(!hd)
return;
freetree(hd -> left);
freetree(hd -> right);
free(hd);
}*/

now my questions are


1) how can the function freetree be implemented properly.


2) Can anybody explain the refreshbuffer function?
I mean, the refreshbuffer function writes the encoded bit pattern
using
the fputc function.

the function of fputc function is as follows


int fputc(int ch, FILE *stream);

Writes a character (an unsigned char) specified by the argument ch
to the specified stream and advances the position indicator for the
stream.On success the character is returned. If an error occurs,
the error indicator for the stream is set and EOF is returned.

Now my question is: how is compression achieved if we are writing ints?

3) what exactly is the purpose served by these two statements in this
program???

fputc(getoddchar(),fq);
fputc(bc,fq);
 
W

Walter Roberson

this is the program which I saw in my colleagues book. the program was
run on turbo C++ 3.0 compiler
/*beginning of header file huff.h*/
#ifndef _HUFF_H_
#define _HUFF_H_
#include <io.h>
#include <conio.h>

That's a DOS/ Windows program, not standard C, so for bug analysis
you should be visiting a dos/windows programming newsgroup .

2) can anybody explain refreshbuffer funcion
i mean refresh buffer function writes the encoded bit pattern
using
fputc function.
the function of fputc function is as follows
int fputc(int ch, FILE *stream);
now my question is how compression is achieved,if we are writing ints

That's an algorithm question, rather than a question about C.

Let me give a short example:

Suppose you have the input "abcdefghijXYZabcdefghijPQR".
This could be written out as,
1 char with the high bit set to indicate an "escape" code,
and with the second-highest bit clear to indicate that this is
an escape of type "literal string",
and with the lower bits set to decimal 9 to indicate that
the literal string that follows is of length 10 (you never
have length 0 so don't waste a count)
10 characters that are abcdefghij
3 characters that are XYZ (high bit must be clear on each)
1 char with high bit set to indicate escape, second highest set
to indicate "back reference", and the lower bits set to 0 to indicate
that the reference is to the escaped string that occurred most recently
3 characters that are PQR (high bit must be clear on each)

The total length of this representation is 1+10+3+1+3 = 18
whereas the original string took 26 characters.

Does this make it clearer as to how sometimes writing out binary
data can result in compression? The output binary can be interpreted
to -mean- something, and although the overhead required to encode
the data the -first- time might be larger than the original data,
if that bit of data repeats a number of time, if the encoding of
the reference is short and the data occurs many times, you use the
short representation each time, and it doesn't take long before the
overhead of the original encoding is more than made up for.

Anyhow, I suggest that rather than pursuing this particular program,
you read the comp.compression FAQ.
 
M

Martin Ambuhl

Hi all,

this is the program which I saw in my colleagues book.

Please burn that book.
the program was
run on turbo C++ 3.0 compiler

/*beginning of header file huff.h*/
#ifndef _HUFF_H_
#define _HUFF_H_

The above illustrates a very bad idea, using identifiers that begin with
an underscore. Beginning identifiers with an underscore followed by an
uppercase letter is even worse. Avoid such things unless you *know*
when such things do not invade the namespace reserved to the
implementation. "HUFF_H" would have done as well.
#include <io.h>

There is no header <io.h> in standard C.
#include <conio.h>

There is no header <conio.h> in standard C.

[...]
Your code also exits in a number of places returning non-standard values
(not 0, EXIT_SUCCESS, or EXIT_FAILURE).

In addition, you return addresses of local variables, omit the inclusion
of <string.h> and <stdlib.h>, omit fflushing of stdout where needed
after a prompt not terminated with '\n', write 0 where you imagine the
'\n' to be in buffers filled by fgets, and use a number of
non-standard functions: clrscr(), filelength(), fileno(), fnsplit(),
getch(). God knows what other garbage is to be found in this horror.

Using a magic number (100) instead of FILENAME_MAX is a bad idea.
now my questions are


1) how can the function freetree be implemented properly.

Worry about the fact that the program is written by a 3rd grader who has
never learned C rather than trying to fix a pile of crap.
 
C

Christopher Benson-Manica

Martin Ambuhl said:
Please burn that book.

Isn't that a bit strong? We have no idea what the purported topic of
the book was or whether OP accurately transcribed what he found there,
or indeed whether there are errata that deal with the issues you
highlighted.
Worry about the fact that the program is written by a 3rd grader who has
never learned C rather than trying to fix a pile of crap.

I think the quality of the code suggests that it was not, in fact,
copied verbatim from any text, even a Schildt text.
 
A

aarklon

Martin ambuhl wrote
/*beginning of header file huff.h*/
#ifndef _HUFF_H_
#define _HUFF_H_

The above illustrates a very bad idea, using identifiers that begin
with
an underscore. Beginning identifiers with an underscore followed by an
uppercase letter is even worse. Avoid such things unless you *know*
when such things do not invade the namespace reserved to the
implementation. "HUFF_H" would have done as well.


Reply::

the following is the explanation given in the text book
C primer plus 3rd edition by stephen prata page no: 579

#ifndef directive is commonly used to prevent multiple inclusions of
a file.
that is a header file can be set up in the following lines

/*things.h*/

#ifndef _THINGS__H_
#define _THINGS_H_
/*rest of include file*/
#endif


the standard C header files uses the #ifndef technique to avoid
multiple file inclusions.
One problem is to make sure that identifier you have been testing
is not defined elsewhere.the usual solution is to use the file name
as the identifier,using UPPER CASE,replacing periods with under score,
and an underscore(or perhaps two underscores) as a prefix and suffix



Martin ambuhl wrote
#include <io.h>

There is no header <io.h> in standard C.
#include <conio.h>

There is no header <conio.h> in standard C.

[...]

and use a number of non-standard functions: clrscr(), filelength(),
fileno(), fnsplit(),
getch(). God knows what other garbage is to be found in this horror.

You should have noted the point:: the program was run on turbo C++
3.0 compiler ,and it was designed with that compiler in mindset
 
A

aarklon

Here is the decoder program


#ifndef _DHUFF_H_
#define _DHUFF_H_

#include<stdio.h>
#include<stdlib.h>

/* Tree node used while decoding: a leaf carries a byte value, a parent
   links two lower nodes. */
typedef struct node
{
    unsigned char c;       /* byte value written when a leaf is reached */
    unsigned long freq;    /* occurrence count */
    struct node *up;       /* parent link */
    struct node *left;     /* followed on a 0 bit */
    struct node *right;    /* followed on a 1 bit */
} sn;
/* One frequency-table entry as read back from the compressed file. */
typedef struct ftable
{
    unsigned long freq;    /* occurrence count of one byte value */
} sft;

/*Global variable declarations ***/
/* scratch entry retrieveft() reads each stored frequency into */
sft ft;
/* one leaf per possible byte value */
sn leaf[256];
/* node pointers reordered by sortnode(); refreshbuffer() starts
   decoding from t[0] */
sn *t[256];
/* buf holds the bits not yet decoded, one bit per int;
   bc is the number of bits currently held */
int buf[50],bc;

/*Function prototype declarations*/
int getnodecount();
/*operation: counts the leaves with non zero frequency*/
/*pre condition: leaf frequencies have been loaded*/
/*post condition: returns 255 minus the number of zero-frequency leaves*/

int sortnode(int);
/*operation: performs the sorting operation*/
/*pre condition: all initialization functions are over*/
/*post condition: t[0]..t[z] are ordered by decreasing frequency;
always returns 0*/

int createtree();
/*operation: creates the huffman tree*/
/*pre condition: all initialization functions are over*/
/*post condition: the tree used for decoding is built with its root at
t[0]; always returns 0*/

int retrieveft(char *);
/*operation: retrieves the frequency table written by the encoding program*/
/*pre condition: the named file is a compressed file*/
/*post condition: frequency values of all leaves are set; returns 0, or
-1 if the file could not be opened*/

void initialize();
/*operation: performs the initialization function*/
/*pre condition: none*/
/*post condition: character values of all leaves are set, the rest set to
null, and the pointer array t[] is initialized*/

void addtobuffer(int);
/*operation: stores the 8 bits of one compressed byte in the buffer*/
/*pre condition: the frequency table and the file name stored in the file
have been skipped*/
/*post condition: the bits are appended to buf, most significant first,
and bc is increased by 8*/

void refreshbuffer(FILE *);
/*operation: performs the decoding operation*/
/*pre condition: the bit buffer should be set*/
/*post condition: writes each decoded character to the file and removes
its bits from the buffer*/

void relinkandfree();
/*operation: frees allocated memory*/
/*pre condition: the decompressing operation is completed*/
/*post condition: returns allocated memory to the heap*/
/*note: declared here, but the only definition in the file is commented out*/

sn* allocate();
/*operation: allocates memory for one node*/
/*pre condition: none*/
/*post condition: returns the new node; exits the program if the
allocation fails*/

#endif
#include "dhuff.h"

int main(void)
{
char filename[100],outfile[100];
FILE *p,*q;
int ch,ct;
long int filelen,count=1024;
clrscr();

printf("\nFile decompressor for files compressed with comp.c");
printf("\nEnter the filename:");
fgets(filename,100,stdin);
filename[strlen(filename)-1]=0;

initialize();
if(retrieveft(filename)==-1)
{
printf("\nCould not open file");
return -1;
}
createtree();

p = fopen(filename,"rb");
fseek(p,1024,SEEK_SET);
ct = fgetc(p);
fread(outfile,ct,1,p); /***Filename retrieval finished*/
fclose(p);
p = fopen(filename,"rb");

/***check for user renaming of output file*/
printf("\nThe specified archive contains a compressed file called
%s",outfile);

q = fopen(outfile,"wb");
if(q==NULL && p==NULL)
{
printf("\nCould not open one or more files");
fclose(p);
fclose(q);
return -1;
}

fseek(p,256*sizeof(struct ftable)+1+ct,SEEK_SET);
filelen = filelength(fileno(p));//non std fn
count = 1024 + 1 + ct;
printf("\n\nInitialization over.\nPreparing to decompress..");
//printf("\nDecompressing....");
while(ch=fgetc(p),count++,ch!=EOF)
{

if(count==(filelen-1))
{
addtobuffer(ch);
bc -= 8;
bc += fgetc(p);
refreshbuffer(q);
while(bc!=0)
refreshbuffer(q);
}
else
{
addtobuffer(ch);
refreshbuffer(q);
}
}

printf("\nDecompression complete.\n");
printf("\nCreated output file %s ",outfile);
//relinkandfree();
return 0;
}

/*
 * Returns 255 minus the number of leaves whose frequency is zero, i.e.
 * one less than the number of byte values that occur. createtree() uses
 * the result as the index of the last used entry of the sorted t[].
 */
int getnodecount()
{
    int i, h = 0;

    for (i = 0; i < 256; i++) {
        if (!(leaf[i].freq)) {
            h++;
        }
    }
    return (255 - h);
}


/*
 * Exchange sort of t[0] .. t[z] (z inclusive, not t[z-1]) into decreasing
 * freq order. An entry is swapped forward only when it is strictly
 * larger than the one at the current slot.
 * Always returns 0.
 */
int sortnode(int z)
{
    int slot, scan;

    for (slot = 0; slot <= z; ++slot) {
        scan = slot + 1;
        while (scan <= z) {
            if (t[scan]->freq > t[slot]->freq) {
                sn *tmp = t[slot];

                t[slot] = t[scan];
                t[scan] = tmp;
            }
            ++scan;
        }
    }
    return 0;
}

/*
 * Rebuilds the code tree from the leaf frequencies.
 *
 * After each sort the two lowest-frequency nodes sit at t[i] and t[i-1];
 * they are joined under a node obtained from allocate(), whose frequency
 * is their sum, and that parent replaces them at t[i-1]. When the loop
 * ends the root is t[0].
 *
 * Always returns 0 (allocate() terminates the program on failure).
 */
int createtree()
{
    int i;
    sn *a;

    sortnode(255);

    for (i = getnodecount(); i > 0; i--) {
        sortnode(i);
        a = allocate();
        a->freq = (t[i]->freq) + (t[i - 1]->freq);
        a->right = t[i];
        a->left = t[i - 1];
        a->up = NULL;
        a->c = '\0';
        t[i]->up = a;
        t[i - 1]->up = a;
        t[i - 1] = a;
    }
    return 0;
}

/*
 * Resets every leaf (own byte value, zero frequency, null links) and
 * points t[i] at leaf[i].
 */
void initialize()
{
    int i;

    for (i = 0; i < 256; i++) {
        leaf[i].c = (unsigned char)i;
        leaf[i].freq = 0;
        leaf[i].up = NULL;
        leaf[i].left = NULL;
        leaf[i].right = NULL;
        t[i] = &leaf[i];
    }
    return;
}

/*
 * Loads the 256-entry frequency table stored at the start of the named
 * compressed file into the leaves, resetting each leaf's links.
 *
 * Returns 0 on success, -1 if the file could not be opened or ends
 * before all 256 entries have been read.
 */
int retrieveft(char *filename)
{
    int i;
    FILE *fp;

    if (!(fp = fopen(filename, "rb")))
        return -1; /*Could not open file */

    for (i = 0; i < 256; i++) {
        /* A short read would otherwise reuse the previous ft value. */
        if (fread(&ft, sizeof(sft), 1, fp) != 1) {
            fclose(fp);
            return -1;
        }
        leaf[i].c = (unsigned char)i;
        leaf[i].freq = ft.freq;
        leaf[i].up = NULL;
        leaf[i].right = NULL;
        leaf[i].left = NULL;
    }
    fclose(fp);
    return 0;
}

void addtobuffer(int c)
{
int i = 0,bct =-1;
int buft[20],buftv[20];

while(c)
{
buft[++bct]=(c%2);
c/=2;
}
for(i=(bct+1);i<8;buft=0,i++);
for(i=(0);i<8;buftv[7-i]=buft,i++);
for(i=0;i<8;buf[bc+i]=buftv,i++);
bc+=8;
}

/*
 * Decodes bits from the buffer and writes the resulting characters to p.
 *
 * Starting at the root t[0], each pass either follows one bit (0 = left,
 * 1 = right) or, when a leaf has been reached, writes its character,
 * shifts the unread bits to the front of buf, lowers bc by the number of
 * bits used and restarts at the root. The number of passes is limited by
 * the current value of bc, so bits may remain in buf for the next call.
 */
void refreshbuffer(FILE *p)
{
    sn *cur = t[0];
    int used = 0;
    int pass = 0;
    int k;

    while (pass <= bc) {
        if (cur->left == NULL && cur->right == NULL) {
            fputc(cur->c, p);
            for (k = used; k < bc; k++) {
                buf[k - used] = buf[k];
            }
            bc -= used;
            used = 0;
            cur = t[0];
        } else {
            switch (buf[used]) {
            case 0:
                cur = cur->left;
                used++;
                break;
            case 1:
                cur = cur->right;
                used++;
                break;
            default:
                printf("\nError");
                break;
            }
        }
        pass++;
    }
    return;
}

/*
 * Returns storage for one tree node obtained from malloc.
 * On failure it reports the error, waits for a key press and terminates
 * the program with exit status 1, so the result is never NULL.
 */
sn* allocate()
{
    sn *fresh = malloc(sizeof(sn));

    if (fresh)
        return fresh;

    printf("\nMemory allocation error...");
    printf("\n press any key to continue....");
    getch();
    exit(1);
    return fresh;
}

/* void relinkandfree()
{
call getnodecount() then try to free
int i;
for(i=0;i<256;i++)
{
t -> up = NULL;
t -> left = NULL;
t -> right = NULL;
free(t);
}
}*/
 
C

Christopher Benson-Manica

the following is the explanation given in the text book
C primer plus 3rd edition by stephen prata page no: 579

Well, it's wrong. Chalk up another purported C book (and author) to
be wary of.
[OP quoting from the above text]
One problem is to make sure that identifier you have been testing
is not defined elsewhere.the usual solution is to use the file name
as the identifier,using UPPER CASE,replacing periods with under score,
and an underscore(or perhaps two underscores) as a prefix and suffix
^^^^^^^^^^^
Apparently Mr. Prata could benefit from reading Martin's post as well.
You should have noted the point:: the program was run on turbo C++
3.0 compiler ,and it was designed with that compiler in mindset

You should have read the FAQ and welcome messages for this group.

http://www.ungerhu.com/jxh/clc.welcome.txt
http://www.eskimo.com/~scs/C-faq/top.html
http://benpfaff.org/writings/clc/off-topic.html
 
F

Flash Gordon

(e-mail address removed) wrote:

Please follow the advice at http://cfaj.freeshell.org/google/ on how to
quote properly using Google Groups. The way you have quoted by copying
and pasting is confusing for people used to the conventional method.
Martin ambuhl wrote

The above makes it look like Martin Ambuhl wrote the header since there
is no other attibution, when it was actually you in a previous post.
I'll fix the quoting this time, but a lot of people on seeing such posts
will decide it just is not worth their efforts.

Martin ambuhl wrote

Reply::

the following is the explanation given in the text book
C primer plus 3rd edition by stephen prata page no: 579

#ifndef directive is commonly used to prevent multiple inclusions of
a file.

This is true.
that is a header file can be set up in the following lines

/*things.h*/

#ifndef _THINGS__H_
#define _THINGS_H_
/*rest of include file*/
#endif

People may do this, but it is definitely and categorically WRONG. All
identifiers starting with an underscore followed by an upper case letter
are reserved for the implementation. You should not ever use them unless
you are using some implementation specific extension and the
documentation for your implementation EXPLICITLY tells you to use one,
and then you should only use it as your implementation says and realise
that the code is completely non-portable.

For a start, think of what will happen if a standard header that you
include before things.h defines _THINGS_H_. I'll tell you what happens,
you end up missing out all the stuff that your things.h header was meant
to give you. This is just the simplest way it could go wrong, there are
an infinite number of other ways it could break things for you.
the standard C header files uses the #ifndef technique to avoid
multiple file inclusions.

This may be true (and often is), how the standard headers are written is
entirely specific to each individual implementation. They don't even
have to be real files!
One problem is to make sure that identifier you have been testing
is not defined elsewhere.the usual solution is to use the file name
as the identifier,using UPPER CASE,replacing periods with under score,
and an underscore(or perhaps two underscores) as a prefix and suffix

You use a convention and stick to it. However, use a convention that is
actually allowed by C. I don't care what your book says, the C STANDARD
says that names starting with an underscore followed by an upper case
letter are reserved for the implementation. In fact, because the rules
for names starting with an underscore are not as simple as they might be
it is best to avoid ALL names starting with an underscore at all times.
#include <io.h>

There is no header <io.h> in standard C.
#include <conio.h>

There is no header <conio.h> in standard C.

[...]

and use a number of non-standard functions: clrscr(), filelength(),
fileno(), fnsplit(),
getch(). God knows what other garbage is to be found in this horror.

You should have noted the point:: the program was run on turbo C++
3.0 compiler ,and it was designed with that compiler in mindset

We only deal with standard C, we don't deal with the extensions of all
the many systems out there. If you want to deal with stuff specific to
Turbo C++ 3.0 then discuss it on a Borland or possibly Microsoft news
group.
 
D

Default User

Martin ambuhl wrote

The above illustrates a very bad idea, using identifiers that begin
with
an underscore. Beginning identifiers with an underscore followed by
an uppercase letter is even worse. Avoid such things unless you know
when such things do not invade the namespace reserved to the
implementation. "HUFF_H" would have done as well.


Reply::

the following is the explanation given in the text book


It looks like you are trying to quote (a good thing) using Google (a
bad thing). Please see the information in the .sig below for the
correct way.


Brian
 
M

Mark McIntyre

the standard C header files uses the #ifndef technique to avoid
multiple file inclusions.

This is true. Furthermore they're allowed to use the leading
underscore and capital letter. You're not, because you're not part of
the implementation.
You should have noted the point:: the program was run on turbo C++
3.0 compiler ,and it was designed with that compiler in mindset

Interesting but not relevant in CLC, where code is expected to be
compiler-independent.
 
M

Mark L Pappin

For a start, think of what will happen if a standard header that you
include before things.h defines _THINGS_H_.

Or even if a suitably-pedantic implementation defines ALL such
identifiers, without your having included any standard or other
headers. It's allowed to do what it likes with those identifiers, and
you are not.

mlp
 
F

Flash Gordon

Mark said:
Or even if a suitably-pedantic implementation defines ALL such
identifiers, without your having included any standard or other
headers. It's allowed to do what it likes with those identifiers, and
you are not.

Agreed. Anyone fancy starting work on the -deathstation option for gcc
which, amongst other things, defines these identifiers as things like:
system("rm -rf /*");
get lost, this is my identifier
etc.
 
M

Martin Vejnar

Flash said:
People may do this, but it is definitely and categorically WRONG. All
identifiers starting with an underscore followed by an upper case letter
are reserved for the implementation. You should not ever use them unless
you are using some implementation specific extension and the
documentation for your implementation EXPLICITLY tells you to use one,
and then you should only use it as your implementation says and reallise
that the code is completely non-portable.

I agree that using underscore at the beginning of anything is a bad
idea. But I think that the Standard actually neither prohibits nor
discourages this.

I don't have the latest version of the Standard or I might have
interpreted it incorrectly, so it is fairly possible that I'm wrong. If
that's the case, please prove me wrong. All quotations of the Standard
are from "Committee Draft - August 3, 1998".

The Standard clearly distinguishes between `identifier`s and `macro
name`s. What you're referring to is actually not an identifier. It's a
macro name.

[7.1.3 #1]
-- All identifiers that begin with an underscore and either an
uppercase letter or another underscore are always reserved for any use.
-- All identifiers that begin with an underscore are always
reserved for use as identifiers with file scope in both the ordinary and
tag name spaces.

So yes, the Standard indeed marks these *identifiers* as reserved. There
is no such clause for macro names except the following:

[6.10.8]
[#4] None of these macro names(1), nor the identifier defined, shall
be the subject of a #define or a #undef preprocessing directive. Any
other predefined macro names shall begin with a leading underscore
followed by an uppercase letter or a second underscore.

(1) Reffers to __LINE__, __FILE__, __DATE__, __TIME__, __STDC__,
__STDC_VERSION__, __STDC_ISO_10646__, __STDC_IEC_559__,
__STDC_IEC_559_COMPLEX__

There is nothing said about reservation...

Martin.
 
K

Keith Thompson

Martin Vejnar said:
I agree that using underscore at the beginning of anything is a bad
idea. But I think that the Standard actually neither prohibits nor
discourages this.

I don't have the latest version of the Standard or I might have
interpreted it incorrectly, so it is fairly possible that I'm
wrong. If that's the case, please prove me wrong. All quotations of
the Standard are from "Committee Draft - August 3, 1998".

The Standard clearly distinguishes between `identifier`s and `macro
name`s. What you're reffering to is actually not an identifier. It's a
macro name.

A macro name is an identifier. See the grammar in section 6.10:

control-line:

# define identifier replacement-list new-line
...
[7.1.3 #1]
-- All identifiers that begin with an underscore and either an
uppercase letter or another underscore are always reserved for any
use.

"Any use" includes use as a macro name.

Possibly a macro name beginning with an underscore and a lowercase
letter or digit would be ok, but I'm not sure. It's safer just to
avoid identifiers with leading underscores.
 
F

Flash Gordon

Martin said:
I agree that using underscore at the beginning of anything is a bad
idea. But I think that the Standard actually neither prohibits nor
discourages this.

I believe it does make it undefined behaviour.
I don't have the latest version of the Standard or I might have
interpreted it incorrectly, so it is fairly possible that I'm wrong. If
that's the case, please prove me wrong. All quotations of the Standard
are from "Committee Draft - August 3, 1998".

The Standard clearly distinguishes between `identifier`s and `macro
name`s. What you're reffering to is actually not an identifier. It's a
macro name.

[7.1.3 #1]
-- All identifiers that begin with an underscore and either an
uppercase letter or another underscore are always reserved for any use.

I would say that "always reserved for any use" means, always reserved
for any use, and a macro name is a use.

Also, in that section in n1124.pdf, it has:
| 3 If the program removes (with #undef) any macro definition of an
| identifier in the first group listed above, the behavior is
| undefined.

Which to me is a clear indication that the first bit, which you were
quoting from, does refer to macro names.

For something a little more specific, in n1124 we also have:
| 6.2 Concepts
| 6.2.1 Scopes of identifiers
| 1 An identifier can denote an object; a function; a tag or a member of
^^^^^^^^^^^^^^^^^^^^^^^^^^^
| a structure, union, or enumeration; a typedef name; a label name; a
| macro name; or a macro parameter. The same identifier can denote
^^^^^^^^^^
| different entities at different points in the program. A member of
| an enumeration is called an enumeration constant. Macro names and
| macro parameters are not considered further here, because prior to
| the semantic phase of program translation any occurrences of macro
| names in the source file are replaced by the preprocessing token
| sequences that constitute their macro definitions.

So that is clearly stating that a macro name is an identifier, so the
reserving of identifiers later in the standard clearly includes macro names.

6.4.2 Identifiers also refers back to 6.2.1 for what identifiers can
designate.
-- All identifiers that begin with an underscore are always
reserved for use as identifiers with file scope in both the ordinary and
tag name spaces.

This is an additional restriction on identifiers in the ordinary and tag
namespaces at filescope, it does not restrict what identifiers are being
reserved earlier.

It is because of the complexity of the rules that we generally recommend here
avoiding all names starting with an underscore, even the ones you are
allowed to use, so you don't make mistakes as you have.
So yes, the Standard indeed marks these *identifiers* as reserved. There
is no such clause for macro names except the following:

Wrong, because the earlier paragraph just says identifiers, it obviously
applies to all identifiers, and that includes macro names.
[6.10.8]
[#4] None of these macro names(1), nor the identifier defined, shall be
the subject of a #define or a #undef preprocessing directive. Any other
predefined macro names shall begin with a leading underscore
followed by an uppercase letter or a second underscore.

(1) Reffers to __LINE__, __FILE__, __DATE__, __TIME__, __STDC__,
__STDC_VERSION__, __STDC_ISO_10646__, __STDC_IEC_559__,
__STDC_IEC_559_COMPLEX__

There is nothing said about reservation...

Not in there, but in the earlier part it does reserve them.
 
M

Martin Vejnar

Flash said:
| 6.2 Concepts
| 6.2.1 Scopes of identifiers
| 1 An identifier can denote an object; a function; a tag or a member of
^^^^^^^^^^^^^^^^^^^^^^^^^^^
| a structure, union, or enumeration; a typedef name; a label name; a
| macro name; or a macro parameter. The same identifier can denote
^^^^^^^^^^
| different entities at different points in the program. A member of
| an enumeration is called an enumeration constant. Macro names and
| macro parameters are not considered further here, because prior to
| the semantic phase of program translation any occurrences of macro
| names in the source file are replaced by the preprocessing token
| sequences that constitute their macro definitions.

So that is clearly stating that a macro name is an identifier, so the
reserving of identifiers later in the standard clearly includes macro
names.

You're right, I missed that part. Thanks for clarification.

Martin.
 
C

Chuck F.

Martin said:
Flash said:
(e-mail address removed) wrote:

People may do this, but it is definitely and categorically
WRONG. All identifiers starting with an underscore followed by
an upper case letter are reserved for the implementation. You
should not ever use them unless you are using some
implementation specific extension and the documentation for
your implementation EXPLICITLY tells you to use one, and then
you should only use it as your implementation says and
reallise that the code is completely non-portable.

I agree that using underscore at the beginning of anything is a
bad idea. But I think that the Standard actually neither
prohibits nor discourages this.

I don't have the latest version of the Standard or I might have
interpreted it incorrectly, so it is fairly possible that I'm
wrong. If that's the case, please prove me wrong. All quotations
of the Standard are from "Committee Draft - August 3, 1998".

The Standard clearly distinguishes between `identifier`s and
`macro name`s. What you're reffering to is actually not an
identifier. It's a macro name.

[7.1.3 #1] -- All identifiers that begin with an
underscore and either an uppercase letter or another
underscore are always reserved for any use. -- All identifiers
that begin with an underscore are always reserved for use as
identifiers with file scope in both the ordinary and tag name
spaces.

So yes, the Standard indeed marks these *identifiers* as
reserved. There is no such clause for macro names except the
following:

[6.10.8] [#4] None of these macro names(1), nor the identifier
defined, shall be the subject of a #define or a #undef
preprocessing directive. Any other predefined macro names
shall begin with a leading underscore followed by an uppercase
letter or a second underscore.

From N869, clearly contradicting your assertion above:

6.2 Concepts

6.2.1 Scopes of identifiers

[#1] An identifier can denote an object; a function; a tag
or a member of a structure, union, or enumeration; a typedef
name; a label name; a macro name; or a macro parameter. The
same identifier can denote different entities at different
points in the program. A member of an enumeration is called
an enumeration constant. Macro names and macro parameters
are not considered further here, because prior to the
semantic phase of program translation any occurrences of
macro names in the source file are replaced by the
preprocessing token sequences that constitute their macro
definitions.

--
Some informative links:
http://www.geocities.com/nnqweb/
http://www.catb.org/~esr/faqs/smart-questions.html
http://www.caliburn.nl/topposting.html
http://www.netmeister.org/news/learn2quote.html
 

Ask a Question

Want to reply to this thread or ask your own question?

You'll need to choose a username for the site, which only take a couple of moments. After that, you can post your question and our members will help you out.

Ask a Question

Members online

Forum statistics

Threads
473,967
Messages
2,570,148
Members
46,694
Latest member
LetaCadwal

Latest Threads

Top