Add 1 more variable to print

E

ela

I tried to add one more variable to print by using our previous programmer's
program but failed. I tried using pointer variable but also failed. the
compilable codes are as follows and

lines following the comment
"/////////////// I want to print MCC so I add this variable here"

were added by me.

Sorry but unable to provide the ana files to test but I wish this problem
can still be solved by somebody.


#include <stdio.h>
#include <math.h>
#include <stdlib.h>
#include <assert.h>
#include <map>
#include <vector>
#include <set>
using namespace std;

const char* anaFilename = "%d.txt";

char buffer[65536];
char buffer2[65536];
void wrongArgv(char **argv){
fprintf(stderr,
"Usage: %s [select how many] [total # model] \n",\
*argv);
exit(1);
}

int main(int argc, char **argv){
map<char*, vector<double> > tptable;
if(argc != 3)
wrongArgv(argv);
int select = atoi(argv[1]);
int all = atoi(argv[2]);
if(select<=0 || all<=0 || select > all)
wrongArgv(argv);
// use map for priority queue
map<double, int> mcc; //matthews correlation coefficient
for(int i=0; i<all; ++i){
sprintf(buffer,anaFilename, i);
FILE *ana = fopen(buffer, "r");
for(int j=1; j<=17; ++j)
fgets(buffer, sizeof(buffer), ana);
double TP, TN, FP, FN, MCC;
{
int zero, one;
fgets(buffer, sizeof(buffer), ana);
assert(3==sscanf(buffer," %d %lf %lf",&zero, &TN, &FP));
assert(0==zero);
fgets(buffer, sizeof(buffer), ana);
assert(3==sscanf(buffer," %d %lf %lf",&one, &FN, &TP));
assert(1==one);

/////////////// I want to print MCC so I add this variable here
MCC = (TP*TN-FP*FN)/ sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN));



tptable["TP"].push_back(TP);
tptable["TN"].push_back(TN);
tptable["FP"].push_back(FP);
tptable["FN"].push_back(FN);



/////////////// I want to print MCC so I add this variable here
tptable["MCC"].push_back(MCC);
}
assert(mcc.find(MCC)==mcc.end());
mcc[MCC] = i;
fclose(ana);
}
set<int> top;
map<double,int>::reverse_iterator riter=mcc.rbegin();
for(int i=0;
i<select;
++i, ++riter){
top.insert((*riter).second);
}
{
FILE *file = fopen("Eval.txt","w");
fprintf(file,"PPV\tNPV\tSP\tSE\tMCC\n");
for(int i=0; i<all; ++i){
double tp = tptable["TP"];
double fp = tptable["FP"];
double tn = tptable["TN"];
double fn = tptable["FN"];

/////////////// I want to print MCC so I add this variable here
double tpmcc = tptable["MCC"];



fprintf(file,"%1.3f \t%1.3f\t%1.3f\t%1.3f\n",ppv,npv,sp,se,tpmcc);
}
freopen("SEL.txt","w",file);
fprintf(file,"%d",*top.begin());
for(set<int>::iterator it = ++top.begin();
it!=top.end();
++it){
fprintf(file," %d",*it);
}
fclose(file);
}
return 0;
}
 
A

Alf P. Steinbach

* ela:
I tried to add one more variable to print by using our previous programmer's
program but failed. I tried using pointer variable but also failed. the
compilable codes are as follows and

lines following the comment
"/////////////// I want to print MCC so I add this variable here"

were added by me.

Please check the FAQ on how to post a question about Code That Does Not Work.

Sorry but unable to provide the ana files to test but I wish this problem
can still be solved by somebody.

What *is* "this problem"?

#include <stdio.h>
#include <math.h>
#include <stdlib.h>
#include <assert.h>
#include <map>
#include <vector>
#include <set>
using namespace std;

const char* anaFilename = "%d.txt";

Bad idea.

char buffer[65536];
char buffer2[65536];

Bad idea.

void wrongArgv(char **argv){
fprintf(stderr,
"Usage: %s [select how many] [total # model] \n",\
*argv);

Bad idea.
exit(1);
}

int main(int argc, char **argv){
map<char*, vector<double> > tptable;

Bad idea.

if(argc != 3)
wrongArgv(argv);
int select = atoi(argv[1]);
int all = atoi(argv[2]);
if(select<=0 || all<=0 || select > all)
wrongArgv(argv);
// use map for priority queue
map<double, int> mcc; //matthews correlation coefficient

Bad idea.

for(int i=0; i<all; ++i){
sprintf(buffer,anaFilename, i);
Unnecessary.


FILE *ana = fopen(buffer, "r");
for(int j=1; j<=17; ++j)
fgets(buffer, sizeof(buffer), ana);


double TP, TN, FP, FN, MCC;

Bad idea.

{
int zero, one;
fgets(buffer, sizeof(buffer), ana);
assert(3==sscanf(buffer," %d %lf %lf",&zero, &TN, &FP));
assert(0==zero);
fgets(buffer, sizeof(buffer), ana);
assert(3==sscanf(buffer," %d %lf %lf",&one, &FN, &TP));
assert(1==one);

/////////////// I want to print MCC so I add this variable here
MCC = (TP*TN-FP*FN)/ sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN));



tptable["TP"].push_back(TP);
tptable["TN"].push_back(TN);
tptable["FP"].push_back(FP);
tptable["FN"].push_back(FN);

Bad idea.

/////////////// I want to print MCC so I add this variable here
tptable["MCC"].push_back(MCC);
}
assert(mcc.find(MCC)==mcc.end());
Unnecessary.


mcc[MCC] = i;

Bad idea.

fclose(ana);
}
set<int> top;
map<double,int>::reverse_iterator riter=mcc.rbegin();

Bad idea.

for(int i=0;
i<select;
++i, ++riter){
top.insert((*riter).second);
}
{
FILE *file = fopen("Eval.txt","w");
fprintf(file,"PPV\tNPV\tSP\tSE\tMCC\n");
for(int i=0; i<all; ++i){
double tp = tptable["TP"];
double fp = tptable["FP"];
double tn = tptable["TN"];
double fn = tptable["FN"];


Bad idea.

/////////////// I want to print MCC so I add this variable here
double tpmcc = tptable["MCC"];



fprintf(file,"%1.3f \t%1.3f\t%1.3f\t%1.3f\n",ppv,npv,sp,se,tpmcc);


I count four % and five args.

}
freopen("SEL.txt","w",file);
fprintf(file,"%d",*top.begin());
for(set<int>::iterator it = ++top.begin();
it!=top.end();
++it){
fprintf(file," %d",*it);
}
fclose(file);
}
return 0;
}

This code is horrible.

But presumably your problem is identified by the last of my in-code comments.


Cheers & hth.,

- Alf
 
J

Joe Smith

Alf P. Steinbach said:
* ela:
void wrongArgv(char **argv){
fprintf(stderr,
"Usage: %s [select how many] [total # model] \n",\
*argv);

Bad idea.

What in particular do you take issue with there?

I see a few minor sytlistic things, but nothing too major.
First of all, streams should probably have been used here, rather than the
old printf-style function. I would have specified argv[0], rather than using
*argv.

But that is all that I see as too terrible. Minor issues are that path
components may end up in the printed file name, and that there is also no
guarantee that the all platforms would use argv[0] to store the executable
name.

The location where the function is called does not ensure that (argvc > 0),
and the phrasing "select how many" seems rather awful.
 
A

Alf P. Steinbach

* Joe Smith:
Alf P. Steinbach said:
* ela:
void wrongArgv(char **argv){
fprintf(stderr,
"Usage: %s [select how many] [total # model] \n",\
*argv);

Bad idea.

What in particular do you take issue with there?

At the technical level (which was only what I was commenting on) mostly

* the formal argument type,

* the assumption that argv[0] will be the name of the program name, and

* the line continuation, which is just plain silly.

Regarding the program name assumption, it may be valid in *nix, but not formally
(formally it may an empty string there), and in practice it's not valid in
Windows, due to C++ runtime libraries for Windows getting that part wrong:


<code>
#include <iostream>

int main( int argc, char* argv[] )
{
std::cout << "Usage: " << argv[0] << " FILENAME" << std::endl;
}
</code>


<example>
C:\temp> md "gah! urgh"

C:\temp> gnuc x.cpp -o "gah! urgh"\teach

C:\temp> c:\temp\gah!^ urgh\teach
Usage: c:\temp\gah! FILENAME

C:\temp> _
</example>


At the design level, of course, some better message would be desirable...

I see a few minor sytlistic things, but nothing too major.
First of all, streams should probably have been used here, rather than
the old printf-style function. I would have specified argv[0], rather
than using *argv.

I'm not sure any of those two would have been an improvement.

Using iostreams may increase the size of a little program such as this some
orders of magnitude. Hm. I hate iostreams.

And I don't think anyone should deal with code using argv without understanding
pointer dereferencing and indexing, where it's just a matter of taste.

But that is all that I see as too terrible. Minor issues are that path
components may end up in the printed file name, and that there is also
no guarantee that the all platforms would use argv[0] to store the
executable name.

The location where the function is called does not ensure that (argvc >
0), and the phrasing "select how many" seems rather awful.

Yeah.


Cheers,

- Alf
 
J

James Kanze

At the technical level (which was only what I was commenting on) mostly [...]
* the assumption that argv[0] will be the name of the
program name, and
Regarding the program name assumption, it may be valid in
*nix, but not formally (formally it may an empty string
there), and in practice it's not valid in Windows, due to C++
runtime libraries for Windows getting that part wrong:

In which way? As far as I know, the situation in Windows is
identical to that of Unix. And it's not guaranteed; strictly
speaking, Unix doesn't provide the name of the program, so
there's no way an implementation could get it correct.
According to Posix, argv[0] is whatever the program which
started your program wants it to be---the usual shells *do* put
something related to the name of the program here, but other
programs may not. (There's also the question what is actually
required if a value is furnished: the C standard says "the name
of the program", the C++ "the name used to invoke the program".
Neither of which necessarily means anything, at least in certain
contexts.)

Windows, of course, has a GetCommandLine function, which Unix
doesn't, but this still leaves the question open: what does it
return if the process wasn't invoked from the command line?
(What does it return, for example, if the lpCommandLine argument
of CreateProcess was NULL?)

Also, argv[0] may be NULL (if argc is 0).

(All this to say that there's not much you can *portably* count
on. Depending on the program and the environment from which it
is started, of course, you can often obtain useful information
from it, of course, but it's not really portable.)
<code>
#include <iostream>
int main( int argc, char* argv[] )
{
std::cout << "Usage: " << argv[0] << " FILENAME" << std::endl;}

<example>
C:\temp> md "gah! urgh"
C:\temp> gnuc x.cpp -o "gah! urgh"\teach
C:\temp> c:\temp\gah!^ urgh\teach
Usage: c:\temp\gah! FILENAME
C:\temp> _
</example>
At the design level, of course, some better message would be
desirable...

:). Of course, you have to deal with the information available
to you. Under Unix, I trim everything up to and including the
last '/', and hope for the best.
I see a few minor sytlistic things, but nothing too major.
First of all, streams should probably have been used here,
rather than the old printf-style function. I would have
specified argv[0], rather than using *argv.
I'm not sure any of those two would have been an improvement.

Both are more idiomatic. And ostream actually works, despite
its flaws, and isn't an error waiting to be triggered.
Using iostreams may increase the size of a little program such
as this some orders of magnitude. Hm. I hate iostreams.

And it may not. It tends to increase the size (and the compile
times) on some systems because it is a template---this is an
obvious design error introduced by the standards committee. But
in this case, the apparent increase in the size of the
executable is probably due to the fact that you can't
dynamically link a template (at least in most implementations).

Dietmar Kuehl once wrote an implementation of iostream that
didn't suffer from these problems---the standard certainly
allows an explicit specialization for char and wchar_t, which
could be placed in a shared library. But no one seems to have
been interested---he made the code freely available, but none of
the implementations I know took advantage of it. Which leads me
to conclude that there aren't any problems in practice with the
current implementations of iostream, at least with regards to
size or performance. To tell the truth, that surprises me, but
you can't argue with the facts.
And I don't think anyone should deal with code using argv
without understanding pointer dereferencing and indexing,
where it's just a matter of taste.

The use of argv is, in the end, more or less conventional. You
follow the conventions, and don't worry about how it is
implemented. In practice, there's certainly no need to think of
it as a pointer. (On the other hand, I'd seriously suggest that
the first thing one do is copy it into a vector<string>. But I
don't think that that's "existing practice".)
 
J

Juha Nieminen

Alf said:
Using iostreams may increase the size of a little program such as this
some orders of magnitude. Hm. I hate iostreams.

Really? Maybe in the early 90's, not today.

//------------------------------------
#include <cstdio>

int main()
{
std::printf("Hello.\n");
}
//------------------------------------

In my Linux box, size of executable: 6068 bytes.

//------------------------------------
#include <iostream>

int main()
{
std::cout << "Hello.\n";
}
//------------------------------------

Size of executable: 6080 bytes.

I wouldn't call an increase of 12 bytes "some orders of magnitude".
 
A

Alf P. Steinbach

* Juha Nieminen:
Really? Maybe in the early 90's, not today.

I'm sorry, that is incorrect.

//------------------------------------
#include <cstdio>

int main()
{
std::printf("Hello.\n");
}
//------------------------------------

In my Linux box, size of executable: 6068 bytes.

//------------------------------------
#include <iostream>

int main()
{
std::cout << "Hello.\n";
}
//------------------------------------

Size of executable: 6080 bytes.

I wouldn't call an increase of 12 bytes "some orders of magnitude".


<your example code>
C:\temp> gnuc --version | find "g++"
g++ (GCC) 3.4.5 (mingw-vista special r3)

C:\temp> gnuc x.cpp -s -o x & for %i in (x.exe) do @echo %~zi
5632

C:\temp> gnuc y.cpp -s -o x & for %i in (y.exe) do @echo %~zi
98304

C:\temp> _
</your example code>


Which executable size increase is a little over 1 decimal order of magnitude,
and a little over 4 binary orders of magnitude.


Cheers & hth.,

- Alf
 
J

Juha Nieminen

Alf said:
g++ (GCC) 3.4.5 (mingw-vista special r3)

So because a compiler for a system it was not really never designed
for is unable to use dynamically linked libraries properly in that
system, you hate iostreams?

Yes, it makes a lot of sense. iostreams are clearly the culprit here.
 
J

James Kanze

So because a compiler for a system it was not really never
designed for is unable to use dynamically linked libraries
properly in that system, you hate iostreams?
Yes, it makes a lot of sense. iostreams are clearly the
culprit here.

Well, if that's the only argument against them, I guess he has
to use it:). (I can actually think of a few better ones. I
rather like iostream, but it's not perfect.)

With regards to the size issue, of course, it depends on the
implementation. There are two reasons why I would expect the
executable size to be larger using iostream:

-- It's templated, and it is more difficult to use dynamic
linking where templates are involved. Of course, a good
implementation will have explicit specializations for char
and wchar_t, so this problem won't affect them. But Sun CC
doesn't, and the size of the executable *is* ten times
larger. (Sun CC's implementation of iostream is
particularly bad. Almost a model of how not to implement
it.)

-- It's part of the C++ standard library, not the C standard
library. On a lot (most?, all?) systems, the C stnadard
library is a bundled dynamic object---it's present on all
systems, regardless of whether a C compiler is installed or
not. And it usually contains the system API as well, so you
definitely want to link it dynamically. The C++ standard
library, on the other hand, is rarely present unless you
have a C++ compiler installed, and any version specific
behavior will depend on the version of the compiler. So in
no case do you want to link it dynamically---you want your
code to run even on systems where it isn't installed, and
you want to be sure you get the version which corresponds to
your compiler, and not some other version.

-- At least where I work, most programs are a little bit bigger
than hello, world. (Say, about a million times bigger, in
terms of number of lines.) The difference in size is
actually an arithmetic constant. So even if I take Sun CC
(where hello, world is 61412 with printf, 501320 with
cout---the worst example I could find), what it really means
isn't that iostream makes the program close to 10 times
larger; what it means is that iostream adds about 450KB to
your program. If I take the main application I'm working
on, it's over 78 MB---a half a meg more or less isn't
important. And of course, this is an extreme case. For
most implementations, the differences are a lot less.

FWIW: I find a similar ratio in size difference between
std::vector and malloc/free, probably for the same reasons
(templates, C++ library vs. C library). Arguing against
iostream on these grounds is the same as arguing against vector,
rather than malloc/free. (Note that the argument is iostream
vs. printf. I'm sure that it's possible to design a solution
which is better than either of them, but printf is about the
worst IO system ever invented, and using it instead of iostream
is about the same as using malloc/free instead of vector.)
 
A

Alf P. Steinbach

* Juha Nieminen:
So because a compiler for a system it was not really never designed
for is unable to use dynamically linked libraries properly

I'm sorry, both of your suppositions here are incorrect.

James Kanze has some more informative argumentation else-thread.

in that
system, you hate iostreams?

I'm sorry, since the question builds on at least two incorrect suppositions
there is no meaningful way to answer it.

Yes, it makes a lot of sense. iostreams are clearly the culprit here.

Yes, for that program.


Cheers, & hth.,

- Alf
 
W

woodbrian77

Well, if that's the only argument against them, I guess he has
to use it:).  (I can actually think of a few better ones.  I
rather like iostream, but it's not perfect.)

With regards to the size issue, of course, it depends on the
implementation.  There are two reasons why I would expect the
executable size to be larger using iostream:

 -- It's templated, and it is more difficult to use dynamic
    linking where templates are involved.  Of course, a good
    implementation will have explicit specializations for char
    and wchar_t, so this problem won't affect them.  But Sun CC
    doesn't, and the size of the executable *is* ten times
    larger.  (Sun CC's implementation of iostream is
    particularly bad.  Almost a model of how not to implement
    it.)

 -- It's part of the C++ standard library, not the C standard
    library.  On a lot (most?, all?) systems, the C stnadard
    library is a bundled dynamic object---it's present on all
    systems, regardless of whether a C compiler is installed or
    not.  And it usually contains the system API as well, so you
    definitely want to link it dynamically.  The C++ standard
    library, on the other hand, is rarely present unless you
    have a C++ compiler installed, and any version specific
    behavior will depend on the version of the compiler.  So in
    no case do you want to link it dynamically---you want your
    code to run even on systems where it isn't installed, and
    you want to be sure you get the version which corresponds to
    your compiler, and not some other version.

 -- At least where I work, most programs are a little bit bigger
    than hello, world.  (Say, about a million times bigger, in
    terms of number of lines.)  The difference in size is
    actually an arithmetic constant.  So even if I take Sun CC
    (where hello, world is 61412 with printf, 501320 with
    cout---the worst example I could find), what it really means
    isn't that iostream makes the program close to 10 times
    larger; what it means is that iostream adds about 450KB to
    your program.  If I take the main application I'm working
    on, it's over 78 MB---a half a meg more or less isn't
    important.  

I agree with your analysis, but don't go along with the
conclusion. Half a meg is too much. It's difficult to produce
good results if the tools bring along layers of fat like that.
And of course, this is an extreme case.  For
    most implementations, the differences are a lot less.

FWIW: I find a similar ratio in size difference between
std::vector and malloc/free, probably for the same reasons
(templates, C++ library vs. C library).  Arguing against
iostream on these grounds is the same as arguing against vector,
rather than malloc/free.  (Note that the argument is iostream
vs. printf.  I'm sure that it's possible to design a solution
which is better than either of them, but printf is about the
worst IO system ever invented, and using it instead of iostream
is about the same as using malloc/free instead of vector.)

I disagree with your analogy. I think it makes sense to use
vector rather than malloc/free, but that iostreams were one
step forward and one step backward. At this point I think it's
best to admit that iostreams is dead in the water. Pardon the
pun.


Brian Wood
Ebenezer Enterprises
www.webEbenezer.net

I recommend the articles on social topics by Professor Katz --
"http://wuphys.wustl.edu/~katz
Even though the Iranian president is deeply anti-semitic
Professor Katz writes about building earthquake safe housing
that could be used to help the people of Iran survive
earthquakes in that mountainous part of the world.
 
J

joshuamaurice

I disagree with your analogy.  I think it makes sense to use
vector rather than malloc/free, but that iostreams were one
step forward and one step backward.  At this point I think it's
best to admit that iostreams is dead in the water.  Pardon the
pun.

iostreams are one step forward for introducing type safety to io.
Thank iostreams for that.

I don't know what your problem with iostreams is, but they're dead
IMHO a simple reason: the C++ standards committee tried to do
internationalization in the standard libraries, but
internationalization and Unicode support is hard, and they did it
badly. Too much implementation defined (locale names, size of wchar_t,
which locales are present, etc.), no support for variable width
encodings (UTF-8 and UTF-16 included), a convoluted interface
(iostream and strbufs), etc. I much prefer the Java way of base stream
classes, like StringWriter and FileOutputStream, and stream wrappers,
like OutputStreamWriter. Properly done, it would be about as efficient
as iostreams are now; both involve virtual function calls.
 
J

James Kanze

[...]
I agree with your analysis, but don't go along with the
conclusion. Half a meg is too much. It's difficult to
produce good results if the tools bring along layers of fat
like that.

Half a meg is an extreme case. Only Sun CC is anywhere close to
that. And with Sun CC, the half a meg is less a problem than
the performance problems---the iostream implementation is
really, really bad. The fact that the standard version of the
library is close to 100 times slower than the pre-standard one
is a far greater problem than the fact that it results in a half
a mega more in the size of the executable.

On the same platform, g++ doesn't have these problems. Nor does
the alternative library provided by Sun, the STLport, although
it has other problems.
I disagree with your analogy. I think it makes sense to use
vector rather than malloc/free, but that iostreams were one
step forward and one step backward. At this point I think
it's best to admit that iostreams is dead in the water.
Pardon the pun.

I agree that the standard committee made them worse, that the
pre-standard iostream was considerably better. But what else do
you use? printf and company are probably the worst designed IO
system which has ever existed. And the basic principles which
made the pre-standard iostream so beautiful are still present:
sinking and sourcing remains separate from formating and
parsing, both can easily be customized for user defined types
(or sinks and sources), and you can still use logical markup,
rather than physical. From a user point of view, for serial
text input and output, you can almost ignore all the junk the
committee added. Provided you have a reasonably good
implementation, and not the one provided with Sun CC.
 
J

James Kanze

iostreams are one step forward for introducing type safety to
io. Thank iostreams for that.

FWIW: C is, I think, the only language which has ever provided
IO that wasn't type safe. It's (another) major mistake in C.
In that regards, iostreams only bring C++ up to the level of
other languages. Iostreams have other very important
characteristics, which make them in fact superiour to the IO in
just about every other language I know: they cleanly separate
the sinking and sourcing from the formatting and parsing, and
allow customization (e.g. for user defined types or for user
defined sinks and sources) at both levels; they allow user
defined, application specific formatting specifications, without
the physical formatting being embedded in the output statement.

The major problem with iostreams is that people don't know them.
Another minor problem is poor error reporting.
I don't know what your problem with iostreams is, but they're
dead

I don't know why people say they're dead. I've never seen a C++
program that didn't use them.
IMHO a simple reason: the C++ standards committee tried to do
internationalization in the standard libraries, but
internationalization and Unicode support is hard, and they did
it badly. Too much implementation defined (locale names, size
of wchar_t, which locales are present, etc.), no support for
variable width encodings (UTF-8 and UTF-16 included), a
convoluted interface (iostream and strbufs), etc. I much
prefer the Java way of base stream classes, like StringWriter
and FileOutputStream, and stream wrappers, like
OutputStreamWriter. Properly done, it would be about as
efficient as iostreams are now; both involve virtual function
calls.

Actually, it's not that different from iostreams. Although they
did get it right that the code conversion should be a filtering
streambuf. (One could also argue that the same thing holds for
buffering. But performance arguments play a role here---in
Java, if you're reading character by character, you get a
virtual function call per character. In C++, you only get a
virtual call if the buffer is empty; otherwise, the fucntion is
fully inlilned. I don't know whether this makes a difference
today, but it certainly did back when C++ was young.)
 

Ask a Question

Want to reply to this thread or ask your own question?

You'll need to choose a username for the site, which only take a couple of moments. After that, you can post your question and our members will help you out.

Ask a Question

Members online

No members online now.

Forum statistics

Threads
473,995
Messages
2,570,230
Members
46,819
Latest member
masterdaster

Latest Threads

Top