Peter said:
Dear news group,
I have created a small programming challenge for those of you who are
interested in challenging your Standard C++ programming skills. The
challenge is about counting character frequencies in large texts,
perhaps useful for spam filtering or classical cryptanalysis. You
can read more about it here:
http://blog.p-jansson.com/2009/06/programming-challenge-letter-frequency.html
With kind regards,
Peter Jansson
This is my final, corrected ISO C++ version. Please erase all my previous submissions from your site, and
include only this:
#include <valarray>
#include <fstream>
#include <cstdlib>
#include <cstdio>
#include <iostream>
#include <string>
#include <cctype>
#include <ctime>
// Letter-frequency counter.
//
// Reads the file named by the single command-line argument, counts how often
// each byte in the range [0, 127] occurs, and prints the case-insensitive
// relative frequency (in percent) of each letter A-Z, followed by the CPU time
// reported by clock() for the whole run.
//
// Returns EXIT_FAILURE if the argument count is wrong or the file cannot be
// opened; otherwise falls off the end of main (i.e. returns 0).
int main(int argc, char **argv)
{
    using namespace std;

    // Warning: long double has problems with MINGW compiler for Windows.
    //
    // The C++ basic character set is using the value range [0, 127], so we keep
    // one counter per value in that range. valarray value-initialises them to 0.
    // If we used vector<long double>, it would not have any run-time difference
    // in any modern compiler.
    valarray<long double> characterFrequencies(128);

    // The array where the read characters will be stored.
    char buffer[4* BUFSIZ]= {};

    // If argc!= 2, then either the number of arguments is not correct, or the
    // platform does not support arguments.
    if(argc!= 2)
    {
        // argv[0] may be missing or null when the platform passes no arguments;
        // streaming a null char* would be undefined behaviour.
        const char *programName= (argc> 0 and argv[0])? argv[0]: "program";

        cerr<< "\nUsage: "<< programName<< " fileNameToRead\n\n";

        return EXIT_FAILURE;
    }

    // We disable synchronisation with stdio, to speed up C++ standard I/O.
    ios_base::sync_with_stdio(false);

    cout<< fixed;

    const string characters= "ABCDEFGHIJKLMNOPQRSTUVWXYZ";

    // We start timing.
    const clock_t time1= clock();

    // We open the file in binary mode, so that the bytes are counted exactly as
    // stored, without any platform-specific text-mode translation.
    ifstream inputFile(argv[1], ios_base::in | ios_base::binary);

    // An error happened
    if(not inputFile)
    {
        cerr<< "\nCould not open file for reading, exiting...\n\n";

        return EXIT_FAILURE;
    }

    do
    {
        inputFile.read(buffer, sizeof(buffer));

        // The last read is usually a short one: gcount() tells how many bytes
        // were actually stored in the buffer, even when the stream has failed.
        const streamsize bytesRead= inputFile.gcount();

        for(streamsize i= 0; i< bytesRead; ++i)
        {
            // Plain char may be signed; go through unsigned char so that the
            // index is never negative, and skip bytes outside the table.
            const unsigned char byte= static_cast<unsigned char>(buffer[i]);

            if(byte< characterFrequencies.size())
                ++characterFrequencies[byte];
        }

    }while(inputFile);

    // Since rule 1 is: "Your program should be case insensitive when it counts letters",
    // we add the results of lowercase characters and their equivalent uppercase letters together.
    valarray<long double> letterCounts(characters.size());

    for(string::size_type i= 0; i< characters.size(); ++i)
    {
        const unsigned char upper= static_cast<unsigned char>(characters[i]);
        const unsigned char lower= static_cast<unsigned char>(tolower(upper));

        letterCounts[i]= characterFrequencies[upper]+ characterFrequencies[lower];
    }

    const long double totalcharacterFrequencies= letterCounts.sum();

    cout<< "\n\n\nThe letter frequencies are:\n";

    for(string::size_type i= 0; i< characters.size(); ++i)
    {
        // A file without any letters would otherwise give 0/0 (NaN) here.
        const long double percentage= totalcharacterFrequencies> 0
                                      ? letterCounts[i]/ totalcharacterFrequencies* 100
                                      : 0.0L;

        cout<< characters[i]<< ": "<< percentage<< "%\n";
    }

    // We "stop" timing.
    const clock_t time2= clock();

    // We convert the timing to seconds.
    const double totalTimeInSeconds= static_cast<double>(time2- time1)/ CLOCKS_PER_SEC;

    cout<<"\n\nThe whole process took "<< totalTimeInSeconds<< " seconds.\n";

    cout<<"\n\nHave a nice day!\n";
}
--
Ioannis A. Vranos
C95 / C++03 Developer
http://www.cpp-software.net