~700ms was good enough to test it against D.
Well, ignore the old versions!!!
I have a new Java version that is much faster than the previous version!
My old version with a 40 MB file:
C:\>java -server WordCount bible2.txt>log.txt
Time: 4797 ms
My new version with 40 meg file
C:\>java -server WordCount2 bible2.txt>log.txt
Time: 3125 ms
The C++ version with the 40 MB bible2.txt:
C:\>wc1 bible2.txt>log.txt
Time: 5390 ms
Pardon me while I laugh
:)
Ha ha ha ha ha
The new version is below.
-----
Also, if the following doesn't work,
the source can be found here too:
http://www.pastebin.ca/963017
//counts the words in a text file...
//combined effort: wlfshmn from #java on IRC
//Undernet and Razii
import java.io.*;
import java.util.*;
/**
 * Counts lines, words and bytes for every regular file named on the command
 * line, then prints a word-frequency table (sorted by word) covering all of
 * the files together, followed by the elapsed time in milliseconds.
 *
 * <p>Arguments that do not name a regular file are silently skipped.
 */
public final class WordCount2
{
    /**
     * Word -> occurrence counter. The value is a one-element int array so a
     * count can be bumped in place without re-inserting into the map.
     * Shared across all input files.
     */
    private static final Map<String, int[]> dictionary =
        new HashMap<String, int[]>(800000);

    // Running totals over all processed files. Kept as long so that very
    // large inputs cannot silently wrap around.
    private static long tWords = 0;
    private static long tLines = 0;
    private static long tBytes = 0;

    /**
     * Program entry point.
     *
     * @param args paths of the files to analyse
     * @throws Exception if a file cannot be opened or read
     */
    public static void main(final String[] args) throws Exception
    {
        System.out.println("Lines\tWords\tBytes\tFile\n");

        // Timed section: tokenizing, counting, per-file summaries and the sort.
        final long start = System.currentTimeMillis();

        for (String arg : args)
        {
            final File file = new File(arg);
            if (!file.isFile())
            {
                continue;
            }
            countFile(file, arg);
        }

        // Copy into a TreeMap only so that the results are printed in key
        // order; this copy is deliberately included in the measured time.
        final TreeMap<String, int[]> sort =
            new TreeMap<String, int[]>(dictionary);

        final long end = System.currentTimeMillis();

        System.out.println("---------------------------------------");
        if (args.length > 1)
        {
            System.out.println(tLines + "\t" + tWords + "\t" + tBytes +
                "\tTotal");
            System.out.println("---------------------------------------");
        }

        for (Map.Entry<String, int[]> pairs : sort.entrySet())
        {
            System.out.println(pairs.getValue()[0] + "\t" + pairs.getKey());
        }

        System.out.println("Time: " + (end - start) + " ms");
    }

    /**
     * Tokenizes one file, updates the shared dictionary and running totals,
     * and prints that file's summary line.
     *
     * @param file the regular file to read
     * @param name the label printed in the summary line (the original
     *             command-line argument)
     * @throws IOException if the file cannot be opened or read
     */
    private static void countFile(final File file, final String name)
        throws IOException
    {
        long numLines = 0;
        long numWords = 0;
        final long numBytes = file.length();

        final BufferedReader input = new BufferedReader(
            new InputStreamReader(new FileInputStream(file), "ISO-8859-1"));
        try
        {
            final StreamTokenizer st = new StreamTokenizer(input);
            // Treat these characters as stand-alone tokens instead of
            // comment starters, number parts or quote delimiters.
            st.ordinaryChar('/');
            st.ordinaryChar('.');
            st.ordinaryChar('-');
            st.ordinaryChar('"');
            st.ordinaryChar('\'');
            // Report line ends as TT_EOL so lines can be counted.
            st.eolIsSignificant(true);

            while (st.nextToken() != StreamTokenizer.TT_EOF)
            {
                if (st.ttype == StreamTokenizer.TT_EOL)
                {
                    numLines++;
                }
                else if (st.ttype == StreamTokenizer.TT_WORD)
                {
                    numWords++;
                    final int[] count = dictionary.get(st.sval);
                    if (count != null)
                    {
                        count[0]++;
                    }
                    else
                    {
                        dictionary.put(st.sval, new int[]{1});
                    }
                }
                // Any other token type is neither a word nor a line end
                // and is ignored.
            }
        }
        finally
        {
            // Always release the file handle, even if reading fails.
            input.close();
        }

        System.out.println(numLines + "\t" + numWords + "\t" + numBytes +
            "\t" + name);

        tLines += numLines;
        tWords += numWords;
        tBytes += numBytes;
    }
}