G
George Mpouras
Create a test file with 20000000 same lines of 50 commas (it will be
1020000000 bytes)
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
Now I have a perl and C program running almost the same code. Perl needs
about 6 minutes to finish while the C version finishes at 20 seconds.
The difference is huge. What can I do for a faster perl version
Following the two programs
--== C ==--
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
int main(void) {
char *line = NULL;
char *p = NULL;
int NC = 1;
int n = 0;
size_t len = 0;
ssize_t read;
while (getline(&line, &len, stdin) != -1) {
n = 1;
for (p = line; *p != '\0'; p++) {
if (*p == ',') { n++; }
else if (*p == '\\') { p++; }
}
if (n > NC) {
NC = n;
}
}
if (line) free(line);
printf("%d\n", NC);
return EXIT_SUCCESS;
}
--== Perl ==--
#!/usr/bin/perl
my $NC = 1;
open DATA, '<', '/work/test.txt' or die "$^E\n";
while (<DATA>)
{
my ($i, $n, $Length) = (0, 1, length $_);
while ( $i++ < $Length )
{
my $c = substr $_, $i, 1;
if ( $c eq ',' ) { $n++ } elsif ($c eq '\\') {$i++}
}
$NC = $n if $n > $NC
}
close DATA;
print "Fields $NC\n"
1020000000 bytes)
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
Now I have a perl and C program running almost the same code. Perl needs
about 6 minutes to finish while the C version finishes at 20 seconds.
The difference is huge. What can I do for a faster perl version
Following the two programs
--== C ==--
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
int main(void) {
char *line = NULL;
char *p = NULL;
int NC = 1;
int n = 0;
size_t len = 0;
ssize_t read;
while (getline(&line, &len, stdin) != -1) {
n = 1;
for (p = line; *p != '\0'; p++) {
if (*p == ',') { n++; }
else if (*p == '\\') { p++; }
}
if (n > NC) {
NC = n;
}
}
if (line) free(line);
printf("%d\n", NC);
return EXIT_SUCCESS;
}
--== Perl ==--
#!/usr/bin/perl
my $NC = 1;
open DATA, '<', '/work/test.txt' or die "$^E\n";
while (<DATA>)
{
my ($i, $n, $Length) = (0, 1, length $_);
while ( $i++ < $Length )
{
my $c = substr $_, $i, 1;
if ( $c eq ',' ) { $n++ } elsif ($c eq '\\') {$i++}
}
$NC = $n if $n > $NC
}
close DATA;
print "Fields $NC\n"