S
Skeleton Man
Hi,
I have the following script (see below) which queries various sites for
statistics about a list of domains. The problem is that 1 in 3 attempts to
run it result in "Segmentation Fault". Sometimes it will work fine 10 times
in a row, other times it will segfault 3 times in a row before it works.
How can I tell what's causing the segfault ? It doesn't happen all the time,
but when it does I'm pretty sure it's $thr->join; that does it. (if I
uncomment the print statements, it will say "Waiting for thread number 1 to
join" followed immediately by "Segmentation Fault"..
Chris
#!/usr/bin/t-perl -W
use strict;
use threads;
use Thread::Queue;
use LWP::UserAgent;
my $dls = './domains.txt'; # List of domain names
my $out = './results.csv'; # Where to output results to
my @nme = ('Yahoo','Alexa','Overture');
my @url =
('http://search.yahoo.com/search?p=link:http://domain.com&ei=UTF-8&f
r=FP-tab-web-t&x=wrt',
'http://www.alexa.com/data/ds/linksin?q=link:domain.com&url=',
'http://inventory.overture.com/d/searchinventory/suggestion/?mkt=us&lang=en_
US&term=');
my @rgx = (qr/of about ((\d|,)+)/,
qr/Sites \s+1\s+-\s+\d+\s+of\s+((\d|,)+)/i,
qr/color=E8E8E8> (\d+)/);
my $t1 = time();
open (FILE, "<$dls") || die "Unable to open $dls: $!";
my @domains = <FILE>;
close (FILE);
my @stream;
my $cthreads;
# Start 5 new threads per search engine:
for (my $i=0;$i<=$#url;$i++){
$stream[$i] = new Thread::Queue;
for (0..4){
my $tid = $i*$_;
my $qnm = "q".($i+1);
$cthreads->[$tid] = threads->new(\&fetchStats, $stream[$i], $url[$i],
$rgx[$i], $nme[$i],$qnm);
#print "Thread number ".($tid + 1)." created.\n";
}
}
# Add our domains to the queue:
foreach my $dom (@domains){
chomp($dom);
#print "Domain '$dom' added to the queue.\n";
foreach my $str (@stream){ $str->enqueue($dom); }
}
# Join all our threads (except ourselves):
foreach my $thr (threads->list) {
if ($thr->tid && !threads::equal($thr, threads->self) ){
#print "Waiting for thread number ".$thr->tid." to join\n";
$thr->join;
#print "Thread number ".$thr->tid." has joined.\n";
}
}
my $t2 = time();
my $t3 = $t2-$t1;
print "Total execution time: $t3 seconds\n";
sub fetchStats
{
my ($upstream,$url,$rgx,$nme,$qnm) = @_;
while (my $dom = $upstream->dequeue){
my $ua = new LWP::UserAgent;
$ua->parse_head(0);
$ua->agent("Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0");
$ua->proxy('http','http://59.18.143.176:8080/');
$ua->timeout(2);
$ua->max_size(9400);
#print "Checking $dom\n";
my $response = $ua->get($url.$dom);
if (!$response->is_success){
#$response = $ua->get($url.$dom);
#print "Retrying $dom...\n";
}
if ($response->is_success){
my $txt = $response->content;
$txt =~ s/\r|\n//gs;
my $hits = ($txt =~ $rgx) ? $1 : 0;
print "$dom ($nme): $hits hits\n";
}else{
print "$dom ($nme): timeout\n";
}
if ($upstream->pending < 1){ $upstream->enqueue(undef); }
}
}
I have the following script (see below) which queries various sites for
statistics about a list of domains. The problem is that 1 in 3 attempts to
run it result in "Segmentation Fault". Sometimes it will work fine 10 times
in a row, other times it will segfault 3 times in a row before it works.
How can I tell what's causing the segfault ? It doesn't happen all the time,
but when it does I'm pretty sure it's $thr->join; that does it. (if I
uncomment the print statements, it will say "Waiting for thread number 1 to
join" followed immediately by "Segmentation Fault"..
Chris
#!/usr/bin/t-perl -W
use strict;
use threads;
use Thread::Queue;
use LWP::UserAgent;
my $dls = './domains.txt'; # List of domain names
my $out = './results.csv'; # Where to output results to
my @nme = ('Yahoo','Alexa','Overture');
my @url =
('http://search.yahoo.com/search?p=link:http://domain.com&ei=UTF-8&f
r=FP-tab-web-t&x=wrt',
'http://www.alexa.com/data/ds/linksin?q=link:domain.com&url=',
'http://inventory.overture.com/d/searchinventory/suggestion/?mkt=us&lang=en_
US&term=');
my @rgx = (qr/of about ((\d|,)+)/,
qr/Sites \s+1\s+-\s+\d+\s+of\s+((\d|,)+)/i,
qr/color=E8E8E8> (\d+)/);
my $t1 = time();
open (FILE, "<$dls") || die "Unable to open $dls: $!";
my @domains = <FILE>;
close (FILE);
my @stream;
my $cthreads;
# Start 5 new threads per search engine:
for (my $i=0;$i<=$#url;$i++){
$stream[$i] = new Thread::Queue;
for (0..4){
my $tid = $i*$_;
my $qnm = "q".($i+1);
$cthreads->[$tid] = threads->new(\&fetchStats, $stream[$i], $url[$i],
$rgx[$i], $nme[$i],$qnm);
#print "Thread number ".($tid + 1)." created.\n";
}
}
# Add our domains to the queue:
foreach my $dom (@domains){
chomp($dom);
#print "Domain '$dom' added to the queue.\n";
foreach my $str (@stream){ $str->enqueue($dom); }
}
# Join all our threads (except ourselves):
foreach my $thr (threads->list) {
if ($thr->tid && !threads::equal($thr, threads->self) ){
#print "Waiting for thread number ".$thr->tid." to join\n";
$thr->join;
#print "Thread number ".$thr->tid." has joined.\n";
}
}
my $t2 = time();
my $t3 = $t2-$t1;
print "Total execution time: $t3 seconds\n";
sub fetchStats
{
my ($upstream,$url,$rgx,$nme,$qnm) = @_;
while (my $dom = $upstream->dequeue){
my $ua = new LWP::UserAgent;
$ua->parse_head(0);
$ua->agent("Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0");
$ua->proxy('http','http://59.18.143.176:8080/');
$ua->timeout(2);
$ua->max_size(9400);
#print "Checking $dom\n";
my $response = $ua->get($url.$dom);
if (!$response->is_success){
#$response = $ua->get($url.$dom);
#print "Retrying $dom...\n";
}
if ($response->is_success){
my $txt = $response->content;
$txt =~ s/\r|\n//gs;
my $hits = ($txt =~ $rgx) ? $1 : 0;
print "$dom ($nme): $hits hits\n";
}else{
print "$dom ($nme): timeout\n";
}
if ($upstream->pending < 1){ $upstream->enqueue(undef); }
}
}