random segfaults

S

Skeleton Man

Hi,

I have the following script (see below) which queries various sites for
statistics about a list of domains. The problem is that 1 in 3 attempts to
run it result in "Segmentation Fault". Sometimes it will work fine 10 times
in a row, other times it will segfault 3 times in a row before it works.

How can I tell what's causing the segfault ?

Chris


#!/usr/bin/t-perl -W

use strict;
use threads;
use Thread::Queue;
use LWP::UserAgent;

my $dls = './domains.txt'; # List of domain names
my $out = './results.csv'; # Where to output results to

my @nme = ('Yahoo','Alexa','Overture');
my @url =
('http://search.yahoo.com/search?p=link:http://domain.com&ei=UTF-8&f
r=FP-tab-web-t&x=wrt',
'http://www.alexa.com/data/ds/linksin?q=link:domain.com&url=',

'http://inventory.overture.com/d/searchinventory/suggestion/?mkt=us&lang=en_
US&term=');

my @rgx = (qr/of about ((\d|,)+)/,
qr/Sites \s+1\s+-\s+\d+\s+of\s+((\d|,)+)/i,
qr/color=E8E8E8> (\d+)/);

my $t1 = time();
open (FILE, "<$dls") || die "Unable to open $dls: $!";
my @domains = <FILE>;
close (FILE);

my @stream;
my $cthreads;


# Start 5 new threads per search engine:

for (my $i=0;$i<=$#url;$i++){

$stream[$i] = new Thread::Queue;

for (0..4){
my $tid = $i*$_;
my $qnm = "q".($i+1);
$cthreads->[$tid] = threads->new(\&fetchStats, $stream[$i], $url[$i],
$rgx[$i], $nme[$i],$qnm);
#print "Thread number ".($tid + 1)." created.\n";
}

}

# Add our domains to the queue:
foreach my $dom (@domains){
chomp($dom);
#print "Domain '$dom' added to the queue.\n";

foreach my $str (@stream){ $str->enqueue($dom); }
}


# Join all our threads (except ourselves):

foreach my $thr (threads->list) {
if ($thr->tid && !threads::equal($thr, threads->self) ){
#print "Waiting for thread number ".$thr->tid." to join\n";
$thr->join;
#print "Thread number ".$thr->tid." has joined.\n";
}
}


my $t2 = time();
my $t3 = $t2-$t1;
print "Total execution time: $t3 seconds\n";


sub fetchStats
{
my ($upstream,$url,$rgx,$nme,$qnm) = @_;

while (my $dom = $upstream->dequeue){

my $ua = new LWP::UserAgent;
$ua->parse_head(0);
$ua->agent("Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0;)");
$ua->proxy('http','http://59.18.143.176:8080/');
$ua->timeout(2);
$ua->max_size(9400);



#print "Checking $dom\n";

my $response = $ua->get($url.$dom);

if (!$response->is_success){
#$response = $ua->get($url.$dom);
#print "Retrying $dom...\n";
}

if ($response->is_success){

my $txt = $response->content;
$txt =~ s/\r|\n//gs;

my $hits = ($txt =~ $rgx) ? $1 : 0;

print "$dom ($nme): $hits hits\n";

}else{
print "$dom ($nme): timeout\n";
}

if ($upstream->pending < 1){ $upstream->enqueue(undef); }
}
}
 

Ask a Question

Want to reply to this thread or ask your own question?

You'll need to choose a username for the site, which only take a couple of moments. After that, you can post your question and our members will help you out.

Ask a Question

Members online

Forum statistics

Threads
473,995
Messages
2,570,225
Members
46,815
Latest member
treekmostly22

Latest Threads

Top