R
Robin
This is a very a simple search engine that prints the filenames of the files
and a link to them, it's not very advanced but I used it as an excersise
into writing search engines, and I'm planning on making it into a more
advanced one in the future I am aware of the race conditions in the header
and footer subs and their suckiness and am working now to fix them, I got
some advice from someone on the perl beginners yahoo list, but to work their
code I have to understand it and I don't so once I read up I'll fix the race
condition, anyway, any comments would be nice.
-Robin
btw, unless I change newsreaders which I haven't had much success with, I've
downloaded like 5 and none seem to work for me cause they don't work with my
mail server my indenting is gonna be screwed up, so please bear with me.
#!/usr/bin/perl
use strict;
use warnings;
use Fcntl qw flock);
use CGI qwall);
$CGI:OST_MAX=1024 * 100; # max 100K posts
$CGI:ISABLE_UPLOADS = 1; # no uploads
$" = '';
$ENV{'PATH'} = '/bin:/usr/bin:/usr/local/bin';
my @directories = ("./", "../"); #change this to the directories you want to
have searched. Include the slash at the end of the directory.
my $action = url_param ('action');
my $rootfile = url (relative=>1);
my $headerfile = "searchheader.txt";
my $footerfile = "searchfooter.txt";
my $errorfile = "ERR.txt";
my @head = getheader ($headerfile);
my @foot = getfooter ($footerfile);
my $date = getdate ();
my @errors;
my @finaldirs;
checkerrors ();
if ($action eq "search")
{
search ();
}
else
{
newsearch ();
}
sub search
{
print header;
print (@head);
#code for parsing results
foreach my $dir (@directories)
{
opendir (DIR, $dir);
my @files_from_dir = readdir (DIR);
closedir (DIR);
foreach my $filefromdir (@files_from_dir)
{
if (! -d $filefromdir)
{
push (@finaldirs, "$dir$filefromdir")
}
}
}
my $query = param ('query');
my @finalresults;
foreach my $file (@finaldirs)
{
open (FILE, $file) or push (@errors, "A file open error occured on $file:
$!.");
flock (FILE, LOCK_SH) or push (@errors, "A file lock error occured on
$file: $!.");
checkerrors ();
my @contents;
my $contents;
@contents = <FILE> if (-f $file);
close (FILE);
chomp (@contents);
$contents = join ('', @contents);
$contents =~ s/<.*>//g;
my $result;
if ($contents =~ m/$query/ and $query)
{
$result = "<a href=\"$file\">$file</a><br>";
push (@finalresults, $result);
}
}
print <<END;
<p><strong><em>Infused Search</em></strong>
<br>
<br>
Search Results:
</p>
END
print @finalresults;
print <<END;
<hr size="1">
</body>
</html>
END
}
sub newsearch
{
print header;
print (@head);
print <<END;
<strong><em>Infused Search</em></strong>
<br>
<br>
<hr size="1">
<form name="form1" method="post" action="search.pl?action=search">
<input type="text" name="query">
<input type="submit" name="Submit" value="Submit">
</form>
<hr size="1">
END
print (@foot);
}
sub checkerrors
{
if (@errors)
{
print header;
print "<html><body><center>";
print "There were errors while trying to execute Infused Search. They are
listed as follows.<br><br>\n";
foreach my $error (@errors)
{
print ($error, "<br>\n");
}
my $errflag = 0;
if (! open (ERRORF, ">>$errorfile") and flock (ERRORF, LOCK_EX))
{
print "There was an error logging the errors: file cannot be locked or
opened.<br>";
$errflag = 1;
}
else
{
print ERRORF ("Current date: $date", "\n");
foreach my $error2 (@errors)
{
print ERRORF $error2, "\n";
}
}
close (ERRORF);
if (! $errflag)
{
print "<br>", "Errors have been logged in $errorfile.";
}
print "</body></html>";
exit (0);
}
else
{
return;
}
}
sub getheader
{
my $header_sub = shift;
my (@headertoret);
if (-e $header_sub)
{
open (HEADERF, $header_sub) or push (@errors, "A file open error occured
on $header_sub: $!.");
flock (HEADERF, LOCK_SH) or push (@errors, "A file lock error occured on
$header_sub: $!.");
@headertoret = <HEADERF>;
close (HEADERF);
}
else
{
open (HEADERF, ">$header_sub") or push (@errors, "A file open error
occured on $header_sub: $!.");
flock (HEADERF, LOCK_EX) or push (@errors, "A file lock error occured on
$header_sub: $!.");
@headertoret = <<END;
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
"http://www.w3.org/TR/html4/loose.dtd">
<html>
<head>
<title>Infused Search</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
</head>
<body>
END
print HEADERF @headertoret;
close (HEADERF);
}
return (@headertoret);
}
sub getfooter
{
my $footer_sub = shift;
my (@footertoret);
if (-e $footer_sub)
{
open (HEADERF, $footer_sub) or push (@errors, "A file open error occured
on $footer_sub: $!.");
flock (HEADERF, LOCK_SH) or push (@errors, "A file lock error occured on
$footer_sub $!.");
@footertoret = <HEADERF>;
close (HEADERF);
}
else
{
open (HEADERF, ">$footer_sub") or push (@errors, "A file open error
occured on $footer_sub $!.");
flock (HEADERF, LOCK_EX) or push (@errors, "A file lock error occured on
$footer_sub $!.");
@footertoret = <<END;
</body></html>
END
print HEADERF @footertoret;
close (HEADERF);
}
return (@footertoret);
}
sub getdate
{
my ($day, $mon, $year)=(localtime)[3,4,5];
$mon++; #month is returned in a 0-11 range
$year +=1900;
my $date = $mon . "/" . $day . "/" . $year;
return $date;
}
and a link to them, it's not very advanced but I used it as an excersise
into writing search engines, and I'm planning on making it into a more
advanced one in the future I am aware of the race conditions in the header
and footer subs and their suckiness and am working now to fix them, I got
some advice from someone on the perl beginners yahoo list, but to work their
code I have to understand it and I don't so once I read up I'll fix the race
condition, anyway, any comments would be nice.
-Robin
btw, unless I change newsreaders which I haven't had much success with, I've
downloaded like 5 and none seem to work for me cause they don't work with my
mail server my indenting is gonna be screwed up, so please bear with me.
#!/usr/bin/perl
use strict;
use warnings;
use Fcntl qw flock);
use CGI qwall);
$CGI:OST_MAX=1024 * 100; # max 100K posts
$CGI:ISABLE_UPLOADS = 1; # no uploads
$" = '';
$ENV{'PATH'} = '/bin:/usr/bin:/usr/local/bin';
my @directories = ("./", "../"); #change this to the directories you want to
have searched. Include the slash at the end of the directory.
my $action = url_param ('action');
my $rootfile = url (relative=>1);
my $headerfile = "searchheader.txt";
my $footerfile = "searchfooter.txt";
my $errorfile = "ERR.txt";
my @head = getheader ($headerfile);
my @foot = getfooter ($footerfile);
my $date = getdate ();
my @errors;
my @finaldirs;
checkerrors ();
if ($action eq "search")
{
search ();
}
else
{
newsearch ();
}
sub search
{
print header;
print (@head);
#code for parsing results
foreach my $dir (@directories)
{
opendir (DIR, $dir);
my @files_from_dir = readdir (DIR);
closedir (DIR);
foreach my $filefromdir (@files_from_dir)
{
if (! -d $filefromdir)
{
push (@finaldirs, "$dir$filefromdir")
}
}
}
my $query = param ('query');
my @finalresults;
foreach my $file (@finaldirs)
{
open (FILE, $file) or push (@errors, "A file open error occured on $file:
$!.");
flock (FILE, LOCK_SH) or push (@errors, "A file lock error occured on
$file: $!.");
checkerrors ();
my @contents;
my $contents;
@contents = <FILE> if (-f $file);
close (FILE);
chomp (@contents);
$contents = join ('', @contents);
$contents =~ s/<.*>//g;
my $result;
if ($contents =~ m/$query/ and $query)
{
$result = "<a href=\"$file\">$file</a><br>";
push (@finalresults, $result);
}
}
print <<END;
<p><strong><em>Infused Search</em></strong>
<br>
<br>
Search Results:
</p>
END
print @finalresults;
print <<END;
<hr size="1">
</body>
</html>
END
}
sub newsearch
{
print header;
print (@head);
print <<END;
<strong><em>Infused Search</em></strong>
<br>
<br>
<hr size="1">
<form name="form1" method="post" action="search.pl?action=search">
<input type="text" name="query">
<input type="submit" name="Submit" value="Submit">
</form>
<hr size="1">
END
print (@foot);
}
sub checkerrors
{
if (@errors)
{
print header;
print "<html><body><center>";
print "There were errors while trying to execute Infused Search. They are
listed as follows.<br><br>\n";
foreach my $error (@errors)
{
print ($error, "<br>\n");
}
my $errflag = 0;
if (! open (ERRORF, ">>$errorfile") and flock (ERRORF, LOCK_EX))
{
print "There was an error logging the errors: file cannot be locked or
opened.<br>";
$errflag = 1;
}
else
{
print ERRORF ("Current date: $date", "\n");
foreach my $error2 (@errors)
{
print ERRORF $error2, "\n";
}
}
close (ERRORF);
if (! $errflag)
{
print "<br>", "Errors have been logged in $errorfile.";
}
print "</body></html>";
exit (0);
}
else
{
return;
}
}
sub getheader
{
my $header_sub = shift;
my (@headertoret);
if (-e $header_sub)
{
open (HEADERF, $header_sub) or push (@errors, "A file open error occured
on $header_sub: $!.");
flock (HEADERF, LOCK_SH) or push (@errors, "A file lock error occured on
$header_sub: $!.");
@headertoret = <HEADERF>;
close (HEADERF);
}
else
{
open (HEADERF, ">$header_sub") or push (@errors, "A file open error
occured on $header_sub: $!.");
flock (HEADERF, LOCK_EX) or push (@errors, "A file lock error occured on
$header_sub: $!.");
@headertoret = <<END;
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
"http://www.w3.org/TR/html4/loose.dtd">
<html>
<head>
<title>Infused Search</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
</head>
<body>
END
print HEADERF @headertoret;
close (HEADERF);
}
return (@headertoret);
}
sub getfooter
{
my $footer_sub = shift;
my (@footertoret);
if (-e $footer_sub)
{
open (HEADERF, $footer_sub) or push (@errors, "A file open error occured
on $footer_sub: $!.");
flock (HEADERF, LOCK_SH) or push (@errors, "A file lock error occured on
$footer_sub $!.");
@footertoret = <HEADERF>;
close (HEADERF);
}
else
{
open (HEADERF, ">$footer_sub") or push (@errors, "A file open error
occured on $footer_sub $!.");
flock (HEADERF, LOCK_EX) or push (@errors, "A file lock error occured on
$footer_sub $!.");
@footertoret = <<END;
</body></html>
END
print HEADERF @footertoret;
close (HEADERF);
}
return (@footertoret);
}
sub getdate
{
my ($day, $mon, $year)=(localtime)[3,4,5];
$mon++; #month is returned in a 0-11 range
$year +=1900;
my $date = $mon . "/" . $day . "/" . $year;
return $date;
}