#!/usr/bin/perl $|=1; # Copyright 1999 Jose M. Vidal # Authors: # Jose M. Vidal, vidal@multiagent.com, http://jmvidal.ece.sc.edu # Michael Schmitz # # This program is free software. You can redistribute it and/or modify # it under the terms of the GNU General Public License # # $Id: search.pl,v 1.33 2003/05/11 19:31:23 jmvidal Exp $ use CGI param, header, escape; use Fcntl; $dbase = "urls.db"; ## Name of db file. $otherbase = "/usr/local/etc/bk2site/searchbase.html"; ##Name of the template file $searchprog = "/cgi-bin/bk2site/search.pl"; ##CHANGE this if your program is somewhere else. # makes the functions that emulate bk2site's builtin function print # debug outout to STDERR $debug_emulateBuiltins = 0; print header(); $database = param("db"); ## Name of db file. if ($database eq "") { $database = $dbase; }; unless ($database =~ /^[\w\.\/]+/ ) { print "Bad database $database name"; exit; } $query = param("q"); $num = param("num"); unless ($num =~ /^[0-9]*$/ ) { print "Bad num"; exit; } if ($num eq "") { $num = 20; }; if ($num < 1) { $num = 20; }; $stq = param("stq"); unless ($stq =~ /^[0-9]*$/ ) { print "Bad stq $stq"; exit; } if ($stq eq "") { $stq = 0; }; $escquery = escape($query); # this was not secure # open(DB,$database) || sysopen(DB, $database, O_RDONLY) || die ("Can't open database(urls.db)= $database"); $checkDB = ; chop($checkDB); $usingmetadata = 0; if ($checkDB eq "#bk2site urls.db generated file metadata") { $numlines = 10; $usingmetadata = 1; } else { if ($checkDB ne "#bk2site urls.db generated file") { die ("Invalid database: $database"); } else { $numlines = 9; } } $urltemplate = ; chop($urltemplate); $newgif = ; chop($newgif); $timecutoff = ; chop($timecutoff); $oldesttime = time - $timecutoff * 86400; open(OTHERBASE,$otherbase)|| die ("Can't open $otherbase"); $temp = $urltemplate; while ( $temp =~ /%IFCOMHAS\((.*?)\)\((.*?)\)/ ){ push @directives, $1; # if ($1 eq $query) { # $query = $query; # } $temp =~ s/%IFCOMHAS\(.*?\)\(.*?\)//; }; while () { if (/(.*)%QUERY(.*)/){ $_ =~ s/%QUERY/$query/g; } if (/(.*)%ESCQUERY(.*)/){ $_ =~ s/%ESCQUERY/$escquery/g; } if (/(.*)%NUMBER(.*)/){ $_ =~ s/%NUMBER/$num/g; } if (/(.*)(.*)/){ $_ =~ s//$database/g; } if (/(.*)(.*)/){ print "$1"; $afterComment = $2; last; } print; } print "\n"; @allRecords = ; #there are 8 lines per record, so mutiply index by 8 ## 0 is the type ## 1 is the ParentTitle ## 2 is the url or relative dir ## 3 is the Title ## 4 is the comment ## 5 is the creation time ## 6 is the last modified time ## 7 is the last visit time ## 8 is the number of user hits ## 9 is meta data $typeN = 0; $parentTitleN = 1; $urlN = 2; $titleN = 3; $commentN = 4; $creationtimeN = 5; $modifiedtimeN = 6; $visittimeN = 7; $urlhitsN = 8; $metaN = 9; $numRecords = ($#allRecords + 1)/$numlines; for ($i=0; $i <= $numRecords; ++$i){ #initialize hits to 0 $numHits[$i] = 0; ##keyword hits $order[$i] = $i; ## given them an initial ordering } $categoryMatches = 0; $siteMatches = 0; if (length($query) > 1) { for ($i=0; $i < $numRecords; ++$i){ #do search $cr = ($i * $numlines); @words = split(/ +/, $query); $url = $allRecords[$cr + $urlN]; $title = $allRecords[$cr + $titleN]; $comment = $allRecords[$cr + $commentN]; $comment =~ s/
/ /g; if ($usingmetadata){ $meta = $allRecords[$cr + $metaN]; }; @hasdirectives = (); foreach $dir (@directives) { #directives are strings that appear %IFCOMHAS(here)(new stuff) $temp = $dir; $temp =~ s/([\*\+\?\$\.^])/\\$1/g; #escape all nasties, to allow *cool* to be a directive if ($usingmetadata) { if ($comment =~ s/$temp//g || $meta =~ s/$temp//g ) { push @hasdirectives, $dir; }; } else { if ($comment =~ s/$temp//g) { #take *cool* out of comment and place it in hasdirectives. push @hasdirectives, $dir; }; }; }; foreach $q2 (@words) { if (length($q2) < 2) { #ignore 1-char queries next; }; $q = $q2; $oldNumHits = $numHits[$i]; foreach $dir (@hasdirectives){ #search also on the directives that were taken out of the comment if ($q eq $dir) { $numHits[$i]++; } }; $q =~ s/([\*\+\?\$\.^])/\\$1/g; #escape all nasties # Query only URL's for LEAFS, not relative directories of FOLDER. if (($allRecords[$cr + $typeN]) eq "LEAF\n") { $numHits[$i] += ($url =~ s/($q)/\1/gi); } $numHits[$i] += ($title =~ s/($q)/\1<\/B>/gi); if ($usingmetadata){ $numHits[$i] += ($comment =~ s/($q)/\1<\/B>/gi || $meta =~ s/($q)/\1<\/B>/gi); } else { $numHits[$i] += ($comment =~ s/($q)/\1<\/B>/gi); }; if (($oldNumHits == 0) && ($numHits[$i] > 0)) { if (($allRecords[$cr + $typeN]) eq "FOLDER\n") { $categoryMatches++; } else { if (($allRecords[$cr + $typeN]) eq "LEAF\n") { $siteMatches++; } else { print "ERROR: bad format in database file\n"; exit; }; }; }; }; # Do not store url because it has not been changed. # $allRecords[$cr + $urlN] = $url; $allRecords[$cr + $titleN] = $title; if (@hasdirectives){ foreach $dir (@hasdirectives) { $comment .= $dir . " "; }; }; $allRecords[$cr + $commentN] = $comment; }; }; @order = sort {$numHits[$b] <=> $numHits[$a]} @order; print "
$categoryMatches category matches and $siteMatches site matches."; $endIndex = $stq + $num -1; $maxIndex = $categoryMatches + $siteMatches - 1; if ($endIndex > $maxIndex) { $endIndex = $maxIndex; } if ($stq > 0 || $endIndex < $maxIndex) { print " Showing $stq -- $endIndex."; } print "
\n"; # Inside the following loops indexMatches is preincremented $Index = -1; if (($categoryMatches > 0) && ($categoryMatches > $stq)) { print "Category Matches: $categoryMatches
\n
    \n"; for ($i=0; $i < $numRecords; ++$i){ $rn = $order[$i]; if (($numHits[$rn] > 0) && ($allRecords[($rn*$numlines) + $typeN] eq "FOLDER\n")) { $Index++; if ($Index < $stq) { next; } elsif ($Index > $endIndex) { last; } $pt = $allRecords[($rn*$numlines)+$parentTitleN]; chop($pt); $url = $allRecords[($rn*$numlines)+$urlN]; $url =~ s/\?/%3F/g; chop($url); $title = $allRecords[($rn*$numlines)+$titleN]; chop($title); # $title =~ tr/_/ /; # print "
  • $pt$title\n"; #urls for directories have searchtorootpath already prepended. print "
  • $pt$title\n"; } } print "
\n"; } else { $Index = $categoryMatches -1; } if (($siteMatches > 0) && (($categoryMatches + $siteMatches) > $stq)){ print "Site Matches: $siteMatches
\n
    \n"; for ($i=0; $i < $numRecords; ++$i){ $rn = $order[$i]; if (($numHits[$rn] > 0) && ($allRecords[($rn*$numlines) + $typeN] eq "LEAF\n")) { $pt = $allRecords[($rn*$numlines)+$parentTitleN]; chop($pt); $notprinteddir = 1; for ($j=0; $j < $numRecords; ++$j){ $rn = $order[$j]; $npt = $allRecords[($rn*$numlines)+$parentTitleN]; chop($npt); if (($numHits[$rn] > 0) && ($allRecords[($rn*$numlines) + $typeN] eq "LEAF\n") && ($npt eq $pt)) { $numHits[$rn] = 0; #so we dont get chosen again. $Index++; if ($Index < $stq) { next; } elsif ($Index > $endIndex) { last; } if ($notprinteddir){ print "
  • $pt\n
      \n"; $notprinteddir =0; }; $url = $allRecords[($rn*$numlines)+$urlN]; chop($url); $title = $allRecords[($rn*$numlines)+$titleN]; chop($title); $title =~ tr/_/ /; $comment = $allRecords[($rn*$numlines)+$commentN]; chop($comment); $numberofhits = $allRecords[($rn*$numlines)+$urlhitsN]; $creationtime = $allRecords[($rn*$numlines)+$creationtimeN]; ($ctsec, $ctmin, $cthour, $ctday, $ctmon, $ctyear, $ctwday, $ctyday, $ctisdst) = gmtime $creationtime; $ctyearf = $ctyear + 1900; if ($ctyear >= 100){ $ctyear -= 100; } $ctfull = localtime $creationtime; $ctmon++; $visittime = $allRecords[($rn*$numlines)+$visittimeN]; ($vtsec, $vtmin, $vthour, $vtday, $vtmon, $vtyear, $vtwday, $vtyday, $vtisdst) = gmtime $visittime; $vtyearf = $vtyear + 1900; if ($vtyear >= 100){ $vtyear -= 100; } $vtfull = localtime $visittime; $vtmon++; $modtime = $allRecords[($rn*$numlines)+$modifiedtimeN]; ($mtsec, $mtmin, $mthour, $mtday, $mtmon, $mtyear, $mtwday, $mtyday, $mtisdst) = gmtime $modtime; $mtyearf = $mtyear + 1900; if ($mtyear >= 100){ $mtyear -= 100; } $mtfull = localtime $modtime; $mtmon++; $urlhtml = $urltemplate; #Use this line (instead of next one) if you want to count hits. # $urlhtml =~ s/%URL/\/cgi-bin\/redirect.pl?url=$url/g; $urlhtml =~ s/%URL/$url/g; $urlhtml =~ s/%TITLE/$title/g; $urlhtml =~ s/%HITS/$numberofhits/g; $urlhtml =~ s/%DAYCRE/$ctday/g; $urlhtml =~ s/%DAYVIS/$vtday/g; $urlhtml =~ s/%DAYMOD/$mtday/g; $urlhtml =~ s/%MONTHCRE1/$ctmon/g; $urlhtml =~ s/%MONTHVIS1/$vtmon/g; $urlhtml =~ s/%MONTHMOD1/$mtmon/g; $urlhtml =~ s/%YEARCRE/$ctyear/g; $urlhtml =~ s/%YEARFCRE/$ctyearf/g; $urlhtml =~ s/%YEARVIS/$vtyear/g; $urlhtml =~ s/%YEARFVIS/$vtyearf/g; $urlhtml =~ s/%YEARMOD/$mtyear/g; $urlhtml =~ s/%YEARFMOD/$mtyearf/g; $urlhtml =~ s/%TIMEFCRE/$ctfull/g; $urlhtml =~ s/%TIMEFVIS/$vtfull/g; $urlhtml =~ s/%TIMEFMOD/$mtfull/g; while ( $urlhtml =~ /%IFCOMHAS\((.*?)\)\((.*?)\)/ ){ $v1 = $1; $v2 = $2; $v1 =~ s/([*+?$.^])/\\$1/g; if ($comment =~ s/$v1//g) { $urlhtml =~ s/%IFCOMHAS\(.*?\)\(.*?\)/$v2/; } else { $urlhtml =~ s/%IFCOMHAS\(.*?\)\(.*?\)//; }; }; if ($creationtime > $oldesttime) { $timestring = "$ctmon/$ctday/$ctyear"; $newgifhtml = $newgif; $newgifhtml =~ s/%URL/$url/g; $newgifhtml =~ s/%TITLE/$title/g; $newgifhtml =~ s/%HITS/$numberofhits/g; $newgifhtml =~ s/%DAYCRE/$ctday/g; $newgifhtml =~ s/%DAYVIS/$vtday/g; $newgifhtml =~ s/%DAYMOD/$mtday/g; $newgifhtml =~ s/%MONTHCRE1/$ctmon/g; $newgifhtml =~ s/%MONTHVIS1/$vtmon/g; $newgifhtml =~ s/%MONTHMOD1/$mtmon/g; $newgifhtml =~ s/%YEARCRE/$ctyear/g; $newgifhtml =~ s/%YEARFCRE/$ctyearf/g; $newgifhtml =~ s/%YEARVIS/$vtyear/g; $newgifhtml =~ s/%YEARFVIS/$vtyearf/g; $newgifhtml =~ s/%YEARMOD/$mtyear/g; $newgifhtml =~ s/%YEARFMOD/$mtyearf/g; $newgifhtml =~ s/%TIMEFCRE/$ctfull/g; $newgifhtml =~ s/%TIMEFVIS/$vtfull/g; $newgifhtml =~ s/%TIMEFMOD/$mtfull/g; $urlhtml =~ s/%NEW/$newgifhtml/g; } else { $urlhtml =~ s/%NEW//g; }; if ($comment ne "") { $urlhtml =~ s/%CONDDASH/-/g; } else { $urlhtml =~ s/%CONDDASH//g; }; $urlhtml =~ s/%(LONG)?COMMENT/$comment/g; # Let the subroutines that emulate bk2site's builtin # functions process the string before sending it to the # resulting document. $urlhtml = processFunctions(qq{$urlhtml}); print "
    • $urlhtml"; } } if ($notprinteddir == 0) { print "
    \n"; } if ($Index > $endIndex) { last; } } } print "
\n"; } if ($stq > 0 || $endIndex < $maxIndex) { print "
"; for ($i =0; $i <= $maxIndex; $i += $num){ if (($stq >= $i) && ($stq < ($i + $num))){ print " $i"; } else { print " $i "; } } print "
"; }; print "\n"; print $afterComment; while (){ if (/(.*)%QUERY(.*)/){ $_ =~ s/%QUERY/$query/g; } if (/(.*)%ESCQUERY(.*)/){ $_ =~ s/%ESCQUERY/$escquery/g; } if (/(.*)%NUMBER(.*)/){ $_ =~ s/%NUMBER/$num/g; } print; } close(DB); sub processFunctions { my ($input, $output_prefix) = @_; my $output; my $prefix; my $function; my $rest; my $args; print STDERR "$output_prefix". "entering processFunctions($input)\n" if $debug_emulateBuiltins; $output_prefix .= " "; if ( $input =~ /(&(FILL|NOHTML|REPLACE|CUT|NOLINEBREAKS|NOACCENTS|FS)\[)/ ) { $prefix = $`; $function = $2; $rest = $'; print STDERR "$output_prefix\$prefix = $prefix\n" if $debug_emulateBuiltins; print STDERR "$output_prefix\$function = $function\n" if $debug_emulateBuiltins; print STDERR "$output_prefix\$rest = $rest\n" if $debug_emulateBuiltins; $args = processFunctions($rest, $output_prefix); print STDERR "$output_prefix\$args = $args\n" if $debug_emulateBuiltins; $res = eval("builtin_$function(q{$args}, \"$output_prefix\")"); if ( !defined($res) ) { # Something was wrong with the arguments to the emulated builtin # function. Ideally we should return the unmodified string # argument, but if builtin_* hadn't been able to parse the input # string it is unfeasible here. I think returning the entire # input string is the best we can do. #$res = $input; $res = "&$function\[$args"; } $output = "$prefix$res"; } else { $output = $input; } print STDERR "$output_prefix" . "returning $output\n" if $debug_emulateBuiltins; return($output); } # the following builtin function is not yet implemented sub builtin_FILL { my ($arg, $output_prefix) = @_; my $result; my $string; my $column; my $rest; print STDERR "${output_prefix}FILL($arg)\n" if $debug_emulateBuiltins; $output_prefix .= " "; if ( $arg =~ /^([^|[\]]*)\|(\d+)\]/ ) { $string = $1; $column = $2; $rest = $'; # OK, how do we have to fill $string to column $column? Apart from # that, I think this kind of layout aspects should be left to the # browser. # At this time we simply take $string as the result of filling. $result = "$string$rest"; } else { # The parameter list if malformed. $result = undef; } print STDERR "${output_prefix}\$result = ${\(defined($result) ? $result : \"undef\" )}\n" if $debug_emulateBuiltins; return($result); } sub builtin_NOHTML { my ($arg, $output_prefix) = @_; my $result; my $string; my $rest; print STDERR "${output_prefix}NOHTML($arg)\n" if $debug_emulateBuiltins; $output_prefix .= " "; if ( $arg =~ /^([^|[\]]*)\]/ ) { $string = $1; $rest = $'; $string =~ s/<[^>]+>//g; $result = "$string$rest"; } else { # The parameter list if malformed. $result = undef; } print STDERR "${output_prefix}\$result = ${\(defined($result) ? $result : \"undef\" )}\n" if $debug_emulateBuiltins; return($result); } sub builtin_REPLACE { my ($arg, $output_prefix) = @_; my $result; my $string; my $from; my $to; my $rest; print STDERR "${output_prefix}REPLACE($arg)\n" if $debug_emulateBuiltins; $output_prefix .= " "; if ( $arg =~ /^([^|[\]]*)\|([^|[\]]*)\|([^|[\]]*)\]/ ) { $string = $1; $from = qr/$2/; $to = $3; $rest = $'; print STDERR "\$string = $string\n" if $debug_emulateBuiltins; print STDERR "\$from = $from\n" if $debug_emulateBuiltins; print STDERR "\$to = $to\n" if $debug_emulateBuiltins; print STDERR "\$rest = $rest\n" if $debug_emulateBuiltins; if ( $string && $from ) { $string =~ s/$from/$to/g; } $result = "$string$rest"; } else { # The parameter list if malformed. $result = undef; } print STDERR "${output_prefix}\$result = ${\(defined($result) ? $result : \"undef\" )}\n" if $debug_emulateBuiltins; return($result); } sub builtin_CUT { my ($arg, $output_prefix) = @_; my $result; my $string; my $n; my $rest; print STDERR "${output_prefix}CUT($arg)\n" if $debug_emulateBuiltins; $output_prefix .= " "; if ( $arg =~ /^([^|[\]]*)\|(\d+)\]/ ) { $string = $1; $n = $2; $rest = $'; $result = "${\(substr($string, 0, $n))}$rest"; } else { # The parameter list if malformed. $result = undef; } print STDERR "${output_prefix}\$result = ${\(defined($result) ? $result : \"undef\" )}\n" if $debug_emulateBuiltins; return($result); } sub builtin_NOLINEBREAKS { my ($arg, $output_prefix) = @_; print STDERR "${output_prefix}NOLINEBREAKS($arg)\n" if $debug_emulateBuiltins; $output_prefix .= " "; if ( $arg =~ /^([^|[\]]*)\]/ ) { $string = $1; $rest = $'; $string =~ s/[\n\b\t]/ /g; $result = "$string$rest"; } else { # The parameter list if malformed. $result = undef; } print STDERR "${output_prefix}\$result = ${\(defined($result) ? $result : \"undef\" )}\n" if $debug_emulateBuiltins; return($result); } # the following builtin function is not yet implemented sub builtin_NOACCENTS { my ($arg, $output_prefix) = @_; print STDERR "${output_prefix}NOACCENTS($arg)\n" if $debug_emulateBuiltins; $output_prefix .= " "; if ( $arg =~ /^([^|[\]]*)\]/ ) { $string = $1; $rest = $'; # What do we have to do with $string??? # At this time we simply take $string as the result. $result = "$string$rest"; } else { # The parameter list if malformed. $result = undef; } print STDERR "${output_prefix}\$result = ${\(defined($result) ? $result : \"undef\" )}\n" if $debug_emulateBuiltins; return($result); } sub builtin_FS { my ($arg, $output_prefix) = @_; print STDERR "${output_prefix}FS($arg)\n" if $debug_emulateBuiltins; $output_prefix .= " "; if ( $arg =~ /^([^|[\]]*)\]/ ) { $string = $1; $rest = $'; $string =~ s/([.!?]).*/$1/; $result = "$string$rest"; } else { # The parameter list if malformed. $result = undef; } print STDERR "${output_prefix}\$result = ${\(defined($result) ? $result : \"undef\" )}\n" if $debug_emulateBuiltins; return($result); }