#!/usr/bin/nice /usr/bin/perl

# Remember when the run started (the elapsed time is printed at the end of
# the script) and ask for a SIGALRM after 360 seconds.
my $startTime = time;
alarm( 360 );

use CGI;
use HTTP::Request;
use LWP::UserAgent;

# Load init.pl when it is present on this machine.
if ( -f "/users/popex/www/cgi-bin/init.pl" ) {
    require( "/users/popex/www/cgi-bin/init.pl" );
    $CFG{"baseDomain"} = "http://www.folkestonegerald.com";
}
else {
    # put in some fake routines...
}

$CFG{"debug"} = 1;
$CFG{"gbbsVociferous"} = 1;

# Referring domains that are never reported on, grouped by the reason given
# for each. Repeated entries are deliberate carry-overs: every mention bumps
# the domain's counter once, exactly as the individual statements did.
foreach my $ignoredDomain (
    "translate.google.com",
    "translate.google.dz",
    "translate.google.it",
    "translate.google.com.mx",
    "translate.google.com.es",
    "translate.google.com.ar",
    "translate.google.ru",
    "translate.google.es",
    "translate.google.pl",
    "translate.google.com.mx",

    # fake
    "forum.clarkeology.com",
    "forum.claremahon.com",
    "forum.folkestonegerald.com",

    # too much spam
    "groups.google.com",
    "mashal.beon.ru",
    "groups.google.de",
    "groups.google.ai",

    # translation
    "203.84.199.31",
    "209.85.135.104",
    "81.21.76.62",
    "209.85.141.104",
    "66.102.9.104",
    "209.85.171.104",
    "209.85.173.104",
    "209.85.171.104",

    # cache
    "209.85.175.104",
    "209.85.215.104",

    # translation
    "de.babelfish.yahoo.com",
    "tw.info.search.yahoo.com",

    "images.google.com",
) {
    $CFG{"badDomain"}->{$ignoredDomain} ++;
}
# Google image-search hosts to ignore as referrers. A few hosts are listed
# more than once; each mention still bumps that host's counter once.
foreach my $imageDomain ( qw(
    images.google.com.vn
    images.google.com.eg
    images.google.si
    images.google.is
    images.google.hu
    images.google.it
    images.google.lt
    images.google.hr
    images.google.ba
    images.google.cz
    images.google.com.pk
    images.google.com.hk
    images.google.co.ma
    images.google.co.th
    images.google.co.id
    images.google.co.il
    images.google.com.bh
    images.google.com.sa
    images.google.com.sv
    images.google.ae
    images.google.bg
    images.google.lv
    images.google.gr
    images.google.co.ve
    images.google.co.sk
    images.google.sk
    images.google.co.cl
    images.google.co.ma
    images.google.com.bh
    images.google.com.ec
    images.google.co.in
    images.google.cl
    images.google.co.ec
    images.google.com.co
    images.google.com.ar
    images.google.pt
    images.google.ro
    images.google.ru
    images.google.co.jp
    images.google.co.sk
    images.google.co.nz
    images.google.com.mt
    images.google.com.my
) ) {
    $CFG{"badDomain"}->{$imageDomain} ++;
}
# More referring domains to ignore. "images.google.fr" is listed twice; each
# mention still bumps its counter once.
foreach my $ignoredDomain ( qw(
    images.google.es
    images.google.pl
    images.google.co.uk
    images.google.fr
    images.google.be
    images.google.com.tw
    images.google.com.br
    images.google.com.gr
    images.google.at
    images.google.it
    images.google.ca
    images.google.ch
    images.google.no
    images.google.com.sg
    images.google.com.in
    images.google.se
    images.google.com.mx
    images.google.com.ph
    images.google.fr
    images.google.de
    images.wwwgoogle.de
    images.google.dk
    images.google.ie
    images.google.fi
    images.google.com.au
    images.google.com.uy
    images.google.co.hu
    images.google.co.za
    www.orgww.clarkeology.com
    www.virtual-mall.orgww.clarkeology.com
) ) {
    $CFG{"badDomain"}->{$ignoredDomain} ++;
}

# $CFG{"altBaseDomains"} ||= [ $CFG{"baseDomain"}, "www.clarkeology.com", "www.folkestonegerald.com", "www.elstowlodge.com", "www.themarbleplasteringcompany.com", "www.popex.com" ];
$CFG{"altBaseDomains"} ||= [ $CFG{"baseDomain"} ];

$CFG{"refererCodeCredit"} ||= p(
    # "Original script by " .
    # a( { href => "http://www.popdex.com/google.php" }, "Popdex.com" ) .
    # ", " .
    a( { href => "http://www.clarkeology.com/refererCode" }, "Pauly's source here somewhere" ) . "."
);

# alarm( 60 );

use strict;
use vars qw( %CFG %count );

# Google Search Request Script - originally by Shanti Braford
# ------------------------------------------------
# This script parses your Apache logs to determine Google search
# requests used to access your site.
#
# LICENSE
#
# This script is free for COMMERCIAL or NON-COMMERCIAL use,
# so long as credit is given in the form of a link back
# to Popdex.com. Feel free to post, distribute, or modify this code,
# so long as this LICENSE is included in your
# re-distribution.

# I will tidy this up sometime soon so it works without needing
# my init.pl script up the top there!
# The original at http://www.popdex.com works just fine, but just for google.

# Thresholds and list sizes; anything already set (e.g. by init.pl) wins.
$CFG{"fakeRefererMin"}     ||= 5;
$CFG{"displayAllMin"}      ||= 1;
$CFG{"displayAllMax"}      ||= 100;
$CFG{"displaySideBarMax"}  ||= 10;
$CFG{"displaySideBarMin"}  ||= 1;
$CFG{"minWordCount"}       ||= 25;
$CFG{"minQVarCount"}       ||= 20;
$CFG{"minWordPairCount"}   ||= 1;
$CFG{"minGoogleSpelling"}  ||= 1;
$CFG{"maxGoogleSpelling"}  ||= 10;
$CFG{"refererMin"}         ||= 1;
$CFG{"latestQueriesCount"} ||= 20;
$CFG{"offsiteListMin"}     ||= 10;
$CFG{"offsiteListMax"}     ||= 100;

my $engine = engine();

# Everything gathered while reading the logs.
my %chart;                  # {"referer"}{domain}{path} counts, {"baseDomain"}{page}{query} site
my %qVarCount;              # how often each query variable name produced a query
my %offsite;                # framed page => referring url => count
my %offsitePage;
my %nonSearchDomainCount;   # plain (non search engine) referring domains
my %pairCount;              # word => other word => query => page => count
my %searchTerms;            # query => page => count
my %searchWords;            # word => page => query => page => count
my %googleSpelling;         # query => page => count, for google "spell=1" referrers
my @latestQueries = ();     # ring buffer of [ query, page ]
my $totalQueryCount = 0;
my $previousQuery = 0;
my $loggedTransfer = 0;

my $dbh = dbh();
getHosts( $dbh );

# Each command line argument is an access log to read.
# foreach my $file ( param( "file" )) {
foreach my $file ( @ARGV ) {
    my $baseDomain;
    debug( "file is " . $file );
    if ( $file =~ m!/(www\.[^/]+)! ) {
        $baseDomain = "http://" . $1;
        debug( "baseDomain is " . $baseDomain );
    }

    # Three-argument open on a lexical handle; a log that cannot be opened
    # is skipped rather than "read" through a dead handle.
    my $log;
    if ( not open( $log, "<", $file )) {
        warn "can't open " . $file . ": $!";
        next;
    }

    LINE: while ( my $line = <$log> ) {
        # debug( $line );
        my @line = split( / /, $line );
        my $host = $line[0];
        my $get  = $line[6];
        my $url  = $line[10];

        # Trim the " off either end
        $url =~ s/"$//;
        $url =~ s/^"//;

        # Trim the end / off so that
        # http://foo/1/2/3/ is the same as
        # http://foo/1/2/3
        $get =~ s/\/$//;
        # debug( "Checking " . $get . " and " . $url );

        # Requests for scripts, stylesheets and images are skipped.
        next LINE if ( $get =~ /\.(?:js|css|gif|jpg|png)$/ );

        # Only lines with an http:// referrer are looked at.
        next LINE if ( $url !~ /http:\/\/(.*)/ );
        my ( $domain, $rest ) = getdomain_combined_with_www( formatlink( $1 ));

        if ( my $frame = $CFG{"framePrefix"} ) {
            if ( $get =~ /^$frame\/?(.*)/ ) {
                # debug( $get . " was like " . $frame );
                $offsite{$1}{$url} ++;
            }
        }

        # some sort of homepage link
        next LINE if ( not $rest or $rest eq "/" );

        # skipping known bad domains
        next LINE if ( $CFG{"badDomain"}->{$domain} );

        # bit draconian!
        next LINE if ( $line =~ /shemale|rape|penis|porn|nude|masturbation|naked|titmus|titmas|topless|milf|lust|incest|intitle/i );

        # numeric only domain
        next LINE if ( $CFG{"skipUnresolvedIPs"} and $domain !~ /[a-z]/i );

        $loggedTransfer += $line[9];
        $CFG{"earliestDate"} ||= $line[3];
        $CFG{"latestDate"} = $line[3];

        my $matched = 0;
        if ( my $result = filter( $domain, $rest )) {
            # The engine entry holds either one query variable or a list of them.
            my $qVars = $engine->[$result]->[1];
            my @qVar = ( ref( $qVars ) eq "ARRAY" ) ? @{ $qVars } : ( $qVars );

            QVAR: foreach my $qVar ( @qVar ) {
                # debug( $rest );
                my $query;
                # $qVar =~ s/[^\w]$/g;
                if ( $qVar =~ /[^\w]$/ ) {
                    # a non 'q=foo' type match: the marker is followed
                    # directly by the search term. \Q..\E keeps characters
                    # in the marker from being read as regex syntax.
                    if ( $rest =~ m/\Q$qVar\E([^\/&]+)/i ) {
                        $query = $1;
                    }
                }
                # elsif ( $rest =~ m/\W$qVar=([^&\/]+)[&]*/i ) {
                elsif ( $rest =~ m/\W\Q$qVar\E=([^&\/]+)/i ) {
                    $query = $1;
                }
                # elsif ( $rest =~ m/\w:[^+]*([^&]*)/i ) {
                #     debug( "Got " . $1 . " out of " . $domain . $rest );
                #     $query = $1;
                # }
                next QVAR if ( not $query );

                $get =~ s/%20/+/g;
                $get =~ s/cgi-bin\/board.cgi\///g;
                $qVarCount{$qVar} ++;

                # Blank out percent-escapes (both hex digits, so %C3 goes the
                # same way as %20), then every other non-word character.
                $query =~ s/%[0-9a-f]{2}/ /ig;
                $query =~ s/\W/ /g;
                $query =~ s/\s+/ /g;

                # Trim spaces off the ends
                $query =~ s/^\s+//;
                $query =~ s/\s+$//;
                next QVAR if ( $query !~ /\w/ );

                # Now sort out the case
                # $query = ucfirst( lc( $query ));
                $query = lc( $query );

                # Remember the most recent distinct queries in a ring buffer.
                if ( $previousQuery ne $query ) {
                    my $slot = $totalQueryCount ++ % $CFG{"latestQueriesCount"};
                    $latestQueries[ $slot ] = [ $query, $get ];
                    $previousQuery = $query;
                }

                if ( $domain =~ /google/ and $rest =~ /spell\=1$/ ) {
                    $googleSpelling{$query}{$get} ++;
                    # debug( "spelling? " . $rest );
                }

                if ( $baseDomain ) {
                    $chart{"baseDomain"}{$get}{$query} ||= $baseDomain;
                    debug( "\$chart{baseDomain}{" . $get . "}{" . $query . "} ||= " . $baseDomain );
                }

                $searchTerms{$query}{$get} ++;

                # Keep the words that are not listed in tooCommonWords. The
                # test is on whole words, so "he" is not dropped merely
                # because "the" is listed.
                my @words = ();
                foreach my $word ( split( /\W+/, $query )) {
                    if ( $CFG{"tooCommonWords"} !~ /\b\Q$word\E\b/i ) {
                        push( @words, $word );
                    }
                }

                foreach my $word ( @words ) {
                    $searchWords{$word}{$get}{$query}{$get} ++;
                    foreach my $word2 ( @words ) {
                        if ( $word ne $word2 ) {
                            $pairCount{$word}{$word2}{$query}{$get} ++;
                        }
                    }
                }

                $matched ++;
                last QVAR;
            }
            # if ( not $matched
            #         and $rest !~ m/encquery=/i ) {
            #     debug( $domain . " - " . join( " AND ", @qVar ) . " missing from " . $rest );
            # }
        }

        if ( not $matched ) {
            # Not a search: decide whether it is a genuine referring page.
            verifyRefererDomain( $dbh, $domain, $host, $url, $line );

            if ( $CFG{"goodDomain"}->{$domain} ) {
                $nonSearchDomainCount{$domain} ++;
                $chart{"referer"}{$domain}{$rest} ++;
                debug( "\$referer{" . $domain . "}{" . $rest . "} = " . $chart{"referer"}{$domain}{$rest} );
            }
            else {
                debug( $domain . " is a BAD" );
            }
        }
    }

    close( $log ) or warn "can't close " . $file . ": $!";
}

$CFG{"earliestDate"} =~ s/[^\w\d:\/]//g;
$CFG{"latestDate"} =~ s/[^\w\d:\/]//g;
debug( "Earliest date is " . $CFG{"earliestDate"} );

# Write each report into its own template file.
writeReportTemplate( "offsiteList", reportList( {
    reportHash => \%offsite,
    title      => message( "offsiteList" ),
    min        => $CFG{"offsiteListMin"},
    max        => $CFG{"offsiteListMax"}
} ));

writeReportTemplate( "searchList", reportList( {
    reportHash => \%searchTerms,
    title      => message( "searchTerms" ),
    min        => $CFG{"displayAllMin"},
    max        => $CFG{"displayAllMax"},
    credit     => 1
} ));

writeReportTemplate( "searchWordList", reportList( {
    reportHash => \%searchWords,
    title      => message( "searchWords" ),
    min        => $CFG{"minWordCount"},
    max        => $CFG{"displayAllMax"}
} ));

# "minRefererCount" is the key this report has always read; the default
# above is stored under "refererMin", so fall back to that.
writeReportTemplate( "referer", reportList( {
    reportHash => $chart{"referer"},
    title      => message( "referer" ),
    urlPrefix  => "http://",
    min        => $CFG{"minRefererCount"} || $CFG{"refererMin"},
    max        => $CFG{"displayAllMax"},
    credit     => 1
} ));

writeReportTemplate( "sideBarSearchList", reportList( {
    reportHash => \%searchTerms,
    title      => message( "searchTerms" ),
    min        => $CFG{"displaySideBarMin"},
    max        => $CFG{"displaySideBarMax"},
    noRecurse  => 1
} ));

{
    my @niceLatest;
    foreach my $i ( 0 .. $#latestQueries ) {
        my ( $query, $page ) = @{ $latestQueries[$i] };
        push( @niceLatest, a(
            {
                href  => reassemble( $page, $query ),
                title => "Most recent queries, #" . ( $i + 1 ) . "\n(What people were searching for when they landed here)"
            },
            neatLabel( $query )
        ));
    }
    # The template is still (re)written, empty, when there are no queries.
    writeReportTemplate( "latestQueriesSideBar", scalar( @niceLatest ) ? ol( li( \@niceLatest )) : "" );
}

writeReportTemplate( "googleSpelling", reportList( {
    title      => message( "googleSpelling" ),
    reportHash => \%googleSpelling,
    min        => 1,
    max        => 20
} ));

writeReportTemplate( "wordPairList", reportList( {
    reportHash => \%pairCount,
    title      => message( "wordPairList" ),
    min        => $CFG{"minWordPairCount"}
} ));

writeReportTemplate( "qVarCount", reportList( {
    reportHash => \%qVarCount,
    title      => message( "qVarCount" ),
    min        => $CFG{"minQVarCount"}
} ));

# printf( "Logged %d byte, or %d k, or %d mb, or %d gb ", $loggedTransfer, $loggedTransfer / 1024, $loggedTransfer / ( 1024 * 1024 ), $loogedTransfer / ( 1024 * 1024 * 1024 ));
printf( "Logged %d mb from %s to %s, ", $loggedTransfer / ( 1024 * 1024 ), $CFG{"earliestDate"}, $CFG{"latestDate"} );

$dbh->disconnect; # if $dbh;

printf( "Took %d mins and %d secs to run.\n",
    ( time - $startTime ) / 60,
    ( time - $startTime ) % 60,
);

# Write $content to the template file named by templateName( $name ).
# The file is created/truncated even when $content is empty. Returns 1 on
# success; warns and returns nothing when the file cannot be opened.
sub writeReportTemplate {
    my ( $name, $content ) = @_;
    my $path = templateName( $name );
    my $out;
    if ( not open( $out, ">", $path )) {
        warn "can't write " . $path . ": $!";
        return;
    }
    print $out $content if ( defined( $content ) and length( $content ));
    close( $out ) or warn "can't close " . $path . ": $!";
    return 1;
}

# Classify a referring domain that is not yet known to be good or bad.
# The referring page ($url) is fetched; if it mentions one of our own
# domains the referrer is recorded as good, otherwise as bad. The verdict is
# stored with doSQL() and mirrored into $CFG{"goodDomain"} /
# $CFG{"badDomain"}. A failed fetch marks the domain bad for this run only.
# $host is the client address from the log line, $line the raw log line.
sub verifyRefererDomain {
    my ( $dbh, $domain, $host, $url, $line ) = @_;
    return if ( $CFG{"goodDomain"}->{$domain} or $CFG{"badDomain"}->{$domain} );

    debug( "Checking " . $url . " for a real mention of " . $CFG{"baseDomain"} );
    my $ua = LWP::UserAgent->new( "timeout" => 3 );
    $ua->agent( "bot\@" . $CFG{"baseDomain"} . " - http://www.clarkeology.com/refererCode - " . $ua->agent );
    my $req = HTTP::Request->new( "GET" => $url );
    # $req->referrer( "http://www.clarkeology.com/refererCode" ); # don't do this!
    my $res = $ua->request( $req );

    if ( not $res->is_success ) {
        debug( $url . " failed" );
        # Temporarily call it a bad domain
        $CFG{"badDomain"}->{$domain} ++;
        return;
    }

    my $bad = 1;
    foreach my $baseDomain ( @{ $CFG{"altBaseDomains"} } ) {
        # \Q..\E: the domain is a literal string, not a pattern.
        if ( $res->content =~ /\Q$baseDomain\E/i ) {
            debug( "It's really there!" );
            $bad = 0;
            debug( stripTagsFrom( $res->content ));
            last;
        }
    }

    if ( my $sth = doSQL( $dbh, "insert domain set domain = ?, host = ?, dateAdded = ?, bad = ?", $domain, $host, time, $bad )) {
        $sth->finish;
        if ( $bad ) {
            debug( "BAD: " . $line );
            $CFG{"badDomain"}->{$domain} ++;
        }
        else {
            $CFG{"goodDomain"}->{$domain} ++;
        }
    }
    return;
}

# Prime $CFG{"badDomain"} and $CFG{"goodDomain"} before any log is read:
# our own domains are marked bad, then the verdicts stored in the "domain"
# table are loaded. A stored bad verdict only counts once it has been
# recorded more than $CFG{"fakeRefererMin"} times.
sub getHosts {
    my $dbh = shift;

    # Don't want to report on my own domains here...
    foreach my $baseDomain ( @{ $CFG{"altBaseDomains"} } ) {
        next if ( not defined( $baseDomain ));
        $CFG{"badDomain"}->{$baseDomain} ++;

        # Domains taken from the logs never carry a scheme, so an entry such
        # as "http://www.example.com" must also be listed as a bare host or
        # it can never match.
        ( my $bareHost = $baseDomain ) =~ s!^\w+://!!;
        $bareHost =~ s!/.*!!;
        $bareHost = lc( $bareHost );
        if ( length( $bareHost ) and $bareHost ne $baseDomain ) {
            $CFG{"badDomain"}->{$bareHost} ++;
        }
    }

    my $sth = doSQL( $dbh, "select domain, dateAdded, bad, count(*) as c from domain group by domain, bad order by dateAdded desc" );
    return if ( not $sth );

    while ( my $ref = $sth->fetchrow_hashref( )) {
        if ( not $ref->{"bad"} ) {
            $CFG{"goodDomain"} ||= {};
            $CFG{"goodDomain"}->{$ref->{"domain"}} ++;
        }
        elsif ( $ref->{"c"} > $CFG{"fakeRefererMin"} ) {
            $CFG{"badDomain"} ||= {};
            $CFG{"badDomain"}->{$ref->{"domain"}} ++;
        }
        # else { debug( "Only " . $ref->{"c"} . " times for " . $ref->{"domain"} ); }
    }
    $sth->finish;
    return;
}

# Render a (possibly nested) hash of counts as an ordered list, biggest
# total first. Takes one hash ref with the keys:
#   reportHash  hash ref to report on; totals come from count()
#   title       title attribute for the list (defaults to "title")
#   min         entries with a smaller total are left out (default 1)
#   max         at most this many entries are listed (default 10)
#   urlPrefix   prepended to each key when it is turned into a link
#   label       label handed on to myLink()
#   credit      true to append $CFG{"refererCodeCredit"}
#   noRecurse   accepted, but currently has no effect
# Returns the markup as a string ("" for an empty hash), with every "%20"
# turned into "+".
sub reportList {
    my $param = shift;
    my $reportHash = $param->{"reportHash"} || {};
    my $returnString = "";

    if ( scalar( keys( %{ $reportHash } ))) {
        $param->{"min"} ||= 1;
        $param->{"max"} ||= 10;

        my @byTotal = sort {
            count( $reportHash->{$b} ) <=> count( $reportHash->{$a} )
        } keys( %{ $reportHash } );

        my @sortedResults = ();
        foreach my $query ( @byTotal ) {
            my $count = count( $reportHash->{$query} );
            next if ( $count < $param->{"min"} );

            push( @sortedResults, join( "\n",
                myLink( { query => $query, urlPrefix => $param->{"urlPrefix"}, label => $param->{"label"} } ),
                countLink( $count )
            ));
            last if ( scalar( @sortedResults ) >= $param->{"max"} );
        }

        if ( scalar( @sortedResults )) {
            $returnString .= ol( { title => $param->{"title"} || "title" }, li( \@sortedResults ));
        }
        $returnString .= $CFG{"refererCodeCredit"} if ( $param->{"credit"} );
    }
    # else { debug( "empty hash passed in" ); }

    $returnString =~ s/%20/+/g;
    return $returnString;
}

# True when $string starts like a url: either "scheme://" (http://foo.bar.etc)
# or a leading slash (/foobar.html).
sub isURL {
    my $string = shift;
    # or not $string # homepage
    return ( $string =~ /^\w+:\/\// or $string =~ /^\// ) ? 1 : 0;
}

# Turn one report entry into markup. Takes a hash ref with "query",
# "urlPrefix" and "label". When urlPrefix . query looks like a url the entry
# becomes a nofollow link to it; otherwise, if securityCleared() allows
# message searches, a link to the site's page for that query; otherwise the
# plain text.
sub myLink {
    my $param = shift;
    my $target = $param->{"urlPrefix"} . $param->{"query"};

    if ( isURL( $target )) {
        return a(
            { rel => "nofollow", href => reassemble( $target, $param->{"label"} ) },
            neatLabel( $param->{"label"} || $param->{"query"} )
        );
    }

    if ( securityCleared( { function => "table=message&mode=find" } )) {
        return a(
            { href => $CFG{"baseDomain"} . "/" . lc( myUriEscape( $param->{"query"} )) },
            $param->{"query"}
        );
    }

    return $param->{"query"} || $param->{"label"};
}

# Total up a count structure: a plain value is its own count, a hash ref is
# the sum of the counts of its values (recursively). Totals for hash refs
# are cached in %count, keyed by the stringified reference. Any other kind
# of reference counts as 0.
sub count {
    my $thing = shift;

    if ( ref( $thing ) eq "HASH" ) {
        my $total = $count{$thing};
        if ( not $total ) {
            $total = 0;
            foreach my $key ( keys( %{ $thing } )) {
                $total += count( $thing->{$key} );
            }
            $count{$thing} = $total;
        }
        return $total;
    }

    return 0 if ( ref( $thing ));
    return $thing;
}

# Render a count: as "[n]" linked to $CFG{"searchLink"} when that is set,
# otherwise as the bare number.
sub countLink {
    my $count = count( shift );
    return $count if ( not $CFG{"searchLink"} );
    # title => "From " . $CFG{"earliestDate"} . " to " . $CFG{"latestDate"}
    return a( { href => $CFG{"baseDomain"} . $CFG{"searchLink"} }, "[" . $count . "]" );
}

# Make a label fit for display. An empty label becomes the bracketed
# "homepage" message; a label longer than $CFG{"maxLabelLength"} (default
# 100) is cut down to its first and last halves joined by " ... ".
sub neatLabel {
    my $label = shift;
    $label ||= "[" . message( "homepage" ) . "]";

    my $len = $CFG{"maxLabelLength"} || 100;
    if ( length( $label ) > $len ) {
        my $half = int( $len / 2 );
        # substr with a negative offset counts from the end of the string;
        # an offset of -0 would return the whole label, hence the guard.
        my $tail = $half ? substr( $label, -$half ) : "";
        $label = substr( $label, 0, $half ) . " ... " . $tail;
    }
    return $label;
}

# sub makeGoogleLink {
#     my $query = shift;
#     my $plusCodedQuery = $query;
#     $plusCodedQuery =~ s/ /\+/ig;
#     my $googlelink = "http://www.google.com/search?q=" . $plusCodedQuery;
#     return a( { rel => "nofollow", href => $googlelink, title => "google " . $query }, "g" );
# }

# Returns search engine number
# if it can find a search engine reference
# (the index into the engine() table), or 0 when $domain matches no engine
# or $domain . $rest has already been counted as a plain referrer.
sub filter {
    my $domain = shift;
    my $rest = shift;

    # Already seen as an ordinary referrer, so it's not an engine.
    return 0 if ( $chart{"referer"}{$domain}{$rest} );

    # Slot 0 is the blank "no match" entry, so start at 1.
    foreach my $i ( 1 .. $#{ $engine } ) {
        my $engineName = $engine->[$i]->[0];
        # NOTE(review): the name is used as a regex, so "." matches any
        # character - confirm before quoting it.
        return $i if ( $domain =~ /$engineName/i );
    }
    return 0;
}

# Turn a site-relative page into a full url. The domain comes from
# $chart{"baseDomain"}{$url}{$query} when the page/query pair was seen in a
# log, else from $CFG{"baseDomain"}. A page that is a directory under
# $CFG{"baseDir"} gets a trailing slash. Anything not starting with "/" is
# returned unchanged.
sub reassemble {
    my $url = shift;
    my $query = shift;
    debug( "reassemble " . $CFG{"baseDir"} . $url );

    my $baseDomain = $chart{"baseDomain"}{$url}{$query} || $CFG{"baseDomain"};
    debug( "baseDomain is " . $baseDomain );

    if ( -d $CFG{"baseDir"} . $url ) {
        $url = $baseDomain . $url . "/";
    }
    if ( $url =~ /^\// ) {
        $url = $baseDomain . $url;
    }
    debug( $url );
    return $url;
}

# Make sure a scheme-less link has a path: "foo.com" becomes "foo.com/",
# anything already containing a slash is returned as it is.
sub formatlink {
    my $inlink = $_[0];
    return ( $inlink =~ /\// ) ? $inlink : $inlink . "/";
}

# returns lowercase domain + /path/to/doc.html or just domain + /
# blogspot.com/ returns www.blogspot.com/
# The result is the list ( domain, rest ); rest has any "#fragment" removed
# and is undef when the input contains no slash at all.
sub getdomain_combined_with_www {
    my $preurl = shift;
    my $domain = $preurl;
    my $rest;

    # Split at the first slash without using $` and $'.
    if ( $preurl =~ m{\A([^/]*)/(.*)\z}s ) {
        $domain = $1;
        $rest = "/" . $2;
    }

    # A bare two-part name gets "www." in front.
    my @rootsize = split( /\./, $domain );
    if ( scalar( @rootsize ) == 2 ) {
        $domain = "www." . $domain;
    }

    $rest =~ s/\#.*// if ( defined( $rest ));
    return( lc( $domain ), $rest );
}

sub engine {
    # An array of references, each of those refs is to an array of two elements,
    # the first is a string that will match the domain of the search engine,
    # and the second can be one of two things; either a string that is the variable
    # name that identifies the search term, like this:
    #
    # [ "bbc.co.uk", "q" ],
    #
    # would get "thing" out of
    #
    # http://www.bbc.co.uk/search?q=thing&foo=bar
    # http://news.bbc.co.uk/search?q=thing&more=etc
    #
    # If the search query doesn't contain an equal, it's usually marked
    # with a forward slash, so for the variable give the marker, ending in /,
    # so that:
    #
    # [ "bbc.co.uk", "search/web/" ],
    #
    # would get "thing" out of
    #
    # http://www.bbc.co.uk/search/web/thing
    #
    # OR a reference to another array, containing all the possible variables that
    # might identify the search term:
    #
    # [ "google.", [ "q", "as_q", "as_epq", "web/" ] ],
    #
    # would get "thing" out of
    #
    # http://www.google.com/search?q=thing
    # http://www.google.co.uk/search?as_q=thing
    # http://www.google.de/search?as_epq=thing
    # http://www.google.es/search/web/thing
    #
    # First element must be blank, so a "non match" is zero...
return [ [ "", "" ], [ "yahoo.", [ "p", "va", "vp", "K", "box" ] ], [ "google.", [ "q", "query", "as_q", "as_epq" ] ], [ "aol.", [ "q", "as_q", "query", "userQuery", "query_contain" ] ], [ "msn.", [ "q", "MT", "oq", "searchString" ] ], [ "cbbc.co", "qry" ], [ "bbc.co.uk", "q" ], [ "dogpile.", [ "images/", "web/", "qkw", "q_all", "q_phrase" ] ], # [ "chillisearch.", [ "a/", "b/", "c/", "d/", "e/", "f/", "g/", "h/", "i/", "j/", "k/", "l/", "m/", "n/", "o/", "p/", "q/", "r/", "s/", "t/", "u/", "v/", "w/", "x/", "y/", "z/" ]], # [ "meta4.uk.", [ "a/", "b/", "c/", "d/", "e/", "f/", "g/", "h/", "i/", "j/", "k/", "l/", "m/", "n/", "o/", "p/", "q/", "r/", "s/", "t/", "u/", "v/", "w/", "x/", "y/", "z/" ]], [ "chillisearch.", "/" ], [ "jiffysearch.", "qry" ], [ "meta4.uk.", "/" ], [ "search.blogger.com", [ "q", "as_q" ]], [ "wowway.com", "q" ], [ "overture.btopenworld", "query" ], [ "rapidsearch.co.uk", "qry" ], [ "fundle.co.uk", "qry" ], [ "ilsearch.com", "search" ], [ "snsrch.net", "keywords" ], [ "isearch.", "Terms" ], [ "searchreslt.", "ST" ], [ "publiweb.it.", "a/" ], [ "fundle.", "fun/" ], [ "goinginto.", [ "web/", "keywords" ]], [ "search.oxide.com", [ "web/" ]], [ "onspeedsearch.", "keyword" ], [ "2020search.com", [ "st", "Keywords" ]], [ "blinksearch.com", "keywords" ], [ "marsfind.com", "Keywords" ], [ "theadultchannel.co.uk", "Keywords" ], [ "popularsearch.net", "keywords" ], [ "3721.com", [ "p", "name" ]], [ "blowsearch.com", [ "qkw", "keywords" ]], [ "find-and-buy.co.uk", "search" ], [ "infospace.", [ "images/", "web/", "qkw" ]], [ "bossknowsbest.", [ "/" ]], [ "lycos.", [ "query", "QW" ] ], [ "sympatico.", "query" ], [ "robolink.", "query" ], [ "search.virgilio.it", "qs" ], [ "todaysyellowpages", "query" ], [ "b2byellowpages", "textfield" ], [ "search.naver.com", "query" ], [ "s.teoma.com", "q" ], [ "newmusiccentral.", "/" ], [ "searchforit.", "keywords" ], [ "search.iwon.com", "searchfor" ], [ "myway.", "searchfor" ], [ "sapo.pt", [ "search", "q" ] ], [ 
"search.earthlink.net", "q" ], [ "askjack.", [ "keywords", "qry" ] ], [ "search.f2.com.au", "q" ], [ "wanadoo.", [ "q", "Keywords" ]], [ "freeserve.", "q" ], [ "directhit.", [ "qry", "q" ] ], [ "ask.", [ "ask", "q", "qry" ] ], [ "askjeeves.", [ "ask", "q" ] ], [ "szukaj.wp.pl", [ "szukaj_and", "szukaj" ]], [ "szukaj.onet.pl", "qt" ], [ "souany.", "recherche" ], [ "jeez.co.uk", "result/" ], [ "kasaa.com", "term" ], [ "quasimondo.", "q" ], [ "fush.com", "t" ], [ "\.aj.", "ask" ], [ "altavista.", [ "q", "aqp", "aqa", "aqb" ] ], [ "alta-vista.", "q" ], [ "altavisa.com", "q" ], [ "altaista.", "q" ], [ "smartfroggy.", "ss" ], [ "mysearchnet.org", [ "s", "Keywords" ]], [ "urlportfolio.", "keyword" ], [ "webtv.net", "q" ], [ "lukol.com", [ "st", "p" ]], [ "breathe.com", "keyword" ], [ "lyrc.com.ar", "songname" ], [ "cyberbritain.co.uk", "Keywords" ], [ "bonzi.com", "keywords" ], [ "ntlworld", "q" ], [ "virginmedia", "q" ], [ "fullsearch", "/" ], [ "technorati.", [ "tag", "url" ]], [ "go2net.", [ "general", "query" ] ], [ "goeureka.", [ "key", "terms" ]], [ "metaeureka.", "terms" ], [ "eurekster.", "w" ], # [ "megaspider.", "?" 
], [ "megasearch.", "s" ], [ "hotbot.", [ "MT", "query" ] ], [ "allthesites", "query" ], [ "instafinder.", [ "qry", "Keywords" ] ], [ "sitefinder.verisign.com", [ "sb", "kw" ] ], [ "netscape.", [ "s", "query", "q", "userQuery" ] ], [ "looksmart.", [ "key", "qt" ]], [ "fitseek.com", "keywords" ], [ "seek2.", "seek" ], [ "usseek.", "string" ], [ "infoseek.", "qt" ], [ "perfectnav.", "Keywords" ], [ "collegeclub.", "Keywords" ], [ "greatsearch.info", [ "Keywords", "s" ] ], [ "sapivi.", "uk/" ], [ "mytelus.com", "q" ], [ "elevonsearch", [ "s", "Keywords" ]], [ "1truesearch.com", "qry" ], [ "blowsearch.com", "qkw" ], [ "boldsearch.com", "keywords" ], [ "universalsearcher.", "w" ], [ "uksearcher.co.uk", "qry" ], [ "uksearch.com", "qry" ], [ "7search", "qu" ], [ "uk20.co.uk", "q" ], [ "britishwebsearch.com", [ "keyword", "Keywords" ]], [ "websearch.cs.com", "query" ], [ "mywebsearch.com", "searchfor" ], [ "websearch.com", [ "web/", "qkw", "/" ]], [ "mysearch.com", "searchfor" ], [ "myquicksearch.com", "searchfor" ], [ "quickbrowsersearch.com", "Keywords" ], [ "mywebsearch.com", "searchfor" ], [ "sirsearch.com", "Keywords" ], [ "wolsearch.com", "searchword" ], [ ".cometsystems.com", "qry" ], [ "giantexplorer.com", "Keywords" ], [ "search2525.com", "keywords" ], [ "real.com", "query" ], [ "howstuffworks.com", "terms" ], [ "findipod.com", "qry_str" ], [ "veoda.com", "qry_str" ], [ "metaspinner.com", "qry_str" ], [ "search123.com", "QUERY" ], [ "sohu.com", "search" ], [ "turbo10.com", [ "s", "q" ] ], [ "search.iol.ie", "q" ], [ "/search/cache", "p" ], [ ".mweb.co.za", "q" ], [ ".web.de", "su" ], [ "toile.com", "q" ], [ "joeshapr.com", "keywords" ], [ "mirago.co.uk", [ "qry", "txtSearch" ] ], [ "canoe.ca", "q" ], [ "shaw.ca", "q" ], [ "ivillage.com", "q" ], [ "myvillage.com", "searchparams" ], [ "searchy.co.uk", "search_term" ], [ "SearchServlet", "QRY" ], [ "search.peoplepc.com", "q" ], [ "icq.com", "q" ], [ "icqit.com", "q" ], [ "search.kvasir.no", "q" ], [ "searchuk.", 
"search" ], [ "a-z-search-directory.", "qry" ], [ "search.redbox.cz", "search_string" ], [ "ilse.nl", "search_for" ], [ "skeech.", "query" ], [ "zoekinalles.nl", "keywords" ], [ "vinden.nl", "query" ], [ "zoek.vinden.nl", "qry" ], [ "zworks.com", [ "what", "what_2" ] ], [ "uzoekt.nl", "searchstring" ], [ "trouvez.com", "query" ], [ "seznam.cz", [ "q", "w" ]], [ "freshget.", "qry" ], [ "fresheye.com", "kw" ], [ "about.com", "terms" ], [ "goto.", "keywords" ], [ "snap.", "keyword" ], [ "club-internet.fr", "q" ], [ "bushinternet.", "keywords" ], [ "surfy.co", "search" ], [ "alltheinternet.", [ "query", "q" ] ], [ "alltheweb.", [ "query", "q", "b_query", "_b_query" ] ], [ "northernlight.", "qr" ], [ "dmoz.", "search" ], [ "newhoo.", "search" ], [ "avantfind.", "Keywords" ], [ "netfind.", "query" ], [ "find.uk.net", "qry" ], [ "search.biglobe.", "q" ], [ "sexinside.net", "q" ], [ "imdb.com", "q" ], [ "libero.it", "query" ], [ "bbsearch.", "q" ], [ "oingo.", [ "s", "Keywords" ] ], [ "goodsearch.info", [ "s", "Keywords" ] ], [ "optusnet.", [ "key", "p" ]], [ "exactresult.com", "keywords" ], [ "quicksearches.net", [ "Keywords", "s" ] ], [ "searches.com", "keywords"], [ "searchanything.co.uk", "Keywords" ], [ "zuvio.com", "keyword" ], [ "overture.com", "Keywords" ], [ "worldnet.att", [ "Keywords", "qry" ] ], [ "artistdirect.com", "artist" ], [ "seznam.cs", "w" ], [ "ieplugin.com", "q" ], [ "azlyrics.com", "q" ], [ "search66.com", "q" ], [ "amazon.co.uk", "search" ], [ "amazon.com", [ "keyword", "websearch.field-keywords" ]], [ "att.net", [ "qry", "websearch.field-keywords" ] ], [ "afterhourszone.co.uk", "q" ], [ "babieca.com", "keywords" ], [ "comcast.net", [ "query", "q" ]], [ "euuu.com", "query" ], [ "find-uk.com", [ "qry_str", "query" ]], [ "ixquick.com", "query" ], [ "steadysearch.com", "w" ], [ "mamma.com", [ "qw", "query" ]], [ "searchalot.com", [ "q", "search" ] ], [ "goobig.com", "keywords" ], [ "searchandclick.com", "qkw" ], [ "bonweb.com", "search/" ], [ 
"verizon.net", [ "qkw", "web/" ]], [ "netster.com", "KeyWords" ], [ "go.mail.ru", [ "words", "q" ] ], [ "pesenki.ru", "a" ], [ "fastsearch.", "query" ], [ "reliableresults.", "Keywords" ], [ "findallresults.net", [ "s", "Keywords" ]], [ "fastresults.org", "Keywords" ], [ "concert-tickets-over-the-net", "str" ], [ "suomi24.fi", "q" ], [ "killerinfo.", "query" ], [ "info.com", [ "qkw", "web/" ]], [ "portalgates.com", "mt" ], [ "shopnav.com", "s" ], [ "super1000.", "k" ], [ "startnow.", "q" ], [ "warez.com", "q" ], [ "starware.", "qry" ], [ "start.no", "q" ], [ "startium.com", "qkw" ], [ "istarthere.com", "str" ], [ "alexa.com", "q" ], [ "lexxe.com", "sstring" ], [ "justoneclick.co.uk", "qry" ], [ "findtarget.com", "q" ], [ "gmtv.co.uk", "keyword" ], [ "gm.tv", "keyword" ], [ "goliat.hu", "KERESES" ], [ "munky.com", "term" ], [ "ic24.", "keyword" ], [ "music-e.net", "search" ], [ "buscamp3.com.br", "SearchString" ], [ "buscar.ya.com", "item" ], [ "buscador.terra.es", "query" ], [ "buscador.terra.com", [ "qt", "query" ]], [ "buscador.ozu.es", "q" ], [ ".mulitbuscador.com", "keywords" ], [ "downloads.", "qry" ], [ "busqueda.americaonline.com.mx", "query" ], [ "bellsouth.net", "string" ], [ "chello.", "srchText" ], [ "pesquisa.clix.pt", "question" ], [ "uol.com.", "q" ], [ "ultimateguitartabs.com", "term" ], [ "anywho.com", "qry" ], [ "metabot.ru", "st" ], [ "searchscout.com", "k" ], [ "rediff.com", "MT" ], [ "origo.hu", "q" ], [ "webferret.hu", "q" ], [ "cnn.com", [ "q", "query" ] ], [ "adelphiapowerpage", "q" ], [ "aquanuke.com", [ "uk-", "keyword" ]], [ "blueyonder", "q" ], [ "eniro.", "q" ], [ "bluewin.ch", "qry" ], [ "fanzine.co.uk", "keyword" ], [ "indianmusiconline", "term" ], [ "findology.com", "q" ], [ "gobango.com", "keyword" ], [ "virtualsearches.net", [ "s", "Keywords" ]], [ "vivisimo.", "query" ], [ "logicjungle.", "find-" ], [ "search.new.net", [ "Keywords", "s" ]], [ "browserdirect.net", "Keywords" ], [ "searchfact.net", "Keywords" ], [ "cybersearching.", 
[ "s", "Keywords" ]], [ "picsearch.se", "q" ], [ "compuserve.de", "q" ], [ "itv.com", "Keywords" ], [ "kataweb.it", "q" ], [ "netmadeira.com", "q" ], [ "eircom.net", [ "q", "as_epq" ]], [ "ursearch.com", "keywords" ], [ "steadysearch", "w" ], [ "qksearch.com", "query" ], [ "almightysearch.", "Keywords" ], [ "trustedsearch.", "w" ], [ "search.com", [ "wf,", "q", "qt", "search/1/" ]], [ "searchy.com", "search_term" ], [ "attbi.com", "query" ], [ "redbox.cz", "qs" ], [ "mp3ringtones.com", "k" ], [ "aquimp3.com", "v" ], [ "wwwnation.com", "q" ], [ "aallix.com", "page" ], [ "esearchandfind.org", "Keywords" ], [ "zee.co.uk", "query" ], [ "resultsondemand.net", "Keywords" ], [ "starpulse.com", "Music/" ], [ "adsrve.com", "s" ], [ "abcweba.com", "i" ], [ "oceanfree.net", "q" ], [ "www.fi", "qt" ], [ "espotting.com", "keyword" ], [ "spinner.pl", "qry" ], [ "ubbi.com", [ "q", "Busqueda" ]], [ "updated.com", [ "text", "search" ] ], [ "ukplus.co", "key" ], [ "walhello.info", "key" ], [ "viabusca.com", "query" ], [ "vonna.com", "k" ], [ "webfind.com", "query" ], [ "xquick.com", "params" ], [ "your.com", "Keywords" ], [ "yoursearchfinder.com", [ "s", "Keywords" ] ], [ "beegoo.co", "qry" ], [ "category.org", "q" ], [ "Music.php", "term" ], [ "gogole.", "q" ], [ "gogle.", "q" ], [ "foxnews.", [ "web//", "qkw" ]], [ "tiscali.", [ "query", "key" ]], [ "bol.com", "q" ], [ "text-chat.co.uk", "qry" ], [ "primusonlin.com", "q" ], [ "vitaminic.", "text" ], [ "4searchguide.", "search-" ], [ "pmc-search-services.", "keywords" ], [ "uknetguide.", [ "Keywords", "Search" ]], [ "mykindaplace.", "keyword" ], [ "searchspot.co.uk", "findproducts/" ], [ "peoplesound", [ "wkaname", "advanced_artist_name" ]], [ "the-business-directory.co.uk.", "qry" ], [ "search-o-matic.", "q" ], [ ".directory.co.uk", "/" ], [ "upspiral.co", [ "st", "search" ]], [ "top-uk.co.uk", "for" ], [ "aim4media.", "qry" ], [ "msxml.", "web/" ], [ "msxml.excite.com", [ "web/", "images/" ]], [ "excite.", [ "q", "search", "qkw", 
"s" ] ], [ "webcrawler.", [ "web/", "search", "qkw" ]], [ "lawcrawler.findlaw.", "entry" ], [ "feedster.com", "q" ], [ "fuzzycrawler.com", "text" ], [ "monstercrawler.com", "qry" ], [ "webcrawl.", "keywords" ], [ "metacrawler.", [ "web/", "qkw", "general", "q_all" ] ], [ "crawler.com", [ "q", "qkw" ]], [ "paley.com", "search/" ], [ "ilectric.com", [ "-/", "s" ]], [ "firstplace.com", "qkw" ], [ "search.goo.ne.jp", [ "MT", "AT" ]], [ "rogers.com", "q" ], [ "splut.", [ "pattern", "search/j/", "search/k/" ]], [ "new.net", "s" ], [ "free.fr", "q" ], [ "need2find.", "searchfor" ], [ "findsall.", [ "Keywords", "s" ] ], [ "searchengine.com", "search" ], [ "ceoexpress.com", "DTqb1" ], [ "wayahead.com", "artist" ], [ "sqwire.com", "kw" ], [ "metastrike.", "web/" ], [ "zapmeta.com", "query" ], [ "arcor.de", "Keywords" ], [ "catcha.com", "qry" ], [ "tdconline.dk", "q" ], [ "searchthru.com", "keywords" ], [ "resultshunter.com", "query" ], [ "jubii.dk", "query" ], [ "ramgo", "ask" ], [ "drumtabs", "term" ], [ "allsearchengines", "query" ], [ "iboogie.tv", "name_query" ], [ "startseite.de", "q" ], [ "joesharp.com", "keywords" ], [ "webfinder.com", [ "phrase", "txtSearchTerm", "SearchTerm", "SearchPhrase" ] ], [ "tdonline.dk", "q" ], [ "best-property-deals.co.uk", "/" ], [ "propertydemon.co.uk", "keyword/" ], [ "searchdemon.", "keyword" ], [ "searchmesilly.", "q" ], [ "onebigworld.", [ "directory/", "directory/n/" ]], [ "searcheverywhere.", [ "search/for/", "uk/for/" ]], [ "teentgp.", "term" ], [ "shopping.net", [ "keyword", "/" ]], [ "ihmemaa.fi", "qt" ], [ "buy-the-best-online.", "keyword" ], [ "washingtonpost.", "searchtext" ], [ "t-online.", "q" ], [ "searchit.com", "keywords" ], [ "krstarica.com", "u" ], [ "piltel.com", "q" ], [ "3web.com", "q" ], [ "gigablast.", "q" ], [ "zoznam.sk", "s" ], [ "linkz.com", "term" ], [ "ukuk.com", "search" ], [ "cox.net", "GoogleSearch" ], [ "euroseek.com", [ "query", "string" ]], [ "positiontech.com", "query", "valueToMatch" ], [ 
"searcheurope.com", "query" ], [ "to-inspire-you.com", "keyword" ], [ "zillafind.com", "query" ], [ "search2find.co", "/" ], [ "mweb.co.za", [ "hpq", "sqpage" ]], [ "wisenut.com", "q" ], [ "modern-woman.com", [ "q", "find-" ]], [ "blintz.com", [ "q", "directory/" ]], [ "locators.com", [ "que" ]], [ "qsrch.com", [ "s", "Keywords" ]], [ "belga.com", "keywords" ], [ "belgacom.net", "keywords" ], [ "ezanga.com", "qkw" ], [ "lyrics-ez.com", "1-" ], [ "lyrics-song.net", "1-" ], [ "sourtimes.org", "t" ], [ "lookupanything.", [ "s", "Keywords" ]], [ "shockwave.com", "qkw" ], [ "copernic.com", "qkw" ], [ "infospot.com", "terms/" ], [ "tinusi.com", "search/" ], [ "teensexlocator.com", "db/" ], [ "airora.com", "web/" ], [ "shopnav.com", "keywords" ], [ "surfwax.com", "search" ], [ "shatteredreality.net", [ "search/", "searches/" ]], [ "gogo-uk.co.uk", "search-" ], [ "moneydemon.co.uk", "keyword/" ], [ "appliedsearch.net", "keywords" ], [ ".lop.com", "s" ], [ "majorca.info", "keywords" ], [ "searchmole.", "term" ], [ "searchpixie.com", "keywords" ], [ "resultseeker.com", [ "w", "pw" ]], [ "seekerbar.", "qkw" ], [ "bestsearchonearth.", [ "s", "Keywords" ]], [ "mykindaplace.", "keyword" ], [ "giantexplorer.", "Keywords" ], [ "gawwk.", "Terms" ], [ "getyourdatahere.", "Keywords" ], [ "toxiclemon.co.uk", [ "qkw", "/t/", "/d/" ]], [ "query.nytimes.com", "query" ], [ "blueglobus.", "keywords" ], [ "myglobalsearch.", "searchfor" ], [ "globalsearch.cz", "/" ], [ "quzy.", "keywords" ], [ "slinkyslate.", "Keywords" ], [ "mytotalsearch.", "searchfor" ], [ "killersearch.", [ "Search/_", "Keywords" ]], [ "onestepsearch.", [ "s", "Keywords" ]], [ "search2.co", "q" ], [ "icerocket.", [ "q", "SearchPhrase" ]], [ "profusion.", "queryterm" ], [ "baidu.", "word" ], [ "abacho.", "q" ], [ "voila.fr", "rdata" ], [ "fireball.de", [ "q", "query" ]], [ "virgin.net", "q" ], [ "walla.co.", "q" ], [ "corriere.it", "q" ], [ "pageprobe.com", "gs_sq" ], [ "sleepyhollowrecords.", "guitar-resources/" ], [ 
"animetopia.", "keywords" ], [ "finna.is", "query" ], [ "specster.", "keywords" ], [ "find-a-needle.com", [ "Keywords", "/" ]], [ "srng.", "keywords" ], [ "imaginecontact.", "text" ], [ "coversproject.com", "artist/" ], [ "cutedoggy.", "qry_str" ], [ "newszoom.", "search/latest/" ], [ "zoomtown.", "q" ], [ "search.wn.com.", "query_string" ], [ "uk250.co.uk", "search/for/" ], [ "fanvault.co", [ "1/", "2/", "3/" ]], [ "4pricesearch.co.uk", "search-" ], [ "a9.com", [ "q", "/" ]], [ "32bits.org", "results/" ], [ "girl-freebies.com", "1/" ], [ "cheap.co", "/" ], [ "zabtech.", "find/" ], [ "209.189.108.214", "s" ], [ "www.redzip.com", "st" ], # Got to escape these plusses [ "66.218.", "p" ], [ "66.102.", "\\\+" ], [ "64.233.", "\\\+" ], [ "216.239.", [ "q", "\\\+" ]], [ "216.239.", "\\\+" ], [ "216.109.", "\\\+" ], [ "ecorpscanada.ca", "Second" ], [ "dashbar.com", "k" ], [ "hotrodbot.com", "qry_str" ], [ "sensis.com", "find" ], [ "axmo.com", "ax" ], [ "viewpoint.com", "k" ], [ "friendster.com", "w" ], [ "theselection.co.uk", "keyword/" ], [ "70.84.47.101", "results/" ], [ "xeec.com", "results/" ], [ "gooing.com", "/" ], [ "daum.net", "q" ], [ "answers.com", "/" ], [ "fanmode.com", "1/" ], [ "mygo.com", "search/" ], [ "opera.com", "search" ], [ "mongenie.com", "keywords" ], [ "searchorange.com", "home/" ], [ "search.orange.co.uk", "q" ], [ "gasta.co.uk", "Keywords" ], [ "mynextsearch.com", "Keywords" ], [ "metaseek.nl", "qry" ], [ "feelgoodteam", "qry" ], [ ".net.net", "Keywords" ], [ ".gasta.ie", "Keywords" ], [ "searchfrombrowser.biz", [ "s", "Keywords" ]], [ "onlinetravelnetwork.biz", [ "s", "Keywords" ]], [ "supereva.", "q" ], [ "kaxy.", "query" ], [ "interweddings.", "Keywords" ], [ "weddingangelz.", "for/" ], [ "medicinewheel.", "/" ], [ "revealed.co.uk", "find/" ], [ "bonaireme.info", "/" ], [ "lost-sock.co.uk", "search/for/" ], [ "infotiger", "qs" ], [ "webservicehosts", "st" ], [ "hair-transplant-surgery", "qry" ], [ "fundle.co.uk", "fun/" ], [ "searchinwales.", 
"qry" ], [ "previewseek.", "query" ], [ "evusto.", "ss" ], [ "infoweb-noticias.", "/" ], [ "geekextreme.", "/" ], [ "searchwhateveraustralia.", "keywords" ], [ "widow.com", "keywords" ], [ "myspace.com", [ "searchrequest", "qry" ] ], [ "ukbargainhunters.com", "search" ], [ "ask.com", "QUERY" ], [ "khalis.", "qry" ], [ "wowway.net", "q" ], [ "fetchme.", "qry" ], [ "scurma.", "search/" ], [ "freenet.de", "query" ], [ "apodcasting.", "dir/" ], [ "gada.b", "d/" ], [ "lnkpage", "/" ], [ "abcsok.", "q" ], [ "ewoss.", "k" ], [ "search.live.com", "q" ], [ "jekoo.", "ksc" ], [ "sify.", "keyword" ], [ "accoona.", "qt" ], [ "yourart.", [ "subject", "/" ]], [ "happy.org.uk", [ "search", "/" ]], [ "lifestylelocal.co.uk", "qLookingFor" ], [ "lifestylelocal.org.uk", "qLookingFor" ], [ "oemji", "Keywords" ], [ "xtraone", "for" ], [ "gbg.bg", "q" ], [ "sweetim.com", "q" ], [ "gomeo.co.uk", "search/" ], [ "clickcompare", "phrase" ], [ "compare-online.", "StrSearch" ], [ "search.conduit.com", "q" ], [ "search1.sky.com", "term" ], [ "my.att.net", "string" ], [ "search.alot.com", "q" ], [ "search1.seznam.cz", "q" ], [ "www.kvasir.no", "searchExpr" ], [ "search.primusonline.com.au", "q" ], [ "search.rr.com", "qs" ], [ "search.bearshare.com", "q" ], [ "s.luna.tv", "q" ], [ "search.auone.jp", "q" ], [ "www.crawler.com", "q" ], [ "incredimail.com", "q" ], [ "popex.com", [ "v", "valueToMatch" ]], [ "clarkeology.com", [ "v", "valueToMatch" ]], [ "claremahon.com", [ "v", "valueToMatch" ]], [ "folkestonegerald.com", [ "v", "valueToMatch" ]], [ "findMessage", "valueToMatch" ], [ "gooogle", "q" ], [ "g00gle", "q" ], [ "GoogleSearch", "GoogleSearch" ], [ "google", [ "q", "query", "as_q", "as_epq" ] ], [ "www", [ "q", "query", "keyword" ] ], ]; }