#!/usr/bin/perl
# Change the above line to reflect the location of your installation of Perl.
#
# -------------------------------------------------------------------
# Program: swish-cgi.pl
# Author : John Millard (millarj@muohio.edu)
#
# Purpose: A gateway interface (CGI) to the SWISH searcher/indexer
#
# Instructions:
#
# 1. Install and configure SWISH --
#    Available from Enterprise Integration Technologies at
#    http://www.eit.com/goodies/software/swish/swish.html
#
# 2. Index your site so that SWISH returns the URL for each file,
#    i.e. SWISH should return http://www.yoursite.edu/file_name.html
#    See the SWISH documentation about REPLACE_RULES to see how.
#
# 3. Customize the user-defined variables below to reflect your site.
#
# 4. Install this file in your cgi directory.  This may vary
#    from site to site, but is usually in a directory like cgi-bin.
#
# 5. Create a link from your pages to the cgi
#    ex. http://www.yoursite.edu/cgi-bin/swish-cgi
#    Running the cgi as a URL will generate a blank query form on the fly.
#
# Note: if you don't like the initial form that comes up, you can modify
#       the print_form subroutine.
#
#       To change the format of the returned results, you may modify the
#       print_results subroutine.
# -------------------------------------------------------------------

use strict;
use warnings;

# -------- User defined configuration variables -----------

# Absolute path and command to execute the SWISH searcher
our $swish = "/usr/bin/swish-e";

# URL of where you put this cgi
our $swishcgi = "index.cgi";

# Optional parameters to pass to the SWISH searcher.
# Kept for compatibility; the search command below does not use it.
our $params = " ";

# Absolute path and filename of your created swish index file
our $index = "/home/xml4lib/search/index.swish-e";

# Filesystem prefix removed from each path SWISH returns, so that the
# remainder can be used as a link relative to the web server root.
our $strip_prefix = "/home/xml4lib";

# The full name of your organization -- printed with search results
our $organization = "XML4Lib Electronic Discussion";

# The full name of your department -- printed with search results
# our $department = "Electronic Information Services and Instruction Office";

# ------ End of Configuration Variables ------------

# Retrieve either GET or POST information.
my $request_method = defined $ENV{'REQUEST_METHOD'} ? $ENV{'REQUEST_METHOD'} : '';
my $buffer         = '';
if ($request_method eq 'POST') {
    my $length = defined $ENV{'CONTENT_LENGTH'} ? $ENV{'CONTENT_LENGTH'} : '';
    if ($length =~ /^\d+$/ && $length > 0) {
        read(STDIN, $buffer, $length);
        $buffer = '' unless defined $buffer;
    }
}
else {
    $buffer = defined $ENV{'QUERY_STRING'} ? $ENV{'QUERY_STRING'} : '';
}

my %FORM;
my @search_tags;
foreach my $pair (split(/&/, $buffer)) {

    # Limit of 2 so that a raw "=" inside the value is not lost.
    my ($name, $value) = split(/=/, $pair, 2);
    next unless defined $name && $name ne '';

    # Un-Webify plus signs and %-encoding
    $name  = decode_form_value($name);
    $value = decode_form_value($value);

    if ($name eq 'search_tags') {
        push(@search_tags, $value);
        next;
    }
    $FORM{$name} = $value;
}

my $query = defined $FORM{'query'} ? $FORM{'query'} : '';
$query =~ s/'//g;
$query =~ s/%27//g;

# The query is handed to SWISH as a single command-line argument; drop
# leading dashes so it cannot be mistaken for a command-line option.
$query =~ s/^-+//;

# Read from the form but not used by the search command below.
our $results = $FORM{'results'};

# Built from the search_tags fields; kept for compatibility, but the
# search command below does not pass it to SWISH.
our $search_tags = @search_tags ? "-t " . join("", @search_tags) : "";

if ($query ne '') {
    search_parse();
}
else {
    html_header("XML4Lib Search");
    print_form();
    print_footer();
}

# Decode one URL-encoded form component: "+" becomes a space and %XX
# becomes the corresponding byte.  An undefined argument yields ''.
sub decode_form_value {
    my ($text) = @_;
    return '' unless defined $text;
    $text =~ tr/+/ /;
    $text =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg;
    return $text;
}

# Escape the characters that are special in HTML text and in
# double- or single-quoted attribute values.
sub escape_html {
    my ($text) = @_;
    return '' unless defined $text;
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    $text =~ s/'/&#39;/g;
    return $text;
}

# Split one SWISH result line of the form
#     rank path "title" filesize
# into the list (rank, path, title, filesize).  Missing fields come
# back as empty strings.
sub parse_result_line {
    my ($line) = @_;

    # Greedy match on the title so that quotes inside it are kept.
    if ($line =~ /^(\d+)\s+(\S+)\s+"(.*)"\s+(\d+)\s*$/) {
        return ($1, $2, $3, $4);
    }

    # Fallback for lines that do not fit the pattern above: cut at the
    # double quotes, then split the leading part on spaces.
    my ($lead, $title, $filesize) = split(/"/, $line);
    my ($rank, $path) = split(/ /, defined $lead ? $lead : '');
    foreach my $field ($rank, $path, $title, $filesize) {
        $field = '' unless defined $field;
    }
    $filesize =~ s/^\s+//;
    return ($rank, $path, $title, $filesize);
}

# Run SWISH and parse its output.  Prints either the result page or,
# via search_error, an error page.
sub search_parse {
    my %message_for = (
        'err: no results' =>
            "There were no items that matched your search request.",
        'err: could not open index file' =>
            "Could not open the index file $index.",
        'err: no search words specified' =>
            "Please enter at least one search word.",
        'err: a word is too common' =>
            "One of your search terms is too common, please try again.",
    );

    # List-form pipe open: the arguments go straight to the program, so
    # no shell ever interprets the user's query.
    my $pid = open(my $swish_fh, '-|', $swish, '-w', $query, '-f', $index);
    if (!$pid) {
        search_error("Could not run the search program.");
    }

    my @hits;
    my $error;
    while (my $line = <$swish_fh>) {
        chomp $line;

        # First, check to see if the search produced an error.
        if (exists $message_for{$line}) {
            $error = $message_for{$line};
            last;
        }

        # Only lines that begin with a digit are results.
        next unless $line =~ /^\d/;
        push(@hits, $line);
    }
    close($swish_fh);

    search_error($error) if defined $error;
    print_results(@hits);
    return;
}

# Print the result page for the given SWISH result lines.
# Modify this subroutine to change the format of the returned results.
sub print_results {
    my @hits  = @_;
    my $count = scalar @hits;

    html_header($organization);
    print_form();
    print "<p>There are $count items that match your search.</p>\n<ol>\n";
    foreach my $hit (@hits) {
        my ($rank, $url, $title, $filesize) = parse_result_line($hit);

        # Turn the indexed filesystem path into a site-relative link.
        $url =~ s/\Q$strip_prefix\E// if $strip_prefix ne '';

        my $safe_url   = escape_html($url);
        my $safe_title = escape_html($title);
        print "<li><a href=\"$safe_url\">$safe_title</a></li>\n";
    }
    print "</ol>\n";
    print_footer();
    return;
}

# Print a complete page carrying the given error message, followed by
# the search form, then exit.  Does not return.
sub search_error {
    my ($error_message) = @_;
    my $safe_message = escape_html($error_message);

    html_header($organization);
    print "<p>\n";
    print "$safe_message\n";
    print "</p>\n";
    print_form();
    print_footer();
    exit;
}

# Takes the document title as its only argument and prints the HTTP
# header plus the top of the HTML document to be returned.
sub html_header {
    my ($document_title) = @_;
    my $safe_title        = escape_html($document_title);
    my $safe_organization = escape_html($organization);

    print "Content-type: text/html\n\n";
    print <<END;
<html>
<head>
<title>$safe_title</title>
</head>
<body>
<h1>$safe_organization</h1>
END
    return;
}

# Print the closing markup of the HTML document.
sub print_footer {
    print <<END;
</body>
</html>
END
    return;
}

# Print the search form.  The markup is deliberately minimal: the script
# only requires a text field named "query".  Modify this subroutine to
# change the look of the form.
sub print_form {
    my $safe_action = escape_html($swishcgi);
    my $safe_query  = escape_html($query);

    print "<form method=\"GET\" action=\"$safe_action\">\n";
    print <<END;
<p>Search for:</p>
END
    print "<input type=\"text\" name=\"query\" size=\"40\" value=\"$safe_query\">\n";
    print "<input type=\"submit\" value=\"Search\">\n";
    print "</form>\n";
    return;
}