<?php
//////////////////////////////////////
//
// The search script.
//
// Author: Matthew W. Coan
// Date: Sun Dec  1 01:53:29 EST 2002 
//
//////////////////////////////////////

//
// Define Statements:
//
   define("RESULTS_PER_PAGE", 10);  // default page size; also the widest
                                    // lo..hi window the script accepts
   define("BLURB_CHARS", 50);       // context distance used by format_blurb():
                                    // subtracted from / added to a key word
                                    // position, and the minimum gap between
                                    // two hits that each get an excerpt
   define("BLURB_MAX", 1024 * 3);   // formatted blurbs longer than this are
                                    // cut short by format_blurb()

// 
// Functions:
//
//
// Current Unix time in seconds as a float, including the sub-second part.
// microtime() yields "<fraction> <seconds>"; both halves are added together.
//
function getmicrotime() {
   $parts = explode(" ", microtime());
   $fraction = (float)$parts[0];
   $whole_seconds = (float)$parts[1];
   return ($fraction + $whole_seconds);
}

//
// Build the HTML excerpt ("blurb") shown under a search result.
//
// For every key word that occurs in $blurb (case-insensitive, first
// occurrence only) an excerpt reaching up to BLURB_CHARS bytes before and
// after the start of the hit is cut out of the text.  A hit that starts less
// than BLURB_CHARS bytes after the previously kept hit gets no excerpt of its
// own.  Excerpts are joined in document order as " ...text... " and every
// occurrence of a matched key word inside them is wrapped in <b></b>.
//
// Parameters:
//    $key_word_array - array of key word strings
//    $blurb          - the document text to cut the excerpts from
//
// Returns:
//    The formatted excerpt string ("" when no key word occurs).  A result
//    longer than BLURB_MAX bytes is cut to BLURB_MAX - 1 bytes.
//
// NOTE(review): $blurb is copied into the result without HTML escaping;
// presumably the stored text is already plain text -- confirm.
//
function format_blurb($key_word_array, $blurb) {
   $blurb = (string)$blurb;
   $lower_blurb = strtolower($blurb);
   $blurb_len = strlen($blurb);

   // First position of every key word that occurs, plus a regex-safe
   // version of the word for the highlighting pass.
   $location_array = array();
   $pattern_array = array();
   foreach($key_word_array as $key_word) {
      $key_word = (string)$key_word;
      if($key_word === "") {
         continue;
      }
      $index = strpos($lower_blurb, strtolower($key_word));
      if($index === false) {
         continue;
      }
      $location_array[$key_word] = $index;
      $pattern_array[$key_word] = preg_quote($key_word, "/");
   }

   asort($location_array, SORT_NUMERIC);

   // Cut one excerpt per hit, skipping hits too close to the last kept one.
   $temp_blurb = "";
   $last_index = -1;
   foreach($location_array as $index) {
      if($last_index != -1 && $index < ($last_index + BLURB_CHARS)) {
         continue;
      }
      $last_index = $index;

      $start = $index - BLURB_CHARS;
      if($start < 0) {
         $start = 0;
      }
      $end = $index + BLURB_CHARS;   // exclusive end offset
      if($end > $blurb_len) {
         $end = $blurb_len;
      }
      // substr() takes a length, not an end offset.
      $temp_blurb .= " ..." . substr($blurb, $start, $end - $start) . "... ";
   }

   if(count($pattern_array) != 0) {
      // Highlight all words in a single pass so a later key word can never
      // match inside markup inserted for an earlier one.  Longest pattern
      // first, so a word wins over any other word that is its prefix.
      $length_array = array_map("strlen", $pattern_array);
      array_multisort($length_array, SORT_DESC, $pattern_array);
      $temp_blurb = preg_replace("/" . implode("|", $pattern_array) . "/i",
                                 "<b>\\0</b>",
                                 $temp_blurb);
   }

   if(strlen($temp_blurb) > BLURB_MAX) {
      $temp_blurb = substr($temp_blurb, 0, BLURB_MAX-1);
   }

   return $temp_blurb;
}

//
// The Script:
//
   //
   // Read the request, split the query into key words and run the main
   // search query.  Leaves behind for the rest of the page:
   //    $query, $dlim        - raw query text and the token delimiters
   //    $lo, $hi             - validated result window (integers)
   //    $key_word_array      - distinct lower-cased key words
   //    $key_word_count      - number of entries in $key_word_array
   //    $c                   - the OR-ed word conditions ("" = nothing to search)
   //    $result              - result set of the search (only set when $c != "")
   //
   $start_time = getmicrotime();

   require("get_connection.php");
   $connection = get_connection();

   // $HTTP_GET_VARS no longer exists as of PHP 5.4; $_GET is the supported
   // superglobal.  A missing or non-string parameter (query[]=x) is treated
   // as an empty search instead of raising notices.
   $query = "";
   if(isset($_GET["query"]) && is_string($_GET["query"])) {
      $query = $_GET["query"];
   }
   $key_word_array = array();

   // Result window: both bounds must be plain digit strings, hi must be
   // above lo and the window no wider than one page; anything else falls
   // back to the first page.
   $lo = 0;
   $hi = RESULTS_PER_PAGE;
   if(isset($_GET["lo"]) && isset($_GET["hi"])
      && is_string($_GET["lo"]) && is_string($_GET["hi"])
      && preg_match('/^[0-9]+\z/', $_GET["lo"])
      && preg_match('/^[0-9]+\z/', $_GET["hi"])) {
      $lo = (int)$_GET["lo"];
      $hi = (int)$_GET["hi"];
      if(($hi <= $lo) || (($hi - $lo) > RESULTS_PER_PAGE)) {
         $lo = 0;
         $hi = RESULTS_PER_PAGE;
      }
   }

   $dlim = " \t\n\r~`!@#\$%^&*()+=-|}{\\][\":\'\?/^?.,><;";
   $key_word_count = 0;
   $condition_array = array();
   $word = strtok($query, $dlim);
   // Compare against false explicitly: a token of "0" is a valid word but
   // evaluates to false in a bare while($word).
   while($word !== false) {
      $word = strtolower($word);
      // A repeated word adds nothing to the OR list, so take each one once.
      if(!in_array($word, $key_word_array, true)) {
         $condition_array[] = "word = '" . mysql_escape_string($word) . "'";
         $key_word_array[$key_word_count] = $word;
         $key_word_count++;
      }
      $word = strtok($dlim);
   }
   $c = implode(" OR ", $condition_array);

   $sql =
"SELECT url,last_mod,length,SUM(frequency) As FrequencySum,\n"
."COUNT(documents.id) As DocumentCount, blurb\n"
."FROM words,word_frequencies,documents\n"
."WHERE ("
.$c
.")\n"
." AND documents.id = doc_id\n"
." AND words.id = word_id\n"
."GROUP BY documents.id\n"
."ORDER BY DocumentCount DESC,FrequencySum DESC\n";

   // Only hit the database when there is at least one key word.
   if(strlen($c) != 0) {
      $result = mysql_query($sql, $connection);
      if($result === false)
         die("SQLError: ".mysql_error());
   }
?>
<HTML>
<HEAD><TITLE>Search (V1.0.0) :: Results</TITLE></HEAD>
<BODY BGCOLOR="WHITE">
<CENTER>
<FONT FACE="Arial">
<H1>Search (V1.0.0) :: Results</H1>
<HR>
<FORM METHOD="GET" ACTION="results.php">
<TABLE BORDER="1">
<TR>
<TD><FONT FACE="Arial" SIZE="+1"><B>Key Word Search:</B></FONT></TD> 
<TD VALIGN="CENTER"><FONT FACE="Arial" SIZE="+1">
<INPUT TYPE="TEXT" SIZE="20" NAME="query" VALUE="<?= htmlentities($query) ?>"></FONT></TD>
</TR>
<TR>
<TD COLSPAN="2" ALIGN="CENTER"><INPUT TYPE="SUBMIT" VALUE="Search"> 
* <INPUT TYPE="RESET" VALUE="Clear"></TD>
</TR>
</TABLE>
</FORM>
</CENTER>
<?php
   if(strlen($c) == 0) {
      // The query contained no key word at all.
?><H2>Nothing to search for...</H2>
<?php
   }
   else if(mysql_num_rows($result) == 0) {
      // No document matched.  For each word of the query, list the indexed
      // words with the same SOUNDEX code as "sounds like" suggestions.
      mysql_free_result($result);   // the empty result set is not needed any more

      $sql =
"SELECT word\n"
."FROM words\n"
."WHERE (";
      $end_sql =
")\n"
."ORDER BY word";
      $found_soundex_match = false;

      $word = strtok($query, $dlim);
      // Explicit false test: a token of "0" must not end the loop early.
      while($word !== false) {
         $word = strtolower($word);
         $c = "soundex_code = SOUNDEX('" . mysql_escape_string($word) . "')";
         $result = mysql_query($sql . $c . $end_sql, $connection);
         if($result === false)
            die("SQLError: " . mysql_error());

         // One link per suggested word; the word is URL-encoded inside the
         // query string and HTML-escaped where it is shown as text.
         $link_array = array();
         while($ret = mysql_fetch_array($result)) {
            $link_array[] = "<A HREF=\"results.php?query=" . urlencode($ret[0]) . "\">"
                            . htmlentities($ret[0]) . "</A>";
            $found_soundex_match = true;
         }
         mysql_free_result($result);

         if(count($link_array) != 0) {
            print "<P><FONT COLOR=\"red\">" . htmlentities($word)
                  . "</FONT> <B>::</B> <I>sounds like</I> <B>::</B> ";
            print implode(" ~ ", $link_array);
            print "</P>";
         }
         $word = strtok($dlim);
      }

?><H2>No results...</H2>
<?php
   }
   else {
      // At least one document matched.  Rows are ordered by DocumentCount
      // (column 4) descending, so the first row carries the highest count.
      $ret = mysql_fetch_array($result);
      if($ret[4] != $key_word_count) {
         // The best match does not account for every key word: for each
         // query word that is not in the words table, list the indexed words
         // with the same SOUNDEX code as "sounds like" suggestions.
         $sql =
"SELECT word\n"
."FROM words\n"
."WHERE (";
         $end_sql =
")\n"
."ORDER BY word";

         $word = strtok($query, $dlim);
         // Explicit false test: a token of "0" must not end the loop early.
         while($word !== false) {
            $word = strtolower($word);
            $result2 = mysql_query("SELECT id FROM words WHERE word = '"
                                   . mysql_escape_string($word) . "'",
                                   $connection);
            if($result2 === false)
               die("SQLError: " . mysql_error());
            $word_is_indexed = (mysql_num_rows($result2) != 0);
            mysql_free_result($result2);
            if($word_is_indexed) {
               // Known word: nothing to suggest.
               $word = strtok($dlim);
               continue;
            }

            $c = "soundex_code = SOUNDEX('" . mysql_escape_string($word) . "')";
            $result2 = mysql_query($sql . $c . $end_sql, $connection);
            if($result2 === false)
               die("SQLError: " . mysql_error());
            $link_array = array();
            while($ret2 = mysql_fetch_array($result2)) {
               // URL-encode inside the query string, HTML-escape as text.
               $link_array[] = "<A HREF=\"results.php?query="
                               . urlencode($ret2[0]) . "\">"
                               . htmlentities($ret2[0]) . "</A>";
               $found_soundex_match = true;
            }
            mysql_free_result($result2);

            if(count($link_array) != 0) {
               print "<P><FONT COLOR=\"red\">" . htmlentities($word)
                     . "</FONT> <B>::</B> <I>sounds like</I> <B>::</B> ";
               print implode(" ~ ", $link_array);
               print "</P>";
            }
            $word = strtok($dlim);
         }
      }
?>
<HR>
<H2>>>>Results:</H2>
<OL START="<?= ((int)$lo) + 1 ?>">
<?php
      // Walk the result set from its first row (already fetched into $ret),
      // printing only rows whose position lies in [$lo, $hi).  The list
      // numbering starts at $lo + 1 so later pages do not restart at 1.
      $index = -1;
      do {
         $index++;
         if($index < $lo)
            continue;   // in a do/while this jumps to the fetch in the condition
         if($index >= $hi)
            break;
?>
<LI>
  <UL>
    <LI><B>URL:</B> <A HREF="<?= htmlentities($ret[0]) ?>"><?= htmlentities($ret[0]) ?></A></LI>
    <LI><B>Last Modified:</B> <?= htmlentities($ret[1]) ?></LI>
    <LI><B>Length (in bytes):</B> <?= htmlentities($ret[2]) ?></LI>
    <LI><B>Hit Count:</B> <?= htmlentities($ret[3]) ?></LI>
    <LI><B>Key Words:</B> <?= htmlentities($ret[4]) ?></LI>
    <LI><B>Blurb:</B> <?= format_blurb($key_word_array, $ret[5]) ?>...</LI>
  </UL>
</LI>
<?php
      }
      while($ret = mysql_fetch_array($result));

      // Page links: one per RESULTS_PER_PAGE rows, shown only when there is
      // more than one page.  The page starting at $lo is shown as plain text.
      $temp = mysql_num_rows($result);
      mysql_free_result($result);
      $temp = $temp / RESULTS_PER_PAGE;
      print "</OL>";
      if($temp > 1.0) {
         print "<B><FONT SIZE=\"+2\" FACE=\"Arial\" COLOR=\"RED\">"
               . "Result Pages</FONT>:&nbsp;&nbsp;&nbsp;</B> ";

         for($index = 0; $index < $temp; $index++) {
            if($index != 0)
               print " | ";
            if(($index * RESULTS_PER_PAGE) == $lo)
               print "<I>" . ($index+1) . "</I>\n";
            else {
               // "&" is written as "&amp;" inside the HTML attribute.
               print "<A HREF=\"results.php?query=" . urlencode($query)
                     . "&amp;lo=". ($index * RESULTS_PER_PAGE) ."&amp;hi="
                     . ((($index + 1) * RESULTS_PER_PAGE))
                     ."\">" . ($index+1) . "</A>\n";
            }
         }
      }
   }
?>
<CENTER>
<HR>
<FONT FACE="Arial" SIZE="-2">(MWC Labs)</FONT>
</CENTER>
<?php
   // Close the database link, then report the wall-clock time elapsed since
   // $start_time was taken at the top of the script.
   mysql_close($connection);

   $end_time = getmicrotime();

   $total_time = $end_time - $start_time;

   // Fixed the misspelling "secons" in the visible message.
   print "<I>Total time spent in script is: $total_time seconds</I>";
?>
</FONT>
</BODY></HTML>
