/*

CGI WEB SEARCH ENGINE...

Author: Matthew W. Coan
Date: Sun Sep  1 00:08:04 EDT 2013

*/

#include <cstdlib>
#include <cstring>
#include <ctime>
#include <fstream>
#include <iostream>
#include <list>
#include <string>
#include <signal.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

#include "cgi.h"
#include "csv.h"

// Program name and version; printed in the page <TITLE> and <H1> heading
#define VERSION_STRING "Simple Web Search V1.0"

// Search index file: every query is answered by filtering this file
// through egrep (rows are displayed as URL, title, server software)
#define SEARCH_FILE "/home/mcoan/websearch/title-bot.txt"

// Temp search results file: the filter pipeline's output is redirected
// here and then read back to build the result list.
// NOTE(review): the path is fixed, so concurrent requests share one file.
#define TEMP_FILE "/home/mcoan/websearch/temp_file.txt"

// Search timeout in seconds (the value handed to alarm())
#define TIMEOUT 5

using namespace std;
using namespace csv_reader;

// Signal handler shared by SIGALRM and SIGCHLD.
//
// SIGALRM: the search ran past its time limit. A timeout notice is written
//          to standard output and the process terminates with status 0.
// SIGCHLD: (and any other signal) nothing is done; the handler just returns.
//
// Only async-signal-safe calls are allowed here, because the signal may
// arrive while the main program is in the middle of stream I/O or a memory
// allocation. That rules out cout and exit(); write() and _exit() are used
// instead. Note that _exit() does not flush stdio/iostream buffers, so any
// output that has not been flushed yet when the timeout fires is dropped.
void
on_signal(int arg)
{
   if(arg == SIGALRM) {
      static const char notice[] =
         "<BR><BR>\n"
         "Search timeout...<BR><HR><BR><BR>\n";
      // sizeof - 1: do not send the terminating NUL.
      const ssize_t sent = write(STDOUT_FILENO, notice, sizeof(notice) - 1);
      (void)sent;   // nothing useful can be done about a failed write here
      _exit(0);
   }
}

// std::string front end for system(): runs cmd through the command
// processor and hands back whatever status system() reported.
int
System(const string & cmd)
{
   const char * const command_line = cmd.c_str();
   const int status = system(command_line);
   return status;
}

// True when ch is an ASCII digit or an ASCII letter of either case.
// Plain range comparisons are used, so the result does not depend on locale.
bool
is_alnum(const char ch) 
{
   const bool digit = ('0' <= ch && ch <= '9');
   const bool upper = ('A' <= ch && ch <= 'Z');
   const bool lower = ('a' <= ch && ch <= 'z');
   return digit || upper || lower;
}

// True only for the blank character ' '; tabs, newlines and other
// whitespace do not count.
bool
is_space(const char ch)
{
   const char blank = ' ';
   if(ch != blank) {
      return false;
   }
   return true;
}

// Return a copy of str in which the characters <, >, " and & are replaced
// by the entities &lt;, &gt;, &quot; and &amp;. Every other character is
// copied through unchanged.
string
escape(const string & str)
{
   string out;
   for(string::const_iterator it = str.begin(); it != str.end(); ++it) {
      switch(*it) {
         case '<':
            out.append("&lt;");
            break;
         case '>':
            out.append("&gt;");
            break;
         case '\"':
            out.append("&quot;");
            break;
         case '&':
            out.append("&amp;");
            break;
         default:
            out.push_back(*it);
            break;
      }
   }
   return out;
}

// Return str with every character dropped except blanks, ASCII letters and
// digits, '+' and '-'. The surviving characters keep their original order.
string
shell_encode(const string & str)
{
   string kept;
   for(string::const_iterator it = str.begin(); it != str.end(); ++it) {
      const char c = *it;
      const bool allowed = is_alnum(c)
                        || is_space(c)
                        || c == '-'
                        || c == '+';
      if(!allowed) {
         continue;
      }
      kept.push_back(c);
   }
   return kept;
}

// Build the shell filter chain for a query string.
//
// The query is first reduced to the shell-safe character set by
// shell_encode() and then split on blanks. Each keyword becomes one
// case-insensitive stage of the form ` | egrep -i -e "<keyword>"`, so a
// record has to match every keyword to survive the whole chain.
// Returns an empty string when the query contains no usable keyword.
static string
build_filter_pipeline(const string & query)
{
   const string cleaned = shell_encode(query);
   string pipeline;
   string::size_type start = cleaned.find_first_not_of(' ');
   while(start != string::npos) {
      string::size_type stop = cleaned.find(' ', start);
      if(stop == string::npos) stop = cleaned.size();
      // -e marks what follows as the pattern, so a keyword that begins
      // with '-' is not mistaken for an egrep option.
      pipeline += " | egrep -i -e \"";
      pipeline += cleaned.substr(start, stop - start);
      pipeline += "\"";
      start = cleaned.find_first_not_of(' ', stop);
   }
   return pipeline;
}

// CGI entry point.
//
// Emits the HTTP header and the search form, then (when the "search_str"
// form field holds at least one usable keyword) filters SEARCH_FILE through
// one egrep stage per keyword into TEMP_FILE, reads the surviving CSV rows
// back and prints them as a result list. The search is bounded by
// alarm(TIMEOUT); on_signal() ends the process if the alarm fires.
// The command line arguments are not used. Always returns 0.
int
main(int argc, char ** argv) 
{
   (void)argc;
   (void)argv;
   cout << "Content-type: text/html\r\n\r\n" << flush;
   signal(SIGALRM, on_signal);
   signal(SIGCHLD, on_signal);
   CGI form_data;
   const char * search_str = form_data["search_str"];
   if(search_str == 0) search_str = "";
   cout << "<HTML>" << endl;
   cout << "<HEAD>" << endl;
   cout << "<TITLE>" << VERSION_STRING << "</TITLE>" << endl; 
   cout << "<LINK REL=\"stylesheet\" HREF=\"style.css\">" << endl;
   cout << "<SCRIPT LANGUAGE=\"JavaScript\" SRC=\"lib/site.js\"></SCRIPT>" << endl;
   cout << "</HEAD>" << endl;
   cout << "<BODY BGCOLOR=\"WHITE\">" << endl;
   cout << "<H1>" << VERSION_STRING << "</H1>" << endl; 
   cout << "<FORM ACTION=\"/~mcoan/cgi-bin/websearch.cgi\" METHOD=\"GET\">" << endl;
   cout << "Keyword Search: <INPUT TYPE=\"TEXT\" NAME=\"search_str\">" << endl;
   cout << "<INPUT VALUE=\"Search\" TYPE=\"SUBMIT\"> * <INPUT VALUE=\"Reset\" TYPE=\"RESET\">" << endl;
   cout << "</FORM>" << endl;
   cout << "<HR><BR>" << endl;
   const string qs = build_filter_pipeline(search_str);
   // Only search when at least one keyword survived sanitising; an empty
   // filter chain would copy the entire index into the result list.
   if(!qs.empty()) {
      alarm(TIMEOUT);
      // NOTE(review): clock() reports CPU time used by this process only;
      // time spent in the cat/egrep child processes is not included.
      clock_t start_time = clock();
      // A missing or unreadable index must be reported as an error rather
      // than being mistaken for "nothing matched" further down.
      bool ok = (access(SEARCH_FILE, R_OK) == 0);
      if(ok) {
         // Drop any earlier result file so a failed run can never display
         // the results of a previous search.
         // NOTE(review): TEMP_FILE is one fixed path shared by all
         // requests; concurrent searches can still clobber each other.
         unlink(TEMP_FILE);
         int status = System("cat " + string(SEARCH_FILE) + " " + qs 
                + string(" > ")
                + string(TEMP_FILE));
         // The pipeline reports the status of its last egrep:
         // 0 = lines matched, 1 = nothing matched, anything else = failure.
         ok = (status != -1) && WIFEXITED(status) && WEXITSTATUS(status) <= 1;
      }
      ifstream fin;
      if(ok) {
         fin.open(TEMP_FILE, ios::in);
         ok = fin.is_open();
      }
      if(ok) {
         csv_row_type row;
         unsigned long count = 0;
         // NOTE(review): assumes every row has at least three fields;
         // confirm how csv_row_type behaves for shorter rows.
         while(fin >> row) {
            // The URL comes from the index file just like the other
            // fields, so it is escaped before being placed in the markup.
            const string url = escape(row[0]);
            cout << "URL: <A HREF=\"" << url << "\">" << url << "</A>" << endl;
            cout << "<BR>" << endl; 
            cout << "Title: " << escape(row[1]) << endl;
            cout << "<BR>" << endl; 
            cout << "Server Software: <I>" << escape(row[2]) << "</I>" << endl;
            cout << "<BR><BR>" << endl;
            count++;
         }
         fin.close();
         cout << "search done...<BR>" << endl;
         cout << count << " search results...<BR>" << endl;
         clock_t end_time = clock();
// DEBUG
         cout << ((static_cast<double>(end_time) - static_cast<double>(start_time))
                  / static_cast<double>(CLOCKS_PER_SEC))
              << " seconds(s) " 
              << " total search time...<BR>" << endl;
      }
      else {
         cout << "search error...<BR>" << endl;
      }
      alarm(0);
   }
   else {
      cout << "search error...<BR>" << endl;
   }
   cout << "<BR><HR><BR><BR>" << endl;
   cout << "</BODY>" << endl;
   // The document head was already closed before <BODY>; no second
   // </HEAD> belongs here.
   cout << "</HTML>" << endl << flush;
   return 0;
}
