/* Filename:  websearch.cpp

   Author:  Br. David Carlson

   Date:  January 4, 2007

   This is a CGI program to handle a user's search for a certain target string.  Using a browser
   pointed at websearch.html, the user fills in the desired target and then clicks on the submit
   button.  The uncgi program is used to put this string into an environment variable named WWW_Key.
   Our particular version of uncgi also rejects strings that contain common hacker symbols.  If the
   user submits a string containing a % sign, for example, the input is rejected.  Most
   non-alphanumeric symbols cause uncgi to display an error message and quit.  In addition, when we
   use the GetValue function below, it does some filtering of user input as well.  When it finds an
   unacceptable character, it puts the empty string into the WWW variable.  Note that websearch.html
   must be placed in some location under the web root for your web server.

   Assuming that WWW_Key gets a non-empty string, this program searches through the text file named
   weblist for that target string.  The weblist file contains complete pathnames for various html
   files, one per line, like this example:

      /www/carlson/cs125/final.html
      /www/carlson/cs125/hw/homework2.html

   The weblist file must be located in the cgi-bin directory along with the compiled websearch
   program and webhead.html.  The weblist file can be created with the separate makelist script.

   The output of the program is an HTML page containing a list of the URLs corresponding to the
   lines of weblist that contained the target string.

Note, to compile this program in Linux, use: g++ websearch.cpp stringhelp.cpp -o websearch -s */ #include "stringhelp.h" // Next line should give the name of the text file to be searched: #define DATAFILE "weblist" // Next line should give name of file containing opening HTML for search results: #define HEAD "webhead.html" // The next line should give the start of all URLs for the html files in the above file: #define URLSTART "http://cis.stvincent.edu" // The next line should give the number of chars to remove at the start of each pathname when making a URL: #define REMOVECHARS 4 // Function prototypes: void SearchFile(fstream & fs); void PrintFile(StringType Filename); void PrintURL(StringType Line, int Length); int main(void) { fstream InFile; InFile.open(DATAFILE, ios::in); if (InFile.fail()) { cout << "Error: Cannot open file named " << DATAFILE << " for input" << endl; exit(1); } SearchFile(InFile); InFile.close(); return 0; } /* Given: fs Text file stream already opened for input. Task: To get the target string from the WWW_Key environment variable and do a sequential search of the fs file for all instances of this target string. Each matching line of the file is printed for the user within an HTML page which is sent to standard output. Return: Nothing. */ void SearchFile(fstream & fs) { int LineLength, KeyLength; StringType Key, Line; const char Quote = 34; // 34 is ASCII code for double quote character. GetValue("WWW_Key", Key); // Get the target key from the WWW environment variable set up by uncgi. KeyLength = strlen(Key); cout << "Content-type: text/html" << endl << endl; PrintFile(HEAD); cout << "
    " << endl; if (KeyLength == 0) { cout << "
  1. No keyword was supplied or keyword contained disallowed characters
  2. " << endl; cout << "
" << endl; return; } LineLength = MyGetLine(fs, Line, StrMax); while (LineLength > 0) { if (SubstringPresent(Line, Key)) { cout << "
  • "; PrintURL(Line, LineLength); cout << "
  • " << endl; } LineLength = MyGetLine(fs, Line, StrMax); } cout << "" << endl; } /* Given: Filename String containing name of a text file to output. Task: To copy the contents of this file to standard output. Return: Nothing. */ void PrintFile(StringType Filename) { fstream fs; StringType Line; fs.open(Filename, ios::in); if (fs.fail()) exit(1); MyGetLine(fs, Line, StrMax); while (! fs.fail()) { cout << Line << endl; MyGetLine(fs, Line, StrMax); } fs.close(); } /* Given: Line A string containing a complete pathname for a particular html file. Length The number of chars in the string in Line. Task: To print out the URL corresponding to this pathname. This is done by printing URLSTART followed by all of Line except the first REMOVECHARS characters. Return: Nothing. */ void PrintURL(StringType Line, int Length) { int k; cout << URLSTART; for (k = REMOVECHARS; k < Length; k++) cout.put(Line[k]); }