#include "gdt.h"
#include "isearch.hxx"
#include "common.hxx"
#include "infix2rpn.hxx"
#include "dtreg.hxx"
#include "rcache.hxx"
#include "index.hxx"
#include "fprec.hxx"
#include "fpt.hxx"
#include "registry.hxx"
#include "idb.hxx"
#include "tokengen.hxx"
#include "infix2rpn.hxx"
#include "config.hxx"
#include "cgi-util.hxx"
// The search mode is carried in one INT: the high nibble holds the search
// type (SIMPLE / ADVANCED / BOOLEAN) and the low nibble holds class flags
// (currently only BOOLEAN_AND).  SEARCH_TYPE() and SEARCH_CLASS() extract
// the two halves.
#define SEARCH_TYPE(n) ((n) & SEARCH_TYPE_MASK)
#define SEARCH_CLASS(n) ((n) & SEARCH_CLASS_MASK)
#define SEARCH_TYPE_MASK 0xf0
#define SEARCH_CLASS_MASK 0x0f
// Search types (high nibble).
#define SIMPLE 0x10
#define ADVANCED 0x20
#define BOOLEAN 0x30
// Search class flag (low nibble): set when the form's OPERATOR value is "AND".
#define BOOLEAN_AND 0x01
// Tokenizer over a blank-separated string; double quotes group a phrase.
PCHR gettok(PCHR input);
// Reads the t-th TERM_/FIELD_/WEIGHT_/PHRASE_ form values and appends the
// resulting terms to the global `query`.
INT get_term(INT t, STRING &PrintTerm, STRING &PrintField, STRING &PrintWeight);
// Looks up the form variable whose name is `fmt` formatted with `n`.
PCHR get_field(PCHR fmt, INT n);
// Runs the search and prints the result page body.  Note the last parameter
// is spelled TYPE here and `type` in the definition.
INT Search(PCHR DBPath, PCHR DBName, STRING& query_str, STRING& ESName,
INT Start, INT MaxHits, INT TYPE);
// HTML/HTTP boilerplate emitters.
void PutHTTPHeader(void);
void PutHTMLHead(void);
void PutHTMLBodyStart(void);
void PutHTMLBodyEnd(void);
// Number of hits shown per page when MAXHITS is missing or negative.
#define MAXHIT_DEFAULT 50
// Size (excluding the terminator) of the scratch buffers used by get_field
// and get_term.
#define MAXSTR 1024
// Parsed CGI form data; allocated in main().
CGIAPP *cgidata;
// Query string handed to Search(); get_term() appends to it.
STRING query;
// Randy.Wood@nau.edu
// CGI directory name: the form's CGI_BIN value, or "cgi-bin" when absent
// (set in main()).
PCHR cgiDir;
/*
 * CGI entry point for the Isearch gateway.
 *
 * Reads the submitted form through the global `cgidata`, builds the query
 * string (and a human-readable PrintQuery), prints the page preamble and
 * the operation summary, then calls Search() to run the query and print
 * the hits.
 *
 * Form variables read here: DATABASE (required), CGI_BIN, START, MAXHITS,
 * ELEMENT_SET, HTTP_PATH, SEARCH_TYPE, ISEARCH_TERM, OPERATOR.  The
 * numbered TERM_n / FIELD_n / WEIGHT_n / PHRASE_n values are read through
 * get_term().
 *
 * argv[1] is passed to Search() as the database directory.
 * NOTE(review): argc is never checked, so argv[1] is a null pointer when
 * the program is started without arguments -- confirm how the STRING class
 * treats that before adding a hard failure here.
 *
 * Always terminates through exit(0).
 */
INT main(int argc, char **argv)
{
    CHR *db, *p, *path;
    STRING ESName;
    INT Start, MaxHits, i, type;
    STRING PrintQuery, PrintTerm, PrintField, PrintWeight;

    // Set the locale first, but hold the warning back: anything written
    // before PutHTTPHeader() would land in front of the Content-type line
    // and corrupt the CGI response.
    const char *locale_name = setlocale(LC_CTYPE, "");

    cgidata = new CGIAPP();
    // Write the preliminary stuff out - these can be customized
    PutHTTPHeader();
    PutHTMLHead();
    PutHTMLBodyStart();
    if (locale_name == NULL) {
        cout << "Warning: Failed to set the locale!" << endl;
    }
    // Good for debugging form values
    // cgidata->Display();
    // exit(0);
    if ((db = cgidata->GetValueByName("DATABASE")) == NULL) {
        cout << "You must specify a database name." << endl;
        PutHTMLBodyEnd();
        exit(0);
    }
    // Randy.Wood@nau.edu
    // if cgi-bin was specified by the web form, use it instead of cgi-bin
    if ((cgiDir = cgidata->GetValueByName("CGI_BIN")) == NULL) {
        cgiDir = new CHR[10];
        strcpy(cgiDir, "cgi-bin");
    }
    // Which result set record number should be displayed first?
    Start = (p = cgidata->GetValueByName("START")) ? atoi(p) : 1;
    // Hits are numbered from 1; reject zero, negative or non-numeric input.
    if (Start < 1)
        Start = 1;
    // How many result set records should be displayed?
    MaxHits = (p = cgidata->GetValueByName("MAXHITS")) ? atoi(p) : MAXHIT_DEFAULT;
    if (MaxHits == 0)
        MaxHits = 1;
    else if (MaxHits < 0)
        MaxHits = MAXHIT_DEFAULT;
    // What element set to return
    // "B" = brief.
    // "F" = fulltext.
    ESName = (p = cgidata->GetValueByName("ELEMENT_SET")) ? p : "B";
    query = "";
    // If they want URLs returned, there has to be a value for HTTP_PATH
    // (looked up as before; the value is not used further in this function)
    path = cgidata->GetValueByName("HTTP_PATH");
    // assume simple search type until told otherwise
    type = SIMPLE;
    if ((p = cgidata->GetValueByName("SEARCH_TYPE"))) {
        if (StrCaseCmp(p, "ADVANCED") == 0)
            type = ADVANCED;
        else if (StrCaseCmp(p, "BOOLEAN") == 0)
            type = BOOLEAN;
    }

    if (SEARCH_TYPE(type) == ADVANCED) {
        // The user typed the whole query expression. A missing form value
        // leaves query empty, which is reported further down.
        if ((p = cgidata->GetValueByName("ISEARCH_TERM")))
            query = p;
        PrintQuery = query;
    }
    else if (SEARCH_TYPE(type) == BOOLEAN) {
        STRING TempQuery;
        // One operator joins all terms. It may legitimately be absent.
        p = cgidata->GetValueByName("OPERATOR");
        // Build up the infix query from the components
        // Might as well make a nice printable version, too
        for (i = 1; get_term(i, PrintTerm, PrintField, PrintWeight) > 0; ++i) {
            if (i > 1) {
                if (p == NULL) {
                    // No OPERATOR submitted: fall back to OR, the implied
                    // operator used by the other search types, instead of
                    // handing a null pointer to StrCaseCmp()/Cat().
                    PrintQuery.Cat(" or ");
                    TempQuery.Cat(" or ");
                } else if (StrCaseCmp(p, "AND") == 0) {
                    PrintQuery.Cat(" and ");
                    TempQuery.Cat(" and ");
                } else if (StrCaseCmp(p, "ANDNOT") == 0) {
                    PrintQuery.Cat(" and not ");
                    TempQuery.Cat(" andnot ");
                } else if (StrCaseCmp(p, "OR") == 0) {
                    PrintQuery.Cat(" or ");
                    TempQuery.Cat(" or ");
                } else if (StrCaseCmp(p, "NEAR") == 0) {
                    PrintQuery.Cat(" is near ");
                    TempQuery.Cat(" near ");
                } else {
                    // Unknown operator: pass it through verbatim
                    PrintQuery.Cat(" ");
                    PrintQuery.Cat(p);
                    PrintQuery.Cat(" ");
                    TempQuery.Cat(" ");
                    TempQuery.Cat(p);
                    TempQuery.Cat(" ");
                }
            }
            PrintQuery.Cat(PrintTerm);
            if (PrintField.GetLength() > 0) {
                PrintQuery.Cat(" in ");
                PrintQuery.Cat(PrintField);
                TempQuery.Cat(PrintField);
                TempQuery.Cat("/");
            }
            TempQuery.Cat(PrintTerm);
            if (PrintWeight.GetLength() > 0) {
                TempQuery.Cat(":");
                TempQuery.Cat(PrintWeight);
            }
        }
        // get_term() also appended to the global query; the infix string
        // built here replaces it, and Search() then handles it as ADVANCED.
        query = TempQuery;
        type = ADVANCED;
    }
    else {
        // only SIMPLE searches get here
        // only 1 OPERATOR is possible = AND otherwise is an implied OR
        if ((p = cgidata->GetValueByName("OPERATOR"))) {
            if (StrCaseCmp(p, "AND") == 0)
                type |= BOOLEAN_AND;
        }
        // get_term() appends each term to the global query as a side effect
        for (i = 1; get_term(i, PrintTerm, PrintField, PrintWeight) > 0; ++i) {
            // since we know that logic is either "AND" or "OR" just insert
            if (i > 1) {
                if (SEARCH_CLASS(type) == BOOLEAN_AND)
                    PrintQuery.Cat(" and ");
                else
                    PrintQuery.Cat(" or ");
            }
            PrintQuery.Cat(PrintTerm);
            if (PrintField.GetLength() > 0) {
                PrintQuery.Cat(" in ");
                PrintQuery.Cat(PrintField);
            }
        }
    }

    if (query.Equals("")) {
        // From Monty Walls
        // When paging (Start > 1) the query travels in ISEARCH_TERM rather
        // than in the TERM_n fields. With no term at all there is nothing
        // to search for.
        p = (Start > 1) ? cgidata->GetValueByName("ISEARCH_TERM") : (CHR *)NULL;
        if (p == NULL) {
            cout << "You must enter a query term" << endl;
            PutHTMLBodyEnd();
            exit(0);
        }
        query = p;
        // only 1 OPERATOR is possible, or get implied OR
        if ((p = cgidata->GetValueByName("OPERATOR"))) {
            if (StrCaseCmp(p, "AND") == 0)
                type |= BOOLEAN_AND;
        }
    }

    // NOTE(review): the string literals below that span a line break are
    // reproduced exactly as found; presumably HTML markup was lost from this
    // copy of the file -- restore from the upstream source.
    cout << "Operation Summary
" << endl;
    cout << "Query: " << endl;
    cout << PrintQuery << "" << endl;
    INT nhits;
    nhits = Search(argv[1], db, query, ESName, Start, MaxHits, type);
    if (nhits > 0) {
        cout << "
" << endl;
    }
    PutHTMLBodyEnd();
    delete cgidata;
    exit(0);
}
/*
 * A slight variation on the traditional strtok: tokens are separated by
 * blanks, but a token that starts with a double quote runs up to the
 * matching closing quote, so quoted phrases come back as a single token
 * (without the quotes).
 *
 * Pass the string on the first call and NULL on later calls to continue
 * with the same string.  The input is modified in place: the delimiter
 * that ends each token is overwritten with '\0'.  Returns NULL when no
 * further token is available.
 *
 * Based on Henry Spencer's implementation of strtok.c
 */
PCHR
gettok(PCHR input)
{
    static PCHR resume;     // where a call with a NULL input picks up again
    PCHR cursor;
    PCHR start;
    CHR closer;

    if (input != (PCHR)NULL)
        cursor = input;
    else if (resume != (PCHR)NULL)
        cursor = resume;
    else
        return ((PCHR)NULL);

    // skip the blanks in front of the token
    while (*cursor == ' ')
        ++cursor;
    // nothing but blanks left; the saved position is left as it was
    if (*cursor == '\0')
        return ((PCHR)NULL);

    // an opening quote means the token ends at the next quote,
    // anything else means it ends at the next blank
    if (*cursor == '"') {
        closer = '"';
        ++cursor;
    } else {
        closer = ' ';
    }

    start = cursor;
    while (*cursor != '\0' && *cursor != closer)
        ++cursor;

    if (*cursor != '\0') {
        // cut the token off at its delimiter and continue after it next time
        *cursor = '\0';
        resume = cursor + 1;
    } else {
        // token ran to the end of the string
        resume = cursor;
    }
    return (start);
}
/*
 * Look up a numbered CGI form variable.
 *
 * f = printf-style template for the variable name containing one integer
 *     conversion (callers in this file pass e.g. "TERM_%i").
 * n = number substituted into the template.
 *
 * Returns whatever cgidata->GetValueByName() returns for the formatted
 * name: the value pointer, or NULL when the form has no such variable.
 *
 * The name is built in a stack buffer. The earlier version allocated it
 * with new[] and released it with scalar delete, which is undefined
 * behaviour. snprintf bounds the write to the buffer size.
 */
PCHR
get_field(PCHR f, INT n)
{
    CHR name[MAXSTR+1];

    snprintf(name, sizeof(name), f, n);
    return (cgidata->GetValueByName(name));
}
/*
 * Read the i-th search term from the form and append it to the global
 * `query`.
 *
 * Form variables consulted (through get_field):
 *   PHRASE_i  "YES" (any case) requests phrase searching
 *   TERM_i    the text typed by the user; required
 *   FIELD_i   field to search in; "FULLTEXT" is treated as no field
 *   WEIGHT_i  integer weight; only values > 0 are emitted
 *
 * TERM_i is split with gettok(). Each token is appended to `query` as
 * [field/]token[:weight], with the token wrapped in double quotes when
 * phrase searching is active.
 *
 * Outputs: PrintTerm receives the term text (including the surrounding
 * quotes that phrase mode adds), PrintField the field name or "", and
 * PrintWeight the weight text or "".  None of them is touched when TERM_i
 * is absent.
 *
 * Returns the number of tokens appended, 0 when TERM_i is absent or blank.
 *
 * Side effects: gettok() writes terminators into the TERM_i value, and a
 * FIELD_i of "FULLTEXT" is blanked in place in the form data.
 */
INT
get_term(INT i, STRING &PrintTerm, STRING &PrintField, STRING &PrintWeight)
{
    CHR entry[MAXSTR+1];        // one formatted term; on the stack, nothing to free
    PCHR quoted = (PCHR)NULL;   // heap copy of the term wrapped in quotes, if made
    PCHR s;
    PCHR argument;
    PCHR field;
    PCHR weight;
    PCHR phrase;
    INT w;
    INT terms = 0;
    GDT_BOOLEAN do_phrase = GDT_FALSE;

    // See if the form included a button to request phrase searching
    if ((phrase = get_field("PHRASE_%i", i)) != (PCHR)NULL
        && StrCaseCmp(phrase, "YES") == 0)
        do_phrase = GDT_TRUE;

    if ((argument = get_field("TERM_%i", i)) == (PCHR)NULL)
        return (0);

    if (argument[0] == '"') {
        // Even if there's no button, do phrase searching if the phrase is
        // in quotes
        do_phrase = GDT_TRUE;
    } else if (do_phrase) {
        // No quotes, but the user has asked for phrase searching via
        // separate input.  Nest the query in quotes so that gettok handles
        // it correctly, and so it gets passed to the TOKENGEN routines.
        quoted = new CHR[strlen(argument)+3];
        strcpy(quoted, "\"");
        strcat(quoted, argument);
        strcat(quoted, "\"");
        argument = quoted;
    }

    if ((field = get_field("FIELD_%i", i)) != (PCHR)NULL) {
        if (StrCaseCmp(field, "FULLTEXT") == 0) {
            strcpy(field, "");
        }
    }
    // Never hand a null pointer to STRING; an absent field prints as empty
    if (field != (PCHR)NULL)
        PrintField = field;
    else
        PrintField = "";
    const int has_field = (field != (PCHR)NULL && field[0] != '\0');

    if ((weight = get_field("WEIGHT_%i", i)) != (PCHR)NULL) {
        w = atoi(weight);
        PrintWeight = weight;
    }
    else {
        w = 0;
        PrintWeight = "";
    }

    // Capture the printable term before gettok starts cutting it up
    PrintTerm = argument;

    while ((s = gettok(argument)) != (PCHR)NULL) {
        // later calls continue with the same string
        argument = (PCHR)NULL;
        if (do_phrase) {
            if (has_field) {
                if (w > 0)
                    snprintf(entry, sizeof(entry), "%.128s/\"%.256s\":%d", field, s, w);
                else
                    snprintf(entry, sizeof(entry), "%.128s/\"%.256s\"", field, s);
            }
            else {
                if (w > 0)
                    snprintf(entry, sizeof(entry), "\"%.256s\":%d", s, w);
                else
                    snprintf(entry, sizeof(entry), "\"%.256s\"", s);
            }
        } else {
            if (has_field) {
                if (w > 0)
                    snprintf(entry, sizeof(entry), "%.128s/%.256s:%d", field, s, w);
                else
                    snprintf(entry, sizeof(entry), "%.128s/%.256s", field, s);
            }
            else {
                if (w > 0)
                    snprintf(entry, sizeof(entry), "%.256s:%d", s, w);
                else
                    snprintf(entry, sizeof(entry), "%.256s", s);
            }
        }
        if (!query.Equals(""))
            query.Cat(" ");
        query.Cat(entry);
        ++terms;
    }

    // The quoted copy has been fully tokenized and its text already copied
    // into PrintTerm and query, so it can be released (no-op when NULL).
    delete [] quoted;
    return (terms);
}
/*
Run a query against an Isearch database and print the result page body.

DBPath = full path to directory where database files reside,
e.g. "/usr/dbs".
DBName = root database name, e.g. "MYDB".
query_str = the query.  For ADVANCED searches it is rewritten in place
with its tokens joined by single blanks; it is printed as the
"Interpreted Query".
ESName = element set used to present each hit.  Anything other than "F"
is replaced by "B" (the caller documents "B" = brief, "F" = fulltext).
Start = number of the first hit to display (the loop below treats hits
as numbered from 1).
MaxHits = maximum number of hits to display
type = search type in the high nibble (SIMPLE or ADVANCED) plus the
BOOLEAN_AND class flag in the low nibble.
Returns the number of hits (0 when nothing matched), -1 when the
database cannot be opened or is empty.  Calls exit(0) when a boolean
query cannot be parsed.

NOTE(review): in this copy of the file several string literals span a
line break and the #if further down has no matching #endif; presumably
HTML markup and some lines were lost.  Restore from the upstream source
before building.
*/
INT Search(PCHR DBPath, PCHR DBName, STRING& query_str, STRING& ESName,
INT Start, INT MaxHits, INT type)
{
PRSET prset;
PIRSET pirset;
STRING DBPathName;
STRING DBRootName;
SQUERY query;
IDB *pdb;
INT HitCount, FieldCount, NextStart, NextCount, i;
DFDT dfdt, rec_dfdt;
DFD dfd;
time_t StartTime, EndTime;
// Only "F" is passed through; every other element set name becomes "B".
if (!ESName.Equals("F"))
ESName = "B";
// Translate the query string into an SQUERY.  There is no case for
// BOOLEAN: main() rewrites BOOLEAN searches to ADVANCED before calling.
switch (SEARCH_TYPE(type)) {
case ADVANCED: {
// break the query string into tokens.
STRLIST PhraseList;
STRING StrTerm;
TOKENGEN TokenGen(query_str);
// this loop processes each term.
query_str = "";
INT IsBool=0;
INT TotalPhrases = TokenGen.GetTotalEntries();
for (i=1;i <= TotalPhrases;i++) {
TokenGen.GetEntry(i, &StrTerm);
// A single operator token is enough to treat the query as boolean.
// NOTE(review): ^= is presumably STRING's case-insensitive comparison,
// while == is exact -- confirm against the STRING class.
if ( (StrTerm ^= "AND")
|| (StrTerm ^= "OR")
|| (StrTerm ^= "ANDNOT")
|| (StrTerm == "||")
|| (StrTerm == "&&") ) {
IsBool = 1;
}
//rebuild the query
query_str += StrTerm;
if (i < TotalPhrases)
query_str += ' ';
}
if (IsBool) {
// Boolean query: convert the infix expression with INFIX2RPN, using OR
// as the default operator.
STRING ProcessedQuery;
INFIX2RPN Parser;
Parser.SetDefaultOp("OR");
Parser.Parse(query_str, &ProcessedQuery);
if (!Parser.InputParsedOK()) {
// Unparseable input ends the program here; the caller's closing
// PutHTMLBodyEnd() is never reached on this path.
cout << "The Query " << query_str << "\n";
cout << "was unparseable. If you think this is an error in this\n";
cout << "gateway, send mail to ";
cout << "CNIDR's Isite Technical Support. Please include all relevant\n";
cout << "information, including the URL of the page you're searching\n";
cout << "in and the query that you entered.\n";
exit(0);
} else {
query.SetRpnTerm(ProcessedQuery);
}
} else
query.SetTerm(query_str);
break;
}
case SIMPLE: {
query.SetTerm(query_str);
break;
}
}
// Open database
DBPathName=DBPath;
DBRootName=DBName;
// NOTE(review): under standard C++ a plain new-expression throws instead
// of returning NULL, so this branch only matters on pre-standard compilers.
if ((pdb = new IDB(DBPathName, DBRootName)) == NULL) {
printf("Failed to open database [%s]\n", DBName);
return -1;
}
// Is the database valid?
// NOTE(review): pdb is not deleted on this return, nor on any later one.
if (pdb->GetTotalRecords() <= 0) {
cout << "Database " << DBRootName;
cout << " does not exist or is corrupted\n";
return -1;
}
// Execute the search
// time() gives whole seconds, so the reported query time has one-second
// resolution.
time(&StartTime);
if (SEARCH_CLASS(type) == BOOLEAN_AND)
pirset=pdb->AndSearch(query);
else
pirset=pdb->Search(query);
time(&EndTime);
pirset->SortByScore();
// How many hits?
HitCount = pirset->GetTotalEntries();
// pdb->BeginRsetPresent(RecordSyntax);
// Build the presentable result set from the search result via
// GetRset/Fill and copy the score range over (the exact semantics of
// these project calls are not visible in this file).
PRSET NewPrset;
NewPrset=pirset->GetRset(0,HitCount);
pirset->Fill(0,HitCount,NewPrset);
NewPrset->SetScoreRange(pirset->GetMaxScore(),
pirset->GetMinScore());
prset = NewPrset;
HitCount = prset->GetTotalEntries();
// Number of hits on this page: a full page of MaxHits when more hits
// remain beyond it, otherwise whatever is left from Start to the last hit
// (this goes to zero or below when Start is past the end, in which case
// the display loop does not run).
INT FetchCount
= HitCount > (Start + MaxHits - 1) ? MaxHits : (HitCount - Start + 1);
cout << "Matching Record Count: " << HitCount << "
\n";
cout << "Total Retrieved: " << FetchCount << "
\n";
cout << "Interpreted Query: " << query_str << "
\n";
cout << "Total Database Records: " << pdb->GetTotalRecords();
cout << "
\n";
cout << "Query Time: " << (EndTime - StartTime);
cout << " seconds
" << endl;
cout << "Results
" << endl;
if (HitCount == 0) {
cout << "\nNo matches found.\n
" << endl;
return 0;
} else cout << "
";
// FieldCount is computed here but not read again in the visible code.
pdb->GetDfdt(&dfdt);
FieldCount = dfdt.GetTotalEntries();
// Build the HTML output. You'll probably want to customize here
RESULT RsRecord;
STRING File, RecordKey, Headline, Field, Fullname;
DOUBLE Score;
INT j, x, y;
CHR *url, *name, *HttpPath;
// The web server's document root (from the environment); used below to
// turn a file's full path into a path relative to that root.
HttpPath=(char *)getenv("DOCUMENT_ROOT");
/*
CHR *PathInfo,*PathTranslated;
PathInfo=(char *)getenv("PATH_INFO");
PathTranslated=(char *)getenv("PATH_TRANSLATED");
cout << "PATH_INFO=" << PathInfo << "
" << endl;
cout << "PATH_TRANSLATED=" << PathTranslated << "
" << endl;
*/
// Print the hits numbered Start .. Start+FetchCount-1.
for (i=Start;i <= (Start + FetchCount - 1);i++) {
// Fetch the first hit
prset->GetEntry(i, &RsRecord);
// Construct a headline for this hit in HTML
// pdb->Present(RsRecord, ESName, &Headline);
pdb->Present(RsRecord, ESName, HtmlRecordSyntax, &Headline);
// Get the name of the file
RsRecord.GetFullFileName(&Fullname);
RsRecord.GetFileName(&File);
// NOTE(review): NewCString() presumably returns a freshly allocated copy;
// it is not released anywhere in this loop -- confirm and free if so.
name=Fullname.NewCString();
// Find the url
// url ends up pointing just past the document root inside name, or NULL
// when the file is not under the document root (or no root is known).
if (HttpPath) {
url=strstr(name,HttpPath);
if (url)
url=url+strlen(HttpPath);
} else
url=(PCHR)NULL;
// Get the unique database key for this record to be use
// in subsequent retrieval when URL is clicked.
RsRecord.GetKey(&RecordKey);
Score = prset->GetScaledScore(RsRecord.GetScore(),100);
cout << "Match Number: " << i << " of " << HitCount
<< "
" << endl;
cout << "Score: " << Score << " - " << endl;
/* Files not within the given WWW path must be accessed with ifetch
for their full text */
// NOTE(review): the #if below has no #else/#endif in this copy and the
// braces around it do not balance (see the commented-out brace further
// down); the lines that printed the hyperlink markup appear to be missing.
if (url==NULL) {
#if defined(_WIN32) || defined (MSDOS)
cout << "";
if (Headline.GetLength() > 0) {
cout << Headline;
cout << " [";
cout << File;
cout << "] " << endl;
} else {
cout << File;
cout << "" << endl;
}
} else { // Just print the URL
cout << "";
if (Headline.GetLength() > 0) {
cout << Headline;
cout << " [";
cout << File;
cout << "]" << endl;
} else {
cout << File;
cout << "" << endl;
}
}
// }
// Separator between hits; skipped after the very last hit of the result
// set.
if ((i + 1) <= HitCount)
cout << "
";
}
// Paging: how many hits lie beyond this page, where the next page starts
// and how many hits it would show.  NextStart and NextCount are not used
// by the output statements still present below.
INT Remaining = (HitCount - (Start + MaxHits - 1));
NextStart = Start + MaxHits;
NextCount = Remaining < MaxHits ? Remaining : MaxHits;
// If we're not on the first page, let's give them a way to move back
if (Start > 1) {
// Display a mini-form with CGI variables
cout << endl;
// Randy.Wood@nau.edu
// Break up the cgi-bin button to use the variable instead of hard-code
cout << "" << endl;
}
// If we were unable to display all hits, lets give them a hyperlink
// to display the next MaxHits of them.
if (Remaining > 0) {
// Display a mini-form with CGI variables
cout << endl;
// Randy.Wood@nau.edu
// Break up the cgi-bin button to use the variable instead of hard-code
cout << "" << endl;
}
return HitCount;
}
// Emit the CGI response header: the content type line followed by the
// blank line that ends the header section.  Must be the first output.
void PutHTTPHeader()
{
    static const char http_header[] = "Content-type: text/html\n\n";

    cout << http_header;
}
// Emit the head section of the result page, including the page title
// text "Isearch Results".  Each piece is followed by endl, as before.
void PutHTMLHead()
{
    cout << "\n" << endl
         << "Isearch Results" << endl
         << "" << endl;
}
// Emit the opening of the page body: a banner line naming the gateway
// followed by the Isearch version.
void PutHTMLBodyStart()
{
    cout << "" << endl << "CNIDR Isearch-cgi " << IsearchVersion;
    cout << "" << endl;
}
// Emit the closing of the page body and flush the output stream.
void PutHTMLBodyEnd()
{
    cout << "";
    cout << endl;
}