/* $Id: numsearch.cxx,v 1.12 1999/03/26 00:27:22 cnidr Exp $ */ /************************************************************************ Copyright Notice Copyright (c) MCNC, Clearinghouse for Networked Information Discovery and Retrieval, 1994. Permission to use, copy, modify, distribute, and sell this software and its documentation, in whole or in part, for any purpose is hereby granted without fee, provided that 1. The above copyright notice and this permission notice appear in all copies of the software and related documentation. Notices of copyright and/or attribution which appear at the beginning of any file included in this distribution must remain intact. 2. Users of this software agree to make their best efforts (a) to return to MCNC any improvements or extensions that they make, so that these may be included in future releases; and (b) to inform MCNC/CNIDR of noteworthy uses of this software. 3. The names of MCNC and Clearinghouse for Networked Information Discovery and Retrieval may not be used in any advertising or publicity relating to the software without the specific, prior written permission of MCNC/CNIDR. THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND, EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT SHALL MCNC/CNIDR BE LIABLE FOR ANY SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY OF LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. ************************************************************************/ /*@@@ File: numsearch.cxx Version: $Revision: 1.12 $ Description: Class INDEX - numeric search methods Author: Archie Warnock (warnock@clark.net), A/WWW Enterprises @@@*/ #include #include #include #include #include #include "defs.hxx" #include "string.hxx" #include "vlist.hxx" #include "strlist.hxx" #include "common.hxx" //#include "sw.hxx" #include "soundex.hxx" #include "nfield.hxx" #include "nlist.hxx" #include "intfield.hxx" #include "intlist.hxx" #include "attr.hxx" #include "attrlist.hxx" #include "dfd.hxx" #include "dfdt.hxx" #include "fc.hxx" #include "fct.hxx" #include "df.hxx" #include "dft.hxx" #include "record.hxx" #include "mdtrec.hxx" #include "mdt.hxx" #include "result.hxx" #include "idbobj.hxx" #include "iresult.hxx" #include "opobj.hxx" #include "operand.hxx" #include "rset.hxx" #include "irset.hxx" #include "opstack.hxx" #include "squery.hxx" #include "dtreg.hxx" #include "rcache.hxx" #include "index.hxx" #include "fprec.hxx" #include "fpt.hxx" #include "registry.hxx" #include "idb.hxx" #include "mergeunit.hxx" #include "filemap.hxx" #ifdef DICTIONARY #include "dictionary.hxx" #endif void INDEX::SortNumericFieldData() { INT total; INT x; DFD DfdRecord; STRING FieldName,FieldType,Fn; INT4 Count; total = Parent->DfdtGetTotalEntries(); for (x=1; x<=total; x++) { Parent->DfdtGetEntry(x,&DfdRecord); DfdRecord.GetFieldType(&FieldType); if (FieldType.CaseEquals("TEXT")) { continue; } else if (FieldType.CaseEquals("NUM")) { NUMERICLIST NumList; DfdRecord.GetFieldName(&FieldName); Parent->DfdtGetFileName(FieldName,&Fn); NumList.SetFileName(Fn); NumList.LoadTable(0,-1); Count = NumList.GetCount(); if (Count > 1) { CHR *Fname; Fname = Fn.NewCString(); unlink(Fname); delete Fname; NumList.Sort(); NumList.WriteTable(0); NumList.SortByGP(); NumList.WriteTable(Count); } /* // Debugging if (Count > 0) { cout << "\nDumping " << FieldName << endl; cout << "Debug output:" << endl; NUMERICLIST NL; NL.SetFileName(Fn); NL.LoadTable(0,-1,VAL_BLOCK); NL.Dump(0,Count); } else cout << "Empty field " << FieldName << endl; */ } else if (FieldType.CaseEquals("COMPUTED")) { NUMERICLIST NumList; DfdRecord.GetFieldName(&FieldName); Parent->DfdtGetFileName(FieldName,&Fn); NumList.SetFileName(Fn); NumList.LoadTable(0,-1); Count = NumList.GetCount(); if (Count > 1) { CHR *Fname; Fname = Fn.NewCString(); unlink(Fname); delete Fname; NumList.Sort(); NumList.WriteTable(0); NumList.SortByGP(); NumList.WriteTable(Count); } /* // Debugging if (Count > 0) { cout << "\nDumping " << FieldName << endl; cout << "Debug output:" << endl; NUMERICLIST NL; NL.SetFileName(Fn); NL.LoadTable(0,-1,VAL_BLOCK); NL.Dump(0,Count); } else cout << "Empty field " << FieldName << endl; */ } else if ((FieldType.CaseEquals("DATE-RANGE")) || (FieldType.CaseEquals("DATE"))) { INTERVALLIST IntList; DfdRecord.GetFieldName(&FieldName); Parent->DfdtGetFileName(FieldName,&Fn); IntList.SetFileName(Fn); IntList.LoadTable(0,-1); Count = IntList.GetCount(); if (Count > 1) { CHR *Fname; Fname = Fn.NewCString(); unlink(Fname); delete Fname; IntList.SortByStart(); IntList.WriteTable(0); IntList.SortByEnd(); IntList.WriteTable(Count); IntList.SortByGP(); IntList.WriteTable(2*Count); } /* if (Count > 0) { cout << "\nDumping " << FieldName << endl; cout << "Debug output:" << endl; INTERVALLIST NL; NL.SetFileName(Fn); NL.LoadTable(0,-1,0); cout << Count << " records sorted by start:\n" << endl; NL.Dump(0,Count); NL.LoadTable(0,-1,Count); cout << Count << " records sorted by end:\n" << endl; NL.Dump(0,Count); } else cout << "Empty field " << FieldName << endl; */ } else { continue; } } return; } PIRSET INDEX::NumericSearch(const DOUBLE fKey, const STRING& FieldName, INT4 Relation) { STRING FieldType; PIRSET pirset; SearchState Status=NO_MATCH; INT4 Start=-1, End=-1, Pointer=0, Value, ListCount; INT4 w; IRESULT iresult; MDT* ThisMdt; STRING Fn; NUMERICLIST List; Parent->FieldTypes.GetValue(FieldName,&FieldType); if (FieldType.GetLength() == 0) FieldType = "TEXT"; if(FieldType == "TEXT") return((PIRSET)NULL); pirset=new IRSET(Parent); /* We'll fix the rset cache when we can feed the server name to it STRING DBName="",T1; CHR TempBuffer[256]; Parent->GetDBFileStem(DBName); sprintf(TempBuffer,"%f",fKey); T1 = TempBuffer; w = SetCache->Check(T1,Relation,FieldName,DBName); if(w > -1) { delete pirset; return(SetCache->Fetch(w)); } */ Parent->DfdtGetFileName(FieldName, &Fn); switch (Relation) { case ZRelEQ: // equals // Start is the smallest index in the table // for which fKey is <= to the table value Status = List.Find(Fn, fKey, ZRelGT, &End); if (Status == TOO_LOW) // We ran off the bottom end without a match Status = NO_MATCH; if (Status == NO_MATCH) // No matching values - bail out break; // End is the largest index in the table for which // fKey is >= to the table value; Status = List.Find(Fn, fKey, ZRelLT, &Start); if (Status == TOO_HIGH) // We ran off the top Status = NO_MATCH; break; case ZRelLT: // less than case ZRelLE: // less than or equal to // Start at the beginning of the table Start=0; // End is the largest index in the table for which // fKey is <= the table value Status = List.Find(Fn, fKey, Relation, &End); if (Status == TOO_LOW) // We ran off the low end without a match Status = NO_MATCH; break; case ZRelGE: // greater than or equal to case ZRelGT: // greater than // Go to the end of the table End = -1; // Find the smallest index for which fKey is >= the table value Status = List.Find(Fn, fKey, Relation, &Start); if (Status == TOO_HIGH) // We ran off the top end without a match Status = NO_MATCH; break; } // Bail out if we failed to find the value we were looking for if (Status == NO_MATCH) return pirset; List.SetFileName(Fn); if (Relation != ZRelEQ) { List.LoadTable(Start, End, VAL_BLOCK); } else if ( (Relation == ZRelEQ) && ( (End == -1) || (Start == -1) ) ) { if (Start != -1) Start++; if (End != -1) End--; List.LoadTable(Start, End, VAL_BLOCK); } else { // OK - it was equal and we found the value. But Start points to the // first value less than the hit, and End points to the first value // greater than the hit, so we need to bump them to get to the right // values List.LoadTable(Start+1, End-1, VAL_BLOCK); } ListCount = List.GetCount(); for(Pointer=0; PointerGetMainMdt(); Value=List.GetGlobalStart(Pointer); w = Parent->GetMainMdt()->LookupByGp(Value); iresult.SetMdtIndex(w); iresult.SetHitCount(1); iresult.SetScore(0); iresult.SetMdt(*ThisMdt); pirset->FastAddEntry(iresult, 1); } pirset->SortByIndex(); pirset->MergeEntries(1); // SetCache->Add(T1,Relation,FieldName,DBName,pirset); #ifdef DEBUG cout << "NumericSearch - " << pirset->GetTotalEntries() << " hits in " << FieldName << " for term=" << fKey << ", relation=" << Relation << endl; #endif return(pirset); }