/************************************************************************ Copyright Notice Copyright (c) MCNC, Clearinghouse for Networked Information Discovery and Retrieval, 1994. Permission to use, copy, modify, distribute, and sell this software and its documentation, in whole or in part, for any purpose is hereby granted without fee, provided that 1. The above copyright notice and this permission notice appear in all copies of the software and related documentation. Notices of copyright and/or attribution which appear at the beginning of any file included in this distribution must remain intact. 2. Users of this software agree to make their best efforts (a) to return to MCNC any improvements or extensions that they make, so that these may be included in future releases; and (b) to inform MCNC/CNIDR of noteworthy uses of this software. 3. The names of MCNC and Clearinghouse for Networked Information Discovery and Retrieval may not be used in any advertising or publicity relating to the software without the specific, prior written permission of MCNC/CNIDR. THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND, EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT SHALL MCNC/CNIDR BE LIABLE FOR ANY SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY OF LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
************************************************************************/ /*@@@ File: irset.cxx Version: 1.01 Description: Class IRSET - Internal Search Result Set Author: Nassib Nassar, nrn@cnidr.org @@@*/ /* NOTE(review): this copy of the file appears damaged. Line breaks are collapsed, so every line comment and every preprocessor conditional now shares a physical line with the code that followed it, and text between a less-than sign and a later greater-than sign appears to be missing (see the bare include directive and the truncated for loops). Restore from a pristine copy before building. The annotations below describe only what is still visible. */ #include #include "defs.hxx" #include "string.hxx" #include "common.hxx" #include "vlist.hxx" #include "attr.hxx" #include "attrlist.hxx" #include "dfd.hxx" #include "dfdt.hxx" #include "fc.hxx" #include "fct.hxx" #include "result.hxx" #include "strlist.hxx" #include "df.hxx" #include "dft.hxx" #include "record.hxx" #include "mdtrec.hxx" #include "mdt.hxx" #include "idbobj.hxx" #include "result.hxx" #include "iresult.hxx" #include "opobj.hxx" #include "operand.hxx" #include "rset.hxx" #include "irset.hxx" #include "opstack.hxx" #include "squery.hxx" #include "dtreg.hxx" /* IrsetIndexCompare: comparator with the qsort-style (const void*, const void*) signature. Both arguments are cast to PIRESULT and the MDT index of x is subtracted from the MDT index of y. Returns -1 when that difference is negative (x has the larger index), 0 when the indexes are equal, and 1 otherwise, so a sort driven by this function places larger MDT indexes first. */ int IrsetIndexCompare(const void* x, const void* y) { INT4 Difference = ( (*((PIRESULT)y)).GetMdtIndex() - (*((PIRESULT)x)).GetMdtIndex() ); if (Difference < 0) { return (-1); } else { if (Difference == 0) { return(0); } else { return 1; } } } /* Constructor: delegates all setup to Init, passing DbParent through. */ IRSET::IRSET(const PIDBOBJ DbParent) { Init(DbParent); } /* Init: allocates a fresh Table of 1000 IRESULT slots, sets TotalEntries to 0 and MaxEntries to 1000, stores DbParent in Parent, and seeds MinScore with 999999.0 and MaxScore with 0.0. It does not release a Table that already exists; operator= deletes the old one before calling here. NOTE(review): the ScoreSort statement declares a new local INT, so if the class has a member called ScoreSort it is left untouched here - confirm against irset.hxx. */ void IRSET::Init(const PIDBOBJ DbParent) { Table = new IRESULT[1000]; TotalEntries = 0; MaxEntries = 1000; Parent = DbParent; MinScore=999999.0; MaxScore=0.0; INT ScoreSort=0; // 1 if sorted by score } /* GetMaxScore: returns the MaxScore member. */ DOUBLE IRSET::GetMaxScore(){ return(MaxScore); } /* GetMinScore: returns the MinScore member. */ DOUBLE IRSET::GetMinScore(){ return(MinScore); } /* operator=: deletes the current Table when it is non-null, calls Init with the parent of OtherIrset, then copies entries 1 through OtherIrset.GetTotalEntries() by calling GetEntry on the source and AddEntry(iresult, 0) on this object. NOTE(review): there is no self-assignment guard; assigning an object to itself would delete and re-initialise Table before anything is read back - confirm whether callers can do that. */ OPOBJ& IRSET::operator=(const OPOBJ& OtherIrset) { if (Table) { delete [] Table; } Init(OtherIrset.GetParent()); INT y = OtherIrset.GetTotalEntries(); INT x; IRESULT iresult; for (x=1; x<=y; x++) { OtherIrset.GetEntry(x, &iresult); AddEntry(iresult, 0); } return *this; } /* Duplicate (const): heap-allocates a new IRSET with the same Parent, assigns this object to it through the OPOBJ assignment operator, and returns it as an OPOBJ pointer. Nothing here frees the copy, so the caller presumably owns it - TODO confirm. */ OPOBJ* IRSET::Duplicate() const { IRSET* Temp = new IRSET(Parent); *Temp = *((OPOBJ*)this); return (OPOBJ*)Temp; } /* Duplicate (non-const): same steps as the const overload but returns the copy as a PIRSET. */ PIRSET IRSET::Duplicate(){ IRSET * Temp= new IRSET(Parent); *Temp=*((OPOBJ*)this); return(Temp); } /* MergeEntries: NOTE(review): the body is truncated in this copy (the first for loop breaks off after its initialiser). What remains shows it returns at once when TotalEntries is 0, fills a temporary NewTable, replaces Table with an array of CurrentItem+1 entries, sets MaxEntries and TotalEntries to that count, and frees NewTable. The comparison that decides when two entries merge, and any use of AddHitCounts, is not visible. */ void IRSET::MergeEntries(const INT AddHitCounts) { INT x; DOUBLE y; INT CurrentItem=0; IRESULT *NewTable; #ifdef DO_HIGHLIGHTING 
FCT Fct; #endif if (TotalEntries == 0) return; NewTable= new IRESULT[TotalEntries]; NewTable[0]=Table[0]; for (x=1; x MaxScore) { MaxScore = y; } if (y < MinScore) { MinScore = y; } } else { CurrentItem++; NewTable[CurrentItem]=Table[x]; } } delete [] Table; Table=new IRESULT[CurrentItem+1]; MaxEntries=CurrentItem+1; TotalEntries=CurrentItem+1; for(x=0; xMaxScore) MaxScore=Table[x].GetScore(); } delete [] NewTable; } /* FastAddEntry: appends ResultRecord at Table[TotalEntries] and increments TotalEntries, calling Expand first when TotalEntries equals MaxEntries. Unlike AddEntry it has no loop over existing entries. AddHitCounts and the local x are not used. */ void IRSET::FastAddEntry(const IRESULT& ResultRecord, const INT AddHitCounts) { DOUBLE x; if (TotalEntries == MaxEntries) Expand(); Table[TotalEntries] = ResultRecord; TotalEntries = TotalEntries + 1; } /* AddEntry: NOTE(review): the loop header is truncated in this copy. The visible remainder shows a loop over x (flagged as linear by the original comment) that can update MaxScore and MinScore from y and return early; when the loop finishes without returning, the record is appended exactly as in FastAddEntry. The condition that selects an existing entry and the use of AddHitCounts are not visible. */ void IRSET::AddEntry(const IRESULT& ResultRecord, const INT AddHitCounts) { INT x; DOUBLE y; // linear! for (x=0; x MaxScore) MaxScore = y; if (y < MinScore) MinScore = y; return; } } if (TotalEntries == MaxEntries) Expand(); Table[TotalEntries] = ResultRecord; TotalEntries = TotalEntries + 1; } /* GetEntry: Index is 1-based. When Index lies between 1 and TotalEntries inclusive, copies Table[Index-1] into the object ResultRecord points at; otherwise that object is left unmodified. */ void IRSET::GetEntry(const INT Index, PIRESULT ResultRecord) const { if ( (Index > 0) && (Index <= TotalEntries) ) { *ResultRecord = Table[Index-1]; } } /* GetRset: NOTE(review): the loop header is truncated in this copy. The visible code allocates a new RSET and, for each table entry whose MDT record is not marked deleted, copies key, document type, path name, file name, local record start and end, score and database number into a RESULT that is added to the set; with DO_HIGHLIGHTING the hit table is copied too, after subtracting the global file start plus local record start. Returns the new RSET; nothing here frees it, so the caller presumably owns it - TODO confirm. */ PRSET IRSET::GetRset() { RSET *prset = new RSET(); RESULT result; MDTREC mdtrec; #ifdef DO_HIGHLIGHTING FCT Fct; #endif STRING s; INT x; for (x=0; xGetMainMdt()->GetEntry(Table[x].GetMdtIndex(), &mdtrec); if (mdtrec.GetDeleted() == GDT_FALSE) { mdtrec.GetKey(&s); result.SetKey(s); mdtrec.GetDocumentType(&s); result.SetDocumentType(s); mdtrec.GetPathName(&s); result.SetPathName(s); mdtrec.GetFileName(&s); result.SetFileName(s); result.SetRecordStart(mdtrec.GetLocalRecordStart()); result.SetRecordEnd(mdtrec.GetLocalRecordEnd()); result.SetScore(Table[x].GetScore()); result.SetDbNum(Table[x].GetDbNum()); #ifdef DO_HIGHLIGHTING Table[x].GetHitTable(&Fct); Fct.SubtractOffset(mdtrec.GetGlobalFileStart() + mdtrec.GetLocalRecordStart()); result.SetHitTable(Fct); #endif prset->AddEntry(result); } } return prset; } /* GetRset (ranged): NOTE(review): most of the body is missing from this copy. Visible: allocates a new RSET, clamps End to TotalEntries, starts a loop at x=Start, adds RESULT records to the set, and returns it. Whether Start is 0-based or 1-based and whether End is inclusive cannot be determined from the remaining text. */ PRSET IRSET::GetRset(INT4 Start, INT4 End) { RSET *prset = new RSET(); RESULT result; MDTREC mdtrec; #ifdef 
DO_HIGHLIGHTING FCT Fct; #endif STRING s; INT x; if(End>TotalEntries) End=TotalEntries; for (x=Start; xAddEntry(result); } return prset; } /* Fill: NOTE(review): the loop header is truncated in this copy. Visible: End is clamped to TotalEntries; for each table entry x the MDT record is fetched through the MDT returned by Table[x].GetMdt() (the equivalent call through the parent main MDT is commented out); key, document type, path name, file name, local record start and end, and score are copied into result, which is then stored by calling SetEntry on set at position y+1. With DO_HIGHLIGHTING the hit table of result is adjusted by subtracting the global file start plus local record start. */ void IRSET::Fill(INT Start, INT End, PRSET set) { RESULT result; MDTREC mdtrec; MDT* ThisMdt; #ifdef DO_HIGHLIGHTING FCT Fct; #endif STRING s; INT x, y; if(End>TotalEntries) End=TotalEntries; // for (y=0, x=Start-1; xGetEntry(x+1,&result); ThisMdt = Table[x].GetMdt(); ThisMdt->GetEntry(Table[x].GetMdtIndex(), &mdtrec); // Parent->GetMainMdt()->GetEntry(Table[x].GetMdtIndex(), &mdtrec); mdtrec.GetKey(&s); result.SetKey(s); mdtrec.GetDocumentType(&s); result.SetDocumentType(s); mdtrec.GetPathName(&s); result.SetPathName(s); mdtrec.GetFileName(&s); result.SetFileName(s); result.SetRecordStart(mdtrec.GetLocalRecordStart()); result.SetRecordEnd(mdtrec.GetLocalRecordEnd()); #ifdef DO_HIGHLIGHTING result.GetHitTable(&Fct); //Fct.ConvertHits(mdtrec); Fct.SubtractOffset(mdtrec.GetGlobalFileStart() + mdtrec.GetLocalRecordStart()); result.SetHitTable(Fct); #endif result.SetScore(Table[x].GetScore()); //set->SetEntry(x+1,result); set->SetEntry(y+1,result); } } /* Expand: grows the table by calling Resize with twice TotalEntries (an earlier fixed increment of 1000 is commented out). NOTE(review): when TotalEntries is 0 this requests 0 entries - confirm that cannot happen, for example after CleanUp on an empty set. */ void IRSET::Expand() { // Resize(TotalEntries+1000); // Really resize this Resize(TotalEntries*2); } /* CleanUp: calls Resize(TotalEntries), i.e. requests a table sized to the entries currently held. */ void IRSET::CleanUp() { Resize(TotalEntries); } /* Resize: NOTE(review): truncated in this copy after the start of the copy loop. Visible: allocates Temp with Entries slots and sets RecsToCopy to the smaller of Entries and TotalEntries, lowering TotalEntries to Entries when shrinking. The text that follows on this line belongs to a different routine whose header has been lost. */ void IRSET::Resize(const INT Entries) { IRESULT *Temp = new IRESULT[Entries]; INT RecsToCopy; INT x; if (Entries >= TotalEntries) { RecsToCopy = TotalEntries; } else { RecsToCopy = Entries; TotalEntries = Entries; } for (x=0; xAddToHitTable(OtherIresult); #endif match->IncHitCount(OtherIresult.GetHitCount()); y = OtherIresult.GetScore(); match->IncScore(y); if (y > MaxScore) MaxScore = y; if (y < MinScore) MinScore = y; MyResult.FastAddEntry(*match, 0); count++; } x++; } MyResult.SortByIndex(); MyResult.MergeEntries(0); delete [] Table; TotalEntries=count; MaxEntries=MyResult.MaxEntries; if (MyResult.GetMaxScore() > MaxScore) { MaxScore = MyResult.GetMaxScore(); } if (MyResult.GetMinScore() < MinScore) { MinScore = MyResult.GetMinScore(); 
} Table = MyResult.StealTable(); } #else /* And: (variant in the preprocessor else branch) for each entry of this set, scans the result set of OtherIrset for a record with the same path name and, when none is found, sets the MDT index of the entry to 0. NOTE(review): the loop that follows is truncated in this copy, and the text after it appears to run into a second routine with the inverse test (index zeroed when a match is found). Both call GetRset, which allocates with new; no matching delete is visible in the surviving text. */ void IRSET::And(const OPOBJ& OtherIrset) { // not a very fast implementation INT y; GDT_BOOLEAN found; IRESULT OtherIresult; INT x = 0; IRSET *pTempIrset; RSET *Prset,*OtherPrset; RESULT MyResultRecord, OtherResultRecord; STRING MyPath,OtherPath; pTempIrset = (PIRSET) &OtherIrset; OtherPrset = pTempIrset->GetRset(); Prset = GetRset(); while (x < TotalEntries) { found = GDT_FALSE; Prset->GetEntry(x+1, &MyResultRecord); MyResultRecord.GetPathName(&MyPath); for (y=1; y<=OtherIrset.GetTotalEntries(); y++) { OtherPrset->GetEntry(y, &OtherResultRecord); OtherResultRecord.GetPathName(&OtherPath); if (MyPath == OtherPath) { found = GDT_TRUE; break; } } if (!found) { Table[x].SetMdtIndex(0); } x++; } INT in, out, last; out = 0; last = TotalEntries; for (in=0;inGetRset(); Prset = GetRset(); while (x < TotalEntries) { found = GDT_FALSE; Prset->GetEntry(x+1, &MyResultRecord); MyResultRecord.GetPathName(&MyPath); for (y=1; y<=OtherIrset.GetTotalEntries(); y++) { OtherPrset->GetEntry(y, &OtherResultRecord); OtherResultRecord.GetPathName(&OtherPath); if (MyPath.Equals(OtherPath)) { found = GDT_TRUE; break; } } if (found) { Table[x].SetMdtIndex(0); } x++; } INT in, out, last; out = 0; last = TotalEntries; for (in=0;inGetMainMdt()->GetTotalEntries(); DOUBLE InvDocFreq = DocsInDb / DocsInRs; DOUBLE SumSqScores = 0; DOUBLE SqrtSum; DOUBLE Score; for (x=0; xMaxScore) MaxScore=Score; if(ScoreGetEntry(x,&ResultRecord); ResultRecord.GetPathName(&PathName); ResultRecord.GetFileName(&FileName); cerr << " -Result#" << x << " "; cerr << PathName; cerr << FileName << endl; ResultRecord.GetKey(&ResultKey); cerr << " Key="; cerr << ResultKey; cerr << " ["; cerr << ResultRecord.GetRecordStart(); cerr << ", "; cerr << ResultRecord.GetRecordEnd(); cerr << "]" << endl; } } /* Destructor: releases Table with delete [] when it is non-null. */ IRSET::~IRSET() { if (Table) delete [] Table; }