/* $Id: Iutil.cxx,v 1.25 2000/09/06 18:21:26 cnidr Exp $ */
/************************************************************************
Copyright Notice

Copyright (c) MCNC, Clearinghouse for Networked Information Discovery and
Retrieval, 1994.

Permission to use, copy, modify, distribute, and sell this software and
its documentation, in whole or in part, for any purpose is hereby granted
without fee, provided that

1. The above copyright notice and this permission notice appear in all
copies of the software and related documentation. Notices of copyright
and/or attribution which appear at the beginning of any file included in
this distribution must remain intact.

2. Users of this software agree to make their best efforts (a) to return
to MCNC any improvements or extensions that they make, so that these may
be included in future releases; and (b) to inform MCNC/CNIDR of noteworthy
uses of this software.

3. The names of MCNC and Clearinghouse for Networked Information Discovery
and Retrieval may not be used in any advertising or publicity relating to
the software without the specific, prior written permission of MCNC/CNIDR.

THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND, EXPRESS,
IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY WARRANTY OF
MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.

IN NO EVENT SHALL MCNC/CNIDR BE LIABLE FOR ANY SPECIAL, INCIDENTAL,
INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND, OR ANY DAMAGES WHATSOEVER
RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER OR NOT ADVISED OF THE
POSSIBILITY OF DAMAGE, AND ON ANY THEORY OF LIABILITY, ARISING OUT OF OR IN
CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
************************************************************************/

/*@@@
File:           Iutil.cxx
Version:        1.02
$Revision: 1.25 $
Description:    Command-line utilities for Isearch databases
Author:         Nassib Nassar, nrn@cnidr.org
@@@*/

// Directory (relative to the current working directory) that -gilsdocs
// writes its per-record .xml files into.
#define GILS_DIRNAME "gils.out"

// NOTE(review): the bare #include directives below carry no header name in
// this copy of the file (the <...> part appears to have been stripped).
// They were presumably system headers -- restore them from the upstream
// source before building.
#include
#include
#ifdef UNIX
#include
#endif
#include
#include
#include
#include
#include
#if defined(_MSDOS) || defined(_WIN32)
#include
#endif
#include
#include "isearch.hxx"
#include "common.hxx"
#include "dtreg.hxx"
#include "rcache.hxx"
#include "index.hxx"
#include "fprec.hxx"
#include "fpt.hxx"
#include "registry.hxx"
#include "idb.hxx"

// IDB subclass used by this tool. It forwards its constructor arguments to
// IDB unchanged and defines IndexingStatus() with an empty body, so nothing
// is reported through that hook here.
class IDBC : public IDB {
public:
  IDBC(const STRING& NewPathName, const STRING& NewFileName, const STRLIST& NewDocTypeOptions) : IDB(NewPathName, NewFileName, NewDocTypeOptions) {};
protected:
  void IndexingStatus(const INT StatusMessage, const STRING *FileName, const INT WordCount) const {};
};

typedef IDBC* PIDBC;

// Removes the "<db>.mdt" and "<db>.num" files left behind after KillAll().
//
// db: database file stem; it is pasted as-is in front of the ".mdt" / ".num"
//     suffixes.
//
// On builds where neither _MSDOS nor WIN32 is defined this runs "rm -f"
// through system(); on other builds the function body is empty.
//
// NOTE(review): the name is interpolated unquoted into a shell command, so a
// stem containing spaces or shell metacharacters will not behave as intended.
// NOTE(review): the guard tests WIN32 here while the include guard near the
// top of the file tests _WIN32 -- confirm which macro the build defines.
void cleanupAfterKillAll(const STRING& db) {
  // for some reason these files are not getting deleted
  // by KillAll() or are being written out again,
  // probably in ~IDB().
#if !defined(_MSDOS) && !defined (WIN32)
  char* dbs = db.NewCString();
  char *s;
  // The 16 extra bytes cover "rm -f " + ".mdt"/".num" + the terminating NUL
  // (11 bytes), assuming GetLength() is the length of the C string -- TODO confirm.
  s = new char[db.GetLength() + 16];
  sprintf(s, "rm -f %s.mdt", dbs);
  system(s);
  sprintf(s, "rm -f %s.num", dbs);
  system(s);
  delete [] dbs;
  delete [] s;
#endif
}

// Entry point of Iutil.
//
// Always prints the "Iutil v<version>" banner to stderr first. With no
// arguments it prints the usage text. Otherwise it parses the option flags,
// opens the database named by -d (or IsearchDefaultDbName when none was
// given) and then performs every requested action in the fixed order in
// which the "if (...)" sections appear below. Some actions (-state, -erase,
// failed -optimize/-collapse checks, an incompatible database) end the
// program early.
//
// Exit paths use the RETURN_ZERO macro, which is defined outside this file
// (presumably "return 0" -- confirm); the only other return is "return 1"
// when -replace names the database itself.
int main(int argc, char** argv) {
  fprintf(stderr,"Iutil v%s\n", IsearchVersion);
  if (argc < 2) {
    fprintf(stderr,"Copyright (c) 1995-2000 MCNC/CNIDR and A/WWW Enterprises\n");
    fprintf(stderr,"-d (X) # Use (X) as the root name for database files.\n");
    fprintf(stderr,"-V # Print the version number.\n");
    fprintf(stderr,"-vi # View summary information about the database.\n");
    fprintf(stderr,"-vf # View list of fields defined in the database.\n");
    fprintf(stderr,"-v # View list of documents in the database.\n");
    fprintf(stderr,"-newpaths # Prompt for new pathnames for files.\n");
    fprintf(stderr,"-del # Mark individual documents (by key) to be deleted from database.\n");
    fprintf(stderr,"-undel # Unmark documents (by key) that were marked for deletion.\n");
#ifdef DICTIONARY
    fprintf(stderr,"-dict # Generate a search dictionary for the index.\n");
    fprintf(stderr,"-centroid # Create a centroid document for the database.\n");
#endif
    fprintf(stderr,"-c # Cleanup database by removing unused data (useful after -del).\n");
    // NOTE(review): SOURCE IS DAMAGED HERE. In this copy of the file the text
    // between the commented-out "-collapse" usage line and the "-o" option
    // check is missing: the rest of the usage block, the declarations of the
    // locals used below (Flag, DBName, x, LastUsed, the option flags, ...)
    // and the opening of the option-parsing loop. The remnant is kept
    // verbatim on the next line; restore the region from the upstream source.
    // cout << " [-collapse] // Collapse last two index files."<= argc) {
        fprintf(stderr,"ERROR: No option specified after -o.\n");
        RETURN_ZERO;
      }
      // Value following -o is appended to the doc-type option list that is
      // later passed to the IDBC constructor.
      STRING S;
      S = argv[x];
      DocTypeOptions.AddEntry(S);
      LastUsed = x;
    }
    // Every recognized option records its own index (or the index of its
    // value) in LastUsed; after parsing, argv[LastUsed+1 ..] is treated as
    // the list of keys for -del / -undel.
    if(Flag.Equals("-optimize")){
      Optimize=1;
      LastUsed=x;
    }
    if(Flag.Equals("-collapse")){
      Collapse=1;
      LastUsed=x;
    }
    if (Flag.Equals("-d")) {
      if (++x >= argc) {
        fprintf(stderr,"ERROR: No database name specified after -d.\n");
        RETURN_ZERO;
      }
      DBName = argv[x];
      LastUsed = x;
    }
    if (Flag.Equals("-meta")) {
      if (++x >= argc) {
        fprintf(stderr,"ERROR: No file name specified after -meta.\n");
        RETURN_ZERO;
      }
      MetaFn = argv[x];
      LastUsed = x;
    }
    if (Flag.Equals("-replace")) {
      if (++x >= argc) {
        fprintf(stderr,"ERROR: No file name specified after -replace.\n");
        RETURN_ZERO;
      }
      Replace = argv[x];
      LastUsed = x;
    }
    if (Flag.Equals("-gt")) {
      if (++x >= argc) {
        fprintf(stderr,"ERROR: No document type specified after -gt.\n");
        fprintf(stderr," Use -gt0 if you want no document type.\n");
        RETURN_ZERO;
      }
      GlobalDoctype = argv[x];
      SetGlobalDoctype = 1;
      LastUsed = x;
    }
    if (Flag.Equals("-m")) {
      if (++x >= argc) {
        fprintf(stderr,"ERROR: No memory size specified after -m.\n");
        RETURN_ZERO;
      }
      // Value is taken in megabytes here; it is converted to bytes once,
      // after option parsing (see "in bytes" below).
      OptimizerMemory = atoi(argv[x]);
      printf("%i MB Memory Selected\n", OptimizerMemory);
      // OptimizerMemory=OptimizerMemory*1024*1024;
      LastUsed = x;
    }
    if (Flag.Equals("-gt0")) {
      // -gt0 clears the global document type (sets it to the empty string).
      GlobalDoctype = "";
      SetGlobalDoctype = 1;
      LastUsed = x;
    }
    if (Flag.Equals("-debug")) {
      DebugFlag = 1;
      // Optional numeric argument: if the next argument starts with a digit
      // it is consumed and stored in Skip. Cwd is a buffer declared in the
      // part of main() missing from this copy -- presumably at least 256
      // bytes; confirm.
      if (x+1 < argc) {
        Temp = argv[x+1];
        Temp.GetCString(Cwd, 256);
        if (isdigit(Cwd[0])) {
          Skip = Temp.GetInt();
          x++;
        }
      }
      LastUsed = x;
    }
#ifdef DICTIONARY
    if (Flag.Equals("-dict")) {
      DictGen = 1;
      LastUsed = x;
    }
    if (Flag.Equals("-centroid")) {
      DoCentroid = 1;
      LastUsed = x;
    }
#endif
    if (Flag.Equals("-erase")) {
      EraseAll = 1;
      LastUsed = x;
    }
    if (Flag.Equals("-newpaths")) {
      PathChange = 1;
      LastUsed = x;
    }
    if (Flag.Equals("-v")) {
      View = 1;
      LastUsed = x;
    }
    if (Flag.Equals("-vf")) {
      ViewFields = 1;
      LastUsed = x;
    }
    if (Flag.Equals("-vi")) {
      ViewInfo = 1;
      LastUsed = x;
    }
    if (Flag.Equals("-del")) {
      DeleteByKey = 1;
      LastUsed = x;
    }
    if (Flag.Equals("-undel")) {
      UndeleteByKey = 1;
      LastUsed = x;
    }
    if (Flag.Equals("-V")) {
      // The version banner was already printed at the top of main(), so -V
      // only has to stop here.
      RETURN_ZERO;
    }
    if (Flag.Equals("-c")) {
      Cleanup = 1;
      LastUsed = x;
    }
    if (Flag.Equals("-gilsdocs")) {
      Gils = 1;
      LastUsed = x;
    }
    if (Flag.Equals("-gilsindex")) {
      GilsIndex = 1;
      LastUsed = x;
    }
    if (Flag.Equals("-state")) {
      DbState = 1;
      LastUsed = x;
    }
    if (Flag.Equals("-urn")) {
      Urn = 1;
      LastUsed = x;
    }
    }
    x++;
  }

  if (DBName.Equals("")) {
    DBName = IsearchDefaultDbName;
  }

  // x now indexes the first argument after the last recognized option;
  // -del / -undel read their keys from argv[x] onwards.
  x = LastUsed + 1;

  // we need to prevent bad combinations of options, such as -erase and -del together

  PIDBC pdb;
  STRING DBPathName, DBFileName;
  if (!DBExists(DBName)) {
    fprintf(stderr,"Database ");
    DBName.Print(stderr);
    fprintf(stderr," does not exist.\n");
    RETURN_ZERO;
  }

  struct stat info;
  STRING IndexFile;

  // Split DBName into its directory part and its file-stem part and open the
  // database with them.
  DBPathName = DBName;
  DBFileName = DBName;
  RemovePath(&DBFileName);
  RemoveFileName(&DBPathName);
  pdb = new IDBC(DBPathName, DBFileName, DocTypeOptions);

  // NOTE(review): depending on the (not visible) type of OptimizerMemory this
  // multiplication may overflow for large -m values -- confirm.
  OptimizerMemory=OptimizerMemory*1024*1024; // in bytes

  if (DebugFlag) {
    pdb->DebugModeOn();
  }

  // -optimize: only attempted when "<DBName>.inx.1" exists (checked with stat).
  // NOTE(review): CheckName comes from NewCString() and is released with
  // scalar "delete" here, while other NewCString() results in this file are
  // released with "delete []". Also, pdb is not deleted on the early exit.
  if (Optimize) {
    IndexFile = DBName;
    IndexFile.Cat(".inx.1");
    PCHR CheckName;
    CheckName = IndexFile.NewCString();
    if (stat(CheckName, &info) !=0) {
      fprintf(stderr,"Database ");
      DBName.Print(stderr);
      fprintf(stderr," does not need optimizing.\n");
      delete CheckName;
      RETURN_ZERO;
    }
    else {
      delete CheckName;
      pdb->MergeIndexFiles(OptimizerMemory);
    }
  }

  // -collapse: same existence check on "<DBName>.inx.1" as -optimize, then
  // CollapseIndexFiles(). The same delete / pdb notes as above apply.
  if (Collapse) {
    IndexFile = DBName;
    IndexFile.Cat(".inx.1");
    PCHR CheckName;
    CheckName = IndexFile.NewCString();
    if (stat(CheckName, &info) !=0) {
      fprintf(stderr,"Database ");
      DBName.Print(stderr);
      fprintf(stderr," cannot be collapsed.\n");
      delete CheckName;
      RETURN_ZERO;
    }
    else {
      delete CheckName;
      pdb->CollapseIndexFiles(OptimizerMemory);
    }
  }

  // -vi: print the file stem, global doc type, record count and the number
  // of records flagged as deleted. The local x/y/z are scoped to this block
  // (x hides the argument index used by -del / -undel further down).
  if (ViewInfo) {
    STRING S;
    INT x, y, z;
    pdb->GetDbFileStem(&S);
    printf("Database name: ");
    S.Print();
    printf("\n");
    pdb->GetGlobalDocType(&S);
    if (S == "") {
      S = "(none)";
    }
    printf("Global document type: ");
    S.Print();
    printf("\n");
    y = pdb->GetTotalRecords();
    printf("Total number of documents: %i\n", y);
    z = 0;
    // Records are addressed 1..GetTotalRecords() throughout this file.
    for (x=1; x<=y; x++) {
      if (pdb->GetDocumentDeleted(x)) {
        z++;
      }
    }
    printf("Documents marked as deleted: %i\n", z);
  }

  // -state: print the database state as one word and exit. The local
  // DbState (the state value) hides the option flag of the same name.
  if (DbState) {
    INT4 DbState = pdb->GetDbState();
    switch (DbState) {
    case IsearchDbStateReady:
      printf("ready\n");
      break;
    case IsearchDbStateBusy:
      printf("busy\n");
      break;
    case IsearchDbStateInvalid:
      printf("invalid\n");
      break;
    default:
      printf("unknown\n");
    }
    delete pdb;
    RETURN_ZERO;
  }

  // Everything below requires a compatible database. Note that -optimize,
  // -collapse, -vi and -state above have already run by this point.
  if (!pdb->IsDbCompatible()) {
    fprintf(stderr,"The specified database is not compatible with this version of Iutil.\n");
    fprintf(stderr,"Please use matching versions of Iindex, Isearch, and Iutil.\n");
    delete pdb;
    RETURN_ZERO;
  }

  // -gt / -gt0: store the global doc type. The upper-casing below happens
  // after the value was stored and only affects the printed message.
  if (SetGlobalDoctype) {
    pdb->SetGlobalDocType(GlobalDoctype);
    if (GlobalDoctype == "") {
      printf("Global document type cleared.\n");
    } else {
      GlobalDoctype.UpperCase();
      printf("Global document type set to ");
      GlobalDoctype.Print();
      printf(".\n");
    }
  }

  // -erase: remove the database files, then the .mdt/.num leftovers, and exit.
  if (EraseAll) {
    printf("Erasing database files ...\n");
    pdb->KillAll();
    delete pdb;
    cleanupAfterKillAll(DBName);
    printf("Database files erased.\n");
    RETURN_ZERO;
  }

  // -newpaths: walk every record and interactively remap its path name.
  // Each answer is remembered in PathList as an "old=new" entry so that the
  // same old path is only asked about once.
  if (PathChange) {
    printf("Scanning database for file paths ...\n");
    // NOTE(review): this message looks truncated in this copy (a word between
    // "or" and "to" seems to be missing) -- compare with upstream.
    printf("Enter new path or to leave unchanged:\n");
    INT x, y;
    RECORD Record;
    PCHR p;
    STRING OldPath, NewPath;
    STRLIST PathList;
    CHR s[512];
    y = pdb->GetTotalRecords();
    for (x=1; x<=y; x++) {
      pdb->GetDocumentInfo(x, &Record);
      Record.GetPathName(&OldPath);
      p = OldPath.NewCString();
      // Look up a previously entered replacement for this path.
      // NOTE(review): relies on GetValue() leaving NewPath empty when there
      // is no entry for p -- confirm against STRLIST.
      PathList.GetValue(p, &NewPath);
      delete [] p;
      if (NewPath == "") {
        printf("Path=[");
        OldPath.Print();
        printf("]\n");
        printf(" > ");
        //gets(s);
        // NOTE(review): the fgets() result is not checked; on EOF/error s is
        // left uninitialized and the strlen() below reads it anyway.
        fgets(s,511,stdin);
        INT slen;
        slen = strlen(s);
        if ((slen > 0) && (s[slen-1] == '\n')) {
          s[slen-1] = '\0'; //GCMD chop off '\n' from the end.
        }
        // Empty input keeps the old path.
        if (s[0] == '\0') {
          NewPath = OldPath;
        } else {
          NewPath = s;
        }
        Record.SetPathName(NewPath);
        OldPath += "=";
        OldPath += NewPath;
        PathList.AddEntry(OldPath);
      } else {
        Record.SetPathName(NewPath);
      }
      pdb->SetDocumentInfo(x, Record);
    }
    printf("Done.\n");
  }

  /*
  // Replaced with new versions from J. Wehle
  if (DeleteByKey) {
    cout << "Marking documents as deleted ..." << endl;
    INT x, z;
    INT y = 0;
    STRING S;
    z = WordList.GetTotalEntries();
    for (x=1; x<=z; x++) {
      WordList.GetEntry(x, &S);
      y += pdb->DeleteByKey(S);
    }
    cout << y << " document(s) marked as deleted." << endl;
  }

  if (UndeleteByKey) {
    cout << "Removing deletion mark from documents ..." << endl;
    INT x, z;
    INT y = 0;
    STRING S;
    z = WordList.GetTotalEntries();
    for (x=1; x<=z; x++) {
      WordList.GetEntry(x, &S);
      y += pdb->UndeleteByKey(S);
    }
    cout << "Deletion mark removed for " << y << " document(s)." << endl;
  }
  */

  // -del: every argument after the last recognized option (argv[x] up to
  // argv[argc-1]) is passed to DeleteByKey(); the return values are summed
  // into the reported count.
  if (DeleteByKey) {
    printf("Marking documents as deleted ...\n");
    INT NumWords = argc - x;
    INT y = 0;
    for (INT z = 0; z < NumWords; z++) {
      y += pdb->DeleteByKey(argv[z+x]);
    }
    printf("%i document(s) marked as deleted.\n", y);
  }

  // -undel: same argument range as -del, passed to UndeleteByKey().
  if (UndeleteByKey) {
    printf("Removing deletion mark from documents ...\n");
    INT NumWords = argc - x;
    INT y = 0;
    for (INT z = 0; z < NumWords; z++) {
      y += pdb->UndeleteByKey(argv[z+x]);
    }
    printf("Deletion mark removed for %i document(s).\n", y);
  }

  // -c: CleanupDb() and report its return value as the number of removed
  // documents. The local x is scoped to this block.
  if (Cleanup) {
    printf("Cleaning up database (removing deleted documents) ...\n");
    INT x = pdb->CleanupDb();
    printf("%i document(s) were removed.\n", x);
  }

#ifdef DICTIONARY
  if (DictGen) {
    printf("Creating dictionary ...\n");
    pdb->CreateDictionary();
  }
  if (DoCentroid) {
    printf("Generating centroid document ...\n");
    pdb->CreateCentroid();
  }
#endif

  // -vf: print the name of every entry in the database's DFDT, one per line.
  if (ViewFields) {
    printf("The following fields are defined in this database:\n");
    DFDT Dfdt;
    DFD Dfd;
    STRING S;
    pdb->GetDfdt(&Dfdt);
    INT y = Dfdt.GetTotalEntries();
    INT x;
    for (x=1; x<=y; x++) {
      Dfdt.GetEntry(x, &Dfd);
      Dfd.GetFieldName(&S);
      S.Print();
      printf("\n");
    }
  }

  // -v: one line per record in the form
  //   DocType: [Key] (Start - End) File
  // with a trailing " *" when the record is flagged as deleted.
  if (View) {
    printf("DocType: [Key] (Start - End) File\n");
    printf("(* indicates deleted record)\n");
    RECORD Record;
    STRING S;
    INT y = pdb->GetTotalRecords();
    INT x;
    for (x=1; x<=y; x++) {
      pdb->GetDocumentInfo(x, &Record);
      Record.GetDocumentType(&S);
      if (S.Equals("")) {
        printf("(none)");
      } else {
        S.Print();
      }
      printf(": [");
      Record.GetKey(&S);
      S.Print();
      printf("] ");
      printf("(%i - %i) ", Record.GetRecordStart(), Record.GetRecordEnd());
      Record.GetFullFileName(&S);
      S.Print();
      if (pdb->GetDocumentDeleted(x)) {
        printf(" *");
      }
      printf("\n");
    }
  }

  // -urn: one line per record: "<DBPathName><DBFileName>/<key>", a tab, then
  // the record's full file name.
  if (Urn) {
    RECORD Record;
    STRING S,FullDbName;
    INT y = pdb->GetTotalRecords();
    INT x;
    FullDbName = DBPathName;
    FullDbName.Cat(DBFileName);
    for (x=1; x<=y; x++) {
      pdb->GetDocumentInfo(x, &Record);
      FullDbName.Print();
      printf("/");
      Record.GetKey(&S);
      S.Print();
      printf("\t");
      Record.GetFullFileName(&S);
      S.Print();
      printf("\n");
    }
  }

  // -gilsdocs: for every record, ask its doc type for "gils" metadata and
  // write it to GILS_DIRNAME/<record file name>.xml. Files that cannot be
  // opened are skipped silently.
  if (Gils) {
    printf("Generating GILS records ...\n");
    printf("Files will be placed in %s/ ...\n", GILS_DIRNAME);
    printf("Any default values in ");
    MetaFn.Print();
    printf(" will be included.\n");

    // parse defaults file
    REGISTRY* metadef = parseMetaDefaults(MetaFn);

    // generate gils records
    // The mkdir() result is ignored (the directory may already exist).
#ifdef UNIX
    mkdir(GILS_DIRNAME, 0777);
#else
    mkdir(GILS_DIRNAME);
#endif
    INT t = pdb->GetTotalRecords();
    INT x;
    RECORD record;
    STRING doctype;
    DOCTYPE* dtp;
    // STRING metadata;
    FILE* fp;
    STRING fn;
    STRING recfn;
    char* p;
    REGISTRY* meta;
    for (x = 1; x <= t; x++) {
      pdb->GetDocumentInfo(x, &record);
      record.GetDocumentType(&doctype);
      // NOTE(review): dtp is dereferenced without a null check, and neither
      // meta nor metadef is released in this function -- confirm that
      // GetDocTypePtr() cannot return null and who owns these registries.
      dtp = pdb->GetDocTypePtr(doctype);
      meta = dtp->GetMetadata(record, "gils", metadef);
      fn = GILS_DIRNAME;
      fn += "/";
      record.GetFileName(&recfn);
      fn += recfn;
      fn += ".xml";
      p = fn.NewCString();
      fp = fopen(p, "w");
      delete [] p;
      if (fp) {
        // NOTE(review): these two writes emit only a newline in this copy;
        // the original strings were presumably angle-bracketed header lines
        // that got stripped -- compare with upstream.
        fprintf(fp, "\n");
        fprintf(fp, "\n");
        STRLIST position;
        meta->PrintSgml(fp, position);
        fclose(fp);
      }
    }
  }

  // -gilsindex: write the database-level GILS record to "<DBName>.gils",
  // then append the centroid and a final line to the same file.
  if (GilsIndex) {
    printf("Creating GILS metadata...\n");
    STRING GilsBuffer,GilsFile;
    MakeDbGilsRec(pdb, DBPathName, DBFileName,&GilsBuffer);
    GilsFile = DBName;
    GilsFile.Cat(".gils");
    GilsBuffer.WriteFile(GilsFile);
    // NOTE(review): the fopen() result is used without a null check.
    FILE* fp = fopen(GilsFile, "a");
    pdb->WriteCentroid(fp);
    // NOTE(review): possibly a stripped closing tag in this copy -- compare
    // with upstream.
    fprintf(fp, "\n");
    fclose(fp);
  }

  delete pdb;

  // -replace <dest>: erase the database <dest> and move this database's
  // files next to it. Runs after pdb has been deleted.
  if ( ! Replace.Equals("") ) {
    STRING source, dest;
    source = DBName;
    dest = Replace;
    ExpandFileSpec(&source);
    ExpandFileSpec(&dest);
    // Refuse to replace a database with itself; this is the only path that
    // returns 1.
    if (source.Equals(dest)) {
      printf("Both databases are the same; replace aborted.\n");
      return 1;
    }
    printf("Replacing ");
    dest.Print();
    printf(" with ");
    source.Print();
    printf(" ...\n");

    // do the replacement
    // remove the database to be replaced
    STRING path, file;
    path = dest;
    file = dest;
    RemoveFileName(&path);
    RemovePath(&file);
    IDB* idb = new IDB(path, file);
    idb->KillAll();
    delete idb;
    cleanupAfterKillAll(dest);

    // move the new database
    // for now we move DB.* since there is no good way to
    // get a list of files from IDB.
    // After RemoveFileName() dest presumably holds only the directory part,
    // so the mv target below is "<dir>." and the files keep the source
    // database's base name -- confirm this is intended.
    RemoveFileName(&dest);
#if !defined(_MSDOS) && !defined (WIN32)
    char *s;
    s = new char [source.GetLength() + dest.GetLength() + 32];
    // NOTE(review): s1 and s2 come from NewCString() and are never released
    // here; both names are also interpolated unquoted into a shell command.
    char* s1 = source.NewCString();
    char* s2 = dest.NewCString();
    sprintf(s, "mv -f %s.* %s.", s1, s2);
    system(s);
    delete [] s;
#endif
  }

  RETURN_ZERO;
}