#
# This file is part of Documancer (http://documancer.sf.net)
#
# Copyright (C) 2004-2005 Vaclav Slavik
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
# $Id: __init__.py,v 1.5 2005/02/05 10:37:06 vaclavslavik Exp $
#
# Interface that fulltext indexers must implement
#

class FulltextIndexer:
    """Interface to fulltext indexer.

    Every method here raises NotImplementedError; concrete indexers
    are expected to override them.
    """

    def getNameAndVersion(self):
        """Returns human-readable name + version of the indexer
        (e.g. 'PyLucene 0.9.3 with Lucene 1.4.3')."""
        raise NotImplementedError

    class Result:
        """Single search hit; see search() for the meaning of the fields."""

        def __init__(self, title, url, score):
            self.title = title
            self.url = url
            self.score = score

    def search(self, directory, query):
        """
        Searches for Lucene query 'query' using fulltext index in given
        directory.

        Returns array of results. Items of the array are Result objects
        with three fields:
          title   title of the document
          url     URL of the document
          score   document's score (0..1 float)
        """
        raise NotImplementedError

    def startIndexing(self, directory):
        """Starts indexing process. Passed directory is where the index
        should be stored, it is guaranteed to exist prior to call to
        startIndexing."""
        raise NotImplementedError

    def indexDocument(self, url, data):
        """
        Stores document in the index. 'url' is the URL to remember and
        'data' is dictionary with data to index. The dictionary values
        are strings and keys are categories of contained data.

        Possible categories are:
          'title'     document's title
          'headings'  headings in the document (e.g.
                      [NOTE(review): the rest of this list lies outside
                      the reviewed excerpt; restore it from the
                      original file]
        """
        # NOTE(review): the original body is not visible in the reviewed
        # excerpt; raising here mirrors the other interface methods --
        # confirm against the full file.
        raise NotImplementedError