#
# This file is part of Documancer (http://documancer.sf.net)
#
# Copyright (C) 2004-2005 Vaclav Slavik
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
# $Id: pylucene_impl.py,v 1.4 2005/02/04 13:29:51 vaclavslavik Exp $
#
# Fulltext indexer using PyLucene -- implementation

import sys
import PyLucene

from __init__ import FulltextIndexer  # ugly, I admit


class PyLuceneIndexer(FulltextIndexer):
    """Fulltext indexer backend implemented on top of PyLucene.

    Indexing is a three-step session: startIndexing() opens a writer,
    indexDocument() is called once per document, and stopIndexing()
    finalizes and releases the writer.  search() is independent of that
    session and opens its own searcher for every query.
    """

    def __init__(self):
        # IndexWriter of the indexing session in progress, or None when
        # no session is active.
        self.writer = None

    def getNameAndVersion(self):
        """Return a human-readable string naming PyLucene and Lucene versions."""
        return 'PyLucene %s using Lucene %s' % (
            PyLucene.VERSION, PyLucene.LUCENE_VERSION)

    def search(self, directory, query):
        """Run `query` against the index stored in `directory`.

        The query string is parsed with Lucene's QueryParser using
        'contents' as the default field and a StandardAnalyzer.

        Returns a list of FulltextIndexer.Result objects built from each
        hit's 'title' and 'url' fields and its score, in the order Lucene
        reports the hits.
        """
        # Second argument False: open the existing index, do not create one.
        index_dir = PyLucene.FSDirectory.getDirectory(directory, False)
        searcher = PyLucene.IndexSearcher(index_dir)
        try:
            analyzer = PyLucene.StandardAnalyzer()
            queryobj = PyLucene.QueryParser.parse(query, 'contents', analyzer)
            hits = searcher.search(queryobj)

            # Copy everything we need out of the hits while the searcher is
            # still open; documents are fetched from the index on access.
            results = []
            for hit in xrange(hits.length()):
                doc = hits.doc(hit)
                results.append(FulltextIndexer.Result(
                    doc.get('title'),
                    doc.get('url'),
                    hits.score(hit)))
            return results
        finally:
            # Always release the searcher (and the index files it holds
            # open), even when parsing or searching fails.
            searcher.close()

    def startIndexing(self, directory):
        """Begin an indexing session writing a fresh index into `directory`.

        Both the directory store and the writer are opened in "create"
        mode, so any index already present there is replaced.
        """
        store = PyLucene.FSDirectory.getDirectory(directory, True)
        self.writer = PyLucene.IndexWriter(
            store, PyLucene.StandardAnalyzer(), True)

    def indexDocument(self, url, data):
        """Add one document to the index of the current session.

        `url` is saved in the 'url' field; `data` is a mapping of field
        name to text, and every entry becomes a field of the same name.
        Must be called between startIndexing() and stopIndexing().
        """
        doc = PyLucene.Document()
        # The three boolean flags are Lucene's store / index / tokenize
        # switches; all fields are stored, indexed and tokenized.
        doc.add(PyLucene.Field('url', url, True, True, True))
        for field in data:
            doc.add(PyLucene.Field(field, data[field], True, True, True))
        self.writer.addDocument(doc)

    def stopIndexing(self):
        """Finish the indexing session: optimize the index and close the writer.

        The writer is closed and forgotten even if optimization fails, so
        a failed session does not leave the index locked by a stale writer.
        """
        writer = self.writer
        # Detach first so the instance never keeps a half-closed writer.
        self.writer = None
        try:
            writer.optimize()
        finally:
            writer.close()