#
#  This file is part of Documancer (http://documancer.sf.net)
#
#  Copyright (C) 2004-2005 Vaclav Slavik
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License version 2 as
#  published by the Free Software Foundation.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program; if not, write to the Free Software
#  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
#  $Id: __init__.py,v 1.5 2005/02/05 10:37:06 vaclavslavik Exp $
#
#  Interface that fulltext indexers must implement
#

class FulltextIndexer:
    """Abstract interface that every fulltext indexer has to implement.

    Each method below only raises NotImplementedError; the actual work is
    expected to be supplied by the indexer implementations.
    """

    def getNameAndVersion(self):
        """Returns the indexer's name and version in human-readable form,
           for example 'PyLucene 0.9.3 with Lucene 1.4.3'."""
        raise NotImplementedError()


    class Result:
        """One item of the array returned by search()."""

        def __init__(self, title, url, score):
            # title ... title of the document
            # url ..... URL of the document
            # score ... document's score (0..1 float)
            self.title, self.url, self.score = title, url, score

    def search(self, directory, query):
        """
        Runs Lucene query 'query' against the fulltext index stored in
        'directory' and returns an array of Result objects.

        Every Result carries three fields:

          title          title of the document
          url            URL of the document
          score          document's score (0..1 float)
        """
        raise NotImplementedError()

    def startIndexing(self, directory):
        """Begins the indexing process. 'directory' is the place where the
           index should be stored; it is guaranteed to exist before
           startIndexing is called."""
        raise NotImplementedError()

    def indexDocument(self, url, data):
        """
        Adds one document to the index. 'url' is the URL to remember for the
        document and 'data' is a dictionary holding the text to index. Its
        values are strings, its keys name the category of the text:

          'title'        document's title
          'headings'     headings in the document (e.g. <h1> tags)
          'subheadings'  subheadings
          'emphasized'   emphasized text (bold, italic, underlined, ...)
          'contents'     text of the document

        Any of the keys may be missing.
        """
        raise NotImplementedError()

    def stopIndexing(self):
        """Ends the indexing process begun by the preceding startIndexing
           call."""
        raise NotImplementedError()


def createBest():
    """Returns an instance of the best indexer implementation that is
       available (one of in-process PyLucene, out-of-process PyLucene and
       Lucene run using Java).

       PyLucene is attempted first. If it cannot be imported or fails with
       LaunchingError, the Java-based indexer is used instead; exceptions
       raised by that last attempt are passed on to the caller."""
    # First choice: PyLucene. The ImportError handler deliberately spans the
    # constructor call too, not just the import statements.
    try:
        import pylucene
        from _external import LaunchingError
        try:
            indexer = pylucene.PyLuceneIndexer()
        except LaunchingError:
            # no PyLucene or error starting new Python instance
            pass
        else:
            return indexer
    except ImportError:
        # no PyLucene
        pass

    # Last resort: Lucene through Java. Nothing is caught here, so the
    # caller sees whatever goes wrong.
    import java
    return java.LuceneIndexer()