#
#  This file is part of Documancer (http://documancer.sf.net)
#
#  Copyright (C) 2002,2003,2004 Vaclav Slavik
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License version 2 as
#  published by the Free Software Foundation.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program; if not, write to the Free Software
#  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
#  $Id: server.py,v 1.11 2004/12/19 22:25:16 vaclavslavik Exp $
#
#  HTTP server serving all the documents
#


import sys, BaseHTTPServer, string, re, urllib
from HTMLParser import HTMLParser, HTMLParseError
from threading import *

import utils, book, providers

# Module-wide lock; RequestHandler.do_GET acquires it before processing a
# request and releases it when done, so that only one request is processed
# at a time.
serverLock = Lock()

def runServer(inBackground):
    """Create the Documancer HTTP server and start serving requests.

    The server listens on the port reported by utils.getServerPort().

    inBackground -- when true, requests are served from a daemon thread and
                    this function returns immediately; otherwise the call
                    blocks inside serve_forever().
    """
    port = utils.getServerPort()
    server = DocumancerHTTPServer(('', port))

    if not inBackground:
        server.serve_forever()
        return

    # Daemon thread, so a still-running server never keeps the process alive.
    worker = Thread(target=server.serve_forever)
    worker.setDaemon(True)
    worker.start()
    
class DocumancerHTTPServer(BaseHTTPServer.HTTPServer):
    """HTTP server that dispatches every request to RequestHandler."""

    def __init__(self, server_addr):
        """Bind the server to server_addr, a (host, port) tuple."""
        handler_class = RequestHandler
        BaseHTTPServer.HTTPServer.__init__(self, server_addr, handler_class)

    def handle_error(self, request, client_address):
        """Override the base class hook with a no-op.

        Nothing is printed or logged for a request that fails.
        """
        return None

# NB: this is an ugly hack -- we need to know the number of highlighted hits
#     so that we can navigate among them, but there's no way to pass this
#     information from the HTTP request (FIXME: maybe by some JavaScript
#     hack?) through Mozilla to Documancer, so we keep a dictionary of
#     fulfilled highlighting requests and look into it from Documancer code.
#
#     Keys are full request URLs of the form 'http://localhost:<port><path>'
#     (path includes the '?query=...' part); values are the number of hits
#     highlighted in the page served for that URL.
queryHitsCounts = {}
    
class RequestHandler(BaseHTTPServer.BaseHTTPRequestHandler):
    """Serves book documents to local clients.

    Request paths have the form '/<mangled book name>/<path inside book>',
    optionally followed by '?query=<words>' to request highlighting of the
    given words in the returned document.
    """

    def send_document(self, contents, mime):
        """Send a complete 200 response with the given body.

        contents -- the response body
        mime     -- value for the Content-Type header

        Sending is best-effort: any error raised while writing (for example
        because the client went away) is deliberately ignored.
        """
        try:
            self.send_response(200)
            self.send_header('Content-Type', mime)
            self.send_header('Content-Length', len(contents))
            self.end_headers()
            # we can't send the data in one chunk because TCP buffer may be
            # too small for them and e.g. Windows doesn't like it:
            CHUNKSIZE = 1024
            for pos in range(0, len(contents), CHUNKSIZE):
                self.wfile.write(contents[pos:pos + CHUNKSIZE])
        except Exception:
            # Catching only IOError was observed not to be enough here (the
            # error raised on a failed write was not caught by it), so every
            # ordinary exception is swallowed.
            pass

    def log_message(self, *args):
        """Suppress the default per-request logging."""
        pass

    def do_GET(self):
        """Handle a GET request; only clients at 127.0.0.1 are served."""
        if self.client_address[0] != '127.0.0.1':
            self.send_error(403, 'Request forbidden')
            return

        serverLock.acquire()
        try:
            # The lock must be released even when serving raises; otherwise
            # every later request would block forever in acquire() above.
            self._serve_locked()
        finally:
            serverLock.release()

    def _serve_locked(self):
        """Parse self.path and send the document (or a 404) it refers to.

        Must be called with serverLock held.
        """
        # Split at the first '?' only, so that a literal '?' inside the
        # query string does not make the query unrecognisable.
        urlAndParams = self.path.split("?", 1)
        if len(urlAndParams) == 2 and urlAndParams[1][:6] == "query=":
            query = urllib.unquote(urlAndParams[1][6:].replace("+", " "))
        else:
            query = None

        # parts[0] is the empty string before the leading '/', parts[1] the
        # mangled book name and the rest is the path inside the book.
        parts = urlAndParams[0].split("/")
        if len(parts) < 3:
            self.send_error(404, "Invalid URL")
            return

        bookId = utils.demangleBookName(parts[1])
        url = urllib.unquote("/".join(parts[2:]))

        if bookId not in book.books:
            self.send_error(404, "Unknown book")
            return
        bookObj = book.books[bookId]
        prov = providers.providers[bookObj.provider]

        reply = prov.serve(bookObj, url)
        if reply is None:
            self.send_error(404, "File not found (%s)" % url)
            return

        data, mime = reply
        if query is not None:
            data, hitsCount = highlightQuery(data, query, mime)
            # Remember the hit count under the full request URL so that the
            # rest of the application can look it up later.
            key = 'http://localhost:%i%s' % (utils.getServerPort(),
                                             self.path)
            queryHitsCounts[key] = hitsCount
        self.send_document(data, mime)


class HighlightingParser(HTMLParser):
    """HTML parser that re-emits its input with search hits highlighted.

    Every piece of markup fed to the parser is appended, re-serialized, to
    self.result.  Whole-word, case-insensitive matches of the given words in
    text are wrapped in
    <span id="search_result_N" style="background: yellow">...</span>,
    where N counts the hits from 0.

    Usage: feed() the document, close() the parser, then read get_result()
    and get_hits_count().
    """

    # Elements whose text content is passed through without highlighting:
    # inserting <span> markup there would corrupt the page title, scripts,
    # style sheets or form text.
    NO_HIGHLIGHT_TAGS = ('title', 'script', 'style', 'textarea')

    def __init__(self, words):
        """words -- list of regex-escaped words to highlight."""
        HTMLParser.__init__(self)
        self.words = words
        self.result = []
        # Construct regex pattern that matches these words; with no words
        # there is nothing to match, so highlighting is disabled entirely
        # (an empty alternation would match at every word boundary).
        if words:
            self.regex = re.compile(r"\b(%s)\b" % "|".join(words),
                                    re.IGNORECASE)
        else:
            self.regex = None
        self.match_number = -1
        self.do_highlight = 1

    def _escape_attr(self, value):
        # Escape an attribute value for use inside double quotes.  The
        # parser hands values over with entities already replaced, so they
        # have to be escaped again when written back.
        value = value.replace('&', '&amp;')
        value = value.replace('<', '&lt;').replace('>', '&gt;')
        return value.replace('"', '&quot;')

    def _format_tag(self, tag, attrs, terminator):
        # Serialize a start tag; terminator is '>' or '/>'.
        pieces = [tag]
        for name, value in attrs:
            if value is None:
                # attribute without a value, e.g. <input disabled>
                pieces.append(name)
            else:
                pieces.append('%s="%s"' % (name, self._escape_attr(value)))
        return '<%s%s' % (' '.join(pieces), terminator)

    def handle_starttag(self, tag, attrs):
        self.result.append(self._format_tag(tag, attrs, '>'))
        if tag in self.NO_HIGHLIGHT_TAGS:
            self.do_highlight = 0

    def handle_startendtag(self, tag, attrs):
        self.result.append(self._format_tag(tag, attrs, '/>'))

    def handle_endtag(self, tag):
        self.result.append('</%s>' % tag)
        if tag in self.NO_HIGHLIGHT_TAGS:
            self.do_highlight = 1

    def handle_data(self, data):
        if self.do_highlight and self.regex is not None:
            self.result.append(self.regex.sub(self._replacement, data))
        else:
            self.result.append(data)

    def _replacement(self, match):
        # Adds some markup around a regex match
        self.match_number += 1
        return ('<span id="search_result_%i" style="background: yellow">%s</span>' % (self.match_number, match.group(0)))

    def handle_charref(self, name):
        self.result.append('&#%s;' % name)

    def handle_entityref(self, name):
        self.result.append('&%s;' % name)

    # Comments, declarations (e.g. the doctype) and processing instructions
    # are copied through unchanged so that they are not lost from the output.
    def handle_comment(self, data):
        self.result.append('<!--%s-->' % data)

    def handle_decl(self, decl):
        self.result.append('<!%s>' % decl)

    def handle_pi(self, data):
        self.result.append('<?%s>' % data)

    def get_result(self):
        """Return the re-serialized document as one string."""
        return ''.join(self.result)

    def get_hits_count(self):
        """Return the number of highlighted matches so far."""
        return self.match_number + 1


def highlightQuery(html_text, query, mime):
    """Highlight matches in HTML markup with yellow background.

    html_text -- the document to process
    query     -- search query; its space-separated words are highlighted,
                 except for the operators "and", "or" and "not"
    mime      -- MIME type of the document; only text/html is processed

    Returns a (text, hits_count) tuple.  The text is returned unchanged with
    a count of 0 when the document is not HTML, when the query contains no
    words to highlight, or when the HTML cannot be parsed.
    """

    # Compare only the media type itself, case-insensitively, so that a
    # value carrying parameters (e.g. "text/html; charset=utf-8") is still
    # recognised as HTML.
    if not mime or mime.split(';')[0].strip().lower() != 'text/html':
        return (html_text, 0)

    # Only highlight real words, not operators:
    operators = ("and", "or", "not")
    words = [re.escape(word) for word in query.split(' ')
             if word and word not in operators]
    if not words:
        return (html_text, 0)

    try:
        hp = HighlightingParser(words)
        hp.feed(html_text)
        hp.close()
    except HTMLParseError:
        return (html_text, 0)
    return (hp.get_result(), hp.get_hits_count())