#!/usr/bin/env python
# -*- coding: utf-8 -*-
# 
# Copyright 2003-2006 Zuza Software Foundation
# 
# This file is part of translate.
#
# translate is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
# 
# translate is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with translate; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

"""converts comma-separated values (.csv) files to gettext .po localization files"""

import sys
from translate.misc import quote
from translate.misc import sparse
from translate.storage import po
from translate.storage import csvl10n

def replacestrings(source, *pairs):
  """applies each (orig, new) replacement pair to source, in the order given

  Later pairs operate on the output of earlier pairs. Returns the resulting
  string; with no pairs the source is returned unchanged."""
  result = source
  for pair in pairs:
    old, replacement = pair
    result = result.replace(old, replacement)
  return result

def quotecsvstr(source):
  """wraps source in double quotes after normalising its escapes

  The replacements run in this order:
  - an already-escaped double quote is unescaped, so the next step
    does not escape it a second time
  - every double quote is escaped with a backslash
  - a doubled backslash before an apostrophe is reduced to a single one
  - a doubled backslash before the letter n is reduced to a single one"""
  substitutions = (
    ('\\"', '"'),
    ('"', '\\"'),
    ("\\\\'", "\\'"),
    ('\\\\n', '\\n'),
  )
  escaped = replacestrings(source, *substitutions)
  return '"' + escaped + '"'

def simplify(string):
  """returns string with every non-alphanumeric character removed

  The result has the same string type as the input, so it can be used as
  a dictionary key for loose matching of strings whose punctuation or
  whitespace may have been mangled."""
  # join on an empty string of the input's own type so the result is always
  # a real string (filter() only returns a string on Python 2)
  return string[:0].join([char for char in string if char.isalnum()])

class csv2po:
  """a class that takes translations from a .csv file and puts them in a .po file

  When a template po file is supplied, csv units are merged into the matching
  template units; otherwise a new po file is built from the csv units."""
  def __init__(self, templatepo=None, charset=None, duplicatestyle="keep"):
    """construct the converter...

    templatepo: optional po file whose units receive the csv translations
    charset: if not None, csv source and target strings are decoded with it
    duplicatestyle: passed to the po file's removeduplicates() after conversion"""
    self.pofile = templatepo
    self.charset = charset
    self.duplicatestyle = duplicatestyle
    # number of csv units that could not be merged into the template;
    # always defined so it can be read whether or not a template was given
    self.unmatched = 0
    if self.pofile is not None:
      self.makeindex()

  def makeindex(self):
    """makes indexes required for searching...

    Builds three lookups over the units of self.pofile:
    - commentindex: joined source comments -> unit (ambiguous comments removed)
    - sourceindex: unquoted msgid -> unit
    - simpleindex: simplify(unquoted msgid) -> list of units"""
    self.commentindex = {}
    self.sourceindex = {}
    self.simpleindex = {}
    self.duplicatecomments = []
    for pounit in self.pofile.units:
      commentparts = []
      for comment in pounit.sourcecomments:
        # drop the first "#:" marker and surrounding whitespace
        commentparts.append(comment.replace("#:","",1).strip())
      joinedcomment = " ".join(commentparts)
      unquotedid = po.unquotefrompo(pounit.msgid)
      # the definitive way to match is by source comment (joinedcomment)
      if joinedcomment in self.commentindex:
        # unless more than one thing matches...
        self.duplicatecomments.append(joinedcomment)
      else:
        self.commentindex[joinedcomment] = pounit
      # do simpler matching in case things have been mangled...
      simpleid = simplify(unquotedid)
      # but check for duplicates
      if simpleid in self.simpleindex and not (unquotedid in self.sourceindex):
        # keep a list of them...
        self.simpleindex[simpleid].append(pounit)
      else:
        # first unit for this simplified id, or a repeat of an msgid already
        # seen: (re)start the list with just this unit
        self.simpleindex[simpleid] = [pounit]
      # also match by standard msgid
      self.sourceindex[unquotedid] = pounit
    # a comment shared by several units cannot identify any one of them
    for comment in self.duplicatecomments:
      if comment in self.commentindex:
        del self.commentindex[comment]

  def convertunit(self, csvunit):
    """converts csv unit to po unit

    Returns a new po unit carrying the csv unit's source and target, with the
    csv comment (if any) added as a location."""
    pounit = po.pounit(encoding="UTF-8")
    if csvunit.comment:
      pounit.addlocation(csvunit.comment)
    pounit.source = csvunit.source
    pounit.target = csvunit.target
    return pounit

  def handlecsvunit(self, csvunit):
    """handles reintegrating a csv unit into the .po file

    The template unit is looked up by comment, then by exact source, then by
    simplified source. If no unit (or more than one candidate) is found, a
    message is written to stderr, self.unmatched is incremented and nothing
    is changed. Otherwise the matched unit's msgstr is replaced."""
    if len(csvunit.comment.strip()) > 0 and csvunit.comment in self.commentindex:
      pounit = self.commentindex[csvunit.comment]
    elif csvunit.source in self.sourceindex:
      pounit = self.sourceindex[csvunit.source]
    elif simplify(csvunit.source) in self.simpleindex:
      thepolist = self.simpleindex[simplify(csvunit.source)]
      if len(thepolist) > 1:
        # ambiguous loose match: report all candidates rather than guess
        csvfilename = getattr(self.csvfile, "filename", "(unknown)")
        matches = "\n  ".join(["possible match: " + po.unquotefrompo(pounit.msgid) for pounit in thepolist])
        print >>sys.stderr, "%s - csv entry not found in pofile, multiple matches found:\n  location\t%s\n  original\t%s\n  translation\t%s\n  %s" % (csvfilename, csvunit.comment, csvunit.source, csvunit.target, matches)
        self.unmatched += 1
        return
      pounit = thepolist[0]
    else:
      csvfilename = getattr(self.csvfile, "filename", "(unknown)")
      print >>sys.stderr, "%s - csv entry not found in pofile:\n  location\t%s\n  original\t%s\n  translation\t%s" % (csvfilename, csvunit.comment, csvunit.source, csvunit.target)
      self.unmatched += 1
      return
    # each line of the translation becomes one quoted msgstr line
    csvtarget = [quotecsvstr(line) for line in csvunit.target.split('\n')]
    if pounit.hasplural():
      # we need to work out whether we matched the singular or the plural
      singularid = po.unquotefrompo(pounit.msgid)
      pluralid = po.unquotefrompo(pounit.msgid_plural)
      if csvunit.source == singularid:
        pounit.msgstr[0] = csvtarget
      elif csvunit.source == pluralid:
        pounit.msgstr[1] = csvtarget
      elif simplify(csvunit.source) == simplify(singularid):
        pounit.msgstr[0] = csvtarget
      elif simplify(csvunit.source) == simplify(pluralid):
        pounit.msgstr[1] = csvtarget
      else:
        print >>sys.stderr, "couldn't work out singular or plural: %r, %r, %r" %  \
          (csvunit.source, singularid, pluralid)
        self.unmatched += 1
        return
    else:
      pounit.msgstr = csvtarget

  def convertfile(self, thecsvfile):
    """converts a csvfile to a pofile, and returns it. uses templatepo if given at construction

    Without a template a new po file is created and every csv unit is
    appended to it; with a template each csv unit is merged into it via
    handlecsvunit(). Duplicates are then removed using self.duplicatestyle."""
    self.csvfile = thecsvfile
    if self.pofile is None:
      self.pofile = po.pofile()
      mergemode = False
    else:
      mergemode = True
    if self.pofile.units and self.pofile.units[0].isheader():
      # fill in the placeholder charset/encoding of an existing header
      headerpo = self.pofile.units[0]
      headerpo.msgstr = [line.replace("CHARSET", "UTF-8").replace("ENCODING", "8bit") for line in headerpo.msgstr]
    else:
      headerpo = self.pofile.makeheader(charset="UTF-8", encoding="8bit")
    # same fallback as handlecsvunit() for csv files without a filename
    csvfilename = getattr(self.csvfile, "filename", "(unknown)")
    headerpo.othercomments.append("# extracted from %s\n" % csvfilename)
    mightbeheader = True
    for csvunit in self.csvfile.units:
      if self.charset is not None:
        # NOTE(review): the comment column is not decoded here - confirm intended
        csvunit.source = csvunit.source.decode(self.charset)
        csvunit.target = csvunit.target.decode(self.charset)
      if mightbeheader:
        # ignore typical header strings...
        # only the first csv unit is ever considered as a possible header
        mightbeheader = False
        if [item.strip().lower() for item in (csvunit.comment, csvunit.source, csvunit.target)] == \
           ["comment", "original", "translation"]:
          continue
        if len(csvunit.comment.strip()) == 0 and csvunit.source.find("Content-Type:") != -1:
          continue
      if mergemode:
        self.handlecsvunit(csvunit)
      else:
        pounit = self.convertunit(csvunit)
        self.pofile.units.append(pounit)
    self.pofile.removeduplicates(self.duplicatestyle)
    return self.pofile

  def getmissing(self):
    """get the number of missing translations...

    Returns the count of units in self.pofile whose msgstr is blank."""
    # TODO: work out how to print out the following if in verbose mode
    missing = 0
    for pounit in self.pofile.units:
      if pounit.isblankmsgstr():
        missing += 1
    return missing

def convertcsv(inputfile, outputfile, templatefile, charset=None, columnorder=None, duplicatestyle="msgctxt"):
  """reads in inputfile using csvl10n, converts using csv2po, writes to outputfile

  If templatefile is given it is parsed as a po file and used as the template
  for the conversion. Returns 0 without writing anything when the resulting
  po file is empty, otherwise writes it to outputfile and returns 1."""
  inputcsv = csvl10n.csvfile(inputfile, fieldnames=columnorder)
  # no template file means no template po (the csv2po default)
  templatepo = None
  if templatefile is not None:
    templatepo = po.pofile(templatefile)
  convertor = csv2po(templatepo, charset=charset, duplicatestyle=duplicatestyle)
  outputpo = convertor.convertfile(inputcsv)
  if not outputpo.isempty():
    outputfile.write(str(outputpo))
    return 1
  return 0

def main(argv=None):
  """command-line entry point: sets up the option parser and runs the conversion

  argv is handed straight to the parser's run() method."""
  from translate.convert import convert
  # every accepted template type maps csv input to po output via convertcsv
  csvtopo = ("po", convertcsv)
  formats = {}
  for templatetype in ("po", "pot", None):
    formats[("csv", templatetype)] = csvtopo
  parser = convert.ConvertOptionParser(formats, usetemplates=True, description=__doc__)
  parser.add_option("", "--charset", dest="charset", default=None,
    help="set charset to decode from csv files", metavar="CHARSET")
  # NOTE(review): the help text below lists "source" twice - confirm the column names
  parser.add_option("", "--columnorder", dest="columnorder", default=None,
    help="specify the order and position of columns (source,source,target)")
  parser.add_duplicates_option()
  # these option values are passed through to convertcsv as keyword arguments
  for optionname in ("charset", "columnorder"):
    parser.passthrough.append(optionname)
  parser.run(argv)


# run the command-line converter only when this file is executed as a script
if __name__ == '__main__':
    main()