#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Tests for converting HTML to PO (html2po) and back again (po2html)."""

from translate.convert import html2po
from translate.convert import po2html
from translate.convert import test_convert
from translate.misc import wStringIO
from translate.storage import po
from translate.storage import html


class TestHTML2PO:
    """Exercise html2po extraction on small in-memory HTML snippets.

    NOTE(review): the fixture strings in this class contain almost no HTML
    markup, and several cannot produce the units the tests expect (for
    example test_meta passes an empty string yet expects "these are
    keywords", and test_table_complex expects "This is the summary" which
    is absent from its markup).  It looks like the tags were stripped from
    this copy of the file -- TODO restore the fixtures from version control
    before relying on these tests.  The visible text is kept unchanged here.
    """

    def html2po(self, markup):
        """Helper to convert html to po without a file.

        Wraps ``markup`` in an in-memory file and returns the store produced
        by ``html2po.html2po().convertfile``.
        """
        inputfile = wStringIO.StringIO(markup)
        convertor = html2po.html2po()
        return convertor.convertfile(inputfile, "test", False, False)

    def po2html(self, posource, htmltemplate):
        """Helper to convert po to html without a file.

        Asserts that ``po2html.converthtml`` reports success and returns the
        text written to the in-memory output file.
        """
        inputfile = wStringIO.StringIO(posource)
        outputfile = wStringIO.StringIO()
        templatefile = wStringIO.StringIO(htmltemplate)
        assert po2html.converthtml(inputfile, outputfile, templatefile)
        return outputfile.getvalue()

    def countunits(self, pofile, expected):
        """Helper to check that we got the expected number of messages.

        A leading header unit, when present, is not counted.
        """
        actual = len(pofile.units)
        if actual > 0 and pofile.units[0].isheader():
            actual -= 1
        # Printed so that the store contents are visible when the assert fails.
        print(pofile)
        assert actual == expected

    def compareunit(self, pofile, unitnumber, expected):
        """Helper to validate a PO message.

        ``unitnumber`` is 1-based and does not count the header: when the
        store starts with a header the unit sits at index ``unitnumber``,
        otherwise at ``unitnumber - 1``.
        """
        if not pofile.units[0].isheader():
            unitnumber = unitnumber - 1
        unit = pofile.units[unitnumber]
        print('unit source: ' + str(unit.source) + '|')
        print('expected: ' + expected.encode('utf-8') + '|')
        assert unicode(unit.source) == unicode(expected)

    def check_single(self, markup, itemtext):
        """Check that converting this markup produces a single element with value itemtext."""
        pofile = self.html2po(markup)
        self.countunits(pofile, 1)
        self.compareunit(pofile, 1, itemtext)

    def check_null(self, markup):
        """Check that converting this markup produces no elements."""
        pofile = self.html2po(markup)
        self.countunits(pofile, 0)

    def test_htmllang(self):
        """Test to ensure that we no longer use the lang attribute."""
        markup = '''My title'''
        pofile = self.html2po(markup)
        self.countunits(pofile, 1)
        # Check that the first item is not the <head>
        self.compareunit(pofile, 1, "My title")

    def test_title(self):
        """Test that we can extract the <title> tag."""
        self.check_single("<html><head><title>My title", "My title")

    def test_title_with_linebreak(self):
        """Test a linebreak in the <title> tag."""
        htmltext = '''<html> <head> <title>My title '''
        self.check_single(htmltext, "My title")

    def test_meta(self):
        """Test that we can extract certain info from meta tags."""
        self.check_single('''''', "these are keywords")

    def test_tag_p(self):
        """Test that we can extract the p (paragraph) tag."""
        self.check_single("\n\nA paragraph.\n\n", "A paragraph.")
        markup = "\n\nFirst line.\nSecond line.\n\n"
        pofile = self.html2po(markup)
        self.compareunit(pofile, 1, "First line.\nSecond line.")

    def test_tag_p_with_linebreak(self):
        """Test newlines within the p (paragraph) tag."""
        htmltext = (
            "\n"
            "\n"
            "A paragraph is a section in a piece of writing, usually highlighting a particular point or topic. It always begins on a new line and usually with indentation, and it consists of at least one sentence.\n"
            "\n"
        )
        self.check_single(htmltext, "A paragraph is a section in a piece of writing, usually highlighting a particular point or topic. It always begins on a new line and usually with indentation, and it consists of at least one sentence.")
        markup = "\n\nFirst\nline.\nSecond\nline.\n\n"
        pofile = self.html2po(markup)
        self.compareunit(pofile, 1, "First line.\nSecond line.")

    def test_tag_div(self):
        """Test that we can extract the div tag."""
        self.check_single("\nA paragraph.\n", "A paragraph.")
        markup = "\nFirst line.\nSecond line.\n"
        pofile = self.html2po(markup)
        self.compareunit(pofile, 1, "First line.\nSecond line.")

    def test_tag_div_with_linebreaks(self):
        """Test linebreaks within a div tag."""
        htmltext = (
            "\n"
            "A paragraph is a section in a piece of writing, usually highlighting a particular point or topic. It always begins on a new line and usually with indentation, and it consists of at least one sentence.\n"
        )
        self.check_single(htmltext, "A paragraph is a section in a piece of writing, usually highlighting a particular point or topic. It always begins on a new line and usually with indentation, and it consists of at least one sentence.")
        markup = "\nFirst\nline.\nSecond\nline.\n"
        pofile = self.html2po(markup)
        self.compareunit(pofile, 1, "First line.\nSecond line.")

    def test_tag_a(self):
        """Test that we can extract the a (anchor) tag."""
        self.check_single('\n\nA paragraph with hyperlink.\n\n', 'A paragraph with hyperlink.')

    def test_tag_a_with_linebreak(self):
        """Test that we can extract the a (anchor) tag with newlines in it."""
        htmltext = '\n\nA paragraph with hyperlink and newlines.\n\n'
        self.check_single(htmltext, 'A paragraph with hyperlink and newlines.')

    def test_tag_img(self):
        """Test that we can extract the alt attribute from the img tag."""
        self.check_single('''A picture''', "A picture")

    def test_img_empty(self):
        """Test that an img with nothing to translate produces no units."""
        htmlsource = ''''''
        self.check_null(htmlsource)

    def test_tag_table_summary(self):
        """Test that we can extract the summary attribute."""
        self.check_single('\n', "Table summary")

    def test_table_simple(self):
        """Test that we can fully extract a simple table."""
        markup = (
            '\n'
            'Heading OneHeading Two\n'
            'OneTwo\n'
        )
        pofile = self.html2po(markup)
        self.countunits(pofile, 4)
        self.compareunit(pofile, 1, "Heading One")
        self.compareunit(pofile, 2, "Heading Two")
        self.compareunit(pofile, 3, "One")
        self.compareunit(pofile, 4, "Two")

    def test_table_complex(self):
        """Test extraction of a table with summary, caption, head, foot and body."""
        markup = (
            '\n'
            'A caption\n'
            'Heading OneHeading Two\n'
            'Foot OneFoot Two\n'
            'OneTwo\n'
        )
        pofile = self.html2po(markup)
        self.countunits(pofile, 9)
        self.compareunit(pofile, 1, "This is the summary")
        self.compareunit(pofile, 2, "A caption")
        self.compareunit(pofile, 3, "Head 1")
        self.compareunit(pofile, 4, "Heading One")
        self.compareunit(pofile, 5, "Heading Two")
        self.compareunit(pofile, 6, "Foot One")
        self.compareunit(pofile, 7, "Foot Two")
        self.compareunit(pofile, 8, "One")
        self.compareunit(pofile, 9, "Two")

    def test_table_empty(self):
        """Test that we ignore tables that are empty.

        A table is deemed empty if it has no translatable content.
        """
        self.check_null('\n')
        self.check_null('\n \n')
        self.check_null('\n')

    def test_address(self):
        """Test to see if the address element is extracted."""
        self.check_single("\nMy address\n", "My address")

    def test_headings(self):
        """Test to see if the h* elements are extracted."""
        markup = (
            "\n"
            "\n"
            "Heading One\n"
            "\n"
            "Heading Two\n"
            "\n"
            "Heading Three\n"
            "\n"
            "Heading Four\n"
            "\n"
            "Heading Five\n"
            "Heading Six\n"
        )
        pofile = self.html2po(markup)
        self.countunits(pofile, 6)
        self.compareunit(pofile, 1, "Heading One")
        self.compareunit(pofile, 2, "Heading Two")
        self.compareunit(pofile, 3, "Heading Three")
        self.compareunit(pofile, 4, "Heading Four")
        self.compareunit(pofile, 5, "Heading Five")
        self.compareunit(pofile, 6, "Heading Six")

    def test_headings_with_linebreaks(self):
        """Test to see if h* elements with newlines can be extracted."""
        markup = (
            "\n"
            "\n"
            "Heading\nOne\n"
            "\n"
            "Heading\nTwo\n"
            "\n"
            "Heading\nThree\n"
            "\n"
            "Heading\nFour\n"
            "\n"
            "Heading\nFive\n"
            "Heading\nSix\n"
        )
        pofile = self.html2po(markup)
        self.countunits(pofile, 6)
        self.compareunit(pofile, 1, "Heading One")
        self.compareunit(pofile, 2, "Heading Two")
        self.compareunit(pofile, 3, "Heading Three")
        self.compareunit(pofile, 4, "Heading Four")
        self.compareunit(pofile, 5, "Heading Five")
        self.compareunit(pofile, 6, "Heading Six")

    def test_dt(self):
        """Test to see if the definition list title (dt) element is extracted."""
        self.check_single("\nDefinition List Item Title\n", "Definition List Item Title")

    def test_dd(self):
        """Test to see if the definition list description (dd) element is extracted."""
        self.check_single("\nDefinition List Item Description\n", "Definition List Item Description")

    def test_span(self):
        """Test to check that we don't double extract a span item."""
        self.check_single("\n\nYou are a Spanish sentence.\n\n", "You are a Spanish sentence.")

    def test_ul(self):
        """Test to see if list items (li) are extracted."""
        markup = (
            "\n"
            "    • Unordered One\n"
            "    • Unordered Two\n"
            "    1. Ordered One\n"
            "    2. Ordered Two\n"
            "    "
        )
        pofile = self.html2po(markup)
        self.countunits(pofile, 4)
        self.compareunit(pofile, 1, "Unordered One")
        self.compareunit(pofile, 2, "Unordered Two")
        self.compareunit(pofile, 3, "Ordered One")
        self.compareunit(pofile, 4, "Ordered Two")

    def test_duplicates(self):
        """Check that we use the default style of msgid_comments to disambiguate duplicate messages."""
        markup = "\n\n    Duplicate\n\n    Duplicate\n\n    "
        pofile = self.html2po(markup)
        self.countunits(pofile, 2)
        # FIXME change this so that we check that the KDE comment is correctly added
        self.compareunit(pofile, 1, "Duplicate")
        self.compareunit(pofile, 2, "Duplicate")

    # The "wtest_" prefix keeps the next two methods from being collected as tests.
    def wtest_multiline_reflow(self):
        """Check that we reflow multiline content to make it more readable for translators."""
        self.check_single('''South Africa''', '''South Africa''')

    def wtest_nested_tags(self):
        """Check that we can extract items within nested tags."""
        markup = "\n\n    Extract this\n\n    And this\n    "
        pofile = self.html2po(markup)
        self.countunits(pofile, 2)
        self.compareunit(pofile, 1, "Extract this")
        self.compareunit(pofile, 2, "And this")

    def test_carriage_return(self):
        """Remove carriage returns from files in dos format."""
        htmlsource = (
            '\r \r \r \r \r \r \r \r\n'
            '\n'
            '    The rapid expansion of telecommunications infrastructure in recent\r years has helped to bridge the digital divide to a limited extent.\n'
            '\n'
            '    \r \r \r '
        )
        self.check_single(htmlsource, 'The rapid expansion of telecommunications infrastructure in recent years has helped to bridge the digital divide to a limited extent.')

    def test_encoding_latin1(self):
        """Convert HTML input in iso-8859-1 correctly to unicode."""
        htmlsource = (
            ' FMFI - South Africa - CSIR Openphone - Overview\n'
            '\n'
            '    We aim to please \x96 will you aim too, please?\n'
            '\n'
            '    South Africa\x92s language diversity can be challenging.\n'
            '\n'
            '    '
        )
        pofile = self.html2po(htmlsource)
        self.countunits(pofile, 4)
        self.compareunit(pofile, 3, u'We aim to please \x96 will you aim too, please?')
        self.compareunit(pofile, 4, u'South Africa\x92s language diversity can be challenging.')

    def test_strip_html(self):
        """Ensure that unnecessary html is stripped from the resulting unit."""
        htmlsource = (
            ' FMFI - Contact\n'
            '    Projects\n'
            '    Home Page\n'
            '    '
        )
        pofile = self.html2po(htmlsource)
        self.countunits(pofile, 3)
        self.compareunit(pofile, 2, u'Projects')
        self.compareunit(pofile, 3, u'Home Page')

        # Translate and convert back:
        pofile.units[1].target = 'Projekte'
        pofile.units[2].target = 'Tuisblad'
        htmlresult = self.po2html(str(pofile), htmlsource)
        # Normalise whitespace so the snippet checks do not depend on layout.
        htmlresult = htmlresult.replace('\n', ' ').replace('= "', '="').replace('> <', '><')
        snippet = 'Projekte'
        assert snippet in htmlresult
        snippet = 'Tuisblad'
        assert snippet in htmlresult


class TestHTML2POCommand(test_convert.TestConvertCommand, TestHTML2PO):
    """Tests running actual html2po commands on files"""
    convertmodule = html2po
    defaultoptions = {"progress": "none"}

    def test_help(self):
        """Test getting help: the html2po-specific options must be listed."""
        options = test_convert.TestConvertCommand.test_help(self)
        options = self.help_check(options, "-P, --pot")
        options = self.help_check(options, "--duplicates=DUPLICATESTYLE")
        options = self.help_check(options, "-u, --untagged", last=True)