# Got this script from Liquid War, patched to be able
# to transform XML docs into uwikicms content.
# Note that this code used to use the xmllib module
# which came with python 1.6, however it's now
# deprecated so I use sax instead. Point is it wasn't
# designed for sax in the first place, and I haven't
# much time nor interest to code this, and the direct
# consequence is that the code is ugly. I know.
#
# NOTE(review): the copy of this file that was reviewed had been
# collapsed onto a few lines and run through an HTML stripper, which
# removed everything that looked like a tag.  Every spot that had to
# be reconstructed is marked with NOTE(review) below and must be
# checked against a pristine copy of the script.

import xml.sax
import string  # no longer used here; kept so importers relying on it still work
import re

# Quoted e-mail addresses and quoted http URLs, as written in the XML docs.
_EMAIL_RE = re.compile(r'"([\w\-.]+@[\w\-.]+)"')
_URL_RE = re.compile(r'"http://([\w\-.~/]+)"', re.I)
_MULTI_SPACE_RE = re.compile(r" {2,}")
_LINE_RE = re.compile(r"^(.*)$", re.M)


def remove_duplicate_blanks(text):
    """Turn tabs and newlines into spaces and collapse runs of spaces."""
    flattened = text.replace("\t", " ").replace("\n", " ")
    return _MULTI_SPACE_RE.sub(" ", flattened)


def format_text(text, left_col, right_col):
    """Word-wrap text between two columns.

    The text is first flattened with remove_duplicate_blanks(), then cut
    into lines of at most (right_col - left_col) characters, each one
    prefixed by left_col spaces and terminated by a newline.  Lines are
    broken on the last space that fits; a word longer than the available
    width is cut in the middle.

    NOTE(review): the head of the wrapping loop was lost in the reviewed
    copy and has been reconstructed; a disabled ("if 0==1") justification
    routine that followed it was dropped.
    """
    temp = remove_duplicate_blanks(text)
    # A width below 1 would never consume any input and loop forever.
    width = max(1, right_col - left_col)
    margin = " " * left_col
    lines = []
    pos = 0
    total = len(temp)
    while pos < total:
        cur_line = temp[pos:pos + width]
        step = len(cur_line)
        if pos + step < total:
            # More text follows: break on the last blank, if there is one
            # past the first character, and skip that blank.
            cut = cur_line.rfind(" ")
            if cut > 0:
                cur_line = cur_line[:cut]
                step = cut + 1
        lines.append(margin + cur_line + "\n")
        pos += step
    return "".join(lines)


def format_email_and_url(text):
    """Strip the double quotes around quoted e-mail addresses and URLs."""
    result = _EMAIL_RE.sub(r"\1", text)
    result = _URL_RE.sub(r"http://\1", result)
    return result


def format_html(text):
    """Escape text for HTML and turn quoted e-mails / URLs into links.

    NOTE(review): the entity names and the anchor markup were eaten by
    the HTML stripper in the reviewed copy; they are reconstructed here.
    """
    # "&" must be escaped first so the entities added below survive.
    result = text.replace("&", "&amp;")
    result = result.replace("<", "&lt;")
    result = result.replace(">", "&gt;")
    # Uncomment this to make mailing list addresses look like "xxx at xxx"
    # instead of "xxx@xxx". This can prevent spammers from harvesting
    # the mailing list address
    # fakeemail=re.compile(r'"([\w\.]+\-user)@([\w\-\.]+)"')
    # result=fakeemail.sub(r'\1 at \2 (replace "at" by "@")',result)
    result = _EMAIL_RE.sub(r'<A HREF="mailto:\1">\1</A>', result)
    result = _URL_RE.sub(r'<A HREF="http://\1">\1</A>', result)
    return result


def format_tex(text):
    """Escape the characters that are special to TeX."""
    # The backslash goes first: every later replacement introduces
    # backslashes that must not be escaped again.
    replacements = (
        ("\\", "$\\backslash$"),
        ("_", "\\_"),
        ("#", "\\#"),
        ("%", "\\%"),
        ("}", "\\}"),
        ("<", "$<$"),
        (">", "$>$"),
        ("~", "$\\tilde{}$"),
    )
    result = text
    for old, new in replacements:
        result = result.replace(old, new)
    return result


def format_texi(text):
    """Escape the characters that are special to Texinfo."""
    result = text.replace("@", "@@")
    result = result.replace("}", "@}")
    result = result.replace("{", "@{")
    return result


def format_uwc(text):
    """Escape opening square brackets for UWiKiCMS.

    NOTE(review): the reviewed copy also replaced "]" by itself, which
    is a no-op and possibly a damaged "]]" -- confirm before changing.
    """
    return text.replace("[", "[[")


def format_uwc_text(text):
    """Format a paragraph for UWiKiCMS as a single unwrapped line."""
    result = format_uwc(text)
    # This is an ugly way to get rid of all junk at line start
    return format_text(result, 0, 999999)


def format_uwc_elem(text):
    """Format a list element for UWiKiCMS on one line, blanks collapsed."""
    result = format_uwc(text)
    result = result.replace("\n", " ").replace("\r", " ")
    return remove_duplicate_blanks(result)


def format_uwc_code(text):
    """Format a code sample for UWiKiCMS: every line gets a leading blank."""
    return _LINE_RE.sub(r" \1", format_uwc(text))


class XMLToX(xml.sax.ContentHandler):
    """Base SAX handler shared by all the converters.

    The source documents use the tags file, chap and part (which carry
    a "title" attribute) and text, list, elem and code.  Subclasses
    override the start_*/end_* hooks and translate(); the converted
    document accumulates in the "translated" attribute.
    """

    _TITLED_TAGS = ("file", "chap", "part")
    _PLAIN_TAGS = ("text", "list", "elem", "code")

    def __init__(self):
        xml.sax.ContentHandler.__init__(self)
        self.charbuf = ""     # character data of the element being read
        self.translated = ""  # converted output so far
        self.stack = []       # currently open tags, innermost last

    def write(self, text):
        """Append text to the converted output."""
        self.translated += text

    def start_file(self, title):
        pass

    def start_chap(self, title):
        pass

    def start_part(self, title):
        pass

    def start_text(self):
        pass

    def start_list(self):
        pass

    def start_elem(self):
        pass

    def start_code(self):
        pass

    def end_file(self):
        pass

    def end_chap(self):
        pass

    def end_part(self):
        pass

    def end_text(self):
        pass

    def end_list(self):
        pass

    def end_elem(self):
        pass

    def end_code(self):
        pass

    def startElement(self, tag, attributes):
        """SAX callback: push the tag and call the matching start_* hook."""
        # Character data seen before a child element opens is discarded.
        self.charbuf = ""
        self.stack.append(tag)
        if tag in self._TITLED_TAGS:
            getattr(self, "start_" + tag)(attributes["title"])
        elif tag in self._PLAIN_TAGS:
            getattr(self, "start_" + tag)()

    def endElement(self, tag):
        """SAX callback: flush pending text, pop the tag, call end_* hook."""
        data = self.charbuf.strip()
        if data:
            self.write(self.translate(data, self.stack[-1]))
        self.charbuf = ""
        self.stack.pop()
        if tag in self._TITLED_TAGS or tag in self._PLAIN_TAGS:
            getattr(self, "end_" + tag)()

    def translate(self, data, tag):
        """Convert character data found inside tag; identity by default."""
        return data

    def characters(self, data):
        """SAX callback: accumulate character data."""
        self.charbuf += data


class _XMLToHTMLBody(XMLToX):
    """Markup shared by the HTML and PHP3 converters.

    NOTE(review): every markup literal in this class and in its two
    subclasses was removed by the HTML stripper in the reviewed copy
    (only the newlines survived).  The tags below are a reconstruction
    and must be compared with a pristine copy of the script.
    """

    def start_chap(self, title):
        self.write("<HR>\n<H2>" + title + "</H2>\n")

    def start_part(self, title):
        self.write("<H3>" + title + "</H3>\n")

    def start_text(self):
        self.write("<P>")

    def start_list(self):
        self.write("<UL>\n")

    def start_elem(self):
        self.write("<LI>")

    def start_code(self):
        self.write("<PRE>")

    def end_text(self):
        self.write("</P>\n")

    def end_list(self):
        self.write("</UL>\n")

    def end_elem(self):
        self.write("</LI>\n")

    def end_code(self):
        self.write("</PRE>\n")

    def translate(self, data, tag):
        return format_html(data)


class XMLToHTML(_XMLToHTMLBody):
    """Converter producing a standalone HTML page.

    header and footer are raw HTML snippets inserted right after the
    opening of the page body and right before its end.
    """

    def __init__(self, header, footer):
        _XMLToHTMLBody.__init__(self)
        self.header = header
        self.footer = footer

    def start_file(self, title):
        self.write(
            "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\">\n"
            + "<HTML>\n<HEAD>\n"
            + "<META HTTP-EQUIV=\"Content-Type\" "
            + "CONTENT=\"text/html; charset=iso-8859-1\">\n"
            + "<TITLE>"
            + title
            + "</TITLE>\n</HEAD>\n<BODY>\n\n"
            + self.header
            + "\n<H1>"
            + title
            + "</H1>\n"
        )

    def end_file(self):
        # NOTE(review): this method was lost entirely; the footer is
        # stored by __init__, so it is assumed to be emitted here.
        self.write(self.footer + "\n</BODY>\n</HTML>\n")


class XMLToPHP3(_XMLToHTMLBody):
    """Converter producing an HTML fragment meant to be used from PHP3."""

    def __init__(self):
        _XMLToHTMLBody.__init__(self)

    def start_file(self, title):
        # NOTE(review): the four leading newlines are all that is left
        # of what were presumably PHP directives -- restore them from a
        # pristine copy.
        self.write(
            "\n\n\n\n"
            + "<HR>\n<H1>"
            + title
            + "</H1>\n<HR>\n"
        )


class XMLToTeX(XMLToX):
    """Converter producing a LaTeX chapter."""

    def __init__(self):
        XMLToX.__init__(self)

    def start_file(self, title):
        self.write("\\chapter{" + self.translate(title, "text") + "}\n")

    def start_chap(self, title):
        self.write("\\section{" + self.translate(title, "text") + "}\n")

    def start_part(self, title):
        self.write("\\subsection{" + self.translate(title, "text") + "}\n")

    def start_text(self):
        self.write("\n")

    def start_list(self):
        self.write("\\begin{itemize}\n")

    def start_elem(self):
        self.write("\\item[$\\bullet$]")

    def start_code(self):
        self.write("\\begin{verbatim}\n")

    def end_file(self):
        self.write("")

    def end_chap(self):
        self.write("\n")

    def end_part(self):
        self.write("\n")

    def end_text(self):
        self.write("\n")

    def end_list(self):
        self.write("\\end{itemize}\n")

    def end_elem(self):
        self.write("\n")

    def end_code(self):
        self.write("\n\\end{verbatim}\n")

    def translate(self, data, tag):
        result = format_email_and_url(data)
        # Code goes into a verbatim environment and must stay untouched.
        if tag != "code":
            result = format_tex(result)
        return result


class XMLToMan(XMLToX):
    """Converter producing a man page section."""

    def __init__(self):
        XMLToX.__init__(self)

    def start_file(self, title):
        self.write(".SH " + title + "\n\n")

    def start_chap(self, title):
        self.write(".SS " + title + "\n\n")

    def start_part(self, title):
        self.write(".TP 0 \n.B " + title + "\n")

    def start_text(self):
        self.write(".HP 0\n")

    def start_list(self):
        self.write("")

    def start_elem(self):
        self.write(".TP 3 \n.B *\n")

    def start_code(self):
        self.write(".HP 0\n")

    def end_file(self):
        self.write("")

    def end_chap(self):
        self.write("\n")

    def end_part(self):
        self.write("\n")

    def end_text(self):
        self.write("\n")

    def end_list(self):
        self.write("\n")

    def end_elem(self):
        self.write("\n")

    def end_code(self):
        self.write("\n")

    def translate(self, data, tag):
        result = format_email_and_url(data)
        # Backslashes first, so the ones added for "." and "-" are kept.
        result = result.replace("\\", "\\\\")
        result = result.replace(".", "\\.")
        result = result.replace("-", "\\-")
        if tag == "code":
            # Force a line break after every line of a code sample.
            result = result.replace("\n", "\n.br\n")
        else:
            result = remove_duplicate_blanks(result)
        return result


class XMLToTxt(XMLToX):
    """Converter producing indented plain text wrapped at column 80."""

    def __init__(self, header):
        XMLToX.__init__(self)
        self.header = header
        # Also reset by start_file(); set here so the handler is usable
        # on documents that do not start with a file element.
        self.indent = 0

    def start_file(self, title):
        self.write(self.header + " - " + title + "\n\n")
        self.indent = 0

    def start_chap(self, title):
        self.write(
            "\n\n"
            + " " * self.indent + "\n" + title + "\n"
            + " " * self.indent + "=" * len(title) + "\n\n"
        )
        self.indent = self.indent + 2

    def start_part(self, title):
        self.write(
            "\n"
            + " " * self.indent + title + "\n"
            + " " * self.indent + "-" * len(title) + "\n"
        )
        self.indent = self.indent + 2

    def start_text(self):
        self.write("\n")

    def start_list(self):
        self.write("")
        self.indent = self.indent + 2

    def start_elem(self):
        self.write("\n")

    def start_code(self):
        self.write("\n")

    def end_file(self):
        self.write("\n")

    def end_chap(self):
        self.write("")
        self.indent = self.indent - 2

    def end_part(self):
        self.write("")
        self.indent = self.indent - 2

    def end_text(self):
        self.write("")

    def end_list(self):
        self.write("")
        self.indent = self.indent - 2

    def end_elem(self):
        self.write("")

    def end_code(self):
        self.write("\n")

    def translate(self, data, tag):
        result = format_email_and_url(data)
        pad = " " * self.indent
        if tag == "code":
            # Code keeps its own line breaks, it is only indented.
            result = pad + result.replace("\n", "\n" + pad)
        else:
            result = format_text(result, self.indent, 80)
        if tag == "elem":
            # Put the bullet in the last two columns of the first
            # line's indentation.
            result = " " * (self.indent - 2) + "* " + result[self.indent:]
        return result


class XMLToTexi(XMLToX):
    """Converter producing a Texinfo chapter attached to the given node."""

    def __init__(self, node):
        XMLToX.__init__(self)
        self.node = node

    def start_file(self, title):
        self.write("\n@node " + self.node + " , , , Top\n")
        self.write("\n@chapter " + title + "\n")

    def start_chap(self, title):
        self.write("\n@section " + title + "\n")

    def start_part(self, title):
        self.write("\n@subsection " + title + "\n")

    def start_text(self):
        self.write("\n")

    def start_list(self):
        self.write("\n@itemize @bullet")

    def start_elem(self):
        self.write("\n@item\n")

    def start_code(self):
        self.write("\n@example\n")

    def end_file(self):
        self.write("\n")

    def end_chap(self):
        self.write("\n")

    def end_part(self):
        self.write("\n")

    def end_text(self):
        self.write("\n")

    def end_list(self):
        self.write("@end itemize\n")

    def end_elem(self):
        self.write("\n")

    def end_code(self):
        self.write("\n@end example\n")

    def translate(self, data, tag):
        result = format_email_and_url(data)
        if tag != "code":
            result = remove_duplicate_blanks(result)
        return format_texi(result)


class XMLToUWC(XMLToX):
    """Converter producing UWiKiCMS wiki markup."""

    def __init__(self):
        XMLToX.__init__(self)

    def start_file(self, title):
        # In UWiKiCMS the document title is handled manually, once and
        # for all, directly in the UWiKiCMS site/instance.
        pass

    def start_chap(self, title):
        self.write("\n!! " + title + "\n")

    def start_part(self, title):
        self.write("\n! " + title + "\n")

    def start_text(self):
        self.write("\n")

    def start_list(self):
        self.write("\n")

    def start_elem(self):
        self.write("\n* ")

    def start_code(self):
        self.write("\n")

    def end_file(self):
        self.write("\n")

    def end_chap(self):
        self.write("\n")

    def end_part(self):
        self.write("\n")

    def end_text(self):
        self.write("\n")

    def end_list(self):
        self.write("\n")

    def end_elem(self):
        # No \n for we do not want blank lines between elems
        pass

    def end_code(self):
        self.write("\n")

    def translate(self, data, tag):
        result = format_email_and_url(data)
        if tag == "code":
            return format_uwc_code(result)
        if tag == "elem":
            return format_uwc_elem(result)
        return format_uwc_text(result)


def run_parser(handler, dst, src):
    """Parse the XML file src with handler, write the result to dst.

    The output is encoded as iso-8859-1; a character that cannot be
    represented raises UnicodeEncodeError.
    """
    parser = xml.sax.make_parser()
    parser.setContentHandler(handler)
    # Binary mode lets the parser honour the encoding declared in the XML.
    with open(src, "rb") as src_file:
        parser.parse(src_file)
    # The destination is opened only after a successful parse, so a
    # broken source document does not truncate an existing output file.
    with open(dst, "wb") as dst_file:
        dst_file.write(handler.translated.encode("iso-8859-1"))


def make_html(html_file, xml_file, header, footer):
    """Convert xml_file to HTML; header and footer are file names."""
    with open(header, "r") as header_file:
        header_str = header_file.read()
    with open(footer, "r") as footer_file:
        footer_str = footer_file.read()
    run_parser(XMLToHTML(header_str, footer_str), html_file, xml_file)


def make_php3(php3_file, xml_file):
    """Convert xml_file to a PHP3 page."""
    run_parser(XMLToPHP3(), php3_file, xml_file)


def make_tex(tex_file, xml_file):
    """Convert xml_file to LaTeX."""
    run_parser(XMLToTeX(), tex_file, xml_file)


def make_man(man_file, xml_file):
    """Convert xml_file to man page source."""
    run_parser(XMLToMan(), man_file, xml_file)


def make_txt(txt_file, xml_file, header):
    """Convert xml_file to plain text; header is the title prefix string."""
    run_parser(XMLToTxt(header), txt_file, xml_file)


def make_texi(texi_file, xml_file):
    """Convert xml_file to Texinfo, naming the node after the file."""
    node = xml_file.replace(".xml", "").replace("xml/", "")
    run_parser(XMLToTexi(node), texi_file, xml_file)


def make_uwc(uwc_file, xml_file):
    """Convert xml_file to UWiKiCMS markup."""
    run_parser(XMLToUWC(), uwc_file, xml_file)