""" zwikiimport.py - import files/directories into a zwiki. (c) 2004-2005 SKWM, GNU GPL. Usage: cd directorycontainingfiles zopectl run .../ZWiki/tools/zwikiimport.py [opts] /path/to/wikipageorfolder Options: -h, --help show this help message and exit -n, --dry-run Don't actually import anything. May be inaccurate. -v, --verbose Be more verbose. --debug Print additional debug information. --replace When objects already exist, replace them. --delete Delete existing objects instead of importing. Notes/stories/todos: -zope root folder or root page is specified as an argument -we walk the current directory -each text file becomes a wiki page by the same name -a subdirectory becomes a page. the contents are imported and parented under it -a file named after its directory or index.html? is merged with the directory page -images become images, files become files -relative links and image paths are adjusted -allow ignoring/replacing/deleting old pages (as long wiki is anon-writable) todo: suffixes influence the page type: .html, .stx, .rst, .txt etc. an id collision creates a page with modified name a front page may be selected from the imported pages text patterns may be removed from the pages file(s) may be specified on the command line use cmf or plone image/files when appropriate images and files have safe ids assigned if needed use visitor pattern smarter dry run get around anon-writable requirement of zwiki web api apply html tidy automatically ? """ import os, re from optparse import OptionParser from zExceptions import * import OFS.Image from OFS.content_types import guess_content_type try: from transaction import get as get_transaction except ImportError: pass from os import environ from sys import stdin, stdout from ZPublisher.HTTPRequest import HTTPRequest from ZPublisher.HTTPResponse import HTTPResponse from ZPublisher.BaseRequest import RequestContainer options = args = None def parseArgs(): """Parse command-line options.""" global options, args parser = OptionParser() parser.add_option('-n', '--dry-run', action='store_true',dest='dryrun', help="Don't actually import anything (may be inaccurate)") parser.add_option('-v', '--verbose', action='store_true', help="Be more verbose") parser.add_option('--debug', action='store_true', help="Print additional debug information") #parser.add_option('--ignore', action='store_true', # help="When objects already exist, ignore and continue") parser.add_option('--replace', action='store_true', help="When objects already exist, replace them") parser.add_option('--delete', action='store_true', help="Delete existing objects instead of importing") #parser.add_option('-u','--user', # help="user:password for authentication") options,args = parser.parse_args() # to facilitate calling authentication-requiring API methods, # set up a dummy request with the provided credentials #if options.user: # user, password = options.user.split(':') # options.request = makerequest(app,user,password) #else: # options.request = None # XXX doesn't work yet options.request = None # XXX doesn't work yet def makerequest(app,user,password): """Like Testing.makerequest but add authentication info.""" resp = HTTPResponse(stdout=stdout) environ['SERVER_NAME']='foo' environ['SERVER_PORT']='80' environ['REQUEST_METHOD'] = 'GET' environ['AUTHENTICATED_USER'] = user environ['__ac_name'] = user environ['__ac_password'] = password req = HTTPRequest(stdin, environ, resp) return app.__of__(RequestContainer(REQUEST = req)) def dlog(msg='', newline=True): """Print some text and/or a newline if debug option is true.""" if options.debug: if newline: print '* %s' % msg else: print '* %s' % msg, def vlog(msg='', newline=True): """Print some text and/or a newline if verbose option is true.""" if options.verbose: if newline: print '%s' % msg else: print '%s' % msg, def bodyFromHtml(t): """Return contents of html body tag in t, or None.""" m = re.search(r'(?is)]*>(.*)',t) if m: return m.group(1) else: return None def fixLinksIn(t): """Fix up relative hyperlinks and image paths for the wiki. NB doesn't work after html tidy.""" # strip path components from relative links t = re.sub(r'(?i)( (href|src)=")(?!http:)([^"/]+/)+?(?P[^"/]+)"', r'\1\g"', t) # strip .htm suffix t = re.sub(r'(?i)( (href|src)=")(?!http:)(?P[^"]+).html?"', r'\1\g"', t) # fuzzy urls should take care of whitespace/quoting/capitalization diffs return t def doPage(parent,name,text,type): """Create, modify or delete the specified wiki page under the parent page. Prints a status message and returns a boolean for success/failure. """ dlog('doFile(%s,...,%s)' % (name,type)) if options.dryrun: vlog(': dry run') return True existing = parent.pageWithName(name) #if existing and options.ignore: # vlog(': ignored') # return True if existing and options.delete: existing.delete(REQUEST=options.request) get_transaction().commit() vlog(': deleted') return True elif existing and options.replace: text = fixLinksIn(text) existing.edit(name, text, type, REQUEST=options.request) get_transaction().commit() vlog(': replaced') return True else: try: text = fixLinksIn(text) parent.create(name, text, type, REQUEST=options.request) get_transaction().commit() vlog(': created') return True except BadRequest, e: vlog(': failed\n*** (%s)' % e) return False def doFile(context,filename,data): """Create, modify or delete the specified file or image. An Image is created if the file suffix indicates it. Prints a status message and returns a boolean for success/failure. """ dlog('doFile(%s,...)' % (filename)) if options.dryrun: vlog(': dry run') return True folder = context.folder() existing = getattr(folder,filename,None) #if existing and options.ignore: # vlog(': ignored') # return True if existing and options.delete: folder._delObject(filename) get_transaction().commit() vlog(': deleted') return True elif existing and options.replace: folder._getOb(filename).manage_upload(data) get_transaction().commit() vlog(': replaced') return True else: try: if guess_content_type(filename)[0][0:5] == 'image': folder._setObject(filename, OFS.Image.Image(filename,filename,'')) else: folder._setObject(filename, OFS.Image.File(filename,filename,'')) folder._getOb(filename).manage_upload(data) get_transaction().commit() vlog(': created') return True except BadRequest, e: vlog(': failed\n*** (%s)' % e) return False def importFile(context,filepath): """Import a file or directory tree as wiki page/file/image objects.""" dlog('importFile(%s,%s)' % (`context`,filepath)) vlog(filepath,newline=False) if os.path.isfile(filepath): # create(/replace/delete) a wiki page based on file name & content filename = os.path.basename(filepath) pagename,ext = os.path.splitext(os.path.basename(filepath)) if re.match('(?i).htm',ext): text = bodyFromHtml(open(filepath).read()) if text != None: vlog(filepath,newline=False) doPage(context,pagename, text, 'html') else: vlog(filepath,newline=False) doFile(context, filename, open(filepath).read()) else: # create(/replace/delete) a page representing this directory/node # (unless it's the very first directory). # if the directory contains a page with the same name or # index.htm, we will use that for content. vlog(filepath,newline=False) dirpagename = pageNameFromPath(filepath) if dirpagename == '' or doPage(context, dirpagename, '', 'html'): dirpage = context.pageWithName(dirpagename) or context dirpagetext = '' for f in os.listdir(filepath): pagename,ext = os.path.splitext(os.path.basename(f)) if (re.match('(?i).htm',ext) and pagename == dirpagename or re.match(r'(?i)index$',pagename)): dirpagetext = open(os.path.join(filepath,f)).read() else: importFile(dirpage,os.path.join(filepath,f)) if dirpagetext: dirpagetext = fixLinksIn(dirpagetext) context.pageWithName(dirpagename).edit(text=dirpagetext) def pageNameFromPath(path): """Derive a suitable wiki page name from a filesystem path.""" return os.path.splitext(os.path.basename(path))[0] def pageFromPath(path): """Get the wiki page object indicated by a ZODB path. If it's a folder, return the first page. Otherwise None.""" obj = app.restrictedTraverse(path, None) if not obj: return None if 'Folder' in obj.meta_type: pages = obj.objectValues(spec='ZWiki Page') if pages: obj = pages[0] if obj.meta_type == 'ZWiki Page': return obj else: return None def main(): """Main procedure.""" parseArgs() importFile(pageFromPath(args[0]), '.') get_transaction().commit() if __name__ == "__main__": main() #def _test(): # import doctest, zwikiimport # return doctest.testmod(zwikiimport) #if __name__ == "__main__": # _test()