# ports//net/dictd-database/work/world02-2003-02-15/convert.py

#!/usr/bin/python

''' Quick and dirty hack to preformat the CIA world factbook for dictfmt use.
    See file README for additional notes.

    This script was written by Eckhard Licher and is put into the public domain.
    Use at your own risk.
'''

#####################################
# BEGIN Configuration
#
# input files / dirs
# Directory searched for the per-country "*.txt" files, and the file that
# holds the copyright notice.
country_d = 'countries'
copyright = 'copyright.txt'

# Appendix label -> path of the text file containing that appendix.
appfiles = {
	"Appendix A": "appendices/appendix-a.txt",
	"Appendix B": "appendices/appendix-b.txt",
	"Appendix C": "appendices/appendix-c.txt",
	"Appendix D": "appendices/appendix-d.txt",
	"Appendix E": "appendices/appendix-e.txt",
	"Appendix F": "appendices/appendix-f.txt",
}

# Appendix label -> title written in front of the appendix text and listed
# in the table of contents.
appnames = {
	"Appendix A": "Abbreviations",
	"Appendix B": "International Organizations and Groups",
	"Appendix C": "Selected International Environmental Agreements",
	"Appendix D": "Cross-Reference List of Country Data Codes",
	"Appendix E": "Cross-Reference List of Hydrographic Data Codes",
	"Appendix F": "Cross-Reference List of Geographic Names",
}

# Set to a true value to report progress on stderr.
verbose = 0
#
# END Configuration
#####################################


import string, sys, glob

# Marker strings the scanner looks for: copying begins at the line that
# contains `start` and the line containing `stop` ends the country text.
start = "[Map of"
stop = "This page was last updated"

# Section names; a line containing "<name> <country>" is written out as an
# underlined heading.
keywords = [
	"Introduction",
	"Geography",
	"People",
	"Government",
	"Economy",
	"Communications",
	"Transportation",
	"Military",
	"Transnational Issues",
]

# Every "*.txt" file in the country directory is an input file.
country_f = glob.glob("%s/*.txt" % country_d)

# Number of input files processed, and the collected output lines keyed by
# country name.
count = 0
countries = {}

def scanfile(name,start,stop):
	''' Extract the text of one country file into the global `countries`.

	    name  -- path of the text file to read
	    start -- marker string; the country name is taken from the first line
	             containing it, from one character after the marker up to the
	             next "]" (or to the end of the line if there is none)
	    stop  -- marker string; the first line containing it ends the scan

	    The result is stored as countries[<country name>], a list of output
	    lines: a "\\n<name>\\n\\n" header followed by the tab-indented body.
	    A body line containing "<keyword> <country name>" (keywords come from
	    the global `keywords`) is replaced by that heading plus a line of
	    dashes of equal length.

	    A file that never contains `start` is reported on stderr and adds no
	    entry, so no empty headword ends up in the table of contents.
	'''
	global countries

	country = None		# stays None until the start marker has been seen
	patterns = []		# "<keyword> <country>" heading patterns
	text = []

	infile = open(name, "r")
	try:
		for line in infile:
			if country is None:
				pos = line.find(start)
				if pos < 0:
					continue
				# skip the marker and the single character following it
				pos2 = pos + len(start) + 1
				# look for the bracket only after the marker, so an earlier
				# "]" on the same line cannot truncate the name
				end = line.find("]", pos2)
				if end < 0:
					country = line[pos2:].rstrip("\r\n")
				else:
					country = line[pos2:end]
				patterns = [kw + " " + country for kw in keywords]
				text.append("\n" + country + "\n\n")
				continue

			if line.find(stop) > -1:
				# nothing after the stop marker belongs to the entry
				break

			heading = 0
			for pattern in patterns:
				if line.find(pattern) > -1:
					text.append("\t" + pattern + "\n")
					text.append("\t" + len(pattern) * "-" + "\n")
					heading = 1
			if not heading:
				text.append("\t" + line)
	finally:
		# release the handle even if reading fails part way through
		infile.close()

	if country is None:
		sys.stderr.write("%s: no '%s' marker found, file skipped\n" % (name, start))
		return
	countries[country] = text


# Parse every country file, counting the files as they are processed.
if verbose:
	sys.stderr.write("Reading Countries\n")
for cf in country_f:
	count = count + 1
	if verbose:
		sys.stderr.write("%s\n" % cf)
	scanfile(cf, start, stop)

# Write the copyright notice to stdout, each line indented by one tab.
if verbose:
	sys.stderr.write("Copying Copyright info\n")
copy = open(copyright, "r")
lines = copy.readlines()
copy.close()
for line in lines:
	sys.stdout.write("\t%s" % line)

# dictfmt does not work as advertised in its man pages;
# the line written below is a temporary workaround.
sys.stdout.write("\n.\n.\n\n")

# Write the collected country entries to stdout in sorted name order.
if verbose:
	sys.stderr.write("Writing Countries\n")

# Copy the keys into a real list first: where keys() returns a view
# object there is no sort() method on it.
keys = list(countries.keys())
keys.sort()
for key in keys:
	# each entry is already a list of complete output lines
	sys.stdout.writelines(countries[key])


# Write each appendix to stdout: its title on a line of its own, then the
# appendix text with every line indented by one space.
if verbose:
	sys.stderr.write("Writing Appendices\n")

# Copy the keys into a real list first: where keys() returns a view
# object there is no sort() method on it.
keys = list(appnames.keys())
keys.sort()

for key in keys:
	if verbose:
		sys.stderr.write(key + "\n")
	infile = open(appfiles[key], "r")
	try:
		lines = infile.readlines()
	finally:
		# release the handle even if reading fails
		infile.close()
	sys.stdout.write(appnames[key] + "\n")
	for line in lines:
		# underscores and vertical bars are replaced by blanks
		line = line.replace("_", " ")
		line = line.replace("|", " ")
		# NOTE(review): as written this replaces an apostrophe with itself
		# and so has no effect; presumably the first argument was meant to
		# be a different quote character -- confirm against the upstream
		# script.
		line = line.replace("'", "'")
		sys.stdout.write(" " + line)
	sys.stdout.write("\n")

# Write the table of contents: one "{name}" line for every country and
# every appendix title, each group in sorted key order.
if verbose:
	sys.stderr.write("Writing Table of Contents\n")

sys.stdout.write("\n\nCIA World Factbook 2002\n\n\tCountries\n\n")
# Copy the keys into a real list first: where keys() returns a view
# object there is no sort() method on it.
keys = list(countries.keys())
keys.sort()
for key in keys:
	sys.stdout.write("\t{%s}\n" % key)

sys.stdout.write("\n\tAppendices\n\n")
# Sorted by appendix label ("Appendix A", ...), not by title.
keys = list(appnames.keys())
keys.sort()
for key in keys:
	sys.stdout.write("\t{%s}\n" % appnames[key])

# dictfmt does not work as advertised in its man pages;
# the line written below is a temporary workaround.
sys.stdout.write("\n\n-\n\t-\n\n")

# Report how many country files were processed.  Written with
# sys.stderr.write instead of the "print >>" statement form, which only
# works where print is a statement; the output is the same.
sys.stderr.write("%d countries converted\n" % count)
# syntax highlighted by Code2HTML, v. 0.9.1