#!/usr/bin/env python
# nohtml.py - Filter To Remove HTML Attachments From Email
# Copyright (c) 2003 TundraWare Inc.  All Rights Reserved.
# For Updates See:  http://www.tundraware.com/Software/nohtml
#
# Reads an email message on stdin, splits it into sections at every line
# that contains the current MIME boundary string, and writes to stdout
# every section that does not look like HTML.

#------------------- Nothing Below Here Should Need Changing ------------------#

# Program Information

PROGNAME = "nohtml"
RCSID = "$Id: nohtml.py,v 1.11 2003/05/06 22:59:02 tundra Exp $"
VERSION = RCSID.split()[2]

# Copyright Information

CPRT = chr(169)
DATE = 2003
OWNER = "TundraWare Inc."
RIGHTS = "All Rights Reserved"
COPYRIGHT = "Copyright %s %s %s %s. " % (CPRT, DATE, OWNER, RIGHTS)


#----------------------------------------------------------#
#                          Imports                         #
#----------------------------------------------------------#

import getopt
import os
import re
import sys


#----------------------------------------------------------#
#                   Constants & Literals                   #
#----------------------------------------------------------#

#####
# Literals
#####

# All literals are lower case because they are matched against a
# lower-cased copy of each input line.

BOUNDARY = "BOUNDARY=".lower()

# This literal must never be empty: every string contains the empty
# string, so an empty trigger marks every line as HTML and the filter
# outputs nothing.  The open-ended form also matches "<html lang=...>".
STARTHTML0 = "<html"
STARTHTML1 = "Content-Type:".lower()
STARTHTML2 = "text/html".lower()

# Matches a boundary parameter in any letter case and captures its value:
# group 1 for a quoted value, group 2 for a bare token.  An empty value
# does not match at all.
BOUNDARY_RE = re.compile(
    re.escape(BOUNDARY) + r'\s*(?:"([^"]+)"|([^\s;"]+))',
    re.IGNORECASE)


#----------------------------------------------------------#
#              Prompts, & Application Strings              #
#----------------------------------------------------------#

#####
# Usage Prompts
#####

uTable = [PROGNAME + " " + VERSION + " - %s\n" % COPYRIGHT,
          "usage: " + PROGNAME + " [-hv] where,\n",
          " -h print this help information",
          " -v print detailed version information",
          ]


#--------------------------- Code Begins Here ---------------------------------#


#----------------------------------------------------------#
#             Supporting Function Definitions              #
#----------------------------------------------------------#

#####
# Print Usage Information
#####

def Usage():
    """Print each line of the usage table to stdout."""
    for line in uTable:
        # Single-argument call form works as both the Python 2 statement
        # and the Python 3 function.
        print(line)


#####
# Locate Message Sections
#####

def FindParts(lines):
    """Split a message into sections and flag the HTML ones.

    lines is the message as a list of text lines.  Returns a list of
    [start, is_html, end] lists where start is inclusive and end is
    exclusive.  A new section starts on every line that contains the
    most recently declared MIME boundary string; the text before the
    first such line is section 0.
    """
    mimesep = ""
    parts = [[0, False]]

    for index, line in enumerate(lines):
        lline = line.lower()

        # Keep track of the current MIME separator string.  Searching the
        # original line case-insensitively keeps the separator's own
        # case intact while still accepting "Boundary=" or "BOUNDARY=".
        found = BOUNDARY_RE.search(line)
        if found:
            mimesep = found.group(1) or found.group(2)

        # Note existence of next section.  The line that declares the
        # boundary contains the separator too, so it also opens a section.
        if mimesep and mimesep in line:
            parts[-1].append(index)        # End of last section
            parts.append([index, False])   # Beginning of next section

        # Any trigger text indicating HTML marks the current section.
        if STARTHTML0 in lline or (STARTHTML1 in lline and
                                   STARTHTML2 in lline):
            parts[-1][1] = True

    # Make sure the last section is closed properly
    parts[-1].append(len(lines))
    return parts


#####
# Drop HTML Sections
#####

def StripHTML(lines):
    """Return the lines of every section that FindParts did not flag."""
    kept = []
    for start, is_html, end in FindParts(lines):
        if not is_html:
            kept.extend(lines[start:end])
    return kept


#----------------------------------------------------------#
#                    Program Entry Point                   #
#----------------------------------------------------------#

def main(argv=None):
    """Process options, then filter stdin to stdout.

    argv is the list of command line arguments without the program name;
    it defaults to sys.argv[1:].  Exits with status 0 after -h or -v and
    with status 1 on an unrecognized option.
    """
    if argv is None:
        argv = sys.argv[1:]

    # Command line processing - Process any options set in the
    # environment first, and then those given on the command line
    options = list(argv)
    envopt = os.getenv(PROGNAME.upper())
    if envopt:
        options = envopt.split() + options

    try:
        # '-' is not an option letter, so the option string is just 'hv'
        opts, args = getopt.getopt(options, 'hv')
    except getopt.GetoptError:
        Usage()
        sys.exit(1)

    for opt, val in opts:
        if opt == "-h":
            Usage()
            sys.exit(0)
        if opt == "-v":
            print(RCSID)
            sys.exit(0)

    # NOTE(review): under Python 3 stdin is decoded as text, so mail
    # containing bytes invalid in the active encoding may fail to decode
    # - confirm whether binary I/O is needed.
    lines = sys.stdin.readlines()

    # Now output everything which is not HTML
    sys.stdout.writelines(StripHTML(lines))


if __name__ == "__main__":
    main()