#!/usr/bin/env python
# nohtml.py - Filter To Remove HTML Attachments From Email
# Copyright (c) 2003 TundraWare Inc. All Rights Reserved.
# For Updates See: http://www.tundraware.com/Software/nohtml
#------------------- Nothing Below Here Should Need Changing ------------------#
# Program identity. VERSION is the third whitespace-separated field of
# the RCS keyword string (the revision number).
PROGNAME = "nohtml"
RCSID = "$Id: nohtml.py,v 1.11 2003/05/06 22:59:02 tundra Exp $"
VERSION = RCSID.split(None, 3)[2]

# Pieces of the copyright notice and the assembled notice itself.
CPRT = chr(0xA9)
DATE = 2003
OWNER = "TundraWare Inc."
RIGHTS = "All Rights Reserved"
COPYRIGHT = " ".join(["Copyright", CPRT, str(DATE), OWNER, RIGHTS]) + ". "
#----------------------------------------------------------#
# Imports #
#----------------------------------------------------------#
import getopt
import os
import sys
#----------------------------------------------------------#
# Constants & Literals #
#----------------------------------------------------------#
#####
# Literals
#####
# Lower-cased markers; they are matched against the lower-cased copy of
# each input line.
BOUNDARY = "BOUNDARY=".lower()
# This marker must not be empty: str.count("") is non-zero for every
# string, so an empty marker flags every part of the message as HTML and
# nothing is ever written out.
STARTHTML0 = "<HTML>".lower()
# These two must both appear on one line to flag a part as HTML.
STARTHTML1 = "Content-Type:".lower()
STARTHTML2 = "text/html".lower()
#----------------------------------------------------------#
# Prompts & Application Strings #
#----------------------------------------------------------#
#####
# Usage Prompts
#####
# Help text, one list entry per printed line: a banner built from the
# program name, version and copyright notice, then the option summary.
uTable = [
    "%s %s - %s\n" % (PROGNAME, VERSION, COPYRIGHT),
    "usage: %s [-hv] where,\n" % PROGNAME,
    " -h print this help information",
    " -v print detailed version information",
]
#--------------------------- Code Begins Here ---------------------------------#
#----------------------------------------------------------#
# Supporting Function Definitions #
#----------------------------------------------------------#
#####
# Print Usage Information
#####
def Usage():
    """Print each entry of uTable to standard output, one per line."""
    for line in uTable:
        # The parenthesised single-argument form prints the same text
        # under the Python 2 print statement and the Python 3 print
        # function, so the script parses on either interpreter.
        print(line)
#----------------------------------------------------------#
# Program Entry Point #
#----------------------------------------------------------#
# Command line processing - options found in the environment variable
# named after the program (upper-cased) are placed ahead of those given
# on the command line, so both sets are parsed in a single pass.
OPTIONS = sys.argv[1:]
envopt = os.getenv(PROGNAME.upper())
if envopt:
    OPTIONS = envopt.split() + OPTIONS

# NOTE(review): getopt.getopt() documents no special meaning for a leading
# '-' in the option string, so it is presumably stray - confirm before
# changing it to 'hv'.
try:
    opts, args = getopt.getopt(OPTIONS, '-hv')
except getopt.GetoptError:
    # Unrecognised option: show the help text and exit with failure.
    Usage()
    sys.exit(1)

# Both recognised options print their information and exit immediately;
# the message filter further down only runs when neither was given.
for opt, val in opts:
    if opt == "-h":
        Usage()
        sys.exit(0)
    if opt == "-v":
        # Parenthesised form works as both a Python 2 print statement and
        # a Python 3 print() call.
        print(RCSID)
        sys.exit(0)
def _boundary_value(line, lline):
    """Return the separator string that follows BOUNDARY on this line.

    line is the raw input line and lline its lower-cased copy; the caller
    has already checked that lline contains BOUNDARY.  Returns "" when no
    usable value follows the marker.
    """
    # Locate the marker in the lower-cased copy so that any capitalisation
    # of it is accepted, then slice the raw line so the separator keeps its
    # original case (it is later matched against raw lines).
    # Assumes lower-casing does not change the line's length, which holds
    # for ASCII text.
    start = lline.find(BOUNDARY) + len(BOUNDARY)
    value = line[start:].strip()
    if value[:1] == '"':
        # Quoted value: keep only the text up to the closing quote, so
        # anything after it on the line is not taken as separator text.
        return value[1:].split('"')[0]
    # Unquoted value: stop at a following ';' and drop a dangling quote.
    value = value.split(";")[0].strip()
    if value[-1:] == '"':
        value = value[:-1]
    return value


lines = sys.stdin.readlines()
numlin = len(lines)
MIMESEP = ""
# One entry per section of the message:
#   [index of first line, is-HTML flag, index one past the last line]
# The end index is appended when the next section starts (or at the very
# end for the final section).
ATTACH = [[0, False]]

# Determine MIME boundary, if any, and find all attachments.
# Along the way, mark any HTML attachments so we can ignore later.
for x, line in enumerate(lines):
    lline = line.lower()

    # Keep track of current MIME separator string.  A marker with nothing
    # after it leaves the previous separator in force.
    if BOUNDARY in lline:
        found = _boundary_value(line, lline)
        if found:
            MIMESEP = found

    # Any line containing the separator closes the current section and
    # opens the next one, starting at this same line.
    if MIMESEP and MIMESEP in line:
        ATTACH[-1].append(x)
        ATTACH.append([x, False])

    # A trigger on this line marks the section it belongs to as HTML.  An
    # empty STARTHTML0 is ignored because the empty string occurs in every
    # line and would flag everything.
    if (STARTHTML0 and STARTHTML0 in lline) or \
       (STARTHTML1 in lline and STARTHTML2 in lline):
        ATTACH[-1][1] = True

# Make sure last partition list is filled properly
ATTACH[-1].append(numlin)

# Now output everything which is not HTML
for part in ATTACH:
    if not part[1]:
        sys.stdout.writelines(lines[part[0]:part[2]])