ports//devel/drpython/work/drpython-3.10.13/drEncoding.py

#Edited by Dan:
#Now takes raw encoding as arguments for Set/Get EncodedText.
#Also removed some uneeded code.

#***************************************************************************
#drDetectUTF_8
#	Programmer:	limodou
#	E-mail:		limodou@users.sourceforge.net
#
#	Copyright 2004 limodou
#
#	Distributed under the terms of the GPL (GNU Public License)
#
#	DrPython is free software; you can redistribute it and/or modify
#	it under the terms of the GNU General Public License as published by
#	the Free Software Foundation; either version 2 of the License, or
#	(at your option) any later version.
#
#	This program is distributed in the hope that it will be useful,
#	but WITHOUT ANY WARRANTY; without even the implied warranty of
#	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#	GNU General Public License for more details.
#
#	You should have received a copy of the GNU General Public License
#	along with this program; if not, write to the Free Software
#	Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

import wx, re

def utf8Detect(text):
	"""Detect if a string is utf-8 encoding"""
	lastch=0
	begin=0
	BOM=True
	BOMchs=(0xEF, 0xBB, 0xBF)
	good=0
	bad=0
	for char in text:
		ch=ord(char)
		if begin<3:
			BOM=(BOMchs[begin]==ch) and BOM
			begin += 1
			continue
		if (begin==4) and (BOM==True):
			break;
		if (ch & 0xC0) == 0x80:
			if (lastch & 0xC0) == 0xC0:
				good += 1
			elif (lastch &0x80) == 0:
				bad += 1
		elif (lastch & 0xC0) == 0xC0:
			bad += 1
		
		lastch = ch

	#modi limodou 2004/04/16
	#if all characters are ascii, its encoding also can be ascii encoding or default encoding
	#in most cases, I think it's better that not recognizing it as utf-8 encoding
	if (((begin == 4) and (BOM == True)) or
		(good >= bad and good>0)):
		return True
	else:
		return False

#/drDetectUTF_8
#***************************************************************************

#Autodetect:  If the user has enabled autodetection,
#and the file has the proper special comment for encoding,
#return the encoding used (or None if nothing is found):
	
reencoding = re.compile(r'\#\s*-\*-.+-\*-')

def CheckForEncodingComment(text):
	encodingmatch = reencoding.search(text)
	if encodingmatch is not None:
		t = encodingmatch.group()		
		i = t.find(':') + 1
		encoding = t[i:].rstrip('-*-').strip()
		
		return encoding
	
	return None

#What if python is not built with unicode support, but wxPython is?
#So, here is a safe way to test for ascii.

def CheckAscii(text):
	#Optimize this a little.
	_ord = ord
	for a in text:
		n = _ord(a)
		if (n < 0) or (n > 127):
			return False
	return True

def DecodeTextWith(text, encoding):
	#Try the encoding
	try:
		text = text.encode(encoding)
		return text
	except:
		pass
	
	return None

def DecodeText(frame, text, encoding='<Default Encoding>'):
	if wx.USE_UNICODE:
				
		etext = None
		
		if encoding != '<Default Encoding>':
			etext = DecodeTextWith(text, encoding)
			if etext is not None:
				return etext
			
		defaultencoding = len(frame.prefs.defaultencoding) > 0

		if frame.prefs.autodetectencoding:
			enco = CheckForEncodingComment(text)
			if enco is not None:
				etext = DecodeTextWith(text, enco)
				if etext is not None:
					return etext
			
			if utf8Detect(text):
				etext = DecodeTextWith(text, 'utf-8')
				if etext is not None:
					return etext
				
		if defaultencoding:
			etext = DecodeTextWith(text, frame.prefs.defaultencoding)
			if etext is None:
				frame.ShowMessage('There was an error using the encoding "%s".' % (frame.prefs.defaultencoding), 'Encoding Error')
			else:
				return etext
		
		if CheckAscii(text):
			return text
		
		if etext is None:
			raise Exception, 'Encoding Error!'
			
	else:
		return text

def EncodeTextWith(text, encoding):
	#Try the encoding
	try:
		text = unicode(text, encoding)
		return text
	except:
		pass
	
	return None	

def EncodeText(frame, text, encoding='<Default Encoding>', returnEncoding=False):
	
	if wx.USE_UNICODE:
				
		etext = None
		
		if encoding != '<Default Encoding>':
			etext = EncodeTextWith(text, encoding)
			if etext is not None:
				if returnEncoding:
					return etext, encoding
				else:
					return etext
			
		defaultencoding = len(frame.prefs.defaultencoding) > 0

		if frame.prefs.autodetectencoding:
			enco = CheckForEncodingComment(text)
			if enco is not None:
				etext = EncodeTextWith(text, enco)
				if etext is not None:
					if returnEncoding:
						return etext, enco
					else:
						return etext
			
			if utf8Detect(text):
				etext = EncodeTextWith(text, 'utf-8')
				if etext is not None:
					if returnEncoding:
						return etext, 'utf-8'
					else:
						return etext
				
		if defaultencoding:
			etext = EncodeTextWith(text, frame.prefs.defaultencoding)
			if etext is None:
				frame.ShowMessage('There was an error using the encoding "%s".' % (frame.prefs.defaultencoding), 'Encoding Error')
			else:
				if returnEncoding:
					return etext, frame.prefs.defaultencoding
				else:
					return etext
		
		if CheckAscii(text):				
			if returnEncoding:
				return text, 'ascii'
			else:
				return text
		
		if etext is None:
			raise Exception, 'Encoding Error!'
			
	else:		
		if returnEncoding:
			return text, '<Default Encoding>'
		else:
			return text
syntax highlighted by Code2HTML, v. 0.9.1