/*
* ratCode.c --
*
* This file contains basic support for decoding and encoding of
* strings coded in various MIME-encodings.
*
* TkRat software and its included text is Copyright 1996-2002 by
* Martin Forssén
*
* The full text of the legal notice is contained in the file called
* COPYRIGHT, included with this distribution.
*/
#include "rat.h"
/*
 * Upper-case hexadecimal digits in value order (index == nibble value),
 * followed by the terminating null. Used by the quoted-printable
 * encoders and decoders in this file. Not static, so it is visible to
 * other translation units.
 */
char alphabetHEX[17] = "0123456789ABCDEF";
/*
 * Base64 alphabet in value order (index == 6-bit value).
 * It consists of the 64 digits plus '=' (the padding character, at
 * index 64) and the terminating null.
 */
static char alphabet64[66] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=";
/*
 * Alphabet used by the modified utf-7 conversion routines.
 * Identical to alphabet64 except that ',' takes the place of '/'.
 * It consists of 64 digits plus '=' and the terminating null.
 */
static char modified64[66] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,=";
/*
 * RFC2047_MAX_LINE_LENGTH is the line width RatEncodeHeaderLine folds
 * at; RFC2047_MAX_ENCODED_WORD_LENGTH sizes the work buffers in
 * CreateEncWord and thereby caps the length of one encoded word.
 */
#define RFC2047_MAX_LINE_LENGTH 75
#define RFC2047_MAX_ENCODED_WORD_LENGTH 75
/*
 * Forward declarations of the file-local helpers defined below.
 */
static int FindMimeHdr(Tcl_Interp *interp, unsigned char *hdr,
unsigned char **sPtr, unsigned char **ePtr, Tcl_Encoding *encoding,
int *code, unsigned char **data, int *length);
static int RatUtf8to16(const unsigned char *src, unsigned char *dst);
static int RatUtf16to8(const unsigned char *src, unsigned char *dst);
static int RatCheckEncoding(Tcl_Interp *interp, char *encoding_name,
const char *string, int length);
static int CreateEncWord(Tcl_Interp *interp, Tcl_Encoding enc,
const char *charset, unsigned char *raw, int length,
Tcl_DString *dest, int maxUse);
/*
 *----------------------------------------------------------------------
 *
 * FindMimeHdr --
 *
 *      Find the first rfc2047 encoded word ("=?charset?X?text?=", where
 *      X is B or Q in either case) in the null terminated string hdr.
 *
 * Results:
 *      Returns 1 if an encoded word was found and 0 otherwise.
 *      On success the output arguments are set as follows:
 *        *sPtr      points at the leading "=?"
 *        *ePtr      points just past the trailing "?="
 *        *encoding  result of RatGetEncoding() for the charset name
 *                   (may be NULL if the charset is unknown)
 *        *code      ENCBASE64 or ENCQUOTEDPRINTABLE
 *        *data      points at the first byte of the encoded text
 *        *length    number of bytes of encoded text
 *      *sPtr may be modified even when 0 is returned.
 *
 * Side effects:
 *      The byte following the charset name is temporarily overwritten
 *      with a null (and restored) while the encoding is looked up, so
 *      hdr must be writable.
 *
 *----------------------------------------------------------------------
 */
static int
FindMimeHdr(Tcl_Interp *interp, unsigned char *hdr, unsigned char **sPtr,
            unsigned char **ePtr, Tcl_Encoding *encoding, int *code,
            unsigned char **data, int *length)
{
    unsigned char *sCharset, *eCharset, *cPtr, c;

    for (cPtr = hdr; *cPtr; cPtr++) {
        if ('=' != cPtr[0] || '?' != cPtr[1]) {
            continue;
        }
        *sPtr = cPtr;
        sCharset = cPtr+2;

        /* Skip over the charset name */
        for (cPtr += 2; *cPtr && '?' != *cPtr; cPtr++);
        if ('?' != *cPtr) {
            return 0;
        }

        /*
         * The string may end right after this '?'. Check cPtr[1]
         * before touching cPtr[2] so we never read past the terminator.
         */
        if ('\0' == cPtr[1]) {
            return 0;
        }
        if ('?' != cPtr[2]) {
            continue;
        }
        switch (cPtr[1]) {
        case 'b':
        case 'B':
            *code = ENCBASE64;
            break;
        case 'q':
        case 'Q':
            *code = ENCQUOTEDPRINTABLE;
            break;
        default:
            /* Unknown encoding letter; resume the search */
            continue;
        }
        eCharset = cPtr;
        *data = cPtr+3;

        /* Measure the encoded text, which runs up to the closing "?=" */
        for (cPtr += 3, *length = 0;
             *cPtr && ('?' != *cPtr || '=' != cPtr[1]);
             cPtr++, (*length)++);
        if ('?' != *cPtr) {
            return 0;
        }
        *ePtr = cPtr+2;

        /* Null terminate the charset name just for the lookup */
        c = *eCharset;
        *eCharset = '\0';
        *encoding = RatGetEncoding(interp, (char*)sCharset);
        *eCharset = c;
        return 1;
    }
    return 0;
}
/*
 *----------------------------------------------------------------------
 *
 * RatDecodeHeader --
 *
 *      Decodes a header line encoded according to rfc2047. Every
 *      encoded word found by FindMimeHdr is converted to utf-8 and
 *      whitespace between two adjacent encoded words is dropped.
 *      Encoded words with an unknown charset, or whose base64 data can
 *      not be decoded, are copied to the result unchanged.
 *      The adr argument is currently not used.
 *
 * Results:
 *      Returns a pointer to a static storage area which is valid until
 *      the next call to this function. An empty string is returned
 *      for NULL or empty input.
 *
 * Side effects:
 *      The input is temporarily modified (and restored) by FindMimeHdr.
 *
 * TODO, handle address entries correct
 *
 *----------------------------------------------------------------------
 */

/*
 * Value of the hexadecimal digit c (either case), or -1 if c is not a
 * hexadecimal digit.
 */
static int
RatDecodeHeaderHexValue(int c)
{
    if (c >= '0' && c <= '9') return c - '0';
    if (c >= 'A' && c <= 'F') return c - 'A' + 10;
    if (c >= 'a' && c <= 'f') return c - 'a' + 10;
    return -1;
}

char*
RatDecodeHeader(Tcl_Interp *interp, const char *data, int adr)
{
    static Tcl_DString ds;
    static int initialized = 0;
    unsigned char *sPtr, *ePtr, *decoded, *text, *cPtr,
        *point = (unsigned char*)data;
    int length, code, first = 1, hi, lo;
    unsigned long dlen;
    Tcl_Encoding encoding;
    Tcl_DString tmp, *myPtr = NULL;

    if (!data || !*data) {
        return "";
    }
    if (!initialized) {
        Tcl_DStringInit(&ds);
        initialized = 1;
    } else {
        Tcl_DStringSetLength(&ds, 0);
    }

    /*
     * Check for headers from buggy programs (with raw eight-bit data
     * in them). Such headers are first converted to utf-8 using the
     * default (NULL) encoding.
     */
    for (cPtr = (unsigned char*)data; *cPtr; cPtr++) {
        if (*cPtr & 0x80) {
            myPtr = (Tcl_DString*)ckalloc(sizeof(Tcl_DString));
            Tcl_DStringInit(myPtr);
            Tcl_ExternalToUtfDString(NULL, data, -1, myPtr);
            data = Tcl_DStringValue(myPtr);
            point = (unsigned char*)data;
            break;
        }
    }

    while (FindMimeHdr(interp, point, &sPtr, &ePtr, &encoding, &code, &text,
                       &length)) {
        /*
         * Copy the plain text in front of the encoded word
         */
        if (sPtr != point) {
            if (!first) {
                /* Pure whitespace between two encoded words is dropped */
                for (cPtr = point; cPtr < sPtr && isspace(*cPtr); cPtr++);
                if (cPtr < sPtr) {
                    Tcl_DStringAppend(&ds, (char*)point, sPtr-point);
                }
            } else {
                for (cPtr = point; cPtr < sPtr; cPtr++) {
                    if ('\n' != *cPtr) {
                        Tcl_DStringAppend(&ds, (char*)cPtr, 1);
                    }
                }
            }
        }
        first = 0;
        point = ePtr;
        if (NULL == encoding) {
            /* Unknown charset, keep the encoded word as it is */
            Tcl_DStringAppend(&ds, (char*)sPtr, ePtr-sPtr);
            continue;
        }

        if (ENCBASE64 == code) {
            decoded = (unsigned char*)rfc822_base64(text, length, &dlen);
            if (NULL == decoded) {
                /* Malformed base64, keep the encoded word as it is */
                Tcl_DStringAppend(&ds, (char*)sPtr, ePtr-sPtr);
                Tcl_FreeEncoding(encoding);
                continue;
            }
        } else {
            decoded = (unsigned char*)ckalloc(length+1);
            for (dlen = 0, cPtr = text; cPtr-text < length; cPtr++) {
                if ('_' == *cPtr) {
                    decoded[dlen++] = ' ';
                } else if ('=' == *cPtr
                           && (cPtr-text)+2 < length
                           && (hi = RatDecodeHeaderHexValue(cPtr[1])) >= 0
                           && (lo = RatDecodeHeaderHexValue(cPtr[2])) >= 0) {
                    decoded[dlen++] = (unsigned char)((hi << 4) | lo);
                    cPtr += 2;
                } else {
                    /*
                     * Ordinary character, or an '=' which is not
                     * followed by two hex digits inside the word; the
                     * latter is passed through literally.
                     */
                    decoded[dlen++] = *cPtr;
                }
            }
            decoded[dlen] = '\0';
        }
        Tcl_ExternalToUtfDString(encoding, (char*)decoded, dlen, &tmp);
        ckfree((char*)decoded);
        Tcl_DStringAppend(&ds,
                          Tcl_DStringValue(&tmp), Tcl_DStringLength(&tmp));
        Tcl_DStringFree(&tmp);

        /* RatGetEncoding hands out a reference which we must release */
        Tcl_FreeEncoding(encoding);
    }

    /*
     * Copy whatever follows the last encoded word, minus newlines
     */
    for (sPtr = point; *sPtr; sPtr++) {
        if ('\n' != *sPtr) {
            Tcl_DStringAppend(&ds, (char*)sPtr, 1);
        }
    }
    if (myPtr) {
        Tcl_DStringFree(myPtr);
        ckfree((char*)myPtr);
    }
    return Tcl_DStringValue(&ds);
}
/*
 *----------------------------------------------------------------------
 *
 * RatDecode --
 *
 *      General decoding interface. It takes as arguments a chunk of
 *      data and the content transfer encoding (cte) the data is in.
 *      ENCBASE64 and ENCQUOTEDPRINTABLE are decoded, any other cte
 *      value means the data is used as it is.
 *
 *      If charset is non-NULL the decoded bytes are converted from
 *      that character set to utf-8 and all \r characters are removed.
 *      If charset is NULL the data is returned in raw binary form.
 *
 *      Malformed input is handled leniently: characters outside the
 *      base64 alphabet are skipped, an incomplete trailing base64
 *      group is treated as if it was padded, and an '=' in
 *      quoted-printable data which is neither a soft line break nor
 *      followed by two hex digits is copied literally.
 *
 * Results:
 *      A pointer to a ckalloc:ed and initialized Tcl_DString holding
 *      the decoded data. It is the callers responsibility to free
 *      both the string and the structure.
 *
 * Side effects:
 *      None.
 *
 *
 *----------------------------------------------------------------------
 */

/*
 * Value of the hexadecimal digit c (either case), or -1 if c is not a
 * hexadecimal digit.
 */
static int
RatDecodeHexValue(int c)
{
    if (c >= '0' && c <= '9') return c - '0';
    if (c >= 'A' && c <= 'F') return c - 'A' + 10;
    if (c >= 'a' && c <= 'f') return c - 'a' + 10;
    return -1;
}

Tcl_DString*
RatDecode(Tcl_Interp *interp, int cte, const char *data, int length,
          const char *charset)
{
    const unsigned char pad = 64;       /* Index of '=' in alphabet64 */
    unsigned char quad[4], out[3];
    char *from, *to, *end;
    const char *hit, *bytes;
    int pos = 0, run, n, count, hi, lo, nbytes;
    Tcl_Encoding enc;
    Tcl_DString raw,
        *dsPtr = (Tcl_DString*)ckalloc(sizeof(Tcl_DString));

    /*
     * First undo the transfer encoding. The complete result is
     * collected before any character set conversion is done, so that
     * multibyte characters are never split between two conversions.
     */
    Tcl_DStringInit(&raw);
    if (cte == ENCBASE64) {
        while (pos < length) {
            /* Collect the next four base64 digits, skipping junk */
            for (n = 0; pos < length && n < 4; pos++) {
                /* strchr() would match '\0' against the terminator */
                if ('\0' != data[pos]
                    && NULL != (hit = strchr(alphabet64, data[pos]))) {
                    quad[n++] = (unsigned char)(hit - alphabet64);
                }
            }
            if (n < 2 || pad == quad[0] || pad == quad[1]) {
                /* Not enough digits to form even one byte */
                continue;
            }
            while (n < 4) {
                quad[n++] = pad;
            }
            count = 0;
            out[count++] = (unsigned char)
                ((quad[0] << 2) | ((quad[1] >> 4) & 0x3));
            if (pad != quad[2]) {
                out[count++] = (unsigned char)
                    ((quad[1] << 4) | ((quad[2] >> 2) & 0xf));
                if (pad != quad[3]) {
                    out[count++] = (unsigned char)
                        ((quad[2] << 6) | (quad[3] & 0x3f));
                }
            }
            Tcl_DStringAppend(&raw, (char*)out, count);
        }
        bytes = Tcl_DStringValue(&raw);
        nbytes = Tcl_DStringLength(&raw);

    } else if (cte == ENCQUOTEDPRINTABLE) {
        while (pos < length) {
            /* Copy a run of literal characters in one go */
            for (run = pos; run < length && '=' != data[run]; run++);
            if (run > pos) {
                Tcl_DStringAppend(&raw, data+pos, run-pos);
                pos = run;
                continue;
            }
            /* data[pos] is '=' */
            if (pos+1 < length && '\n' == data[pos+1]) {
                /* Soft line break "=\n" */
                pos += 2;
            } else if (pos+1 < length && '\r' == data[pos+1]) {
                /* Soft line break "=\r\n" */
                pos += 2;
                if (pos < length && '\n' == data[pos]) {
                    pos++;
                }
            } else if (pos+2 < length
                       && (hi = RatDecodeHexValue(
                               (unsigned char)data[pos+1])) >= 0
                       && (lo = RatDecodeHexValue(
                               (unsigned char)data[pos+2])) >= 0) {
                out[0] = (unsigned char)((hi << 4) | lo);
                Tcl_DStringAppend(&raw, (char*)out, 1);
                pos += 3;
            } else {
                /* Not a valid escape, keep the '=' */
                Tcl_DStringAppend(&raw, "=", 1);
                pos++;
            }
        }
        bytes = Tcl_DStringValue(&raw);
        nbytes = Tcl_DStringLength(&raw);

    } else {
        /* No transfer encoding to undo */
        bytes = (length > 0) ? data : "";
        nbytes = (length > 0) ? length : 0;
    }

    /*
     * Then convert to utf-8 (if wanted) and remove \r
     */
    if (charset) {
        enc = RatGetEncoding(interp, charset);
        /* Tcl_ExternalToUtfDString initializes dsPtr itself */
        Tcl_ExternalToUtfDString(enc, bytes, nbytes, dsPtr);
        /* Release the reference handed out by RatGetEncoding */
        Tcl_FreeEncoding(enc);

        from = to = Tcl_DStringValue(dsPtr);
        end = from + Tcl_DStringLength(dsPtr);
        for (; from < end; from++) {
            if ('\r' != *from) {
                *to++ = *from;
            }
        }
        Tcl_DStringSetLength(dsPtr, to - Tcl_DStringValue(dsPtr));
    } else {
        Tcl_DStringInit(dsPtr);
        Tcl_DStringAppend(dsPtr, bytes, nbytes);
    }
    Tcl_DStringFree(&raw);
    return dsPtr;
}
/*
 *----------------------------------------------------------------------
 *
 * CreateEncWord --
 *
 *      Tries to create an encoded word (if needed) from the given
 *      string. Exactly length bytes from raw are used. Pure seven-bit
 *      input is appended to dest as it is (regardless of maxUse).
 *      Anything else is converted to the external encoding enc and
 *      appended as one "=?charset?Q?...?=" word which is limited to
 *      maxUse characters, and never more than
 *      RFC2047_MAX_ENCODED_WORD_LENGTH.
 *
 * Results:
 *      Returns non-zero if the encoding was successful. Zero is
 *      returned, and dest is left untouched, if the conversion failed
 *      or the encoded word did not fit.
 *
 * Side effects:
 *      None.
 *
 *
 *----------------------------------------------------------------------
 */
static int
CreateEncWord(Tcl_Interp *interp, Tcl_Encoding enc, const char *charset,
              unsigned char *raw, int length, Tcl_DString *dest, int maxUse)
{
    unsigned char word[RFC2047_MAX_ENCODED_WORD_LENGTH+1],
        ext[RFC2047_MAX_ENCODED_WORD_LENGTH+1];
    unsigned char ch;
    Tcl_EncodingState state;
    int in, out, numRead, numWritten, status, limit;

    /*
     * Seven-bit text needs no encoding at all
     */
    in = 0;
    while (in < length && raw[in] < 0x80) {
        in++;
    }
    if (in == length) {
        Tcl_DStringAppend(dest, (char*)raw, length);
        return 1;
    }

    /*
     * We have to encode. The word can never be larger than our buffer
     */
    if (maxUse > sizeof(word)-1) {
        maxUse = sizeof(word)-1;
    }

    /*
     * Convert to the external encoding; all input must be consumed
     */
    status = Tcl_UtfToExternal(interp, enc, (char*)raw, length,
                               TCL_ENCODING_START|TCL_ENCODING_END,
                               &state, (char*)ext, sizeof(ext),
                               &numRead, &numWritten, NULL);
    if (TCL_OK != status || numRead != length) {
        return 0;
    }

    /*
     * Build the Q-encoded word, keeping two positions free for the
     * closing "?="
     */
    snprintf((char*)word, sizeof(word), "=?%s?Q?", charset);
    out = strlen((char*)word);
    limit = maxUse-2;
    for (in = 0; in < numWritten && out < limit; in++) {
        ch = ext[in];
        if (' ' == ch) {
            word[out++] = '_';
            continue;
        }
        if (!(ch & 0x80) && isprint(ch)
            && '=' != ch && '_' != ch && '?' != ch) {
            word[out++] = ch;
            continue;
        }
        /* This character must be written as =XX */
        if (out+3 >= limit) {
            return 0;
        }
        word[out++] = '=';
        word[out++] = alphabetHEX[ch>>4];
        word[out++] = alphabetHEX[ch&0xf];
    }
    if (in < numWritten) {
        /* Ran out of room before everything was encoded */
        return 0;
    }
    word[out++] = '?';
    word[out++] = '=';
    Tcl_DStringAppend(dest, (char*)word, out);
    return 1;
}
/*
 *----------------------------------------------------------------------
 *
 * RatEncodeHeaderLine --
 *
 *      Encodes one header line according to MIME (rfc2047) and folds
 *      it into lines of at most RFC2047_MAX_LINE_LENGTH characters
 *      where possible.
 *      The nameLength argument should tell how long the header name is
 *      in characters. This is so that the line folding can do its job
 *      properly.
 *
 *      The character set is the first entry of option(charset_candidates)
 *      which can represent the whole line, else option(charset), else
 *      utf-8 if neither is available.
 *
 * Results:
 *      A block of encoded header line. This block of data will be
 *      valid until the next call to this function. NULL is returned
 *      if line is NULL.
 *
 * Side effects:
 *      None.
 *
 *
 *----------------------------------------------------------------------
 */
char*
RatEncodeHeaderLine (Tcl_Interp *interp, Tcl_Obj *line, int nameLength)
{
    static Tcl_DString ds;
    static int initialized = 0;
    Tcl_Obj **objv = NULL, *candidates;
    int i, objc = 0, l, l1, pre = nameLength, maxUse, avail, remaining;
    char *s;
    const char *charset;
    Tcl_Encoding enc;

    if (NULL == line) {
        return NULL;
    }
    if (!initialized) {
        Tcl_DStringInit(&ds);
        initialized = 1;
    } else {
        Tcl_DStringSetLength(&ds, 0);
    }

    /*
     * Find suitable encoding. A missing or malformed candidate list is
     * treated as an empty list.
     */
    candidates = Tcl_GetVar2Ex(interp, "option", "charset_candidates",
                               TCL_GLOBAL_ONLY);
    if (NULL == candidates
        || TCL_OK != Tcl_ListObjGetElements(interp, candidates,
                                            &objc, &objv)) {
        objc = 0;
    }
    s = Tcl_GetStringFromObj(line, &l);
    for (i=0; i<objc; i++) {
        if (RatCheckEncoding(interp, Tcl_GetString(objv[i]), s, l)) {
            break;
        }
    }
    if (i<objc) {
        charset = Tcl_GetString(objv[i]);
    } else {
        charset = Tcl_GetVar2(interp, "option", "charset", TCL_GLOBAL_ONLY);
    }
    if (NULL == charset) {
        /* The name ends up in a %s format, so it must not be NULL */
        charset = "utf-8";
    }
    enc = RatGetEncoding(interp, charset);

    /*
     * Do while we have characters left to consume
     *  - Find candidate for line-break
     *  - Loop while it can NOT be encoded into a word
     *    - Search backwards for new candidate
     *    - If no new candidate is found switch to test every character
     */
    while (*s) {
        remaining = (int)strlen(s);
        avail = RFC2047_MAX_LINE_LENGTH-pre;
        if (avail < 0) {
            avail = 0;
        }
        if (remaining+pre <= RFC2047_MAX_LINE_LENGTH) {
            l = remaining;
        } else {
            for (l = avail; l>0 && !isspace((unsigned char)s[l]); l--);
            if (l <= 0) {
                /*
                 * No whitespace to break at, use all the room there is
                 * but never cut a multibyte utf-8 character in two.
                 */
                l = avail;
                while (l > 0 && 0x80 == (s[l] & 0xc0)) {
                    l--;
                }
            }
        }
        maxUse = RFC2047_MAX_LINE_LENGTH-pre;
        while (!CreateEncWord(interp, enc, charset, (unsigned char*)s, l,
                              &ds, maxUse)) {
            for (l1 = l-1; l1 > 0 && !isspace((unsigned char)s[l1]); l1--);
            if (0 < l1) {
                l = l1;
            } else {
                /* Shrink one whole character at a time */
                maxUse = 1024;
                l--;
                while (l > 0 && 0x80 == (s[l] & 0xc0)) {
                    l--;
                }
            }
            if (l <= 0) {
                /*
                 * Not even a single character can be encoded. Copy it
                 * unencoded so that we are guaranteed to make progress
                 * instead of looping forever.
                 */
                for (l = 1; 0x80 == (s[l] & 0xc0); l++);
                Tcl_DStringAppend(&ds, s, l);
                break;
            }
        }
        s += l;
        if (*s) {
            /* Fold, and carry leading whitespace over to the new line */
            Tcl_DStringAppend(&ds, "\r\n", 2);
            for (pre=0;
                 isspace((unsigned char)*s) && pre<RFC2047_MAX_LINE_LENGTH;
                 s++,pre++) {
                Tcl_DStringAppend(&ds, s, 1);
            }
            if (0 == pre) {
                /* A continuation line must start with whitespace */
                Tcl_DStringAppend(&ds, " ", 1);
                pre = 1;
            }
        }
    }
    Tcl_FreeEncoding(enc);
    return Tcl_DStringValue(&ds);
}
/*
 *----------------------------------------------------------------------
 *
 * RatEncodeAddresses --
 *
 *      Encodes the fullname portions of a bunch of addresses. Every
 *      address in the list whose personal field contains at least one
 *      eight-bit character gets that field replaced by the result of
 *      RatEncodeHeaderLine.
 *
 * Results:
 *      None.
 *
 * Side effects:
 *      The fullnames of the addresses may change. The old string is
 *      released with ckfree and the new one is allocated with cpystr.
 *
 *
 *----------------------------------------------------------------------
 */
void
RatEncodeAddresses(Tcl_Interp *interp, ADDRESS *adrPtr)
{
    Tcl_Obj *oPtr;
    char *cPtr;

    for (; adrPtr; adrPtr = adrPtr->next) {
        if (!adrPtr->personal) {
            continue;
        }
        for (cPtr = adrPtr->personal; *cPtr; cPtr++) {
            if (*cPtr & 0x80) {
                oPtr = Tcl_NewStringObj(adrPtr->personal, -1);
                Tcl_IncrRefCount(oPtr);
                cPtr = RatEncodeHeaderLine(interp, oPtr, 0);
                Tcl_DecrRefCount(oPtr);
                ckfree(adrPtr->personal);
                adrPtr->personal = cpystr(cPtr);
                /*
                 * The whole name has been encoded now. Stop scanning;
                 * cPtr points into the static buffer of
                 * RatEncodeHeaderLine and the old string is gone.
                 */
                break;
            }
        }
    }
}
/*
 *----------------------------------------------------------------------
 *
 * RatGetEncoding --
 *
 *      Return the tcl-encoding attached to the given name. The name is
 *      converted to lower case and looked up in the global array
 *      charsetMapping, which may map a MIME-name into a tcl-name. If
 *      there is no such entry the lower case name is used directly.
 *
 * Results:
 *      A Tcl_Encoding token, or NULL if name is NULL or no matching
 *      encoding exists. The given encoding must be freed by the
 *      caller by calling Tcl_FreeEncoding().
 *
 * Side effects:
 *      None.
 *
 *
 *----------------------------------------------------------------------
 */
Tcl_Encoding
RatGetEncoding(Tcl_Interp *interp, const char *name)
{
    char lower[256];
    const char *mapped;

    if (NULL == name) {
        return NULL;
    }

    /* Names longer than the buffer are silently truncated */
    strlcpy(lower, name, sizeof(lower));
    lcase(lower);

    mapped = Tcl_GetVar2(interp, "charsetMapping", lower, TCL_GLOBAL_ONLY);
    return Tcl_GetEncoding(interp, mapped ? mapped : lower);
}
/*
 *----------------------------------------------------------------------
 *
 * RatCheckEncoding --
 *
 *      Check if the given encoding can encode the given string. The
 *      string is run through Tcl_UtfToExternal in chunks and the
 *      converted output is thrown away. A negative length means that
 *      string is null terminated.
 *
 * Results:
 *      Non-zero if no character in the given string was reported as
 *      unrepresentable (TCL_CONVERT_UNKNOWN). Zero if such a character
 *      was found or the encoding does not exist.
 *
 * Side effects:
 *      None.
 *
 *
 *----------------------------------------------------------------------
 */
static int
RatCheckEncoding(Tcl_Interp *interp, char *encoding_name,
                 const char *string, int length)
{
    Tcl_EncodingState state;
    Tcl_Encoding enc;
    char buf[1024];
    int ret = TCL_OK, in,
        flags = TCL_ENCODING_STOPONERROR|TCL_ENCODING_START;

    if (NULL == (enc = RatGetEncoding(interp, encoding_name))) {
        return 0;
    }
    if (length < 0) {
        length = (int)strlen(string);
    }
    while (length > 0 && TCL_CONVERT_UNKNOWN != ret) {
        in = 0;
        ret = Tcl_UtfToExternal(interp, enc, string, length, flags,
                                &state, buf, sizeof(buf),
                                &in, NULL, NULL);
        if (in <= 0) {
            /*
             * Nothing was consumed (for example a truncated multibyte
             * sequence at the end). Trying again would give the same
             * result forever, so stop here.
             */
            break;
        }
        string += in;
        length -= in;
        /* Only the first chunk starts a new conversion */
        flags &= ~TCL_ENCODING_START;
    }
    Tcl_FreeEncoding(enc);
    return TCL_CONVERT_UNKNOWN != ret;
}
/*
 *----------------------------------------------------------------------
 *
 * RatCheckEncodingsCmd --
 *
 *      See ../doc/interface for a descriptions of arguments and result.
 *      Takes the name of a variable and a list of character sets, and
 *      finds the first character set in the list which RatCheckEncoding
 *      accepts for the value of the variable.
 *
 * Results:
 *      A standard tcl result. The interpreter result is the name of
 *      the first acceptable character set, or the empty string if
 *      none of them will do. TCL_ERROR is returned on bad usage, if
 *      the variable can not be read or if the charsets argument is
 *      not a valid list.
 *
 * Side effects:
 *      None.
 *
 *
 *----------------------------------------------------------------------
 */
int
RatCheckEncodingsCmd(ClientData dummy, Tcl_Interp *interp, int objc,
                     Tcl_Obj *const objv[])
{
    int i, listLength, srcLen;
    Tcl_Obj *oPtr, *vPtr;
    char *src;

    if (3 != objc) {
        Tcl_AppendResult(interp, "Usage: ", Tcl_GetString(objv[0]),
                         " variable charsets", (char*) NULL);
        return TCL_ERROR;
    }

    /* Fail with a proper error message instead of using a NULL value */
    vPtr = Tcl_GetVar2Ex(interp, Tcl_GetString(objv[1]), NULL,
                         TCL_LEAVE_ERR_MSG);
    if (NULL == vPtr) {
        return TCL_ERROR;
    }
    if (TCL_OK != Tcl_ListObjLength(interp, objv[2], &listLength)) {
        return TCL_ERROR;
    }

    src = Tcl_GetStringFromObj(vPtr, &srcLen);
    for (i=0; i<listLength; i++) {
        if (TCL_OK != Tcl_ListObjIndex(interp, objv[2], i, &oPtr)
            || NULL == oPtr) {
            continue;
        }
        if (RatCheckEncoding(interp, Tcl_GetString(oPtr), src, srcLen)) {
            Tcl_SetObjResult(interp, oPtr);
            return TCL_OK;
        }
    }
    Tcl_SetResult(interp, "", TCL_STATIC);
    return TCL_OK;
}
/*
 *----------------------------------------------------------------------
 *
 * RatCode64 --
 *
 *      Encode the string representation of the given object in base64.
 *      A newline is added after every 18 groups of four characters
 *      (72 characters) and after the last group.
 *
 * Results:
 *      A new Tcl_Obj holding the encoded text.
 *
 * Side effects:
 *      None.
 *
 *
 *----------------------------------------------------------------------
 */
Tcl_Obj*
RatCode64(Tcl_Obj *sPtr)
{
    Tcl_Obj *result = Tcl_NewObj();
    unsigned char *in, quad[4];
    unsigned int b0, b1, b2;
    int remaining, groups = 0;

    in = (unsigned char*)Tcl_GetStringFromObj(sPtr, &remaining);
    while (remaining > 0) {
        /* Bytes beyond the end of the input count as zero */
        b0 = in[0];
        b1 = (remaining > 1) ? in[1] : 0;
        b2 = (remaining > 2) ? in[2] : 0;

        quad[0] = alphabet64[b0 >> 2];
        quad[1] = alphabet64[((b0 << 4) + (b1 >> 4)) & 0x3f];
        quad[2] = (remaining > 1)
            ? alphabet64[((b1 << 2) + (b2 >> 6)) & 0x3f] : '=';
        quad[3] = (remaining > 2) ? alphabet64[b2 & 0x3f] : '=';
        Tcl_AppendToObj(result, (char*)quad, 4);

        /* Break the line when it is full or when this was the last group */
        groups++;
        if (18 == groups || remaining < 4) {
            Tcl_AppendToObj(result, "\n", 1);
            groups = 0;
        }
        remaining -= 3;
        in += 3;
    }
    return result;
}
/*
 *----------------------------------------------------------------------
 *
 * RatUtf8to16 --
 *
 *      Convert the utf-8 character at src to UCS-2 and store it, high
 *      byte first, in the two bytes at dst. Only one, two and three
 *      byte sequences are understood. Anything else, including a
 *      sequence whose continuation bytes are missing or invalid, is
 *      converted as if the first byte was a character of its own.
 *
 * Results:
 *      Returns the number of bytes consumed from src (1, 2 or 3).
 *      dst is always written. The terminating null of src is never
 *      stepped over.
 *
 * Side effects:
 *      None.
 *
 *
 *----------------------------------------------------------------------
 */
static int
RatUtf8to16(const unsigned char *src, unsigned char *dst)
{
    /*
     * The continuation bytes are tested in order, so a null which
     * terminates the string early stops the tests before anything
     * behind it is read.
     */
    if (0xc0 == (src[0] & 0xe0)
        && 0x80 == (src[1] & 0xc0)) {
        dst[0] = (src[0] & 0x1f) >> 2;
        dst[1] = ((src[0] & 0x03) << 6) + (src[1] & 0x3f);
        return 2;
    }
    if (0xe0 == (src[0] & 0xf0)
        && 0x80 == (src[1] & 0xc0)
        && 0x80 == (src[2] & 0xc0)) {
        dst[0] = ((src[0] & 0x0f) << 4) + ((src[1] & 0x3f) >> 2);
        dst[1] = ((src[1] & 0x03) << 6) + (src[2] & 0x3f);
        return 3;
    }

    /* Plain ascii, or something we can not make sense of */
    dst[0] = 0;
    dst[1] = src[0];
    return 1;
}
/*
 *----------------------------------------------------------------------
 *
 * RatUtf16to8 --
 *
 *      Convert the UCS-2 character in src[0] (high byte) and src[1]
 *      (low byte) to utf-8 and store the result at dst. No terminating
 *      null is written.
 *
 * Results:
 *      Returns the number of bytes stored at dst (1, 2 or 3).
 *
 * Side effects:
 *      None.
 *
 *
 *----------------------------------------------------------------------
 */
static int
RatUtf16to8(const unsigned char *src, unsigned char *dst)
{
    const unsigned int hi = src[0], lo = src[1];

    if (hi < 0x08) {
        if (0 == hi && lo <= 0x7f) {
            /* Seven bits fit in a single byte */
            dst[0] = lo;
            return 1;
        }
        /* Up to eleven bits need two bytes */
        dst[0] = 0xc0 | (hi << 2) | (lo >> 6);
        dst[1] = 0x80 | (lo & 0x3f);
        return 2;
    }

    /* Everything else needs three bytes */
    dst[0] = 0xe0 | (hi >> 4);
    dst[1] = 0x80 | ((hi & 0x0f) << 2) | (lo >> 6);
    dst[2] = 0x80 | (lo & 0x3f);
    return 3;
}
/*
 *----------------------------------------------------------------------
 *
 * RatUtf8toMutf7 --
 *
 *      Convert the given utf-8 encoded text to modified utf-7.
 *      Seven-bit characters are copied as they are, except for '&'
 *      which becomes "&-". Every run of eight-bit characters is
 *      converted to UCS-2 and written as '&', the modified base64
 *      representation of the run (without padding) and a closing '-'.
 *      The closing '-' is always written, so that a literal '-' which
 *      follows a run is not mistaken for the terminator.
 *
 * Results:
 *      Returns a pointer to a static buffer containing the new text.
 *      The buffer is valid until the next call to this function.
 *
 * Side effects:
 *      None.
 *
 *
 *----------------------------------------------------------------------
 */

/*
 * Make sure the buffer *bufPtr, whose current size is *sizePtr, has
 * room for at least needed bytes.
 */
static void
RatUtf8toMutf7Reserve(unsigned char **bufPtr, int *sizePtr, int needed)
{
    if (*sizePtr < needed) {
        *sizePtr = needed + 128;
        *bufPtr = (unsigned char*)ckrealloc((char*)*bufPtr, *sizePtr);
    }
}

char*
RatUtf8toMutf7(const char *signed_src)
{
    static unsigned char *dst = NULL;
    static int dstlen = 0;
    const unsigned char *src = (const unsigned char*)signed_src;
    unsigned char ucs[2];
    unsigned long bits;         /* Bits waiting to be written */
    int nbits;                  /* Number of valid bits in 'bits' */
    int len = 0, step;

    /*
     * A first guess at the size needed; the buffer is grown below
     * whenever this turns out to be too small.
     */
    RatUtf8toMutf7Reserve(&dst, &dstlen, (int)strlen(signed_src)*3 + 8);

    while (*src) {
        /* Room for the largest single step plus the final null */
        RatUtf8toMutf7Reserve(&dst, &dstlen, len + 8);
        if ('&' == *src) {
            dst[len++] = '&';
            dst[len++] = '-';
            src++;
        } else if (*src & 0x80) {
            dst[len++] = '&';
            bits = 0;
            nbits = 0;
            do {
                /*
                 * Each character adds at most three digits; keep room
                 * for those, one leftover digit, '-' and the null.
                 */
                RatUtf8toMutf7Reserve(&dst, &dstlen, len + 8);

                /* Defined value even if the converter stores nothing */
                ucs[0] = 0;
                ucs[1] = *src;
                step = RatUtf8to16(src, ucs);
                if (step < 1) {
                    step = 1;
                }
                /* Never step over the end of the string */
                while (step-- > 0 && *src) {
                    src++;
                }

                bits = (bits << 16) | ((unsigned long)ucs[0] << 8) | ucs[1];
                nbits += 16;
                while (nbits >= 6) {
                    nbits -= 6;
                    dst[len++] = modified64[(bits >> nbits) & 0x3f];
                }
                bits &= (1UL << nbits) - 1;
            } while (*src & 0x80);
            if (nbits > 0) {
                /* Left-align the remaining bits in a last digit */
                dst[len++] = modified64[(bits << (6 - nbits)) & 0x3f];
            }
            dst[len++] = '-';
        } else {
            dst[len++] = *src++;
        }
    }
    dst[len] = '\0';
    return (char*)dst;
}
/*
 *----------------------------------------------------------------------
 *
 * RatMutf7toUtf8 --
 *
 *      Convert the given modified utf-7 encoded text to utf-8.
 *      "&-" becomes '&'. Any other '&' starts a run of modified base64
 *      digits which is decoded to UCS-2 characters, each of which is
 *      then converted to utf-8. The run ends at the first character
 *      which is not a base64 digit; if that character is '-' it is
 *      consumed. Bits left over at the end of a run are discarded.
 *
 * Results:
 *      Returns a pointer to a static buffer containing the new text.
 *      The buffer is valid until the next call to this function.
 *
 * Side effects:
 *      None.
 *
 *
 *----------------------------------------------------------------------
 */
char*
RatMutf7toUtf8(const char *signed_src)
{
    static unsigned char *dst = NULL;
    static int dstlen = 0;
    const unsigned char *src = (const unsigned char*)signed_src;
    unsigned char utf16[2];
    const char *digit;
    unsigned long bits;         /* Bits collected so far */
    int nbits;                  /* Number of valid bits in 'bits' */
    int len = 0, needed;

    /*
     * Literal characters are copied one to one, and a run of n base64
     * digits yields at most 6n/16 characters of at most three bytes
     * each. Three times the input length (plus the null) is therefore
     * always enough and the buffer never has to grow below.
     */
    needed = (int)strlen(signed_src)*3 + 4;
    if (dstlen < needed) {
        dstlen = needed;
        dst = (unsigned char *)ckrealloc((char*)dst, dstlen);
    }

    while (*src) {
        if ('&' != *src) {
            dst[len++] = *src++;
            continue;
        }
        src++;
        if ('-' == *src) {
            /* "&-" is a literal ampersand */
            dst[len++] = '&';
            src++;
            continue;
        }
        bits = 0;
        nbits = 0;
        /*
         * strchr() also matches the terminating null and the alphabet
         * contains '=', neither of which is a digit here.
         */
        while ('\0' != *src && '=' != *src
               && NULL != (digit = strchr(modified64, *src))) {
            bits = (bits << 6) | (unsigned long)(digit - modified64);
            nbits += 6;
            src++;
            if (nbits >= 16) {
                nbits -= 16;
                utf16[0] = (unsigned char)((bits >> (nbits+8)) & 0xff);
                utf16[1] = (unsigned char)((bits >> nbits) & 0xff);
                bits &= (1UL << nbits) - 1;
                len += RatUtf16to8(utf16, dst+len);
            }
        }
        if ('-' == *src) {
            src++;
        }
    }
    dst[len] = '\0';
    return (char*)dst;
}
/*
 *----------------------------------------------------------------------
 *
 * RatEncodeQP -
 *
 *      Encode the given null terminated text to QP. The '=' character
 *      and all eight-bit characters are written as '=' followed by two
 *      upper case hexadecimal digits, everything else is copied.
 *
 * Results:
 *      Returns an initialized Tcl_DString pointer. It is up to the
 *      caller to free this when not needing it anymore.
 *
 * Side effects:
 *      None.
 *
 *
 *----------------------------------------------------------------------
 */
Tcl_DString*
RatEncodeQP(const unsigned char *line)
{
    static const char hexDigits[] = "0123456789ABCDEF";
    Tcl_DString *result = (Tcl_DString*)ckalloc(sizeof(*result));
    const unsigned char *p = line;
    char escape[3];

    Tcl_DStringInit(result);
    escape[0] = '=';
    while (*p) {
        if ('=' != *p && *p < 0x80) {
            /* Safe character */
            Tcl_DStringAppend(result, (const char*)p, 1);
        } else {
            escape[1] = hexDigits[*p >> 4];
            escape[2] = hexDigits[*p & 0x0f];
            Tcl_DStringAppend(result, escape, 3);
        }
        p++;
    }
    return result;
}
/*
 *----------------------------------------------------------------------
 *
 * RatEncodeQPCmd --
 *
 *      See ../doc/interface
 *      The first argument names a tcl encoding and the second is the
 *      text. The text is converted to that encoding and the result is
 *      then encoded with RatEncodeQP. If the encoding is unknown
 *      Tcl_GetEncoding returns NULL, which is passed on to the
 *      conversion as it is.
 *
 * Results:
 *      A standard tcl result
 *
 * Side effects:
 *      None
 *
 *
 *----------------------------------------------------------------------
 */
int
RatEncodeQPCmd(ClientData dummy, Tcl_Interp *interp, int objc,
               Tcl_Obj *const objv[])
{
    Tcl_Encoding enc;
    Tcl_DString ext, *encoded;

    if (objc != 3) {
        /* The argument list must end with a NULL string pointer */
        Tcl_AppendResult(interp, "Bad usage", (char*) NULL);
        return TCL_ERROR;
    }
    enc = Tcl_GetEncoding(interp, Tcl_GetString(objv[1]));
    Tcl_UtfToExternalDString(enc, Tcl_GetString(objv[2]), -1, &ext);
    encoded = RatEncodeQP((unsigned char*)Tcl_DStringValue(&ext));
    Tcl_DStringFree(&ext);

    /* This moves the string to the interpreter and resets *encoded */
    Tcl_DStringResult(interp, encoded);
    Tcl_FreeEncoding(enc);
    ckfree((char*)encoded);
    return TCL_OK;
}
/*
 *----------------------------------------------------------------------
 *
 * RatDecodeQP -
 *
 *      Decode the given null terminated text from QP. Every '='
 *      followed by two hexadecimal digits (upper or lower case) is
 *      replaced by the byte it stands for. All other characters,
 *      including an '=' which is not followed by two hexadecimal
 *      digits, are left as they are.
 *
 * Results:
 *      Returns line. The text is decoded in place (the result is never
 *      longer than the input), nothing is allocated.
 *
 * Side effects:
 *      The buffer line points to is overwritten.
 *
 *
 *----------------------------------------------------------------------
 */

/*
 * Value of the hexadecimal digit c (either case), or -1 if c is not a
 * hexadecimal digit.
 */
static int
RatDecodeQPHexValue(int c)
{
    if (c >= '0' && c <= '9') return c - '0';
    if (c >= 'A' && c <= 'F') return c - 'A' + 10;
    if (c >= 'a' && c <= 'f') return c - 'a' + 10;
    return -1;
}

unsigned char*
RatDecodeQP(unsigned char *line)
{
    unsigned char *s, *d;
    int hi, lo;

    for (d = s = line; *s; ) {
        /*
         * The digits are tested in order, so s[2] is only looked at
         * when s[1] is a digit and thereby not the terminating null.
         */
        if ('=' == *s
            && (hi = RatDecodeQPHexValue(s[1])) >= 0
            && (lo = RatDecodeQPHexValue(s[2])) >= 0) {
            *d++ = (unsigned char)((hi << 4) | lo);
            s += 3;
        } else {
            *d++ = *s++;
        }
    }
    *d = '\0';
    return line;
}
/*
 *----------------------------------------------------------------------
 *
 * RatDecodeQPCmd --
 *
 *      See ../doc/interface
 *      The first argument names a tcl encoding and the second is the
 *      QP encoded text. A copy of the text is decoded with RatDecodeQP
 *      and the result is converted from the given encoding to utf-8.
 *      If the encoding is unknown Tcl_GetEncoding returns NULL, which
 *      is passed on to the conversion as it is.
 *
 * Results:
 *      A standard tcl result
 *
 * Side effects:
 *      None
 *
 *
 *----------------------------------------------------------------------
 */
int
RatDecodeQPCmd(ClientData dummy, Tcl_Interp *interp, int objc,
               Tcl_Obj *const objv[])
{
    Tcl_Encoding enc;
    Tcl_DString utf;
    char *text;

    if (objc != 3) {
        /* The argument list must end with a NULL string pointer */
        Tcl_AppendResult(interp, "Bad usage", (char*) NULL);
        return TCL_ERROR;
    }
    enc = Tcl_GetEncoding(interp, Tcl_GetString(objv[1]));

    /* RatDecodeQP works in place, so give it a private copy */
    text = cpystr(Tcl_GetString(objv[2]));
    RatDecodeQP((unsigned char*)text);
    Tcl_ExternalToUtfDString(enc, text, -1, &utf);
    ckfree(text);
    Tcl_DStringResult(interp, &utf);
    Tcl_FreeEncoding(enc);
    return TCL_OK;
}
/*
* Test code for Mutf7 <-> utf8 functions
static void
Test(unsigned char *in)
{
unsigned char stage1[1024], stage2[1024];
printf("In: %s\n", in); fflush(stdin);
RatUtf8toMutf7(in, stage1, sizeof(stage1));
printf("Stage1: %s\n", stage1); fflush(stdin);
RatMutf7toUtf8(stage1, stage2, sizeof(stage2));
printf("Stage2: %s\n", stage2); fflush(stdin);
if (strcmp(stage2, in)) {
printf("ERROR\n");
}
printf("\n");
}
int main()
{
Test("får");
Test("Räksmörgås");
Test("Ã¥");
Test("åä");
Test("åäö");
Test("åäöå");
Test("åäöåä");
Test("åäöåäö");
return 0;
} */
/* syntax highlighted by Code2HTML, v. 0.9.1 */