#! /bin/sh

# insutf8 - insert the UTF-8 encoded character in front of "U+XXXX" notations.
#
# Usage: insutf8 [-d | -p | -u] [file ...]
#
# The input (the named files, or standard input if none are given) is
# turned line by line into C printf statements, wrapped into a small
# C program, compiled with cc and run; the program's output is the result.
# Only the first "U+XXXX" of each line is processed.
#
#   (none)  "U+XXXX" becomes "<UTF-8 character> U+XXXX"
#   -u      like the default, but lines starting with at least two hex
#           digits followed by a non-alphanumeric character first get
#           "U+" prepended
#   -d      "U+XXXX" becomes "<priority> <decimal code> <UTF-8 character> U+XXXX"
#   -p      "U+XXXX" becomes "<priority> U+XXXX"
#
# <priority> is the two-digit value computed by prio () below.
#
# The temporary files insutf8.c and insutf8.exe are created in (and removed
# from) the current directory.  The exit status is that of the compile/run
# step.

(
# Fixed head of the generated program: helpers and the opening of main ().
cat <<\/eoc
#include <stdio.h>

/* Write one byte to stdout; only the low 8 bits of b are used. */
static void
outbyte (unsigned long b)
{
	putchar ((int) (b & 0xFF));
}

/* Write the UTF-8 encoding (1 to 6 bytes) of unichar to stdout.
   Values of 0x80000000 and above produce no output. */
void
printutf8 (unsigned long unichar)
{
	/* this is literal output, not C string preparation;
	   so in contrast to the mk* scripts, do not apply \ escaping
	 */
	if (unichar < 0x80) {
		outbyte (unichar);
	} else if (unichar < 0x800) {
		outbyte (0xC0 | (unichar >> 6));
		outbyte (0x80 | (unichar & 0x3F));
	} else if (unichar < 0x10000) {
		outbyte (0xE0 | (unichar >> 12));
		outbyte (0x80 | ((unichar >> 6) & 0x3F));
		outbyte (0x80 | (unichar & 0x3F));
	} else if (unichar < 0x200000) {
		outbyte (0xF0 | (unichar >> 18));
		outbyte (0x80 | ((unichar >> 12) & 0x3F));
		outbyte (0x80 | ((unichar >> 6) & 0x3F));
		outbyte (0x80 | (unichar & 0x3F));
	} else if (unichar < 0x4000000) {
		outbyte (0xF8 | (unichar >> 24));
		outbyte (0x80 | ((unichar >> 18) & 0x3F));
		outbyte (0x80 | ((unichar >> 12) & 0x3F));
		outbyte (0x80 | ((unichar >> 6) & 0x3F));
		outbyte (0x80 | (unichar & 0x3F));
	} else if (unichar < 0x80000000) {
		outbyte (0xFC | (unichar >> 30));
		outbyte (0x80 | ((unichar >> 24) & 0x3F));
		outbyte (0x80 | ((unichar >> 18) & 0x3F));
		outbyte (0x80 | ((unichar >> 12) & 0x3F));
		outbyte (0x80 | ((unichar >> 6) & 0x3F));
		outbyte (0x80 | (unichar & 0x3F));
	}
}

/* Return the priority value for the block that unichar belongs to:
	4E00..9FFF; CJK Unified Ideographs
	3400..4DBF; CJK Unified Ideographs Extension A
	20000..2A6DF; CJK Unified Ideographs Extension B
	2E80..2EFF; CJK Radicals Supplement
	F900..FAFF; CJK Compatibility Ideographs
	2F800..2FA1F; CJK Compatibility Ideographs Supplement
	3300..33FF; CJK Compatibility
	FE30..FE4F; CJK Compatibility Forms
	3000..303F; CJK Symbols and Punctuation
	3200..32FF; Enclosed CJK Letters and Months
   Anything else yields 99. */
int
prio (unsigned long unichar)
{
	if (unichar >= 0x4E00 && unichar <= 0x9FFF) {
		/* CJK Unified Ideographs */
		return 1;
	} else if (unichar >= 0x3400 && unichar <= 0x4DBF) {
		/* CJK Unified Ideographs Extension A */
		return 2;
	} else if (unichar >= 0x20000 && unichar <= 0x2A6DF) {
		/* CJK Unified Ideographs Extension B */
		return 3;
	} else if (unichar >= 0x2E80 && unichar <= 0x2EFF) {
		/* CJK Radicals Supplement */
		return 11;
	} else if (unichar >= 0xF900 && unichar <= 0xFAFF) {
		/* CJK Compatibility Ideographs */
		return 21;
	} else if (unichar >= 0x2F800 && unichar <= 0x2FA1F) {
		/* CJK Compatibility Ideographs Supplement */
		return 22;
	} else if (unichar >= 0x3300 && unichar <= 0x33FF) {
		/* CJK Compatibility */
		return 23;
	} else if (unichar >= 0xFE30 && unichar <= 0xFE4F) {
		/* CJK Compatibility Forms */
		return 31;
	} else if (unichar >= 0x3000 && unichar <= 0x303F) {
		/* CJK Symbols and Punctuation */
		return 41;
	} else if (unichar >= 0x3200 && unichar <= 0x32FF) {
		/* Enclosed CJK Letters and Months */
		return 42;
	} else {
		return 99;
	}
}

int
main (void)
{
/eoc

# Body of main (): one printf statement sequence per input line.
# Each sed pipeline
#   1. backslash-escapes " and \ so the text is a valid C string literal,
#   2. doubles % so the text is inert as a printf format,
#   3. wraps the line in  printf ("...\n");
#   4. splits the string at the first U+XXXX and inserts the extra output.
case "$1" in
-d)	shift
	cat "$@" |
	sed -e 's/\(["\\]\)/\\\1/g' -e 's/%/%%/g' -e 's/^/ printf ("/' -e 's/$/\\n");/' \
	    -e 's/U+\([0-9A-Fa-f][0-9A-Fa-f]*\)/"); printf ("%02d %ld ", prio (0x\1), (long) 0x\1); printutf8 (0x\1); printf (" U+\1/'
	;;
-p)	shift
	cat "$@" |
	sed -e 's/\(["\\]\)/\\\1/g' -e 's/%/%%/g' -e 's/^/ printf ("/' -e 's/$/\\n");/' \
	    -e 's/U+\([0-9A-Fa-f][0-9A-Fa-f]*\)/"); printf ("%02d ", prio (0x\1)); printf ("U+\1/'
	;;
*)	case "$1" in
	-u)	shift
		# prepend U+ to a leading bare hex code
		sed -e "s,^\([0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f]*[^0-9A-Za-z]\),U+\1," "$@";;
	*)	cat "$@";;
	esac |
	sed -e 's/\(["\\]\)/\\\1/g' -e 's/%/%%/g' -e 's/^/ printf ("/' -e 's/$/\\n");/' \
	    -e 's/U+\([0-9A-Fa-f][0-9A-Fa-f]*\)/"); printutf8 (0x\1); printf (" U+\1/'
	;;
esac

# Fixed tail of the generated program.
cat <<\/eoc
	return 0;
}
/eoc
) > insutf8.c

# Build and run the generated program, then clean up while keeping the
# build/run status as the script's exit status.
cc -o insutf8.exe insutf8.c && ./insutf8.exe
status=$?
rm -f insutf8.c insutf8.exe
exit $status