#! /bin/sh

# Extract a table of character categories from the Unicode data
# description file UCD.html.
#
# Options (first argument only):
#   -sed  write a sed script to categors.sed (this is also the default)
#   -h    write the table entries to categors.t
#
# UCD.html is obtained with "make UCD.html"; if that fails the script
# prints a message on stderr and exits with status 1.

# Output selector: "true" -> sed script, "false" -> C table.
want_sed=true

# Make sure the description file is present; make's own output goes to stderr.
make UCD.html >&2 || {
  echo Could not acquire Unicode description file UCD.html >&2
  exit 1
}

case "$1" in
  -sed)
    want_sed=true
    shift
    ;;
  -h)
    want_sed=false
    shift
    ;;
esac

# Print the table entries, one  {"abbrev", "name"},  line each.
# Two fixed entries come first, followed by those derived from UCD.html.
emit_entries() {
  echo ' {"L&", "Letter"},'
  echo ' {"Sc", "Currency Symbol"},'

  # Stage 1: keep only lines matching the address, delete all others.
  # Stage 2: reduce each line to the captured cell text; "t" skips the
  #          following "d" when the substitution succeeded, so lines
  #          that did not match are dropped.
  # Stage 3: trim leading/trailing spaces and remove the first "Other".
  # Stage 4: mark the end of a line with @@, join it with the next line
  #          (N) and rewrite the pair as  {"first"; "second"};  using ";"
  #          as a temporary separator.  The first comma (plus following
  #          spaces) is then turned into a string break, all ";" become
  #          ",", and an empty trailing string (, "") is removed.
  #
  # NOTE(review): the stage-1 address is empty as written ('//') and the
  # stage-2 pattern begins with ']*>'; both look like they lost HTML tag
  # text at some point -- confirm against the upstream script.
  sed -e '// b' -e d UCD.html |
    sed -e 's/]*> *\([^<(]*\) *.*/\1/' -e t -e d |
    sed -e 's/^ *//' -e 's/ *$//' -e 's/Other//' |
    sed -e 's/$/@@/' -e N -e 's/\(.*\)@@.\(.*\)/ {"\1"; "\2"};/' \
      -e 's/, */"; "/' -e 's/;/,/g' -e 's/, ""//'
}

emit_entries |
  case "$want_sed" in
    true)
      # make sed script: each entry becomes  s@"abbrev"@"name"@
      sed -e 's/ {"\([^"]*\)", "\([^"]*\)".*/s@"\1"@"\2"@/' > categors.sed
      ;;
    *)
      # make C table
      cat > categors.t
      ;;
  esac