/*======================================================================*\
|*		Editor mined						*|
|*		Text editing functions					*|
\*======================================================================*/

#include "mined.h"
#include "textfile.h"	/* default_lineend */
#include "io.h"
#include "termprop.h"	/* utf8_input */


/*======================================================================*\
|*		Local function forward declarations			*|
\*======================================================================*/

static void S _((character newchar));


/*======================================================================*\
|*		Global variables					*|
\*======================================================================*/

FLAG redraw_pending = False;	/* was a redraw suppressed in find_y ? */


/*======================================================================*\
|*		Character properties					*|
\*======================================================================*/

/* Script range table; its entries are pulled in from scripts.t */
static struct scriptentry scripttable [] = {
#include "scripts.t"
};


/*
   Determine script info of Unicode character according to script range table.
*/ static struct scriptentry * scriptinfo (ucs) unsigned long ucs; { int min = 0; int max = sizeof (scripttable) / sizeof (struct scriptentry) - 1; int mid; /* binary search in table */ while (max >= min) { mid = (min + max) / 2; if (scripttable [mid].last < ucs) { min = mid + 1; } else if (scripttable [mid].first > ucs) { max = mid - 1; } else if (scripttable [mid].first <= ucs && scripttable [mid].last >= ucs) { return & scripttable [mid]; } } return 0; } char * script (ucs) unsigned long ucs; { struct scriptentry * se = scriptinfo (ucs); if (se) { return se->scriptname; } else { return ""; } } char * category (ucs) unsigned long ucs; { struct scriptentry * se = scriptinfo (ucs); if (se) { return se->categoryname; } else { return ""; } } int is_right_to_left (ucs) unsigned long ucs; { if (ucs < 0x0590) return 0; return (ucs >= 0x0590 && ucs <= 0x05FF) /* Hebrew */ || (ucs >= 0xFB1D && ucs <= 0xFB4F) /* Hebrew presentation forms */ || (ucs >= 0x0600 && ucs <= 0x07BF) /* Arabic, Syriac, Thaana */ || (ucs >= 0xFB50 && ucs <= 0xFDFF) /* Arabic presentation forms A */ || (ucs >= 0xFE70 && ucs <= 0xFEFF) /* Arabic presentation forms B */ || (ucs == 0x200F) /* right-to-left mark */ #ifdef RLmarks || (ucs == 0x202B) /* right-to-left embedding */ || (ucs == 0x202E) /* right-to-left override */ #endif || (ucs >= 0x07C0 && ucs <= 0x07FF) /* NKo */ || (ucs >= 0x10800 && ucs <= 0x1083F) /* Cypriot Syllabary */ || (ucs >= 0x10900 && ucs <= 0x1091F) /* Phoenician */ || (ucs >= 0x10A00 && ucs <= 0x10A5F) /* Kharoshthi */ ; } static struct hanentry * lookup_handescr (unichar) unsigned long unichar; { int min = 0; int max = hantable_len - 1; int mid; /* binary search in table */ while (max >= min) { mid = (min + max) / 2; if (hantable [mid].unicode < unichar) { min = mid + 1; } else if (hantable [mid].unicode > unichar) { max = mid - 1; } else { return & hantable [mid]; } } return 0; } static FLAG idfchar (cpoi) character * cpoi; { unsigned long unichar = unicodevalue (cpoi); if 
('0' <= unichar && unichar <= '9') { return True; } else if (unichar == '_' || unichar == '$') { return True; } else { char * cat = category (unichar); return streq (cat, "Letter") || streq (cat, "Mark"); } } FLAG isLetter (unichar) unsigned long unichar; { char * cat = category (unichar); return streq (cat, "Letter"); } /** charcode () returns the encoded character value of a Unicode character */ static unsigned long charcode (code) unsigned long code; { if (cjk_text || mapped_text) { return encodedchar (code); } else { return code; } } FLAG iswide_unichar (ucs) unsigned long ucs; { int save_width_data_version = width_data_version; int w; width_data_version = WIDTH_DATA_MAX; w = iswide (ucs); width_data_version = save_width_data_version; return w; } /*======================================================================*\ |* Scrolling *| \*======================================================================*/ /* * Perform a forward scroll. It returns ERRORS if we're at the last line of * the file. */ static int forward_scroll (update) FLAG update; { if (bot_line->next == tail) { /* Last line of file. No dice */ return ERRORS; } top_line = top_line->next; bot_line = bot_line->next; cur_line = cur_line->next; line_number ++; /* Perform the scroll on screen */ if (update) { clean_menus (); scroll_forward (); scrollbar_scroll_up (0); set_cursor (0, SCREENMAX); line_print (SCREENMAX, bot_line); } return FINE; } /* * Perform a backwards scroll. It returns ERRORS if we're at the first line * of the file. It updates the display completely if update is True. * Otherwise it leaves that to the caller (page up function). */ static int reverse_scroll (update) FLAG update; { if (top_line->prev == header) { /* Top of file. 
Can't scroll */ return ERRORS; } if (last_y != SCREENMAX) { /* Reset last_y if necessary */ last_y ++; } else { bot_line = bot_line->prev; /* Else adjust bot_line */ } top_line = top_line->prev; cur_line = cur_line->prev; line_number --; /* Perform the scroll on screen */ if (update) { if (can_add_line || can_scroll_reverse) { clean_menus (); if (MENU && can_add_line) { add_line (0); } else { set_cursor (0, - MENU); scroll_reverse (); } scrollbar_scroll_down (0); set_cursor (0, YMAX); /* Erase very bottom line */ clear_lastline (); if (MENU && ! can_add_line) { displaymenuline (); } set_cursor (0, 0); line_print (0, top_line); } else { display (0, top_line, last_y, y); } } return FINE; } /*======================================================================*\ |* Text/line insertion and deletion *| \*======================================================================*/ /* * text_bytes_of () returns the number of bytes in the string 'string' * up to and excluding the first '\n'. */ static int text_bytes_of (string) register char * string; { register int count = 0; if (string != NIL_PTR) { while (* string != '\0' && * string != '\n') { string ++; count ++; } } return count; } /* * Determine new syntax status, given previous status and current * text pointer. 
*/ char syntax_state (prev, s) char prev; char * s; { switch (* s) { case '<': if (strncmp (s, "<%", 2) == 0) { return prev | syntax_JSP; } else if (prev & syntax_JSP) { return prev; } else if (strncmp (s, "", 3) == 0) { return prev & ~ syntax_comment; } else { return prev; } case '>': if (prev & syntax_JSP) { return prev; } else { return prev & ~ syntax_HTML; } default: return prev; } /* if (strncmp (s, "<%", 2) == 0) { return prev | syntax_JSP; } else if (strncmp (s, "%>", 2) == 0) { return prev & ~ syntax_JSP; } else if (prev & syntax_JSP) { return prev; } else if (strncmp (s, "", 3) == 0) { return prev & ~ syntax_comment; } else if (* s == '<') { return prev | syntax_HTML; } else if (* s == '>') { return prev & ~ syntax_HTML; } else { return prev; } */ } /* * Determine status of HTML on this line: * line->syntax_marker is a bitmask indicating in which kinds of * syntax constructs the line ends * If changing, continue with subsequent lines. */ void update_text_state (line) LINE * line; { char * lpoi = line->text; char syntax_marker = line->prev->syntax_marker; /* state at line begin */ char old_syntax_marker = line->syntax_marker; /* previous state of line */ if (dim_HTML == False) { return; } while (* lpoi != '\0') { syntax_marker = syntax_state (syntax_marker, lpoi); advance_char (& lpoi); } line->syntax_marker = syntax_marker; if (syntax_marker != old_syntax_marker && line->next != tail) { update_text_state (line->next); } } /* * make_line installs the buffer into a LINE structure. * It returns a pointer to the allocated structure. 
*/ static LINE * make_line (buffer, length, return_type) char * buffer; int length; lineend_type return_type; { register LINE * new_line = alloc_header (); if (new_line == NIL_LINE) { ring_bell (); error ("Cannot allocate more memory for new line header"); return NIL_LINE; } else { new_line->text = alloc (length + 1); if (new_line->text == NIL_PTR) { ring_bell (); error ("Cannot allocate more memory for new line"); return NIL_LINE; } else { new_line->shift_count = 0; new_line->return_type = return_type; strncpy (new_line->text, buffer, length); new_line->text [length] = '\0'; new_line->syntax_marker = syntax_none; /* undetermined */ return new_line; } } } /* * Line_insert () inserts a new line with text pointed to by 'string'. * It returns the address of the new line. */ LINE * line_insert (line, string, len, return_type) register LINE * line; char * string; int len; lineend_type return_type; { register LINE * new_line; /* Allocate space for LINE structure and text */ new_line = make_line (string, len, return_type); if (new_line != NIL_LINE) { /* Install the line into the double linked list */ new_line->prev = line; new_line->next = line->next; line->next = new_line; new_line->next->prev = new_line; /* Adjust information about text attribute state (HTML marker) */ update_text_state (new_line); /* Increment total_lines */ total_lines ++; } return new_line; } /* * Insert_text () inserts the string 'string' at the given line and location. Do not pass a string with an embedded (non-terminating) newline! Make sure cur_text is properly reset afterwards! 
(may be left undefined) */ int insert_text (line, location, string) register LINE * line; char * location; char * string; { register char * bufp = text_buffer; /* Buffer for building line */ register char * textp = line->text; char * newbuf; lineend_type old_return_type; lineend_type new_return_type; if (viewonly) { viewonlyerr (); return ERRORS; } if (length_of (textp) + text_bytes_of (string) >= MAX_CHARS) { error ("Line too long"); return ERRORS; } /* Copy part of line until 'location' has been reached */ while (textp != location) { * bufp ++ = * textp ++; } /* Insert string at this location */ while (* string != '\0') { * bufp ++ = * string ++; } * bufp = '\0'; /* First, allocate memory for next line contents to make sure the */ /* operation succeeds or fails as a whole */ newbuf = alloc (length_of (text_buffer) + length_of (location) + 1); if (newbuf == NIL_PTR) { ring_bell (); error ("Cannot allocate memory for insertion"); return ERRORS; } else { /* Install the new text in this line */ if (* (string - 1) == '\n') { /* Insert a new line */ old_return_type = line->return_type; if (old_return_type == lineend_NUL || old_return_type == lineend_NONE) { /* line->return_type = top_line->return_type; */ line->return_type = default_lineend; new_return_type = old_return_type; } else if (utf8_lineends && ((keyshift & ctrlshift_mask) || (hop_flag > 0))) { if (keyshift & ctrl_mask) { line->return_type = lineend_LS; } else { line->return_type = lineend_PS; } new_return_type = old_return_type; } else if (old_return_type == lineend_LS || old_return_type == lineend_PS) { if (hop_flag > 0) { line->return_type = lineend_PS; } else { line->return_type = lineend_LS; } new_return_type = old_return_type; } else { new_return_type = old_return_type; } if (line_insert (line, location, length_of (location), new_return_type) == NIL_LINE) { return ERRORS; } set_modified (); } else { /* Append last part of line to text_buffer */ copy_string (bufp, location); } free_space (line->text); 
set_modified (); line->text = newbuf; copy_string (line->text, text_buffer); update_text_state (line); return FINE; } } /* * Line_delete () deletes the argument line out of the line list. * The pointer to the next line is returned. */ static LINE * line_delete (line) register LINE * line; { register LINE * next_line = line->next; line->prev->return_type = line->return_type; /* Delete the line */ line->prev->next = line->next; line->next->prev = line->prev; /* Free allocated space */ free_space (line->text); free_header (line); /* Decrement total_lines */ total_lines --; return next_line; } /* * Delete_text () deletes all the characters (including newlines) between * startposition and endposition and fixes the screen accordingly. * It displays the number of lines deleted. */ int delete_text (start_line, start_textp, end_line, end_textp) LINE * start_line; char * start_textp; LINE * end_line; char * end_textp; { register char * textp = start_line->text; register char * bufp = text_buffer; /* Storage for new line->text */ LINE * line; LINE * after_end = end_line->next; int line_cnt = 0; /* Nr of lines deleted */ int count = 0; int shift = 0; /* Used in shift calculation */ int nx = x; int ret = FINE; char * newbuf; int newpos_offset = start_textp - textp; FLAG isdeleting_lastcombining = False; int redraw_cols = 0; if (viewonly) { viewonlyerr (); return ret; } set_modified (); /* File will have been modified */ if (combining_mode && encoding_has_combining ()) { unsigned long unichar = unicodevalue (start_textp); if (iscombined (unichar, start_textp, start_line->text)) { char * cp = start_textp; advance_char (& cp); unichar = unicodevalue (cp); if (! 
iscombining_unichar (unichar)) { isdeleting_lastcombining = True; cp = start_textp; do { precede_char (& cp, start_line->text); unichar = unicodevalue (cp); } while (cp != start_line->text && iscombining_unichar (unichar)); if (unichar == '\t') { redraw_cols = 0; } else if (iswide (unichar)) { redraw_cols = 2; } else { redraw_cols = 1; } } } } /* Set up new line. Copy first part of start line until start_position. */ while (textp < start_textp) { * bufp ++ = * textp ++; count ++; } /* Check if line doesn't exceed MAX_CHARS */ if (count + length_of (end_textp) >= MAX_CHARS) { error ("Line too long"); return ret; } /* Copy last part of end_line if end_line is not tail */ copy_string (bufp, (end_textp != NIL_PTR) ? end_textp : "\n"); /* Delete all lines between start and end_position (including end_line) */ line = start_line->next; while (line != after_end && line != tail) { /* Here, the original mined compared with end_line->next which has already been discarded when the comparison should become true. 
This severe error remained undetected until I ported to MSDOS */ line = line_delete (line); line_cnt ++; } /* Check if last line of file should be deleted */ if (end_textp == NIL_PTR && length_of (start_line->text) == 1 && total_lines > 1) { start_line = start_line->prev; (void) line_delete (start_line->next); line_cnt ++; } else { /* Install new text */ newbuf = alloc (length_of (text_buffer) + 1); if (newbuf == NIL_PTR) { ring_bell (); error ("No more memory after deletion"); ret = ERRORS; } else { free_space (start_line->text); start_line->text = newbuf; copy_string (start_line->text, text_buffer); update_text_state (start_line); } } /* Update screen */ if (line_cnt == 0) { /* Check if only one line changed */ if (shift > 0) { /* Reprint whole line */ set_cursor (0, y); line_print (y, start_line); move_to (nx, y); /* Reset cur_text */ } else { /* Just display last part of line */ move_address (cur_line->text + newpos_offset, y); if (isdeleting_lastcombining) { if (redraw_cols == 0 || proportional) { set_cursor (0, y); line_print (y, start_line); } else { set_cursor (x - redraw_cols, y); put_line (y, start_line, x - redraw_cols, True, False); } } else { put_line (y, start_line, x, True, False); } set_cursor_xy (); } return ret; } shift = last_y; /* Save value */ reset (top_line, y); if ((line_cnt <= SCREENMAX - y) && can_delete_line) { clear_status (); display (y, start_line, 0, y); line = proceed (start_line, SCREENMAX - y - line_cnt + 1); while (line_cnt -- > 0) { delete_line (y + 1); scrollbar_scroll_up (y + 1); if (line != tail) { set_cursor (0, SCREENMAX); line_print (SCREENMAX, line); line = line->next; } } } else { display (y, start_line, shift - y, y); } move_to (nx, y); return ret; } /*======================================================================*\ |* Move commands *| \*======================================================================*/ /* * Move one line up. 
*/ void MUP () { if ((keyshift & ctrlshift_mask) == ctrlshift_mask) { keyshift = 0; MPPARA (); return; } else if (keyshift & shift_mask) { keyshift = 0; HIGH (); return; } if (hop_flag > 0) { HIGH (); } else if (y == 0) { /* Top line of screen. Scroll one line */ if (reverse_scroll (True) != ERRORS) { move_y (y); } } else { /* Move to previous line */ move_y (y - 1); } } /* * Move one line down. */ void MDN () { if ((keyshift & ctrlshift_mask) == ctrlshift_mask) { keyshift = 0; MNPARA (); return; } else if (keyshift & shift_mask) { keyshift = 0; LOW (); return; } if (hop_flag > 0) { LOW (); } else if (y == last_y) { /* Last line of screen. Scroll one line */ if (bot_line->next == tail && bot_line->text [0] != '\n') { return; } else { (void) forward_scroll (True); move_y (y); } } else { /* Move to next line */ move_y (y + 1); } } /* * Move to end of next line (for pico mode). */ void MDNEL () { MDN (); ELINE (); } /* * Move left one Unicode character (may enter into combined character). */ static void ctrl_MLF () { char * curpoi; if (hop_flag > 0) { BLINE (); } else if (cur_text == cur_line->text) {/* Begin of line */ if (cur_line->prev != header) { MUP (); /* Move one line up */ move_to (LINE_END, y); } } else { curpoi = cur_text; precede_char (& curpoi, cur_line->text); move_address (curpoi, y); } } /* * Move right one Unicode character (may enter into combined character). */ static void ctrl_MRT () { char * curpoi; if (hop_flag > 0) { ELINE (); } else if (* cur_text == '\n') { if (cur_line->next != tail) { /* Last char of file */ MDN (); /* Move one line down */ move_to (LINE_START, y); } } else { curpoi = cur_text; advance_char (& curpoi); move_address (curpoi, y); } } /* * Move left one position. 
*/ void MLF () { if ((keyshift & ctrlshift_mask) == ctrlshift_mask) { keyshift = 0; BLINE (); return; } if (keyshift & ctrl_mask) { keyshift = 0; ctrl_MLF (); return; } if (keyshift & shift_mask) { keyshift = 0; MPW (); return; } if (hop_flag > 0) { BLINE (); } else if (x == 0 && cur_line->shift_count == 0) { /* Begin of line */ if (cur_line->prev != header) { MUP (); /* Move one line up */ move_to (LINE_END, y); } } else { move_to (x - 1, y); } } /* * Move right one position. */ void MRT () { if ((keyshift & ctrlshift_mask) == ctrlshift_mask) { keyshift = 0; ELINE (); return; } if (keyshift & ctrl_mask) { keyshift = 0; ctrl_MRT (); return; } if (keyshift & shift_mask) { keyshift = 0; MNW (); return; } if (hop_flag > 0) { ELINE (); } else if (* cur_text == '\n') { if (cur_line->next != tail) { /* Last char of file */ MDN (); /* Move one line down */ move_to (LINE_START, y); } } else { move_to (x + 1, y); } } /* * Move to top of screen */ void HIGH () { move_y (0); } /* * Move to bottom of screen */ void LOW () { move_y (last_y); } /* * Move to begin of line. */ void BLINE () { move_to (LINE_START, y); } /* * Move to end of line. */ void ELINE () { move_to (LINE_END, y); } /* * GOTO () prompts for a linenumber and moves to that line. */ void goline (number) int number; { LINE * line; if (number <= 0 || (line = proceed (header->next, number - 1)) == tail) { error2 ("Invalid line number: ", dec_out ((long) number)); } else { Pushmark (); clear_status (); move_y (find_y (line)); } } void goproz (number) int number; { goline ((long) (total_lines - 1) * number / 100 + 1); } void GOTO () { unsigned long c; int end; int number; if (MENU) { hop_flag = 1; displayflags (); set_cursor_xy (); flush (); hop_flag = 0; } if (! char_ready_within (500)) { status_msg ("HOP/Go: type command (to fortify) or number (to go to ...) 
..."); } if (quit) { return; } c = readcharacter (); if (quit) { return; } if ('0' <= c && c <= '9') { if (lines_per_page > 0) { end = get_number ("...number [% | p(age | m(ark | g(o marker | f(ile #]", c, & number); } else { end = get_number ("...number [% | m(ark | g(o marker | f(ile #]", c, & number); } if (end == '%') { goproz (number); } else if (end == 'm' || end == 'M' || end == ',') { MARKn (number); } else if (end == '\'' || end == '.' || end == 'g' || end == 'G') { GOMAn (number); } else if (end == 'f' || end == 'F' || end == '#') { edit_nth_file (number); } else if (lines_per_page > 0 && (end == 'p' || end == 'P') && number > 0) { goline (number * lines_per_page - lines_per_page + 1); } else if (end != ERRORS) { goline (number); } return; } else { clear_status (); hop_flag = 1; invoke_key_function (c); return; } } /* * Scroll forward one page or to eof, whatever comes first. (Bot_line becomes * top_line of display.) Try to leave the cursor on the same line. If this is * not possible, leave cursor on the line halfway the page. */ void PD () { register int i; int new_y; if (keyshift & ctrl_mask) { keyshift = 0; SD (); return; } if (hop_flag > 0) { hop_flag = 0; EFILE (); return; } for (i = 0; i < SCREENMAX; i ++) { if (forward_scroll (page_scroll) == ERRORS) { break; /* EOF reached */ } } if (y - i < 0) { /* Line no longer on screen */ new_y = page_stay ? 0 : SCREENMAX >> 1; } else { new_y = y - i; } if (page_scroll == False) { display (0, top_line, last_y, new_y); } else if (MENU && ! can_delete_line) { displaymenuline (); } move_y (new_y); } /* * Scroll backwards one page or to top of file, whatever comes first. * (Top_line becomes bot_line of display). * The very bottom line (YMAX) is always blank. * Try to leave the cursor on the same line. * If this is not possible, leave cursor on the line halfway the page. 
*/ void PU () { register int i; int new_y; if (keyshift & ctrl_mask) { keyshift = 0; SU (); return; } if (hop_flag > 0) { hop_flag = 0; BFILE (); return; } for (i = 0; i < SCREENMAX; i ++) { if (reverse_scroll (page_scroll) == ERRORS) { /* should also flag reverse_scroll that clearing of bottom line is not desired */ break; /* Top of file reached */ } } if (y + i > SCREENMAX) { /* line no longer on screen */ new_y = page_stay ? last_y : SCREENMAX >> 1; } else { new_y = y + i; } if (can_scroll_reverse && page_scroll) { set_cursor (0, YMAX); /* Erase very bottom line */ clear_lastline (); } else { display (0, top_line, last_y, new_y); } move_y (new_y); } /* * Go to top of file, scrolling if possible, else redrawing screen. */ void BFILE () { Pushmark (); if (proceed (top_line, - SCREENMAX) == header) { PU (); /* It fits. Let PU do it */ } else { reset (header->next, 0); /* Reset top_line, etc. */ RD_y (0); /* Display full page */ } move_to (LINE_START, 0); } /* * Go to last position of text, scrolling if possible, else redrawing screen */ void EFILE () { Pushmark (); if (proceed (bot_line, SCREENMAX) == tail) { PD (); /* It fits. Let PD do it */ } else { reset (proceed (tail->prev, - SCREENMAX), SCREENMAX); RD_y (last_y); /* Display full page */ } move_to (LINE_END, last_y); } /* * Scroll one line up. Leave the cursor on the same line (if possible). */ void SU () { register int i; if (hop_flag > 0) { hop_flag = 0; for (i = 0; i < (SCREENMAX >> 1); i ++) { if (i > 0 && disp_scrollbar) { (void) display_scrollbar (True); } SU (); } return; } if (reverse_scroll (True) != ERRORS) { /* else we are at top of file */ move_y ((y == SCREENMAX) ? SCREENMAX : y + 1); } } /* * Scroll one line down. Leave the cursor on the same line (if possible). 
*/ void SD () { register int i; if (hop_flag > 0) { hop_flag = 0; for (i = 0; i < (SCREENMAX >> 1); i ++) { if (i > 0 && disp_scrollbar) { (void) display_scrollbar (True); } SD (); } return; } if (forward_scroll (True) != ERRORS) { move_y ((y == 0) ? 0 : y - 1); } } /*----------------------------------------------------------------------*\ Contents-dependent moves \*----------------------------------------------------------------------*/ /* * A word was previously defined as a number of non-blank characters * separated by tabs, spaces or linefeeds. * By consulting idfchar (), sequences of real letters only or digits * or underlines are recognized as words. */ /* * BSEN () and ESEN () look for the beginning or end of the current sentence. */ void BSEN () { search_for ("[;.]", REVERSE, False); } void ESEN () { search_for ("[;.]", FORWARD, False); } /* * MNPARA () and MPPARA () look for end or beginning of current paragraph. */ void MNPARA () { do { if (cur_line->next == tail) { ELINE (); break; } if (JUSmode == 0 && cur_line->text [strlen (cur_line->text) - 2] != ' ') { MDN (); BLINE (); break; } MDN (); if (JUSmode == 1 && * (cur_line->text) == '\n') { break; } } while (True); } void MPPARA () { if (JUSmode == 0 && cur_text == cur_line->text) { /* prevent sticking if already at paragraph beginning */ MUP (); } do { if (cur_line->prev == header) { BLINE (); break; } if (JUSmode == 0 && cur_line->prev->text [strlen (cur_line->prev->text) - 2] != ' ') { BLINE (); break; } MUP (); if (JUSmode == 1 && * (cur_line->text) == '\n') { break; } } while (True); } static void search_tag (poi) char * poi; { char pat [maxLINE_LEN]; FLAG direction = FORWARD; char * patpoi = & pat [3]; strcpy (pat, "' && * poi != '\0') { * patpoi = * poi; patpoi ++; poi ++; } * patpoi = '\0'; search_corresponding (pat, direction, "/"); } /** SCORR () looks for a corresponding bracket, HTML tag, or looks for the next/previous MIME separator or mail header */ void SCORR (pref_direction) FLAG 
pref_direction; { char * poi; unsigned int cv = charvalue (cur_text); char * errmsg; if (hop_flag > 0) { hop_flag = 0; search_wrong_enc (); return; } switch (cv) { case '(': search_corresponding ("[()]", FORWARD, ")"); return; case ')': search_corresponding ("[()]", REVERSE, "("); return; case '[': search_corresponding ("[\\[\\]]", FORWARD, "]"); return; case ']': search_corresponding ("[\\[\\]]", REVERSE, "["); return; case '{': search_corresponding ("[{}]", FORWARD, "}"); return; case '}': search_corresponding ("[{}]", REVERSE, "{"); return; case (character) '': /* « LEFT-POINTING DOUBLE ANGLE QUOTATION MARK */ if (utf8_text) { search_corresponding ("[«»]", FORWARD, "»"); } else { search_corresponding ("[]", FORWARD, ""); } return; case (character) '': /* » RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK */ if (utf8_text) { search_corresponding ("[«»]", REVERSE, "«"); } else { search_corresponding ("[]", REVERSE, ""); } return; case 0x2039: /* ‹ SINGLE LEFT-POINTING ANGLE QUOTATION MARK */ search_corresponding ("[‹›]", FORWARD, "›"); return; case 0x203A: /* › SINGLE RIGHT-POINTING ANGLE QUOTATION MARK */ search_corresponding ("[‹›]", REVERSE, "‹"); return; case 0x2045: /* ⁅ LEFT SQUARE BRACKET WITH QUILL */ search_corresponding ("[⁅⁆]", FORWARD, "⁆"); return; case 0x2046: /* ⁆ RIGHT SQUARE BRACKET WITH QUILL */ search_corresponding ("[⁅⁆]", REVERSE, "⁅"); return; case 0x207D: /* ⁽ SUPERSCRIPT LEFT PARENTHESIS */ search_corresponding ("[⁽⁾]", FORWARD, "⁾"); return; case 0x207E: /* ⁾ SUPERSCRIPT RIGHT PARENTHESIS */ search_corresponding ("[⁽⁾]", REVERSE, "⁽"); return; case 0x208D: /* ₍ SUBSCRIPT LEFT PARENTHESIS */ search_corresponding ("[₍₎]", FORWARD, "₎"); return; case 0x208E: /* ₎ SUBSCRIPT RIGHT PARENTHESIS */ search_corresponding ("[₍₎]", REVERSE, "₍"); return; default: if (dim_HTML) { poi = cur_text; if (* poi == '>') { MLF (); } while (poi != cur_line->text && * poi != '<') { precede_char (& poi, cur_line->text); } if (* poi == '<') { search_tag (poi); return; } 
else { errmsg = "No bracket or tag to match"; } } else { if (* cur_text == '<') { search_corresponding ("[<>]", FORWARD, ">"); return; } else if (* cur_text == '>') { search_corresponding ("[<>]", REVERSE, "<"); return; } else { errmsg = "No bracket to match"; } } } if (pref_direction != REVERSE) { pref_direction = FORWARD; } if ((cv == '/' && * (cur_text + 1) == '*') || (cv == '*' && cur_text != cur_line->text && * (cur_text - 1) == '/') ) { search_for ("\\*/", FORWARD, False); } else if ((cv == '*' && * (cur_text + 1) == '/') || (cv == '/' && cur_text != cur_line->text && * (cur_text - 1) == '*') ) { search_for ("/\\*", REVERSE, False); } else if (* cur_line->text == '#') { /* #if #else #endif */ char * cp = cur_line->text; cp ++; while (white_space (* cp)) { cp ++; } /* #if/#elif/#else/#endif matching */ if (strisprefix ("if", cp)) { search_corresponding ("^#[ ]*[ie][fln]", FORWARD, "#1"); } else if (strisprefix ("end", cp)) { search_corresponding ("^#[ ]*[ie][fln]", REVERSE, "#3"); } else if (strisprefix ("el", cp)) { if (pref_direction == FORWARD) { search_corresponding ("^#[ ]*[ie][fln]", FORWARD, "#2"); } else { search_corresponding ("^#[ ]*[ie][fln]", REVERSE, "#2"); } } else { /* nothing to match */ error (errmsg); } } else if (strisprefix ("--", cur_line->text)) { /* search for next/previous MIME separator */ char pattern [maxLINE_LEN + 1]; int len = strlen (cur_line->text); BLINE (); pattern [0] = '^'; strcpy (& pattern [1], cur_line->text); if (pattern [len] == '\n') { pattern [len] = '\0'; len --; } if (streq ("--", & pattern [len - 1])) { pattern [len - 1] = '\0'; } search_for (pattern, pref_direction, False); } else { /* try to find mail header and search for next/previous mail */ LINE * line = cur_line; char * text; while (white_space (* (line->text)) && line->prev != header) { line = line->prev; } text = line->text; while (* text != ':' && * text != '\0' && ! 
white_space (* text)) { advance_char (& text); } if ((* text == ':' && text != line->text) || strisprefix ("From ", line->text)) { /* mail header found */ if (pref_direction == REVERSE) { /* go to beginning of current mail message */ MDN (); search_for ("^From ", REVERSE, True); } search_for ("^From ", pref_direction, True); } else { /* nothing to match */ error (errmsg); } } }

/*
 * get_idf extracts the identifier at the current position (text) into
 * idf_buf. start points to the beginning of the current line.
 *
 * If the character at text is not an identifier character (idfchar),
 * "No identifier" is reported and ERRORS is returned.
 * Otherwise the scan walks left to the first character of the
 * identifier (never beyond start), copies all consecutive identifier
 * characters byte by byte into idf_buf, NUL-terminates the copy and
 * returns FINE.
 * Nothing in this function limits the number of bytes written to
 * idf_buf; the caller has to supply a sufficiently large buffer.
 */
int
get_idf (idf_buf, text, start)
  char * idf_buf;
  char * text;
  char * start;
{
  char * idf_buf_poi = idf_buf;
  char * idf_poi;
  char * copy_poi;

  if (! idfchar (text)) {
    error ("No identifier");
    return ERRORS;
  } else {
    idf_poi = text;
    /* walk left while still inside the identifier */
    while (idfchar (idf_poi) && idf_poi != start) {
      precede_char (& idf_poi, start);
    }
    /* stepped one character too far (unless stopped at start) */
    if (! idfchar (idf_poi)) {
      advance_char (& idf_poi);
    }
    /* copy the identifier, one (possibly multi-byte) character at a time */
    while (idfchar (idf_poi)) {
      copy_poi = idf_poi;
      advance_char (& idf_poi);
      while (copy_poi != idf_poi) {
        * idf_buf_poi ++ = * copy_poi ++;
      }
    }
    * idf_buf_poi = '\0';
    return FINE;
  }
}

/*
 * SIDF () searches for the identifier at the current position.
 * The identifier is fetched with get_idf (); if that fails (ERRORS),
 * nothing is searched. Otherwise the identifier is passed to
 * search_expr () together with method.
 */
void
SIDF (method)
  FLAG method;
{
  char idf_buf [MAX_CHARS];	/* identifier to search for */

  int ret = get_idf (idf_buf, cur_text, cur_line->text);
  if (ret == ERRORS) {
    return;
  }

  search_expr (idf_buf, method, False);
}

/*
 * MPW () moves to the start of the previous word. A word is defined as a
 * number of non-blank characters separated by tabs spaces or linefeeds.
 *
 * move_previous_word does the actual work. With remove == DELETE the
 * text between the new position and the original cursor position is
 * deleted afterwards (refused with viewonlyerr () in view-only mode).
 * Nothing happens if the cursor is at the beginning of the first line
 * (cur_line->prev == header).
 * If wordnonblank is set, a word is a run of alpha () characters;
 * otherwise a word is either a run of identifier characters (idfchar)
 * or a run of alpha () characters that are not identifier characters.
 */
static
void
move_previous_word (remove)
  FLAG remove;
{
  register char * begin_line;
  char * textp;
  char start_char = * cur_text;
  char * start_pos = cur_text;
  FLAG idfsearch;	/* only assigned and read if wordnonblank is not set */

  if (remove == DELETE && viewonly) {
    viewonlyerr ();
    return;
  }

  /* First check if we're at the beginning of line. */
  if (cur_text == cur_line->text) {
    if (cur_line->prev == header) {
      return;
    }
    start_char = '\0';
  }

  MLF ();

  begin_line = cur_line->text;
  textp = cur_text;

  /* Check if we're in the middle of a word. */
  if (! alpha (* textp) || ! alpha (start_char)) {
    while (textp != begin_line
           && (white_space (* textp) || * textp == '\n')) {
      precede_char (& textp, begin_line);
    }
  }

  /* Now we're at the end of previous word. Skip non-blanks until a blank comes */
  if (wordnonblank) {
    while (textp != begin_line && alpha (* textp)) {
      precede_char (& textp, begin_line);
    }
  } else {
    if (idfchar (textp)) {
      idfsearch = True;
      while (textp != begin_line && idfchar (textp)) {
        precede_char (& textp, begin_line);
      }
    } else {
      idfsearch = False;
      while (textp != begin_line
             && alpha (* textp) && ! idfchar (textp)) {
        precede_char (& textp, begin_line);
      }
    }
  }

  /* Go to the next char if we're not at the beginning of the line */
  /* At the beginning of the line, check whether to stay or to go to the word */
  if (textp != begin_line && * textp != '\n') {
    advance_char (& textp);
  } else if (textp == begin_line && * textp != '\n'
             && (wordnonblank
                 ? * textp == ' '
                 : (idfsearch
                    ? ! idfchar (textp)
                    : (! alpha (* textp) || idfchar (textp))))) {
    advance_char (& textp);
    if (white_space (* textp) || textp == start_pos) {
      /* no word there or not moved, so go back */
      precede_char (& textp, begin_line);
    }
  }

  /* Find the x-coordinate of this address, and move to it */
  move_address (textp, y);
  if (remove == DELETE) {
    (void) delete_text (cur_line, textp, cur_line, start_pos);
  }
}

/*
 * MPW: with the hop flag set, BSEN () is called instead of moving
 * by one word.
 */
void
MPW ()
{
  if (hop_flag > 0) {
    BSEN ();
  } else {
    move_previous_word (NO_DELETE);
  }
}

/*
 * MNW () moves to the start of the next word. A word is defined as a number of
 * non-blank characters separated by tabs spaces or linefeeds. Always keep in
 * mind that the pointer shouldn't pass the '\n'.
 *
 * move_next_word does the actual work. With remove == DELETE the text
 * from the cursor up to the start of the next word is handed to
 * delete_text_buf () and the cursor is not moved to another line
 * (refused with viewonlyerr () in view-only mode).
 */
static
void
move_next_word (remove)
  FLAG remove;
{
  char * textp = cur_text;

  if (remove == DELETE && viewonly) {
    viewonlyerr ();
    return;
  }

  /* Move to the end of the current word. */
  if (wordnonblank) {
    if (* textp != '\n') {
      advance_char (& textp);
    }
    while (alpha (* textp)) {
      advance_char (& textp);
    }
  } else {
    if (idfchar (textp)) {
      while (* textp != '\n' && idfchar (textp)) {
        advance_char (& textp);
      }
    } else {
      while (alpha (* textp) && ! idfchar (textp)) {
        advance_char (& textp);
      }
    }
  }

  /* Skip all white spaces */
  while (* textp != '\n' && white_space (* textp)) {
    textp ++;
  }

  /* If we're deleting, delete the text in between */
  if (remove == DELETE) {
    delete_text_buf (cur_line, cur_text, cur_line, textp);
    return;
  }

  /* If we're at end of line, move to the beginning of (first word on) the next line */
  if (* textp == '\n' && cur_line->next != tail) {
    MDN ();
    move_to (LINE_START, y);
    textp = cur_text;
    /* disabled: skipping of leading white space on the new line
    while (* textp != '\n' && white_space (* textp)) {
      textp ++;
    }
    */
  }
  move_address (textp, y);
}

/*
 * MNW: with the hop flag set, ESEN () is called instead of moving
 * by one word.
 */
void
MNW ()
{
  if (hop_flag > 0) {
    ESEN ();
  } else {
    move_next_word (NO_DELETE);
  }
}

/*
 * find_y () checks if the matched line is on the current page. If it is, it
 * returns the new y coordinate, else it displays the correct page with the
 * matched line in the middle (unless redrawflag is set to False)
 * and returns the new y value.
 *
 * find_y_RD is the common implementation. When the page has to change,
 * the line pointers are reset in either case; with redrawflag False
 * the redraw itself is skipped and redraw_pending is set to True
 * instead (it is cleared again once a redraw is performed here).
 */
static
int
find_y_RD (match_line, redrawflag)
  LINE * match_line;
  FLAG redrawflag;
{
  register LINE * line;
  register int count = 0;

  /* Check if match_line is on the same page as currently displayed. */
  for (line = top_line; line != match_line && line != bot_line->next;
       line = line->next) {
    count ++;
  }
  if (line != bot_line->next) {
    return count;
  }

  /* Display new page, with match_line in center. */
  if ((line = proceed (match_line, - (SCREENMAX >> 1))) == header) {
    /* Can't display in the middle. Make first line of file top_line */
    count = 0;
    for (line = header->next; line != match_line; line = line->next) {
      count ++;
    }
    line = header->next;
  } else {
    /* New page is displayed. Set cursor to middle of page */
    count = SCREENMAX >> 1;
  }

  /* Reset pointers and redraw the screen */
  reset (line, 0);
  if (redrawflag) {
    RD_y (count);
    redraw_pending = False;
  } else {
    redraw_pending = True;
  }

  return count;
}

/* find_y: locate match_line on screen, redrawing if the page changes */
int
find_y (match_line)
  LINE * match_line;
{
  return find_y_RD (match_line, True);
}

/* find_y_w_o_RD: like find_y, but a needed redraw is only flagged
   in redraw_pending, not performed */
int
find_y_w_o_RD (match_line)
  LINE * match_line;
{
  return find_y_RD (match_line, False);
}


/*======================================================================*\
|*			Modify commands: delete				*|
\*======================================================================*/

/*
 * DCC deletes the character under the cursor. If this character is a '\n' the
 * current line is joined with the next one.
 * If this character is the only character of the line, the current line will
 * be deleted.
 * DCC0 deletes without justification.
 *
 * delete_char is the common implementation. On the '\n' of the last
 * line (cur_line->next == tail) only the line-end type is changed to
 * lineend_NONE (if it is not already) and the line is redisplayed.
 * With with_combinings set (and combining_mode on and the encoding
 * having combining characters), combining characters following the
 * character under the cursor are deleted together with it, unless
 * the cursor is itself positioned on a combined character.
 */
static
void
delete_char (with_combinings)
  FLAG with_combinings;
{
  if (* cur_text == '\n') {
    if (cur_line->next == tail) {
      if (cur_line->return_type != lineend_NONE) {
        set_modified ();
        cur_line->return_type = lineend_NONE;
        set_cursor_xy ();
        put_line (y, cur_line, x, True, False);
        status_msg ("Trailing line-end deleted");
      }
    } else {
      /* join with next line */
      (void) delete_text (cur_line, cur_text,
                          cur_line->next, cur_line->next->text);
    }
  } else {
    char * after_char = cur_text;
    advance_char (& after_char);

    if (with_combinings && combining_mode && encoding_has_combining ()) {
      /* check subsequent characters whether they are
         actually combined (or joined);
         mind: doesn't work with poor man's bidi
       */
      unsigned long unichar = unicodevalue (cur_text);

      /* skip this if already positioned within a combined char */
      if (! iscombined_unichar (unichar, cur_text, cur_line->text)) {
        /* delete combining accents together with base char */
        unichar = unicodevalue (after_char);
        while (iscombined_unichar (unichar, after_char, cur_line->text)) {
          advance_char (& after_char);
          unichar = unicodevalue (after_char);
        }
      }
    }

    (void) delete_text (cur_line, cur_text, cur_line, after_char);
  }
}

/*
 * DCC: with the ctrl modifier in keyshift, keyshift is cleared and
 * only the single character is deleted; otherwise following
 * combining characters are deleted along with it.
 */
void
DCC ()
{
  if (keyshift & ctrl_mask) {
    keyshift = 0;
    delete_char (False);
  } else {
    delete_char (True);
  }
}

/* DCC0 currently just calls DCC */
void
DCC0 ()
{
  DCC ();
}

/*
   DPC0 deletes the character on the left side of the cursor.
   If the cursor is at the beginning of the line, the line end
   is deleted, merging the two lines.
   With hop flag, delete left part of line from current point.

   With hop flag in emacs mode (emulation == 'e'), DPW () is called
   instead. Without hop flag, the cursor is moved left (ctrl_MLF ()
   if the ctrl modifier is set, else MLF ()) and the character then
   under the cursor is deleted; combining characters are deleted
   along with it unless the cursor was on a combining character
   before moving.
   Nothing is done at the top of file; refused in view-only mode.
 */
void
DPC0 ()
{
  char * delete_pos;

  if (x == 0 && cur_line->prev == header) {
    /* Top of file */
    return;
  }

  if (viewonly) {
    viewonlyerr ();
    return;
  }

  if (hop_flag > 0) {
    hop_flag = 0;
    if (emulation == 'e') {	/* emacs mode */
      DPW ();
    } else if (cur_text != cur_line->text) {
      delete_pos = cur_text;
      BLINE ();
      (void) delete_text (cur_line, cur_line->text, cur_line, delete_pos);
    }
  } else {
    /* must be determined before the cursor is moved */
    FLAG was_on_comb = iscombining (unicodevalue (cur_text));
    if (keyshift & ctrl_mask) {
      ctrl_MLF ();
    } else {
      MLF ();
    }
    if (was_on_comb) {
      delete_char (False);
    } else {
      delete_char (True);
    }
  }
}

/* DPC normally deletes the character left just as DPC0 does. However, unless the hop flag is set, it first checks if there is anything but white space on the current line left of the current position. If there is only white space, it tries to perform a "backtab" function, reverting the indentation to the previous amount above in the text (unless if it's in the line immediately above the current line). 
*/
/* Dispatch in DPC: with the alt modifier the character under the
   cursor is deleted (DCC); with the ctrl modifier or the hop flag
   DPC0 is called directly; otherwise the back-tab check described
   above is performed first. */
void
DPC ()
{
  if (keyshift & alt_mask) {
    DCC ();
  } else if (keyshift & ctrl_mask) {
    DPC0 ();
  } else if (hop_flag > 0) {
    DPC0 ();
  } else {
    char * cp = cur_line->text;
    int column = 0;

    /* measure the white space from the line beginning up to the cursor */
    while (* cp != '\0' && cp != cur_text && white_space (* cp)) {
      advance_char_scr (& cp, & column, cur_line->text);
    }
    if (cp == cur_text) {
      /* only white space left of current position */
      int previous_col = column;
      LINE * lp = cur_line->prev;

      /* walk upwards until a line with less indentation is found
         or the file header is reached */
      while (previous_col >= column && lp != header) {
        /* count white space on line lp */
        cp = lp->text;
        previous_col = 0;
        while (* cp != '\0' && previous_col < column && white_space (* cp)) {
          advance_char_scr (& cp, & previous_col, lp->text);
        }
        if (* cp == '\n' || * cp == '\0') {
          /* don't count space lines */
          previous_col = column;
        }

        lp = lp->prev;
      }

      /* if less indented previous line was found,
         and this was not on the line immediately
         preceding the current line,
         perform the back TAB function */
      /* (lp was already stepped one line further up, so lp->next
         is the line that was examined last) */
      if (previous_col < column && cur_line->prev != lp->next) {
        /* delete leftwards, re-measuring the cursor column after
           each deletion, until the target column is reached or passed */
        while (column > previous_col) {
          DPC0 ();
          column = 0;
          cp = cur_line->text;
          while (* cp != '\0' && cp != cur_text) {
            advance_char_scr (& cp, & column, cur_line->text);
          }
        }
        /* if too much was deleted, fill up with spaces */
        while (column < previous_col) {
          S (' ');
          column ++;
        }
      } else {
        DPC0 ();
      }
    } else {
      /* if in combined char, delete base char, not 1 more left */
      keyshift |= ctrl_mask;
      DPC0 ();
    }
  }
}

/*
 * DLINE delete the whole current line.
 * With the hop flag, only the text from the cursor to the end of the
 * line is removed (via delete_text_buf), keeping the line end.
 * Otherwise the cursor is moved to the beginning of the line, the line
 * contents are removed with DLN () (if the line is not empty) and the
 * remaining line end is removed with DCC ().
 * Refused in view-only mode.
 */
void
DLINE ()
{
  if (viewonly) {
    viewonlyerr ();
    return;
  }

  if (hop_flag > 0) {
    hop_flag = 0;
    if (* cur_text != '\n') {
      delete_text_buf (cur_line, cur_text,
                       cur_line, cur_text + length_of (cur_text) - 1);
    }
  } else {
    BLINE ();
    if (* cur_text != '\n') {
      DLN ();
    }
    DCC ();
  }
}

/* * DLN deletes all characters until the end of the line. If the current * character is a '\n', then delete that char. 
*/ void DLN () { if (hop_flag > 0) { hop_flag = 0; DLINE (); } else if (* cur_text == '\n') { /* DCC (); */ if (cur_line->next != tail) { delete_text_buf (cur_line, cur_text, cur_line->next, cur_line->next->text); } } else { delete_text_buf (cur_line, cur_text, cur_line, cur_text + length_of (cur_text) - 1); } } /* * DNW () deletes the next word (as defined in MNW ()) */ void DNW () { if (* cur_text == '\n') { DCC (); } else { move_next_word (DELETE); } } /* * DPW () deletes the previous word (as defined in MPW ()) */ void DPW () { if (cur_text == cur_line->text) { DPC0 (); } else { move_previous_word (DELETE); } } /*======================================================================*\ |* Modify commands: insert *| \*======================================================================*/ static void enterNUL () { if (viewonly) { viewonlyerr (); return; } S ('\n'); /* Insert a new line */ MUP (); /* Move one line up */ move_to (LINE_END, y); /* Move to end of this line */ cur_line->return_type = lineend_NUL; put_line (y, cur_line, x, True, False); MRT (); /* move behind inserted NUL */ } /* * Functions to insert character at current location. * S0 inserts without justification. 
*/ static unsigned long previous_unichar = 0; /* * S1byte: enter a byte of a character; collect bytes for multi-byte characters */ static void S1byte (newchar, JUSlvl, utf8_transform) register character newchar; int JUSlvl; FLAG utf8_transform; { static character buffer [7]; static character * utfpoi = buffer; static int utfcount = 1; static int cjkremaining = 0; static character firstbyte = '\0'; static unsigned long unichar; int offset; int width = 1; if (newchar == '\0') { if (firstbyte != '\0') { firstbyte = '\0'; ring_bell (); } else { enterNUL (); } return; } if (utf8_text) { if (utf8_transform) { /* UTF-8 input for UTF-8 text */ if (newchar < 0x80) { unichar = newchar; * utfpoi = newchar; utfpoi ++; * utfpoi = '\0'; utfpoi = buffer; } else if ((newchar & 0xC0) == 0x80) { /* UTF-8 sequence byte */ unichar = (unichar << 6) | (newchar & 0x3F); * utfpoi = newchar; utfpoi ++; utfcount --; if (utfcount == 0) { * utfpoi = '\0'; utfpoi = buffer; width = uniscrwidth (unichar, cur_text, cur_line->text); } else { return; } } else { /* first UTF-8 byte */ utfpoi = buffer; * utfpoi = newchar; if ((newchar & 0xE0) == 0xC0) { utfcount = 2; unichar = newchar & 0x1F; } else if ((newchar & 0xF0) == 0xE0) { utfcount = 3; unichar = newchar & 0x0F; } else if ((newchar & 0xF8) == 0xF0) { utfcount = 4; unichar = newchar & 0x07; } else if ((newchar & 0xFC) == 0xF8) { utfcount = 5; unichar = newchar & 0x03; } else if ((newchar & 0xFE) == 0xFC) { utfcount = 6; unichar = newchar & 0x01; } else /* ignore illegal UTF-8 code */ return; utfpoi ++; utfcount --; return; } } else { /* 8-bit input for UTF-8 text */ unichar = newchar; if (newchar < 0x80) { buffer [0] = newchar; buffer [1] = '\0'; } else { buffer [0] = (newchar >> 6) | 0xC0; buffer [1] = (newchar & 0x3F) | 0x80; buffer [2] = '\0'; } } } else if (cjk_text) { /* 8/16-bit (CJK) input for CJK text */ if (cjkremaining > 0) { * utfpoi ++ = newchar; * utfpoi = '\0'; cjkremaining --; if (cjkremaining > 0) { return; } } else if 
(firstbyte != '\0') { buffer [0] = firstbyte; buffer [1] = newchar; buffer [2] = '\0'; cjkremaining = CJK_len (buffer) - 2; if (cjkremaining > 0) { firstbyte = '\0'; utfpoi = & buffer [2]; return; } } else if (multichar (newchar)) { firstbyte = newchar; return; } else { buffer [0] = newchar; buffer [1] = '\0'; } if (* buffer == '\t') { width = 1; } else { width = col_count (buffer); } firstbyte = '\0'; } else if (utf8_transform) { /* UTF-8 input for 8-bit text */ buffer [1] = '\0'; if (newchar < 0x80) { buffer [0] = newchar; } else if ((newchar & 0xC0) == 0x80) { /* UTF-8 sequence byte; not handled here anymore */ unichar = (unichar << 6) | (newchar & 0x3F); utfcount --; if (utfcount == 0) { if ((unichar & 0xFF) == unichar) { buffer [0] = unichar & 0xFF; } else { buffer [0] = ''; } } else { return; } } else { /* first UTF-8 byte; not handled here anymore */ if ((newchar & 0xE0) == 0xC0) { utfcount = 2; unichar = newchar & 0x1F; } else if ((newchar & 0xF0) == 0xE0) { utfcount = 3; unichar = newchar & 0x0F; } else if ((newchar & 0xF8) == 0xF0) { utfcount = 4; unichar = newchar & 0x07; } else if ((newchar & 0xFC) == 0xF8) { utfcount = 5; unichar = newchar & 0x03; } else if ((newchar & 0xFE) == 0xFC) { utfcount = 6; unichar = newchar & 0x01; } else { /* ignore illegal UTF-8 code */ return; } utfcount --; return; } } else { /* 8-bit input for 8-bit text */ buffer [0] = newchar; buffer [1] = '\0'; } /* right-to-left support */ if (poormansbidi && utf8_text && is_right_to_left (previous_unichar)) { if (newchar == '\n') { ELINE (); } else if (iscombining (unichar) && * cur_text != '\n') { MRT (); } else if (unichar != ' ' && unichar != '\t' && ! 
is_right_to_left (unichar)) { unsigned long rc = charvalue (cur_text); while (rc == ' ' || rc == '\t' || is_right_to_left (rc) || iscombining (rc)) { MRT (); rc = charvalue (cur_text); } } } /* Insert the new character */ offset = cur_text - cur_line->text + length_of (buffer); if (insert_text (cur_line, cur_text, buffer) == ERRORS) { return; } /* Fix screen */ if (newchar == '\n') { set_cursor (0, y); if (y == SCREENMAX) { /* Can't use display () */ line_print (y, cur_line); (void) forward_scroll (True); move_to (0, y); } else { reset (top_line, y); /* Reset pointers */ if (can_add_line) { clean_menus (); add_line (y + 1); scrollbar_scroll_down (y + 1); clear_status (); display (y, cur_line, 1, y + 1); } else { display (y, cur_line, last_y - y, y + 1); } move_to (0, y + 1); } } else if (x + width == XBREAK) { /* If line must be shifted, just call move_to */ move_to (x + width, y); /* adjust in case of combining character position */ if (cur_line->text + offset != cur_text) { move_address (cur_line->text + offset, y); } } else { /* else display rest of line */ /* also redraw previous char if it was incomplete ... just always redraw it! 
if (width != 0) { put_line (y, cur_line, x, False, False); move_to (x + width, y); } else */ { /* take care of combining char added to left char */ int newx = x + width; move_to (newx, y); if (iswide (unicodevalue (buffer))) { move_to (x - 3, y); } else { move_to (x - 1, y); } put_line (y, cur_line, x, False, False); /* adjust to new position */ move_address (cur_line->text + offset, y); } } /* right-to-left support */ if (poormansbidi && utf8_text) { if (unichar == ' ' || unichar == '\t') { if (is_right_to_left (previous_unichar)) { move_to (x - 1, y); } } else if (unichar != '\n') { if (iscombining (unichar)) { if (is_right_to_left (previous_unichar)) { move_to (x - 1, y); } } else { if (is_right_to_left (unichar)) { move_to (x - 1, y); } previous_unichar = unichar; } } } if (JUSlvl > 0) { JUSandreturn (); } } void S0 (newchar) register character newchar; { S1byte (newchar, 0, utf8_input); } static void Sbyte (newchar) register character newchar; { S1byte (newchar, JUSlevel, False); } /* * Insert newline with auto indentation. */ static void SNLindent () { char * coptext; S ('\n'); coptext = cur_line->prev->text; while (* coptext == ' ' || * coptext == '\t') { S (* coptext); coptext ++; } } /* * Insert new line at current location. 
*/ void SNL () { if (keyshift & alt_mask) { keyshift = 0; Popmark (); return; } if (utf8_lineends == False && (keyshift & ctrl_mask)) { keyshift = 0; /* new line within same paragraph */ if (JUSmode == 0) { if (* (cur_text -1) != ' ') { S (' '); } } } if (autoindent == False || last_delta_readchar < 10 || average_delta_readchar < 10) { S ('\n'); } else { SNLindent (); } } /** Insert character by function */ void STAB () { S ('\t'); } void SSPACE () { S (' '); } /** Underline preceding header */ void Underline () { FLAG at_end = * cur_text == '\n'; int cols; hop_flag = 0; if (cur_text == cur_line->text) { MLF (); } ELINE (); cols = col_count (cur_line->text); SNLindent (); cols -= col_count (cur_line->text); while (cols > 0) { S0 ('-'); cols --; } if (! at_end) { MRT (); if (white_space (* cur_text)) { MNW (); } } } static void Spair (l, r) character l; character r; { S1byte (l, JUSlevel, utf8_input); SNLindent (); S1byte (r, JUSlevel, utf8_input); MUP (); ELINE (); } static character lastchar = '\0'; void reset_smart_replacement () { lastchar = '\0'; } static void Sutf8 (newchar) unsigned long newchar; { if (newchar < 0x80) { S1byte ((character) newchar, JUSlevel, True); } else if (newchar < 0x800) { S1byte ((character) (0xC0 | (newchar >> 6)), JUSlevel, True); S1byte ((character) (0x80 | (newchar & 0x3F)), JUSlevel, True); } else if (newchar < 0x10000) { S1byte ((character) (0xE0 | (newchar >> 12)), JUSlevel, True); S1byte ((character) (0x80 | ((newchar >> 6) & 0x3F)), JUSlevel, True); S1byte ((character) (0x80 | (newchar & 0x3F)), JUSlevel, True); } else if (newchar < 0x200000) { S1byte ((character) (0xF0 | (newchar >> 18)), JUSlevel, True); S1byte ((character) (0x80 | ((newchar >> 12) & 0x3F)), JUSlevel, True); S1byte ((character) (0x80 | ((newchar >> 6) & 0x3F)), JUSlevel, True); S1byte ((character) (0x80 | (newchar & 0x3F)), JUSlevel, True); } else if (newchar < 0x4000000) { S1byte ((character) (0xF8 | (newchar >> 24)), JUSlevel, True); S1byte ((character) 
(0x80 | ((newchar >> 18) & 0x3F)), JUSlevel, True); S1byte ((character) (0x80 | ((newchar >> 12) & 0x3F)), JUSlevel, True); S1byte ((character) (0x80 | ((newchar >> 6) & 0x3F)), JUSlevel, True); S1byte ((character) (0x80 | (newchar & 0x3F)), JUSlevel, True); } else if (newchar < 0x80000000) { S1byte ((character) (0xFC | (newchar >> 30)), JUSlevel, True); S1byte ((character) (0x80 | ((newchar >> 24) & 0x3F)), JUSlevel, True); S1byte ((character) (0x80 | ((newchar >> 18) & 0x3F)), JUSlevel, True); S1byte ((character) (0x80 | ((newchar >> 12) & 0x3F)), JUSlevel, True); S1byte ((character) (0x80 | ((newchar >> 6) & 0x3F)), JUSlevel, True); S1byte ((character) (0x80 | (newchar & 0x3F)), JUSlevel, True); } else { error ("Invalid Unicode value"); } } static void Scjk (code) unsigned long code; { character cjkbytes [5]; character * cp; if (no_char (code)) { ring_bell (); error ("Invalid character"); } else { (void) cjkencode (code, cjkbytes); if (* cjkbytes != '\0') { cp = cjkbytes; while (* cp != '\0') { S1byte (* cp ++, JUSlevel, False); } } else { ring_bell (); error ("Invalid CJK character code"); } } } static void S (newchar) register character newchar; { if (hop_flag > 0) { lastchar = newchar; if (newchar == '\n') { S1byte (newchar, JUSlevel, utf8_input); return; } hop_flag = 0; flags_changed = True; if (newchar == '(') { Spair ('(', ')'); return; } else if (newchar == '[') { Spair ('[', ']'); return; } else if (newchar == '{') { Spair ('{', '}'); return; } else if (newchar == '<') { Spair ('<', '>'); return; } else if (newchar == '/') { if (* cur_text != '\n') { SNLindent (); MUP (); } S1byte ('/', JUSlevel, utf8_input); S1byte ('*', JUSlevel, utf8_input); S1byte ('*', JUSlevel, utf8_input); SNLindent (); S1byte (' ', JUSlevel, utf8_input); SNLindent (); S1byte ('*', JUSlevel, utf8_input); S1byte ('/', JUSlevel, utf8_input); MUP (); S1byte (' ', JUSlevel, utf8_input); S1byte (' ', JUSlevel, utf8_input); return; } } else if (expand_tabs && newchar == '\t') { do { 
S1byte (' ', JUSlevel, utf8_input); } while (x % tabsize != 0); return; } else if (newchar == ' ' && (keyshift & ctrlshift_mask) == ctrlshift_mask) { (void) insert_unichar (0xA0); /*   NO-BREAK SPACE */ return; } else if (utf8_text && quote_type != 0 && newchar == '-' && lastchar == '-' && cur_text != cur_line->text && * (cur_text - 1) == '-') { /* handle smart dashes */ lastchar = ' '; DPC (); if (cur_text != cur_line->text && * (cur_text - 1) == ' ') { Sutf8 (0x2013); /* – EN DASH */ S1byte (' ', JUSlevel, utf8_input); } else { Sutf8 (0x2014); /* — EM DASH */ } return; } else if (utf8_text && quote_type != 0 && newchar == '-' && lastchar == '<' && cur_text != cur_line->text && * (cur_text - 1) == '<') { /* handle smart arrows */ lastchar = ' '; DPC (); Sutf8 (0x2190); /* ← LEFTWARDS ARROW */ return; } else if (utf8_text && quote_type != 0 && newchar == '>' && lastchar == '-' && cur_text != cur_line->text && * (cur_text - 1) == '-') { /* handle smart arrows */ lastchar = ' '; DPC (); Sutf8 (0x2192); /* → RIGHTWARDS ARROW */ return; } else if (utf8_text && quote_type != 0 && newchar == '>' && lastchar == '<' && cur_text != cur_line->text && * (cur_text - 1) == '<') { /* handle smart arrows */ lastchar = ' '; DPC (); Sutf8 (0x2194); /* ↔ LEFT RIGHT ARROW */ return; } else if (quote_type != 0 && newchar == '-' && (streq (script (charvalue (cur_text)), "Hebrew") || streq (script (precedingchar (cur_text, cur_line->text)), "Hebrew") ) ) { /* handle Maqaf */ lastchar = ' '; (void) insert_unichar (0x05BE); /* ־ MAQAF */ return; } lastchar = newchar; S1byte (newchar, JUSlevel, utf8_input); } void Scharacter (code) unsigned long code; { if (no_char (code)) { ring_bell (); error ("Invalid character"); } else if (code < 0x80) { S (code); /* go through HOP handling */ } else if (utf8_text) { Sutf8 (code); } else if (cjk_text) { Scjk (code); } else if (code < 0x100) { Sbyte (code & 0xFF); } else { ring_bell (); error ("Invalid character"); } } /* * insert_character inserts 
character literally */ static FLAG insert_character (code) unsigned long code; { if (code == CHAR_UNKNOWN) { ring_bell (); error ("Unknown character mnemonic"); return False; } else if (code == CHAR_INVALID) { ring_bell (); error ("Invalid character"); return False; } else if (utf8_text) { Sutf8 (code); return True; } else if (cjk_text) { Scjk (code); return True; } else if (code < 0x100) { Sbyte (code); return True; } else { ring_bell (); error ("Invalid character"); return False; } } FLAG insert_unichar (uc) unsigned long uc; { return insert_character (charcode (uc)); } /* * LIB inserts a line at the current position and moves back to the end of * the previous line. It keeps the line end type of the current line. */ void LIB () { lineend_type return_type; if (viewonly) { viewonlyerr (); return; } if (hop_flag > 0) { return_type = lineend_NONE; hop_flag = 0; } else if (cur_line->return_type == lineend_PS) { return_type = lineend_LS; } else { return_type = cur_line->return_type; } S ('\n'); /* Insert the line */ MUP (); /* Move one line up */ move_to (LINE_END, y); /* Move to end of this line */ if (cur_line->return_type != return_type) { cur_line->return_type = return_type; put_line (y, cur_line, x, True, False); } } /*======================================================================*\ |* Smart quotes *| \*======================================================================*/ typedef enum {LEFTDOUBLE, RIGHTDOUBLE, LEFTSINGLE, RIGHTSINGLE} quoteposition_type; static unsigned long quote_mark_value (pos) quoteposition_type pos; { unsigned long unichar; int utflen; char * q = quote_mark (quote_type, pos); utf8_info (q, & utflen, & unichar); return unichar; } int quote_type = 0; static FLAG quote_open [2] = {False, False}; static void reset_quote_state () { quote_open [False] = False; quote_open [True] = False; } void quote_type_up () { if (quote_type < count_quote_types () - 1) { quote_type ++; if (* (quote_mark (quote_type, 0)) == '\0') { quote_type ++; } } 
else { quote_type = 0; } reset_quote_state (); } void quote_type_down () { if (quote_type > 0) { quote_type --; if (* (quote_mark (quote_type, 0)) == '\0') { quote_type --; } } else { quote_type = count_quote_types () - 1; } reset_quote_state (); } void set_quote_type (qt) int qt; { if (qt >= 0 && qt < count_quote_types ()) { quote_type = qt; } reset_quote_state (); } void set_quote_style (q) char * q; { set_quote_type (lookup_quotes (q)); } static void Squote (doublequote) FLAG doublequote; { unsigned long prevchar; FLAG insert_left; unsigned long leftquote; unsigned long rightquote; if (doublequote) { leftquote = quote_mark_value (LEFTDOUBLE); rightquote = quote_mark_value (RIGHTDOUBLE); } else { leftquote = quote_mark_value (LEFTSINGLE); rightquote = quote_mark_value (RIGHTSINGLE); } if (! utf8_text) { leftquote = encodedchar (leftquote); rightquote = encodedchar (rightquote); if (no_char (leftquote) || no_char (rightquote)) { error ("Quote marks style not available in current encoding"); return; } } if (smart_quotes) { prevchar = unicode (precedingchar (cur_text, cur_line->text)); if (quote_open [doublequote]) { insert_left = False; quote_open [doublequote] = False; } else if (prevchar == 0x0A) { insert_left = True; quote_open [doublequote] = UNSURE; } else if (iswide_unichar (prevchar)) { if (quote_open [doublequote] == UNSURE || quote_open [doublequote] == OPEN) { insert_left = False; quote_open [doublequote] = False; } else { insert_left = True; quote_open [doublequote] = True; } } else { insert_left = prevchar == (unsigned char) '(' || prevchar == (unsigned char) '[' || prevchar == (unsigned char) '{' || prevchar == (unsigned char) ' ' || prevchar == (unsigned char) '\t' || prevchar == (unsigned char) '\n' || prevchar == quote_mark_value (LEFTDOUBLE) || prevchar == quote_mark_value (LEFTSINGLE) ; if (insert_left) { quote_open [doublequote] = OPEN; } else { quote_open [doublequote] = False; } } if (insert_left) { Scharacter (leftquote); } else { Scharacter 
(rightquote); } } else { if (doublequote) { S ('"'); } else { S ('\''); } } } void Sdoublequote () { if (keyshift & altshift_mask) { S ('"'); } else { Squote (True); } } void Ssinglequote () { if (keyshift & altshift_mask) { S ('\''); } else if (hop_flag > 0 && utf8_text) { /* enter apostrophe */ Sutf8 (0x2019); } else { Squote (False); } } void Sdash () { if (hop_flag > 0) { Underline (); } else { S ('-'); } } /*======================================================================*\ |* Character code handling *| \*======================================================================*/ /* * Replace current character with its hex/octal/decimal representation. */ static character hexdig (c) character c; { if (c < 10) { return c + '0'; } else { return c - 10 + 'A'; } } static void insertcode (c, radix) character c; int radix; { int radix2; if (radix == 8) { S (hexdig ((c >> 6) & 007)); S (hexdig ((c >> 3) & 007)); S (hexdig ((c) & 007)); } else if (radix == 16) { S (hexdig ((c >> 4) & 017)); S (hexdig ((c) & 017)); } else { /* assume radix = 10 or, at least, three digits suffice */ radix2 = radix * radix; S (hexdig (c / radix2)); S (hexdig ((c % radix2) / radix)); S (hexdig (c % radix)); } } static void insertunicode (unichar) unsigned long unichar; { if (no_unichar (unichar)) { error ("No Unicode value"); } else { if (unichar > 0xFFFF) { if (unichar > 0x10FFFF) { insertcode (unichar >> 24, 16); } insertcode (unichar >> 16, 16); } insertcode (unichar >> 8, 16); insertcode (unichar, 16); } } static void insertvalue (v, radix) unsigned long v; int radix; { char buffer [12]; char * bufpoi = & buffer [11]; if (radix == 16) { insertunicode (v); } else { * bufpoi = '\0'; while (v > 0) { bufpoi --; * bufpoi = hexdig (v % radix); v = v / radix; } while (* bufpoi != '\0') { S (* bufpoi ++); } } } static void changetocode (radix, univalue) int radix; FLAG univalue; { character c = * cur_text; int utfcount = 1; unsigned long unichar; #ifdef endlessloop int utflen; #endif char 
buffer [7]; char * textpoi; char * utfpoi; if (c == '\n') { switch (cur_line->return_type) { case lineend_NONE: unichar = CHAR_INVALID; break; case lineend_NUL: unichar = 0x00; break; case lineend_LF: unichar = 0x0A; break; case lineend_CRLF: unichar = 0x0D0A; break; case lineend_CR: unichar = 0x0D; break; case lineend_LS: unichar = 0x2028; break; case lineend_PS: unichar = 0x2029; break; default: unichar = CHAR_INVALID; break; } if (unichar < 0x100) { insertcode (unichar, 16); } else if (! no_unichar (unichar)) { insertunicode (unichar); } } else if (utf8_text) { if ((c & 0x80) == 0x00) { utfcount = 1; unichar = c; } else if ((c & 0xE0) == 0xC0) { utfcount = 2; unichar = c & 0x1F; } else if ((c & 0xF0) == 0xE0) { utfcount = 3; unichar = c & 0x0F; } else if ((c & 0xF8) == 0xF0) { utfcount = 4; unichar = c & 0x07; } else if ((c & 0xFC) == 0xF8) { utfcount = 5; unichar = c & 0x03; } else if ((c & 0xFE) == 0xFC) { utfcount = 6; unichar = c & 0x01; } else /* illegal UTF-8 code */ { if (! univalue) { /* DCC (); */ insertcode (c, radix); } error ("Invalid UTF-8 sequence"); return; } utfcount --; utfpoi = buffer; * utfpoi ++ = c; textpoi = cur_text; textpoi ++; while (utfcount > 0 && (* textpoi & 0xC0) == 0x80) { c = * textpoi ++; * utfpoi ++ = c; unichar = (unichar << 6) | (c & 0x3F); utfcount --; } * utfpoi = '\0'; /* delete_char (False); */ if (univalue) { insertvalue (unichar, radix); } else { utfpoi = buffer; while (* utfpoi != '\0') { insertcode (* utfpoi ++, radix); } } #ifdef endlessloop utf8_info (cur_text, & utflen, & unichar); if (iscombining_unichar (unichar)) { changetocode (radix, univalue); } #endif if (utfcount > 0) { error ("Invalid UTF-8 sequence"); } } else if (cjk_text) { if (univalue) { insertvalue (lookup_encodedchar (charvalue (cur_text)), radix); } else { (void) cjkencode (charvalue (cur_text), buffer); /* DCC (); */ textpoi = buffer; while (* textpoi != '\0') { insertcode (* textpoi ++, radix); } } } else if (mapped_text && univalue) { /* DCC (); 
*/ insertvalue (lookup_encodedchar (c), radix); } else { /* DCC (); */ insertcode (c, radix); } } static void changefromcode (format, univalue) char * format; FLAG univalue; { long scancode; unsigned long code; if (sscanf (cur_text, format, & scancode) > 0) { if (scancode == -1) { ring_bell (); error ("Character code too long to scan"); return; } code = scancode; if (univalue && (cjk_text || mapped_text)) { code = encodedchar (code); if (no_char (code)) { ring_bell (); error ("Invalid character"); return; } } if (utf8_text && ! univalue) { unsigned char buffer [9]; int i = 9; int utfcount; buffer [-- i] = '\0'; while (code) { buffer [-- i] = code & 0xFF; code = code >> 8; } utf8_info (& buffer [i], & utfcount, & code); if (utfcount != sizeof (buffer) - 1 - i || utfcount != UTF8_len (buffer [i]) || (buffer [i] & 0xC0) == 0x80) { ring_bell (); error ("Illegal UTF-8 sequence"); return; } } (void) insert_character (code); } else { ring_bell (); error ("No character code at text position"); hop_flag = 0; MRT (); } } void changehex () { if (hop_flag > 0) { hop_flag = 0; if (utf8_text || cjk_text) { changefromcode ("%lx", False); } else { changefromcode ("%2lx", False); } } else { changetocode (16, False); } } void changeuni () { if (hop_flag > 0) { hop_flag = 0; changefromcode ("%lx", True); } else { changetocode (16, True); } } void changeoct () { if (hop_flag > 0) { hop_flag = 0; changefromcode ("%lo", True); } else { changetocode (8, True); } } void changedec () { if (hop_flag > 0) { hop_flag = 0; changefromcode ("%lu", True); } else { changetocode (10, True); } } /*======================================================================*\ |* Character information display *| \*======================================================================*/ static char title [MAX_CHARS]; /* buffer for description menu title */ /** Han info popup menu structure; must be static because it may be refreshed later. 
*/ static menuitemtype descr_menu [27]; /** display_Han shows information about Han characters (pronunciations and definition from Unihan database) */ void display_Han (cpoi, force_utf8) char * cpoi; FLAG force_utf8; { int utfcount; unsigned long hanchar; unsigned long unichar; struct hanentry * entry; char * flag_Mandarin = ""; char * value_Mandarin = ""; char * flag_Cantonese = ""; char * value_Cantonese = ""; char * flag_Japanese = ""; char * value_Japanese = ""; char * flag_Sino_Japanese = ""; char * value_Sino_Japanese = ""; char * flag_Hangul = ""; char * value_Hangul = ""; char * flag_Korean = ""; char * value_Korean = ""; char * flag_Vietnamese = ""; char * value_Vietnamese = ""; char * flag_HanyuPinlu = ""; char * value_HanyuPinlu = ""; char * flag_Tang = ""; char * value_Tang = ""; char * flag_description = ""; char * value_description = ""; int i; char s [27] [MAX_CHARS]; /* buffer for description menu */ int descr_line; int descr_col; char * descrpoi; char * descrline; char * lastcomma; char * lastsemicolon; char * lastblank; char * lastcharacter; char * cut; char * bufpoi; int maxwidth = XMAX - 2; int col; if (force_utf8 || utf8_text) { utf8_info (cpoi, & utfcount, & unichar); if (cjk_text) { hanchar = encodedchar (unichar); } } else if (cjk_text) { hanchar = charvalue (cpoi); unichar = lookup_encodedchar (hanchar); } else { return; } entry = lookup_handescr (unichar); if (! entry) { /* ignore missing entry if valid Unicode character is not assigned to Han character range */ if (! no_char (unichar) && ! streq (script (unichar), "Han")) { return; } } if (disp_Han_full && ! force_utf8) { i = 0; if (cjk_text && ! no_unichar (unichar)) { build_string (title, "%04lX U+%04lX", hanchar, unichar); } else if (cjk_text) { if (valid_cjk (hanchar, NIL_PTR)) { build_string (title, "%04lX U? 
unknown", hanchar); } else { build_string (title, "Invalid %04lX", hanchar); } } else { build_string (title, "U+%04lX", unichar); } if (entry && disp_Han_Mandarin && * entry->Mandarin) { strcpy (s [i], "Mandarin: "); strcat (s [i], entry->Mandarin); fill_menuitem (& descr_menu [i], s [i]); i ++; } if (entry && disp_Han_Cantonese && * entry->Cantonese) { strcpy (s [i], "Cantonese: "); strcat (s [i], entry->Cantonese); fill_menuitem (& descr_menu [i], s [i]); i ++; } if (entry && disp_Han_Japanese && * entry->Japanese) { strcpy (s [i], "Japanese: "); strcat (s [i], entry->Japanese); fill_menuitem (& descr_menu [i], s [i]); i ++; } if (entry && disp_Han_Sino_Japanese && * entry->Sino_Japanese) { strcpy (s [i], "Sino-Japanese: "); strcat (s [i], entry->Sino_Japanese); fill_menuitem (& descr_menu [i], s [i]); i ++; } if (entry && disp_Han_Hangul && * entry->Hangul) { strcpy (s [i], "Hangul: "); strcat (s [i], entry->Hangul); fill_menuitem (& descr_menu [i], s [i]); i ++; } if (entry && disp_Han_Korean && * entry->Korean) { strcpy (s [i], "Korean: "); strcat (s [i], entry->Korean); fill_menuitem (& descr_menu [i], s [i]); i ++; } if (entry && disp_Han_Vietnamese && * entry->Vietnamese) { strcpy (s [i], "Vietnamese: "); strcat (s [i], entry->Vietnamese); fill_menuitem (& descr_menu [i], s [i]); i ++; } if (entry && disp_Han_HanyuPinlu && * entry->HanyuPinlu) { strcpy (s [i], "HanyuPinlu: "); strcat (s [i], entry->HanyuPinlu); fill_menuitem (& descr_menu [i], s [i]); i ++; } if (entry && disp_Han_Tang && * entry->Tang) { strcpy (s [i], "Tang: "); strcat (s [i], entry->Tang); fill_menuitem (& descr_menu [i], s [i]); i ++; } /* append separator */ fill_menuitem (& descr_menu [i], NIL_PTR); i ++; if (entry && disp_Han_description && * entry->Definition) { #define wrap_description #ifdef wrap_description descrpoi = entry->Definition; while (* descrpoi != '\0') { descrline = descrpoi; col = 0; lastcomma = NIL_PTR; lastsemicolon = NIL_PTR; lastblank = NIL_PTR; /* find max string 
length to fit in line */ while (col < maxwidth && * descrpoi != '\0') { if (* descrpoi == ' ') { lastblank = descrpoi; } else if (* descrpoi == ',') { lastcomma = descrpoi; } else if (* descrpoi == ';') { lastsemicolon = descrpoi; } else { lastcharacter = descrpoi; } advance_char_scr (& descrpoi, & col, descrline); } /* determine cut at last separator */ if (* descrpoi == '\0') { cut = descrpoi; } else if (lastsemicolon != NIL_PTR) { cut = lastsemicolon + 1; } else if (lastcomma != NIL_PTR) { cut = lastcomma + 1; } else if (lastblank != NIL_PTR) { cut = lastblank; } else { cut = lastcharacter; } /* add line to menu, adjust poi to cut */ descrpoi = descrline; bufpoi = s [i]; while (descrpoi != cut) { * bufpoi ++ = * descrpoi ++; } * bufpoi = '\0'; fill_menuitem (& descr_menu [i], s [i]); i ++; /* skip white space */ while (* descrpoi == ' ') { descrpoi ++; } } #else strcpy (s [i], entry->Definition); fill_menuitem (& descr_menu [i], s [i]); i ++; #endif } /* determine menu position; adjust if too far down */ descr_col = x; descr_line = y + 1; if (descr_line + i + 2 > YMAX) { descr_line = y - i - 2; if (descr_line < 0) { descr_line = 0; descr_col ++; } } (void) popup_menu (descr_menu, i, descr_col, descr_line, title, False, True, NIL_PTR); } else { if (entry && disp_Han_Mandarin && * entry->Mandarin) { flag_Mandarin = " M: "; value_Mandarin = entry->Mandarin; } if (entry && disp_Han_Cantonese && * entry->Cantonese) { flag_Cantonese = " C: "; value_Cantonese = entry->Cantonese; } if (entry && disp_Han_Japanese && * entry->Japanese) { flag_Japanese = " J: "; value_Japanese = entry->Japanese; } if (entry && disp_Han_Sino_Japanese && * entry->Sino_Japanese) { flag_Sino_Japanese = " S: "; value_Sino_Japanese = entry->Sino_Japanese; } if (entry && disp_Han_Hangul && * entry->Hangul) { flag_Hangul = " H: "; value_Hangul = entry->Hangul; } if (entry && disp_Han_Korean && * entry->Korean) { flag_Korean = " K: "; value_Korean = entry->Korean; } if (entry && disp_Han_Vietnamese 
&& * entry->Vietnamese) { flag_Vietnamese = " V: "; value_Vietnamese = entry->Vietnamese; } if (entry && disp_Han_HanyuPinlu && * entry->HanyuPinlu) { flag_HanyuPinlu = " P: "; value_HanyuPinlu = entry->HanyuPinlu; } if (entry && disp_Han_Tang && * entry->Tang) { flag_Tang = " T: "; value_Tang = entry->Tang; } if (entry && disp_Han_description && * entry->Definition) { flag_description = " D: "; value_description = entry->Definition; } /* MIND! When adding new pronunciation tags (Unihan update), be sure to add %s%s tags each in all three formats! */ if (cjk_text && ! no_unichar (unichar)) { if (no_char (hanchar)) { build_string (text_buffer, "Unmapped Han (U+%04lX)%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", unichar, flag_Mandarin, value_Mandarin, flag_Cantonese, value_Cantonese, flag_Japanese, value_Japanese, flag_Sino_Japanese, value_Sino_Japanese, flag_Hangul, value_Hangul, flag_Korean, value_Korean, flag_Vietnamese, value_Vietnamese, flag_HanyuPinlu, value_HanyuPinlu, flag_Tang, value_Tang, flag_description, value_description ); } else { build_string (text_buffer, "%04lX (U+%04lX)%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", hanchar, unichar, flag_Mandarin, value_Mandarin, flag_Cantonese, value_Cantonese, flag_Japanese, value_Japanese, flag_Sino_Japanese, value_Sino_Japanese, flag_Hangul, value_Hangul, flag_Korean, value_Korean, flag_Vietnamese, value_Vietnamese, flag_HanyuPinlu, value_HanyuPinlu, flag_Tang, value_Tang, flag_description, value_description ); } } else if (cjk_text) { if (valid_cjk (hanchar, NIL_PTR)) { build_string (text_buffer, "%04lX (U? 
unknown)", hanchar); } else { build_string (text_buffer, "Invalid %04lX", hanchar); } } else { build_string (text_buffer, "U+%04lX%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", unichar, flag_Mandarin, value_Mandarin, flag_Cantonese, value_Cantonese, flag_Japanese, value_Japanese, flag_Sino_Japanese, value_Sino_Japanese, flag_Hangul, value_Hangul, flag_Korean, value_Korean, flag_Vietnamese, value_Vietnamese, flag_HanyuPinlu, value_HanyuPinlu, flag_Tang, value_Tang, flag_description, value_description ); } status_uni (text_buffer); } } /* * display_code shows UTF-8 code sequence and Unicode value on the status line */ char hexbuf [20]; char * hexbufpoi = hexbuf; static void appendcode (c) character c; { * hexbufpoi ++ = hexdig ((c >> 4) & 0x0F); * hexbufpoi ++ = hexdig ((c) & 0x0F); } void display_the_code () { character c = * cur_text; int utfcount = 1; unsigned long unichar; int utfcount2; unsigned long unichar2; char * textpoi; FLAG invalid = False; struct scriptentry * script_info; char * lengthmsg; char * scriptmsg; char * category; char * scriptsep; char * widemsg; char * combinedmsg; int utf_utf_len; int uni_utf_len; int len; unsigned long cjkchar; int charlen; character cjkbytes [5]; hexbufpoi = hexbuf; if (c == '\n') { switch (cur_line->return_type) { case lineend_NONE: textpoi = "No lineend (split line)"; break; case lineend_NUL: appendcode ('\0'); textpoi = "Nul character"; break; case lineend_LF: appendcode ('\n'); textpoi = "Line end"; break; case lineend_CRLF: appendcode ('\r'); appendcode ('\n'); textpoi = "DOS line end"; break; case lineend_CR: appendcode ('\r'); textpoi = "Mac line end"; break; case lineend_LS: appendcode ('\342'); appendcode ('\200'); appendcode ('\250'); textpoi = "Line separator U+2028"; break; case lineend_PS: appendcode ('\342'); appendcode ('\200'); appendcode ('\251'); textpoi = "Paragraph separator U+2029"; break; default: appendcode ('\n'); textpoi = "Unknown line end"; break; } * hexbufpoi = '\0'; build_string (text_buffer, 
"%s: %s", textpoi, hexbuf); status_msg (text_buffer); } else if (utf8_text) { if ((c & 0x80) == 0x00) { utfcount = 1; unichar = c; } else if ((c & 0xE0) == 0xC0) { utfcount = 2; unichar = c & 0x1F; } else if ((c & 0xF0) == 0xE0) { utfcount = 3; unichar = c & 0x0F; } else if ((c & 0xF8) == 0xF0) { utfcount = 4; unichar = c & 0x07; } else if ((c & 0xFC) == 0xF8) { utfcount = 5; unichar = c & 0x03; } else if ((c & 0xFE) == 0xFC) { utfcount = 6; unichar = c & 0x01; } else /* illegal UTF-8 code */ { appendcode (c); invalid = True; } utf_utf_len = utfcount; textpoi = cur_text; if (invalid == False) { utfcount --; appendcode (c); textpoi ++; while (utfcount > 0 && (* textpoi & 0xC0) == 0x80) { c = * textpoi ++; appendcode (c); unichar = (unichar << 6) | (c & 0x3F); utfcount --; } if (utfcount > 0) { invalid = True; } } else { textpoi ++; } if (unichar < 0x80) { uni_utf_len = 1; } else if (unichar < 0x800) { uni_utf_len = 2; } else if (unichar < 0x10000) { uni_utf_len = 3; } else if (unichar < 0x200000) { uni_utf_len = 4; } else if (unichar < 0x4000000) { uni_utf_len = 5; } else if (unichar < 0x80000000) { uni_utf_len = 6; } else { uni_utf_len = 0; } if (utf_utf_len == uni_utf_len) { lengthmsg = ""; } else { lengthmsg = " (too long)"; } utf8_info (textpoi, & utfcount2, & unichar2); if (iscombining (unichar2)) { combinedmsg = " - combined ..."; } else if (isjoined (unichar2, textpoi, cur_line->text)) { combinedmsg = " - joined ..."; } else { combinedmsg = ""; } * hexbufpoi = '\0'; if (invalid == False) { script_info = scriptinfo (unichar); if (script_info) { scriptmsg = script_info->scriptname; category = script_info->categoryname; scriptsep = " "; } else { scriptmsg = "Not Assigned "; category = ""; scriptsep = ""; } if (iswide_unichar (unichar)) { if (iscombining_unichar (unichar)) { if (isspacingcombining_unichar (unichar)) { widemsg = "wide spacing combining "; } else { widemsg = "wide combining "; } } else { widemsg = "wide "; } } else if (iscombining_unichar 
(unichar)) { if (isspacingcombining_unichar (unichar)) { widemsg = "spacing combining "; } else { widemsg = "combining "; } } else if (iscombined (unichar, cur_text, cur_line->text)) { if (iscombining (unichar)) { /* catch cases missed above; to be fixed ... */ widemsg = "combining "; } else { widemsg = "joining "; } } else if ((unichar & 0x7FFFFC00) == 0xD800) { widemsg = "single high surrogate "; } else if ((unichar & 0x7FFFFC00) == 0xDC00) { widemsg = "single low surrogate "; } else if ((unichar & 0x7FFFFFFE) == 0xFFFE) { widemsg = "reserved "; } else { widemsg = ""; } if (unichar > 0x10FFFF) { build_string (text_buffer, "Non-Unicode UTF-8: %s%s, %s%s%s%s%sU+%08lX%s", hexbuf, lengthmsg, widemsg, scriptmsg, scriptsep, category, scriptsep, unichar, combinedmsg); } else if (unichar > 0xFFFF) { build_string (text_buffer, "UTF-8: %s%s, %s%s%s%s%sU+%06lX%s", hexbuf, lengthmsg, widemsg, scriptmsg, scriptsep, category, scriptsep, unichar, combinedmsg); } else { build_string (text_buffer, "UTF-8: %s%s, %s%s%s%s%sU+%04lX%s", hexbuf, lengthmsg, widemsg, scriptmsg, scriptsep, category, scriptsep, unichar, combinedmsg); } } else { build_string (text_buffer, "Invalid UTF-8 sequence: %s%s", hexbuf, combinedmsg); } status_msg (text_buffer); } else if (cjk_text) { len = CJK_len (cur_text); cjkchar = charvalue (cur_text); charlen = cjkencode (cjkchar, cjkbytes); textpoi = cur_text; while (len > 0 && * textpoi != '\0' && * textpoi != '\n') { appendcode (* textpoi); textpoi ++; len --; charlen --; } * hexbufpoi = '\0'; combinedmsg = ""; if (text_encoding_tag == 'G' || text_encoding_tag == 'J' || text_encoding_tag == 'S') { if (iscombining (lookup_encodedchar (charvalue (textpoi)))) { combinedmsg = " - combined ..."; } } if (len != 0 || charlen != 0) { build_string (text_buffer, "Incomplete CJK character code: %s%s", hexbuf, combinedmsg); } else { unichar = lookup_encodedchar (cjkchar); if (c > 0x7F || unichar != (character) c) { if (valid_cjk (cjkchar, NIL_PTR)) { if (no_unichar 
(unichar)) { build_string (text_buffer, "CJK character code: %s (U? unknown)%s", hexbuf, combinedmsg); } else { char * format; unichar2 = 0; if (unichar >= 0x80000000) { unichar2 = (unichar >> 16) & 0x7FFF; unichar = unichar & 0xFFFF; format = "CJK %scharacter: %s (%s%s%s%sU+%04lX with U+%04lX)%s"; } else if (unichar > 0x10FFFF) { format = "CJK %scharacter: %s (%s%s%s%sU+%08lX)%s"; } else if (unichar > 0xFFFF) { format = "CJK %scharacter: %s (%s%s%s%sU+%06lX)%s"; } else { format = "CJK %scharacter: %s (%s%s%s%sU+%04lX)%s"; } script_info = scriptinfo (unichar); if (script_info) { scriptmsg = script_info->scriptname; category = script_info->categoryname; scriptsep = " "; } else { scriptmsg = "Not Assigned "; category = ""; scriptsep = ""; } /* determine combining status */ widemsg = ""; if (text_encoding_tag == 'G' || text_encoding_tag == 'J' || text_encoding_tag == 'S') { if (iscombining_unichar (unichar)) { if (isspacingcombining_unichar (unichar)) { widemsg = "spacing combining "; } else { widemsg = "combining "; } } } if (unichar2 == 0) { build_string (text_buffer, format, widemsg, hexbuf, scriptmsg, scriptsep, category, scriptsep, unichar, combinedmsg); } else { build_string (text_buffer, format, widemsg, hexbuf, scriptmsg, scriptsep, category, scriptsep, unichar, unichar2, combinedmsg); } } } else { build_string (text_buffer, "Invalid CJK character code: %s%s", hexbuf, combinedmsg); } } else { build_string (text_buffer, "Character code: %s%s", hexbuf, combinedmsg); } } status_msg (text_buffer); } else { appendcode (c); * hexbufpoi = '\0'; if (mapped_text) { combinedmsg = ""; if (iscombining (lookup_encodedchar ((character) * (cur_text + 1)))) { combinedmsg = " - combined ..."; } unichar = lookup_encodedchar (c); if (no_unichar (unichar)) { build_string (text_buffer, "Character code: %s (U? 
unknown)%s", hexbuf, combinedmsg); } else { widemsg = "C"; if (iscombining_unichar (unichar)) { if (isspacingcombining_unichar (unichar)) { widemsg = "Spacing combining c"; } else { widemsg = "Combining c"; } } script_info = scriptinfo (unichar); if (script_info) { scriptmsg = script_info->scriptname; category = script_info->categoryname; scriptsep = " "; } else { scriptmsg = "Not Assigned "; category = ""; scriptsep = ""; } build_string (text_buffer, "%sharacter code: %s (%s%s%s%sU+%04lX)%s", widemsg, hexbuf, scriptmsg, scriptsep, category, scriptsep, unichar, combinedmsg); } } else { build_string (text_buffer, "Character code: %s", hexbuf); } status_msg (text_buffer); } } void display_code () { if (hop_flag > 0) { hop_flag = 0; if (always_disp_code) { always_disp_code = False; } else { always_disp_code = True; } } else { display_the_code (); } } /*======================================================================*\ |* Character composition *| \*======================================================================*/ /** do_insert_accented combines and inserts accented character, handles multiple accent keys */ static void do_insert_accented (accentnames, ps, ps2, ps3) char * accentnames; struct prefixspec * ps; struct prefixspec * ps2; struct prefixspec * ps3; { unsigned long base; struct prefixspec * newps = 0; if (* accentnames == '\0') { return; } build_string (text_buffer, "Compose %s with:", accentnames); status_uni (text_buffer); base = readcharacter_unicode_mapped (); if (command (base) == DPC) { clear_status (); keyshift |= ctrl_mask; DPC0 (); return; } if (command (base) == CTRLINS) { unsigned long ctrl; status_uni ("Enter compose char /  mnemonic ..."); ctrl = readcharacter_unicode (); newps = lookup_prefix_char (ctrl); if (newps) { /* continue below */ } else if (ctrl == FUNcmd) { keyshift |= ctrl_mask; newps = lookup_prefix (keyproc, keyshift); if (newps) { /* continue below */ } else { error ("Mnemonic input or accent prefix expected"); return; } 
} else if (ctrl == ' ') { char mnemonic [maxLINE_LEN]; build_string (text_buffer, "Compose %s with character mnemonic:", accentnames); if (get_string_uni (text_buffer, mnemonic, False, " ") == ERRORS) { return; } base = compose_mnemonic (mnemonic); /* final compose and insert below */ } else if (ctrl > ' ' && ctrl != '#' && ctrl != 0x7F) { static character buf [7]; unsigned long ch; (void) utfencode (ctrl, buf); build_string (text_buffer, "Compose %s with %s..", accentnames, buf); status_uni (text_buffer); ch = readcharacter_unicode (); if (ch == '\033' || ch == FUNcmd) { clear_status (); return; } base = compose (ctrl, ch); /* final compose and insert below */ } else { error ("Mnemonic input expected"); return; } } else if (base == FUNcmd) { newps = lookup_prefix (keyproc, keyshift); if (newps) { /* continue below */ } else { clear_status (); return; } } else if (base == '\033') { clear_status (); return; } if (newps) { /* handle multiple accent prefixes */ if (ps3) { error ("Max. 3 accent prefix keys anticipated"); } else { char newaccentnames [maxLINE_LEN]; if (ps2) { strcpy (newaccentnames, accentnames); } else { strcpy (newaccentnames, ps->accentsymbol); } strcat (newaccentnames, " and "); strcat (newaccentnames, newps->accentsymbol); if (ps2) { do_insert_accented (newaccentnames, ps, ps2, newps); } else { do_insert_accented (newaccentnames, ps, newps, 0); } } } else { clear_status (); (void) insert_character (compose_patterns (base, ps, ps2, ps3)); } } /** insert_accented combines and inserts accented character */ static void insert_accented (ps) struct prefixspec * ps; { do_insert_accented (ps->accentname, ps, 0, 0); } /* * CTRLINS inserts a control-char or encoded or mnemonic character */ void CTRLINS () { unsigned long ctrl; unsigned long ch; struct prefixspec * ps; status_uni ("Enter control char / # hex/octal/decimal / compose char /  mnemonic ..."); ctrl = readcharacter_unicode (); ps = lookup_prefix_char (ctrl); if (ps) { insert_accented (ps); return; 
} if (ctrl == FUNcmd) { struct prefixspec * ps; keyshift |= ctrl_mask; ps = lookup_prefix (keyproc, keyshift); if (ps) { insert_accented (ps); } else { clear_status (); (* keyproc) ('\0'); } } else if (ctrl < ' ' || ctrl == 0x7F) { clear_status (); if (ctrl != '\177') { ctrl = ctrl & '\237'; } (void) insert_character (ctrl); } else if (ctrl == '#') { int finich; do { if (utf8_text) { finich = prompt_num_char (& ch, 0x7FFFFFFF); if (finich != ERRORS) { Sutf8 (ch); } } else if (cjk_text || mapped_text) { finich = prompt_num_char (& ch, max_char_value ()); if (finich != ERRORS) { if ((cjk_text && valid_cjk (ch, NIL_PTR)) || ch < 0x100) { (void) insert_character (ch); } else { ring_bell (); error ("Invalid character value"); } } } else { finich = prompt_num_char (& ch, 0xFF); if (finich != ERRORS) { if (ch < 0x100) { Sbyte (ch); } else { ring_bell (); error ("Invalid character value"); } } } } while (finich == ' '); } else if (ctrl == ' ') { char mnemonic [maxLINE_LEN]; if (get_string_uni ("Enter character mnemonic:", mnemonic, False, " ") == ERRORS) { return; } ch = compose_mnemonic (mnemonic); clear_status (); (void) insert_character (ch); } else { static character buf [7]; (void) utfencode (ctrl, buf); build_string (text_buffer, "Enter second composing character: %s..", buf); status_uni (text_buffer); ch = readcharacter_unicode (); if (ch == '\033' || ch == FUNcmd) { clear_status (); return; } ch = compose (ctrl, ch); clear_status (); (void) insert_character (ch); } } /* Insert next char with defined accent composition pattern (invoked by function key). 
*/
/* prefunc is the key function that was invoked; together with the current
   keyshift state it selects the accent prefix via lookup_prefix.
   If no prefix is assigned, only a status message is shown.
 */
static
void
insert_prefix (prefunc)
  voidfunc prefunc;
{
  struct prefixspec * prefix = lookup_prefix (prefunc, keyshift);

  if (! prefix) {
    status_msg ("Accent prefix with this shift state not assigned");
    return;
  }
  insert_accented (prefix);
}

/* Key functions usable as accent prefixes;
   each passes itself to insert_prefix as the lookup key.
 */
void
COMPOSE ()
{
  insert_prefix (COMPOSE);
}

void
F5 ()
{
  insert_prefix (F5);
}

void
F6 ()
{
  insert_prefix (F6);
}

void
key_0 ()
{
  insert_prefix (key_0);
}

void
key_1 ()
{
  insert_prefix (key_1);
}

void
key_2 ()
{
  insert_prefix (key_2);
}

void
key_3 ()
{
  insert_prefix (key_3);
}

void
key_4 ()
{
  insert_prefix (key_4);
}

void
key_5 ()
{
  insert_prefix (key_5);
}

void
key_6 ()
{
  insert_prefix (key_6);
}

void
key_7 ()
{
  insert_prefix (key_7);
}

void
key_8 ()
{
  insert_prefix (key_8);
}

void
key_9 ()
{
  insert_prefix (key_9);
}


/*======================================================================*\
|*			Character transformation			*|
\*======================================================================*/

/*
   Search for wrong encoded character.
   Searches for UTF-8 character in Latin-1 text or vice versa.
*/
/* Not available for CJK or mapped encodings.
   With HOP (hop_flag > 0), the current character is first passed to CONV.
   The scan starts behind the cursor, wraps around at the end of the text 
   and stops when it is back at the starting position.
 */
void
search_wrong_enc ()
{
  LINE * start_line;
  char * start_text;
  LINE * line;
  char * pos;
  int utfcount;
  unsigned long unichar;
  FLAG isutf;

  if (cjk_text || mapped_text) {
    error ("Not supported in this encoding");
    return;
  }

  if (hop_flag > 0) {
    /* convert at the cursor, then restore the cursor by byte offset */
    int text_offset = cur_text - cur_line->text;
    (void) CONV ();
    move_address (cur_line->text + text_offset, y);
  }

  start_line = cur_line;
  start_text = cur_text;
  line = start_line;
  pos = start_text;

  for (;;) {
    /* step to the next character, moving on to the next line at a line end */
    if (* pos != '\n') {
      advance_char (& pos);
    } else {
      line = line->next;
      if (line == tail) {
        line = header->next;
        status_msg ("Search wrapped around end of file");
      }
      pos = line->text;
    }

    /* only non-ASCII bytes can be wrongly encoded */
    if ((* pos & 0x80) != 0) {
      isutf = False;
      if ((* pos & 0xC0) == 0xC0) {
        /* lead byte: a complete sequence has the announced length */
        utf8_info (pos, & utfcount, & unichar);
        isutf = UTF8_len (* pos) == utfcount;
      }
      if (isutf != utf8_text) {
        /* encoding of this character differs from the text encoding */
        move_address (pos, find_y (line));
        return;
      }
    }

    /* back at the starting position: nothing found */
    if (line == start_line && pos == start_text) {
      status_msg ("No more wrong encoding found");
      return;
    }
  }
}

/*
   Replace character mnemonic with character.
Prefer national characters according to parameter: g (German): ae -> ä/, oe -> ö/, ue -> ü/ d (Danish) ae -> æ/, oe -> ø/ f (French) oe -> œ/oe ligature */ void UML (lang) char lang; { unsigned long uc; unsigned long c1; unsigned long c2; char * cpoi = cur_text; unsigned long unichar = 0; c1 = unicodevalue (cpoi); if (c1 == '&') { char mnemo [MAX_CHARS]; char * mp = mnemo + 1; char * text = cur_text + 1; mnemo [0] = '&'; while ((* text >= 'a' && * text <= 'z') || (* text >= 'A' && * text <= 'Z')) { * mp ++ = * text ++; } * mp = '\0'; if (* text == ';') { text ++; } uc = compose_mnemonic (mnemo); if (insert_character (uc)) { (void) delete_text (cur_line, cur_text, cur_line, text); } return; } if (* cpoi != '\n') { advance_char (& cpoi); } c2 = unicodevalue (cpoi); /* first, try mnemonic / accented conversion */ uc = compose (c1, c2); /* result is already converted to encoding */ /* override with language-specific conversion preferences */ if (lang == 'd') { if (c1 == 'a' && c2 == 'e') { unichar = (character) ''; /* æ */ } else if (c1 == 'A' && (c2 & ~0x20) == 'E') { unichar = (character) ''; /* Æ */ } else if (c1 == 'o' && c2 == 'e') { unichar = (character) ''; /* ø */ } else if (c1 == 'O' && (c2 & ~0x20) == 'E') { unichar = (character) ''; /* Ø */ } } else if (lang == 'f') { if (c1 == 'o' && c2 == 'e') { unichar = 0x0153; /* œ */ } else if (c1 == 'O' && (c2 & ~0x20) == 'E') { unichar = 0x0152; /* Œ */ } } else if (lang == 'g') { if (c1 == 'a' && c2 == 'e') { unichar = (character) ''; /* ä */ } else if (c1 == 'o' && c2 == 'e') { unichar = (character) ''; /* ö */ } else if (c1 == 'u' && c2 == 'e') { unichar = (character) ''; /* ü */ } else if (c1 == 'A' && (c2 & ~0x20) == 'E') { unichar = (character) ''; /* Ä */ } else if (c1 == 'O' && (c2 & ~0x20) == 'E') { unichar = (character) ''; /* Ö */ } else if (c1 == 'U' && (c2 & ~0x20) == 'E') { unichar = (character) ''; /* Ü */ } else if (c1 == 's' && c2 == 's') { unichar = (character) ''; /* ß */ } } if (unichar != 0) { /* 
was explicitly set above */ uc = charcode (unichar); } if (uc == CHAR_INVALID) { ring_bell (); error ("Invalid character"); } else if (! no_char (uc)) { /* apply mnemonic conversion */ if (utf8_text) { DCC (); DCC (); Sutf8 (uc); } else if (cjk_text && ! mapped_text) { DCC (); DCC (); Scjk (uc); } else { if (uc < 0x100) { DCC (); DCC (); Sbyte (uc); } else { ring_bell (); error ("Invalid character"); } } } else if (! CONV ()) { ring_bell (); error ("Unknown character mnemonic"); } } /* Convert between Latin-1 and UTF-8 encodings. */ FLAG CONV () { character c1 = * cur_text; int utfcount; unsigned long unichar = 0; if (utf8_text && ((character) * cur_text) >= 0x80) { /* Latin-1 -> UTF-8 conversion */ if (c1 >= 0xC0) { utf8_info (cur_text, & utfcount, & unichar); if (UTF8_len (c1) == utfcount) { ring_bell (); error ("Already a UTF-8 character"); return True; } } if (delete_text (cur_line, cur_text, cur_line, cur_text + 1) == FINE) { Sutf8 (c1); } return True; } else if (! utf8_text && ! cjk_text && ! mapped_text && (c1 & 0xC0) == 0xC0) { /* UTF-8 -> Latin-1 conversion */ utf8_info (cur_text, & utfcount, & unichar); if (UTF8_len (c1) != utfcount) { ring_bell (); error ("Not a UTF-8 character"); return True; } if (mapped_text) { ring_bell (); error ("Cannot map Unicode character"); return True; } if (unichar < 0x100) { if (delete_text (cur_line, cur_text, cur_line, cur_text + utfcount) == FINE) { (void) insert_character (unichar); } } else { ring_bell (); error ("Cannot encode Unicode character"); } return True; } else { return False; } } /*======================================================================*\ |* Case conversion *| \*======================================================================*/ /** Delete base character only of combined character, leave combining accents. Called by case conversion functions. 
*/ static void delete_basechar () { char * after_char = cur_text; int text_offset = cur_text - cur_line->text; advance_char (& after_char); (void) delete_text (cur_line, cur_text, cur_line, after_char); /* enforce proper placement of cursor on combining characters */ move_address (cur_line->text + text_offset, y); } static struct { unsigned long base; short toupper, tolower; unsigned long title; } caseconv_table [] = { #include "casetabl.t" }; #define caseconv_table_size (sizeof (caseconv_table) / sizeof (* caseconv_table)) typedef struct {unsigned short u1, u2, u3;} uniseq; #define U_cond_Final_Sigma 0x01 #define U_cond_After_I 0x02 #define U_cond_After_Soft_Dotted 0x04 #define U_cond_More_Above 0x08 #define U_cond_Not_Before_Dot 0x10 #define U_cond_tr 0x20 #define U_cond_lt 0x40 #define U_cond_az 0x80 #define U_conds_lang (U_cond_tr | U_cond_lt | U_cond_az) static struct { unsigned long base; uniseq lower, title, upper; short condition; } caseconv_special [] = { #include "casespec.t" }; #define caseconv_special_size (sizeof (caseconv_special) / sizeof (* caseconv_special)) struct interval { unsigned long first; unsigned long last; }; /* struct interval list_Soft_Dotted [] */ #include "softdot.t" static struct { unsigned long first; unsigned long last; char category; short combining_class; } combining_classes [] = { #include "combin.t" }; /** Look up combining class; return: if not found, -1 if category is "Spacing Combining" (Mc): -1 - combining class else combining class */ static int combining_class (ucs) unsigned long ucs; { int min = 0; int mid; int max = arrlen (combining_classes) - 1; if (ucs < combining_classes [0].first) { return -1; } while (max >= min) { mid = (min + max) / 2; if (ucs > combining_classes [mid].last) { min = mid + 1; } else if (ucs < combining_classes [mid].first) { max = mid - 1; } else { if (combining_classes [mid].category == 'c') { return -2 - combining_classes [mid].combining_class; } else { return combining_classes 
[mid].combining_class; } } } return -1; } FLAG iscombining_unichar (ucs) unsigned long ucs; { #ifdef spacingcombining_isnt_combining return combining_class (ucs) >= 0; #else return combining_class (ucs) != -1; #endif } FLAG isspacingcombining_unichar (ucs) unsigned long ucs; { return combining_class (ucs) <= -2; } FLAG iscombined_unichar (unichar, charpos, linebegin) unsigned long unichar; char * charpos; char * linebegin; { return isjoined (unichar, charpos, linebegin) || iscombining_unichar (unichar); } static int lookup (ucs, table, len) unsigned long ucs; struct interval * table; int len; { int min = 0; int mid; int max = len - 1; if (ucs < table [0].first || ucs > table [max].last) { return 0; } while (max >= min) { mid = (min + max) / 2; if (ucs > table [mid].last) { min = mid + 1; } else if (ucs < table [mid].first) { max = mid - 1; } else { return 1; } } return 0; } static int soft_dotted (ucs) unsigned long ucs; { return lookup (ucs, list_Soft_Dotted, arrlen (list_Soft_Dotted)); } static int lookup_caseconv (basechar) unsigned long basechar; { int low = 0; int high = caseconv_table_size - 1; int i; while (low <= high) { i = (low + high) / 2; if (caseconv_table [i].base == basechar) { return i; } else if (caseconv_table [i].base >= basechar) { high = i - 1; } else { low = i + 1; } } /* notify "not found" */ return -1; } /** case_convert converts a Unicode character to +2: title case +1: upper case -1: lower case */ unsigned long case_convert (unichar, dir) unsigned long unichar; int dir; { int tabix = lookup_caseconv (unichar); if (tabix >= 0) { if (dir == 2 && caseconv_table [tabix].title != 0) { return caseconv_table [tabix].title; } else if (dir > 0 && caseconv_table [tabix].toupper != 0) { return unichar + caseconv_table [tabix].toupper; } else if (dir < 0 && caseconv_table [tabix].tolower != 0) { return unichar + caseconv_table [tabix].tolower; } } return unichar; } static int lookup_caseconv_special (basechar, langcond) unsigned long basechar; short 
langcond; { int i; #ifdef caseconvsearch_uncond int low = 0; int high = caseconv_special_size - 1; /* plain binary search is not applicable as keys are ambiguous */ while (low <= high) { i = (low + high) / 2; if (caseconv_special [i].base == basechar) { return i; } else if (caseconv_special [i].base >= basechar) { high = i - 1; } else { low = i + 1; } } #else for (i = 0; i < caseconv_special_size; i ++) { if (caseconv_special [i].base == basechar) { short langcondi = caseconv_special [i].condition & U_conds_lang; if (langcondi == 0 || (langcondi & langcond)) { return i; } } } #endif /* notify "not found" */ return -1; } static int iscombining_notabove (unichar) unsigned long unichar; { int cc = combining_class (unichar); return cc > 0 && cc != 230; } static int iscombining_above (unichar) unsigned long unichar; { return combining_class (unichar) == 230; } static void check_After (unichar) unsigned long unichar; { if ((Turkish && unichar == 'I') /* tr / az */ || (Lithuanian && soft_dotted (unichar)) /* lt */ ) { /* Handle U+0307 COMBINING DOT ABOVE After_Soft_Dotted / After_I while handling the I or the soft dotted letter */ char * comb_char = cur_text; unsigned long unichar2; int utfcount; utf8_info (comb_char, & utfcount, & unichar2); while (iscombining_unichar (unichar2) && unichar2 != 0x0307) { advance_char (& comb_char); utf8_info (comb_char, & utfcount, & unichar2); } if (unichar2 == 0x0307) { char * after_char = comb_char; advance_char (& after_char); (void) delete_text (cur_line, comb_char, cur_line, after_char); } } } /** Convert lower and upper case letters dir == 0: toggle dir == 2: convert to title case dir == 1: convert to upper case dir == -1: convert to lower case */ static void lowcap (dir) int dir; { unsigned long unichar; int prev_x; int tabix; unsigned long convchar; short condition = 0; char * after_char; unsigned long unichar2; if (* cur_text == '\n') { MRT (); return; } if (Turkish) { condition |= U_cond_tr | U_cond_az; } if (Lithuanian) { 
condition |= U_cond_lt; } do { tabix = 0; unichar = unicodevalue (cur_text); if (dir >= 0 && unichar >= 0x3041 && unichar <= 0x3096) { /* Hiragana -> Katakana */ convchar = charcode (unichar + 0x60); if (no_char (convchar)) { ring_bell (); error ("Unencoded Katakana character"); break; } else { prev_x = x; delete_basechar (); (void) insert_character (convchar); if (x == prev_x) { /* may occur with combining chars */ move_to (prev_x + 1, y); } } } else if (dir <= 0 && unichar >= 0x30A1 && unichar <= 0x30F6) { /* Katakana -> Hiragana */ convchar = charcode (unichar - 0x60); if (no_char (convchar)) { ring_bell (); error ("Unencoded Hiragana character"); break; } else { prev_x = x; delete_basechar (); (void) insert_character (convchar); if (x == prev_x) { /* may occur with combining chars */ move_to (prev_x + 1, y); } } } else if ((tabix = lookup_caseconv_special (unichar, condition)) >= 0) { condition = caseconv_special [tabix].condition &= ~ U_conds_lang; if (condition & U_cond_Final_Sigma) { /** Final_Cased: Within the closest word boundaries containing C, there is a cased letter before C, and there is no cased letter after C. Before C [{cased=true}] [{word-Boundary≠true}]* After C !([{wordBoundary≠true}]* [{cased}])) */ after_char = cur_text; advance_char (& after_char); unichar2 = unicodevalue (after_char); while (iscombining_unichar (unichar2)) { advance_char (& after_char); unichar2 = unicodevalue (after_char); } if (unichar2 < (unsigned long) 'A' || (unichar2 > (unsigned long) 'Z' && unichar2 < (unsigned long) 'a') || (unichar2 > (unsigned long) 'z' && unichar2 < (unsigned long) (character) '') ) { /* final position detected */ condition &= ~ U_cond_Final_Sigma; } } if (condition & U_cond_Not_Before_Dot) { /* tr / az */ /** Before_Dot: C is followed by U+0307 COMBINING DOT ABOVE. Any sequence of characters with a combining class that is neither 0 nor 230 may intervene between the current character and the combining dot above. 
After C ([{cc≠230} & {cc≠0}])* [\u0307] */ after_char = cur_text; advance_char (& after_char); unichar2 = unicodevalue (after_char); while (iscombining_notabove (unichar2) && unichar2 != 0x0307) { advance_char (& after_char); unichar2 = unicodevalue (after_char); } if (unichar2 != 0x0307) { condition &= ~ U_cond_Not_Before_Dot; } } if (condition & U_cond_After_I) { /* tr / az */ /** After_I: The last preceding base character was an uppercase I, and there is no intervening combining character class 230 (ABOVE). Before C [I] ([{cc≠230} & {cc≠0}])* */ /* This case only works in separated display mode; for combined mode see explicit handling below. */ after_char = cur_text; precede_char (& after_char, cur_line->text); unichar2 = unicodevalue (after_char); while (iscombining_notabove (unichar2) && after_char != cur_line->text) { precede_char (& after_char, cur_line->text); unichar2 = unicodevalue (after_char); } if (unichar2 == 'I') { condition &= ~ U_cond_After_I; } } if (condition & U_cond_After_Soft_Dotted) { /* lt */ /** After_Soft_Dotted: The last preceding character with a combining class of zero before C was Soft_Dotted, and there is no intervening combining character class 230 (ABOVE). Before C [{Soft_Dotted=true}] ([{cc≠230} & {cc≠0}])* */ /* This case only works in separated display mode; for combined mode see explicit handling below. */ after_char = cur_text; precede_char (& after_char, cur_line->text); unichar2 = unicodevalue (after_char); while (iscombining_notabove (unichar2) && after_char != cur_line->text) { precede_char (& after_char, cur_line->text); unichar2 = unicodevalue (after_char); } if (soft_dotted (unichar2)) { condition &= ~ U_cond_After_Soft_Dotted; } } if (condition & U_cond_More_Above) { /* lt */ /** More_Above: C is followed by one or more characters of combining class 230 (ABOVE) in the combining character sequence. 
After C [{cc≠0}]* [{cc=230}] */ after_char = cur_text; advance_char (& after_char); unichar2 = unicodevalue (after_char); while (iscombining_notabove (unichar2)) { advance_char (& after_char); unichar2 = unicodevalue (after_char); } if (iscombining_above (unichar2)) { condition &= ~ U_cond_More_Above; } } if (condition == 0) { /* no condition or condition resolved */ FLAG do_convert = False; unsigned long convchar2; unsigned long convchar3; if (caseconv_special [tabix].base == caseconv_special [tabix].lower.u1) { /* is lower, toggle or convert to upper */ if (dir == 2) { convchar = caseconv_special [tabix].title.u1; convchar2 = caseconv_special [tabix].title.u2; convchar3 = caseconv_special [tabix].title.u3; do_convert = True; } else if (dir >= 0) { convchar = caseconv_special [tabix].upper.u1; convchar2 = caseconv_special [tabix].upper.u2; convchar3 = caseconv_special [tabix].upper.u3; do_convert = True; } } else { /* is upper, toggle or convert to lower */ if (dir <= 0) { convchar = caseconv_special [tabix].lower.u1; convchar2 = caseconv_special [tabix].lower.u2; convchar3 = caseconv_special [tabix].lower.u3; do_convert = True; } } if (do_convert) { FLAG inserted_something = False; if (convchar != 0) { convchar = charcode (convchar); if (convchar2 != 0) { convchar2 = charcode (convchar2); if (convchar3 != 0) { convchar3 = charcode (convchar3); } } } if (no_char (convchar) || no_char (convchar2) || no_char (convchar3)) { ring_bell (); error ("Unencoded case converted character(s)"); break; } else { prev_x = x; delete_basechar (); if (convchar != 0) { inserted_something = True; (void) insert_character (convchar); if (convchar2 != 0) { (void) insert_character (convchar2); if (convchar3 != 0) { (void) insert_character (convchar3); } } } if (inserted_something) { check_After (unichar); if (x == prev_x) { /* may occur with combining chars */ move_to (prev_x + 1, y); } } } } else { move_to (x + 1, y); } } else { /* notify to try further */ tabix = -1; } } if (tabix == 
-1 && (tabix = lookup_caseconv (unichar)) >= 0) { convchar = unichar; if (dir == 2 && caseconv_table [tabix].title != 0) { convchar = caseconv_table [tabix].title; } else if (caseconv_table [tabix].toupper != 0) { if (dir >= 0) { convchar = unichar + caseconv_table [tabix].toupper; } } else { if (dir <= 0) { convchar = unichar + caseconv_table [tabix].tolower; } } convchar = charcode (convchar); if (no_char (convchar)) { ring_bell (); error ("Unencoded case converted character"); break; } else { prev_x = x; delete_basechar (); (void) insert_character (convchar); if (Turkish || Lithuanian) { char * comb_char; move_to (prev_x, y); comb_char = cur_text; advance_char (& comb_char); move_address (comb_char, y); } check_After (unichar); if (x == prev_x) { /* may occur with combining chars */ move_to (prev_x + 1, y); } } } else if (tabix == -1) { move_to (x + 1, y); } } while (hop_flag > 0 && idfchar (cur_text)); } /** Toggle lower and upper case letters */ void LOWCAP () { lowcap (0); } /** Convert to lower case letters */ void LOWER () { lowcap (-1); } /** Convert to upper case letters */ void UPPER () { lowcap (1); } /** Convert single character to upper case letter, then skip word (emacs) */ void CAPWORD () { hop_flag = 0; lowcap (1); MLF (); MNW (); } /** Toggle low/cap/all cap (Windows) */ void LOWCAPWORD () { char * cp = cur_line->text; char * first_alpha = NIL_PTR; FLAG found = False; FLAG first_cap = False; FLAG first_title = False; FLAG subseq_cap = False; int letters = 0; #ifdef hop_title_case int upper_type = hop_flag > 0 ? 
2 : 1; /* HOP -> title case */ #endif while (* cp != '\0' && * cp != '\n') { unsigned long uc = unicodevalue (cp); if (cp == cur_text) { found = True; } if (idfchar (cp)) { /* idfchar includes categories "Letter" and "Mark" and thus all combining characters */ /* check: any subsequent letter capital → make all letters small first letter capital → make all letters capital (all letters small) → make first letter capital consider the following Unicode categories as upper: Letter, uppercase Letter, titlecase (based on caseconv_table [...].tolower != 0) and these as lower or insignificant: Letter, other Letter, lowercase Letter, modifier all others */ FLAG iscapital = False; int tabix = lookup_caseconv (uc); if (tabix >= 0) { iscapital = caseconv_table [tabix].tolower != 0; } if (first_alpha == NIL_PTR) { first_alpha = cp; if (iscapital) { first_cap = True; } first_title = caseconv_table [tabix].title == uc; } else { if (iscapital) { subseq_cap = True; } } letters ++; } else if (found) { /* word has been scanned */ break; } else { /* word has not yet been passed; reset info */ first_alpha = NIL_PTR; first_cap = False; subseq_cap = False; letters = 0; } advance_char (& cp); } if (found && first_alpha != NIL_PTR) { int offset = cur_line->shift_count * SHIFT_SIZE + x; unsigned long uc; char * sn; move_address (first_alpha, y); uc = unicodevalue (cur_text); sn = script (uc); if (streq (sn, "Hiragana")) { hop_flag = 1; lowcap (1); } else if (streq (sn, "Katakana")) { hop_flag = 1; lowcap (-1); } else if (subseq_cap || (letters == 1 && first_cap && ! 
first_title)) { /* if (letters > 1 && first_cap) { hop_flag = 0; lowcap (2); } */ hop_flag = 1; lowcap (-1); } else if (first_cap) { hop_flag = 1; lowcap (1); } else { hop_flag = 0; lowcap (2); } move_to (offset - cur_line->shift_count * SHIFT_SIZE, y); } } /*======================================================================*\ |* Character/Code conversion *| \*======================================================================*/ static int ishex (c) char c; { return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f'); } static unsigned int hexval (c) char c; { if (c >= '0' && c <= '9') { return c - '0'; } else if (c >= 'A' && c <= 'F') { return c - 'A' + 10; } else if (c >= 'a' && c <= 'f') { return c - 'a' + 10; } else { return 0; } } /** AltX toggles text left of cursor between character and Unicode value * sequence of 2 <= n <= 6 hex digits, value <= 10FFFF -> Unicode character * no-digit character -> hex Unicode value, represented with >= 4 digits */ void AltX () { char * cp = cur_text; char * pp = cp; char * pp1; int n = 0; unsigned long c = 0; if (cur_text == cur_line->text) { return; } precede_char (& cp, cur_line->text); pp1 = cp; while (n < 6 && cp != pp && ishex (* cp)) { n ++; pp = cp; precede_char (& cp, cur_line->text); } if (pp != cur_text && n >= 2) { /* hex value found */ char * hp = pp; while (hp != cur_text) { c = ((c << 4) + hexval (* hp)); hp ++; } if (c > (unsigned long) 0x10FFFF) { n = 1; } } if (n >= 2) { /* hex -> uni */ (void) delete_text (cur_line, pp, cur_line, cur_text); /* sequence of 2 <= n <= 6 hex digits, value <= 10FFFF -> Unicode character */ if (insert_unichar (c)) { } } else { /* uni -> hex */ unsigned long cv = unicodevalue (pp1); move_address (pp1, y); delete_char (False); /* no-digit character -> hex Unicode value, represented with >= 4 digits */ if (! 
no_char (cv)) { insertunicode (cv); } } } /*======================================================================*\ |* End *| \*======================================================================*/