# suikyo-composer.rb: Preedition composer using Suikyo.
# This library is for input methods such as PRIME.
# $Id: suikyo-composer.rb,v 1.11 2005/03/07 09:41:14 komatsu Exp $
#
# Copyright (C) 2004 Hiroyuki Komatsu
#     All rights reserved.
#     This is free software with ABSOLUTELY NO WARRANTY.
#
# You can redistribute it and/or modify it under the terms of
# the GNU General Public License version 2.

require 'suikyo/suikyo'

## A node of the doubly linked list that holds the preedition.
## Each node carries three character lists:
##   pending    - characters not yet resolved into a conversion,
##   original   - the raw characters that were typed (may become nil),
##   conversion - the characters already converted.
class SuikyoCharChunk
  attr_accessor :chunk_prev, :chunk_next, :pending, :original, :conversion

  ## Creates a fresh chunk, links it directly after CHUNK and returns it.
  ## The former successor of CHUNK (if any) becomes the successor of the
  ## new chunk.
  def self.new_next(chunk)
    successor = chunk.chunk_next
    inserted  = SuikyoCharChunk.new(chunk, successor)
    successor.chunk_prev = inserted if successor
    chunk.chunk_next = inserted
    inserted
  end

  def initialize(chunk_prev = nil, chunk_next = nil)
    @chunk_prev = chunk_prev
    @chunk_next = chunk_next
    reset
  end

  ## Clears the three character lists; the links are left untouched.
  def reset
    @pending    = []
    @original   = []
    @conversion = []
  end

  ## It returns a duplicated value of pending (nil stays nil).
  def pending_copy
    duplicate(@pending)
  end

  ## It returns a duplicated value of original (nil stays nil).
  def original_copy
    duplicate(@original)
  end

  ## It returns a duplicated value of conversion (nil stays nil).
  def conversion_copy
    duplicate(@conversion)
  end

  ## Number of pending characters plus number of converted characters.
  def length
    @pending.length + @conversion.length
  end

  ## True only when all three lists are empty arrays.  A nil list (for
  ## example an invalidated original) makes the chunk non-empty.
  def empty?
    [@pending, @original, @conversion].all? { |list| list == [] }
  end

  private

  ## Shallow copy of LIST, passing nil through unchanged.
  def duplicate(list)
    list.nil? ? nil : list.dup
  end
end

## Immutable snapshot of a composer state, used by the undo buffer.
class SuikyoComposerData
  attr_reader :position, :head_chunk, :tail_chunk, :typing_mode

  def initialize(position, head_chunk, tail_chunk, typing_mode)
    @position    = position
    @head_chunk  = head_chunk
    @tail_chunk  = tail_chunk
    @typing_mode = typing_mode
  end
end

## Main class of this file.
## SuikyoComposer keeps a preedition string as a doubly linked list of
## SuikyoCharChunk objects placed between two sentinel chunks
## (@head_chunk and @tail_chunk), together with a cursor position that is
## counted in displayed characters.
class SuikyoComposer
  ## Converters shared by all composers: from the default surface to the
  ## alternative display modes, and the matching reverse tables.
  @@suikyo_katakana              = Suikyo.new("hiragana-katakana")
  @@suikyo_half_katakana         = Suikyo.new("hiragana-halfkatakana")
  @@suikyo_wide_ascii            = Suikyo.new("ascii-wideascii")
  @@suikyo_katakana_reverse      = Suikyo.new("katakana-hiragana")
  @@suikyo_half_katakana_reverse = Suikyo.new("halfkatakana-hiragana")
  @@suikyo_wide_ascii_reverse    = Suikyo.new("wideascii-ascii")

  attr_accessor :mask, :hybrid_typing

  ## TABLE and TABLE_REVERSE are handed to Suikyo.new unchanged; they select
  ## the forward (typing) and reverse conversion tables.
  def initialize(table = nil, table_reverse = nil)
    @suikyo         = Suikyo.new(table)
    @suikyo_reverse = Suikyo.new(table_reverse)
    reset_composer

    ## @typing_mode means the current displaying method.  Its value is
    ## one of :default, :raw, :katakana, :wide_ascii, and :half_katakana.
    @typing_mode = :default

    @undo_data = []
    @redo_data = [] # not implemented yet.

    ## If @mask is true, the surface of tail pending character will be masked
    ## by "*".  For example "たいy" => "たい*".  This is usually for T-code.
    @mask = false

    ## If @hybrid_typing is true, Suikyo considers the validation of the
    ## preedition.  ex). "あっplえ" => "apple".
    @hybrid_typing = false
  end

  ## (disabled draft, kept for reference)
  # def SuikyoComposer::copy (composer)
  #   new_composer = SuikyoComposer.new( composer.get_table(),
  #                                      composer.get_reverse_table() )
  #   (head_chunk, tail_chunk) = composer.chunk_copy()
  #   new_composer.set_composer(head_chunk, tail_chunk, composerposition
  # end

  ## This copies its chunks and returns a pair of the head and the tail
  ## of the new list.  The character lists of every chunk are duplicated.
  def chunk_copy
    new_head_chunk = SuikyoCharChunk.new
    new_tail_chunk = SuikyoCharChunk.new_next(new_head_chunk)
    new_chunk  = new_head_chunk
    orig_chunk = @head_chunk
    until orig_chunk.chunk_next == @tail_chunk
      orig_chunk = orig_chunk.chunk_next
      ## new_next inserts after new_chunk, i.e. just before the new tail.
      new_chunk  = SuikyoCharChunk.new_next(new_chunk)
      new_chunk.conversion = orig_chunk.conversion_copy
      new_chunk.original   = orig_chunk.original_copy
      new_chunk.pending    = orig_chunk.pending_copy
    end
    [new_head_chunk, new_tail_chunk]
  end
  private :chunk_copy

  ## (disabled draft, kept for reference)
  # def set_composer (head_chunk, tail_chunk, position = 0)
  #   @position   = position
  #   @head_chunk = head_chunk
  #   @tail_chunk = tail_chunk
  # end

  ## Drops the whole chunk list and moves the cursor to 0.  Unlike
  ## edit_erase this does not record an undo snapshot.
  def reset_composer
    @position   = 0
    @head_chunk = SuikyoCharChunk.new
    @tail_chunk = SuikyoCharChunk.new_next(@head_chunk)
  end

  ## Loads every entry of TABLES into the forward table via loadfile.
  def set_table(tables, table_path = nil)
    tables.each { |table| @suikyo.table.loadfile(table, table_path) }
  end

  def get_table
    @suikyo.table
  end

  ## Loads every entry of TABLES into the reverse table via loadfile.
  def set_reverse_table(tables, table_path = nil)
    tables.each { |table| @suikyo_reverse.table.loadfile(table, table_path) }
  end

  def get_reverse_table
    @suikyo_reverse.table
  end

  ## Registers a single entry in the forward table.
  def set_table_entry(input, output, pending = nil, unescape = true)
    @suikyo.table.set(input, output, pending, unescape)
  end

  ## Registers a single entry in the reverse table.
  def set_reverse_table_entry(input, output, pending = nil, unescape = true)
    @suikyo_reverse.table.set(input, output, pending, unescape)
  end

  ###
  ### Undo operations
  ###

  ## This method sets the current status to the undo buffer.  Only one
  ## snapshot is kept: a new one replaces the previous one.
  def undo_set
    head_chunk, tail_chunk = chunk_copy
    data = SuikyoComposerData.new(@position, head_chunk, tail_chunk,
                                  @typing_mode)
    # @undo_data.push(data)
    @undo_data = [data]
  end
  private :undo_set

  ## This method reads the @undo_data and sets them to the current status.
  ## If @undo_data contains no data this returns false, otherwise true.
  def undo
    return false if @undo_data.empty?

    ## FIXME: Add redo code.
    ## FIXME: (2004-12-05)
    data = @undo_data.shift
    @position    = data.position
    @head_chunk  = data.head_chunk
    @tail_chunk  = data.tail_chunk
    @typing_mode = data.typing_mode
    true
  end

  ###
  ### Cursor operations
  ###

  ## Moves the cursor to POSITION and returns true, or returns false and
  ## leaves the cursor alone when POSITION is out of range.  A negative
  ## POSITION counts from the right edge: -1 is the end of the string.
  def cursor_at(position)
    length = edit_get_length
    ## ex. if position == -1 then position = length.
    position += length + 1 if position < 0

    ## A negative value that is still negative after normalization points
    ## before the left edge; reject it instead of storing it.
    return false if position < 0 || position > length

    @position = position
    true
  end

  def cursor_left
    ## Guard needed: cursor_at(-1) would mean "right edge".
    return false if @position == 0
    cursor_at(@position - 1)
  end

  def cursor_right
    cursor_at(@position + 1)
  end

  def cursor_left_edge
    cursor_at(0)
  end

  def cursor_right_edge
    cursor_at(-1)
  end

  ## This erases all of the composition string and resets position to 0.
  ## The state before erasing is stored in the undo buffer.
  def edit_erase
    undo_set
    reset_composer
  end

  ## Deletes the character right of the cursor (implemented as "move right,
  ## then backspace").  Returns the resulting cursor position.
  def edit_delete
    @position = edit_backspace if cursor_right
    @position
  end

  ## Deletes the character left of the cursor and returns the new cursor
  ## position, or -1 when there is nothing to delete.
  def edit_backspace
    chunk, position = chunk_get_at(@position)

    ## Position is out of order.
    return -1 if chunk == @tail_chunk
    ## Position is the beginning of the chunk list.
    return -1 if chunk == @head_chunk

    ## Make the cursor sit at the end of CHUNK.
    chunk_split(chunk, position)

    ## A one-character chunk simply disappears.
    if chunk_get_length(chunk) == 1
      chunk = chunk_delete(chunk)
      @position = get_position(chunk)
      return @position
    end

    @position =
      case @typing_mode
      when :default, :katakana, :half_katakana
        edit_backspace_internal_default(chunk)
      else # :raw, :wide_ascii
        edit_backspace_internal_raw(chunk)
      end
    @position
  end

  ## Removes the last displayed character of CHUNK in the kana modes and
  ## returns the position of the end of CHUNK.
  def edit_backspace_internal_default(chunk)
    if chunk.pending.length > 0
      char = chunk.pending.pop
      if chunk.original != nil
        if chunk.original[-1] == char
          chunk.original.pop
        else
          ## The raw input no longer matches what is displayed.
          chunk.original = nil
        end
      end
    else
      ## Delete the last character.  The pop is done on the displayed form
      ## and the remainder is stored back in the default form.
      conversion = suikyo_convert(chunk.conversion.join).split(//)
      conversion.pop
      chunk.conversion =
        suikyo_reconvert_to_default(conversion.join).split(//)
      chunk.original = nil
    end
    get_position(chunk)
  end
  private :edit_backspace_internal_default

  ## Removes the last raw character of CHUNK in the raw modes and returns
  ## the position of the end of CHUNK.
  def edit_backspace_internal_raw(chunk)
    chunk.pending.pop
    chunk.original.pop
    chunk.conversion = []
    get_position(chunk)
  end
  private :edit_backspace_internal_raw

  ## Inserts STRING at the cursor, runs the conversion and returns the new
  ## cursor position (-1 if the cursor is out of order).
  def edit_insert(string)
    chunk, position = chunk_get_at(@position)

    ## Position is out of order.
    return -1 if chunk == @tail_chunk

    # Setting an undo data.
    # undo_set()

    if chunk == @head_chunk
      ## Position is the beginning of the chunk list: the sentinel itself
      ## must not receive characters, so start a new chunk after it.
      chunk = SuikyoCharChunk.new_next(chunk)
    else
      chunk_split(chunk, position)
    end

    ## Insert characters
    current_chunk = convert(chunk, string)
    @position = get_position(current_chunk)
    @position
  end

  ## Debugging aid: prints every chunk as "[conversion+pending|original]"
  ## ("*" stands for a nil original), followed by the preedition with "|"
  ## at the cursor.
  def edit_display
    chunk  = @head_chunk
    detail = ""
    until chunk.chunk_next == @tail_chunk
      chunk = chunk.chunk_next
      original = chunk.original.nil? ? "*" : chunk.original.join()
      detail += "[#{chunk.conversion.join}+#{chunk.pending.join}|#{original}]"
    end
    puts(detail)
    left, cursor, right = edit_get_preediting_string
    puts(left + "|" + cursor + right)
  end

  ## Returns the global position of OFFSET inside CHUNK.  A nil OFFSET means
  ## the end of CHUNK.  Returns -1 if CHUNK is not found before the tail.
  def get_position(chunk, offset = nil)
    position  = 0
    cur_chunk = @head_chunk
    until cur_chunk == @tail_chunk
      if cur_chunk == chunk
        return position + (offset || chunk_get_length(chunk))
      end
      position += chunk_get_length(cur_chunk)
      cur_chunk = cur_chunk.chunk_next
    end
    -1
  end

  ## This returns a one-element list holding the joined raw input of all
  ## chunks, or an empty list if any chunk has lost its original (nil).
  def edit_get_raw_input
    chunk    = @head_chunk
    original = []
    until chunk.chunk_next == @tail_chunk
      chunk = chunk.chunk_next
      return [] if chunk.original.nil?
      original += chunk.original
    end
    [original.join]
  end

  ## This method returns a translated string under the current mode.
  def edit_get_surface_string
    chunk   = @head_chunk
    surface = ""
    until chunk.chunk_next == @tail_chunk
      chunk = chunk.chunk_next
      surface += chunk_get_surface(chunk)
    end
    surface
  end

  ## This method returns a translated string under the current mode.
  ## If @mask is true, pending characters are masked by "*".
  def edit_get_surface_string_masked
    chunk   = @head_chunk
    surface = ""
    until chunk.chunk_next == @tail_chunk
      chunk = chunk.chunk_next
      surface += chunk_get_surface_masked(chunk)
    end
    surface
  end

  ## This checks the validation of the preediting string.  If the preedition
  ## is valid, it returns true.  If invalid, false.
  ## ex). "apple (あっplえ)" => false, "ringo(りんご)" => true.
  def hybrid_typing_check_validation
    chunk = @head_chunk
    current_chunk = chunk_get_at(@position).first
    flag_valid    = true
    flag_original = true
    original      = ""
    until chunk.chunk_next == @tail_chunk
      chunk = chunk.chunk_next

      ## If pending characters exist in the preediting chunks except the last
      ## chunk (and the chunk under the cursor), or a chunk displays nothing
      ## at all, the flag for validation becomes false.
      dangling_pending = chunk.pending.length > 0 &&
                         chunk.chunk_next != @tail_chunk &&
                         chunk != current_chunk
      nothing_shown = (chunk.pending.length + chunk.conversion.length == 0)
      flag_valid = false if dangling_pending || nothing_shown

      ## If there're chunks whose original value is nil, the flag for original
      ## input becomes false.
      if chunk.original.nil?
        flag_original = false
      else
        original += chunk.original.join
      end
    end

    return false if flag_valid == false && flag_original == true

    ## If the original string has capital characters in the middle of the
    ## string, the validation will be false.
    ## ex). "FreeWnn" => false, "PRIME" => false, "Anthy" => true
    return false if original =~ /.+[A-Z]/

    true
  end

  ## When hybrid typing is enabled, switches between :default and :raw
  ## according to hybrid_typing_check_validation and re-converts the chunks.
  ## Returns the typing mode in effect afterwards.
  def hybrid_typing_update
    return @typing_mode unless @hybrid_typing

    new_mode = hybrid_typing_check_validation ? :default : :raw
    return @typing_mode if new_mode == @typing_mode

    ## The recover methods must run while the old mode is still active;
    ## the cursor is recomputed only after the mode has changed.
    chunk, offset =
      if new_mode == :default
        edit_recover_conversion
      else # new_mode == :raw
        edit_recover_original
      end
    @typing_mode = new_mode
    @position    = get_position(chunk, offset)
    @typing_mode
  end

  ## This method returns a list of a translated string under the current mode.
  ## The list nodes are: [left-string, cursor-character, right-string]
  ## ex). "aiu|eo" => ["aiu", "e", "o"]
  def edit_get_preediting_string
    hybrid_typing_update
    chars = edit_get_surface_string_masked.split(//)
    left_string   = chars[0, @position].join
    cursor_string = (chars[@position] || "")
    right_string  = (chars[(@position + 1)..-1] || []).join
    [left_string, cursor_string, right_string]
  end

  ## This method returns a string for PRIME which is a Japanese PRedictive
  ## Input Method Editor.  Basically it returns the original typed string,
  ## or the displayed string if the original string is broken.
  def edit_get_query_string
    return edit_get_surface_string if @typing_mode != :default

    conversion = ""
    original   = ""
    chunk = @head_chunk
    until chunk.chunk_next == @tail_chunk
      chunk = chunk.chunk_next
      if original
        if chunk.original
          original += chunk.original.join
        else # chunk.original == nil
          original = nil
        end
      end
      conversion += chunk_get_surface(chunk)
    end
    original || conversion
  end

  ## This returns a list which has the raw input (when it is available) and
  ## the converted string.  ex). "kom" => ["kom", "こm"]
  ## The converted string is left out when it equals the raw input.
  def edit_get_conversion
    conversions = edit_get_raw_input.dup
    conversion  = edit_get_surface_string
    ## edit_get_raw_input returns an Array, so the surface String has to be
    ## compared with its elements rather than with the Array itself.
    conversions.push(conversion) unless conversions.include?(conversion)
    conversions
  end

  ## This method returns a list of candidate strings completed from the
  ## preedition string.  For example, in the case the preedition is "sh",
  ## this returns ["sh", "しゃ", "しゅ", "しょ"].
  def edit_get_expansion
    if @typing_mode == :raw || @typing_mode == :wide_ascii
      return [edit_get_surface_string]
    end
    edit_get_expansion_internal
  end

  ## Expansion for the kana modes; see edit_get_expansion.
  def edit_get_expansion_internal
    results    = []
    conversion = ""
    original   = ""
    chunk = @head_chunk
    until chunk.chunk_next == @tail_chunk
      chunk = chunk.chunk_next
      if original
        if chunk.original
          original += chunk.original.join
        else # chunk.original == nil
          original = nil
        end
      end

      conversion += suikyo_convert(chunk.conversion.join)
      ## The pending part of the last chunk is expanded below instead.
      unless chunk.chunk_next == @tail_chunk
        conversion += suikyo_convert(chunk.pending.join)
      end
    end

    ## Add the original raw input, if it exists.
    ## If the Conversion equals "", the Expansion contains the Original.
    results.push(original) if original && conversion != ""

    ## The pending of the last chunk should be expanded.
    if chunk.pending.length > 0
      _base, expansion = @suikyo.expand(chunk.pending.join)
      expansion.each do |suffix|
        results.push(conversion + suikyo_convert(suffix))
      end
    else
      results.push(conversion)
    end
    results
  end
  private :edit_get_expansion_internal

  ## Enables hybrid typing and immediately re-evaluates the typing mode.
  def set_mode_hybrid
    @hybrid_typing = true
    hybrid_typing_update
  end

  ## The following setters disable hybrid typing, re-convert the chunks for
  ## the requested mode and return the new cursor position.

  def set_mode_default # F6
    switch_typing_mode(:default)
  end

  def set_mode_katakana # F7
    switch_typing_mode(:katakana)
  end

  def set_mode_half_katakana # F8
    switch_typing_mode(:half_katakana)
  end

  def set_mode_wide_ascii # F9
    switch_typing_mode(:wide_ascii)
  end

  def set_mode_raw # F10
    switch_typing_mode(:raw)
  end

  private

  ## Common body of the set_mode_* methods.  The chunks are recovered while
  ## the previous mode is still active, then the mode is switched and the
  ## cursor position is recomputed under the new mode.
  def switch_typing_mode(mode)
    @hybrid_typing = false
    chunk, offset =
      if mode == :raw || mode == :wide_ascii
        edit_recover_original
      else
        edit_recover_conversion
      end
    @typing_mode = mode
    @position    = get_position(chunk, offset)
  end

  ## This method converts the preedition string again and returns the
  ## current chunk and the offset in it.  This method is supposed to be
  ## called when the typing mode changes to the :default mode.
  def edit_recover_conversion
    cur_chunk, offset = chunk_get_at(@position)
    ## If the cursor positions the end of chunk, offset value is set to nil
    ## which means the end of chunk.
    offset = nil if chunk_get_length(cur_chunk) == offset

    chunk = @head_chunk
    pending_chunk = nil # collects a run of chunks that display nothing
    until chunk.chunk_next == @tail_chunk
      if chunk.chunk_next.conversion.length > 0 ||
         chunk.chunk_next.pending.length > 0
        ## The run ended: convert what was collected so far.
        chunk = convert(pending_chunk, "") if pending_chunk
        chunk = chunk.chunk_next
        pending_chunk = nil
      else # chunk.chunk_next displays nothing
        chunk = chunk.chunk_next
        if pending_chunk
          ## Merge this chunk's raw input into the collecting chunk.
          pending_chunk.pending += chunk.original
          next_chunk = chunk_delete(chunk)
          if cur_chunk == chunk
            cur_chunk = next_chunk
            offset = nil
          end
          chunk = next_chunk
        else
          pending_chunk = chunk
          pending_chunk.conversion = []
          pending_chunk.pending    = pending_chunk.original_copy
        end
      end
    end
    convert(pending_chunk, "") if pending_chunk

    [cur_chunk, offset]
  end

  ## This method converts the preedition string again and returns the
  ## current chunk and the offset in it.  This method is supposed to be
  ## called when the typing mode changes to the :raw mode.
  def edit_recover_original
    cur_chunk, offset = chunk_get_at(@position)
    ## If the cursor positions the end of chunk, offset value is set to nil
    ## which means the end of chunk.
    offset = nil if chunk_get_length(cur_chunk) == offset

    chunk = @head_chunk
    reconv_chunk = nil # collects a run of chunks without raw input
    until chunk.chunk_next == @tail_chunk
      if chunk.chunk_next.original
        ## The run ended: rebuild the raw input of what was collected.
        chunk = reverse_convert_chunk(reconv_chunk) if reconv_chunk
        chunk = chunk.chunk_next
        reconv_chunk = nil
      else
        chunk = chunk.chunk_next
        if reconv_chunk
          reconv_chunk.conversion += chunk.conversion + chunk.pending
          next_chunk = chunk_delete(chunk)
          if cur_chunk == chunk
            cur_chunk = next_chunk
            offset = 0
          end
          chunk = next_chunk
        else
          reconv_chunk = chunk
          reconv_chunk.conversion += reconv_chunk.pending
          reconv_chunk.pending  = []
          reconv_chunk.original = []
        end
      end
    end
    reverse_convert_chunk(reconv_chunk) if reconv_chunk

    [cur_chunk, offset]
  end

  ## Feeds the displayed characters of CHUNK through the reverse table and
  ## converts the result again from CHUNK on, which refills the original
  ## lists.  Returns the chunk returned by convert.
  def reverse_convert_chunk(chunk)
    input = @suikyo_reverse.convert(chunk.conversion.join + " ")
    chunk.conversion = []
    convert(chunk, input)
  end

  ## Total number of displayed characters under the current typing mode.
  def edit_get_length
    chunk  = @head_chunk.chunk_next
    length = 0
    until chunk == @tail_chunk
      length += chunk_get_length(chunk)
      chunk = chunk.chunk_next
    end
    length
  end

  ####
  #### Chunk operations
  ####

  ## This function returns a string of the specified SuikyoCharChunk for
  ## display.  The string depends on the current typing_mode.
  def chunk_get_surface(chunk)
    string =
      case @typing_mode
      when :default, :katakana, :half_katakana
        chunk.conversion.join + chunk.pending.join
      else # @typing_mode == :raw, :wide_ascii
        chunk.original.join
      end
    suikyo_convert(string)
  end

  ## This is basically the same with chunk_get_surface.  The difference is
  ## that if @mask is true this method masks pending characters with "*".
  ## The mask only applies to the kana modes.
  def chunk_get_surface_masked(chunk)
    string =
      case @typing_mode
      when :default, :katakana, :half_katakana
        if @mask
          chunk.conversion.join + ("*" * chunk.pending.length)
        else
          chunk.conversion.join + chunk.pending.join
        end
      else # @typing_mode == :raw, :wide_ascii
        chunk.original.join
      end
    suikyo_convert(string)
  end

  ## This returns the result of suikyo.convert in each surface mode.
  ## An unknown typing mode yields nil.
  def suikyo_convert(string)
    case @typing_mode
    when :default       then @suikyo.convert(string)                       # F6
    when :katakana      then @@suikyo_katakana.convert(string + " ")       # F7
    when :half_katakana then @@suikyo_half_katakana.convert(string + " ")  # F8
    when :wide_ascii    then @@suikyo_wide_ascii.convert(string + " ")     # F9
    when :raw           then string                                        # F10
    end
  end

  ## This reconverts a string to the default surface and returns it.
  ## An unknown typing mode yields nil.
  def suikyo_reconvert_to_default(string)
    case @typing_mode
    when :default # F6
      string
    when :katakana # F7
      @@suikyo_katakana_reverse.convert(string + " ")
    when :half_katakana # F8
      @@suikyo_half_katakana_reverse.convert(string + " ")
    when :wide_ascii # F9
      @@suikyo_wide_ascii_reverse.convert(string + " ")
    when :raw # F10
      string
    end
  end

  ## This function returns the length of the specified SuikyoCharChunk.
  ## The value depends on the current typing_mode.
  def chunk_get_length(chunk)
    chunk_get_surface(chunk).split(//).length
  end

  ## This method returns a list of a chunk and the position in the
  ## chunk from the specified global position.  For example, if a
  ## chunk list is [HEAD]-[ta]-[i]-[ya]-[ki]-[TAIL] and a specified
  ## position is 4, this method returns a list of [ya] and 1.  If 3,
  ## it returns [i] and 1.  If 0, [HEAD] and 0.
  ## A position past the end is clamped to the end of the last chunk.
  def chunk_get_at(position)
    chunk = @head_chunk
    until chunk.chunk_next == @tail_chunk
      length = chunk_get_length(chunk)
      return [chunk, position] if position <= length
      position -= length
      chunk = chunk.chunk_next
    end
    position = [position, chunk_get_length(chunk)].min
    [chunk, position]
  end

  ## This splits the argument chunk to two chunks at the position
  ## and returns true.  If it failed, it returns false.
  def chunk_split(chunk, position)
    case @typing_mode
    when :default, :katakana, :half_katakana
      chunk_split_default(chunk, position)
    else # :raw, :wide_ascii
      chunk_split_raw(chunk, position)
    end
  end

  ## This splits chunk under the kana typing modes.
  def chunk_split_default(chunk, position)
    return false if position > chunk_get_length(chunk) || position <= 0

    ### Separate into two chunks
    ## Getting the pending and conversion data of left and right chunks.
    original   = chunk.original
    conversion = suikyo_convert(chunk.conversion.join).split(//)
    pending    = suikyo_convert(chunk.pending.join).split(//)

    if position > conversion.length
      ## The split point lies inside the pending characters.
      position_pending = position - conversion.length
      left_pending     = pending[0, position_pending].join
      left_conversion  = conversion.join
      right_pending    = pending[position_pending..-1].join
      right_conversion = ""
    else # position <= conversion.length
      left_pending     = ""
      left_conversion  = conversion[0, position].join
      right_pending    = pending.join
      right_conversion = conversion[position..-1].join
    end

    ## Getting the data original of left and right chunks.  The raw input
    ## can only be divided when the right side consists of pending
    ## characters that still appear in it; otherwise both sides lose it.
    left_original  = nil
    right_original = nil
    if original != nil && right_conversion == ""
      index = original.join.rindex(right_pending)
      if index != nil
        left_original  = original[0, index]
        right_original = original[index..-1]
      end
    end

    ## Reflecting the data to chunks.
    chunk.conversion = suikyo_reconvert_to_default(left_conversion).split(//)
    chunk.pending    = suikyo_reconvert_to_default(left_pending).split(//)
    chunk.original   = left_original

    if right_conversion.length > 0 || right_pending.length > 0
      right_chunk = SuikyoCharChunk.new_next(chunk)
      right_chunk.conversion =
        suikyo_reconvert_to_default(right_conversion).split(//)
      right_chunk.pending =
        suikyo_reconvert_to_default(right_pending).split(//)
      right_chunk.original = right_original
    end
    true
  end

  ## This splits chunk under the raw typing mode.
  def chunk_split_raw(chunk, position)
    return false if position >= chunk_get_length(chunk) || position <= 0

    chars = chunk.original
    left_original  = chars[0, position]
    right_original = chars[position..-1]

    chunk.conversion = []
    chunk.pending    = left_original
    chunk.original   = left_original.dup

    right_chunk = SuikyoCharChunk.new_next(chunk)
    right_chunk.conversion = []
    right_chunk.pending    = right_original
    right_chunk.original   = right_original.dup
    true
  end

  ## This method deletes CHUNK and reconnects the previous and
  ## next chunks.  It returns the previous chunk, or the next chunk if
  ## the previous one does not exist.
  def chunk_delete(chunk)
    ## Delete this chunk
    current_chunk = nil
    if chunk.chunk_next
      current_chunk = chunk.chunk_next
      chunk.chunk_next.chunk_prev = chunk.chunk_prev
    end
    if chunk.chunk_prev
      current_chunk = chunk.chunk_prev
      chunk.chunk_prev.chunk_next = chunk.chunk_next
    end
    current_chunk
  end

  ## Feeds the pending characters of CHUNK followed by the characters of
  ## STRING through the forward table, creating new chunks after CHUNK as
  ## conversions complete.  Returns the last chunk that was worked on.
  ## A " " is consumed as input but never stored in a chunk.
  def convert(chunk, string)
    chars      = string.split(//)
    orig_table = @suikyo.table
    ## Characters carried over: first the old pending of CHUNK, later the
    ## continuation (node.cont) of a finished conversion.
    cont_chars    = chunk.pending
    chunk.pending = []

    loop do
      ## Nothing carried over: start a new chunk unless this one is unused.
      if cont_chars.length == 0
        chunk = SuikyoCharChunk.new_next(chunk) unless chunk.empty?
      end

      table = orig_table
      node  = nil

      ## Walk the table with the carried-over characters first ...
      while table && cont_chars.length > 0
        head     = cont_chars[0]
        tmp_node = table.get_word(head)
        table    = (tmp_node && tmp_node.subtable)
        ## A character without an entry is still taken when it would
        ## start the chunk, so that input is never lost.
        if tmp_node || chunk.pending == []
          chunk.pending += head.split(//) unless head == " "
          node = tmp_node
          cont_chars.shift
        end
      end

      ## ... then with the newly typed characters, which are also recorded
      ## as raw input.
      while table && chars.length > 0
        head     = chars[0]
        tmp_node = table.get_word(head)
        table    = (tmp_node && tmp_node.subtable)
        if tmp_node || chunk.pending == []
          chunk.pending  += head.split(//) unless head == " "
          chunk.original += head.split(//) unless head == " "
          node = tmp_node
          chars.shift
        end
      end

      ## The walk reached a leaf that carries a result and/or continuation.
      if table.nil? && node && (node.result || node.cont)
        chunk.pending = []
        cont_chars = node.cont.split(//) if node.cont
        chunk.conversion += node.result.split(//) if node.result
      end

      if cont_chars.length == 0 && chars.length == 0
        chunk = chunk_delete(chunk) if chunk.empty?
        return chunk
      end
    end
  end
end