# suikyo-composer.rb: Preedition composer using Suikyo.
# This library is for input methods such as PRIME.
# $Id: suikyo-composer.rb,v 1.11 2005/03/07 09:41:14 komatsu Exp $
#
# Copyright (C) 2004 Hiroyuki Komatsu <komatsu@taiyaki.org>
# All rights reserved.
# This is free software with ABSOLUTELY NO WARRANTY.
#
# You can redistribute it and/or modify it under the terms of
# the GNU General Public License version 2.
require 'suikyo/suikyo'
## One node of the doubly linked chunk list used by SuikyoComposer.
## A chunk keeps three character arrays:
##   pending    - characters that are not converted yet,
##   original   - the raw typed characters (may be set to nil by callers),
##   conversion - the converted characters.
class SuikyoCharChunk
  attr_accessor :chunk_prev, :chunk_next, :pending, :original, :conversion

  ## Creates a fresh chunk, links it directly after +chunk+ and returns it.
  ## The former successor of +chunk+ (if any) becomes the successor of the
  ## new chunk.
  def self.new_next(chunk)
    follower = chunk.chunk_next
    inserted = SuikyoCharChunk.new(chunk, follower)
    follower.chunk_prev = inserted if follower
    chunk.chunk_next = inserted
    inserted
  end

  ## +chunk_prev+ and +chunk_next+ are the neighbours in the list (or nil).
  def initialize(chunk_prev = nil, chunk_next = nil)
    @chunk_prev = chunk_prev
    @chunk_next = chunk_next
    reset
  end

  ## Clears all three character arrays. The links are left untouched.
  def reset
    @pending = []
    @original = []
    @conversion = []
  end

  ## Returns a shallow copy of pending, or nil if pending is nil.
  def pending_copy
    @pending.nil? ? nil : @pending.dup
  end

  ## Returns a shallow copy of original, or nil if original is nil.
  def original_copy
    @original.nil? ? nil : @original.dup
  end

  ## Returns a shallow copy of conversion, or nil if conversion is nil.
  def conversion_copy
    @conversion.nil? ? nil : @conversion.dup
  end

  ## Number of pending characters plus number of converted characters.
  def length
    @pending.length + @conversion.length
  end

  ## True only if pending, original and conversion all equal [].
  ## A chunk whose original is nil is therefore not empty.
  def empty?
    [@pending, @original, @conversion].all? { |part| part == [] }
  end
end
## Read-only snapshot of a composer state: cursor position, the head and
## tail chunks of a chunk list, and the typing mode. SuikyoComposer stores
## instances of this class in its undo buffer.
class SuikyoComposerData
  attr_reader :position, :head_chunk, :tail_chunk, :typing_mode

  def initialize(position, head_chunk, tail_chunk, typing_mode)
    @position, @typing_mode = position, typing_mode
    @head_chunk, @tail_chunk = head_chunk, tail_chunk
  end
end
## Main class of this file.
class SuikyoComposer
## Converters used by suikyo_convert to render the preedition in the
## :katakana, :half_katakana and :wide_ascii typing modes. They are class
## variables, so they are created once while the class body is evaluated
## and are shared by all SuikyoComposer instances.
## NOTE(review): the string arguments look like Suikyo table names --
## confirm against Suikyo.new.
@@suikyo_katakana = Suikyo.new("hiragana-katakana")
@@suikyo_half_katakana = Suikyo.new("hiragana-halfkatakana")
@@suikyo_wide_ascii = Suikyo.new("ascii-wideascii")
## Converters for the opposite direction, used by
## suikyo_reconvert_to_default.
@@suikyo_katakana_reverse = Suikyo.new("katakana-hiragana")
@@suikyo_half_katakana_reverse = Suikyo.new("halfkatakana-hiragana")
@@suikyo_wide_ascii_reverse = Suikyo.new("wideascii-ascii")
## Public readers and writers for the @mask and @hybrid_typing flags that
## are initialized in #initialize.
attr_accessor :mask, :hybrid_typing
## Creates a composer. +table+ and +table_reverse+ are passed unchanged to
## Suikyo.new for the forward and the reverse converter respectively.
def initialize(table = nil, table_reverse = nil)
  @suikyo = Suikyo.new(table)
  @suikyo_reverse = Suikyo.new(table_reverse)

  ## Start with an empty chunk list and the cursor at position 0.
  reset_composer

  ## @typing_mode is the current way of displaying the preedition. It is one
  ## of :default, :raw, :katakana, :wide_ascii and :half_katakana.
  @typing_mode = :default

  ## Undo buffer (see undo_set and undo). The redo buffer is not
  ## implemented yet.
  @undo_data = []
  @redo_data = []

  ## When @mask is true, pending characters are displayed as "*".
  ## For example "たいy" => "たい*". This is usually for T-code.
  @mask = false

  ## When @hybrid_typing is true, the composer checks whether the
  ## preedition is valid and switches the typing mode accordingly.
  ## ex). "あっplえ" => "apple".
  @hybrid_typing = false
end
# def SuikyoComposer::copy (composer)
# new_composer = SuikyoComposer.new( composer.get_table(),
# composer.get_reverse_table() )
# (head_chunk, tail_chunk) = composer.chunk_copy()
# new_composer.set_composer(head_chunk, tail_chunk, composerposition
# end
## Copies the chunk list and returns the pair [head, tail] of the copy.
## Every chunk between @head_chunk and @tail_chunk is duplicated with
## copies of its conversion, original and pending arrays; the new head and
## tail are freshly created chunks.
def chunk_copy
  copy_head = SuikyoCharChunk.new
  copy_tail = SuikyoCharChunk.new_next(copy_head)
  last_copy = copy_head
  source = @head_chunk.chunk_next
  while source != @tail_chunk
    ## new_next inserts right after last_copy, i.e. just before copy_tail.
    last_copy = SuikyoCharChunk.new_next(last_copy)
    last_copy.conversion = source.conversion_copy
    last_copy.original = source.original_copy
    last_copy.pending = source.pending_copy
    source = source.chunk_next
  end
  [copy_head, copy_tail]
end
private :chunk_copy
# def set_composer (head_chunk, tail_chunk, position = 0)
# @position = position
# @head_chunk = head_chunk
# @tail_chunk = tail_chunk
# end
## Discards the current chunk list: the cursor goes back to 0 and the list
## is replaced by a new head chunk directly followed by a new tail chunk.
## Returns the new tail chunk.
def reset_composer
  sentinel = SuikyoCharChunk.new
  @position = 0
  @head_chunk = sentinel
  @tail_chunk = SuikyoCharChunk.new_next(sentinel)
end
## Calls loadfile(name, table_path) on the forward converter's table for
## every entry of +tables+. Returns +tables+.
def set_table(tables, table_path = nil)
  tables.each do |name|
    @suikyo.table.loadfile(name, table_path)
  end
end
## Returns the table object of the forward converter.
def get_table
  @suikyo.table
end
## Calls loadfile(name, table_path) on the reverse converter's table for
## every entry of +tables+. Returns +tables+.
def set_reverse_table(tables, table_path = nil)
  tables.each do |name|
    @suikyo_reverse.table.loadfile(name, table_path)
  end
end
## Returns the table object of the reverse converter.
def get_reverse_table
  @suikyo_reverse.table
end
## Forwards one entry to the forward converter's table via
## table.set(input, output, pending, unescape) and returns its result.
def set_table_entry(input, output, pending = nil, unescape = true)
  forward_table = @suikyo.table
  forward_table.set(input, output, pending, unescape)
end
## Forwards one entry to the reverse converter's table via
## table.set(input, output, pending, unescape) and returns its result.
def set_reverse_table_entry(input, output, pending = nil, unescape = true)
  reverse_table = @suikyo_reverse.table
  reverse_table.set(input, output, pending, unescape)
end
###
### Undo operations
###
## Saves the current state (cursor position, a copy of the chunk list and
## the typing mode) in the undo buffer. The buffer is replaced, not
## extended, so only the most recent state is kept.
def undo_set
  head_copy, tail_copy = chunk_copy
  snapshot = SuikyoComposerData.new(@position, head_copy, tail_copy,
                                    @typing_mode)
  ## A multi-level history would push here instead: @undo_data.push(snapshot)
  @undo_data = [snapshot]
end
private :undo_set
## Restores the state stored at the front of @undo_data (position, chunk
## list and typing mode) and removes it from the buffer.
## Returns true on success, false if @undo_data contains no data.
def undo
  return false if @undo_data.empty?

  ## FIXME: Add redo code.
  ## FIXME: (2004-12-05) <Hiro>
  snapshot = @undo_data.shift
  @position = snapshot.position
  @head_chunk = snapshot.head_chunk
  @tail_chunk = snapshot.tail_chunk
  @typing_mode = snapshot.typing_mode
  true
end
###
### Cursor operations
###
## Moves the cursor to +position+.
##
## A negative +position+ counts from the right edge: -1 means the end of
## the preedition, -2 one character before it, and so on.
##
## Returns true and updates @position if the resulting position lies
## within 0..length. Otherwise returns false and leaves @position
## unchanged. This includes negative values that reach beyond the left
## edge (e.g. -10 on a three character preedition); without that check a
## negative cursor position would be stored.
def cursor_at(position)
  length = edit_get_length
  ## ex. if position == -1 then position = length.
  position += length + 1 if position < 0
  return false if position < 0 || position > length

  @position = position
  true
end
## Moves the cursor one character to the left. Returns false if the cursor
## already is at position 0, otherwise the result of cursor_at.
def cursor_left
  @position == 0 ? false : cursor_at(@position - 1)
end
## Moves the cursor one character to the right and returns the result of
## cursor_at for the new position.
def cursor_right
  target = @position + 1
  cursor_at(target)
end
## Moves the cursor to position 0 and returns the result of cursor_at.
def cursor_left_edge
  leftmost = 0
  cursor_at(leftmost)
end
## Moves the cursor to the end of the preedition; cursor_at treats -1 as
## "the position right after the last character".
def cursor_right_edge
  end_marker = -1
  cursor_at(end_marker)
end
## Erases the whole composition string and resets the position to 0.
## The state before the erasure is stored in the undo buffer first.
## Returns the result of reset_composer.
def edit_erase
  undo_set
  reset_composer
end
## Deletes the character to the right of the cursor: the cursor is moved
## one step right and, if that succeeded, a backspace is performed.
## Returns the resulting cursor position.
def edit_delete
  @position = edit_backspace if cursor_right
  @position
end
## Deletes the character to the left of the cursor.
## Returns the new cursor position, or -1 (leaving @position untouched) if
## chunk_get_at resolves the cursor to the tail chunk or to the head
## chunk, i.e. there is nothing to the left of the cursor.
def edit_backspace
  chunk, offset = chunk_get_at(@position)
  return -1 if chunk == @tail_chunk || chunk == @head_chunk

  ## Make the cursor coincide with the end of +chunk+.
  chunk_split(chunk, offset)

  ## A chunk that displays a single character is removed as a whole.
  if chunk_get_length(chunk) == 1
    @position = get_position(chunk_delete(chunk))
    return @position
  end

  @position =
    case @typing_mode
    when :default, :katakana, :half_katakana
      edit_backspace_internal_default(chunk)
    else # :raw, :wide_ascii
      edit_backspace_internal_raw(chunk)
    end
end
## Removes the last character of +chunk+ in the :default, :katakana and
## :half_katakana modes and returns get_position(chunk).
##
## If the chunk has pending characters, the last one is dropped; original
## is shortened too when its last character is the same one, otherwise
## original becomes nil. Without pending characters the last displayed
## character of the conversion is dropped and original becomes nil.
def edit_backspace_internal_default(chunk)
  if chunk.pending.empty?
    ## Work on the displayed form, then store the default form again.
    shown = suikyo_convert(chunk.conversion.join).split(//)
    shown.pop
    chunk.conversion = suikyo_reconvert_to_default(shown.join).split(//)
    chunk.original = nil
  else
    removed = chunk.pending.pop
    unless chunk.original.nil?
      if chunk.original[-1] == removed
        chunk.original.pop
      else
        chunk.original = nil
      end
    end
  end
  get_position(chunk)
end
private :edit_backspace_internal_default
## Removes the last character of +chunk+ in the :raw and :wide_ascii
## modes: the last element of pending and of original is dropped and the
## conversion is cleared. Returns get_position(chunk).
def edit_backspace_internal_raw(chunk)
  [chunk.pending, chunk.original].each { |chars| chars.pop }
  chunk.conversion = []
  get_position(chunk)
end
private :edit_backspace_internal_raw
## Inserts +string+ at the cursor and converts it.
## Returns the new cursor position, or -1 if chunk_get_at resolves the
## cursor to the tail chunk.
## Note: no undo data is recorded here (the undo_set call is disabled).
def edit_insert(string)
  chunk, offset = chunk_get_at(@position)
  return -1 if chunk == @tail_chunk

  if chunk == @head_chunk
    ## Cursor at the beginning of the list: type into a new first chunk.
    chunk = SuikyoCharChunk.new_next(chunk)
  else
    ## Otherwise make the cursor coincide with the end of +chunk+.
    chunk_split(chunk, offset)
  end

  ## convert returns the chunk the cursor should follow.
  @position = get_position(convert(chunk, string))
end
## Debugging aid. Prints two lines to standard output:
## 1. every chunk as "[conversion+pending|original]" ("*" stands for a nil
##    original),
## 2. the preediting string with "|" at the cursor position.
def edit_display
  detail = ""
  chunk = @head_chunk.chunk_next
  until chunk == @tail_chunk
    original = chunk.original.nil? ? "*" : chunk.original.join
    detail += "[#{chunk.conversion.join}+#{chunk.pending.join}|#{original}]"
    chunk = chunk.chunk_next
  end
  puts(detail)

  left, cursor, right = edit_get_preediting_string
  puts(left + "|" + cursor + right)
end
## Returns the global cursor position that corresponds to +offset+
## characters into +chunk+. If +offset+ is nil (or false), the end of
## +chunk+ is used. Returns -1 if +chunk+ is not found between
## @head_chunk (inclusive) and @tail_chunk (exclusive).
def get_position(chunk, offset = nil)
  total = 0
  walker = @head_chunk
  while walker != @tail_chunk
    return total + (offset || chunk_get_length(chunk)) if walker == chunk
    total += chunk_get_length(walker)
    walker = walker.chunk_next
  end
  -1
end
## Returns the raw typed input as a one-element list, e.g. ["kom"].
## If any chunk has lost its original characters (original is nil), an
## empty list is returned instead.
def edit_get_raw_input
  typed = []
  chunk = @head_chunk.chunk_next
  until chunk == @tail_chunk
    return [] if chunk.original.nil?
    typed += chunk.original
    chunk = chunk.chunk_next
  end
  [typed.join]
end
## Returns the preedition as it is displayed in the current typing mode:
## the concatenation of chunk_get_surface for every chunk between the head
## and the tail chunk.
def edit_get_surface_string
  surface = ""
  chunk = @head_chunk.chunk_next
  while chunk != @tail_chunk
    surface += chunk_get_surface(chunk)
    chunk = chunk.chunk_next
  end
  surface
end
## Same as edit_get_surface_string, but built from
## chunk_get_surface_masked: if @mask is true, pending characters are
## shown as "*".
def edit_get_surface_string_masked
  surface = ""
  chunk = @head_chunk.chunk_next
  while chunk != @tail_chunk
    surface += chunk_get_surface_masked(chunk)
    chunk = chunk.chunk_next
  end
  surface
end
## Checks whether the preediting string is valid for the :default mode.
## Returns true if it is valid, false otherwise.
## ex). "apple (あっplえ)" => false, "ringo(りんご)" => true.
def hybrid_typing_check_validation
  current_chunk, _offset = chunk_get_at(@position)
  valid = true
  all_typed = true
  typed = ""

  chunk = @head_chunk.chunk_next
  until chunk == @tail_chunk
    ## A chunk invalidates the preedition if it has pending characters
    ## although it is neither the last chunk nor the chunk at the cursor,
    ## or if it displays nothing at all.
    if (chunk.pending.length > 0 &&
        chunk.chunk_next != @tail_chunk && chunk != current_chunk) ||
       (chunk.pending.length + chunk.conversion.length == 0)
      valid = false
    end

    ## Collect the raw input; remember if any chunk has lost it.
    if chunk.original.nil?
      all_typed = false
    else
      typed += chunk.original.join
    end
    chunk = chunk.chunk_next
  end

  ## The invalid flag only counts when every chunk kept its raw input.
  return false if !valid && all_typed

  ## Capital characters after the first character make it invalid.
  ## ex). "FreeWnn" => false, "PRIME" => false, "Anthy" => true
  return false if typed =~ /.+[A-Z]/

  true
end
## When hybrid typing is enabled, switches between the :default and :raw
## typing modes depending on hybrid_typing_check_validation, rebuilding
## the chunks and the cursor position on a switch.
## Returns the typing mode that is in effect afterwards.
def hybrid_typing_update
  return @typing_mode unless @hybrid_typing

  target = hybrid_typing_check_validation ? :default : :raw
  return @typing_mode if target == @typing_mode

  ## The chunks are recovered while the old mode is still in effect; the
  ## position is computed after the new mode has been set.
  chunk, offset =
    if target == :default
      edit_recover_conversion
    else # target == :raw
      edit_recover_original
    end
  @typing_mode = target
  @position = get_position(chunk, offset)
  @typing_mode
end
## Returns the displayed (masked) preedition split around the cursor as
## [left-string, cursor-character, right-string].
## ex). "aiu|eo" => ["aiu", "e", "o"]
## The typing mode is refreshed through hybrid_typing_update first.
def edit_get_preediting_string
  hybrid_typing_update
  chars = edit_get_surface_string_masked.split(//)
  before = chars[0, @position].join
  at_cursor = chars[@position] || ""
  after = (chars[(@position + 1)..-1] || []).join
  [before, at_cursor, after]
end
## Returns the query string for PRIME (a Japanese PRedictive Input Method
## Editor).
## Outside the :default mode this is the surface string. In the :default
## mode it is the originally typed string, or the displayed string if the
## original of any chunk is broken (nil).
def edit_get_query_string
  return edit_get_surface_string if @typing_mode != :default

  shown = ""
  typed = ""
  chunk = @head_chunk.chunk_next
  until chunk == @tail_chunk
    ## typed turns into nil for good once a chunk without original is met.
    typed = (chunk.original ? typed + chunk.original.join : nil) if typed
    shown += chunk_get_surface(chunk)
    chunk = chunk.chunk_next
  end
  typed || shown
end
## Returns a list of the raw input followed by the converted (surface)
## string. ex). "kom" => ["kom", "こm"]
##
## The raw input is omitted when it is not available (edit_get_raw_input
## returns an empty list), and the surface string is omitted when it is
## identical to the raw input, so the result never holds the same string
## twice.
def edit_get_conversion
  conversions = []

  ## edit_get_raw_input returns a list: [] or [raw_string].
  raw_input = edit_get_raw_input
  conversions.concat(raw_input) if raw_input

  ## The surface is a String while raw_input is a list, so comparing the
  ## two directly would never detect a duplicate; check membership.
  surface = edit_get_surface_string
  conversions.push(surface) unless conversions.include?(surface)
  conversions
end
## Returns a list of candidate strings completed from the preedition
## string. For example, if the preedition is "sh", this returns
## ["sh", "しゃ", "しゅ", "しょ"].
## In the :raw and :wide_ascii modes nothing is expanded and the list only
## holds the surface string.
def edit_get_expansion
  case @typing_mode
  when :raw, :wide_ascii
    [edit_get_surface_string]
  else
    edit_get_expansion_internal
  end
end
## Builds the expansion list for the non-raw typing modes.
## The list starts with the raw typed input (if every chunk still has it
## and the converted prefix is not ""), followed by one candidate per
## expansion of the last chunk's pending characters, or by the converted
## string alone if nothing is pending there.
def edit_get_expansion_internal
  results = []
  shown = ""
  typed = ""

  ## After this loop +chunk+ is the last chunk of the list (or the head
  ## chunk if the list is empty).
  chunk = @head_chunk
  until chunk.chunk_next == @tail_chunk
    chunk = chunk.chunk_next
    ## typed turns into nil for good once a chunk without original is met.
    typed = (chunk.original ? typed + chunk.original.join : nil) if typed
    shown += suikyo_convert(chunk.conversion.join)
    ## The pending part of the last chunk is expanded below instead.
    shown += suikyo_convert(chunk.pending.join) unless chunk.chunk_next == @tail_chunk
  end

  ## Add the original raw input, if it exists.
  ## If the conversion equals "", the expansion contains the original.
  results.push(typed) if typed && shown != ""

  ## The pending of the last chunk should be expanded.
  if chunk.pending.length > 0
    _base, suffixes = @suikyo.expand(chunk.pending.join)
    suffixes.each { |suffix| results.push(shown + suikyo_convert(suffix)) }
  else
    results.push(shown)
  end
  results
end
private :edit_get_expansion_internal
## Enables hybrid typing and immediately re-evaluates the typing mode.
## Returns the result of hybrid_typing_update.
def set_mode_hybrid
  @hybrid_typing = true
  hybrid_typing_update
end
## F6: switches to the :default typing mode and disables hybrid typing.
## The chunks are reconverted while the previous mode is still in effect;
## the cursor position is recomputed under the new mode and returned.
def set_mode_default
  @hybrid_typing = false
  cursor_chunk, cursor_offset = edit_recover_conversion
  @typing_mode = :default
  @position = get_position(cursor_chunk, cursor_offset)
end
## F7: switches to the :katakana typing mode and disables hybrid typing.
## The chunks are reconverted while the previous mode is still in effect;
## the cursor position is recomputed under the new mode and returned.
def set_mode_katakana
  @hybrid_typing = false
  cursor_chunk, cursor_offset = edit_recover_conversion
  @typing_mode = :katakana
  @position = get_position(cursor_chunk, cursor_offset)
end
## F8: switches to the :half_katakana typing mode and disables hybrid
## typing. The chunks are reconverted while the previous mode is still in
## effect; the cursor position is recomputed under the new mode and
## returned.
def set_mode_half_katakana
  @hybrid_typing = false
  cursor_chunk, cursor_offset = edit_recover_conversion
  @typing_mode = :half_katakana
  @position = get_position(cursor_chunk, cursor_offset)
end
## F9: switches to the :wide_ascii typing mode and disables hybrid typing.
## The original input of the chunks is recovered while the previous mode
## is still in effect; the cursor position is recomputed under the new
## mode and returned.
def set_mode_wide_ascii
  @hybrid_typing = false
  cursor_chunk, cursor_offset = edit_recover_original
  @typing_mode = :wide_ascii
  @position = get_position(cursor_chunk, cursor_offset)
end
## F10: switches to the :raw typing mode and disables hybrid typing.
## The original input of the chunks is recovered while the previous mode
## is still in effect; the cursor position is recomputed under the new
## mode and returned.
def set_mode_raw
  @hybrid_typing = false
  cursor_chunk, cursor_offset = edit_recover_original
  @typing_mode = :raw
  @position = get_position(cursor_chunk, cursor_offset)
end
private
## This method converts the preedition string again and returns a pair of
## the chunk at the cursor and the offset inside it (nil means the end of
## that chunk). It is supposed to be called when the typing mode changes
## to the :default mode (the set_mode_default, set_mode_katakana and
## set_mode_half_katakana methods and hybrid_typing_update call it).
##
## Chunks that have neither conversion nor pending characters are rebuilt
## from their original characters: a run of such chunks is merged into the
## first chunk of the run, and the merged characters are fed through
## convert() again.
def edit_recover_conversion ()
(cur_chunk, offset) = chunk_get_at(@position)
## If the cursor positions the end of chunk, offset value is set to nil
## which means the end of chunk.
if chunk_get_length(cur_chunk) == offset then
offset = nil
end
chunk = @head_chunk
## pending_chunk is the first chunk of the run that is currently being
## merged, or nil if no run is open.
pending_chunk = nil
until chunk.chunk_next == @tail_chunk do
if chunk.chunk_next.conversion.length > 0 or
chunk.chunk_next.pending.length > 0 then
## The next chunk already has displayable content, so it is kept as is.
## An open run ends here and is converted first; convert() returns the
## chunk from which the walk continues.
if pending_chunk then
chunk = convert(pending_chunk, "")
end
chunk = chunk.chunk_next
pending_chunk = nil
else # chunk.chunk_next.conversion.length == 0
chunk = chunk.chunk_next
if pending_chunk then
## Continue the open run: move the original characters of this chunk
## into the pending characters of the run's first chunk and unlink this
## chunk from the list.
pending_chunk.pending += chunk.original
next_chunk = chunk_delete(chunk)
## If the cursor was in the removed chunk, it follows to the chunk
## returned by chunk_delete, at its end (offset nil).
if cur_chunk == chunk then
cur_chunk = next_chunk
offset = nil
end
chunk = next_chunk
else
## Open a new run: this chunk's original characters become its pending
## characters.
pending_chunk = chunk
pending_chunk.conversion = []
pending_chunk.pending = pending_chunk.original_copy()
end
end
end
## Convert the run that reaches the end of the list, if any.
if pending_chunk then
chunk = convert(pending_chunk, "")
end
return [cur_chunk, offset]
end
## This method rebuilds the original (raw) characters of the chunks and
## returns a pair of the chunk at the cursor and the offset inside it (nil
## means the end of that chunk). It is supposed to be called when the
## typing mode changes to the :raw mode (set_mode_raw, set_mode_wide_ascii
## and hybrid_typing_update call it).
##
## Chunks whose original is nil are merged run by run into the first chunk
## of the run; the merged displayed characters are converted back with
## @suikyo_reverse and then fed through convert(), which records them as
## original characters again.
def edit_recover_original ()
(cur_chunk, offset) = chunk_get_at(@position)
## If the cursor positions the end of chunk, offset value is set to nil
## which means the end of chunk.
if chunk_get_length(cur_chunk) == offset then
offset = nil
end
chunk = @head_chunk
## reconv_chunk is the first chunk of the run that is currently being
## merged, or nil if no run is open.
reconv_chunk = nil
## NOTE(review): reconv is assigned here but never read in this method.
reconv = ""
until chunk.chunk_next == @tail_chunk do
if chunk.chunk_next.original then
## The next chunk still has its original (an empty array counts as
## present), so it is kept. An open run ends here: its collected
## characters plus a trailing " " are reverse-converted and typed again
## through convert().
if reconv_chunk then
input = @suikyo_reverse.convert( reconv_chunk.conversion.join + " " )
reconv_chunk.conversion = []
chunk = convert(reconv_chunk, input)
end
chunk = chunk.chunk_next
reconv_chunk = nil
else
chunk = chunk.chunk_next
if reconv_chunk then
## Continue the open run: collect the displayed characters of this chunk
## in the run's first chunk and unlink this chunk from the list.
reconv_chunk.conversion += chunk.conversion + chunk.pending
next_chunk = chunk_delete(chunk)
## If the cursor was in the removed chunk, it follows to the chunk
## returned by chunk_delete, with offset 0.
if cur_chunk == chunk then
cur_chunk = next_chunk
offset = 0
end
chunk = next_chunk
else
## Open a new run: gather everything this chunk displays in conversion
## and start with an empty original.
reconv_chunk = chunk
reconv_chunk.conversion += reconv_chunk.pending
reconv_chunk.pending = []
reconv_chunk.original = []
end
end
end
## Re-type the run that reaches the end of the list, if any.
if reconv_chunk then
input = @suikyo_reverse.convert( reconv_chunk.conversion.join + " " )
reconv_chunk.conversion = []
chunk = convert(reconv_chunk, input)
end
return [cur_chunk, offset]
end
## Returns the number of displayed characters of the whole preedition,
## i.e. the sum of chunk_get_length over all chunks between the head and
## the tail chunk.
def edit_get_length
  total = 0
  chunk = @head_chunk.chunk_next
  while chunk != @tail_chunk
    total += chunk_get_length(chunk)
    chunk = chunk.chunk_next
  end
  total
end
####
#### Chunk operations
####
## Returns the string that +chunk+ displays. The string depends on the
## current typing mode: conversion followed by pending in the :default,
## :katakana and :half_katakana modes, the original characters otherwise
## (:raw, :wide_ascii). The text is then passed through suikyo_convert.
def chunk_get_surface(chunk)
  text =
    if [:default, :katakana, :half_katakana].include?(@typing_mode)
      chunk.conversion.join + chunk.pending.join
    else
      chunk.original.join
    end
  suikyo_convert(text)
end
## Basically the same as chunk_get_surface. The difference is that, if
## @mask is true, every pending character is displayed as "*" in the
## :default, :katakana and :half_katakana modes.
def chunk_get_surface_masked(chunk)
  case @typing_mode
  when :default, :katakana, :half_katakana
    pending_part = @mask ? "*" * chunk.pending.length : chunk.pending.join
    suikyo_convert(chunk.conversion.join + pending_part)
  else # :raw, :wide_ascii
    suikyo_convert(chunk.original.join)
  end
end
## Converts +string+ for display in the current typing mode.
## :default uses the instance converter, :raw returns +string+ unchanged,
## and the other modes use the shared class-level converters with a " "
## appended to the input. Returns nil for an unknown typing mode.
def suikyo_convert(string)
  if @typing_mode == :default # F6
    @suikyo.convert(string)
  elsif @typing_mode == :katakana # F7
    @@suikyo_katakana.convert(string + " ")
  elsif @typing_mode == :half_katakana # F8
    @@suikyo_half_katakana.convert(string + " ")
  elsif @typing_mode == :wide_ascii # F9
    @@suikyo_wide_ascii.convert(string + " ")
  elsif @typing_mode == :raw # F10
    string
  end
end
## Converts a displayed +string+ back to the default surface.
## In the :default and :raw modes +string+ is returned unchanged; the
## other modes use the shared reverse converters with a " " appended to
## the input. Returns nil for an unknown typing mode.
def suikyo_reconvert_to_default(string)
  if @typing_mode == :default || @typing_mode == :raw # F6, F10
    string
  elsif @typing_mode == :katakana # F7
    @@suikyo_katakana_reverse.convert(string + " ")
  elsif @typing_mode == :half_katakana # F8
    @@suikyo_half_katakana_reverse.convert(string + " ")
  elsif @typing_mode == :wide_ascii # F9
    @@suikyo_wide_ascii_reverse.convert(string + " ")
  end
end
## Returns the number of characters that +chunk+ displays. The value
## depends on the current typing mode because it is measured on the result
## of chunk_get_surface.
def chunk_get_length(chunk)
  shown_chars = chunk_get_surface(chunk).split(//)
  shown_chars.length
end
## Returns the pair [chunk, position-in-chunk] for the global +position+.
## For example, if the chunk list is [HEAD]-[ta]-[i]-[ya]-[ki]-[TAIL] and
## the position is 4, this returns [ya] and 1. If 3, it returns [i] and 1.
## If 0, [HEAD] and 0.
## A position beyond the end is clamped to the end of the last chunk.
def chunk_get_at(position)
  remaining = position
  chunk = @head_chunk
  until chunk.chunk_next == @tail_chunk
    width = chunk_get_length(chunk)
    return [chunk, remaining] if remaining <= width
    remaining -= width
    chunk = chunk.chunk_next
  end
  ## +chunk+ is now the last chunk (or the head chunk of an empty list).
  [chunk, [remaining, chunk_get_length(chunk)].min]
end
## Splits +chunk+ into two chunks at +position+, using the splitting rule
## of the current typing mode. Returns the result of that rule: true, or
## false if it failed.
def chunk_split(chunk, position)
  if [:default, :katakana, :half_katakana].include?(@typing_mode)
    chunk_split_default(chunk, position)
  else # :raw, :wide_ascii
    chunk_split_raw(chunk, position)
  end
end
## This splits +chunk+ at +position+ under the :default, :katakana and
## :half_katakana typing modes. +position+ counts displayed characters
## from the beginning of the chunk.
## It returns false without changing anything if +position+ is not within
## 1..chunk_get_length(chunk); otherwise it returns true. The left part
## stays in +chunk+; the right part, if it has any content, goes into a
## new chunk that is linked right after +chunk+.
def chunk_split_default (chunk, position)
if position > chunk_get_length(chunk) or position <= 0 then
return false
end
### Separate into two chunks
## Getting the pending and conversion data of left and right chunks.
## Both are measured in the displayed form (suikyo_convert), one array
## element per character.
original = chunk.original
conversion = suikyo_convert( chunk.conversion.join() ).split(//)
pending = suikyo_convert( chunk.pending.join() ).split(//)
if position > conversion.length then
## The split point lies inside the pending characters: the left chunk
## keeps the whole conversion and the first part of the pending.
position_pending = position - conversion.length
left_pending = pending[0,position_pending].join()
left_conversion = conversion.join()
right_pending = pending[position_pending..-1].join()
right_conversion = ""
else # position <= chunk.conversion.length
## The split point lies inside (or at the end of) the conversion: all
## pending characters go to the right chunk.
left_pending = ""
left_conversion = conversion[0,position].join()
right_pending = pending.join()
right_conversion = conversion[position..-1].join()
end
## Getting the data original of left and right chunks.
## The original can only be divided when the right side consists of
## pending characters alone; it is then cut where right_pending occurs
## last in the joined original. In every other case both parts get nil.
## NOTE(review): the character index from rindex is used as an array
## index, which assumes one character per element of original -- confirm.
left_original = nil
right_original = nil
if original != nil and right_conversion == "" then
index = original.join.rindex(right_pending)
if index != nil then
left_original = original[0,index]
right_original = original[index..-1]
end
end
## Reflecting the data to chunks. The displayed form is converted back to
## the default surface before it is stored.
chunk.conversion = suikyo_reconvert_to_default( left_conversion ).split(//)
chunk.pending = suikyo_reconvert_to_default( left_pending ).split(//)
chunk.original = left_original
## A right chunk is only created if there is something to put into it.
if right_conversion.length > 0 or right_pending.length > 0 then
right_chunk = SuikyoCharChunk.new_next(chunk)
right_chunk.conversion =
suikyo_reconvert_to_default( right_conversion ).split(//)
right_chunk.pending =
suikyo_reconvert_to_default( right_pending ).split(//)
right_chunk.original = right_original
end
return true
end
## Splits +chunk+ at +position+ under the :raw and :wide_ascii typing
## modes. Returns false without changing anything unless +position+ lies
## strictly inside the chunk; otherwise returns true.
## Both halves get an empty conversion, and their pending and original
## hold the respective part of the former original characters.
def chunk_split_raw(chunk, position)
  return false if position >= chunk_get_length(chunk) || position <= 0

  typed = chunk.original
  left_part = typed[0, position]
  right_part = typed[position..-1]

  chunk.conversion = []
  chunk.pending = left_part
  chunk.original = left_part.dup

  follower = SuikyoCharChunk.new_next(chunk)
  follower.conversion = []
  follower.pending = right_part
  follower.original = right_part.dup
  true
end
## Unlinks +chunk+ from the list by connecting its previous and next
## chunks with each other. Returns the previous chunk, or the next chunk
## if the previous one does not exist (nil if neither exists).
## The links stored in +chunk+ itself are left untouched.
def chunk_delete(chunk)
  before = chunk.chunk_prev
  after = chunk.chunk_next
  after.chunk_prev = before if after
  before.chunk_next = after if before
  before || after
end
## This feeds the pending characters of +chunk+ followed by the
## characters of +string+ through the conversion table of @suikyo and
## stores the results in +chunk+ and, if more room is needed, in new
## chunks linked after it. It returns the chunk that was worked on last
## (or the neighbor returned by chunk_delete if that chunk ended up
## empty).
## Only characters taken from +string+ are appended to original.
## NOTE(review): this assumes chunk.original is an Array; a nil original
## would fail at the "+=" below -- confirm that callers guarantee it.
def convert (chunk, string)
chars = string.split(//)
orig_table = @suikyo.table
## The former pending characters are typed again ahead of +string+.
cont_chars = chunk.pending
chunk.pending = []
loop {
## Each pass walks the table once from its root. A new chunk is started
## unless characters are carried over or the current chunk is still
## empty.
if cont_chars.length == 0 then
unless chunk.empty? then
chunk = SuikyoCharChunk.new_next(chunk)
end
end
table = orig_table
node = nil
## First consume the carried-over characters. A character is taken if the
## table has an entry for it, or unconditionally if the chunk has no
## pending character yet; otherwise it is left for the next pass. A " "
## is consumed but never stored.
while table and cont_chars.length > 0 do
head = cont_chars[0]
tmp_node = table.get_word(head)
table = (tmp_node and tmp_node.subtable)
if tmp_node or chunk.pending == [] then
chunk.pending += head.split(//) unless head == " "
node = tmp_node
cont_chars.shift
end
end
## Then consume newly typed characters in the same way; these are also
## recorded in original.
while table and chars.length > 0 do
head = chars[0]
tmp_node = table.get_word(head)
table = (tmp_node and tmp_node.subtable)
if tmp_node or chunk.pending == [] then
chunk.pending += head.split(//) unless head == " "
chunk.original += head.split(//) unless head == " "
node = tmp_node
chars.shift
end
end
## The walk ended at a node without subtable: replace the pending
## characters by the node's result, and carry its cont characters over
## to the next pass.
if table.nil? and node and (node.result or node.cont) then
chunk.pending = []
if node.cont then
cont_chars = node.cont.split(//)
end
if node.result then
chunk.conversion += node.result.split(//)
end
end
## Finished when nothing is left to consume. A chunk that stayed empty is
## removed from the list again.
if cont_chars.length == 0 and chars.length == 0 then
if chunk.empty? then
chunk = chunk_delete(chunk)
end
return chunk
end
}
end
end
# syntax highlighted by Code2HTML, v. 0.9.1