/*
* 文節の構造metawordをソートする
*
* 文節に対する複数の構造の候補をソートする
*
* Copyright (C) 2000-2007 TABATA Yusuke
*
*/
/*
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <stdlib.h>
#include <math.h>
#include <anthy/segment.h>
#include <anthy/ordering.h>
#include <anthy/feature_set.h>
#include <anthy/splitter.h>
#include <anthy/diclib.h>
#include "sorter.h"
static void *cand_info_array;
static double
calc_probability(struct feature_list *fl)
{
struct feature_freq *res, arg;
res = anthy_find_feature_freq(cand_info_array,
fl, &arg);
if (res) {
double pos = (double)res->f[15];
double neg = (double)res->f[14];
double prob = pos / (pos + neg);
prob = prob * prob;
/**/
return prob;
}
return 0;
}
static void
mw_eval(struct seg_ent *prev_seg, struct seg_ent *seg,
struct meta_word *mw)
{
int pc;
struct feature_list fl;
double prob;
(void)seg;
anthy_feature_list_init(&fl);
/**/
anthy_feature_list_set_cur_class(&fl, mw->seg_class);
anthy_feature_list_set_dep_word(&fl, mw->dep_word_hash);
anthy_feature_list_set_dep_class(&fl, mw->dep_class);
anthy_feature_list_set_mw_features(&fl, mw->mw_features);
/* 前の文節の素性 */
if (prev_seg) {
pc = prev_seg->best_seg_class;
} else {
pc = SEG_HEAD;
}
anthy_feature_list_set_class_trans(&fl, pc, mw->seg_class);
anthy_feature_list_sort(&fl);
/* 計算する */
prob = 0.1 + calc_probability(&fl);
if (prob < 0) {
prob = (double)1 / (double)1000;
}
anthy_feature_list_free(&fl);
mw->struct_score = RATIO_BASE * RATIO_BASE;
mw->struct_score *= prob;
/*
anthy_feature_list_print(&fl);
printf(" prob=%f, struct_score=%d\n", prob, mw->struct_score);
*/
/**/
if (mw->mw_features & MW_FEATURE_SUFFIX) {
mw->struct_score /= 2;
}
if (mw->mw_features & MW_FEATURE_WEAK_CONN) {
mw->struct_score /= 10;
}
}
static void
seg_eval(struct seg_ent *prev_seg,
struct seg_ent *seg)
{
int i;
for (i = 0; i < seg->nr_metaword; i++) {
mw_eval(prev_seg, seg, seg->mw_array[i]);
}
}
static void
sl_eval(struct segment_list *seg_list)
{
int i;
struct seg_ent *prev_seg = NULL;
for (i = 0; i < seg_list->nr_segments; i++) {
struct seg_ent *seg;
seg = anthy_get_nth_segment(seg_list, i);
seg_eval(prev_seg, seg);
prev_seg = seg;
}
}
static int
metaword_compare_func(const void *p1, const void *p2)
{
const struct meta_word * const *s1 = p1;
const struct meta_word * const *s2 = p2;
return (*s2)->struct_score - (*s1)->struct_score;
}
void
anthy_sort_metaword(struct segment_list *seg_list)
{
int i;
/**/
sl_eval(seg_list);
/**/
for (i = 0; i < seg_list->nr_segments; i++) {
struct seg_ent *seg = anthy_get_nth_segment(seg_list, i);
qsort(seg->mw_array, seg->nr_metaword, sizeof(struct meta_word *),
metaword_compare_func);
}
}
void
anthy_infosort_init(void)
{
cand_info_array = anthy_file_dic_get_section("cand_info");
}
syntax highlighted by Code2HTML, v. 0.9.1