.\" Automatically generated by Pod::Man v1.37, Pod::Parser v1.32 .\" .\" Standard preamble: .\" ======================================================================== .de Sh \" Subsection heading .br .if t .Sp .ne 5 .PP \fB\\$1\fR .PP .. .de Sp \" Vertical space (when we can't use .PP) .if t .sp .5v .if n .sp .. .de Vb \" Begin verbatim text .ft CW .nf .ne \\$1 .. .de Ve \" End verbatim text .ft R .fi .. .\" Set up some character translations and predefined strings. \*(-- will .\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left .\" double quote, and \*(R" will give a right double quote. | will give a .\" real vertical bar. \*(C+ will give a nicer C++. Capital omega is used to .\" do unbreakable dashes and therefore won't be available. \*(C` and \*(C' .\" expand to `' in nroff, nothing in troff, for use with C<>. .tr \(*W-|\(bv\*(Tr .ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p' .ie n \{\ . ds -- \(*W- . ds PI pi . if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch . if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch . ds L" "" . ds R" "" . ds C` "" . ds C' "" 'br\} .el\{\ . ds -- \|\(em\| . ds PI \(*p . ds L" `` . ds R" '' 'br\} .\" .\" If the F register is turned on, we'll generate index entries on stderr for .\" titles (.TH), headers (.SH), subsections (.Sh), items (.Ip), and index .\" entries marked with X<> in POD. Of course, you'll have to process the .\" output yourself in some meaningful fashion. .if \nF \{\ . de IX . tm Index:\\$1\t\\n%\t"\\$2" .. . nr % 0 . rr F .\} .\" .\" For nroff, turn off justification. Always turn off hyphenation; it makes .\" way too many mistakes in technical documents. .hy 0 .if n .na .\" .\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2). .\" Fear. Run. Save yourself. No user-serviceable parts. . \" fudge factors for nroff and troff .if n \{\ . ds #H 0 . ds #V .8m . ds #F .3m . ds #[ \f1 . ds #] \fP .\} .if t \{\ . 
ds #H ((1u-(\\\\n(.fu%2u))*.13m) . ds #V .6m . ds #F 0 . ds #[ \& . ds #] \& .\} . \" simple accents for nroff and troff .if n \{\ . ds ' \& . ds ` \& . ds ^ \& . ds , \& . ds ~ ~ . ds / .\} .if t \{\ . ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u" . ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u' . ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u' . ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u' . ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u' . ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u' .\} . \" troff and (daisy-wheel) nroff accents .ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V' .ds 8 \h'\*(#H'\(*b\h'-\*(#H' .ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#] .ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H' .ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u' .ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#] .ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#] .ds ae a\h'-(\w'a'u*4/10)'e .ds Ae A\h'-(\w'A'u*4/10)'E . \" corrections for vroff .if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u' .if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u' . \" for low resolution devices (crt and lpr) .if \n(.H>23 .if \n(.V>19 \ \{\ . ds : e . ds 8 ss . ds o a . ds d- d\h'-1'\(ga . ds D- D\h'-1'\(hy . ds th \o'bp' . ds Th \o'LP' . ds ae ae . 
ds Ae AE .\} .rm #[ #] #H #V #F C .\" ======================================================================== .\" .IX Title "AI::Categorizer::Learner::KNN 3" .TH AI::Categorizer::Learner::KNN 3 "2008-01-14" "perl v5.8.8" "User Contributed Perl Documentation" .SH "NAME" AI::Categorizer::Learner::KNN \- K Nearest Neighbour Algorithm For AI::Categorizer .SH "SYNOPSIS" .IX Header "SYNOPSIS" .Vb 1 \& use AI::Categorizer::Learner::KNN; .Ve .PP .Vb 1 \& # Here $k is an AI::Categorizer::KnowledgeSet object .Ve .PP .Vb 3 \& my $nb = new AI::Categorizer::Learner::KNN(...parameters...); \& $nb->train(knowledge_set => $k); \& $nb->save_state('filename'); .Ve .PP .Vb 1 \& ... time passes ... .Ve .PP .Vb 7 \& $l = AI::Categorizer::Learner->restore_state('filename'); \& my $c = new AI::Categorizer::Collection::Files( path => ... ); \& while (my $document = $c->next) { \& my $hypothesis = $l->categorize($document); \& print "Best assigned category: ", $hypothesis->best_category, "\en"; \& print "All assigned categories: ", join(', ', $hypothesis->categories), "\en"; \& } .Ve .SH "DESCRIPTION" .IX Header "DESCRIPTION" This is an implementation of the k\-Nearest-Neighbor decision-making algorithm, applied to the task of document categorization (as defined by the AI::Categorizer module). See AI::Categorizer for a complete description of the interface. .SH "METHODS" .IX Header "METHODS" This class inherits from the \f(CW\*(C`AI::Categorizer::Learner\*(C'\fR class, so all of its methods are available unless explicitly mentioned here. .Sh "\fInew()\fP" .IX Subsection "new()" Creates a new \s-1KNN\s0 Learner and returns it. In addition to the parameters accepted by the \f(CW\*(C`AI::Categorizer::Learner\*(C'\fR class, the \&\s-1KNN\s0 subclass accepts the following parameters: .IP "threshold" 4 .IX Item "threshold" Sets the score threshold for category membership. The default is currently 0.1. Set the threshold lower to assign more categories per document, set it higher to assign fewer. 
This can be an effective way to trade off between precision and recall. .IP "k_value" 4 .IX Item "k_value" Sets the \f(CW\*(C`k\*(C'\fR value (as in k\-Nearest\-Neighbor) to the given integer. This indicates how many of each document's nearest neighbors should be considered when assigning categories. The default is 5. .Sh "\fIthreshold()\fP" .IX Subsection "threshold()" Returns the current threshold value. With an optional numeric argument, you may set the threshold. .ie n .Sh "train(knowledge_set => $k)" .el .Sh "train(knowledge_set => \f(CW$k\fP)" .IX Subsection "train(knowledge_set => $k)" Trains the categorizer. This prepares it for later use in categorizing documents. The \f(CW\*(C`knowledge_set\*(C'\fR parameter must provide an object of the class \f(CW\*(C`AI::Categorizer::KnowledgeSet\*(C'\fR (or a subclass thereof), populated with lots of documents and categories. See AI::Categorizer::KnowledgeSet for the details of how to create such an object. .Sh "categorize($document)" .IX Subsection "categorize($document)" Returns an \f(CW\*(C`AI::Categorizer::Hypothesis\*(C'\fR object representing the categorizer's \*(L"best guess\*(R" about which categories the given document should be assigned to. See AI::Categorizer::Hypothesis for more details on how to use this object. .Sh "save_state($path)" .IX Subsection "save_state($path)" Saves the categorizer for later use. This method is inherited from \&\f(CW\*(C`AI::Categorizer::Storable\*(C'\fR. .SH "AUTHOR" .IX Header "AUTHOR" Originally written by David Bell (\f(CW\*(C`\*(C'\fR), October 2002. .PP Added to AI::Categorizer November 2002, modified, and maintained by Ken Williams (\f(CW\*(C`\*(C'\fR). .SH "COPYRIGHT" .IX Header "COPYRIGHT" Copyright 2000\-2003 Ken Williams. All rights reserved. .PP This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. 
.SH "SEE ALSO" .IX Header "SEE ALSO" \&\fIAI::Categorizer\fR\|(3) .PP \&\*(L"A re-examination of text categorization methods\*(R" by Yiming Yang