#!/usr/bin/perl -w
#
# Copyright (C) 2000-2004 Nadav Har'El, Dan Kenigsberg
#
use Carp;
use FileHandle;
# Run-time switches: whether outword() appends grammatical details to each
# printed form, and the prefix that is put in front of those details.
my $detail_prefix;
my $detailed_output = 0;
# Lookup table from the ordinary shape of a letter to its final (sofit) shape,
# for the five letters that have both.
my %fin = (
    'כ' => 'ך',
    'מ' => 'ם',
    'נ' => 'ן',
    'פ' => 'ף',
    'צ' => 'ץ',
);
# The opposite direction: final shape => ordinary shape.
my %nif = reverse %fin;
sub outword {
    # Print one generated word form on its own line, after translating the
    # internal markers (a e i y Y w h) into proper Hebrew ktiv male spelling.
    #   $word    - the form to print, possibly still containing markers.
    #   $details - optional description of the form; it is appended (after
    #              $detail_prefix) only when $detailed_output is on.
    my ($word, $details) = @_;

    # A leading "*" marks a non-existent word that must not be output. This
    # lets the callers drop words without wrapping everything in huge if()s.
    if ($word =~ m/^\*/) {
        return;
    }

    # Final-form letters (otiot sofiot) that ended up in the middle of the
    # word go back to their ordinary form. The a-z in the lookahead is there
    # for our marker letters (such as "y" and "w"); the optional apostrophe,
    # re-inserted through $2, is for סנדוויץ' - סנדוויצ'ים.
    $word =~ s/([ךםןףץ])('?)(?=[א-תa-z])/$nif{$1}$2/g;

    # Everything else is a plain "pattern => replacement" rewrite of the
    # special marks into Hebrew letters, following the ktiv male rules.
    # THE ORDER OF THE RULES IS IMPORTANT. Since they can change so many
    # words, diff the output files before and after any change here to make
    # sure no unexpected words were affected.
    my @marker_rules = (
        # The vowel markers 'a' and 'e' do nothing except after a yud
        # (chirik male), which they turn into a consonant yud. For example
        # your(feminine) צי is צייך (tsere on the yud, so it is a consonant
        # and gets doubled), while your(masculine) צי is ציך (the yud is a
        # chirik male and is not doubled).
        [ qr/י[ea]/, 'y' ],
        [ qr/[ea]/,  ''  ],

        # 'i' is a chirik chaser, to be followed by a yud if necessary.
        # Nothing is done with it beyond the first rule below; it mainly
        # exists for words like סנאiי, so that wolig.pl does not mistake them
        # for the ordinary patach-aleph-yud case (as in תנאי).
        # The first rule serves the transformation from שני to שנייה, via
        #   שני adj-inword> שנiי feminine> שנiיaה outword> שנiyה outword> שנייה
        [ qr/iy/, 'יי' ],
        [ qr/i/,  ''   ],

        # 'Y' is like 'y', except that at the end of the word (or right
        # before the "-" of a smichut) it becomes a single yud rather than a
        # double one. It is used for words like חולי, whose base form has a
        # chirik male but whose inflections all have a full consonant yud.
        # No such trick is needed for vav (w), because the Academia's rules
        # do nothing to a vav at the end of a word. This may not be strictly
        # "kosher" linguistically, but it works.
        [ qr/Y($|(?=-))/, 'י' ],    # Y at the end of the word
        [ qr/Y/,          'y' ],    # every other Y behaves like y

        # The Academia's rule: a chirik before yו is not written with a י,
        # so יyו becomes יו.
        # IDEA: to be more certain that the first י really is a chirik, the
        # 'i' marker could be used instead: besides the יה -> yה rule applied
        # when a word is read, one would add ייה -> iyה, and this rule would
        # then convert iyו rather than יyו. Everything works without it.
        [ qr/יyו/, 'יו' ],
        # A consonant yud is doubled when its neighbours allow it ...
        [ qr/(?<=[^ויy])y(?=[^ויyה]|$)/, 'יי' ],
        # ... otherwise it is just one yud.
        [ qr/y/, 'י' ],

        # וw -> ו has an interesting story. In the original Hebrew the
        # consonant ו sounded like the English w or the Arabic waw. An "u"
        # sound (a kubuts, marked here by ו) followed by this w sounded like
        # a long "u", which was later written with a shuruk, i.e., one vav.
        # This is very useful for understanding how the word שוק is inflected
        # (see the explanation in wolig.dat).
        [ qr/וw/, 'ו' ],
        # If the vav needs to be doubled, do it ...
        [ qr/(?<=[^וw])w(?=[^וw-])/, 'וו' ],
        # ... otherwise it is just one vav.
        [ qr/w/, 'ו' ],

        # A consonant ה (h) is always written as ה. Marking it is only needed
        # so that the rules above double a yud next to a consonant ה but not
        # next to an em-kria ה: compare אריה (lion) and ארייה (her lion).
        [ qr/h/, 'ה' ],
    );
    foreach my $rule (@marker_rules) {
        my ($pattern, $replacement) = @$rule;
        $word =~ s/$pattern/$replacement/g;
    }

    if ($detailed_output && defined($details)) {
        # The details already say when this is a smichut form, so the
        # trailing "-" is not needed.
        $word =~ s/-$//;
        $word = $word . " " . $detail_prefix . $details;
    }
    print "$word\n";
}
sub inword {
    # Pre-process a word as read from wolig.dat (presumably written in ktiv
    # male) and return it with some of its ו / י letters replaced by the
    # markers "w" / "y" that we use for a consonant vav / yud.
    #
    # For some constructs made of אהוי at the beginning or the end of a word
    # we can tell right away that the letter is a consonant and not a vowel.
    # A prediction may be added here only if it has both of these traits:
    # 1. It is useful for inflecting some word correctly. For example,
    #    knowing that the וו at the end of מזווה is a consonant lets us avoid
    #    the false inflection מזווו and generate the correct מזוו.
    # 2. It is correct in 100% of the cases. A rule saying that every וו in
    #    the input is a consonant (w) would be wrong, because of words like
    #    ציווי.
    #    The rules only have to *appear* correct for the actual words in
    #    wolig.dat; they need not be linguistically correct. The rule below
    #    that a ו at the end of a word is a consonant holds for most nouns
    #    (צו, מקווקו) but not for אחו. Still, all the inflections of אחו do
    #    have a consonant vav, and mislabeling the base word is harmless
    #    because the Academia's rules never double a vav at the end of a
    #    word anyway.
    #
    # The second trait could be relaxed if the input had an alternative way
    # to spell a construct: if "u" could stand for a vowel vav, the few cases
    # where a vav is wrongly taken for a consonant could be overridden by
    # writing "u" instead of the vav in the input file.
    my ($word) = @_;

    # A word cannot start with a shuruk or kubuts, so a leading vav is a
    # consonant.
    substr($word, 0, 1, "w") if substr($word, 0, 1) eq "ו";

    # Words like חוויה, הלוויה, טריוויה: I can't imagine a base noun (or
    # adjective) in which this double vav is a vav and a shuruk rather than
    # a consonant.
    substr($word, -4, 2, "w") if substr($word, -4, 4) eq "וויה";

    # Mutually exclusive endings - only the first one that matches applies.
    # Each entry is: length of the ending, the ending itself, how many of its
    # leading letters to replace, and the marker replacing them.
    my @ending_rules = (
        # A final vav is a consonant (see above for why the few exceptions
        # do not bother us).
        [ 1, "ו", 1, "w" ],
        # A word ending in ווה was written in ktiv male with a consonant vav
        # in mind. The וו becomes "w", which outword doubles again when ktiv
        # male requires it. This matters for the סגול_ה with ות cases: מקווה
        # has the plural מקוות and the his-possessive מקוו; without this we
        # would get the incorrect מקווו and מקווות. The adjective נאווה
        # needs it as well, for its feminine plural.
        [ 3, "ווה", 2, "w" ],
        # A yud before a final ה becomes the marker "y".
        # TODO: maybe convert ייה (in ktiv male, e.g., סופגנייה) into iyה;
        # see the discussion in outword. Everything also works without it.
        [ 2, "יה", 1, "y" ],
    );
    foreach my $rule (@ending_rules) {
        my ($span, $ending, $cut, $marker) = @$rule;
        next unless substr($word, -$span, $span) eq $ending;
        substr($word, -$span, $cut, $marker);
        last;
    }

    return $word;
}
#############################################################################
# State shared with the main loop below: the input file handle, the word and
# raw option string of the line being processed, and its parsed options.
my ($fh,$word,$optstring,%opts);
# Command line: [-d] [datafile]
#   -d        toggle detailed output (grammatical details after each form)
#   datafile  input file; defaults to "wolig.dat"
my $infile;
if(@ARGV && $ARGV[0] eq "-d"){
    $detailed_output=!$detailed_output;
    shift @ARGV;
}
# Fall back to the default data file whenever no file name is left on the
# command line. This also covers "-d" given alone, which used to leave
# $infile undefined.
$infile = @ARGV ? $ARGV[0] : "wolig.dat";
# Direct method call instead of the indirect-object form "new FileHandle";
# $! is added so that the reason for a failed open is reported.
$fh = FileHandle->new($infile, "r")
    or croak "Couldn't open data file $infile for reading: $!";
# Main loop: process the data file one line at a time. Each non-blank,
# non-comment line holds a word followed by a comma-separated option string
# (each option is either "name" or "name=value"). Depending on whether the
# ע (noun) or ת (adjective) option is set, the word's inflections are printed
# through outword(), followed by a "-------" separator line. (The error
# message at the end also mentions verbs, but there is no verb branch here.)
while(<$fh>){
print if /^#\*/; # print these comments.
chomp;
s/#.*$//o; # comments start with '#'.
next if /^[ ]*$/o; # ignore blank lines.
($word,$optstring)=split;
die "Type of word '".$word."' was not specified." if !defined($optstring);
# forget the options of the previous line
undef %opts;
my $val;
# An option without "=value" gets the value 1.
# NOTE: $opt is not declared with "my" anywhere in the visible file, so it is
# a package global.
foreach $opt (split /,/o, $optstring){
($opt, $val) = (split /=/o, $opt);
$val = 1 unless defined $val;
$opts{$opt}=$val;
}
if($opts{"ע"}){
############################# noun ######################################
# note that the noun may have several plural forms (see, for example,
# אות). When one of the plural forms isn't explicitly specified, wolig
# tries to guess, based on simplistic heuristics that work for the majority
# of the nouns (84% of them, at one time I counted).
# $plural_none (explicit יחיד option, or a word ending in יות) switches the
# guessing off, as does any explicitly given plural option.
my $plural_none = $opts{"יחיד"} || substr($word,-3,3) eq "יות";
my $plural_bizarre = exists($opts{"רבים"});
my $plural_implicit = !($opts{"ות"} || $opts{"ים"} || $opts{"יות"}
|| $opts{"אות"} || $opts{"יים"} || $plural_none
|| $plural_bizarre);
my $plural_iot = $opts{"יות"} ||
($plural_implicit && (substr($word,-2,2) eq "ות"));
my $plural_xot = $opts{"אות"};
my $plural_ot = $opts{"ות"} ||
($plural_implicit && !$plural_iot && (substr($word,-1,1) eq "ה" || substr($word,-1,1) eq "ת" ));
my $plural_im = $opts{"ים"} || ($plural_implicit && !$plural_ot && !$plural_iot);
my $plural_iim = $opts{"יים"};
# Find gender for detailed output. This has nothing to do with word
# inflection, it's just an added value of wolig.pl...
if($detailed_output){
my $gender;
if($opts{"זכר"}){
if($opts{"נקבה"}){
$gender="ז,נ";
} else {
$gender="ז";
}
} elsif($opts{"נקבה"}){
$gender="נ"
} elsif($opts{"סגול_ה"}){
$gender="ז";
} elsif((substr($word,-1,1) eq "ה") && !$opts{"אבד_ו"}){
$gender="נ";
} elsif(substr($word,-1,1) eq "ת" && !$opts{"ים"}){
$gender="נ";
} else {
$gender="ז";
}
$detail_prefix="$gender,";
}
# preprocess the word the user has given, converting certain ktiv male
# constructs into markers (w, y) that we can better work with later (see
# comments in inword() about what it does).
$word=inword($word);
# related singular noun forms
if(exists $opts{"נפרד"}){
outword $opts{"נפרד"}, "ע,יחיד"; # explicit override of the nifrad
} elsif(!$opts{"אין_יחיד"}){
outword $word, "ע,יחיד"; # the singular noun itself
}
if($opts{"אבד_י"}){
# in words like עיפרון and היריון the first yud (coming from chirik
# or tsere in ktiv male) is lost in all but the base word
$word =~ s/י//o;
}
my $smichut=$word;
if($opts{"אין_יחיד"} || $opts{"אין_נטיות_יחיד"}){
# We mark the singular words with "*", telling outword to drop them.
# This makes the code look cleaner than a huge if statement around all
# the singular code. Maybe in the future we should move the singular
# inflection code to a separate function, if() only around that, and
# stop all that "*" nonsense.
$smichut="*".$smichut;
}
#my $smichut_orig=$smichut;
if($opts{"מיוחד_אח"}){
# special case:
# אח, אב, חם, פה include an extra yod in the smichut. Note that in the
# first person singular possessive, we should drop that extra yod.
# For a "im" plural, it turns out to be the same inflections as the
# plural - but this is not the case with a "ot" plural.
# Interestingly, the yud in these inflections is always a chirik
# male - it is never consonantal (never has a vowel on it).
if(substr($smichut,-1,1) eq "ה"){
# Remove the ה. Basically, only one word fits this case: פה
$smichut=substr($smichut,0,-1);
# And add the extra third-person masculine possessive (just like the
# סגול_ה case, but we don't bother to check for the סגול_ה flag here).
outword $smichut."יהו", "ע,יחיד,של/הוא";
}
outword $smichut."י-", "ע,יחיד,סמיכות"; # smichut
outword $smichut."י", "ע,יחיד,של/אני"; # possessives (kinu'im)
outword $smichut."ינו", "ע,יחיד,של/אנחנו";
outword $smichut."יך", "ע,יחיד,של/אתה";
outword $smichut."יך", "ע,יחיד,של/את";
outword $smichut."יכם", "ע,יחיד,של/אתם";
outword $smichut."יכן", "ע,יחיד,של/אתן";
outword $smichut."יו", "ע,יחיד,של/הוא";
outword $smichut."יה", "ע,יחיד,של/היא";
outword $smichut."יהן", "ע,יחיד,של/הן";
outword $smichut."יהם", "ע,יחיד,של/הם";
} else {
if(!$opts{"סגול_ה"}){ # replace final ה by ת, unless סגול_ה option
# (the second !$opts{"סגול_ה"} test below repeats the enclosing condition)
if(substr($smichut,-1,1) eq "ה" && !$opts{"סגול_ה"}){
substr($smichut,-1,1)="ת";
}
}
if(exists($opts{"נסמך"})){
outword $opts{"נסמך"}."-", "ע,יחיד,סמיכות";
} else {
outword $smichut."-", "ע,יחיד,סמיכות"; # smichut
}
if($opts{"מיוחד_שן"}){
# academia's ktiv male rules indicate that the inflections of שן
# (at least the plural is explicitly mentioned...) should get an
# extra yud - to make it easy to distinguish from the number שניים.
# (the yud is inserted just before the last letter)
substr($smichut,0,-1)=substr($smichut,0,-1).'י';
substr($word,0,-1)=substr($word,0,-1).'י';
}
if(substr($word,-2,2) eq "אי" && length($word)>2){
# in words ending with patach and then the imot kria aleph yud,
# such as תנאי and גבאי, all the inflections (beside the base word
# and the smichut) are as if the yud wasn't there.
# Note that words ending with אי but not patach, like אי and סנאי,
# should not get this treatment, so there should be an option to turn
# it off.
substr($word,-1,1)="";
substr($smichut,-1,1)="";
}
# Note that the extra vowel markers, 'a' and 'e' are added for mele'im
# ending with yud (e.g., אי) - this vowel attaches to the yud and makes
# the yud a consonant. This phenomenon is handled in outword.
my $no_ah=0;
if($opts{"סגול_ה"}){
# the ה is dropped from the singular inflections, except one alternate
# inflection like מורהו (the long form of מורו):
# (there's another feminine inflection, מורה with kamats on the he,
# but this is spelled the same (as מורה with mapik) without niqqud so
# we don't need to print it again).
if(substr($smichut,-1,1) eq "ה"){
$smichut=substr($smichut,0,-1);
}
outword $smichut."ehו", "ע,יחיד,של/הוא";
# TODO: maybe add the "eha" inflection? But it won't generate anything
# different from the ah below...
#outword $smichut."eha" unless $no_ah;
}
outword $smichut."י", "ע,יחיד,של/אני"; # possessives (kinu'im)
outword $smichut."eנו", "ע,יחיד,של/אנחנו";
outword $smichut."ך", "ע,יחיד,של/אתה";
outword $smichut."eך", "ע,יחיד,של/את";
outword $smichut."כם", "ע,יחיד,של/אתם";
outword $smichut."כן", "ע,יחיד,של/אתן";
outword $smichut."ו", "ע,יחיד,של/הוא";
outword $smichut."ah", "ע,יחיד,של/היא";
outword $smichut."aן", "ע,יחיד,של/הן";
outword $smichut."aם", "ע,יחיד,של/הם";
}
# related plural noun forms
# note: don't combine the $plural_.. ifs, nor use elsif, because some
# nouns have more than one plural forms.
if($plural_im){
my $xword=$word;
if(substr($xword,-1,1) eq "ה" && !$opts{"שמור_ת"}){
# remove final "he" (not "tav", unlike the "ot" pluralization below)
# before adding the "im" pluralization, unless the שמור_ת option was
# given.
$xword=substr($xword,0,-1);
}
my $xword_orig=$xword;
if($opts{"אבד_ו"}){
# when the אבד_ו flag was given, we remove the first "em kri'a" from
# the word in most of the inflections. (see a discussion of this
# option in wolig.dat).
$xword =~ s/ו//o;
}
outword $xword."ים", "ע,רבים";
# $smichut is the stem after the אבד_ו removal, $smichut_orig the stem
# before it; the forms below use one or the other.
$smichut=$xword;
my $smichut_orig=$xword_orig;
outword $smichut_orig."י-", "ע,רבים,סמיכות"; # smichut
# (We write patach followed by a consonant yud as "y", and later this will
# give us the chance to automatically double it as necessary by the
# Academia's ktiv male rules)
outword $smichut."y", "ע,רבים,של/אני"; # possessives (kinu'im)
outword $smichut."ינו", "ע,רבים,של/אנחנו";
outword $smichut."יך", "ע,רבים,של/אתה";
outword $smichut."yך", "ע,רבים,של/את";
outword $smichut_orig."יכם", "ע,רבים,של/אתם";
outword $smichut_orig."יכן", "ע,רבים,של/אתן";
outword $smichut."יו", "ע,רבים,של/הוא";
outword $smichut."יה", "ע,רבים,של/היא";
outword $smichut_orig."יהן", "ע,רבים,של/הן";
outword $smichut_orig."יהם", "ע,רבים,של/הם";
}
if($plural_iim){
# I currently decided that in Hebrew, unlike Arabic, only specific
# nouns can get the iim (zugi) pluralization, and most nouns can't,
# e.g., חתוליים isn't correct (for "two cats") despite a story called
# מעשה בחתוליים. This is why this is an option, and not the default.
my $xword=$word;
if(substr($xword,-1,1) eq "ה"){
# Change final he into tav before adding the "iim" pluralization.
$xword=substr($xword,0,-1)."ת";
}
my $xword_orig=$xword;
outword $xword."yם", "ע,רבים";
# (in this branch $smichut and $smichut_orig hold the same value)
$smichut=$xword;
my $smichut_orig=$xword_orig;
outword $smichut_orig."י-", "ע,רבים,סמיכות"; # smichut
outword $smichut."y", "ע,רבים,של/אני"; # possessives (kinu'im)
outword $smichut."ינו", "ע,רבים,של/אנחנו";
outword $smichut."יך", "ע,רבים,של/אתה";
outword $smichut."yך", "ע,רבים,של/את";
outword $smichut_orig."יכם", "ע,רבים,של/אתם";
outword $smichut_orig."יכן", "ע,רבים,של/אתן";
outword $smichut."יו", "ע,רבים,של/הוא";
outword $smichut."יה", "ע,רבים,של/היא";
outword $smichut_orig."יהן", "ע,רבים,של/הן";
outword $smichut_orig."יהם", "ע,רבים,של/הם";
}
if($plural_ot){
my $xword=$word;
if(substr($xword,-1,1) eq "ה" || substr($xword,-1,1) eq "ת"){
# remove final "he" or "tav" before adding the "ot" pluralization,
# unless the שמור_ת option was given.
if(!$opts{"שמור_ת"}){
$xword=substr($xword,0,-1);
}
}
if($opts{"אבד_ו"}){
# In segoliim with cholam chaser chat that inflect like feminines
# (i.e., the plural_ot case), the cholam is lost *only* in the base
# plural, not in other plural inflection. This is comparable to the
# inflections of the word מלכה, where the patach is lost only in the
# base plural.
# See for example גורן, דופן.
my $tmp = $xword;
$tmp =~ s/ו//o;
outword $tmp."ות", "ע,רבים";
} else {
outword $xword."ות", "ע,רבים";
}
$smichut=$xword."ות";
outword $smichut."-", "ע,רבים,סמיכות"; # smichut
outword $smichut."y", "ע,רבים,של/אני"; # possessives (kinu'im)
outword $smichut."ינו", "ע,רבים,של/אנחנו";
outword $smichut."יך", "ע,רבים,של/אתה";
outword $smichut."yך", "ע,רבים,של/את";
outword $smichut."יכם", "ע,רבים,של/אתם";
outword $smichut."יכן", "ע,רבים,של/אתן";
outword $smichut."יו", "ע,רבים,של/הוא";
outword $smichut."יה", "ע,רבים,של/היא";
outword $smichut."יהן", "ע,רבים,של/הן";
outword $smichut."יהם", "ע,רבים,של/הם";
}
if($plural_iot){
my $xword=$word;
if(substr($xword,-1,1) eq "ה" || substr($xword,-1,1) eq "ת"){
# remove final "he" or "tav" before adding the "iot" pluralization,
# unless the שמור_ת option was given.
if(!$opts{"שמור_ת"}){
$xword=substr($xword,0,-1);
}
}
outword $xword."יות", "ע,רבים";
$smichut=$xword."יות";
outword $smichut."-", "ע,רבים,סמיכות"; # smichut
outword $smichut."y", "ע,רבים,של/אני"; # possessives (kinu'im)
outword $smichut."ינו", "ע,רבים,של/אנחנו";
outword $smichut."יך", "ע,רבים,של/אתה";
outword $smichut."yך", "ע,רבים,של/את";
outword $smichut."יכם", "ע,רבים,של/אתם";
outword $smichut."יכן", "ע,רבים,של/אתן";
outword $smichut."יו", "ע,רבים,של/הוא";
outword $smichut."יה", "ע,רבים,של/היא";
outword $smichut."יהן", "ע,רבים,של/הן";
outword $smichut."יהם", "ע,רבים,של/הם";
}
if($plural_xot){
my $xword=$word;
if(substr($xword,-1,1) eq "ה" || substr($xword,-1,1) eq "ת"){
# remove final "he" or "tav" before adding the "xot" pluralization,
# unless the שמור_ת option was given.
if(!$opts{"שמור_ת"}){
$xword=substr($xword,0,-1);
}
}
outword $xword."אות", "ע,רבים";
$smichut=$xword."אות";
outword $smichut."-", "ע,רבים,סמיכות"; # smichut
outword $smichut."y", "ע,רבים,של/אני"; # possessives (kinu'im)
outword $smichut."ינו", "ע,רבים,של/אנחנו";
outword $smichut."יך", "ע,רבים,של/אתה";
outword $smichut."yך", "ע,רבים,של/את";
outword $smichut."יכם", "ע,רבים,של/אתם";
outword $smichut."יכן", "ע,רבים,של/אתן";
outword $smichut."יו", "ע,רבים,של/הוא";
outword $smichut."יה", "ע,רבים,של/היא";
outword $smichut."יהן", "ע,רבים,של/הן";
outword $smichut."יהם", "ע,רבים,של/הם";
}
if($plural_bizarre){
# User specified plural for bizarre cases; For example, the plural of
# צל is צללים, the plural of בת is בנות.
# We take the fully formed plural from the user, and may need to take
# off the ending to guess the smichut and possessives (letting the user
# override the smichut forms too).
my $plural=$opts{"רבים"};
#outword $plural, "ע,רבים";
outword((exists($opts{"נפרדים"}) ? $opts{"נפרדים"} : $plural), "ע,רבים");
# Overriding the plural nishmach with the נסמכים option: David Yalin,
# In his book דקדוק הלשון העברית (1942) explains in page 207 how some
# of the kinuyim are known as "kinuyey hanifrad" and some "kinuyey
# hanishmach" because when the nismach and nifrad differ, they follow
# different ones. This is important for words like תיש, and in fact
# the אבד_ו option does basically the same thing.
my $smichut_orig;
if(substr($plural,-2,2) eq "ות"){
$smichut_orig= exists($opts{"נסמכים"}) ? $opts{"נסמכים"} : $plural;
# as David Yalin explains (ibid.): "צריך להעיר כי בשמות שסימן הריבוי
# שלהם הוא -ות נוטים כל כינויי הרבים אחרי צורת הסמיכות".
$smichut=$smichut_orig;
outword $smichut_orig."-", "ע,רבים,סמיכות"; # smichut
} elsif(substr($plural,-2,2) eq "ים" || substr($plural,-2,2) eq "ין"){
$smichut=substr($plural,0,-2);
# the removal of the final yod from נסמכים is a bit silly... maybe
# we should have had a מקור_נסמכים option and ask it without yod.
$smichut_orig= exists($opts{"נסמכים"}) ?
substr($opts{"נסמכים"},0,-1) : $smichut;
outword $smichut_orig."י-", "ע,רבים,סמיכות"; # smichut
} else {
die "Plural given for $word is of unrecognized form: $plural.";
}
outword $smichut."y", "ע,רבים,של/אני"; # possessives (kinu'im)
outword $smichut."ינו", "ע,רבים,של/אנחנו";
outword $smichut."יך", "ע,רבים,של/אתה";
outword $smichut."yך", "ע,רבים,של/את";
outword $smichut_orig."יכם", "ע,רבים,של/אתם";
outword $smichut_orig."יכן", "ע,רבים,של/אתן";
outword $smichut."יו", "ע,רבים,של/הוא";
outword $smichut."יה", "ע,רבים,של/היא";
outword $smichut_orig."יהן", "ע,רבים,של/הן";
outword $smichut_orig."יהם", "ע,רבים,של/הם";
}
} elsif($opts{"ת"}){
############################# adjective ##################################
# adjectives carry no gender prefix in the detailed output
$detail_prefix="";
# preprocess the word the user has given, converting certain ktiv male
# constructs into markers (w, y) that we can better work with later (see
# comments in inword() about what it does).
$word=inword($word);
# A preprocessing rule special for adjectives: a final yud will always be
# a chirik male, not some sort of consonant yud or another vowel. Together
# with the iy post-transformation in outword, this makes שני - שנייה work
# correctly. However, when the word ends with וי (and not ווי) we assume
# this is shuruk followed by a consonant yud (for example, מצוי). In
# words that do end in ווי and the וו is not a consonant we must use a
# w explicitly, (e.g. רווי should be written explicitly as רwוי).
if($word =~ m/([^aeiו]|וו)י$/o){
substr($word,-1,1) = "iי";
}
my $xword=$word;
if(substr($xword,-1,1) eq "ה"){
# remove final "he" before adding the pluralization,
# unless the שמור_ה option was given.
if(!$opts{"שמור_ה"}){
$xword=substr($xword,0,-1);
}
}
if($opts{"עם"}){
# For nationality adjectives (always adding in yud!), there is a separate
# plural for the people of that nationality (rather than other objects
# from that country), with only ם added. There's also a country name,
# and sometimes a female-person form too (נקבה_ה). We handle these here,
# instead of separately in extrawords, so that the country list can be
# organized nicely at one place.
if(exists($opts{"ארץ"})){
# an explicitly empty ארץ option suppresses the country name
outword $opts{"ארץ"}, "ע,פרטי,נ" if($opts{"ארץ"} ne "") # country name
} elsif(substr($word,-3,3) eq "אiי"){
outword substr($word,0,-3)."ה", "ע,פרטי,נ"; # country name
} else {
# Strip the final yud (and its i marker) and give the new last letter its
# final form when it has one.
# NOTE: $country is not declared with "my" anywhere in the visible file, so
# it is a package global.
$country = $word;
$country =~ s/i?י$//;
$country =~ s/([כמנפצ])$/$fin{$1}/;
outword $country, "ע,פרטי,נ"; # country name
}
outword $word."ם", "ע,רבים,ז"; # plural (people of that nationality)
$opts{"נקבה_ת"}=1; # for enabling ת plural. adding ה plural is optional.
}
if(!exists($opts{"יחיד"})){
outword $word, "ת,יחיד,ז"; # masculine, singular
outword $word."-", "ת,יחיד,ז,סמיכות"; # smichut (same as nifrad)
} else {
outword $opts{"יחיד"}, "ת,יחיד,ז"; # masculine, singular
outword $opts{"יחיד"}."-", "ת,יחיד,ז,סמיכות"; # smichut (same as nifrad)
}
if($opts{"ם"}){
# special case for adjectives like רשאי. Unlike the noun case where we
# turn this option automatically for words ending with אי, here such a
# default would not be useful because a lot of nouns ending with ה or א
# correspond to adjectives ending with אי that this rule doesn't fit.
outword $xword."ם", "ת,רבים,ז"; # masculine, plural
outword $xword."-", "ת,רבים,ז,סמיכות"; # smichut
} else {
outword $xword."ים", "ת,רבים,ז"; # masculine, plural
outword $xword."י-", "ת,רבים,ז,סמיכות"; # smichut
}
# feminine, singular:
if($opts{"נקבה_ית"}){
# This is an ad-hoc treatment of the nekeva_it option, which cannot be
# combined with others because we will only have one plural form...
$xword=$xword."י";
$opts{"נקבה_ת"}=1;
}
# When no feminine option is given, choose the ת form for stems ending in
# yud and the ה form otherwise.
my $nekeva_implicit = !($opts{"נקבה_ת"} || $opts{"נקבה_ה"} ||
$opts{"יחידה"});
my $nekeva_t = $opts{"נקבה_ת"} ||
($nekeva_implicit && substr($xword,-1,1) eq "י");
my $nekeva_h = $opts{"נקבה_ה"} ||
($nekeva_implicit && !$nekeva_t);
if(exists($opts{"יחידה"})){
# explicitly given feminine singular; its smichut gets ת instead of a final
# ה unless the שמור_ה option was given
my $yechida=$opts{"יחידה"};
outword $yechida, "ת,יחיד,נ";
$yechida =~ s/ה$/ת/ if(!$opts{"שמור_ה"});
outword $yechida."-", "ת,יחיד,נ,סמיכות";
}
if($nekeva_t){
if(substr($word,-1,1) eq "ה" && !$opts{"שמור_ה"}){
# This is a rare case, where an adjective ending with ה gets a ת
# feminine form, and an extra yud needs to be added. For example
# מופלה, מופלית.
outword $xword."ית", "ת,יחיד,נ";
outword $xword."ית-", "ת,יחיד,נ,סמיכות"; # smichut (same as nifrad)
} else {
# note: we don't bother adding the vowel "e" before the ת because that
# would only make a difference before a yud - and interestingly when
# there *is* a yud, the vowel is dropped anyway!
outword $xword."ת", "ת,יחיד,נ";
outword $xword."ת-", "ת,יחיד,נ,סמיכות"; # smichut (same as nifrad)
}
}
if($nekeva_h){
outword $xword."aה", "ת,יחיד,נ";
outword $xword."aת-", "ת,יחיד,נ,סמיכות"; # smichut
}
outword $xword."ות", "ת,רבים,נ"; # feminine, plural
outword $xword."ות-", "ת,רבים,נ,סמיכות"; # smichut (same as nifrad)
} else {
die "word '".$word."' was not specified as noun, adjective or verb.";
}
# separator line between the output of consecutive input words
outword "-------"
}
# syntax highlighted by Code2HTML, v. 0.9.1