ports//hebrew/hspell/work/hspell-0.8/wolig.pl

#!/usr/bin/perl -w
#
# Copyright (C) 2000-2004 Nadav Har'El, Dan Kenigsberg
#
use Carp;
use FileHandle;

# Run-time switches shared by outword() and the main loop.
my $detailed_output = 0;  # when true, outword() appends details to each form
my $detail_prefix;        # text outword() puts in front of those details

# Lookup tables for the five Hebrew letters that have a word-final shape:
# %fin maps the ordinary letter to its final form, and %nif is the inverse
# table (final form back to the ordinary letter).
my %fin = (
  'כ' => 'ך',
  'מ' => 'ם',
  'נ' => 'ן',
  'פ' => 'ף',
  'צ' => 'ץ',
);
my %nif = reverse %fin;

sub outword {
  # Print one inflected form on a line of its own, after translating the
  # internal markers (a, e, i, y, Y, w, h) into plain Hebrew letters.
  #
  # Arguments:
  #   1. the form to print, possibly containing markers;
  #   2. (optional) a details string, appended after $detail_prefix when
  #      $detailed_output is on.
  my ($form, $details) = @_;

  # A leading "*" flags a non-existent form that must not be printed. It
  # lets callers drop forms without wrapping them in large if() blocks.
  return if index($form, '*') == 0;

  # NOTE: the order of the substitutions below matters. They can affect a
  # great many words, so diff the output files before and after any change.
  for ($form) {
    # A final-form letter that ended up inside the word (followed by a
    # Hebrew letter or by one of our a-z markers) goes back to its ordinary
    # form. The optional apostrophe is kept, for סנדוויץ' - סנדוויצ'ים.
    s/([ךםןףץ])('?)(?=[א-תa-z])/$nif{$1}$2/go;

    # The vowel markers 'a' and 'e' only matter after a yud, which they turn
    # into a consonant yud (marker y). Elsewhere they are simply removed.
    # E.g. your(feminine) צי is צייך, while your(masculine) צי is ציך.
    s/י[ea]/y/go;
    s/[ea]//go;

    # 'i' marks a chirik chaser. Before a consonant yud it produces a double
    # yud (שנiי - שנייה); any other 'i' is dropped.
    s/iy/יי/go;
    s/i//go;

    # 'Y' behaves like 'y', except that at the end of the word (or just
    # before the "-" of a smichut form) it is a single yud, as in חולי.
    s/Y($|(?=-))/י/go;
    s/Y/y/go;

    # Consonant yud. A chirik before yו is not written with a yud, so
    # יyו collapses into יו. A 'y' is doubled when it is not next to a
    # vav/yud and not followed by ה; every remaining 'y' is a single yud.
    s/יyו/יו/go;
    s/(?<=[^ויy])y(?=[^ויyה]|$)/יי/go;
    s/y/י/go;

    # Consonant vav. A vav followed by 'w' merges into a single vav (see the
    # discussion of שוק in wolig.dat). A 'w' is doubled when it is not next
    # to another vav and not followed by "-"; otherwise it is a single vav.
    s/וw/ו/go;
    s/(?<=[^וw])w(?=[^וw-])/וו/go;
    s/w/ו/go;

    # A consonant he is always written as ה. The marker only exists so that
    # the yud rules above can tell it apart from an em-kria ה
    # (compare אריה, lion, with ארייה, her lion).
    s/h/ה/go;
  }

  if(defined($details) && $detailed_output){
    # The details already say when this is a smichut form, so the trailing
    # "-" is removed before they are appended.
    $form =~ s/-$//;
    $form .= " $detail_prefix$details";
  }
  print "$form\n";
}

sub inword {
  # Preprocess a word as it was typed in wolig.dat (presumably in ktiv male):
  # at a few positions in the word a vav or a yud is certain to be a
  # consonant, and it is replaced there by the marker "w" or "y" that the
  # inflection code and outword() work with. The marked word is returned.
  #
  # A prediction belongs here only if
  # 1. it is needed to inflect some word correctly (e.g. marking the וו of
  #    מזווה as a consonant avoids the false inflection מזווו), and
  # 2. it holds for every word actually listed in wolig.dat. It does not have
  #    to be linguistically exact: the final vav of אחו is not a consonant,
  #    but marking it is harmless because a vav at the end of a word is never
  #    doubled anyway.
  my ($marked) = @_;

  # A word cannot start with a shuruk or kubuts, so an initial vav is a
  # consonant.
  substr($marked, 0, 1, "w") if substr($marked, 0, 1) eq "ו";

  # Words such as חוויה, הלוויה, טריוויה: the double vav before יה is a
  # consonant, never a vav followed by a shuruk.
  substr($marked, -4, 2, "w") if substr($marked, -4, 4) eq "וויה";

  if(substr($marked, -1, 1) eq "ו"){
    # A final vav is treated as a consonant (see point 2 above).
    substr($marked, -1, 1, "w");
  }
  elsif(substr($marked, -3, 3) eq "ווה"){
    # A final ווה was written in ktiv male and stands for a consonant vav.
    # Collapsing the וו into "w" lets outword() double it only where needed:
    # מקווה then gives the plural מקוות and the possessive מקוו rather than
    # the incorrect מקווות and מקווו.
    substr($marked, -3, 2, "w");
  }
  elsif(substr($marked, -2, 2) eq "יה"){
    # The yud of a final יה is marked as a consonant.
    # TODO: maybe convert ייה (in ktiv male, e.g., סופגנייה) into iyה; see
    # the discussion in outword. Everything works without it, though.
    substr($marked, -2, 1, "y");
  }
  return $marked;
}

#############################################################################

# Working state of the main loop: the data file handle, the word currently
# being inflected, its raw option string and the parsed options.
my ($fh,$word,$optstring,%opts);

# Command line: wolig.pl [-d] [datafile]
#   -d        toggle detailed output (see outword)
#   datafile  the word list to read; defaults to "wolig.dat"
my $infile;
if(@ARGV && $ARGV[0] eq "-d"){
	$detailed_output=!$detailed_output;
	shift @ARGV;
}
# Use the default data file whenever no file name is left on the command
# line, so that "wolig.pl -d" works just like "wolig.pl -d wolig.dat"
# instead of trying to open an undefined file name.
$infile = @ARGV ? $ARGV[0] : "wolig.dat";

# Open the data file, reporting the system error if that fails.
$fh = FileHandle->new($infile, "r")
  or croak "Couldn't open data file $infile for reading: $!";
# Main loop: read the data file one line at a time. A data line has the form
# "<word> <options>", where <options> is a comma-separated list of items
# written either as "name" or as "name=value". The option ע selects the noun
# branch and the option ת selects the adjective branch; a line with neither
# makes the script die. All generated forms are printed through outword, and
# a line of dashes is printed after each entry.
while(<$fh>){
  print if /^#\*/;        # print these comments.
  chomp;
  s/#.*$//o;              # comments start with '#'.
  next if /^[ 	]*$/o;	  # ignore blank lines.
  ($word,$optstring)=split;
  die "Type of word '".$word."' was not specified." if !defined($optstring);
  undef %opts;
  my $val;
  # Parse the options into %opts. An option given without "=value" gets the
  # value 1. Note that $opt is not declared with "my" anywhere in this loop.
  foreach $opt (split /,/o, $optstring){
    ($opt, $val) = (split /=/o, $opt);
    $val = 1 unless defined $val;
    $opts{$opt}=$val;
  }
  if($opts{"ע"}){
    ############################# noun ######################################
    # note that the noun may have several plural forms (see, for example,
    # אות). When one of the plural forms isn't explicitly specified, wolig
    # tries to guess, based on simplistic heuristics that work for the majority
    # of the nouns (84% of them, at one time I counted).
    my $plural_none = $opts{"יחיד"} || substr($word,-3,3) eq "יות";
    my $plural_bizarre = exists($opts{"רבים"});
    my $plural_implicit = !($opts{"ות"} || $opts{"ים"} || $opts{"יות"}
			   || $opts{"אות"} || $opts{"יים"} || $plural_none
			   || $plural_bizarre);
    my $plural_iot = $opts{"יות"} ||
      ($plural_implicit && (substr($word,-2,2) eq "ות"));
    my $plural_xot = $opts{"אות"};
    my $plural_ot = $opts{"ות"} ||
      ($plural_implicit && !$plural_iot && (substr($word,-1,1) eq "ה" || substr($word,-1,1) eq "ת" ));
    my $plural_im = $opts{"ים"} || ($plural_implicit && !$plural_ot && !$plural_iot);
    my $plural_iim = $opts{"יים"};

    # Find gender for detailed output. This has nothing to do with word
    # inflection, it's just an added value of wolig.pl...
    if($detailed_output){
      my $gender;
      if($opts{"זכר"}){
        if($opts{"נקבה"}){
   	  $gender="ז,נ";
	} else {
	  $gender="ז";
	}
      } elsif($opts{"נקבה"}){
        $gender="נ"
      } elsif($opts{"סגול_ה"}){
        $gender="ז";
      } elsif((substr($word,-1,1) eq "ה") && !$opts{"אבד_ו"}){
        $gender="נ";
      } elsif(substr($word,-1,1) eq "ת" && !$opts{"ים"}){
        $gender="נ";
      } else {
        $gender="ז";
      }
      $detail_prefix="$gender,";
    }

    # preprocess the word the user has given, converting certain ktiv male
    # constructs into markers (w, y) that we can better work with later (see
    # comments in inword() about what it does).
    $word=inword($word);

    # related singular noun forms
    if(exists $opts{"נפרד"}){
      outword $opts{"נפרד"}, "ע,יחיד";  # explicit override of the nifrad
    } elsif(!$opts{"אין_יחיד"}){
      outword $word, "ע,יחיד"; # the singular noun itself
    }
    if($opts{"אבד_י"}){
      # in words like עיפרון and היריון the first yud (coming from chirik
      # or tsere in ktiv male) is lost in all but the base word
      $word =~ s/י//o;
    }
    my $smichut=$word;
    if($opts{"אין_יחיד"} || $opts{"אין_נטיות_יחיד"}){
      # We mark the singular words with "*", telling outword to drop them.
      # This makes the code look cleaner than a huge if statement around all
      # the singular code. Maybe in the future we should move the singular
      # inflection code to a separate function, if() only around that, and
      # stop all that "*" nonsense.
      $smichut="*".$smichut;
    }
    #my $smichut_orig=$smichut;
    if($opts{"מיוחד_אח"}){
      # special case:
      # אח, אב, חם, פה include an extra yod in the smichut. Note that in the
      # first person singular possessive, we should drop that extra yod.
      # For a "im" plural, it turns out to be the same inflections as the
      # plural - but this is not the case with a "ot" plural.
      # Interestingly, the yud in these inflections is always a chirik
      # male - it is never consonantal (never has a vowel on it).
      if(substr($smichut,-1,1) eq "ה"){
        # Remove the ה. Basically, only one word fits this case: פה
	$smichut=substr($smichut,0,-1);
	# And add the extra third-person masculine possessive (just like the
	# סגול_ה case, but we don't bother to check for the סגול_ה flag here).
	outword $smichut."יהו", "ע,יחיד,של/הוא";
      }
      outword $smichut."י-",  "ע,יחיד,סמיכות"; # smichut
      outword $smichut."י",   "ע,יחיד,של/אני"; # possessives (kinu'im)
      outword $smichut."ינו", "ע,יחיד,של/אנחנו";
      outword $smichut."יך",  "ע,יחיד,של/אתה";
      outword $smichut."יך",  "ע,יחיד,של/את";
      outword $smichut."יכם", "ע,יחיד,של/אתם";
      outword $smichut."יכן", "ע,יחיד,של/אתן";
      outword $smichut."יו",  "ע,יחיד,של/הוא";
      outword $smichut."יה",  "ע,יחיד,של/היא";
      outword $smichut."יהן", "ע,יחיד,של/הן";
      outword $smichut."יהם", "ע,יחיד,של/הם";
    } else {
      # (the inner test of סגול_ה below repeats the enclosing condition)
      if(!$opts{"סגול_ה"}){ # replace final ה by ת, unless סגול_ה option
        if(substr($smichut,-1,1) eq "ה" && !$opts{"סגול_ה"}){
          substr($smichut,-1,1)="ת";
        }
      }
      if(exists($opts{"נסמך"})){
        outword $opts{"נסמך"}."-", "ע,יחיד,סמיכות";
      } else {
        outword $smichut."-", "ע,יחיד,סמיכות"; # smichut
      }
      if($opts{"מיוחד_שן"}){
      	# academia's ktiv male rules indicate that the inflections of שן
	# (at least the plural is explicitly mentioned...) should get an
	# extra yud - to make it easy to distinguish from the number שניים.
	# The yud is inserted just before the last letter of both variables.
	substr($smichut,0,-1)=substr($smichut,0,-1).'י';
	substr($word,0,-1)=substr($word,0,-1).'י';
      }
      if(substr($word,-2,2) eq "אי" && length($word)>2){
      	# in words ending with patach and then the imot kria aleph yud,
	# such as תנאי and גבאי, all the inflections (beside the base word
	# and the smichut) are as if the yud wasn't there.
	# Note that words ending with אי but not patach, like אי and סנאי,
	# should not get this treatment, so there should be an option to turn
	# it off.
	substr($word,-1,1)="";
	substr($smichut,-1,1)="";
      }
      # Note that the extra vowel markers, 'a' and 'e' are added for mele'im
      # ending with yud (e.g., אי) - this vowel attaches to the yud and makes
      # the yud a consonant. This phenomenon is handled in outword.
      # ($no_ah is only referenced by the commented-out line below.)
      my $no_ah=0;
      if($opts{"סגול_ה"}){
      	# the ה is dropped from the singular inflections, except one alternate
	# inflection like מורהו (the long form of מורו):
	# (there's another feminine inflection, מורה with kamats on the he,
	# but this is spelled the same (as מורה with mapik) without niqqud so
	# we don't need to print it again).
	if(substr($smichut,-1,1) eq "ה"){
	  $smichut=substr($smichut,0,-1);
	}
        outword $smichut."ehו", "ע,יחיד,של/הוא";
	# TODO: maybe add the "eha" inflection? But it won't generate anything
	# different from the ah below...
        #outword $smichut."eha" unless $no_ah;
      }
      outword $smichut."י",   "ע,יחיד,של/אני"; # possessives (kinu'im)
      outword $smichut."eנו", "ע,יחיד,של/אנחנו";
      outword $smichut."ך",   "ע,יחיד,של/אתה";
      outword $smichut."eך",  "ע,יחיד,של/את";
      outword $smichut."כם",  "ע,יחיד,של/אתם";
      outword $smichut."כן",  "ע,יחיד,של/אתן";
      outword $smichut."ו",   "ע,יחיד,של/הוא";
      outword $smichut."ah",  "ע,יחיד,של/היא";
      outword $smichut."aן",  "ע,יחיד,של/הן";
      outword $smichut."aם",  "ע,יחיד,של/הם";
    }
    # related plural noun forms
    # note: don't combine the $plural_.. ifs, nor use elsif, because some
    # nouns have more than one plural forms.
    if($plural_im){
      my $xword=$word;
      if(substr($xword,-1,1) eq "ה" && !$opts{"שמור_ת"}){
	# remove final "he" (not "tav", unlike the "ot" pluralization below)
	# before adding the "im" pluralization, unless the שמור_ת option was
	# given.
	$xword=substr($xword,0,-1);
      }
      my $xword_orig=$xword;
      if($opts{"אבד_ו"}){
	# when the אבד_ו flag was given, we remove the first "em kri'a" from
	# the word in most of the inflections. (see a discussion of this
	# option in wolig.dat).
	$xword =~ s/ו//o;
      }
      outword $xword."ים", "ע,רבים";
      $smichut=$xword;
      my $smichut_orig=$xword_orig;
      outword $smichut_orig."י-", "ע,רבים,סמיכות"; # smichut
      # (We write patach followed by a consonant yud as "y", and later this will
      # give us the chance to automatically double it as necessary by the
      # Academia's ktiv male rules)
      outword $smichut."y",        "ע,רבים,של/אני"; # possessives (kinu'im)
      outword $smichut."ינו",      "ע,רבים,של/אנחנו";
      outword $smichut."יך",       "ע,רבים,של/אתה";
      outword $smichut."yך",       "ע,רבים,של/את";
      outword $smichut_orig."יכם", "ע,רבים,של/אתם";
      outword $smichut_orig."יכן", "ע,רבים,של/אתן";
      outword $smichut."יו",       "ע,רבים,של/הוא";
      outword $smichut."יה",       "ע,רבים,של/היא";
      outword $smichut_orig."יהן", "ע,רבים,של/הן";
      outword $smichut_orig."יהם", "ע,רבים,של/הם";
    }
    if($plural_iim){
      # I currently decided that in Hebrew, unlike Arabic, only specific
      # nouns can get the iim (zugi) pluralization, and most nouns can't,
      # e.g., חתוליים isn't correct (for "two cats") despite a story called
      # מעשה בחתוליים. This is why this is an option, and not the default.
      my $xword=$word;
      if(substr($xword,-1,1) eq "ה"){
	# Change final he into tav before adding the "iim" pluralization.
	$xword=substr($xword,0,-1)."ת";
      }
      my $xword_orig=$xword;
      outword $xword."yם", "ע,רבים";
      $smichut=$xword;
      my $smichut_orig=$xword_orig;
      outword $smichut_orig."י-", "ע,רבים,סמיכות"; # smichut
      outword $smichut."y",        "ע,רבים,של/אני"; # possessives (kinu'im)
      outword $smichut."ינו",      "ע,רבים,של/אנחנו";
      outword $smichut."יך",       "ע,רבים,של/אתה";
      outword $smichut."yך",       "ע,רבים,של/את";
      outword $smichut_orig."יכם", "ע,רבים,של/אתם";
      outword $smichut_orig."יכן", "ע,רבים,של/אתן";
      outword $smichut."יו",       "ע,רבים,של/הוא";
      outword $smichut."יה",       "ע,רבים,של/היא";
      outword $smichut_orig."יהן", "ע,רבים,של/הן";
      outword $smichut_orig."יהם", "ע,רבים,של/הם";
    }
    if($plural_ot){
      my $xword=$word;
      if(substr($xword,-1,1) eq "ה" || substr($xword,-1,1) eq "ת"){
	# remove final "he" or "tav" before adding the "ot" pluralization,
	# unless the שמור_ת option was given.
	if(!$opts{"שמור_ת"}){
	  $xword=substr($xword,0,-1);
	}
      }
      if($opts{"אבד_ו"}){
      	# In segoliim with cholam chaser chat that inflect like feminines
	# (i.e., the plural_ot case), the cholam is lost *only* in the base
	# plural, not in other plural inflection. This is comparable to the
	# inflections of the word מלכה, where the patach is lost only in the
	# base plural.
	# See for example גורן, דופן.
	my $tmp = $xword;
	$tmp =~ s/ו//o;
      	outword $tmp."ות",    "ע,רבים";
      } else {
        outword $xword."ות",  "ע,רבים";
      }
      
      $smichut=$xword."ות";
      outword $smichut."-",   "ע,רבים,סמיכות"; # smichut
      outword $smichut."y",   "ע,רבים,של/אני"; # possessives (kinu'im)
      outword $smichut."ינו", "ע,רבים,של/אנחנו";
      outword $smichut."יך",  "ע,רבים,של/אתה";
      outword $smichut."yך",  "ע,רבים,של/את";
      outword $smichut."יכם", "ע,רבים,של/אתם";
      outword $smichut."יכן", "ע,רבים,של/אתן";
      outword $smichut."יו",  "ע,רבים,של/הוא";
      outword $smichut."יה",  "ע,רבים,של/היא";
      outword $smichut."יהן", "ע,רבים,של/הן";
      outword $smichut."יהם", "ע,רבים,של/הם";
    }
    if($plural_iot){
      my $xword=$word;
      if(substr($xword,-1,1) eq "ה" || substr($xword,-1,1) eq "ת"){
	# remove final "he" or "tav" before adding the "iot" pluralization,
	# unless the שמור_ת option was given.
	if(!$opts{"שמור_ת"}){
	  $xword=substr($xword,0,-1);
	}
      }
      outword $xword."יות",   "ע,רבים";
      $smichut=$xword."יות";
      outword $smichut."-",   "ע,רבים,סמיכות"; # smichut
      outword $smichut."y",   "ע,רבים,של/אני"; # possessives (kinu'im)
      outword $smichut."ינו", "ע,רבים,של/אנחנו";
      outword $smichut."יך",  "ע,רבים,של/אתה";
      outword $smichut."yך",  "ע,רבים,של/את";
      outword $smichut."יכם", "ע,רבים,של/אתם";
      outword $smichut."יכן", "ע,רבים,של/אתן";
      outword $smichut."יו",  "ע,רבים,של/הוא";
      outword $smichut."יה",  "ע,רבים,של/היא";
      outword $smichut."יהן", "ע,רבים,של/הן";
      outword $smichut."יהם", "ע,רבים,של/הם";
    }
    if($plural_xot){
      my $xword=$word;
      if(substr($xword,-1,1) eq "ה" || substr($xword,-1,1) eq "ת"){
	# remove final "he" or "tav" before adding the "xot" pluralization,
	# unless the שמור_ת option was given.
	if(!$opts{"שמור_ת"}){
	  $xword=substr($xword,0,-1);
	}
      }
      outword $xword."אות",   "ע,רבים";
      $smichut=$xword."אות";
      outword $smichut."-",   "ע,רבים,סמיכות"; # smichut
      outword $smichut."y",   "ע,רבים,של/אני"; # possessives (kinu'im)
      outword $smichut."ינו", "ע,רבים,של/אנחנו";
      outword $smichut."יך",  "ע,רבים,של/אתה";
      outword $smichut."yך",  "ע,רבים,של/את";
      outword $smichut."יכם", "ע,רבים,של/אתם";
      outword $smichut."יכן", "ע,רבים,של/אתן";
      outword $smichut."יו",  "ע,רבים,של/הוא";
      outword $smichut."יה",  "ע,רבים,של/היא";
      outword $smichut."יהן", "ע,רבים,של/הן";
      outword $smichut."יהם", "ע,רבים,של/הם";
    }
    if($plural_bizarre){
      # User specified plural for bizarre cases; For example, the plural of
      # צל is צללים, the plural of בת is בנות.
      # We take the fully formed plural from the user, and may need to take
      # off the ending to guess the smichut and possessives (letting the user
      # override the smichut forms too).
      my $plural=$opts{"רבים"};
      #outword $plural, "ע,רבים";
      outword((exists($opts{"נפרדים"}) ? $opts{"נפרדים"} : $plural), "ע,רבים");
      # Overriding the plural nishmach with the נסמכים option: David Yalin,
      # In his book דקדוק הלשון העברית (1942) explains in page 207 how some
      # of the kinuyim are known as "kinuyey hanifrad" and some "kinuyey
      # hanishmach" because when the nismach and nifrad differ, they follow
      # different ones. This is important for words like תיש, and in fact
      # the אבד_ו option does basically the same thing.
      my $smichut_orig;
      if(substr($plural,-2,2) eq "ות"){
	$smichut_orig= exists($opts{"נסמכים"}) ? $opts{"נסמכים"} : $plural;
	# as David Yalin explains (ibid.): "צריך להעיר כי בשמות שסימן הריבוי
	# שלהם הוא -ות נוטים כל כינויי הרבים אחרי צורת הסמיכות".
	# (In English: "It should be noted that in nouns whose plural marker
	# is -ות, all the plural possessive suffixes follow the smichut form.")
        $smichut=$smichut_orig;
        outword $smichut_orig."-", "ע,רבים,סמיכות"; # smichut
      } elsif(substr($plural,-2,2) eq "ים" || substr($plural,-2,2) eq "ין"){
        $smichut=substr($plural,0,-2);
	# the removal of the final yod from נסמכים is a bit silly... maybe
	# we should have had a מקור_נסמכים option and ask it without yod.
	$smichut_orig= exists($opts{"נסמכים"}) ?
		substr($opts{"נסמכים"},0,-1) : $smichut;
        outword $smichut_orig."י-", "ע,רבים,סמיכות"; # smichut
      } else {
        die "Plural given for $word is of unrecognized form: $plural.";
      }
      outword $smichut."y",        "ע,רבים,של/אני"; # possessives (kinu'im)
      outword $smichut."ינו",      "ע,רבים,של/אנחנו";
      outword $smichut."יך",       "ע,רבים,של/אתה";
      outword $smichut."yך",       "ע,רבים,של/את";
      outword $smichut_orig."יכם", "ע,רבים,של/אתם";
      outword $smichut_orig."יכן", "ע,רבים,של/אתן";
      outword $smichut."יו",       "ע,רבים,של/הוא";
      outword $smichut."יה",       "ע,רבים,של/היא";
      outword $smichut_orig."יהן", "ע,רבים,של/הן";
      outword $smichut_orig."יהם", "ע,רבים,של/הם";
    }
  } elsif($opts{"ת"}){
    ############################# adjective ##################################
    $detail_prefix="";
    # preprocess the word the user has given, converting certain ktiv male
    # constructs into markers (w, y) that we can better work with later (see
    # comments in inword() about what it does).
    $word=inword($word);
    # A preprocessing rule special for adjectives: a final yud will always be
    # a chirik male, not some sort of consonant yud or another vowel. Together
    # with the iy post-transformation in outword, this makes שני - שנייה work
    # correctly. However, when the word ends with וי (and not ווי) we assume
    # this is shuruk followed by a consonant yud (for example, מצוי). In
    # words that do end in ווי and the וו is not a consonant we must use a
    # w explicitly, (e.g. רווי should be written explicitly as רwוי).
    if($word =~ m/([^aeiו]|וו)י$/o){
      substr($word,-1,1) = "iי";
    }

    my $xword=$word;
    if(substr($xword,-1,1) eq "ה"){
      # remove final "he" before adding the pluralization,
      # unless the שמור_ה option was given.
      if(!$opts{"שמור_ה"}){
	$xword=substr($xword,0,-1);
      }
    }

    if($opts{"עם"}){
      # For nationality adjectives (always adding in yud!), there is a separate
      # plural for the people of that nationality (rather than other objects
      # from that country), with only ם added. There's also a country name,
      # and sometimes a female-person form too (נקבה_ה). We handle these here,
      # instead of separately in extrawords, so that the country list can be
      # organized nicely at one place.
      if(exists($opts{"ארץ"})){
        outword $opts{"ארץ"}, "ע,פרטי,נ" if($opts{"ארץ"} ne "") # country name
      } elsif(substr($word,-3,3) eq "אiי"){
        outword substr($word,0,-3)."ה", "ע,פרטי,נ";  # country name
      } else {
        # Derive the country name from the adjective: drop the final yud
        # (with its "i" marker, if any) and turn a letter left at the end
        # into its final form. Note that $country is not declared with "my".
        $country = $word;
        $country =~ s/i?י$//;
	$country =~ s/([כמנפצ])$/$fin{$1}/;
        outword $country, "ע,פרטי,נ"; # country name
      }
      outword $word."ם", "ע,רבים,ז"; # plural (people of that nationality)
      $opts{"נקבה_ת"}=1; # for enabling ת plural. adding ה plural is optional.
    }

    if(!exists($opts{"יחיד"})){
      outword $word,     "ת,יחיד,ז"; # masculine, singular
      outword $word."-", "ת,יחיד,ז,סמיכות"; # smichut (same as nifrad)
    } else {
      outword $opts{"יחיד"},     "ת,יחיד,ז"; # masculine, singular
      outword $opts{"יחיד"}."-", "ת,יחיד,ז,סמיכות"; # smichut (same as nifrad)
    }
    if($opts{"ם"}){
      # special case for adjectives like רשאי. Unlike the noun case where we
      # turn this option automatically for words ending with אי, here such a
      # default would not be useful because a lot of nouns ending with ה or א
      # correspond to adjectives ending with אי that this rule doesn't fit.
      outword $xword."ם",  "ת,רבים,ז"; # masculine, plural
      outword $xword."-",  "ת,רבים,ז,סמיכות"; # smichut
    } else {
      outword $xword."ים", "ת,רבים,ז"; # masculine, plural
      outword $xword."י-", "ת,רבים,ז,סמיכות"; # smichut
    }
    # feminine, singular:
    if($opts{"נקבה_ית"}){
      # This is an ad-hoc treatment of the nekeva_it option, which cannot be
      # combined with others because we will only have one plural form...
      $xword=$xword."י";
      $opts{"נקבה_ת"}=1;
    }
    my $nekeva_implicit = !($opts{"נקבה_ת"} || $opts{"נקבה_ה"} ||
    			    $opts{"יחידה"});
    my $nekeva_t = $opts{"נקבה_ת"} ||
    		   ($nekeva_implicit && substr($xword,-1,1) eq "י");
    my $nekeva_h = $opts{"נקבה_ה"} ||
    		   ($nekeva_implicit && !$nekeva_t);
    if(exists($opts{"יחידה"})){
      # Explicit feminine singular given by the user; its smichut replaces a
      # final ה by ת unless the שמור_ה option was given.
      my $yechida=$opts{"יחידה"};
      outword $yechida,     "ת,יחיד,נ";
      $yechida =~ s/ה$/ת/ if(!$opts{"שמור_ה"});
      outword $yechida."-", "ת,יחיד,נ,סמיכות";
    }
    if($nekeva_t){
      if(substr($word,-1,1) eq "ה" && !$opts{"שמור_ה"}){
        # This is a rare case, where an adjective ending with ה gets a ת
	# feminine form, and an extra yud needs to be added. For example
	# מופלה, מופלית.
        outword $xword."ית",  "ת,יחיד,נ";
        outword $xword."ית-", "ת,יחיד,נ,סמיכות"; # smichut (same as nifrad)
      } else {
        # note: we don't bother adding the vowel "e" before the ת because that
        # would only make a difference before a yud - and interestingly when
        # there *is* a yud, the vowel is dropped anyway!
        outword $xword."ת",   "ת,יחיד,נ";
        outword $xword."ת-",  "ת,יחיד,נ,סמיכות"; # smichut (same as nifrad)
      }
    }
    if($nekeva_h){
      outword $xword."aה",  "ת,יחיד,נ";
      outword $xword."aת-", "ת,יחיד,נ,סמיכות"; # smichut
    }
    outword $xword."ות",  "ת,רבים,נ"; # feminine, plural
    outword $xword."ות-", "ת,רבים,נ,סמיכות"; # smichut (same as nifrad)
  } else {
    # Neither the noun option nor the adjective option was given. (The
    # message also mentions verbs, but this loop has no verb branch.)
    die "word '".$word."' was not specified as noun, adjective or verb.";
  }
  # Separator line printed after the forms of every entry.
  outword "-------"
}
syntax highlighted by Code2HTML, v. 0.9.1