############################################################################
#
#	Name:	 qur2rtv.icn
#
#	Title:	 qur2rtv (Quran -> retrieve format converter)
#
#	Author:	 Richard L. Goerwitz
#
#	Version: 1.2
#
############################################################################
#
#  Program for converting the internet-accessible scan of M. H.
#  Shakir's Quran translation into retrieve format.  Reads standard
#  input.  Writes reformatted text to standard output.  Assumes the
#  sections will come in order (1 before 2; 2 before 3, etc.), but
#  that they will all be directed into the same input stream.
#  Naturally, it does not matter whether they have been concatenated
#  into one, or remain split into several, files.
#
############################################################################
#
#  Links: none
#
############################################################################

#
# main:  read the scanned text line by line from standard input, detect
# verse numbers at the start of lines, and write each verse as a
# "::section:verse" reference line followed by its accumulated text.
# Diagnostics go to &errout; an unrecoverable numbering problem stops
# the program via stop().
#
procedure main()

    local line, verse, junk
    # Static rather than local in case this ever gets encapsulated.
    static section, last_verse, text, skipped, extra_text, seenit
    initial {
        last_verse := 1000
        section    := 0
        extra_text := ""
        skipped    := 1
    }

    # While you can read lines from stdin...
    while line := trim(read(), '\t \x0D\x1A') do {   # trim CR, tab, sp, ^Z

        # ...scan them for text numbers, and output these in retrieve
        # format, along with corresponding text.
        line ? {

            # Housekeeping.
            if pos(0) then {
                skipped := 1            # note that the last line was blank
                next                    # skip past empty lines
            }
            tab(many('\t '))            # tab past whitespace (if present)

            # Two cases where extra text has been tacked onto a file
            # and has to be stripped out.  Both skip loops also end when
            # input runs out; otherwise a failed read() would keep the
            # loop spinning forever on truncated input.
            if ="THE SPIDER" then {
                while junk := read(&input) do
                    if junk ? (tab(match("\x1A" | "with")), pos(0)) then break
                next
            }
            if section = 65 & verse = 12 & /seenit &
                ="In the Name of Allah, the Beneficent, the Merciful."
            then {
                while junk := read(&input) do
                    if junk ? tab(match("\x1A" | "and she")) then break
                seenit := 1
                next
            }

            # More housekeeping (the text is rife with errors): drop a
            # stray ". " that precedes a "2", and a stray "/ ".
            # NOTE(review): the ". " literal was reconstructed from a
            # whitespace-mangled copy of this file - confirm against the
            # pristine source.
            (=". ", match("2"))
            ="/ "

            # If the line begins with a numerical reference, then
            # write out the text of the preceding text block (if in fact
            # there *was* a preceding text block).  Finally, write out the
            # section/text reference (in retrieve format).  A reference is
            # only looked for after a blank line or when the text so far
            # ends in '.', '?' or ':'.
            if (\skipped | any('.?:', \text, -1)) & verse := is_it_a_verse()
            then {
                write(\text)
                if -1 <= verse < 2 then {
                    # Verse 0 or 1 starts a new section.
                    section +:= 1
#                   # For debugging purposes.
#                   write(&errout, "resetting; text = \n", \text)
#                   write(&errout, "section now = ", section)
#                   write(&errout, "last_verse = ", last_verse)
#                   write(&errout, "verse = ", verse)
                }
                else if verse ~= (last_verse + 1) then {
                    # Out-of-sequence number: try the known repairs.
                    if verse = (last_verse + 2) then
                        write(&errout, "LF missing, ", section, ":", last_verse)
                    else if not (verse := map(verse, "1", "7") = (last_verse + 1))
                    then {
                        # Reading every "1" as "7" did not help either;
                        # fall back on hand-coded special cases.
                        if verse = 34 & last_verse = 35 then
                            verse := 36
                        else if verse = 6 & last_verse = 3 & section = 47 then {
                            write(&errout, "extra text, ", section, ":", last_verse)
                            # Skip to the next blank line (or end of input).
                            while junk := read(&input) do
                                if trim(junk) == "" then break
                            next
                        }
                        else if section = 43 & verse = 29 & last_verse = 30 then {
                            # Skip through the line containing this phrase.
                            find("disbelievers in it", !&input) |
                                stop("parsing error; get help")
                            next
                        }
                        else
                            stop("error, ", section, ":", last_verse, "\n", text)
                    }
                }
                last_verse := verse
                write("::", section, ":", verse)
                tab(many(' \t'))
                text := extra_text || " " || tab(0)
                extra_text := ""
            }
            else {
                # Dump the (rest of) the line onto text.
                if /skipped & (extra_text == "") then
                    text ||:= " " || tab(0)
                else
                    # If we've had a blank line in this text block, but
                    # no verse number, then concatenate it with any other
                    # text we have after the last blank line.
                    extra_text ||:= " " || tab(0)
            }
        }
        skipped := &null
    }

    # Flush the "text" buffer.
    \text ||:= " " || \extra_text
    write(\text)

    exit(0)     # or fail

end


#
# From strings.icn in the IPL (written by Ralph Griswold).
#
# REplace(s1, s2, s3):  return s1 with every occurrence of s2 replaced
# by s3.  Fails if s2 is empty, because find("") succeeds without
# advancing and the replacement loop would never terminate.
#
procedure REplace(s1, s2, s3)

    local result, i

    result := ""
    i := *s2
    if i = 0 then fail

    s1 ? {
        while result ||:= tab(find(s2)) do {
            result ||:= s3
            move(i)
        }
        return result || tab(0)
    }

end


#
# is_it_a_verse():  operates on the caller's &subject/&pos.  Can the
# first bit of text at &pos possibly be construed as a verse reference
# (with typos)?  If so, return the verse number and leave &pos after the
# matched text; otherwise fail with &pos exactly where it was on entry.
#
procedure is_it_a_verse()

    local start, verse, head, tmp, number

    start := &pos

    if verse := (
        # I've seen "I 1." for 11.
        (="I 1. ", 11) |

        # I've seen "I." or "l." for "1."
        (tab(any('lI')), =".", 1) |

        # I've even seen "S." for "5."
        (="S", =".", 5) |

        # The ordinary case: digits followed by '.', ':' or a space.
        1(tab(many(&digits)), tab(any('.: ')))
    )
    then return verse

    # If it's none of the above, then as long as the candidate is more
    # than one character long, try lots of mapping.  Without that length
    # restriction we would accept lines which begin with the word "I"
    # (as in "I am").  No go.
    #
    # The candidate text runs through the first space and on up to the
    # next upper-case letter.
    if head := (tab(find(" ") + 1) || tab(upto(&ucase))) then {
        head ? {
            tmp := 1(tab(many(&digits ++ 'IOlS')), tab(any(':., ')))
            if *\tmp > 1 then
                number := integer(map(tmp, "IOlS", "1015"))
        }
        if \number then return number
    }

    # Not a verse reference.  tab() only undoes its move when it is
    # resumed, and the bounded "if" above never resumes it once it has
    # succeeded, so put the scanning position back by hand; otherwise the
    # caller would lose the leading words of an ordinary text line.
    &pos := start
    fail

end