############################################################################
#
#	Name:	 qur2rtv.icn
#
#	Title:	 qur2rtv  (Quran -> retrieve format converter)
#
#	Author:	 Richard L. Goerwitz
#
#	Version: 1.2
#
############################################################################
#
#  Program for converting the internet-accessible scan of M. H.
#  Shakir's Quran translation into retrieve format.  Reads standard
#  input.  Writes reformatted text to standard output.  Assumes the
#  sections will come in order (1 before 2; 2 before 3, etc.), but
#  that they will all be directed into the same input stream.
#  Naturally, it does not matter whether they have been concatenated
#  into one, or remain split into several, files.
#
############################################################################
#
#  Links: none
#
############################################################################


procedure main()

    #
    # Reads the scanned translation from standard input line by line,
    # recognizes (possibly garbled) verse numbers at the start of a
    # line, and writes each verse to standard output as a "::S:V"
    # reference line followed by the accumulated verse text.
    # Diagnostics about irregular numbering go to &errout.
    #
    # State kept between lines:
    #   section     - current section number (bumped when a verse
    #                 numbered -1..1 is seen)
    #   last_verse  - number of the most recently emitted verse
    #   text        - text collected for the verse being built
    #   extra_text  - text seen after a blank line but before the next
    #                 verse number; prepended to the next verse
    #   skipped     - non-null if the previous line was blank
    #   seenit      - non-null once the section 65 junk has been skipped
    #

    local line, verse, junk
    # in case this ever gets encapsulated
    static section, last_verse, text, skipped, extra_text, seenit
    initial {
        last_verse := 1000
        section := 0
        extra_text := ""
        skipped := 1
    }

    # While you can read lines from stdin...
    while line := trim(read(),'\t \x0D\x1A')    # trim CR, tab, sp, ^Z
    do {

        # ...scan them for text numbers, and output these in retrieve
        # format, along with corresponding text.
        line ? {

            # Housekeeping.
            if pos(0) then {
                skipped := 1    # note that the last line was blank
                next            # skip past empty lines
            }
            tab(many('\t '))    # tab past whitespace (if present)

            # Two cases where extra text has been tacked onto a file
            # and has to be stripped out.  Each loop discards input up
            # to and including a marker line; it also stops at end of
            # input, so a truncated file cannot hang the program.
            ="THE SPIDER" & {
                while junk := read(&input) do {
                    # marker: a line that is exactly ^Z or "with"
                    if junk ? (tab(match("\x1A" | "with")), pos(0))
                    then break
                }
                next
            }
            if section = 65 & verse = 12 & /seenit & {
                ="In the Name of Allah, the Beneficent, the Merciful."
            }
            then {
                while junk := read(&input) do {
                    # marker: a line beginning with ^Z or "and she"
                    if junk ? tab(match("\x1A" | "and she"))
                    then break
                }
                seenit := 1
                next
            }
            # More housekeeping (the text is rife with errors).
            (=". ", match("2"))    # drop a stray ". " in front of a "2"
            ="/ "                  # drop a stray "/ "

            # If the next line begins with a numerical reference, then
            # write out the text of the preceding text (if in fact
            # there *was* a preceding text block).  Finally, write out the
            # section/text reference (in retrieve format).
            #
            # A verse number is only looked for after a blank line or
            # when the collected text ends in '.', '?' or ':'.

            if \skipped | any('.?:', \text, -1) &
                verse := is_it_a_verse()
            then {
                write(\text)
                if -1 <= verse < 2 then {
                    # Numbering restarted: a new section begins.
                    section +:= 1
#                   # For debugging purposes.
#                   write(&errout, "resetting; text = \n", \text)
#                   write(&errout, "section now = ", section)
#                   write(&errout, "last_verse = ", last_verse)
#                   write(&errout, "verse = ", verse)
                }
                else if verse ~= (last_verse+1) then {
                    # Out-of-sequence number: try the known repairs.
                    if verse = (last_verse+2) then
                        write(&errout, "LF missing, ",section,":",last_verse)
                    # Retry with every "1" read as "7"; on a match verse
                    # becomes last_verse+1.
                    else if not (verse := map(verse, "1", "7")= (last_verse+1))
                    then if verse = 34 & last_verse = 35
                    then verse := 36
                    else if verse = 6 & last_verse = 3 & section = 47
                    then {
                        write(&errout,"extra text, ",section,":",last_verse)
                        # Discard input through the next blank line (or
                        # to end of input, whichever comes first).
                        while junk := read(&input) do {
                            if trim(junk) == "" then break
                        }
                        next
                    } else if section = 43 & verse = 29 & last_verse = 30
                    then {
                        # Discard input through the line containing
                        # the phrase below; give up if it never appears.
                        find("disbelievers in it", !&input) |
                            stop("parsing error; get help")
                        next
                    }
                    else stop("error, ",section,":",last_verse,"\n",text)
                }
                last_verse := verse
                write("::", section, ":", verse)
                tab(many(' \t'))
                text := extra_text || " " || tab(0)
                extra_text := ""
            } else {
                # Dump the (rest of) the line onto text.
                if /skipped & (extra_text == "") then
                    text ||:= " " || tab(0)
                else {
                    # if we've had a blank line in this text block, but
                    # no verse number, then concatenate it with any other
                    # text we have after the last blank line
                    extra_text ||:= " " || tab(0)
                }
            }
        }
        skipped := &null
    }
    # Flush the "text" buffer.
    \text ||:= " " || \extra_text
    write(\text)

    exit(0)
    # or fail

end


#
# From strings.icn in the IPL (written by Ralph Griswold).
#
procedure REplace(s1,s2,s3)

    #
    # Returns a copy of s1 in which every occurrence of s2 has been
    # replaced by s3, working left to right.  Fails if s2 is the empty
    # string: find("") succeeds without advancing the cursor, so the
    # replacement loop below would never terminate.
    #

    local result, i
    result := ""
    i := *s2
    if i = 0 then fail			# would loop on empty string

    s1 ? {
	# Copy the text before each match, then the replacement, and
	# step over the matched text.
	while result ||:= tab(find(s2)) do {
	    result ||:= s3
	    move(i)
	}
	# Append whatever follows the last match.
	return result || tab(0)
    }

end


procedure is_it_a_verse()

    local tmp, start, ref

    #
    # Can the first bit of text in &subject possibly be construed as a
    # verse reference (with typos)?  Let's see.
    #
    # Operates on the caller's scanning environment.  On success the
    # verse number is returned and the cursor is left just past the
    # reference.  On failure the cursor is put back where it was on
    # entry, so the caller can still consume the whole line.
    #

    start := &pos

    if ref := (
	# I've seen "I 1." for 11.
	(="I 1. ", 11) |
	# I've seen "I." or "l." for "1."
	(tab(any('lI')), =".", 1) |
	# I've even seen "S." for "5."
	(="S", =".", 5) |
	# The ordinary case: digits followed by '.', ':' or a space.
	1(tab(many(&digits)), tab(any('.: '))) | {
	# If it's none of the above, then as long as it's at least two
	# chars, try lots of mapping.  If I took away the restriction
	# that the sequence be at least two chars, I'd get lines which
	# begin with the word "I" (as in "I am").  No go.
	#
	# The candidate is the text through the first space plus
	# anything up to the next upper-case letter.
	    (tab(find(" ")+1) || tab(upto(&ucase))) \ 1 ? {
		*(tmp := 1(tab(many(&digits++'IOlS')), tab(any(':., ')))) > 1 &
		    integer(map(tmp, "IOlS", "1015"))
	    }
	}
    )
    then return ref

    # The limitation (\ 1) above prevents the tab() calls from being
    # resumed, so their cursor movement is not undone when the
    # candidate is rejected.  Undo it explicitly.
    &pos := start
    fail

end