Newsgroups: alt.sources
From: goer@ellis.uchicago.edu (Richard L. Goerwitz)
Subject: kjv browser, part 8 of 11
Message-ID: <1991Jul3.065222.28343@midway.uchicago.edu>
Date: Wed, 3 Jul 1991 06:52:22 GMT

---- Cut Here and feed the following to sh ----
#!/bin/sh
# this is bibleref.08 (part 8 of a multipart archive)
# do not concatenate these parts, unpack them in order with /bin/sh
# file findre.icn continued
#
# The sequence file must already exist and be readable; the message below
# indicates it is expected to have been left behind by part 1.
if test ! -r _shar_seq_.tmp; then
	echo 'Please unpack part 1 first!'
	exit 1
fi
# Read the part number recorded in the sequence file.  The check runs in a
# subshell so that its exit status (0 only when the number is 8, i.e. this
# part) can be tested; any other status aborts the whole script.
(read Scheck
 if test "$Scheck" != 8; then
	echo Please unpack part "$Scheck" next!
	exit 1
 else
	exit 0
 fi
) < _shar_seq_.tmp || exit 1
if test ! -f _shar_wnt_.tmp; then
	echo 'x - still skipping findre.icn'
else
echo 'x - continuing file findre.icn'
sed 's/^X//' << 'SHAR_EOF' >> 'findre.icn' &&
X			tmp := tab(many('*?+')) | &null
X			if upto('*?',\tmp)
X			then put(token_list,-ord("*"))
X			else put(token_list,-ord("+"))
X		    }
X		    "?"    : {
X			tmp := tab(many('*?+')) | &null
X			if upto('*+',\tmp)
X			then put(token_list,-ord("*"))
X			else put(token_list,-ord("?"))
X		    }
X		    "("    : {
X			tab(many('*+?'))
X			put(token_list,-ord("("))
X		    }
X		    default: {
X			put(token_list,-ord(chr))
X		    }
X		}
X	    }
X	    else {
X		case chr of {
X		    # More egrep compatibility stuff.
X		    "["    : {
X			b_loc := find("[") | *&subject+1
X			every next_one := find("]",,,b_loc)
X			\next_one ~= &pos | err_out(s,2,chr)
X			put(token_list,-ord(chr))
X		    }
X                    "]"    : {
X			if &pos = (\next_one+1)
X			then put(token_list,-ord(chr)) &
X			     next_one := &null
X			else put(token_list,ord(chr))
X		    }
X		    default: put(token_list,ord(chr))
X		}
X	    }
X	}
X    }
X
X    token_list := UnMetaBrackets(token_list)
X
X    fixed_length_token_list := list(*token_list)
X    every i := 1 to *token_list
X    do fixed_length_token_list[i] := token_list[i]
X    return fixed_length_token_list
X
Xend
X
X
X
Xprocedure UnMetaBrackets(l)
X
X    # Brackets delineate a cset, so it makes no sense to have
X    # metacharacters inside of them.  UnMetaBrackets returns a new
X    # token list in which every token lying between a meta "[" and
X    # the next meta "]" has been replaced by its absolute value (that
X    # is, turned into an ordinary literal).  The bracket tokens them-
X    # selves, and everything outside brackets, are copied unchanged.
X    #
X    # l is a list of integers: metacharacters are stored as negative
X    # ordinals (-ord("[") etc.), literals as positive ones.
X    #
X    # If a meta "[" is never closed, the problem is reported through
X    # err_out rather than stepping past the end of l forever.
X
X    local tmplst, i, Lb, Rb
X
X    tmplst := list(); i := 0
X    Lb := -ord("[")
X    Rb := -ord("]")
X
X    while (i +:= 1) <= *l do {
X	put(tmplst,l[i])
X	if l[i] = Lb then {
X	    # Copy the bracketed tokens, dropping their "meta" sign.
X	    # The loop stops at the closing bracket, or when l[i]
X	    # fails because i has run off the end of the list.
X	    while l[i +:= 1] ~= Rb
X	    do put(tmplst,abs(l[i]))
X	    # l[i] fails here only if no closing bracket was found.
X	    put(tmplst,l[i]) | err_out(l,2,"[")
X	}
X    }
X    return tmplst
X
Xend
X
X
X
Xprocedure MakeFSTN(l,INI,FIN)
X
X    # MakeFSTN recursively descends through the tree structure
X    # implied by the tokenized string, l, recording in (global)
X    # state_table a list of operations to be performed, and the
X    # initial and final states which apply to them.
X    #
X    # l   - list of integer tokens.  Metacharacters are negative
X    #       ordinals (see the static initialisation below); literal
X    #       characters are positive ordinals.
X    # INI - state from which the transitions for l start.  When it is
X    #       null (outermost call) it becomes 1, and state_table, the
X    #       NextState counter and biggest_nonmeta_str are all reset.
X    # FIN - state reached once l has been matched (defaults to 0).
X    #
X    # Every branch below ends in "return" after filling in
X    # state_table; nothing useful is returned to the caller.
X    #
X    # NOTE(review): o_a_s and apply_FSTN are defined outside this
X    # chunk.  o_a_s is presumably a record of (operation, argument,
X    # next state) - confirm against its declaration.
X
X    local i, inter, inter2, tmp, Op, Arg
X    static Lp, Rp, Sl, Lb, Rb, Caret_inside, Dot, Dollar, Caret_outside
X    # global biggest_nonmeta_str, slash_present, parends_present
X    initial {
X	# Token values for the metacharacters.  Caret_inside is the only
X	# positive one: it is compared against a token found inside
X	# brackets (l[2] in the bracket section below).
X	Lp := -ord("("); Rp := -ord(")")
X	Sl := -ord("|")
X	Lb := -ord("["); Rb := -ord("]"); Caret_inside := ord("^")
X	Dot := -ord("."); Dollar := -ord("$"); Caret_outside := -ord("^")
X    }
X
X    # Outermost call only (INI is null): start with a clean slate.
X    /INI := 1 & state_table := table() &
X    NextState("new") & biggest_nonmeta_str := ""
X    /FIN := 0
X
X    # I haven't bothered to test for empty lists everywhere.
X    # An empty token list matches trivially: go from INI to FIN via
X    # zSucceed.
X    if *l = 0 then {
X	/state_table[INI] := []
X	put(state_table[INI],o_a_s(zSucceed,&null,FIN))
X	return
X    }
X
X    # HUNT DOWN THE SLASH (ALTERNATION OPERATOR)
X    # The "slash" is the vertical bar (Sl = -ord("|")).  A bar counts
X    # only where tab_bal reports equal numbers of "(" and ")" tokens
X    # up to that index, i.e. outside any parenthesised group.
X    every i := 1 to *l do {
X	if l[i] = Sl & tab_bal(l,Lp,Rp) = i then {
X	    # A bar with nothing to its left is an error.
X	    if i = 1 then err_out(l,2,char(abs(l[i]))) else {
X		/slash_present := "yes"
X		inter := NextState()
X		inter2:= NextState()
X		# Left alternative starts at inter2, right one at inter;
X		# both finish at FIN.  INI offers both as choices.
X		MakeFSTN(l[1:i],inter2,FIN)
X		MakeFSTN(l[i+1:0],inter,FIN)
X		/state_table[INI] := []
X		put(state_table[INI],o_a_s(apply_FSTN,inter2,0))
X		put(state_table[INI],o_a_s(apply_FSTN,inter,0))
X		return
X	    }
X	}
X    }
X
X    # HUNT DOWN PARENTHESES
X    if l[1] = Lp then {
X	# i is the index of the ")" that balances the leading "(".
X	i := tab_bal(l,Lp,Rp) | err_out(l,2,"(")
X	inter := NextState()
X	# "0 > l[i+1]" succeeds only for a negative (meta) token, and
X	# fails if there is no token after the group.
X	if any('*+?',char(abs(0 > l[i+1]))) then {
X	    case l[i+1] of {
X		-ord("*")   : {
X		    # Zero or more: INI may jump straight to inter, or
X		    # run the group, which leads back to INI.
X		    /state_table[INI] := []
X		    put(state_table[INI],o_a_s(apply_FSTN,inter,0))
X		    MakeFSTN(l[2:i],INI,INI)
X		    MakeFSTN(l[i+2:0],inter,FIN)
X		    return
X		}
X		-ord("+")   : {
X		    # One or more: the group leads from INI to inter2;
X		    # inter2 may exit to inter or run the group again.
X		    inter2 := NextState()
X		    /state_table[inter2] := []
X		    MakeFSTN(l[2:i],INI,inter2)
X		    put(state_table[inter2],o_a_s(apply_FSTN,inter,0))
X		    MakeFSTN(l[2:i],inter2,inter2)
X		    MakeFSTN(l[i+2:0],inter,FIN)
X		    return
X		}
X		-ord("?")   : {
X		    # Zero or one: INI may jump straight to inter, or
X		    # reach it by way of the group.
X		    /state_table[INI] := []
X		    put(state_table[INI],o_a_s(apply_FSTN,inter,0))
X		    MakeFSTN(l[2:i],INI,inter)
X		    MakeFSTN(l[i+2:0],inter,FIN)
X		    return
X		}
X	    }
X	}
X	else {
X	    # Plain group: group contents, then whatever follows ")".
X	    MakeFSTN(l[2:i],INI,inter)
X	    MakeFSTN(l[i+1:0],inter,FIN)
X	    return
X	}
X    }
X    else {     # I.E. l[1] NOT = Lp (left parenthesis as -ord("("))
X	# Split the list at the first "(" that occurs later on: the
X	# prefix is handled first, then the part starting at "(".
X	# Meeting a ")" before any "(" is an error.
X	every i := 1 to *l do {
X	    case l[i] of {
X		Lp     : {
X		    inter := NextState()
X		    MakeFSTN(l[1:i],INI,inter)
X		    /parends_present := "yes"
X		    MakeFSTN(l[i:0],inter,FIN)
X		    return
X		}
X		Rp     : err_out(l,2,")")
X	    }
X	}
X    }
X
X    # NOW, HUNT DOWN BRACKETS
X    if l[1] = Lb then {
X	i := tab_bal(l,Lb,Rb) | err_out(l,2,"[")
X	inter := NextState()
X	# Turn the tokens between the brackets into a string, expand
X	# any ranges in it, and make a cset of the result (comple-
X	# mented when the first bracketed token is "^").
X	tmp := ""; every tmp ||:= char(l[2 to i-1])
X	if Caret_inside = l[2]
X	then tmp := ~cset(Expand(tmp[2:0]))
X	else tmp :=  cset(Expand(tmp))
X	# The *, + and ? cases mirror those for parentheses above, with
X	# any(tmp) taking the place of the recursively built group.
X	if any('*+?',char(abs(0 > l[i+1]))) then {
X	    case l[i+1] of {
X		-ord("*")   : {
X		    /state_table[INI] := []
X		    put(state_table[INI],o_a_s(apply_FSTN,inter,0))
X		    put(state_table[INI],o_a_s(any,tmp,INI))
X		    MakeFSTN(l[i+2:0],inter,FIN)
X		    return
X		}
X		-ord("+")   : {
X		    inter2 := NextState()
X		    /state_table[INI] := []
X		    put(state_table[INI],o_a_s(any,tmp,inter2))
X		    /state_table[inter2] := []
X		    put(state_table[inter2],o_a_s(apply_FSTN,inter,0))
X		    put(state_table[inter2],o_a_s(any,tmp,inter2))
X		    MakeFSTN(l[i+2:0],inter,FIN)
X		    return
X		}
X		-ord("?")   : {
X		    /state_table[INI] := []
X		    put(state_table[INI],o_a_s(apply_FSTN,inter,0))
X		    put(state_table[INI],o_a_s(any,tmp,inter))
X		    MakeFSTN(l[i+2:0],inter,FIN)
X		    return
X		}
X	    }
X	}
X	else {
X	    /state_table[INI] := []
X	    put(state_table[INI],o_a_s(any,tmp,inter))
X	    MakeFSTN(l[i+1:0],inter,FIN)
X	    return
X	}
X    }
X    else {           # I.E. l[1] not = Lb
X	# Split at the first "[" further along; a "]" met before any
X	# "[" is an error.
X	every i := 1 to *l do {
X	    case l[i] of {
X		Lb     : {
X		    inter := NextState()
X		    MakeFSTN(l[1:i],INI,inter)
X		    MakeFSTN(l[i:0],inter,FIN)
X		    return
X		}
X		Rb     : err_out(l,2,"]")
X	    }
X	}
X    }
X
X    # FIND INITIAL SEQUENCES OF POSITIVE INTEGERS, CONCATENATE THEM
X    # i is the first position after the literal run, so l[1:i] is the
X    # run itself and l[i:0] is what remains.
X    if i := match_positive_ints(l) then {
X	inter := NextState()
X	tmp := Ints2String(l[1:i])
X	# if a slash has been encountered already, forget optimizing
X	# in this way; if parends are present, too, then forget it,
X	# unless we are at the beginning or end of the input string
X	# ("&" binds more loosely than "|", so the three alternatives
X	# on the first line form a single operand of the conjunction.)
X	# NOTE(review): FIN = 2 presumably identifies the final state
X	# supplied by the outermost caller - confirm.
X	if  INI = 1 | FIN = 2 | /parends_present &
X	    /slash_present & *tmp > *biggest_nonmeta_str
X	then biggest_nonmeta_str := tmp
X	/state_table[INI] := []
X	put(state_table[INI],o_a_s(match,tmp,inter))
X	MakeFSTN(l[i:0],inter,FIN)
X	return
X    }
X
X    # OKAY, CLEAN UP ALL THE JUNK THAT'S LEFT
X    # Only the first token is actually handled here: every path
X    # through the loop body returns, and the rest of the list is
X    # dealt with by the recursive calls.
X    i := 0
X    while (i +:= 1) <= *l do {
X	# Choose the operation for this token.  "0 < l[i]" fails for
X	# any other negative token, which sends us to err_out.
X	case l[i] of {
X	    Dot          : { Op := any;   Arg := &cset }
X	    Dollar       : { Op := pos;   Arg := 0     }
X	    Caret_outside: { Op := pos;   Arg := 1     }
X	    default      : { Op := match; Arg := char(0 < l[i]) }
X	} | err_out(l,2,char(abs(l[i])))
X	inter := NextState()
X	# Same *, + and ? wiring as for brackets, with Op/Arg as the
X	# single-token test.
X	if any('*+?',char(abs(0 > l[i+1]))) then {
X	    case l[i+1] of {
X		-ord("*")   : {
X		    /state_table[INI] := []
X		    put(state_table[INI],o_a_s(apply_FSTN,inter,0))
X		    put(state_table[INI],o_a_s(Op,Arg,INI))
X		    MakeFSTN(l[i+2:0],inter,FIN)
X		    return
X		}
X		-ord("+")   : {
X		    inter2 := NextState()
X		    /state_table[INI] := []
X		    put(state_table[INI],o_a_s(Op,Arg,inter2))
X		    /state_table[inter2] := []
X		    put(state_table[inter2],o_a_s(apply_FSTN,inter,0))
X		    put(state_table[inter2],o_a_s(Op,Arg,inter2))
X		    MakeFSTN(l[i+2:0],inter,FIN)
X		    return
X		}
X		-ord("?")   : {
X		    /state_table[INI] := []
X		    put(state_table[INI],o_a_s(apply_FSTN,inter,0))
X		    put(state_table[INI],o_a_s(Op,Arg,inter))
X		    MakeFSTN(l[i+2:0],inter,FIN)
X		    return
X		}
X	    }
X	}
X	else {
X	    /state_table[INI] := []
X	    put(state_table[INI],o_a_s(Op,Arg,inter))
X	    MakeFSTN(l[i+1:0],inter,FIN)
X	    return
X	}
X    }
X
X    # WE SHOULD NOW BE DONE INSERTING EVERYTHING INTO state_table
X    # IF WE GET TO HERE, WE'VE PARSED INCORRECTLY!
X    err_out(l,4)
X
Xend
X
X
X
Xprocedure NextState(new)
X
X    # State-number dispenser.  A non-null argument restarts the
X    # numbering at 1; with a null (or omitted) argument the stored
X    # number is advanced by one.  The resulting number is returned
X    # in either case.
X
X    static nextstate
X
X    nextstate := if \new then 1 else nextstate + 1
X    return nextstate
X
Xend
X
X
X
Xprocedure err_out(x,i,elem)
X
X    # Writes a one-line parsing error message to standard error
X    # output and terminates the program with exit status i.  x is
X    # the object that was being parsed (shown via image()); elem,
X    # when non-null, is the offending element, otherwise "(?)" is
X    # printed in its place.
X
X    local culprit
X
X    culprit := image(\elem) | "(?)"
X    write(&errout,"Error number ",i," parsing ",image(x)," at ",culprit,".")
X    exit(i)
X
Xend
X
X
X
Xprocedure zSucceed()
X
X    # Always succeeds, returning the (dereferenced) value of the
X    # current scanning position without moving it.
X
X    local here
X
X    here := &pos
X    return here
X
Xend
X
X
X
Xprocedure Expand(s)
X
X    # Expands character ranges of the form "a-e" found in s into the
X    # full run of characters ("abcde") and returns the resulting
X    # string.  A leading "^" and/or "-" is copied through literally.
X    # A pair "x-" whose range cannot be expanded is copied as is,
X    # unless doing so reaches the end of s, in which case the
X    # problem is reported through err_out.
X
X    local out, lo, hi
X
X    out := ""
X    s ? {
X	out ||:= ="^"
X	out ||:= ="-"
X	# Copy everything up to the character just before the next "-".
X	while out ||:= tab(find("-") - 1) do {
X	    # On failure the scanning position backs up to where it
X	    # was before lo was picked off.
X	    if (lo := move(1)) & (="-") & (hi := move(1)) & (lo << hi)
X	    then every out ||:= char(ord(lo) to ord(hi))
X	    else out ||:= 1(move(2), not(pos(0))) | err_out(s,2,"-")
X	}
X	# Whatever is left contains no more dashes.
X	out ||:= tab(0)
X    }
X    return out
X
Xend
X
X
X
Xprocedure tab_bal(l,i1,i2)
X
X    # Generator.  Walks list l from left to right keeping separate
X    # tallies of the elements identical to i1 and to i2, and
X    # suspends every index at which the two tallies are equal.
X    # Fails once the list is exhausted.
X
X    local idx, n_open, n_close
X
X    n_open := n_close := 0
X    every idx := 1 to *l do {
X	if l[idx] === i1 then n_open +:= 1
X	else if l[idx] === i2 then n_close +:= 1
X	if n_open = n_close then suspend idx
X    }
X
Xend
X
X
Xprocedure match_positive_ints(l)
X
X    # Matches the longest run of non-negative integers at the start
X    # of l that neither contains, nor is immediately followed by, a
X    # negative integer, and returns the first position after that
X    # run.  Given [55, 55, 55, -42, 55] the result is 3.  If the
X    # first negative integer sits at index 3 or lower the procedure
X    # fails - so [55, -42] fails rather than returning 1 (NOTE
X    # WELL!).  A list with no negative integers yields *l + 1.
X
X    local idx
X
X    every idx := 1 to *l do
X	if l[idx] < 0 then {
X	    if idx > 3 then return idx - 1
X	    fail
X	}
X    return *l + 1
X
Xend
X
X
Xprocedure Ints2String(l)
X
X    # Builds a string from a list of character codes: each element
X    # of l is passed through char() and the results are joined in
X    # list order.
X
X    local str, idx
X
X    str := ""
X    every idx := 1 to *l do
X	str ||:= char(l[idx])
X    return str
X
Xend
X
X
Xprocedure StripChar(s,s2)
X
X    # Returns s with every occurrence of s2 removed.  After each
X    # occurrence found, any immediately following run of characters
X    # drawn from s2 is skipped as well.  If s2 is empty or does not
X    # occur in s, s itself is returned.
X    #
X    # The search must use the parameter s2; looking for the literal
X    # string "s2" meant that nothing was ever stripped.
X
X    local tmp
X
X    # An empty s2 is "found" everywhere and would never let the scan
X    # advance, so it is treated as nothing to strip.
X    if *s2 > 0 & find(s2,s) then {
X	tmp := ""
X	s ? {
X	    while tmp ||:= tab(find(s2))
X	    do tab(many(cset(s2)))
X	    tmp ||:= tab(0)
X	}
X    }
X    return \tmp | s
X
Xend
SHAR_EOF
echo 'File findre.icn is complete' &&
true || echo 'restore of findre.icn failed'
rm -f _shar_wnt_.tmp
fi
# ============= kjv2rtv.icn ==============
if test -f 'kjv2rtv.icn' -a X"$1" != X"-c"; then
	echo 'x - skipping kjv2rtv.icn (File already exists)'
	rm -f _shar_wnt_.tmp
else
> _shar_wnt_.tmp
echo 'x - extracting kjv2rtv.icn (Text)'
sed 's/^X//' << 'SHAR_EOF' > 'kjv2rtv.icn' &&
X############################################################################
X#
X#	Name:	 kjv2rtv.icn
X#
X#	Title:	 kjv2rtv  (KJV -> retrieve format converter)
X#
X#	Author:	 Richard L. Goerwitz
X#
X#	Version: 1.5
X#
X############################################################################
X#
X#  Program for converting PD KJV biblical texts into retrieve format.
X#  Reads standard input.  Writes reformatted text to standard output.
X#  Assumes the specific PC-SIG KJV format for input files.  If you
X#  have a KJV text that has been "tampered" with, this program may not
X#  work correctly.  And then again....
X#
X############################################################################
X#
X#  Links: complete.icn ./convertr.icn ./name2num.icn
X#
X############################################################################
X
X
Xprocedure main()
X
X    # Reads KJV text from standard input and writes it to standard
X    # output in retrieve format: each verse is preceded by a line
X    # consisting of "::" plus the bitmap convertr produces for its
X    # book ch:vs reference, and the verse text follows on one line.
X
X    local line, bitmap, verse
X
X    # While you can read lines from stdin...
X    while line := read() do {
X
X	# ...scan them for book ch:vs references, and output these in
X	# retrieve format, along with corresponding text.
X	line ? {
X
X	    # Housekeeping.
X	    pos(0) & next	# skip past empty lines
X	    ="\x1F"		# tab past ASCII 31 (if present)
X	    tab(many('\t '))    # tab past whitespace (if present)
X
X	    # If the line begins with a book ch:vs reference (the text
X	    # up to the first double space, as judged by convertr),
X	    # then write out the text of the preceding verse (if in
X	    # fact there *was* a preceding verse).  Finally, write out
X	    # the new book ch:vs reference (in retrieve format).
X	    if bitmap := convertr(tab(find("  "))) then {
X		write(REplace("" ~== trim(\verse, '\t \x0D'), "  ", " "))
X		write("::", bitmap)
X		tab(many(' \t'))
X		verse := trim(tab(0), '\t \x0D')
X	    } else {
X		# Dump the (rest of) the line onto verse.  Text that
X		# turns up before any reference has been seen is
X		# skipped: verse is still null at that point, and
X		# appending to a null value is a run-time error.
X		\verse ||:= " " || ("" ~== trim(tab(0), '\t \x0D'))
X	    }
X	}
X    }
X    # Flush the "verse" buffer.
X    write(REplace("" ~== trim(\verse, '\t \x0D'), "  ", " "))
X
X    exit(0)
X
Xend
X
X
X#
X# From strings.icn in the IPL (written by Ralph Griswold).
X#
Xprocedure REplace(s1,s2,s3)
X
X    # Returns a copy of s1 in which each occurrence of s2, found in
X    # a single left-to-right pass, is replaced by s3.  Fails if s2
X    # is the empty string: an empty pattern matches at every
X    # position without consuming anything, so the loop below would
X    # never terminate.
X
X    local result, i
X    result := ""
X    i := *s2
X    if i = 0 then fail
X
X    s1 ? {
X	while result ||:= tab(find(s2)) do {
X	    result ||:= s3
X	    move(i)		# step over the matched s2
X	}
X	return result || tab(0)
X    }
X
Xend
SHAR_EOF
true || echo 'restore of kjv2rtv.icn failed'
rm -f _shar_wnt_.tmp
fi
# ============= convertr.icn ==============
if test -f 'convertr.icn' -a X"$1" != X"-c"; then
	echo 'x - skipping convertr.icn (File already exists)'
	rm -f _shar_wnt_.tmp
else
> _shar_wnt_.tmp
echo 'x - extracting convertr.icn (Text)'
sed 's/^X//' << 'SHAR_EOF' > 'convertr.icn' &&
X############################################################################
X#
X#	Name:	 convertr.icn
X#
X#	Title:	 convert KJV book chap:verse reference to a 
X#                writable bitmap suitable for a retrieve text-base
X#                file
X#
X#	Author:	 Richard L. Goerwitz
X#
X#	Version: 1.3
X#
X############################################################################
X#
X#  Links: complete.icn, ./name2num.icn
X#
X############################################################################
X
Xprocedure convertr(s)
X
X    # Converts a reference string s (a book name, optionally pre-
X    # ceded by one of the digits 1-4, followed by numeric fields)
X    # into a string of the form "book:field:field", where book is
X    # whatever name2num returns for the book name.
X    #
X    # Fails if s does not start with something name2num accepts as
X    # a book name.  If a book name is recognised but the number of
X    # numeric fields that follow is not exactly two, the program is
X    # terminated via stop().
X
X    local bitmap, bookname, book_numeric, no
X
X    no       := 2		# numeric fields still expected
X    bookname := ""
X
X    s ? {
X
X	# Find book name, convert it to an integer.
X	bookname ||:= tab(any('1234'));	tab(many(' '))
X	bookname ||:= tab(many(&letters++&digits)) | fail
X	book_numeric :=  name2num(bookname) | fail
X	bitmap := book_numeric || ":"
X	
X	# Get the numeric (chapter and verse) fields.  Tack them onto
X	# bitmap, counting down the number still expected.
X	while tab(upto(&digits)) do {
X	    no -:= 1
X	    bitmap ||:= tab(many(&digits)) || ":"
X	}
X	# A non-zero count means too many or too few fields.
X	no ~= 0 & stop("convertr:  impossible reference ",image(&subject))
X    }
X
X    # no is necessarily 0 here (stop() has already ended the program
X    # otherwise), so no further check is needed.
X    return trim(bitmap, ':')
X
Xend
SHAR_EOF
true || echo 'restore of convertr.icn failed'
rm -f _shar_wnt_.tmp
fi
# ============= makeind.icn ==============
if test -f 'makeind.icn' -a X"$1" != X"-c"; then
	echo 'x - skipping makeind.icn (File already exists)'
	rm -f _shar_wnt_.tmp
else
> _shar_wnt_.tmp
echo 'x - extracting makeind.icn (Text)'
sed 's/^X//' << 'SHAR_EOF' > 'makeind.icn' &&
X############################################################################
X#
X#	Name:	 makeind.icn
X#
X#	Title:	 makeind.icn
X#
X#	Author:	 Richard L. Goerwitz
X#
X#	Version: 1.24
X#
X############################################################################
X#
X#  This file, makeind.icn, compiles into an indexing program which
X#  creates a series of files offering the user rapid access to
X#  individual elements (usually words) within a text file.  Access is
X#  gained through a set of basic retrieval utilities contained in the
X#  files retrieve.icn, bmp2text.icn, retrops.icn, and others included
X#  with this package.  In order to be indexable, files must interleave
X#  string coded bitfield-style designators with text in the following
X#  manner:
X#
X#  ::001:001:001
X#  This is text.
X#  ::001:001:002
X#  This is more text.
X#
X#  The lines beginning with :: (a double colon) mark bitfield-style
X#  location-designators.  Location designators are strings with digit
X#  fields of fixed number and length separated either by nothing (as
X#  in, say 001001002), or better yet by non-digits (e.g. 001:001:002).
X#  NOTE WELL: The bitmaps must come in ascending order.  For example,
X#  if we assume three-field bitmaps, 002:001:014 would come before
X#  003:001:013.  If your file is not sorted properly, then use the
X#  utility, sorttxt provided as a part of this distribution.
X#
X#  usage:  makeind -f filename -m int -n int [-l int] [-s]
X#
X#  When calling makeind, you must specify the filename to be indexed
X#  (-f filename), the maximum field value (-m max-value; e.g. if
X#  fields can go from 0 to 255, then -m 255 would be used), and the
X#  number of fields (-n field-number).  The -s switch directs makeind
X#  to create a case-sensitive index.  The default is case-insensitive.
X#  -l [int] tells makeind to create a .LIM file, which is only needed
X#  if you want to retrieve text by location marker, and not just via
X#  the index (for this, you'll need something to translate human-
X#  readable references into retrieve's native format).
X#
X#  BUGS: This indexing routine is going to eat up a _tremendous_
X#  amount of memory when used on large files, since every token in the
X#  input file gets its own entry in wordtbl, and each entry gets a set
X#  as its corresponding value.  If you don't have the memory, then you
X#  could use strings instead of sets (the insert routines will be just
X#  a tiny bit more complicated).  Intermediate files could also be
X#  used.  Drop me a line if you want help.  Otherwise, make sure you
X#  have at *least* two megabytes core for every megabyte of text in
X#  the file you wish to index (or else a very, very good virtual
X#  memory management system).
X#
X#  NOTE: The -S [field-sep] option is currently disabled because using
X#  it slows things down drastically.  If you want to be able to
X#  specify what separator to use when breaking files down into
X#  individual words, consult ./gettokens.icn.
X#
X############################################################################
X#
X#  Links: options.icn, codeobj.icn, ./indexutl.icn ./gettokens.icn
X#
X#  See also: retrieve.icn, bmp2text.icn, expandrf.icn
X#
X############################################################################
X
X# IPL files to be linked in at compile time.
Xlink options, codeobj
X
X# Global variable (for OS-dependencies).
X# global IS			# declared in indexutl.icn
X
X# Is is a record containing vital information on an indexed file, such
X# as the field separator, the string-length of fields, etc.  I've re-
X# moved the record declaration from this file, and placed it in index-
X# utl.icn.
X# record is(FS, s_len, len, no, is_case_sensitive, r_field)
X
X#
X# Main procedure.
X#
Xprocedure main(a)
X
X    local usage, opt_table, fname, rollover_field, index_fname,
X	bitmap_fname, upto_field, bofname, bitmap_offset_table,
X	out_IS, limits_fname
X    # global IS			# IS contains stats for file being indexed
X
X    #
X    # Initialize global OS-related parameters, such as the directory
X    # separator (_slash) and the maximum permissible filename length
X    # minus four (to make room for extensions makeind tacks on).
X    #
X    initialize_os_params()
X
X    #
X    # Read in and check command argument list.  Insert FS and no
X    # parameters into (global) record IS.  Calculate s_len, len, and
X    # bitmap_length parameters as well.  Returns table of options
SHAR_EOF
true || echo 'restore of makeind.icn failed'
fi
echo 'End of  part 8'
echo 'File makeind.icn is continued in part 9'
echo 9 > _shar_seq_.tmp
exit 0
-- 

   -Richard L. Goerwitz              goer%sophist@uchicago.bitnet
   goer@sophist.uchicago.edu         rutgers!oddjob!gide!sophist!goer
