#!/bin/sh

#-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
# simtel2html -- represent the SimTel Software Repository file index as a
#                hypertext (HTML) document.
#
# How to use this script.
# -----------------------
#
# 1) Change the first block of variable assignments below (marked "CHANGE
#    ME"), as needed.  These determine what site to fetch the index from
#    (variables "site" and "root"), who you are (variable "address"), where
#    you want the output file, simtel_index.html, to be stored (variable
#    "dir"), which of two output formats you want (variable "style"), and
#    where awk and unzip are found on your system (variables "awk" and
#    "unzip"). 
#
#    If you want to connect to SimTel's home site, you may leave site and
#    root alone and change the other variables.
#
# 2) This script can do the conversion in either of two ways.  If the style
#    variable is set to "single", then the index is converted to a single
#    html document, roughly 1.4 megabytes long.  This isn't a problem for me,
#    but using it might be too slow on some machines.  Setting style to
#    "multiple" causes the index to be converted into one html document per
#    SimTel directory, with a top level document that points to them.  If you
#    set style=multiple, you MUST have an implementation of awk that doesn't
#    curl up and die when it opens more than a few files.
#
# 3) Make a link from your home page (or some other html document) to the
#    index.  It could look something like this:
#
#       Thousands of DOS and Windows files may be found at the mighty
#       <a href="file://localhost/some_path/simtel_index.html">
#       SimTel Software Repository</a>.
#
#    where "some_path" is the same path to which you changed the "dir"
#    variable in step 1.
#
# 4) Run the simtel2html script.  If it ends with the message
#    "Created simtel_index.html." then everything worked fine.
#
# 5) Run Mosaic (or your favorite browser), and click on the link you
#    established in step 3.  Be prepared to wait for a few seconds if you
#    set style="single".
#
# 6) Click on any file name to fetch the file.  Mosaic will automatically
#    recognize that .zip files ought to be copied to a local file, rather
#    than displayed -- I don't know about other browsers.  You may also click
#    on a directory name to open an ftp session in that directory, not that
#    that really buys you anything.
#
# Notes/bugs.
# -----------
#
# 1) Padding the file names with underscores is hokey, but the version of
#    Mosaic I'm running doesn't honor trailing blanks in <tt> mode.  If
#    you have a better way of lining up the file names and descriptions, you
#    can just change the appropriate printf down below.
#
# 2) Last time I released a shell script for public consumption, I got lots
#    of mail asking "how come this doesn't work at my site?"  Please ask
#    someone local, since I hereby promise I don't know anything about the
#    version of unix you're running :o)  But if you're really stuck, go ahead
#    and ask me anyway.
#
# 3) You'll need a unix version of unzip.
#
# 4) The message "awk: too many output files" means your awk is broken.  Find
#    a better one or set style=single.
#
# Author:  Jonathan C. Rice   rice@zizania.cray.com
#
# History: 1994/05/09 1.0 First public release.
#          1994/06/21 1.1 Change SIMIBM.LST to simibm.lst, to reflect a
#                         name change by SimTel.
#                         Allow specification of the file path for unzip.
#          1994/07/14 1.2 SimTel's index file name changed *back* to upper
#                         case -- script updated to allow either version.
#
#-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-



#-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=#
#          CHANGE ME             #
#-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=#

# Where do you keep your html files?  (Omit the trailing slash on the
# directory name.)
dir="${HOME}/www"

# What is your internet address?  It is sent as the password of the
# anonymous ftp login.
address="${USER}@some_site.com"

# Output format, "single" or "multiple" -- see note 2 in "how to use," above.
style='single'

# Where is a good implementation of awk?
awk='/opt/gnu/bin/gawk'

# Where is unzip?
unzip="${HOME}/bin/unzip"

# What SimTel mirror is handiest to you?
site='oak.oakland.edu'

# What is the top-level directory there?  (Omit leading and trailing slash.)
root='SimTel/msdos'

#-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=#


# With any luck you shouldn't have to change anything below this line.

# File names used while building the index:
#   zipfile  - the index as a zip
#   listfile - the index as text (the unzip step further down may switch
#              this name to lower case, so it must stay writable)
#   outfile  - prefix of all .html files generated
zipfile='simlist.zip'
listfile='SIMIBM.LST'
outfile='simtel_index'

# Fetch the zipped index by anonymous ftp (unless a non-empty copy is already
# sitting in $dir) and extract the plain-text listing from it.

# Everything from here on works relative to $dir, starting with the cleanup
# just below, so stop right away if the directory cannot be entered instead
# of deleting and downloading files in whatever directory we started in.
cd "${dir:?}" || {
  echo "Cannot change to directory $dir." >&2
  exit 1
}

# Discard pages left over from an earlier run; the awk step appends to its
# output files.  -f keeps rm from waiting on a prompt nobody can see, and
# ${outfile:?} refuses to expand to a bare "*" should the prefix be empty.
rm -f -- "${outfile:?}"* 1>/dev/null 2>&1

if [ ! -s "$zipfile" ]; then
  # The here-document is expanded by the shell before ftp reads it.
  ftp -vin "$site" <<FTPEOF
user anonymous $address
lcd $dir
binary
cd /$root/filedocs
get $zipfile
quit
FTPEOF
fi

if [ ! -s "$zipfile" ]; then
  echo "Failed to ftp $zipfile." >&2
  exit 1
fi

# The listing has been stored under both an upper-case and a lower-case
# name over time: try the name as configured first, then its lower-case
# spelling.
"$unzip" -o "$zipfile" "$listfile"
if [ ! -s "$listfile" ]; then
  listfile=$(echo "$listfile" | tr A-Z a-z)
  "$unzip" -o "$zipfile" "$listfile"
  if [ ! -s "$listfile" ]; then
    echo "Failed to unzip $listfile." >&2
    exit 1
  fi
fi

# Scan the index for directory and file names, building the .html file or
# files along the way.  The tr command discards carriage returns, which
# if left in would prevent awk from recognizing blank lines.

# Note: the awk program uses NR==1 instead of BEGIN because, in all three awk
# implementations the original author tried, command line variables were not
# defined in the scope of the BEGIN pattern.

# simtel_to_html AWK SITE ROOT STYLE PREFIX
#
# Reads the SimTel text index on standard input and writes the hypertext
# version into the current directory.
#   AWK    - awk implementation to run
#   SITE   - ftp host name used in the generated links
#   ROOT   - top-level directory on SITE (no leading or trailing slash)
#   STYLE  - "multiple" writes one page per SimTel directory plus a top-level
#            page that links to them; any other value puts everything into
#            PREFIX.html
#   PREFIX - file name prefix of every page written
# The pages are opened in append mode, so stale copies have to be removed
# before calling this.  Text taken from the index (directory names, file
# names, descriptions) is HTML-escaped where it is displayed, because the
# index is downloaded data and may contain &, < or >.  The exit status is
# that of awk.
simtel_to_html() {
  tr -d '\015' | "$1" '
    # Replace the characters that are special in HTML text by entities.
    function esc(s) {
      gsub(/&/, "\\&amp;", s)
      gsub(/</, "\\&lt;", s)
      gsub(/>/, "\\&gt;", s)
      return s
    }

    # First input line: start the top-level (or only) page.
    NR == 1 {
      dc = 0                     # number of directory headings seen so far
      top = "" prefix ".html"    # top-level page
      f = top                    # page currently being written
      printf("<title>Index of files in the SimTel Software Repository</title>\n") >> top
      printf("<h1>Files from SimTel</h1>\n") >> top
      if (style == "multiple") printf("<dir>\n") >> top
    }

    # Directory heading.  The directory name is taken from the next-to-last
    # slash-separated field of the line.
    /^Directory / {
      dc++
      if (dc != 1) printf("</ul>\n") >> f    # finish the previous file list
      n = split($0, p, "/")
      d = p[n-1]
      if (style == "multiple") {
        # Continue on a page of its own and link it from the top-level page.
        # Closing keeps the number of simultaneously open files small; the
        # append redirection lets the top-level page be reopened later.
        close(f)
        f = "" prefix "_" d ".html"
        printf("<li><a href=\"%s\">%s</a>\n", f, esc(d)) >> top
        printf("<title>SimTel index: %s</title>\n", esc(d)) >> f
      }
      printf("<h2><a href=\"ftp://%s/%s/%s\">%s</a></h2>\n<ul>\n", site, root, d, esc(d)) >> f
      next
    }

    # Column headings, rulers and blank lines produce no output.
    /^ Filename / || /^=======/ || /^ *$/ {
      next
    }

    # Any other line below a directory heading is a file entry: the first
    # field is the file name and the description starts in column 34.  The
    # displayed name is padded with underscores to 12 characters so that the
    # descriptions line up.
    dc != 0 {
      printf("<li><tt><a href=\"ftp://%s/%s/%s/%s\">%s</a></tt>...%s\n", site, root, d, $1, esc(sprintf("%-12.12s", $1 "____________")), esc(substr($0, 34))) >> f
    }

    END {
      # Close the last file list only if one was ever opened.
      if (dc != 0) printf("</ul>\n") >> f
      if (style == "multiple") printf("</dir>\n") >> top
      printf("<hr><h4>Generated by %s on %s</h4>\n", thisfile, today) >> top
    }
  ' thisfile="$(basename "$0")" today="$(date +'%Y/%m/%d')" site="$2" root="$3" style="$4" prefix="$5" -
}

simtel_to_html "$awk" "$site" "$root" "$style" "$outfile" < "$listfile"

# Check the result: a non-empty top-level page means the conversion worked.

if [ -s "$outfile.html" ]; then
  # The downloaded zip and the extracted listing are no longer needed.
  rm -f -- "$zipfile" "$listfile"
  # Make the generated pages readable by group and others.  Only files that
  # carry the output prefix are touched, so other html documents kept in
  # the same directory retain whatever permissions their owner chose.
  chmod go+r "$outfile"*.html
  echo "Created $outfile.html."
else
  echo "Failed to create $outfile.html." >&2
  exit 1
fi
