# Text-formatting filters: 'unfmt', 'fmt', 'refmt'
#
# This file contains the makings of three useful filters: 'unfmt'
# breaks down most hard newline characters in an input stream, while
# preserving others in a rational fashion, and is intended to be a
# pre-processing step for 'fmt', which formats an input stream of text
# to a specified line-width.  'unfmt' can, however, also be used to
# un-format text for use with text editors that don't mind very long
# lines (Tandy Scripsit, for example).  'refmt' is a combination of
# 'unfmt' and 'fmt' and is useful for converting files from one line-width
# to another.  'refmt' is also invaluable for dynamic reformatting of
# text within the 'ex' or 'vi' editors: to reformat a portion of text
# to a new line-width in 'ex' or 'vi', enter the command
# "linespec1,linespec2 ! refmt width" where 'width' is your desired new
# line-width; to reformat all text to line-width 'width' in 'ex' or 'vi',
# enter the command "%!refmt width".  Your text must, of course, be in a
# form compatible with the rules described below, but those rules are
# fairly straightforward.
#
# Rules for unformatting: 
#
#  (1) All text lines get joined together separated by a space, except
#      as follows.
#  (2) Blank lines don't get joined.
#  (3) Lines that follow blank lines don't get joined.
#  (4) Lines that begin with a space, a tab, or a dot (".") don't get joined.
#  (5) Lines ending with a period ("."), question mark ("?") or exclamation
#      point ("!") and which are joinable with the lines that follow them
#      are padded with an extra space (i.e., with TWO spaces) before they're
#      joined.
#  (6) Lines ending with an alphabetic character followed by a hyphen ("-")
#      and which are joinable with the lines that follow them are joined to
#      the following lines with no intervening space: the hyphen will appear
#      immediately before the first character of the next line.
#
# Rules for formatting:
#
#  (1) Lines are folded so that the specified line-width is observed without
#      breaking words in the middle.  If a word is too long to fit within
#      the specified line-width, it's hyphenated haphazardly but no
#      characters are lost.
#  (2) Lines that begin with EXACTLY ONE dot (".") will have that single
#      dot suppressed on output.  However, if an initial dot is followed
#      by another dot, both dots will be output in normal fashion.  Note that
#      a line beginning with a dot will always have its preceding newline
#      character retained during unformatting, regardless of what follows
#      the dot.
#  (3) A "soft line-break" is the insertion of a newline character into
#      the output stream so as to fit an input line to the specified
#      line-width, where no newline character exists in the actual input
#      stream.  If there's white space in the input stream immediately
#      following the point where a soft line-break is inserted, then
#      thereafter spaces and tab characters will be disregarded until
#      a nonblank character or hard newline is encountered in the input
#      stream.
#
# This file is in proper form for automatic installation.  To so install it,
# however, you must have the 'cc' C compiler.  If you do, then just type
# "sh filename" where 'filename' is whatever name you've saved this file
# under.  The filters 'fmt', 'unfmt', and 'refmt' will appear in your
# working directory.  'unfmt' takes no command-line argument; 'fmt' takes
# the new line-width, optionally followed by '-h' to allow line breaks at
# hyphens; 'refmt' takes the new line-width, which defaults to 80 if omitted.
# 
# Installer: copy the two C sources embedded after the start marker into
# unfmt.c and fmt.c, compile them, and generate the 'refmt' wrapper script.
echo splitting out program files
# "$0" is quoted so the script still works when saved under a name that
# contains blanks.
sed -n '/^#START/,$p' <"$0" | \
sed -e '/file: unfmt\.c/,/EOF: unfmt\.c/w unfmt.c' \
    -e '/file: fmt\.c/,/EOF: fmt\.c/w fmt.c' >/dev/null
echo compiling unfmt.c
cc -o unfmt unfmt.c || exit 1
echo compiling fmt.c
cc -o fmt fmt.c || exit 1
echo making refmt
# A quoted here-document writes the wrapper verbatim.  The former one-line
# echo relied on echo expanding "\n", which is not portable: where echo
# prints the backslash sequences literally, refmt came out as a single
# unusable line.
cat >refmt <<'REFMT_SCRIPT'
WIDTH=$1
if [ "$WIDTH" = "" ]
then WIDTH=80
fi
exec unfmt | fmt $WIDTH
REFMT_SCRIPT
chmod a+x refmt
echo All done.
# Stop here so the shell never tries to interpret the C sources below.
exit
#START
/* file: unfmt.c */

/****** Description ***************
   Usage: unfmt

   This filter collapses input text lines as a preparatory step for
   re-formatting by some other program.  Rules:

   (1) All newline characters in the input are converted to spaces,
       except newlines preceded by newlines and newlines followed by
       newlines, spaces, tab characters, or dots ("."'s).  Essentially
       this means that only completely blank lines and lines beginning
       with spaces, tabs, or dots will resist the line-joining effect.

   (2) For lines whose newlines are subject to conversion as described
       in (1):

        (a) Input lines that end with a dot ("."), question mark ("?")
            or exclamation point ("!") are padded with an extra space
            before any joining takes place.  This is so that most
            end-of-sentence sequences will be set off by two spaces
            rather than the single space that ordinarily results from
            unformatting.  To cancel this double-padding of terminal
            end-of-sentence characters, change the #define of PADEOS
            from YES to NO.

        (b) Input lines that end with an alphabetic character followed by
            a hyphen ("-") do not have their terminating newline characters
            converted to spaces; instead, the newline characters are
            thrown away.

 *********************************/

#include <stdio.h>
#include <ctype.h>

/* Declared here because unfmt() is defined further down in this file;
   calling it with no declaration in scope is rejected by current compilers. */
int unfmt(FILE *, FILE *);

/* Entry point of the 'unfmt' filter: un-formats standard input onto
   standard output.  Takes no command-line arguments; exit status is 0. */
int main(void)
{
        unfmt(stdin,stdout);
        return(0);
}

/********** Function: int unfmt(instream, outstream) ***********************
    Parameters:
        FILE *instream, *outstream;

    Returns: 0 once the whole input has been copied; (-1) if either
        stream is NULL (nothing is read or written in that case)

    Description:

        Preprocessor for text reformatting.  Newlines in input text
        are converted to spaces, except for newlines preceded by
        newlines and newlines followed by whitespace or dots ("."'s).
        Input lines ending with the end-of-sentence characters ".",
        "?" or "!" and whose terminal newlines are convertible to
        spaces are padded with a single extra terminal space if the
        #define PADEOS is true.  Terminal newlines in input lines that
        end with an alphabetic character followed by a hyphen ("-")
        and which are convertible to spaces are not converted to
        spaces but instead are thrown away.

        Bytes outside the printable ASCII range, other than tab and
        newline, are dropped from the stream.

 ************************************************************************/

#define YES             1
#define NO              0
#define NEWLINE         '\n'
#define EVER            ;;
#define PADEOS          YES

int unfmt(FILE *instream, FILE *outstream)
{
        /* getc() returns an int: keeping it in a char made byte 0xFF
           look like EOF (or made EOF undetectable where char is
           unsigned), so the read value stays an int here. */
        int c;
        /* latch[0] is the most recent character kept, latch[1] the one
           before it, latch[2] the one before that.  A newline is never
           written when it is read; it sits in latch[0] until the next
           character decides what it turns into. */
        char latch[3];

        latch[0] = latch[1] = latch[2] = '\0';
        if (instream==NULL || outstream==NULL) return(-1);
        for (EVER) {
                c = getc(instream);
                if (c==EOF) {
                        /* flush a newline still pending at end of input */
                        if (latch[0]==NEWLINE) putc(NEWLINE,outstream);
                        return(0);
                }
                /* filter control characters */
                if (c>'~' || (c<' ' && c!='\t' && c!=NEWLINE)) continue;
                /* process */
                switch (c) {
                        case NEWLINE:
                        case ' ':
                        case '\t':
                        case '.':
                                /* these characters keep a preceding
                                   newline hard */
                                if (latch[0]==NEWLINE) {
                                        /* emit the pending newline unless
                                           it was already written as the
                                           second of a pair */
                                        if (latch[1]!=NEWLINE)
                                                putc(NEWLINE,outstream);
                                        putc(c,outstream);
                                }
                                /* a newline after ordinary text is held
                                   back, not written */
                                else if (c!=NEWLINE)
                                        putc(c,outstream);
                                break;
                        default:
                                /* pad double spaces for EOS */
                                if (PADEOS && latch[0]==NEWLINE &&
                                    (latch[1]=='.' || latch[1]=='!' ||
                                     latch[1]=='?'))
                                        fprintf(outstream,"  ");
                                /* else pad singly 'cept alpha-hyphen */
                                else if (latch[0]==NEWLINE &&
                                         !(latch[1]=='-' &&
                                         isalpha((unsigned char)latch[2])) &&
                                         latch[1]!=NEWLINE)
                                                putc(' ',outstream);
                                putc(c,outstream);
                                break;
                }
                latch[2] = latch[1];
                latch[1] = latch[0];
                latch[0] = (char)c;
        }
}
/* EOF: unfmt.c */
/* file: fmt.c */

/* fmt.c */
/****** Description ***************
   Usage: fmt width [-h]
   Examples: fmt 80
             fmt 80 -h

   This filter takes text from standard input and formats it for the
   specified terminal width.  Newline characters in input are preserved;
   input lines too long to fit in the specified width are folded without
   breaking words except for words that are longer than the specified
   width, which words get hyphenated in haphazard fashion.

   If the '-h' flag is set, then lines longer than the specified
   width will be broken at hyphens as well as at spaces or tabs.

   Dots (".") at the beginning of input lines are suppressed on output
   unless the initial dot is followed by another dot.
 *********************************/

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Both are defined further down in this file; they are declared here
   because calling a function with no declaration in scope is rejected
   by current compilers. */
int usage(void);
int fmt(FILE *, FILE *, int, int, int);

/* Entry point of the 'fmt' filter.

   argv[1] is the line-width, a decimal number from 1 to 256 (the
   largest width fmt() accepts); an optional argv[2] of "-h" allows
   lines to be broken at hyphens.  Any other invocation prints the usage
   message and exits via usage().  Standard input is formatted onto
   standard output with no hanging indentation.

   Exit status: 0 on success, 1 if fmt() rejects its arguments. */
int main(int argc, char *argv[])
{
    const char *digits;
    int right = 0, hyphen = 0;

    /* an empty width string used to slip through as width 0 */
    if (argc<2 || argc>3 || argv[1][0]=='\0')
        usage();
    for (digits=argv[1]; *digits; ++digits) {
        if (*digits<'0' || *digits>'9')
            usage();
        right = (right*10) + (*digits - '0');
        /* fmt() rejects widths above 256; bailing out here also keeps
           a long digit string from overflowing 'right' */
        if (right>256)
            usage();
    }
    if (right<1)
        usage();
    if (argc==3) {
        /* compare in place rather than advancing argv[2] */
        if (argv[2][0]=='-' && argv[2][1]=='h' && argv[2][2]=='\0')
            hyphen = 1;
        else usage();
    }
    return(fmt(stdin,stdout,0,right,hyphen)==0 ? 0 : 1);
}

/* Print the command synopsis and terminate the program with exit
   status 1.  The message goes to stderr so that it cannot be mistaken
   for formatted text when the filter's output is piped or captured.
   Never returns; the int return type is kept only because callers
   compiled without a prototype assume it. */
int usage(void)
{
    fprintf(stderr,"usage: fmt width [-h]\n");
    exit(1);
    return(1);  /* not reached */
}

/***** Function: int fmt(instream, outstream, left, right, hyphen) *********
    Parameters:
    FILE *instream, *outstream;
    int left, right, hyphen;

    Returns: 0 if no error; (-1) for bad argument specification

    Description:

    Formats text from stream 'instream' to a line-width of 'right'
    and sends the result to stream 'outstream'.  If 'left' is
    nonzero, a "hanging indentation" of 'left' blank spaces will
    be inserted before every line, except the first line, until an
    input line is encountered that contains a newline character.
    If parameter 'hyphen' is nonzero, lines will be broken at
    hyphens as well as at blanks and tabs.
    
    Tab-stops are assumed to be standard width for the purpose
    of computing widths of tab characters in the input.

    If an input line begins with exactly one dot ("."), that
    dot will be suppressed on output.  Lines beginning with
    more than one consecutive dot are output unchanged.

    Lines containing sequences of more than 'right' consecutive
    nonblank characters (or more than 'right-left' for lines where
    hanging indentation applies), and which are therefore not
    possible to break cleanly, get hyphenated in haphazard fashion.

    Whitespace in the input stream, encountered immediately after
    a soft newline has been inserted into the output stream, is
    ignored.

    An incomplete final input line (i.e., a final input line
    not ending with a newline character) will have a newline
    appended to it on output.

 ************************************************************************/

#define NEWLINE     '\n'
#define EVER        ;;

fmt(instream, outstream, left, right, hyphen)
FILE *instream, *outstream;
int left;                            /* "hanging" initial indentation */
int right;                           /* terminal width */
int hyphen;                          /* if true, split lines at hyphens */
{
    register int i, j, k;
    short int
         hardnl,    /* if true, input line has hard newline */
         seglen,    /* length of line segment to be output */
         width,     /* effective output width */
         firstline, /* suppresses hanging indent on first line */
         eof,       /* we've hit EOF on input */
         done;      /* nothing more to do */
    char c, eolchar, segment[256], holding[256];

    if (instream==(FILE *)NULL ||  outstream==(FILE *)NULL ||
    left<0 || left>250 || right<1 || right>256)
        return(-1);                  /* bozo calls */
    firstline = hardnl = 1;
    done = eof = 0;
    *holding = '\0';
    for (EVER) {
        eolchar = '\0';  /* if we need a hyphen it'll go in eolchar */
        for (i=0;i<256;++i) segment[i] = '\0';
        width = (right - left);
        /* take care of stashed characters, if any */
        if (*holding) {
            i = 0;
                /* skip whitespace after soft newline */
            if (!hardnl)
                while (holding[i]==' ' || holding[i]=='\t')
                    ++i;
            strcpy(segment,&holding[i]);
        }
        *holding = '\0';
        i = strlen(segment);
        --i;
        /* get a bunch of new characters from input stream */
        while ((++i)<width) {
            c = getc(instream);
            if (!(eof=((int)c==EOF))) {
                    /* skip whitespace after soft nl */
                if (!i && !hardnl && (c==' ' || c=='\t')) --i;
                else segment[i] = c;
            }
            else break;
        }
        seglen = (width>strlen(segment)) ? strlen(segment) : width;
        /* adjust segment length for tab chars */
        i = j = left;
        while (i<seglen) {
            if (segment[j-left]=='\t' &&
                (i+=(8-(i%8)))>seglen) break;
            else ++i;
            ++j;
        }
        seglen = j;
        /* look for hard newline */
        for (i=0;(i<seglen && segment[i]!=NEWLINE);++i);
        if (hardnl=(i<seglen)) seglen = ++i;
        /* if no hard newline, find rightmost blank if any */
        if (!hardnl) {
            j = seglen;
            --j;
            while (j && segment[j]!=' ' && segment[j]!='\t') {
                if (hyphen && segment[j]=='-' && isalpha(segment[j-1]))
                    break;
                --j;
            }
            if (j) {
                seglen = j;
                if (segment[j]=='-') {
                    eolchar = '\n';
                    ++seglen;
                }
            }
            else {
                eolchar = '-';
                --seglen;
            }
        }
        /* preserve excess chars in 'holding' */
        if (eolchar=='-') --seglen;
        strcpy(holding,&segment[seglen]);
        segment[seglen] = '\0';
        /* allow for hanging indent if any */
        if (left && !firstline)
            for (i=0;i<left;++i) putc(' ',outstream);
        firstline = 0;
        /* cancel hanging indent after first hard newline */
        if (hardnl) left = 0;
        /* output the segment, suppressing single leading dot */
        i = (segment[0]=='.' && segment[1]!='.');
        fputs(segment+i,outstream);
        if (done=(!(*holding) && eof)) eolchar = '\0';
        if (eolchar) putc(eolchar,outstream);
        if (!hardnl && eolchar!='\n') putc(NEWLINE,outstream);
        if (done) return(0);
    }
}
/* EOF: fmt.c */
