Newsgroups: comp.sources.misc From: leo@s514.ipmce.su (Leonid A. Broukhis) Subject: v25i013: freeze - Freeze/melt compression program, Part02/02 Message-ID: <1991Nov5.022937.2395@sparky.imd.sterling.com> X-Md4-Signature: 295f05a33bd5bef88ed889bd2e49b358 Date: Tue, 5 Nov 1991 02:29:37 GMT Approved: kent@sparky.imd.sterling.com Submitted-by: leo@s514.ipmce.su (Leonid A. Broukhis) Posting-number: Volume 25, Issue 13 Archive-name: freeze/part02 Environment: ISC, Xenix, SunOS, MS-DOS Supersedes: freeze: Volume 17, Issue 67-68 #! /bin/sh # This is a shell archive. Remove anything before this line, then feed it # into a shell via "sh file" or similar. To overwrite existing files, # type "sh file -c". # The tool that generated this appeared in the comp.sources.unix newsgroup; # send mail to comp-sources-unix@uunet.uu.net if you want that tool. # Contents: bitio.c compat.h debug.c decode.c default.c freeze.h huf.h # lz.c lz.h makefile patchlevel.h statist.c # Wrapped by kent@sparky on Sun Nov 3 18:29:27 1991 PATH=/bin:/usr/bin:/usr/ucb ; export PATH echo If this archive is complete, you will see the following message: echo ' "shar: End of archive 2 (of 2)."' if test -f 'bitio.c' -a "${1}" != "-c" ; then echo shar: Will not clobber existing file \"'bitio.c'\" else echo shar: Extracting \"'bitio.c'\" \(1484 characters\) sed "s/^X//" >'bitio.c' <<'END_OF_FILE' X#include "freeze.h" X#include "bitio.h" X Xunsigned long getbuf = 0; /* assume sizeof (unsigned long) >= 4 */ Xu_short putbuf; Xuchar bitlen = 0, __, crpt_flag = 0; X X/* get N bits (N <= 16), returning in Bit(N-1)...Bit 0 */ X Xshort GetNBits (n) X register u_short n; X{ X register unsigned long dx = getbuf; X register uchar c; X X static u_short mask[17] = { X 0x0000, X 0x0001, 0x0003, 0x0007, 0x000f, X 0x001f, 0x003f, 0x007f, 0x00ff, X 0x01ff, 0x03ff, 0x07ff, 0x0fff, X 0x1fff, 0x3fff, 0x7fff, 0xffff }; X X while (bitlen < n) X { X c = getchar (); X dx |= (unsigned long) c << (BYSH - bitlen); X bitlen += 8; X } X crpt_flag = feof(stdin); X getbuf = dx << n; X bitlen -= n; X return (dx >> (bits(getbuf) - n)) & mask[n]; X} X X/* output `l' high bits of `c' */ X Xvoid Putcode (l, c) X register u_short l; X u_short c; X{ X register u_short len = bitlen; X register u_short b = (u_short)putbuf; X b |= c >> len; X if ((len += l) >= 8) { X putchar ((int)(b >> 8)); X if ((len -= 8) >= 8) { X putchar ((int)b); X bytes_out += 2; X len -= 8; X b = c << (l - len); X } else { X b <<= 8; X bytes_out++; X } X } X if (ferror(stdout)) X writeerr(); X putbuf = b; X bitlen = len; X} X X X/* Flushes the bit I/O buffers and check the state of stdout */ X Xvoid EncodeEnd () X{ X if (bitlen) { X putchar((int)(putbuf >> 8)); X bytes_out++; X if (ferror(stdout)) X writeerr(); X } X} X X/* File too short or invalid header, print a message */ X Xvoid crpt_message ( ) X{ X fprintf ( stderr, "melt: corrupt input\n" ); X} X END_OF_FILE if test 1484 -ne `wc -c <'bitio.c'`; then echo shar: \"'bitio.c'\" unpacked with wrong size! fi # end of 'bitio.c' fi if test -f 'compat.h' -a "${1}" != "-c" ; then echo shar: Will not clobber existing file \"'compat.h'\" else echo shar: Extracting \"'compat.h'\" \(184 characters\) sed "s/^X//" >'compat.h' <<'END_OF_FILE' Xextern uchar Table1[]; X X#define F1 60 X#define N1 4096 X#define N_CHAR1 (256 - THRESHOLD + F1 + 1) X Xextern short DecodePOld(); X Xextern void melt1(); END_OF_FILE if test 184 -ne `wc -c <'compat.h'`; then echo shar: \"'compat.h'\" unpacked with wrong size! fi # end of 'compat.h' fi if test -f 'debug.c' -a "${1}" != "-c" ; then echo shar: Will not clobber existing file \"'debug.c'\" else echo shar: Extracting \"'debug.c'\" \(1548 characters\) sed "s/^X//" >'debug.c' <<'END_OF_FILE' X#if defined(DEBUG) X#include "freeze.h" X#include "huf.h" X#include "bitio.h" X X /*---------------------------*/ X /* DEBUG MODULE */ X /*---------------------------*/ X Xprintcodes(mode) X{ X /* X * Just print out codes from input file. For debugging. X */ X register short k, c, col = 0; X X#ifdef COMPAT X if (!mode) { X StartHuff(N_CHAR1); X init(Table1); X } else X#endif X { X if (read_header() == EOF) { X fprintf(stderr, "Bad header\n"); X return; X } X StartHuff(N_CHAR2); X init(Table2); X } X X InitIO(); X X for (;;) { X X if((c = DecodeChar()) == ENDOF) X break; X if (c < 256) { X fprintf(stderr, "%5d%c", c, X (col+=8) >= 74 ? (col = 0, '\n') : '\t' ); X } else { X c = c - 256 + THRESHOLD; X X k = DecodePosition(); X X fprintf(stderr, "%2d-%d%c", c, k, X (col+=8) >= 74 ? (col = 0, '\n') : '\t' ); X } X } X putc( '\n', stderr ); X exit( 0 ); X} X X/* for pretty char printing */ X Xchar * Xpr_char(c) X register uchar c; X{ X static char buf[5]; X register i = 4; X buf[4] = '\0'; X if ( (isascii((int)c) && isprint((int)c) && c != '\\') || c == ' ' ) { X buf[--i] = c; X } else { X switch( c ) { X case '\n': buf[--i] = 'n'; break; X case '\t': buf[--i] = 't'; break; X case '\b': buf[--i] = 'b'; break; X case '\f': buf[--i] = 'f'; break; X case '\r': buf[--i] = 'r'; break; X case '\\': buf[--i] = '\\'; break; X default: X buf[--i] = '0' + c % 8; X buf[--i] = '0' + (c / 8) % 8; X buf[--i] = '0' + c / 64; X break; X } X buf[--i] = '\\'; X } X return &buf[i]; X} X#endif END_OF_FILE if test 1548 -ne `wc -c <'debug.c'`; then echo shar: \"'debug.c'\" unpacked with wrong size! fi # end of 'debug.c' fi if test -f 'decode.c' -a "${1}" != "-c" ; then echo shar: Will not clobber existing file \"'decode.c'\" else echo shar: Extracting \"'decode.c'\" \(2004 characters\) sed "s/^X//" >'decode.c' <<'END_OF_FILE' X#include "freeze.h" X#include "huf.h" X#include "bitio.h" X X/* X * Melt stdin to stdout. X */ X Xvoid melt2 () X{ X register short i, j, k, r, c; X X/* Huffman-dependent part */ X if(read_header() == EOF) X return; X StartHuff(N_CHAR2); X init(Table2); X/* end of Huffman-dependent part */ X X InitIO(); X for (i = 0; i < N2 - F2; i++) X text_buf[i] = ' '; X r = N2 - F2; X for (in_count = 0;; ) { X c = DecodeChar(); X X if (c == ENDOF) X break; X if (c < 256) { X#ifdef DEBUG X if (debug) X fprintf(stderr, "'%s'\n", pr_char((uchar)c)); X else X#endif /* DEBUG */ X putchar (c); X text_buf[r++] = c; X r &= N2 - 1; X in_count++; X } else { X i = (r - DecodePosition() - 1) & (N2 - 1); X j = c - 256 + THRESHOLD; X#ifdef DEBUG X if (debug) X fputc('"', stderr); X#endif X for (k = 0; k < j; k++) { X c = text_buf[(i + k) & (N2 - 1)]; X#ifdef DEBUG X if (debug) X fprintf(stderr, "%s", pr_char((uchar)c)); X else X#endif X putchar (c); X text_buf[r++] = c; X r &= (N2 - 1); X in_count++; X } X#ifdef DEBUG X if (debug) X fprintf(stderr, "\"\n"); X#endif X } X INDICATOR X } X} X X#ifdef COMPAT Xvoid melt1 () X{ X register short i, j, k, r, c; X X StartHuff(N_CHAR1); X init(Table1); X InitIO(); X for (i = 0; i < N1 - F1; i++) X text_buf[i] = ' '; X r = N1 - F1; X for (in_count = 0;; ) { X c = DecodeChar(); X X if (c == ENDOF) X break; X X if (c < 256) { X#ifdef DEBUG X if (debug) X fprintf(stderr, "'%s'\n", pr_char((uchar)c)); X else X#endif /* DEBUG */ X putchar (c); X text_buf[r++] = c; X r &= (N1 - 1); X in_count++; X } else { X i = (r - DecodePOld() - 1) & (N1 - 1); X j = c - 256 + THRESHOLD; X#ifdef DEBUG X if (debug) X fputc('"', stderr); X#endif X for (k = 0; k < j; k++) { X c = text_buf[(i + k) & (N1 - 1)]; X#ifdef DEBUG X if (debug) X fprintf(stderr, "%s", pr_char((uchar)c)); X else X#endif X putchar (c); X text_buf[r++] = c; X r &= (N1 - 1); X in_count++; X } X#ifdef DEBUG X if (debug) X fprintf(stderr, "\"\n"); X#endif X } X INDICATOR X } X} X#endif END_OF_FILE if test 2004 -ne `wc -c <'decode.c'`; then echo shar: \"'decode.c'\" unpacked with wrong size! fi # end of 'decode.c' fi if test -f 'default.c' -a "${1}" != "-c" ; then echo shar: Will not clobber existing file \"'default.c'\" else echo shar: Extracting \"'default.c'\" \(1271 characters\) sed "s/^X//" >'default.c' <<'END_OF_FILE' X#include "freeze.h" X X /*-------------------------------*/ X /* DEFAULTS FILE HANDLING */ X /*-------------------------------*/ X X#define OK 0 X#define FAIL NULL X#define NOFILE ((FILE *) 0) X#define MAXLINE 128 X Xextern int errno; Xchar *strchr(); Xstatic FILE *defd = NOFILE; /* defaults file stream */ X Xint defopen(fname) /* open | reopen | close defaults file */ X char *fname; X{ X register FILE *fd; X X if (!fname) { X if (defd) X fclose(defd); X defd = NOFILE; X return OK; X } X X if (!(fd = fopen(fname, "r"))) X return errno; /* problems opening file */ X X defd = fd; X return OK; X} X Xstatic char defline[MAXLINE + 1]; X Xchar *defread(pattern) X register char *pattern; X{ X register sz_patt; X register char *cp; X X if (!defd) X return FAIL; /* defaults file not opened */ X X rewind(defd); X sz_patt = strlen(pattern); X X while (fgets(defline, MAXLINE, defd)) { X if (!(cp = strchr(defline, '\n'))) X return FAIL; /* line too long */ X if (cp - defline < sz_patt) X continue; /* line too short */ X *cp = '\0'; X if (!strncmp(pattern, defline, sz_patt)) X return defline + sz_patt; /* match found */ X } X X return FAIL; /* no matching lines */ X} END_OF_FILE if test 1271 -ne `wc -c <'default.c'`; then echo shar: \"'default.c'\" unpacked with wrong size! fi # end of 'default.c' fi if test -f 'freeze.h' -a "${1}" != "-c" ; then echo shar: Will not clobber existing file \"'freeze.h'\" else echo shar: Extracting \"'freeze.h'\" \(2968 characters\) sed "s/^X//" >'freeze.h' <<'END_OF_FILE' X X#include X X#ifdef SUN4 X#include X#else /* SUN4 */ X# ifndef getc X# define getc(p) (--(p)->_cnt < 0 ? _filbuf(p) : (int) *(p)->_ptr++) X# endif X# ifndef putc X# define putc(x, p) (--(p)->_cnt < 0 ? _flsbuf((unsigned char) (x), (p)) : (int) (*(p)->_ptr++ = (unsigned char) (x))) X# endif X#ifndef BSD4_2 Xtypedef unsigned short u_short; X#endif /* BSD4_2 */ X#endif /* SUN4 */ X X#include X#include X#include X#include X X/* for MAXNAMLEN only !!! */ X#ifdef unix X#ifndef BSD4_2 X#include X#else X#include X#endif /* BSD4_2 */ X#endif /* unix */ X X#if !defined(MAXNAMLEN) X#define MAXNAMLEN 255 X#else X#if MAXNAMLEN < 255 X#undef MAXNAMLEN X#define MAXNAMLEN 255 X#endif X#endif X X#ifdef DEBUG X#include X#endif /* DEBUG */ X X#ifdef __TURBOC__ X#define MSDOS X#include X#include X#endif /* __TURBOC__ */ X X#ifdef MSDOS X#include X#include X#endif /* MSDOS */ X Xtypedef unsigned char uchar; X X#if defined(BITS) && BITS > 14 Xtypedef unsigned long hash_t; X#else Xtypedef u_short hash_t; X#endif /* BITS */ X X#ifdef lint X#define N2 256 X#else /* lint */ X#define N2 8192 /* buffer size */ X#endif /* lint */ X X#define F2 256 /* pre-sence buffer size */ X#define THRESHOLD 2 X X#define N_CHAR2 (256 - THRESHOLD + F2 + 1) /* code: 0 .. N_CHARi - 1 */ X#define T2 (N_CHAR2 * 2 - 1) /* size of table */ X X#define ENDOF 256 /* pseudo-literal */ X Xextern uchar Table2[]; X Xextern long in_count, bytes_out, file_length; X Xextern uchar text_buf[]; Xextern u_short match_position, match_length; X Xextern short quiet, force; /* useful flags */ X X/* Note ind_threshold is triangle number of Kbytes */ X Xextern long indc_threshold, indc_count; X Xextern short do_melt, topipe, greedy; X X#define MAGIC1 ((uchar)'\037') X#define MAGIC2_1 ((uchar)'\236') /* freeze vers. 1.X */ X#define MAGIC2_2 ((uchar)'\237') X Xextern int exit_stat; X X#ifdef DEBUG Xextern short debug; Xextern short verbose; Xextern char * pr_char(); Xextern long symbols_out, refers_out; X#endif /* DEBUG */ X X#ifdef GATHER_STAT Xextern long node_steps, node_matches; X#endif X Xextern short DecodeChar(), DecodePosition(), GetNBits(); Xextern void melt2(), (*meltfunc)(), writeerr(), prratio(), prbits(), freeze(); X X#if defined(BSD42) && !defined(BSD4_2) X#define BSD4_2 X#endif X X#ifdef COMPAT X#include "compat.h" X#endif X X#define INDICATOR \ Xif (quiet < 0 && (in_count > indc_count)) {\ X if (ferror(stdout))\ X writeerr();\ X if (file_length) {\ X fprintf(stderr, " %2d%%\b\b\b\b",\ X ftell(stdin) * 100 / file_length);\ X indc_count += indc_threshold;\ X } else {\ X fprintf(stderr, " %5ldK\b\b\b\b\b\b\b", in_count / 1024);\ X indc_count += indc_threshold;\ X indc_threshold += 1024;\ X }\ X fflush (stderr);\ X} X X#ifdef BSD4_2 X#define strchr index X#endif END_OF_FILE if test 2968 -ne `wc -c <'freeze.h'`; then echo shar: \"'freeze.h'\" unpacked with wrong size! fi # end of 'freeze.h' fi if test -f 'huf.h' -a "${1}" != "-c" ; then echo shar: Will not clobber existing file \"'huf.h'\" else echo shar: Extracting \"'huf.h'\" \(210 characters\) sed "s/^X//" >'huf.h' <<'END_OF_FILE' X Xextern u_short freq[]; Xextern short son[], prnt[]; Xextern void StartHuff(), init(), write_header(), EncodeChar(); Xextern void EncodePosition(); X#define MAX_FREQ (u_short)0x8000 /* Tree update timing */ END_OF_FILE if test 210 -ne `wc -c <'huf.h'`; then echo shar: \"'huf.h'\" unpacked with wrong size! fi # end of 'huf.h' fi if test -f 'lz.c' -a "${1}" != "-c" ; then echo shar: Will not clobber existing file \"'lz.c'\" else echo shar: Extracting \"'lz.c'\" \(3480 characters\) sed "s/^X//" >'lz.c' <<'END_OF_FILE' X#include "freeze.h" X#include "lz.h" X X/*----------------------------------------------------------------------*/ X/* */ X/* LZSS ENCODING */ X/* */ X/*----------------------------------------------------------------------*/ X Xuchar text_buf[N2 + F2 - 1]; /* cyclic buffer with an overlay */ Xu_short match_position, match_length; /* current characteristics of a X matched pattern */ X X/* next[N+1..] is used as hash table, X the rest of next is a link down, X prev is a link up. X*/ X Xhash_t prev[N2 + 1]; X X#ifndef __XENIX__ X#ifdef __TURBOC__ Xu_short huge * next = (u_short huge *) NULL; X#else /* __TURBOC__ */ Xu_short next[array_size]; /* a VERY large array :-) */ X#endif /* __TURBOC__ */ X#else /* __XENIX__ */ X#if parts == 2 Xu_short next0[32768L], next1[8193]; X#else X# if parts == 3 Xu_short next0[32768L], next1[32768L], next2[8193]; X# else X# if parts == 5 Xu_short next0[32768L], next1[32768L], next2[32768L], next3[32768L], next4[8193]; X# else Xu_short next0[32768L], next1[32768L], next2[32768L], next3[32768L], next4[32768L], X next5[32768L], next6[32768L], next7[32768L], next8[8193]; X# endif X# endif X#endif /* parts */ X X/* A list of `next's parts */ X Xu_short *next[parts] = { Xnext0, next1 X#if parts > 2 X,next2 X#if parts > 3 X,next3, next4 X#if parts > 5 X,next5, next6, Xnext7, next8 X#endif X#endif X#endif X}; X#endif /* __XENIX__ */ X X#ifdef GATHER_STAT Xlong node_steps, node_matches; X#endif /* GATHER_STAT */ X X/* Initialize the data structures and allocate memory, if needed. X Although there is no more trees in the LZ algorithm X implementation, routine name is kept intact :-) X*/ X Xvoid InitTree () X{ X long i; X#ifdef GATHER_STAT X node_steps = node_matches = 0; X#endif /* GATHER_STAT */ X X#ifdef __TURBOC__ X if (!next && (next = (u_short huge *) farmalloc( X (unsigned long)array_size * sizeof(u_short))) == NULL) { X X fprintf(stderr, "Not enough memory (%luK wanted)\n", X (unsigned long)array_size * sizeof(u_short) / 1024); X exit(3); X } X#endif /* __TURBOC__ */ X X for (i = N2 + 1; i < array_size; i++ ) X nextof(i) = NIL; X for (i = 0; i < sizeof(prev)/sizeof(*prev); i++ ) X prev[i] = NIL; X} X X/* Get the longest nearest match of the string beginning in text_buf[r] X to the cyclic buffer. Result (length & position) is returned X in correspondent global variables (`match_length' & X `match_position'). `match_length' == 0 denotes failure. X*/ X X#ifndef FAST Xvoid get_next_match (r) X u_short r; X{ X register u_short p = r; X register int m; X register uchar *key FIX_SI, *pattern FIX_DI; X#ifdef GATHER_STAT X node_matches++; X#endif X key = text_buf + r; X match_length = 0; X do { X do { X if ((p = nextof(p)) == NIL) X return; X pattern = text_buf + p; X X MATCHING; X X } while NOT_YET; X X#ifdef GATHER_STAT X node_steps++; X#endif X X for (m = match_length; X ++m < F2 && key[m] == pattern[m]; ); X X match_length = m; X match_position = ((r - p) & (N2 - 1)) - 1; X } while (m < F2); X X/* There are two equal F-char sequences, the oldest one must be X deleted from the list. X*/ X X X#ifdef DEBUG X if (verbose) X fprintf(stderr, "Replacing node: %d -> %d\n", p, r); X#endif X nextof(prev[p]) = nextof(p); X prev[nextof(p)] = prev[p]; X prev[p] = NIL; /* remove p, it is further than r */ X return; X} X#endif END_OF_FILE if test 3480 -ne `wc -c <'lz.c'`; then echo shar: \"'lz.c'\" unpacked with wrong size! fi # end of 'lz.c' fi if test -f 'lz.h' -a "${1}" != "-c" ; then echo shar: Will not clobber existing file \"'lz.h'\" else echo shar: Extracting \"'lz.h'\" \(4267 characters\) sed "s/^X//" >'lz.h' <<'END_OF_FILE' Xextern void InitTree(); X X#ifndef BITS X#define BITS 14 /* for 16-bits machines */ X#endif X X#if BITS < 13 X#undef BITS X#define BITS 13 /* 1:1 hash */ X#endif X X#if BITS > 21 X#undef BITS X#define BITS 21 /* 4 MB hash table, if sizeof(u_short) == 2 */ X#endif X X/* The following hash-function isn't optimal but it is very fast: X X HASH = ((first + (second << LEN0) + X (third << LEN1)) & ((1L << BITS) - 1); X X The difference of LENs is no more than one bit. X*/ X X#define LEN0 ((BITS-8)/2) X#define LEN1 (BITS-8) X X#define NIL N2 X X#if defined(M_XENIX) && defined(I_286) && (BITS > 14) X#define __XENIX__ X#if BITS > 18 X#undef BITS X#define BITS 18 X#endif X#endif X X/* `array_size' is the size of array `next', which contains X the heads of linked lists and the references to X next members of these lists. X*/ X X#define array_size (N2 + 1 + (1L << BITS)) X Xextern hash_t prev[]; X X#ifndef __XENIX__ X#define nextof(i) next[i] X X#ifdef __TURBOC__ Xextern u_short huge * next; X#else /* __TURBOC__ */ Xextern u_short next[]; X#endif /* __TURBOC__ */ X X#else /* __XENIX__ */ X X/* We divide the array `next' in `parts' which fit into 286's segment */ X X#define parts (array_size/32768 + 1) X#define nextof(i) next[(i) >> 15][(i) & 0x7fff] X#if parts == 2 Xextern u_short next0[], next1[]; X#else X# if parts == 3 Xextern u_short next0[], next1[], next2[]; X# else X# if parts == 5 Xextern u_short next0[], next1[], next2[], next3[], next4[]; X# else Xextern u_short next0[], next1[], next2[], next3[], next4[], X next5[], next6[], next7[], next8[]; X# endif X# endif X#endif /* parts */ X Xextern u_short *next[]; X#endif /* __XENIX__ */ X X/* Some defines to eliminate function-call overhead */ X X/* Deleting of a node `n' from a linked list */ X X#define DeleteNode(n) \ X{\ X nextof(prev[n]) = NIL;\ X prev[n] = NIL;\ X} X X/* Inserting of a node `r' into hashed linked list: `r' becomes X the head of list. X*/ X X#define InsertNode(r)\ X{\ X register hash_t p; register u_short first_son;\ X register uchar *key;\ X key = &text_buf[r];\ X p = N2 + 1 + (((hash_t)key[0] + ((hash_t)key[1] << LEN0) +\ X ((hash_t)key[2] << LEN1)) & ((1L << BITS) - 1));\ X first_son = nextof(p);\ X nextof(r) = first_son;\ X nextof(p) = r;\ X prev[r] = p;\ X prev[first_son] = r;\ X} X X/* This routine inputs the char from stdin and does some other X actions depending of this char's presence. X*/ X X#define Next_Char(N,F)\ Xif ((c = getchar()) != EOF) {\ X text_buf[s] = c;\ X if (s < F - 1)\ X text_buf[s + N] = c;\ X s = (s + 1) & (N - 1);\ X r = (r + 1) & (N - 1);\ X InsertNode(r);\ X in_count++;\ X} else {\ X s = (s + 1) & (N - 1);\ X r = (r + 1) & (N - 1);\ X if (--len) InsertNode(r);\ X} X X#if defined(__GNUC__) X#if defined(__i386__) X/* Optimizer cannot allocate these registers correctly :( (v1.39) */ X#define FIX_SI asm("si") X#define FIX_DI asm("di") X#else X X/* GNU-style register allocations for other processors are welcome! */ X X#define FIX_SI X#define FIX_DI X#endif X#else X X/* Dummy defines for non-GNU compilers */ X X#define FIX_SI X#define FIX_DI X#endif X X#ifndef DO_INLINE X X/* This statement is due to ideas of Boyer and Moore: */ X/* Is somewhere an optimizing compiler which can vectorize this? ;-) */ X X#define MATCHING for (m = match_length; m >= 0 && key[m] == pattern[m]; m--) X#define NOT_YET (m >= 0) X X#else X X/* Hope this function will be intrinsic (Microsoft C). X Ideally this memcmp should be right-to-left, but this works X fast enough. X*/ X#define MATCHING m = memcmp(key, pattern, match_length + 1) X#define NOT_YET (m != 0) X X#endif X X#ifdef FAST X/* Simple inline replacement for get_next_match; they match Xliterally except return --> goto quote(leave)l. No obfuscations !! */ X X#ifdef __STDC__ X#define LEAVE(num) leave##num X#else X#define LEAVE(num) leave/**/num X#endif X X#define Get_Next_Match(r,l) {register u_short p=r;register int m;\ Xregister uchar *key FIX_SI, *pattern FIX_DI;key=text_buf+r;\ Xmatch_length=0;do{ do{ if((p=nextof(p))==NIL)goto LEAVE(l);\ Xpattern=text_buf+p;MATCHING;}while NOT_YET;\ Xfor(m=match_length;++m'makefile' <<'END_OF_FILE' XSHELL = /bin/sh XDEST = /usr/local/bin XMANDEST = /usr/local/man/man1 XEXTHDRS = XHDRS = bitio.h\ X compat.h\ X freeze.h\ X huf.h\ X lz.h\ X patchlevel.h XLDFLAGS = XLIBS = -lc_s # shared library X XCC = gcc X XCFLAGS = -DBITS=18 -O -DCOMPAT -DFAST -fstrength-reduce #-DBSD42 -DSUN4 X XLINTFLAGS = -DBITS=15 -DCOMPAT -DDEBUG -DGATHER_STAT -x -DFAST X XMAKEFILE = makefile X XOBJS = bitio.o\ X debug.o\ X decode.o\ X default.o\ X encode.o\ X freeze.o\ X huf.o\ X lz.o X XPROGRAM = freeze X XCATMAN = freeze.man X XMAN = freeze.1 X XSRCS = bitio.c\ X debug.c\ X decode.c\ X default.c\ X encode.c\ X freeze.c\ X huf.c\ X lz.c X Xall: $(PROGRAM) statist $(CATMAN) X Xlint: $(SRCS) X lint $(LINTFLAGS) $(SRCS) > lint.out X X$(PROGRAM): $(OBJS) X $(CC) $(CFLAGS) $(LDFLAGS) $(OBJS) $(LIBS) -o $(PROGRAM) X Xstatist: statist.o lz.o X $(CC) $(CFLAGS) $(LDFLAGS) -o statist statist.o lz.o $(LIBS) X Xclean:; rm -f *.o *.b .,* core *.out $(PROGRAM) statist X Xinstall: $(DEST)/$(PROGRAM) $(MANDEST)/$(MAN) X Xpatch:; rm -f patch.out X -for i in ../distribution/* ; do \ X (diff -c $$i `basename $$i` >> patch.out); \ X done X X$(DEST)/$(PROGRAM): $(PROGRAM) X install -s -c $(PROGRAM) $(DEST) X ln -f $(DEST)/$(PROGRAM) $(DEST)/melt X ln -f $(DEST)/$(PROGRAM) $(DEST)/fcat X X$(MANDEST)/$(MAN): $(CATMAN) X cp $(CATMAN) $(MANDEST)/$(MAN) X chmod +r $(MANDEST)/$(MAN) X ln -f $(MANDEST)/$(MAN) $(MANDEST)/melt.1 X ln -f $(MANDEST)/$(MAN) $(MANDEST)/fcat.1 X X$(CATMAN): $(MAN) X nroff -man < $(MAN) > $(CATMAN) X X### Xbitio.o: freeze.h compat.h bitio.h Xdebug.o: freeze.h compat.h huf.h bitio.h Xdecode.o: freeze.h compat.h huf.h bitio.h Xdefault.o: freeze.h compat.h Xencode.o: freeze.h compat.h lz.h huf.h bitio.h Xfreeze.o: freeze.h compat.h lz.h huf.h patchlevel.h Xhuf.o: freeze.h compat.h huf.h bitio.h Xlz.o: freeze.h compat.h lz.h Xstatist.o: freeze.h compat.h lz.h END_OF_FILE if test 1986 -ne `wc -c <'makefile'`; then echo shar: \"'makefile'\" unpacked with wrong size! fi # end of 'makefile' fi if test -f 'patchlevel.h' -a "${1}" != "-c" ; then echo shar: Will not clobber existing file \"'patchlevel.h'\" else echo shar: Extracting \"'patchlevel.h'\" \(49 characters\) sed "s/^X//" >'patchlevel.h' <<'END_OF_FILE' X#define PATCHLEVEL 0 X#define PATCHDATE "11/1/91" END_OF_FILE if test 49 -ne `wc -c <'patchlevel.h'`; then echo shar: \"'patchlevel.h'\" unpacked with wrong size! fi # end of 'patchlevel.h' fi if test -f 'statist.c' -a "${1}" != "-c" ; then echo shar: Will not clobber existing file \"'statist.c'\" else echo shar: Extracting \"'statist.c'\" \(5888 characters\) sed "s/^X//" >'statist.c' <<'END_OF_FILE' X#include "freeze.h" X#include "lz.h" X X/* This program calculates the distribution of the matched strings' Xpositions and lengths using nearly the same code as `freeze'. X*/ X X#define N_POS 62 X#define T (N_POS * 2 - 1) X#define R (T - 1) X X#define update(c) (freq[c]++) X Xlong in_count, bytes_out; X Xlong indc_count; Xshort reduceflag = 0, greedy = 0; X Xint lens[F2+1]; X Xu_short bits[9]; X Xshort prnt[T]; Xunsigned long freq[T]; Xshort used[T]; X Xvoid freeze(), StartHuff(); X X#ifndef INT_SIG Xvoid X#endif Xgiveres(); X Xint main(argc, argv) char ** argv; { X argv++; X while (argc > 1) { X if (**argv == '-') { X if ((*argv)[1] == 'g') { X argc--; argv++; X greedy++; X } else X break; X } else X break; X } X if(argc != 1) { X fprintf(stderr, "Usage: statist [-g] < sample_file\n"); X fprintf(stderr, "Press INTR to display current values\n"); X exit(0); X } X signal(SIGINT, giveres); X X#ifdef MSDOS X setmode(fileno(stdin), O_BINARY); /* Oh this MS-DOS ... */ X#endif /* MSDOS */ X X freeze(); X giveres(); X return 0; X} X Xunsigned long isqrt(val) Xunsigned long val; X{ X unsigned long result = 0; X unsigned long side = 0; X unsigned long left = 0; X int digit = 0; X int i; X for (i=0; i> 30); X val <<= 2; X if (left >= side*2 + 1) X { X left -= side*2+1; X side = (side+1)*2; X result <<= 1; X result |= 1; X } X else X { X side *= 2; X result <<= 1; X } X } X return result; X} X X X/* Prints the (current) values of tunable parameters. Uncertainty is Xthe number of missequencings (algorithm assumes the probabilities Xof references decrease uniformly when distance increases). Ideally Xit should be 0, but somewhat about 5 or less denotes the given 8 values Xcould improve the compression rate when using them. X*/ X X#ifndef INT_SIG Xvoid X#endif Xgiveres() { X u_short c; X register int i, j, k, pr, f, average, sum; X unsigned long cumul, sigma2; X short r, percent; X signal(SIGINT, giveres); X newtry: X StartHuff(N_POS); X pr = f = 0; X i = N_POS; X r = N_POS * 2 - 2; X while (i <= r) { X j = findmin(i); X k = findmin(i); X freq[i] = freq[j] + freq[k]; X prnt[j] = prnt[k] = i++; X } X X for (c = 1; c <= 6; c++) bits[c] = 0; X X for(c = 0; c < N_POS; c++) { X j = 0; X k = c; X do j++; while ((k = prnt[k]) != r); X if (j <= 6) X bits[j]++; X if (j < pr) X f += pr - j; X pr = j; X } X X k = bits[1] + bits[2] + bits[3] + bits[4] + X bits[5] + bits[6]; X X k = 62 - k; /* free variable length codes for 7 & 8 bits */ X X j = 128 * bits[1] + 64 * bits[2] + 32 * bits[3] + X 16 * bits[4] + 8 * bits[5] + 4 * bits[6]; X X j = 256 - j; /* free byte images for these codes */ X X/* Equation: X bits[7] + bits[8] = k X 2 * bits[7] + bits[8] = j X*/ X j -= k; X if (j < 0 || k < j) { X printf("Huffman tree has more than 8 levels, reducing...\n"); X for (i = 0; i < N_POS; i++) X if (!freq[i]) X freq[i] = 1; X else if (reduceflag) X freq[i] = (freq[i] + 1) / 2; X reduceflag = 1; X goto newtry; X } else { X bits[7] = j; X bits[8] = k - j; X printf("%d %d %d %d %d %d %d %d (uncertainty = %d)\n", X bits[1], bits[2], bits[3], bits[4], X bits[5], bits[6], bits[7], bits[8], f); X } X sum = 0; cumul = 0; X for(i = 3; i <= F2; i++) { X cumul += (unsigned long) i * lens[i]; X sum += lens[i]; X } X sum || sum++; X printf("Average match length: %d.%02d\n", X average = cumul / sum, i = cumul * 100 / sum % 100); X if (i >= 50) average++; X j = sum; X percent = 0; X for (i = F2; i >= 3; i--) { X static pcs[] = { 999, 995, 990, 970, 950, 900, 800, 700, 500 }; X j -= lens[i]; X newpcs: X if (j <= sum * pcs[percent] / 1000) { X printf("Percentile %d.%d: %d\n", X pcs[percent] / 10, pcs[percent] % 10, i); X if (percent == sizeof(pcs)/sizeof(int) - 1) X break; X else { X percent++; X goto newpcs; X } X } X } X for (sigma2 = 0, i = 3; i <= F2; i++) X sigma2 += (unsigned long)(i - average)*(i - average)*lens[i]; X sigma2 = sigma2 * 100 / sum; X j = (int)isqrt(sigma2); X printf("Sigma: %d.%1d\n", j / 10, j % 10); X fflush(stdout); X} X X Xvoid freeze () X{ X register u_short i, len, r, s; X register short c; X StartHuff(0); X InitTree(); X s = 0; X r = N2 - F2; X for (i = s; i < r; i++) X text_buf[i] = ' '; X for (len = 0; len < F2 && (c = getchar()) != EOF; len++) X text_buf[r + len] = c; X in_count = len; X for (i = 0; i <= F2; i++) X InsertNode(r + i - F2); X while (len != 0) { X Get_Next_Match(r,1); X if (match_length > len) X match_length = len; X if (match_length <= THRESHOLD) { X match_length = 1; X } else if (greedy) { X lens[match_length] ++; X update((u_short)match_position >> 7); X } else { X register u_short orig_length, orig_position; X orig_length = match_length; X orig_position = match_position; X DeleteNode(s); X Next_Char(N2, F2); X Get_Next_Match(r,2); X if (match_length > len) match_length = len; X if (orig_length > match_length) { X lens[orig_length] ++; X update((u_short)orig_position >> 7); X match_length = orig_length - 1; X } else { X lens[match_length] ++; X update(match_position >> 7); X } X } X for (i = 0; i < match_length && X (c = getchar()) != EOF; i++) { X DeleteNode(s); X text_buf[s] = c; X if (s < F2 - 1) X text_buf[s + N2] = c; X s = (s + 1) & (N2 - 1); X r = (r + 1) & (N2 - 1); X InsertNode(r); X } X in_count += i; X if ((in_count > indc_count)) { X fprintf(stderr, "%5dK\b\b\b\b\b\b", in_count / 1024); X fflush (stderr); X indc_count += 4096; X } X while (i++ < match_length) { X DeleteNode(s); X s = (s + 1) & (N2 - 1); X r = (r + 1) & (N2 - 1); X if (--len) InsertNode(r); X } X } X} X Xvoid StartHuff(beg) { X int i; X for (i = beg; i < N_POS * 2 - 1; i++) X freq[i] = 0; X for (i = 0; i < N_POS * 2 - 1; i++) X used[i] = prnt[i] = 0; X} X Xint findmin(range) { X long min = (1 << 30) - 1, argmin = -1, i; X for (i = 0; i < range; i++) { X if(!used[i] && freq[i] < min) X min = freq[argmin = i]; X } X used[argmin] = 1; X return argmin; X} END_OF_FILE if test 5888 -ne `wc -c <'statist.c'`; then echo shar: \"'statist.c'\" unpacked with wrong size! fi # end of 'statist.c' fi echo shar: End of archive 2 \(of 2\). cp /dev/null ark2isdone MISSING="" for I in 1 2 ; do if test ! -f ark${I}isdone ; then MISSING="${MISSING} ${I}" fi done if test "${MISSING}" = "" ; then echo You have unpacked both archives. rm -f ark[1-9]isdone else echo You still must unpack the following archives: echo " " ${MISSING} fi exit 0 exit 0 # Just in case... -- Kent Landfield INTERNET: kent@sparky.IMD.Sterling.COM Sterling Software, IMD UUCP: uunet!sparky!kent Phone: (402) 291-8300 FAX: (402) 291-4362 Please send comp.sources.misc-related mail to kent@uunet.uu.net.