From pa.dec.com!nntpd.lkg.dec.com!decuac!haven.umd.edu!ames!think.com!samsung!uunet!sparky!kent Sun Jun 30 15:24:01 PDT 1991 Article: 2461 of comp.sources.misc Path: pa.dec.com!nntpd.lkg.dec.com!decuac!haven.umd.edu!ames!think.com!samsung!uunet!sparky!kent From: leo@s514.ipmce.su (Leonid A. Broukhis) Newsgroups: comp.sources.misc Subject: v20i070: Freeze/Melt compression program, Patch03 Message-ID: <1991Jun27.172727.570@sparky.IMD.Sterling.COM> Date: 27 Jun 91 17:27:27 GMT Sender: kent@sparky.IMD.Sterling.COM (Kent Landfield) Organization: Inst. of Prec. Mech. & Comp. Equip., Moscow, USSR Lines: 834 Approved: kent@sparky.imd.sterling.com X-Md4-Signature: 6c078afae3add3d77e3d5449b4403aec Submitted-by: Leonid A. Broukhis Posting-number: Volume 20, Issue 70 Archive-name: freeze/patch03 Patch-To: freeze: Volume 17, Issue 67-68 Here is the 3rd patch for Freeze/melt program (bug fixes, some compression speedup, etc.) but I think that Brad Templeton's compression program will bury my Freeze out. This patch is probably last (if no more bugs will be found). ------------------ cut here --------------------- *** ../distribution/README Wed Mar 27 19:44:41 1991 --- README Tue Jun 25 13:05:32 1991 *************** *** 151,157 **** Compression rate is *INDEPENDENT* of the hash table size, but may vary with different static Huffman tables. It is about 2% worse than the same ! of ARJ and LHA. My aim is not the maximum compression, the same range is enough. :-) --- 151,158 ---- Compression rate is *INDEPENDENT* of the hash table size, but may vary with different static Huffman tables. It is about 2% worse than the same ! of ARJ 1.00 and LHA 2.05, but, unfortunately, ARJ 2.00 beats Freeze ! on 8%. My aim is not the maximum compression, the same range is enough. :-) *************** *** 166,168 **** --- 167,172 ---- Encode/Decode_Char/Position), so if you want the speed and/or compression rate `a la vogue' you may replace the low-level routines with the homebrew (f. ex.) ones and enjoy the results. + + (I tried to implement splay trees instead of Huffman ones and instead of + static table for position information, but this gives nothing.) *** ../distribution/bitio.c Mon May 13 17:05:09 1991 --- bitio.c Thu Jun 13 21:15:14 1991 *************** *** 6,15 **** short GetByte () { ! register u_short dx = getbuf; register u_short c; ! if (getlen <= 8) { c = getchar (); if ((short)c < 0) { --- 6,15 ---- short GetByte () { ! register u_short dx = bitbuf; register u_short c; ! if (bitlen <= 8) { c = getchar (); if ((short)c < 0) { *************** *** 22,32 **** corrupt_flag = 1; c = 0; } ! dx |= c << (8 - getlen); ! getlen += 8; } ! getbuf = dx << 8; ! getlen -= 8; return (dx >> 8) & 0xff; } --- 22,32 ---- corrupt_flag = 1; c = 0; } ! dx |= c << (8 - bitlen); ! bitlen += 8; } ! bitbuf = dx << 8; ! bitlen -= 8; return (dx >> 8) & 0xff; } *************** *** 36,42 **** short GetNBits (n) register u_short n; { ! register u_short dx = getbuf; register u_short c; static u_short mask[17] = { --- 36,42 ---- short GetNBits (n) register u_short n; { ! register u_short dx = bitbuf; register u_short c; static u_short mask[17] = { *************** *** 50,56 **** 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }; ! if (getlen <= 8) { c = getchar (); if ((short)c < 0) { --- 50,56 ---- 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }; ! if (bitlen <= 8) { c = getchar (); if ((short)c < 0) { *************** *** 59,69 **** corrupt_flag = 1; c = 0; } ! dx |= c << (8 - getlen); ! getlen += 8; } ! getbuf = dx << n; ! getlen -= n; return (dx >> shift[n]) & mask[n]; } --- 59,69 ---- corrupt_flag = 1; c = 0; } ! dx |= c << (8 - bitlen); ! bitlen += 8; } ! bitbuf = dx << n; ! bitlen -= n; return (dx >> shift[n]) & mask[n]; } *************** *** 72,79 **** register u_short l; u_short c; { ! register u_short len = putlen; ! register u_short b = putbuf; b |= c >> len; if ((len += l) >= 8) { putchar ((int)(b >> 8)); --- 72,79 ---- register u_short l; u_short c; { ! register u_short len = bitlen; ! register u_short b = bitbuf; b |= c >> len; if ((len += l) >= 8) { putchar ((int)(b >> 8)); *************** *** 89,94 **** } if (ferror(stdout)) writeerr(); ! putbuf = b; ! putlen = len; } --- 89,94 ---- } if (ferror(stdout)) writeerr(); ! bitbuf = b; ! bitlen = len; } *** ../distribution/encode.c Mon May 13 17:05:10 1991 --- encode.c Thu Jun 13 22:11:35 1991 *************** *** 1,5 **** #include "freeze.h" - #include "lz.h" /* --- 1,4 ---- *************** *** 91,99 **** if (verbose) { register short pos = (r - 1 - match_position) & (N - 1), ! len = match_length; fputc('"', stderr); ! for(;len;len--, pos++) fprintf(stderr, "%s", pr_char(text_buf[pos])); fprintf(stderr, "\"\n"); --- 90,98 ---- if (verbose) { register short pos = (r - 1 - match_position) & (N - 1), ! leng = match_length; fputc('"', stderr); ! for(; leng; leng--, pos++) fprintf(stderr, "%s", pr_char(text_buf[pos])); fprintf(stderr, "\"\n"); *** ../distribution/freeze.c Mon May 13 17:05:11 1991 --- freeze.c Fri Jun 7 18:59:17 1991 *************** *** 25,30 **** --- 25,31 ---- * are now separated. * Version 2.2: Tunable static Huffman table for position information, * this info may be given in the command string now. + * Version 2.2.3: Bug fixes, 10% freezing speedup. */ #ifdef COMPAT *************** *** 510,517 **** else if (debug == 0) melt(); else printcodes(); #endif /* DEBUG */ if(topipe == 0) { ! copystat(*fileptr, ofname); /* Copy stats */ if((exit_stat == 1) || (!quiet)) putc('\n', stderr); } --- 511,523 ---- else if (debug == 0) melt(); else printcodes(); #endif /* DEBUG */ + + /* check output status, and close to make sure data is written */ + if ( ferror(stdout) || fclose(stdout) < 0 ) + writeerr(); + if(topipe == 0) { ! copystat(*fileptr); /* Copy stats */ if((exit_stat == 1) || (!quiet)) putc('\n', stderr); } *************** *** 563,568 **** --- 569,575 ---- } } exit(exit_stat); + /*NOTREACHED*/ } long in_count = 1; /* length of input */ *************** *** 612,619 **** exit ( 1 ); } ! copystat(ifname, ofname) ! char *ifname, *ofname; { #ifdef __TURBOC__ struct ftime utimbuf; --- 619,626 ---- exit ( 1 ); } ! copystat(ifname) ! char *ifname; { #ifdef __TURBOC__ struct ftime utimbuf; *************** *** 660,666 **** perror(ofname); #ifndef MSDOS ! chown(ofname, statbuf.st_uid, statbuf.st_gid); /* Copy ownership */ #endif #ifdef __TURBOC__ --- 667,674 ---- perror(ofname); #ifndef MSDOS ! /* Copy ownership */ ! chown(ofname, (int) statbuf.st_uid, (int) statbuf.st_gid); #endif #ifdef __TURBOC__ *** ../distribution/huf.c Mon May 13 18:54:10 1991 --- huf.c Thu Jun 13 21:15:14 1991 *************** *** 7,20 **** /* */ /*----------------------------------------------------------------------*/ - #ifdef COMPAT - #undef N - #undef F - #define F (new_flg ? _F : _FO) - #define N (new_flg ? _NN : _NO) - #endif - - /* TABLE OF ENCODE/DECODE for upper 6 bits position information */ /* The contents of this table are used for freezing only, so we use --- 7,12 ---- *************** *** 40,53 **** short son[_T]; /* points to son node (son[i],son[i+]) */ ! u_short getbuf = 0; ! uchar getlen = 0; uchar corrupt_flag = 0; /* If a file is corrupt, use fcat */ - u_short putbuf = 0; - uchar putlen = 0; - /* Initialize tree */ StartHuff () --- 32,42 ---- short son[_T]; /* points to son node (son[i],son[i+]) */ ! u_short bitbuf = 0; ! uchar bitlen = 0; uchar corrupt_flag = 0; /* If a file is corrupt, use fcat */ /* Initialize tree */ StartHuff () *************** *** 80,87 **** #ifdef DEBUG symbols_out = refers_out = 0; #endif ! putlen = getlen = 0; ! putbuf = getbuf = 0; corrupt_flag = 0; } --- 69,76 ---- #ifdef DEBUG symbols_out = refers_out = 0; #endif ! bitlen = 0; ! bitbuf = 0; corrupt_flag = 0; } *************** *** 218,225 **** EncodeEnd () { ! if (putlen) { ! putchar((int)(putbuf >> 8)); bytes_out++; if (ferror(stdout)) writeerr(); --- 207,214 ---- EncodeEnd () { ! if (bitlen) { ! putchar((int)(bitbuf >> 8)); bytes_out++; if (ferror(stdout)) writeerr(); *************** *** 228,243 **** short DecodeChar () { ! register u_short c; ! register u_short dx; ! register u_short cc; c = son[_R]; /* trace from root to leaf, got bit is 0 to small(son[]), 1 to large (son[]+1) son node */ while (c < _T) { ! dx = getbuf; ! if (getlen <= 8) { if ((short)(cc = getchar()) < 0) { if (corrupt_flag) { corrupt_message(); --- 217,230 ---- short DecodeChar () { ! register u_short c, cc, dx; c = son[_R]; /* trace from root to leaf, got bit is 0 to small(son[]), 1 to large (son[]+1) son node */ while (c < _T) { ! dx = bitbuf; ! if (bitlen <= 8) { if ((short)(cc = getchar()) < 0) { if (corrupt_flag) { corrupt_message(); *************** *** 246,256 **** corrupt_flag = 1; cc = 0; } ! dx |= cc << (8 - getlen); ! getlen += 8; } ! getbuf = dx << 1; ! getlen--; c += (dx >> 15) & 1; c = son[c]; } --- 233,243 ---- corrupt_flag = 1; cc = 0; } ! dx |= cc << (8 - bitlen); ! bitlen += 8; } ! bitbuf = dx << 1; ! bitlen--; c += (dx >> 15) & 1; c = son[c]; } *************** *** 321,328 **** /* trace from root to leaf, got bit is 0 to small(son[]), 1 to large (son[]+1) son node */ while (c < _TO) { ! dx = getbuf; ! if (getlen <= 8) { if ((short)(cc = getchar()) < 0) { if (corrupt_flag) { corrupt_message(); --- 308,315 ---- /* trace from root to leaf, got bit is 0 to small(son[]), 1 to large (son[]+1) son node */ while (c < _TO) { ! dx = bitbuf; ! if (bitlen <= 8) { if ((short)(cc = getchar()) < 0) { if (corrupt_flag) { corrupt_message(); *************** *** 331,341 **** corrupt_flag = 1; cc = 0; } ! dx |= cc << (8 - getlen); ! getlen += 8; } ! getbuf = dx << 1; ! getlen--; c += (dx >> 15) & 1; c = son[c]; } --- 318,328 ---- corrupt_flag = 1; cc = 0; } ! dx |= cc << (8 - bitlen); ! bitlen += 8; } ! bitbuf = dx << 1; ! bitlen--; c += (dx >> 15) & 1; c = son[c]; } *** ../distribution/huf.h Wed Mar 27 19:45:04 1991 --- huf.h Thu Jun 13 21:15:15 1991 *************** *** 1,9 **** ! extern u_short getbuf; ! extern uchar getlen; extern uchar corrupt_flag; - - extern u_short putbuf; - extern uchar putlen; #define MAX_FREQ (u_short)0x8000 /* tree update timing from frequency */ --- 1,6 ---- ! extern u_short bitbuf; ! extern uchar bitlen; extern uchar corrupt_flag; #define MAX_FREQ (u_short)0x8000 /* tree update timing from frequency */ *** ../distribution/lz.c Mon May 13 15:12:09 1991 --- lz.c Fri Jun 7 18:48:53 1991 *************** *** 18,28 **** hash_t prev[N + 1]; #ifndef __XENIX__ - u_short #ifdef __TURBOC__ ! huge ! #endif ! next[array_size]; #else #if parts == 2 u_short next0[32768], next1[8193]; --- 18,28 ---- hash_t prev[N + 1]; #ifndef __XENIX__ #ifdef __TURBOC__ ! u_short huge * next = (u_short huge *) NULL; ! #else ! u_short next[array_size]; ! #endif /* TURBOC */ #else #if parts == 2 u_short next0[32768], next1[8193]; *************** *** 66,71 **** --- 66,80 ---- node_steps = node_matches = 0; #endif + #ifdef __TURBOC__ + if (!next && (next = (u_short huge *) + farmalloc(array_size * sizeof(u_short))) == NULL) { + fprintf(stderr, "Not enough memory (%dK wanted)\n", + array_size * sizeof(u_short) / 1024); + exit(3); + } + #endif + for (i = N + 1; i < array_size; i++ ) nextof(i) = NIL; for (i = 0; i < sizeof(prev)/sizeof(*prev); i++ ) *************** *** 76,119 **** Get_Next_Match (r) u_short r; { ! register uchar *key; ! register u_short m, i, p; #ifdef GATHER_STAT node_matches++; #endif - key = &text_buf[p = r]; - m = 0; - while (m < _F) { - if ((p = nextof(p)) == NIL) { - match_length = m; - return; - } ! /* This statement is due to ideas of Boyer and Moore: */ ! if(key[m] != text_buf[p + m]) ! continue; ! /* This statement is due to my ideas: :-) */ ! /* It gives up to 8% speedup on files with big redundancy (text, etc.) */ ! if(key[m >> 1] != text_buf[p + (m >> 1)]) ! continue; #ifdef GATHER_STAT node_steps++; #endif ! /* This statement doesn't take a lot of execution time - ! about 20% (in profiling we trust) ! */ ! for (i = 0; i < _F && key[i] == text_buf[p + i]; i++); ! ! if (i > m) { ! match_position = ((r - p) & (N - 1)) - 1; ! m = i; ! } ! } #ifdef DEBUG if (verbose) fprintf(stderr, "Replacing node: %d -> %d\n", p, r); --- 85,121 ---- Get_Next_Match (r) u_short r; { ! register u_short p = r; ! register int m; ! register uchar *key = text_buf + r, *pattern; #ifdef GATHER_STAT node_matches++; #endif ! match_length = 0; ! do { ! do { ! if ((p = nextof(p)) == NIL) ! return; ! pattern = text_buf + p; ! /* This statement is due to ideas of Boyer and Moore: */ ! for (m = match_length; ! m >= 0 && key[m] == pattern[m]; m--); ! } while (m >= 0); #ifdef GATHER_STAT node_steps++; #endif ! for (m = match_length; ! ++m < _F && key[m] == pattern[m]; ); ! ! match_length = m; ! match_position = ((r - p) & (N - 1)) - 1; ! } while (m < _F); #ifdef DEBUG if (verbose) fprintf(stderr, "Replacing node: %d -> %d\n", p, r); *************** *** 121,125 **** nextof(prev[p]) = nextof(p); prev[nextof(p)] = prev[p]; prev[p] = NIL; /* remove p, it is further than r */ - match_length = _F; } --- 123,126 ---- *** ../distribution/lz.h Mon May 13 17:05:12 1991 --- lz.h Tue Jun 25 12:40:25 1991 *************** *** 36,46 **** #ifndef __XENIX__ #define nextof(i) next[i] - extern u_short #ifdef __TURBOC__ ! huge ! #endif ! next[]; #else #define parts (array_size/32768 + 1) #define nextof(i) next[(i) >> 15][(i) & 0x7fff] --- 36,46 ---- #ifndef __XENIX__ #define nextof(i) next[i] #ifdef __TURBOC__ ! extern u_short huge * next; ! #else ! extern u_short next[]; ! #endif /* TURBOC */ #else #define parts (array_size/32768 + 1) #define nextof(i) next[(i) >> 15][(i) & 0x7fff] *** ../distribution/makefile Mon May 13 17:05:13 1991 --- makefile Tue Jun 25 12:26:33 1991 *************** *** 1,3 **** --- 1,4 ---- + SHELL = /bin/sh DEST = /usr/local/bin MANDEST = /usr/local/man/cat1 EXTHDRS = *************** *** 9,15 **** CC = gcc ! CFLAGS = -DBITS=18 -O -DCOMPAT -fstrength-reduce #-DBSD42 -DSUN4 LINTFLAGS = -DBITS=15 -DCOMPAT -DDEBUG -DGATHER_STAT -x --- 10,16 ---- CC = gcc ! CFLAGS = -DBITS=18 -O -fstrength-reduce #-DBSD42 -DSUN4 LINTFLAGS = -DBITS=15 -DCOMPAT -DDEBUG -DGATHER_STAT -x *************** *** 42,48 **** all: $(PROGRAM) statist $(CATMAN) lint: $(SRCS) ! lint $(LINTFLAGS) $(SRCS) $(LIBS) $(PROGRAM): $(OBJS) $(CC) $(CFLAGS) $(LDFLAGS) $(OBJS) $(LIBS) -o $(PROGRAM) --- 43,49 ---- all: $(PROGRAM) statist $(CATMAN) lint: $(SRCS) ! lint $(LINTFLAGS) $(SRCS) > lint.out $(PROGRAM): $(OBJS) $(CC) $(CFLAGS) $(LDFLAGS) $(OBJS) $(LIBS) -o $(PROGRAM) *************** *** 50,58 **** statist: statist.c freeze.h lz.h huf.h $(CC) $(CFLAGS) -UCOMPAT $(LDFLAGS) -o statist statist.c $(LIBS) ! clean:; rm -f *.o *.b .,* core a.out $(PROGRAM) statist install: $(DEST)/$(PROGRAM) $(MANDEST)/$(MAN) $(DEST)/$(PROGRAM): $(PROGRAM) install -s -c $(PROGRAM) $(DEST) --- 51,64 ---- statist: statist.c freeze.h lz.h huf.h $(CC) $(CFLAGS) -UCOMPAT $(LDFLAGS) -o statist statist.c $(LIBS) ! clean:; rm -f *.o *.b .,* core *.out $(PROGRAM) statist install: $(DEST)/$(PROGRAM) $(MANDEST)/$(MAN) + + patch:; rm -f patch.out + -for i in ../distribution/* ; do \ + (diff -c $$i `basename $$i` >> patch.out); \ + done $(DEST)/$(PROGRAM): $(PROGRAM) install -s -c $(PROGRAM) $(DEST) *** ../distribution/patchlevel.h Mon May 13 17:05:13 1991 --- patchlevel.h Tue Jun 4 20:12:31 1991 *************** *** 1 **** ! #define PATCHLEVEL 2 --- 1 ---- ! #define PATCHLEVEL 3 *** ../distribution/statist.c Mon May 13 15:12:12 1991 --- statist.c Tue May 14 21:18:45 1991 *************** *** 69,78 **** #endif giveres() { u_short c; signal(SIGINT, giveres); for(c = 0; c < 62; c++) { register short *p; - register int j, k; j = 0; p = prnt; --- 69,78 ---- #endif giveres() { u_short c; + register int j, k; signal(SIGINT, giveres); for(c = 0; c < 62; c++) { register short *p; j = 0; p = prnt; *************** *** 79,92 **** k = p[c + T]; do j++; while ((k = p[k]) != R) ; ! if (j <= 8) bits[j]++; } ! printf("%d %d %d %d %d %d %d %d\n", ! bits[1], bits[2], bits[3], bits[4], ! bits[5], bits[6], bits[7], bits[8]); fflush(stdout); ! for (c = 1; c <= 8; c++) bits[c] = 0; } freeze () --- 79,114 ---- k = p[c + T]; do j++; while ((k = p[k]) != R) ; ! if (j <= 6) bits[j]++; } ! ! k = bits[1] + bits[2] + bits[3] + bits[4] + ! bits[5] + bits[6]; ! ! k = 62 - k; /* free variable length codes for 7 & 8 bits */ ! ! j = 128 * bits[1] + 64 * bits[2] + 32 * bits[3] + ! 16 * bits[4] + 8 * bits[5] + 4 * bits[6]; ! ! j = 256 - j; /* free byte images for these codes */ ! ! /* Equation: ! bits[7] + bits[8] = k ! 2 * bits[7] + bits[8] = j ! */ ! j -= k; ! if (j < 0 || k < j) { ! printf("Not enough information\n"); ! } else { ! bits[7] = j; ! bits[8] = k - j; ! printf("%d %d %d %d %d %d %d %d\n", ! bits[1], bits[2], bits[3], bits[4], ! bits[5], bits[6], bits[7], bits[8]); ! } fflush(stdout); ! for (c = 1; c <= 6; c++) bits[c] = 0; } freeze () ------------------ cut here --------------------- -- Leonid A. Broukhis | 89-1-95 Liberty St. | "BROUKHIS" is Hebrew for 7+095 494.6241 (h) | Moscow 123481 USSR | "BENEDICTAE" 7+095 132.9475 (o) | (leo@s514.ipmce.su) | {Licet omnia qualibet dicas} exit 0 # Just in case... -- Kent Landfield INTERNET: kent@sparky.IMD.Sterling.COM Sterling Software, IMD UUCP: uunet!sparky!kent Phone: (402) 291-8300 FAX: (402) 291-4362 Please send comp.sources.misc-related mail to kent@uunet.uu.net.