/*
 * The combine utility is a product of Harris, Inc. and is provided for
 * unrestricted use provided that this legend is included on all tape
 * media and as a part of the software program in whole or part.  Users
 * may copy, modify, license or distribute the combine utility without charge.
 * 
 * THE COMBINE UTILITY IS PROVIDED AS IS WITH NO WARRANTIES OF ANY KIND
 * INCLUDING THE WARRANTIES OF DESIGN, MERCHANTABILITY AND FITNESS FOR A
 * PARTICULAR PURPOSE, OR ARISING FROM A COURSE OF DEALING, USAGE OR TRADE
 * PRACTICE.
 * 
 * The combine utility is provided with no support and without any obligation
 * on the part of Harris, Inc. to assist in its use, correction,
 * modification or enhancement.
 * 
 * HARRIS, INC. SHALL HAVE NO LIABILITY WITH RESPECT TO THE
 * INFRINGEMENT OF COPYRIGHTS, TRADE SECRETS OR ANY PATENTS BY THE COMBINE
 * UTILITY OR ANY PART THEREOF.
 * 
 * In no event will Harris, Inc. be liable for any lost revenue
 * or profits or other special, indirect and consequential damages, even if
 * Harris has been advised of the possibility of such damages.
 * 
 * Harris Computer Systems Division
 * 2101 W Cypress Creek Rd
 * Fort Lauderdale, Florida 33309
 */
/*
 * EXTERN / INIT:
 *
 * Helpers that let this header both declare and define its globals.
 *
 * When ALLOC is defined before this file is included, EXTERN expands to
 * nothing and INIT( value ) expands to "= value", so every global below
 * becomes a definition with its initializer. Without ALLOC, EXTERN
 * expands to 'extern' and INIT() to nothing, leaving plain declarations.
 *
 * NOTE(review): presumably exactly one source file defines ALLOC --
 * confirm against the build.
 *
 * INIT takes a single macro argument, so it cannot carry a brace
 * initializer containing commas; the arrays below use an explicit
 * '#ifdef ALLOC' instead.
 */
#ifndef ALLOC
#   define EXTERN extern
#   define INIT( value )
#else
#   define EXTERN
#   define INIT( value ) = value
#endif

/*
 * Operating system dependent definitions.
 */

/* Type used to hold a record's file address (the 'rfa' field of a record). */
#define rfa_type long

/*
 * Size associated with file names.
 * NOTE(review): '$' in an identifier is a compiler extension, not
 * standard C; the name is kept because other files may reference it.
 */
#define SZ$_FILE_NAME 256

/* HI7: mask selecting the seven most significant bits of a machine word. */
#if defined( VOS )
#define HI7       077400000	/* 24-bit word (octal constant) */
#else
#define HI7       0xFE000000	/* 32-bit word */
#endif

/*
 * cache_entry_type:
 *
 * One node of the cache of lines. The cache is a doubly linked list of
 * these nodes; the nodes at the front of the list are the most recently
 * accessed. 'cache_head_ptr' and 'cache_tail_ptr' point to the head and
 * the tail of the list.
 */

typedef struct cache_entry_struct       cache_entry_type;

struct cache_entry_struct {

	/* Link to the next cache entry. */
	cache_entry_type * cache_next_ptr;

	/* Link to the previous cache entry. */
	cache_entry_type * cache_prev_ptr;

	/* The hash code for the line. A value of HASH_FREE_ENTRY
	   indicates this is a free cache entry. */
	int     hash_code;

	/* Length of the record. */
	int     record_length;

	/* Actual record contents. */
	char    *recordp;

	/* Allocated length of the recordp buffer. */
	int	record_alen;

};

/*
 * The cache list itself. Entries at the head are the most recently
 * accessed ones.
 */
#define CACHE_ENTRIES 500	/* Total number of cache entries */
EXTERN cache_entry_type * cache_head_ptr;/* Head of the cache linked list. */
EXTERN cache_entry_type * cache_tail_ptr;/* Tail of the cache linked list. */


#define LINE_LENGTH 135		/* Default line length; also used as the size
				   of the 'exec_time' buffer */

/* Maximum number of characters in a record */
/*
 * record_type:
 *
 * Describes a single record in a file.
 * The 'file_type' structure points to an array of these entries.
 */

/* Number of elements in the 'value' array of a record. */
#define MAX_VALUE_SUB 2

/*
 * Subscripts into the 'value' array. They select whether 'value[0]' or
 * 'value[1]' describes the relationship to a given other file.
 */
#define OLD_TO_NEW1         0	/* Old file: index into new1  */
#define OLD_TO_NEW2         1	/* Old file: index into new2  */
#define NEW1_TO_OLD         0	/* new1 file: index into old  */
#define NEW1_TO_NEW2        1	/* new1 file: index into new2 */
#define NEW2_TO_OLD         0	/* new2 file: index into old  */
#define NEW2_TO_NEW1        1	/* new2 file: index into new1 */

typedef struct record_struct    record_type;

struct record_struct {

	/*
	 * The record's file address. This is an operating system
	 * dependent token which can be used to seek to the record.
	 */
	rfa_type rfa;

	/*
	 * Relationship between this record and a record in each of the
	 * other two files. Each element holds one of:
	 *
	 * Negative:        hash code for the record (see the
	 *                  'is_hash_code' macro). A hash code is an
	 *                  index into the symbol table.
	 * 0 or Positive:   index into the record array of the other file.
	 */
	int     value[MAX_VALUE_SUB];

};

/*
 * Record index values:
 *
 * Every record array includes a special (dummy) record at the beginning
 * of the file and another at the end of the file. These definitions
 * describe that convention.
 */

#define BEGIN_INDEX 0		/* Index of the dummy begin record */
#define DUMMY_RECORD_COUNT 2	/* Number of dummy records (begin and end) */

/*
 * file_type:
 *
 * Describes a single input file. One structure of this type exists for
 * each of the 'old' file, the 'new1' file and the 'new2' file.
 */

/* Sizing of the record array. */
#define RA_ORIG	5000		/* Original # of records in record array */
#define RA_INCR 5000		/* Number of records to add on each increment */

typedef struct file_struct      file_type;

struct file_struct {

	/* Zero terminated name of the file. */
	char   *name_ptr;

	/* Zero terminated text describing the file. */
	char   *text_ptr;

	/* Zero terminated last written date. */
	char   *lw_ptr;

	/* Stream to use for sequential access. */
	FILE *  seq_fd;

	/* Stream to use for random access. */
	FILE *  rnd_fd;

	/* Number of lines in the file (including DUMMY_RECORD_COUNT). */
	int     record_array_size;

	/* Number of allocated entries in the record array. */
	int	record_array_alloc;

	/*
	 * Allocated array of record descriptions. Contains 0 if the
	 * file does not exist (i.e., this is the third file in a two
	 * file comparison).
	 */
	record_type * record;

	/*
	 * Per-file portion of the symbol table: an array indexed by
	 * 'hash_code', holding 'sym_tab_size' elements. Each element
	 * refers to an entry of 'record' for this file:
	 *
	 * 0:         This line is not in this file.
	 * negative:  This line is not unique in the file.
	 *            Value is the negated index of one of the records.
	 * positive:  This line occurs precisely once in the file.
	 *            Value is the index of the record.
	 */
	int    *sym_tab_index;
};

/*
 * Subscripts into the 'files' array, one per input file.
 */
#define OLD_FILE   0		/* Array index of 'old' file */
#define NEW1_FILE  1		/* Array index of 'new1' file */
#define NEW2_FILE  2		/* Array index of 'new2' file */
#define MAX_FILE_COUNT 3	/* Maximum number of files */

EXTERN int      file_count;	/* actual number of files */

EXTERN file_type files[MAX_FILE_COUNT];/* Description of each file. */

/*
 * For each record, six different relationships exist. That is,
 * for each of the three files there is a relationship to each of the
 * other two files.
 * The tables below describe the six relationships. All of them have
 * MATCH_COUNT elements and element i of every table describes the same
 * relationship. The initializers are only compiled when ALLOC is defined.
 */

#define MATCH_COUNT ( 2*MAX_FILE_COUNT )

EXTERN int      curr_file[MATCH_COUNT]
#ifdef ALLOC
= {
	OLD_FILE, OLD_FILE, NEW1_FILE, NEW1_FILE, NEW2_FILE, NEW2_FILE
}
#endif
	       ;

/* Array of subscripts into the 'files' array of the file which has the
   relationship (the 'curr' file) */

EXTERN int      corres_file[MATCH_COUNT]
#ifdef ALLOC
= {
	NEW1_FILE, NEW2_FILE, OLD_FILE, NEW2_FILE, OLD_FILE, NEW1_FILE
}
#endif
	       ;
/* Array of subscripts into the 'files' array of the file which the 'curr'
   file is related to (the 'corres' file) */


EXTERN int      other_file[MATCH_COUNT]
#ifdef ALLOC
= {
	NEW2_FILE, NEW1_FILE, NEW2_FILE, OLD_FILE, NEW1_FILE, OLD_FILE
}
#endif
	       ;
 /* Array of subscripts into the 'files' array of the file which is not
    involved in the current relationship (the 'other' file) */

EXTERN int      value_sub[MATCH_COUNT]
#ifdef ALLOC
= {
	OLD_TO_NEW1, OLD_TO_NEW2, NEW1_TO_OLD, NEW1_TO_NEW2,
	NEW2_TO_OLD, NEW2_TO_NEW1
}
#endif
	       ;
 /* Array of subscripts to the 'value' array. This subscript identifies which
    of the two relationships of the 'curr' file is being tested (the one
    from the 'curr' file to the 'corres' file). */

EXTERN int      rev_value_sub[MATCH_COUNT]
#ifdef ALLOC
= {
	NEW1_TO_OLD, NEW2_TO_OLD, OLD_TO_NEW1, NEW2_TO_NEW1,
	OLD_TO_NEW2, NEW1_TO_NEW2
}
#endif
	       ;
 /* Array of subscripts to the 'value' array. This subscript identifies the
    relationship between the 'corres' file and the 'curr' file */

EXTERN int      other_value_sub[MATCH_COUNT]
#ifdef ALLOC
= {
	NEW2_TO_OLD, NEW1_TO_OLD, NEW2_TO_NEW1, OLD_TO_NEW1,
	NEW1_TO_NEW2, OLD_TO_NEW2
}
#endif
	       ;
 /* Array of subscripts to the 'value' array. This subscript identifies the
    relationship between the 'other' file and the 'curr' file */


#define UNIQUE_MATCH_COUNT (MATCH_COUNT / 2)

EXTERN int      unique_match[UNIQUE_MATCH_COUNT]
#ifdef ALLOC
= {
	0, 1, 3
}
#endif
	       ;
 /* Array of subscripts into the relation arrays defined above. These are the
    subscripts of the relations pairing each pair of files precisely once
    (old/new1, old/new2 and new1/new2). */
/*
 * is_hash_code:
 *
 * Tells whether a value taken from the record array is a hash code or an
 * index into another file's record array. The test relies on the fact
 * that all hash codes stored there are negative.
 *
 * Parameter:
 *      val: The value from the file array.
 *
 * Return value:
 *      TRUE:  The value represents a hash code.
 *      FALSE: The value represents an index into a file array.
 */

#define is_hash_code( val )  (0 > (val))
/*
 * Options:
 *
 * The initial values given through INIT() only take effect in the source
 * file that defines ALLOC.
 */

EXTERN bool blank_compress INIT (FALSE);
				/* TRUE if blank compression is desired */

EXTERN bool blank_remove INIT (FALSE);/* TRUE if blank removal is desired */

EXTERN bool compress_records INIT (FALSE);
				/* TRUE if any record compression needs to
				   occur */

EXTERN int      prefix_lines INIT (5);/* Number of prefix lines */

EXTERN int      postfix_lines INIT (5);/* Number of postfix lines */

EXTERN bool quiet_option INIT (FALSE);
				/* TRUE if COMBINE is to be quiet if there are
				   no differences */

EXTERN bool pa_debug INIT (FALSE);/* TRUE for generic debugging */
EXTERN bool p1_debug INIT (FALSE);/* TRUE for debug of pass 1 */
EXTERN bool p2_debug INIT (FALSE);/* TRUE for debug of pass 2 */
EXTERN bool p3_debug INIT (FALSE);/* TRUE for debug of pass 3 */
EXTERN bool p4_debug INIT (FALSE);/* TRUE for debug of pass 4 */
EXTERN bool p5_debug INIT (FALSE);/* TRUE for debug of pass 5 */

EXTERN bool statistics_flag INIT (FALSE);/* TRUE to output statistics */

EXTERN bool hed_flag INIT (FALSE);/* TRUE to output hed file */

EXTERN char     exec_time[LINE_LENGTH];/* Begin execution time */

/*
 * Column specifications:
 *
 * Up to MAX_COLUMNS column ranges may be given. Range i runs from
 * first_column[i] to last_column[i]; 'column_count' is the number of
 * ranges actually in use.
 */

#define MAX_COLUMNS 32		/* maximum number of column ranges */

EXTERN int      column_count INIT (0);/* Actual number of column ranges */

EXTERN int      first_column[MAX_COLUMNS];/* first column to compare */
				/* Column numbers are 0 relative */

EXTERN int      last_column[MAX_COLUMNS];/* last column to compare */
 				/* Column numbers are 0 relative */

/*
 * other_sub:
 *
 * Given the subscript of one element of the 'value' array, yields the
 * subscript of the other element. This only works because the value
 * array has exactly two elements (subscripts 0 and 1).
 *
 * Parameter:
 *      sub: Subscript into the value array.
 *
 * Return value:
 *      The other subscript.
 */

#define other_sub( sub )  ( 1 - (sub) )

/*
 * Primes: list of prime numbers.
 *
 * This array defines a set of prime numbers. For each multiple of 1024,
 * this table contains the prime number which is less than but closest
 * to that number.
 *
 * The list is terminated by a -1. Because the array is declared without
 * a size, files that do not define ALLOC cannot take its sizeof and must
 * scan for the terminator.
 *
 * NOTE(review): the final entry, 224737, does not follow the pattern
 * above (it lies well below the next multiple of 1024, 225280).
 * Presumably it is a deliberate upper limit -- confirm before changing.
 */

EXTERN int      primes[]
#ifdef ALLOC
= {
	1021, 2039, 3067, 4093, 5119, 6143, 7159, 8191, 9209,
	10223, 11261, 12281, 13309, 14327, 15359, 16381, 17401, 18427,
	19447, 20479, 21503, 22511, 23549, 24571, 25589, 26597, 27647,
	28669, 29683, 30713, 31741, 32749, 33791, 34807, 35839, 36857,
	37879, 38903, 39929, 40949, 41983, 43003, 44029, 45053, 46073,
	47093, 48121, 49139, 50159, 51199, 52223, 53239, 54269, 55291,
	56311, 57331, 58367, 59387, 60413, 61417, 62459, 63487, 64499,
	65521, 66553, 67579, 68597, 69623, 70639, 71671, 72701, 73727,
	74747, 75773, 76781, 77813, 78839, 79867, 80863, 81919, 82939,
	83939, 84991, 86011, 87037, 88037, 89087, 90107, 91129, 92153,
	93179, 94207, 95231, 96233, 97259, 98299, 99317, 100343, 101363,
	102397, 103423, 104417, 105467, 106487, 107509, 108541, 109567,
	110587, 111611, 112621, 113657, 114679, 115693, 116731, 117757,
	118757, 119797, 120829, 121853, 122869, 123887, 124919, 125941,
	126967, 127997, 129023, 130043, 131071, 132071, 133117, 134129,
	135151, 136189, 137209, 138239, 139241, 140281, 141311, 142327,
	143357, 144383, 145399, 146423, 147451, 148471, 149503, 150523,
	151549, 152567, 153589, 154621, 155627, 156671, 157679, 158699,
	159739, 160757, 161783, 162791, 163819, 164839, 165887, 166909,
	167917, 168943, 169957, 171007, 172031, 173053, 174079, 175103,
	176123, 177131, 178169, 179173, 180221, 181243, 182261, 183289,
	184309, 185327, 186343, 187387, 188407, 189439, 190409, 191473,
	192499, 193513, 194543, 195581, 196597, 197621, 198647, 199679,
	200699, 201709, 202751, 203773, 204797, 205823, 206827, 207869,
	208891, 209917, 210943, 211949, 212987, 214009, 214993, 216061,
	217081, 218111, 219133, 220151, 221173, 222199, 223229, 224251,
	224737,
	-1
}
#endif
	       ;

/*
 * relate_type:
 *
 * This structure describes the relationship between a particular
 * record of a particular file and the corresponding records in the other
 * files.
 *
 * This structure is built by 'pass5_analyze_relationship'. It is used by
 * all of the other pass5 routines to determine whether the current
 * record is the next one to be output.
 */

/*
 * Values of the 'relation' field. Each file contributes one bit: the
 * zeroth file (OLD_FILE) is the least significant bit, and so on. The
 * combined values are sums of the single-file values.
 *
 * INSERT_EOT is negative and therefore parenthesized, so that it stays a
 * single operand wherever the macro is expanded.
 * NOTE(review): INSERT_EOT presumably marks the end of the text --
 * confirm against the pass5 routines.
 */
#define INSERT_NONE           0
#define INSERT_OLD            1
#define INSERT_NEW1           2
#define INSERT_OLD_NEW1       (INSERT_OLD + INSERT_NEW1)
#define INSERT_NEW2           4
#define INSERT_OLD_NEW2       (INSERT_OLD + INSERT_NEW2)
#define INSERT_NEW1_NEW2      (INSERT_NEW1 + INSERT_NEW2)
#define INSERT_OLD_NEW1_NEW2  (INSERT_OLD + INSERT_NEW1 + INSERT_NEW2)
#define INSERT_EOT            (-1)

struct relate_struct;
typedef struct relate_struct    relate_type;

struct relate_struct {

	int     index[MAX_FILE_COUNT];
				/* Index that this record appears at in the
				   file. Value will be a hash code if this
				   record is not in the corresponding file */

	bool current[MAX_FILE_COUNT];
				/* TRUE if the record is at the current
				   position in the corresponding file */

	int     relation;	/* A summary of the relationship of this record
				   to the current record in each file; one of
				   the INSERT_* values above */

	bool moved;		/* TRUE if this record is involved in a record
				   movement. */

	bool in_all;		/* TRUE if the record is at the current
				   position in all of the files. */

};

/*
 * Statistics:
 *
 * Counters gathered during a run.
 */

EXTERN int      cache_miss;	/* total number of cache misses. */

EXTERN int      hash_collisions;/* total number of hash collisions */

EXTERN int      old_new1_change_count;
				/* Number of differences between old and new1
				   files */

EXTERN int      old_new2_change_count;
				/* Number of differences between old and new2
				   files */

EXTERN int      new1_new2_change_count;
				/* Number of differences between new1 and new2
				   files */

/*
 * Symbol Table:
 *
 * This section describes the symbol table.
 * Each entry in the symbol table represents a record in one of the files.
 * The contents of each record are hashed.
 * The hash value is used as an index into the arrays.
 * If the hash value is not unique, a re-hash is performed until a
 * unique hash value is obtained.
 *
 * The symbol table is organized as four arrays of entries.
 * There is one array for each file ('sym_tab_index' in each file
 * description) and one array of cache entry pointers described below.
 */

/*
 * The index into the symbol table is a hash code. Hash codes are positive.
 * Significant hash codes include:
 *
 * 0: Not valid
 * 1: begin record
 * 2: end record
 * 3: eof (some archaic operating systems allow multiple eof's in a file.)
 *
 * NOTE(review): the 'value' field of a record holds hash codes as
 * negative numbers (see 'is_hash_code'); presumably the code is negated
 * when stored there -- confirm.
 */

#define HASH_FREE_ENTRY 0	/* A hash code of this value indicates a free
				   entry. This value is used in a cache entry
				   to indicate an unused cache entry */

/*
 * The cache ptr below is a pointer to the cache entry for the line.
 * Valid values are:
 *
 * CACHE_FREE_ENTRY:       This symbol table entry is unused.
 * CACHE_NOT_IN_CACHE:     This line is no longer in the cache.
 * anything else:          Pointer to cache entry.
 */

EXTERN cache_entry_type * *sym_tab_cache_ptr;
				/* Pointer to table of pointers to cache
				   entries */

/*
 * Special values stored in the table of cache entry pointers in place of
 * a real pointer.
 */
#define CACHE_FREE_ENTRY 0	/* Symbol table entry is unused */
 /* The code depends on the fact that the allocator zeros this entry upon
    allocation */

/* Negative, hence parenthesized so that it stays a single operand
   wherever the macro is expanded. */
#define CACHE_NOT_IN_CACHE (-1)	/* This line no longer in cache */

EXTERN int      sym_tab_size;	/* Number of entries in the symbol table;
				   each per-file 'sym_tab_index' array has
				   this many elements. */
/*
 * Procedure forwards:
 *
 * These are old-style declarations without parameter lists, so the
 * compiler does not check the arguments passed at the call sites.
 * NOTE(review): the parameter lists are not visible from this header;
 * convert these to full prototypes once they are confirmed against the
 * definitions.
 */

/* Debugging and statistics output */
void dump_arrays() ;
void dump_statistics() ;
void dump_sym_tab() ;

/* General support routines */
void error() ;
char * mem_alloc() ;
void init() ;
void link_records() ;

/* Pass 1 */
void pass1() ;
int  pass1_read_record() ;
void pass1_record_compress() ;

/* Pass 2 */
void pass2() ;

/* Pass 3 */
void pass3() ;
void pass3_scan() ;

/* Pass 4 */
void pass4() ;
void pass4_scan() ;

/* Pass 5 */
void pass5() ;
void pass5_analyze_relationship() ;
int  pass5_move() ;
void pass5_dump_record() ;
void pass5_write_hed() ;
void pass5_write_listing() ;
void pass5_write_listing_line() ;
void pass5_write_listing_head() ;
