/*
 * The combine utility is a product of Harris, Inc. and is provided for
 * unrestricted use provided that this legend is included on all tape
 * media and as a part of the software program in whole or part.  Users
 * may copy, modify, license or distribute the combine utility without charge.
 * 
 * THE COMBINE UTILITY IS PROVIDED AS IS WITH NO WARRANTIES OF ANY KIND
 * INCLUDING THE WARRANTIES OF DESIGN, MERCHANTABILITY AND FITNESS FOR A
 * PARTICULAR PURPOSE, OR ARISING FROM A COURSE OF DEALING, USAGE OR TRADE
 * PRACTICE.
 * 
 * The combine utility is provided with no support and without any obligation
 * on the part of Harris, Inc. to assist in its use, correction,
 * modification or enhancement.
 * 
 * HARRIS, INC. SHALL HAVE NO LIABILITY WITH RESPECT TO THE
 * INFRINGEMENT OF COPYRIGHTS, TRADE SECRETS OR ANY PATENTS BY THE COMBINE
 * UTILITY OR ANY PART THEREOF.
 * 
 * In no event will Harris, Inc. be liable for any lost revenue
 * or profits or other special, indirect and consequential damages, even if
 * Harris has been advised of the possibility of such damages.
 * 
 * Harris Computer Systems Division
 * 2101 W Cypress Creek Rd
 * Fort Lauderdale, Florida 33309
 */
#include <ctype.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "util.h"
#include "combine.h"
/*
 * main: Main program for the COMBINE utility
 *
 * This routine is the driver for the utility.  It sets up buffering for
 * the standard output, calls init() to process the command line, and
 * then runs pass1() through pass5() in order.  When the matching debug
 * flag (or the "all passes" flag) is set, a progress message is written
 * to stderr before a pass and the internal tables are dumped after it.
 *
 * Return value:
 *      This procedure has no return value; it always leaves through
 *      exit().  The status is 0 when no differences were counted between
 *      the files and 1 otherwise.
 */

void main (argc, argv)
int     argc;			/* command line argument count */

char  **argv;			/* command line arguments */

{

	struct stat     stat_buf;/* Buf. to find the output block size */

	/*
	 * When the output is not a terminal, buffer it fully using the
	 * block size reported for the output file.  The buffer is only
	 * installed when fstat() succeeded and reported a usable size;
	 * otherwise stat_buf would be uninitialized and the default stdio
	 * buffering is left in place.
	 */

	if (!isatty (fileno (stdout))) {
		if (fstat (fileno (stdout), &stat_buf) == 0 &&
				stat_buf.st_blksize > 0) {
			/* mem_alloc takes an int, so convert explicitly */
			setvbuf (stdout, mem_alloc ((int) stat_buf.st_blksize),
				_IOFBF, stat_buf.st_blksize);
		}
	}

	/*
	 * Execute program phases.
	 */

	init (argc, argv);	/* Perform program initialization */

	if (p1_debug || pa_debug) {
		fputs ("Start Pass1\n", stderr);
	}
	pass1 ();  /* Read files building symbol table and record arrays. */
	if (p1_debug || pa_debug) {
		dump_sym_tab ("Pass1 symbol table");
		dump_arrays ("Pass1 arrays");
	}

	if (p2_debug || pa_debug) {
		fputs ("Start Pass2\n", stderr);
	}
	pass2 ();		/* Determine anchor points in files. */
	if (p2_debug || pa_debug) {
		dump_arrays ("Pass2 arrays");
	}

	if (p3_debug || pa_debug) {
		fputs ("Start Pass3\n", stderr);
	}
	pass3 ();		/* Expand anchors to non-unique lines. */
	if (p3_debug || pa_debug) {
		dump_arrays ("Pass3 arrays");
	}

	if (p4_debug || pa_debug) {
		fputs ("Start Pass4\n", stderr);
	}
	pass4 ();		/* Fix non-uniques surrounded by insertions */
	if (p4_debug || pa_debug) {
		dump_arrays ("Pass4 arrays");
	}

	if (p5_debug || pa_debug) {
		fputs ("Start Pass5\n", stderr);
	}
	pass5 ();		/* Write output files. */

	if (statistics_flag) {
		dump_statistics ();
	}

	/*
	 * Exit 0 only when no changes were counted.  With exactly two
	 * files only the old/new1 count is consulted; otherwise all three
	 * pairwise counts must be zero.
	 */
	if (old_new1_change_count == 0 &&
			(file_count == 2 ||
				(old_new2_change_count == 0 &&
					new1_new2_change_count == 0))) {
		exit (0);
	} else {
		exit (1);
	}

}
/*
  * dump_arrays: dump arrays for debugging purposes
  *
  * This routine outputs the record arrays to the standard output file.
  *
  * Return value:
  *      This procedure has no return value.
  */

void dump_arrays (message)
char   *message;		/* input */
 /* Message to print before arrays */

{

	int     i;		/* Misc. variable */

	int     index;		/* Index into record array */

	int     files_left;	/* number of files left to do */

	bool file_done[MAX_FILE_COUNT];/* TRUE if EOT reached on file */

	record_type * record_ptr;/* Pointer to current record */



	/*
	 * Initialize completion parameters.
	 */
	printf ("%s\n", message);

	files_left = file_count;
	for (i = 0; i < file_count; ++i) {
		file_done[i] = FALSE;
	}

	/*
	 * For each iteration of the file read the nth record of the each file.
	 */
	for (index = BEGIN_INDEX + 1; files_left != 0; ++index) {

		printf ("record: %5d ", index);

		/*
		 * Handle each file.
		 */

		for (i = 0; i < file_count; ++i) {

			if (!file_done[i]) {

				if (index >= files[i].record_array_size - 1) {
					file_done[i] = TRUE;
					--files_left;
					if (files_left == 0) {
						break;
					}
					printf ("%38.38s", " ");
					continue;
				}

				record_ptr = &(files[i].record[index]);

				printf ("    rfa:%6d val1:%6d val2:%6d",
						record_ptr -> rfa,
						record_ptr -> value[0],
						record_ptr -> value[1]);

			} else {
				printf ("%38.38s", " ");

			}

		}

		printf ("\n");

	}

}
/*
 * dump_statistics: Dump statistics
 *
 * This routine outputs the execution statistics * to the standard output
 * file.
 *
 * Return value:
 *      This procedure has no return value.
 */
void dump_statistics () {

	int     i;		/* Misc. variable */



	/*
	 * Initialize completion parameters.
	 */

	printf ("\fStatistics:\n\n");

	printf ("Cache misses: %d\n", cache_miss);
	printf ("Hash collisions: %d\n", hash_collisions);

	printf ("Line counts:\n");
	for (i = 0; i < file_count; ++i) {
		printf ("   %5d: '%s'\n",
			files[i].record_array_size - DUMMY_RECORD_COUNT,
			files[i].name_ptr);
	}

	printf ("Changes:\n");
	printf ("   '%s' and '%s' ", files[OLD_FILE].name_ptr,
			files[NEW1_FILE].name_ptr);
	if (old_new1_change_count == 0) {
		printf ("are identical.\n");
	} else {
		printf ("have %d differences.\n", old_new1_change_count);
	}

	if (file_count > 2) {
		printf ("   '%s' and '%s' ", files[OLD_FILE].name_ptr,
				files[NEW2_FILE].name_ptr);
		if (old_new2_change_count == 0) {
			printf ("are identical.\n");
		} else {
			printf ("have %d differences.\n", old_new2_change_count);
		}

		printf ("   '%s' and '%s' ", files[NEW1_FILE].name_ptr,
				files[NEW2_FILE].name_ptr);
		if (new1_new2_change_count == 0) {
			printf ("are identical.\n");
		} else {
			printf ("have %d differences.\n", new1_new2_change_count);
		}
	}

}
/*
 * dump_sym_tab: dump symbol table for debugging purposes
 *
 * This routine outputs the symbol table to the standard output file.
 *
 * Return value:
 *      This procedure has no return value.
 */
void dump_sym_tab (message)
char   *message;		/* input */
 /* Message to print before table */

{

	int     i;		/* Misc. variable */



	/*
	 * Write each used symbol table entry.
	 */

	printf ("%s\n", message);

	for (i = 0; i < sym_tab_size; ++i) {
		if (sym_tab_cache_ptr[i] != CACHE_FREE_ENTRY) {

			printf ("hash:%5d old:%5d new1:%5d ", i,
					files[OLD_FILE].sym_tab_index[i],
					files[NEW1_FILE].sym_tab_index[i]);

			if (file_count == 3) {
				printf ("new2:%5d ", files[NEW2_FILE].sym_tab_index[i]);
			}

			if (sym_tab_cache_ptr[i] ==
				(cache_entry_type *) CACHE_NOT_IN_CACHE) {

				printf ("(record not in cache)");
			} else {
				if (sym_tab_cache_ptr[i] -> hash_code != i) {
					printf ("(cache_hash_code wrong: %d)",
						sym_tab_cache_ptr[i]->hash_code);
				}
				if (sym_tab_cache_ptr[i] -> record_length < 0) {
					sym_tab_cache_ptr[i] ->recordp[0] = '\0';
				} else {
					sym_tab_cache_ptr[i] ->
						recordp[sym_tab_cache_ptr[i] ->
						record_length] = '\0';
				}
				printf ("cache_record(%d): %s",
					sym_tab_cache_ptr[i] -> record_length,
					sym_tab_cache_ptr[i] -> recordp);
			}

			printf ("\n");

		}
	}

}
/*
 * init: Perform program initialization.
 *
 * This routine interprets the command line, opens every input file
 * twice (the seq_fd and rnd_fd streams of the file's entry), and
 * allocates the record arrays, the record cache and the symbol table.
 *
 * A file argument may name a directory; the file actually opened is then
 * the entry in that directory with the same base name as the
 * non-directory file arguments.
 *
 * Every failure is fatal: the problem is reported and the program exits
 * with status 2, either through error() or through perror() and exit().
 *
 * Return value:
 *      This procedure has no return value.
 */

void init (argc, argv)
int     argc;			/* argument count from 'main' */

char  **argv;			/* arguments from 'main' */


{

	char   *basename_ptr = 0;/* basename of files */

	int     cache_entry_size;/* Number of bytes in a cache entry */

	cache_entry_type * cache_ptr;/* Pointer to cache entry */

	int     different_basenames = 0;
				/* TRUE if file basenames are different */

	int     directory_count = 0;
				/* number of command line arguments which are
				   actually directories */

	FILE * dummy_file;	/* can't assign to stdin on UNIX */

	time_t  etime;		/* Current time of day */

	int     is_directory[MAX_FILE_COUNT]; /* TRUE if file is a directory */

	int     i;		/* Misc. variable */
	int     j;		/* Misc. variable */
	int     k;		/* Misc. variable */

	int	max_record_len = LINE_LENGTH;	/* max initial record length */

	int     record_count;	/* Number of records in record array */

	struct stat     stat_buf;/* Buf. to find last written date */

	char   *the_cache;	/* Ptr to head of cache */

	char   *temp_ptr;	/* Misc char ptr */

	int     total_record_count;/* Total number of records in all files */

	int     c;		/* Option character */

	extern int      optind;	/* Option index */

	extern char    *optarg;	/* Option argument pointer */

	extern int      getopt ();/* getopt routine */

	extern char    *ctime ();/* convert time routine */

	extern char    *strrchr ();/* search for character in string */


#ifdef VOS
	stdout -> carriage_control = TRUE;
#endif

	/*
	 * Record the time of this run as text.
	 */
	(void) time (&etime);
	(void) strcpy (exec_time, ctime (&etime));
	exec_time[strlen (exec_time) - 1] = '\0'; /* remove newline character */

	/*
	 * Scan options arguments.
	 */
	for (;;) {

		c = getopt (argc, argv, "bBhsqc:d:p:P:1:2:");
		if (c == EOF) {
			break;
		}

		switch (c) {

		/*
		 * B and b option: Blank remove and blank compress
		 * options.
		 */
		case 'b':
			blank_compress = TRUE;
			compress_records = TRUE;
			break;

		case 'B':
			blank_remove = TRUE;
			compress_records = TRUE;
			break;
		/*
		 * c option: Compare only specified columns.
		 * The argument has the form "first,last" (1 relative).
		 */

		case 'c':
			compress_records = TRUE;
			if ((column_count + 1) == (MAX_COLUMNS)) {
				error ("Too many -c options");
			}

			/* <ctype.h> routines need an unsigned char value */
			for (j = 0; isdigit ((unsigned char) optarg[j]); ++j) {
			}
			if (j == 0) {
				error ("-c option not followed by number");
			}
			first_column[column_count] = atoi (optarg) - 1;
			/* Zero relative */

			if (first_column[column_count] < 0) {
				error ("Column specification less than column 1");
			}

			if (optarg[j] != ',') {
				error ("Column specifications not seperated by comma");
			}

			optarg += j + 1;

			for (j = 0; isdigit ((unsigned char) optarg[j]); ++j) {
			}
			if (j == 0) {
				error ("-c option not followed by two numbers");
			}
			last_column[column_count] = atoi (optarg) - 1;
			/* Zero relative */
			if (last_column[column_count] < first_column[column_count]) {
				error ("Last column spec. less then first column spec.");
			}

			/* Cache buffers must hold the last compared column */
			max_record_len = max(max_record_len,
				             last_column[column_count] + 1);

			column_count++;
			break;

		/*
		 * d option: Debug. Print debug output for one pass
		 * ('1' to '5') or for all passes ('a').
		 */
		case 'd':
			switch (*optarg) {
			case 'a':
				pa_debug = TRUE;
				break;
			case '1':
				p1_debug = TRUE;
				break;
			case '2':
				p2_debug = TRUE;
				break;
			case '3':
				p3_debug = TRUE;
				break;
			case '4':
				p4_debug = TRUE;
				break;
			case '5':
				p5_debug = TRUE;
				break;
			default:
				error ("invalid argument following -d option");
			}
			break;

		/*
		 * h option: sets hed_flag (takes no argument).
		 */
		case 'h':
#ifdef VOS
			stdout -> carriage_control = FALSE;
#endif
			hed_flag = TRUE;
			break;

		/*
		 * -P option: Number of prefix lines to output to listing file.
		 * -p option: Number of postfix lines to output to listing file.
		 */
		case 'P':
			prefix_lines = atoi (optarg);
			if (prefix_lines > CACHE_ENTRIES - 10) {
				error ("Too many prefix lines");
			}
			break;

		case 'p':
			postfix_lines = atoi (optarg);
			break;

		/*
		 * -s option: Output page of statistics to stdout
		 */
		case 's':
			statistics_flag = TRUE;
			break;

		/*
		 * -1 option: Text string to associate with 'new1' file.
		 * -2 option: Text string to associate with 'new2' file.
		 */
		case '1':
			files[NEW1_FILE].text_ptr = optarg;
			break;

		case '2':
			files[NEW2_FILE].text_ptr = optarg;
			break;

		/*
		 * q option: Quiet. Produce no output if no differences.
		 */
		case 'q':
			quiet_option = TRUE;
			break;
		}

	}

	/*
	 * Handle each command line argument.
	 */
	for (i = optind; i < argc; ++i) {

		/*
		 * Handle redirections of 'stdin':
		 *
		 * This code won't get executed on a UNIX O.S. However,
		 * on VOS this code allows the same syntax to work.
		 */
		if (argv[i][0] == '<' && argv[i][1] != '\0') {
			dummy_file = freopen (&argv[i][1], "r", stdin);

			if (dummy_file == 0) {
			        perror(&argv[i][1]);
				exit( 2 ) ;
			}

		/*
		 * Handle redirections of 'stdout':
		 *
		 * This code won't get executed on a UNIX O.S. However, on VOS this
		 * code allows the same syntax to work.
		 */
		} else if (argv[i][0] == '>' && argv[i][1] != '\0') {
			dummy_file = freopen (&argv[i][1], "w", stdout);

			if (dummy_file == 0) {
			        perror(&argv[i][1]);
			        exit(2);
			}

		/*
		 * Handle file arguments not preceded by a specific option argument.
		 */
		} else {

			if (file_count >= MAX_FILE_COUNT) {
				error ("Too many files specified");
			}

			files[file_count].name_ptr = argv[i];

			/*
			 * The directory test below is meaningless unless
			 * stat() filled in the buffer, so a failure is
			 * reported here in the same way as a failed open.
			 */
			if (stat (files[file_count].name_ptr, &stat_buf) != 0) {
				perror (files[file_count].name_ptr);
				exit (2);
			}
			is_directory[file_count] =
				(stat_buf.st_mode & S_IFMT) == S_IFDIR;
			if (is_directory[file_count]) {
				directory_count++;
			} else {
				/*
				 * The base name starts after the last '/',
				 * so that "foo" and "dir/foo" compare equal
				 * and no doubled '/' appears when the name
				 * is appended to a directory below.
				 */
				temp_ptr = strrchr (argv[i], '/');
				if (temp_ptr == 0) {
					temp_ptr = argv[i];
				} else {
					temp_ptr++;
				}
				if (basename_ptr &&
					strcmp (temp_ptr, basename_ptr) != 0) {
					different_basenames = 1;
				}
				basename_ptr = temp_ptr;
			}

			file_count++;
		}
	}

	/*
	 * Resolve actual file names and open files.
	 *
	 * The name specified on the command line might be a directory name.
	 */

	if (file_count < 2) {
		error ("not enough files specified");
	}
	if (file_count == directory_count) {
		error ("cannot compare directories");
	}
	if (directory_count != 0 &&
			file_count - directory_count > 1 &&
			different_basenames) {
		error ("ambiguous directory name");
	}

	total_record_count = 0;
	for (i = 0; i < file_count; ++i) {

		/*
		 * At least one argument is a plain file at this point, so
		 * basename_ptr is set whenever a directory is expanded.
		 * mem_alloc takes an int, hence the explicit conversions.
		 */
		if (is_directory[i]) {
			temp_ptr = mem_alloc ((int) (strlen (files[i].name_ptr) +
					strlen (basename_ptr) + 2));
			sprintf (temp_ptr, "%s/%s", files[i].name_ptr,
					basename_ptr);
			files[i].name_ptr = temp_ptr;
		}

#ifdef VOS
		files[i].seq_fd =
			fopen (files[i].name_ptr, "r", max_record_len, "s", $OPEN_DB);
		files[i].rnd_fd =
			fopen (files[i].name_ptr, "r", max_record_len, "s", $OPEN_RMAI);
#else
		files[i].seq_fd = fopen (files[i].name_ptr, "r");
		files[i].rnd_fd = fopen (files[i].name_ptr, "r");
#endif

		if (files[i].seq_fd == 0 || files[i].rnd_fd == 0) {
			perror(files[i].name_ptr);
			exit(2);
		}

		/* Modification time, block size and file size are all
		   taken from this buffer, so it must be valid. */
		if (fstat (fileno (files[i].seq_fd), &stat_buf) != 0) {
			perror (files[i].name_ptr);
			exit (2);
		}

		/* Keep a private copy of the "last written" time text;
		   ctime() returns a buffer that later calls overwrite. */
		temp_ptr = ctime (&(stat_buf.st_mtime));
		temp_ptr[strlen (temp_ptr) - 1] = '\0';
		files[i].lw_ptr = mem_alloc ((int) (strlen (temp_ptr) + 1));
		strcpy (files[i].lw_ptr, temp_ptr);

		setvbuf (files[i].seq_fd, mem_alloc ((int) stat_buf.st_blksize),
			_IOFBF, stat_buf.st_blksize);
		setvbuf (files[i].rnd_fd, mem_alloc ((int) stat_buf.st_blksize),
			_IOFBF, stat_buf.st_blksize);

		/* estimate record count by assuming 20 chars per record */
		/* Don't allow overly small record counts */
		record_count = max( stat_buf.st_size / 20, RA_ORIG);
		files[i].record_array_alloc = record_count;
		total_record_count += record_count;

		files[i].record = (record_type *)
			mem_alloc ((int) (record_count * sizeof (record_type)));

	}

	/*
	 * Sort column ranges into ascending order.
	 */
	for (i = 0; i + 1 < column_count; ++i) {
		for (j = i + 1; j < column_count; ++j) {
			if (first_column[i] > first_column[j]) {
				k = first_column[i];
				first_column[i] = first_column[j];
				first_column[j] = k;
				k = last_column[i];
				last_column[i] = last_column[j];
				last_column[j] = k;
			}
		}
	}

	/*
	 * Ensure there are no overlapping column ranges.
	 */
	for (i = 0; i + 1 < column_count; ++i) {
		if (last_column[i] >= first_column[i + 1]) {
			error ("overlaping column ranges specified");
		}
	}

	/*
	 * Allocate cache entries.
	 *
	 * Cache entries include an extra word at the end of the buffer.
	 * This word allows a word of blanks to be inserted after the end
	 * of each read line. This, in turn, allows hash code computations
	 * and line comparisons to be word oriented rather than byte oriented.
	 *
	 * The cache is allocated in one chunk below for two reasons:
	 *    1) For small files the huge number of allocations consumes
	 *	 significant time.
	 *    2) Less memory is used since mem_alloc allocates a block
	 *       which is larger than is actually requested. (The next larger
	 *       power of two.)
	 *
	 * Each entry is a cache_entry_type header immediately followed by
	 * its record buffer; the entry size is rounded up to a multiple of
	 * the word size.
	 */
	cache_entry_size =
		sizeof (cache_entry_type) + sizeof (int) + max_record_len;
	cache_entry_size += sizeof (int) - (cache_entry_size % sizeof (int));
	the_cache = mem_alloc (CACHE_ENTRIES * cache_entry_size);
	for (i = 0; i < CACHE_ENTRIES; ++i) {
		cache_ptr = (cache_entry_type *) the_cache;
		cache_ptr -> recordp = the_cache + sizeof(cache_entry_type);
		cache_ptr -> record_alen = cache_entry_size -
					sizeof(cache_entry_type);
		cache_ptr -> hash_code = HASH_FREE_ENTRY;
		enq_head_dll (cache_head_ptr, cache_tail_ptr, cache_ptr,
				cache_next_ptr, cache_prev_ptr);
		the_cache += cache_entry_size;
	}

	/*
	 * Compute size of symbol table.
	 *
	 * 1) Initially guess size of symbol table as the sum of the number of
	 *    records in all of the input files times 2.
	 * 2) Never allocate a symbol table of less than 1024 entries. (This step
	 *    is required due to the organization of the prime number table.)
	 * 3) Round the size down to a multiple of 1024. (This tries to force the
	 *    symbol table to be an integer number of pages. It also limits the
	 *    size of the prime number table).
	 * 4) Round the size down to a prime number. (The hashing algorithm requires
	 *    that the size of the table is a prime number).
	 *
	 * Steps 3 and 4 are both done by the table search below.
	 */
	sym_tab_size = total_record_count * 2;
	sym_tab_size = max (1024, sym_tab_size);

	/* Prime number table contains only those primes which are less than
	   and closest to a multiple of 1024.  The table ends with -1; the
	   entry before the first one larger than the guess is used. */
	for (i = 1; primes[i] != -1; ++i) {
		if (sym_tab_size < primes[i]) {
			break;
		}
	}

	sym_tab_size = primes[i - 1];

	/*
	 * Allocate symbol table.
	 */
	for (i = 0; i < file_count; ++i) {
		files[i].sym_tab_index = (int *)
			mem_alloc ((int) (sym_tab_size * sizeof (int)));
	}

	sym_tab_cache_ptr = (cache_entry_type **)
		mem_alloc ((int) (sym_tab_size * sizeof (cache_entry_type *)));

}
/*
 * link_records: link two records together.
 *
 * This routine links a record in the current file to a record in the
 * corresponding file.
 *
 * If either of these records are already
 * linked to a record in the other file, finish up all of the
 * linkages. Pass5 considers it an inconsistent state if only two of
 * the three linkages between files are made. Usually, this inconsistent
 * state will clear itself up. However, certain input files will indeed
 * allow the inconsistency to remain.
 *
 * Note: This routine also discovers an attempt to link records in an
 * impossible fashion. Suppose, this record in the 'current' file is
 * already linked to record A in the 'other' file. This record in the
 * 'corresponding' file is already linked to record B in the 'other' file.
 * Any attempt to link the current and corresponding records would
 * require that record A and record B be the same record (impossible).
 * In that circumstance, this routine acts as a no-op. The calling
 * routine is not informed since this new information wouldn't change the
 * decision making process which it is going through.
 *
 * Return value:
 *      This procedure has no return value.
 */
void link_records (match_no, index1, index2)
int     match_no;		/* input */
 				/* Which relationship is being scanned */

int     index1;			/* Index into the current file of the record to
				   link. */

int     index2;			/* Index into the corresponding file of the
				   record to link. */

{

	file_type * file1_ptr;	/* First file - current_file */

	file_type * file2_ptr;	/* Second file - corresponding file */

	file_type * file3_ptr;	/* Third file - other file */

	int     file1_sub;	/* For each record of the first file, this is a
				   subscript of the 'value' array of the
				   relationship between file1 and file2 */

	int     file2_sub;	/* For each record of the second file, this is
				   a subscript of the 'value' array of the
				   relationship between file2 and file1 */

	int     file3_sub;	/* For each record of the third file, this is a
				   subscript of the 'value' array of the
				   relationship between file3 and file1 */

	int     hash_code;	/* Hash code for the record being linked. */

	int     index3;		/* Index into record array of file3 is the
				   'next' record in file3 */

	int    *other_val1_ptr;	/* Pointer to the 'value' field in the record
				   on file1. This is the 'value' which
				   indicates the relationship to file3. */

	int    *other_val2_ptr;	/* Pointer to the 'value' field in the record
				   on file2. This is the 'value' which
				   indicates the relationship to file3. */

	int    *val1_ptr;	/* Pointer to the 'value' field in record on
				   file1. This is the 'value' which indicates
				   the relationship to file2. */

	int    *val2_ptr;	/* Pointer to the 'value' field in record on
				   file2. This is the 'value' which indicates
				   the relationship to file1. */

	int    *val3_ptr;	/* Pointer to the 'value' field in record on
				   file3. */



	/*
	 * Set up misc local variables.
	 */

	if (p3_debug || p4_debug) {
		printf ("link_records: matchno: %d indices: %d %d\n",
				match_no, index1, index2);
	}

	file1_ptr = &files[curr_file[match_no]];
	file2_ptr = &files[corres_file[match_no]];
	file1_sub = value_sub[match_no];
	file2_sub = rev_value_sub[match_no];

	/*
	 * Link the two records together.
	 */

	val1_ptr = &(file1_ptr -> record[index1].value[file1_sub]);
	val2_ptr = &(file2_ptr -> record[index2].value[file2_sub]);

	hash_code = *val1_ptr;
	*val1_ptr = index2;
	*val2_ptr = index1;

	/*
	 * If either of these two records are already linked to the third file,
	 *     connect these two record to the record in the third file.
	 */

	other_val1_ptr =
		&(file1_ptr -> record[index1].value[other_sub (file1_sub)]);
	other_val2_ptr =
		&(file2_ptr -> record[index2].value[other_sub (file2_sub)]);

	if (is_hash_code (*other_val1_ptr)) {
		if (*other_val1_ptr != hash_code) {
			error ("hash code mis-match 1");
		}
		if (is_hash_code (*other_val2_ptr)) {
			if (*other_val2_ptr != hash_code) {
				error ("hash code mis-match 2");
			}
			return;
		} else {
			index3 = *other_val2_ptr;
			*other_val1_ptr = index3;
		}
	} else {
		index3 = *other_val1_ptr;
		if (is_hash_code (*other_val2_ptr)) {
			if (*other_val2_ptr != hash_code) {
				error ("hash code mis-match 3");
			}
			*other_val2_ptr = index3;
		} else {
			if (*other_val1_ptr != *other_val2_ptr) {
				/* error( "other file index mismatch 1" ) ; */
				/* In this error condition, just undo what
				   we've already done */
				*val1_ptr = hash_code;
				*val2_ptr = hash_code;
				return;
			}
		}
	}

	/*
	 * Connect the record in the third file to the record in the first file.
	 */
	file3_ptr = &files[other_file[match_no]];
	file3_sub = other_value_sub[match_no];
	val3_ptr = &(file3_ptr -> record[index3].value[file3_sub]);

	if (is_hash_code (*val3_ptr)) {
		if (*val3_ptr != hash_code) {
			error ("hash code mis-match 4");
		}
		*val3_ptr = index1;
	} else {
		if (*val3_ptr != index1) {
			error ("other file index mismatch 2");
		}
	}

	/*
	 * Connect the record in the third file to the record in the second file.
	 */

	val3_ptr =
		&(file3_ptr -> record[index3].value[other_sub (file3_sub)]);

	if (is_hash_code (*val3_ptr)) {
		if (*val3_ptr != hash_code) {
			error ("hash code mis-match 5");
		}
		*val3_ptr = index2;
	} else {
		if (*val3_ptr != index2) {
			error ("other file index mismatch 3");
		}
	}

}
/*
 * error: output fatal error message
 *
 * Writes the message to stderr, prefixed with the program name and
 * followed by a period and a newline, then terminates the program with
 * exit status 2.  Control never comes back to the caller.
 *
 * Return value:
 *      This procedure has no return value.
 */

void error (error_ptr)
char   *error_ptr;		/* input: message text, without the final
				   period */

{

	int     fatal_status = 2;/* exit status used for fatal errors */

	(void) fprintf (stderr, "combine: %s.\n", error_ptr);

	exit (fatal_status);

}
/*
 * mem_alloc: allocate memory
 *
 * This routine uses the standard memory allocator; however, if memory
 * is not available, this routine outputs an error message and terminates
 * through error().
 *
 * A negative size (for instance the result of an overflowed size
 * computation for very large files) is treated as "not enough memory"
 * instead of being handed to the allocator.  A size of zero is rounded
 * up to one byte, because the allocator is allowed to return a null
 * pointer for an empty request and that must not be mistaken for an
 * out-of-memory condition.
 *
 * Return value:
 *      This procedure returns a pointer to the allocated block.  The
 *      pointer is never null.
 */
char   *mem_alloc (size)
int     size;			/* input */
				/* Size (in bytes) of the block to allocate */

{

	char   *block_ptr;	/* The newly allocated block */

	extern char    *malloc ();

	if (size < 0) {
		error ("not enough memory -- files too big");
	}

	if (size == 0) {
		size = 1;
	}

	block_ptr = malloc (size);
	if (block_ptr == 0) {
		error ("not enough memory -- files too big");
	}

	return (block_ptr);

}

/*
 * reread_into_cache -- re-read a record from a file into a cache entry
 *
 * This routine is used to re-read a record (which has previously been
 * read) into a cache entry.  It positions the file's random access
 * stream at the record's rfa and reads the record with
 * read_into_cache().
 *
 * A failed seek or a failed read is fatal: the routine reports a disk
 * error naming the file through error(), which terminates the program.
 *
 * Return value:
 *      This procedure has no return value.
 */
void reread_into_cache( file_ptr, index, cache_ptr )
	file_type * file_ptr;		/* file to be read from */
	int	index;			/* record number to read */
	cache_entry_type * cache_ptr;	/* cache entry to read into */
{
	int status;

	/*
	 * The file name is supplied by the user and may be arbitrarily
	 * long.  The buffer is sized for the longest fixed message text
	 * plus the 200 name characters allowed by the "%.200s" conversions
	 * below, so the formatted message always fits.
	 */
	char mbuffer[256];

	/* fseek() reports failure with any nonzero value */
	status = fseek (file_ptr->rnd_fd, file_ptr->record[index].rfa, 0);
	if ( status != 0 ) {
		(void) sprintf (mbuffer, "Disk error while seeking '%.200s'",
				file_ptr -> name_ptr);
		error (mbuffer);
	}

	status = read_into_cache(file_ptr->rnd_fd,
			file_ptr->record[index].rfa,
			cache_ptr);

	/* The record was read once before, so even EOF is an error here */
	if (status < 0) {
		(void) sprintf (mbuffer, "Disk error while re-reading '%.200s'",
				file_ptr -> name_ptr);
		error (mbuffer);
	}
}

/*
 * read_into_cache -- read a record from a file into a cache entry
 *
 * Read a record into a cache entry. This routine reads an entire record
 * (one line) into the cache entry. If the currently allocated buffer is
 * too small, a larger buffer will be allocated and the record is read
 * again from 'rfa'.
 *
 * On return the record is stored without its newline, is NUL-terminated,
 * and its length is recorded in the cache entry.
 *
 * A record that did not fit the buffer is scanned character by
 * character; a NUL or non-ASCII character found during that scan is a
 * fatal error.  A record starting with a NUL byte is scanned (and so
 * rejected) as well, because the text read for it appears empty.
 *
 * Return Value:
 *	Length of the record without its newline (-1 for EOF)
 */
int read_into_cache( fp, rfa, cache_ptr)
	FILE	*fp;			/* File to read */
	rfa_type	rfa;		/* rfa to read (already positioned) */
	cache_entry_type * cache_ptr;	/* cache entry to read into */
{
	int     c;	/* int rather than char, so that EOF cannot be
			   confused with a data byte */
	char   *char_ptr;
	int	status;
	int     i;

	char_ptr = fgets (cache_ptr->recordp, cache_ptr->record_alen, fp);
	if (char_ptr == NULL) {
		return (-1);
	}

	/*
	 * 'i' indexes the last character read.  It is -1 when the text
	 * looks empty (the line starts with a NUL byte); that case must
	 * not index the buffer and is sent through the scan below.
	 */
	i = strlen (cache_ptr->recordp) - 1;
	if (i < 0 || cache_ptr->recordp[i] != '\n') {

		/*
		 * The record was not read completely (or ends at end of
		 * file without a newline).  Go back and measure it.
		 */
		status = fseek (fp, rfa, 0);
		if ( status != 0 ) {
			error("Internal error: cannot reseek");
		}
		for (i=0;;i++) {
			c = getc (fp);
			if (c == EOF) {
				break;
			/* This is sort of a kludge, we only check for
			   non-ascii if the record length is too long */
			} else if (!isascii (c) || c == '\0' ) {
				error ("non-ascii character in file");
			} else if (c == '\n') {
				break;
			}
		}

		/*
		 * 'i' is now the number of characters before the newline.
		 * Leave space at the end for the newline, the null byte
		 * and the extra nulls for the checksum algorithm, then
		 * round up to a whole number of words.
		 */
		i+=sizeof(int);
		i += sizeof (int) - (i % sizeof (int));

		/*
		 * Don't deallocate the old buffer since it was probably
		 * allocated as a part of a larger buffer.
		 */
		cache_ptr->recordp = mem_alloc(i);
		cache_ptr->record_alen = i;

		status = fseek (fp, rfa, 0);
		if ( status != 0 ) {
			error("Internal error: cannot reseek");
		}

		char_ptr = fgets (cache_ptr->recordp, cache_ptr->record_alen, fp);
		if (char_ptr == NULL) {
			return (-1);
		}

		i = strlen (cache_ptr->recordp) - 1;
		/* Perhaps we should warn about this: the last record of
		   the file has no newline, so keep its final character */
		if (i < 0 || cache_ptr->recordp[i] != '\n') {
			i++;
		}
	}

	/* Replace the newline (if any) with the terminator */
	cache_ptr->recordp[i] = '\0';
	cache_ptr->record_length = i;

	return (i);

}
