/*
 * The combine utility is a product of Harris, Inc. and is provided for
 * unrestricted use provided that this legend is included on all tape
 * media and as a part of the software program in whole or part.  Users
 * may copy, modify, license or distribute the combine utility without charge.
 * 
 * THE COMBINE UTILITY IS PROVIDED AS IS WITH NO WARRANTIES OF ANY KIND
 * INCLUDING THE WARRANTIES OF DESIGN, MERCHANTABILITY AND FITNESS FOR A
 * PARTICULAR PURPOSE, OR ARISING FROM A COURSE OF DEALING, USAGE OR TRADE
 * PRACTICE.
 * 
 * The combine utility is provided with no support and without any obligation
 * on the part of Harris, Inc. to assist in its use, correction,
 * modification or enhancement.
 * 
 * HARRIS, INC. SHALL HAVE NO LIABILITY WITH RESPECT TO THE
 * INFRINGEMENT OF COPYRIGHTS, TRADE SECRETS OR ANY PATENTS BY THE COMBINE
 * UTILITY OR ANY PART THEREOF.
 * 
 * In no event will Harris, Inc. be liable for any lost revenue
 * or profits or other special, indirect and consequential damages, even if
 * Harris has been advised of the possibility of such damages.
 * 
 * Harris Computer Systems Division
 * 2101 W Cypress Creek Rd
 * Fort Lauderdale, Florida 33309
 */
#include <stdio.h>
#include <ctype.h>
#include "util.h"
#include "combine.h"
/*
 * pass4: Handle non-unique record clusters.
 *
 * This routine tries to fix up problems which arise due to groups of
 * non-unique records which are surrounded by insertions. When this
 * condition arises, the group of non-unique records are treated as
 * a part of the inserted text.
 *
 * This routine finds a pair of anchor records which have inserted lines
 * between them. (See diagram below.) All inserted lines are compared
 * to one another trying to find a group of non-unique lines which
 * match. Such lines are considered equal and are flagged as anchor
 * points.
 *
 *          match1                       match1
 *          insert
 *          non-unique1                  non-unique1
 *          insert
 *          match2                       match2
 *
 * This routine runs in M x N time where 'M' is the number of lines
 * inserted at the current position in the first file and 'N' is the number
 * of lines inserted at the current position in the second file.
 *
 * Possible design flaw: The algorithm used to match the non-unique lines
 * should really be the same alogirthm used for the rest of the file.
 * That algorithm runs in linear time. The main problem with doing that is
 * that the other passes of this program were not designed for or optimized
 * around working for a portion of a file. Also, the proposed algorithm
 * relies on the fact that some of the non-unique lines will become unique
 * when only this fragment of the files are compared.
 *
 * Return value:
 *      This procedure has no return value.
 */

void pass4 () {

	int     i;		/* Misc. variable */

	int     j;		/* Misc. variable */

	/*
	 * Scan each pair of files.
	 */

	for (j = 0; j < UNIQUE_MATCH_COUNT; ++j) {

		i = unique_match[j];

		if (files[curr_file[i]].record != 0 &&
				files[corres_file[i]].record != 0) {

			pass4_scan (i);

		}

	}

}
/*
 * pass4_scan: pass4 for two files.
 *
 * This routine implements the pass4 algorithm for a pair of files.
 *
 * Return value:
 *      This procedure has no return value.
 */

void pass4_scan (match_no)
int     match_no;		/* input */
				/* Which relationship is being scanned */

{

	register int i;		/* Index into record array of file1 of the
				   current position in the inserted records */

	register int j;		/* Index into record array of file2 of the
				   current position in the inserted records */

	file_type * file1_ptr;	/* First file - current_file */

	file_type * file2_ptr;	/* Second file - corresponding file */

	int     file1_sub;	/* For each record of the first file, this is a
				   subscript of the 'value' array of the
				   relationship between file1 and file2 */

	int     file2_sub;	/* For each record of the second file, this is
				   a subscript of the 'value' array of the
				   relationship between file2 and file1 */

	register int fn_index1;	/* Index into record array of file1 of the
				   position of the last inserted record */

	register int fn_index2;	/* Index into record array of file2 of the
				   position of the last inserted record */

	int     index1;		/* Index into record array of file1 of the
				   record currently being processed */

	record_type * record1;	/* Pointer to record array of file1 */

	record_type * record2;	/* Pointer to record array of file2 */

	register int st_index1;	/* Index into record array of file1 of the
				   position of the first inserted record */

	register int st_index2;	/* Index into record array of file2 of the
				   position of the first inserted record */


	/*
	 * Initialize indexes to first and last record of the files.
	 */

	file1_ptr = &files[curr_file[match_no]];
	file2_ptr = &files[corres_file[match_no]];
	file1_sub = value_sub[match_no];
	file2_sub = rev_value_sub[match_no];

	record1 = file1_ptr -> record;
	record2 = file2_ptr -> record;

	/*
	 * For each record in the first file.
	 *
	 * Note: given the existence of the 'begin' and 'end' record on each
	 * end of the record array,
	 * we can convince ourselves that the tests below won't ever index
	 * off the end of the arrays.
	 */
	for (index1 = BEGIN_INDEX; index1 < file1_ptr->record_array_size-1;
		index1++) {

		/*
		 * If the current record in file1 is not an anchor point,
		 *     continue the big loop.
		 */
		st_index2 = record1[index1].value[file1_sub];
		if (is_hash_code (st_index2)) {
			continue;
		}

		/*
		 * Compute the index into file2 of the corresponding record.
		 * Compute a pointer to value field of the next record in each
		 *	file.
		 *
		 * If either of the next records are anchor points,
		 *      continue the big loop.
		 */
		if (!is_hash_code( record1[index1 + 1].value[file1_sub]))
			continue;
		if (!is_hash_code( record2[st_index2 + 1].value[file2_sub]))
			continue;

		/*
		 * We have found the point where a group of inserted records
		 * begins in both files.
		 *
		 * Set up indexes to the first and last inserted records in
		 * the group.
		 */
		st_index1 = fn_index1 = index1 + 1;
		st_index2 = st_index2 + 1;
		while (is_hash_code (record1[fn_index1].value[file1_sub])) {
			++fn_index1;
		}
		fn_index2 = record1[fn_index1].value[file1_sub];
		fn_index1--;	/* Adjust to index to last inserted record */
		fn_index2--;	/* Adjust to index to last inserted record */

		/*
		 * If this group of inserted records is adjacent to a group of
		 * moved records, skip the comparison. This isn't the case
		 * we're interested in.
		 */
		index1 = fn_index1;
		if (st_index2 > fn_index2) {
			continue;
		}

		/*
		 * Ensure that only non-unique records exist in the range
		 * of the corresponding file. This catches a move in the
		 * other direction.
		 */
		for ( j=st_index2; j<=fn_index2; ++ j){
			if (!is_hash_code (record2[j].value[file2_sub]))
				break;
		}
		if ( j <= fn_index2 )
			continue;

		/*
		 * Compare the hash codes. If two records are identical,
		 * consider them to be anchor points.
		 */
		if ( p4_debug ) {
			printf("pass4: match_no: %d (%d-%d) (%d-%d)\n",
				match_no, st_index1, fn_index1,
					 st_index2, fn_index2);
		}

		for (i = st_index1; i<=fn_index1; ++i) {
			for (j=st_index2; j<=fn_index2; ++j) {

				if (record1[i].value[file1_sub] ==
					record2[j].value[file2_sub]) {

					link_records(match_no,i, j);
					st_index2 = j + 1;
					break;

				}

			}
		}
	}

}
