/* $Id: filediff.c,v 1.1.1.1 1996/10/09 11:25:19 davidn Exp $
 * FidoNet nodelist difference file generator
 *
 */

#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <time.h>
#include "filediff.h"
#include "osdep.h"
#include "log.h"

static int diff_match(FDIFF * F, int nocmmnt);


/* filediff() - generate nodelist difference file from old/new files
 *
 * Creates (or truncates) 'outfile' in binary mode, lets diff() write
 * the difference between 'oldfile' and 'newfile' into it, and logs
 * progress plus the processor time consumed as reported by clock().
 *
 * Returns the result of diff() (0 on success), or -1 if the output
 * file could not be created or could not be closed cleanly.
 */

int
filediff(char const * outfile, char const * oldfile, char const * newfile)
{
    int rc = -1;
    FILE *fp = fopen(outfile, "wb");
    if (fp == NULL)
	logit(LOG_ERROR, "unable to create diff %s: %s", outfile, strerror(errno));
    else {
	clock_t clocks = clock();
	logit(LOG_PROGRESS, "OLD nodelist: %s", oldfile);
	logit(LOG_PROGRESS, "NEW nodelist: %s", newfile);
	logit(LOG_PROGRESS, "Generating difference file: %s", outfile);
	rc = diff(fp, oldfile, newfile);
	/*
	 * Always close the file; a close failure is only reported if the
	 * diff itself had succeeded, so that the first error wins.
	 */
	if (fclose(fp) == EOF && rc == 0) {	/* I/O error */
	    logit(LOG_ERROR, "I/O error closing %s: %s", outfile, strerror(errno));
	    rc = -1;
	} else if (rc == 0) {
	    /*
	     * clock_t is an implementation-defined arithmetic type, so
	     * convert explicitly to the type that "%lu" expects before
	     * handing the values to the variadic logger.
	     */
	    unsigned long ticks = (unsigned long) (clock() - clocks);
	    unsigned long per_sec = (unsigned long) CLOCKS_PER_SEC;
	    logit(LOG_PROGRESS, "Difference file generated in %lu.%02lu secs",
		  ticks / per_sec, ((ticks % per_sec) * 100) / per_sec);
	}
    }
    return rc;
}


/* diff() - Difference of two nodelist files
 * NOTE: For DOSISH operating systems, FILE*fp must be in BINARY mode!
 *
 * Opens 'oldfile' and 'newfile' through the wfile interface and writes
 * to 'fp' the first line of the old file followed by a sequence of
 * "C<n>" (copy), "D<n>" (delete) and "A<n>" (add) commands, each
 * terminated by CR/LF; every "A" command is followed by the lines
 * taken from the new file.
 *
 * Returns 0 on success, or -1 if either input file cannot be opened
 * or a write error is detected on 'fp'. The stream is not closed.
 */

int
diff(FILE *fp, char const * oldfile, char const * newfile)
{
    int rc = -1;
    FDIFF F;

    static char const erropen[] = "unable to open %s: %s";
    wfile_init(&F.wf[F_OLD], oldfile);
    wfile_init(&F.wf[F_NEW], newfile);

    if (!wfile_open(&F.wf[F_OLD]))
	logit(LOG_ERROR, erropen, oldfile, strerror(errno));
    else {
	if (!wfile_open(&F.wf[F_NEW]))
	    logit(LOG_ERROR, erropen, newfile, strerror(errno));
	else {
	    /*
	     * Start the diff segment with the old file's id line
	     */
	    int len = 0;
	    unsigned matches = 0;
	    char const *p = wfile_ptr(&F.wf[F_OLD], &len);
	    /*
	     * wfile_ptr() yields NULL when there is no current line (this
	     * is how diff_match() recognises EOF), so an empty old file
	     * must not be handed to fwrite().
	     */
	    if (p != NULL && len > 0)
		fwrite(p, (size_t) len, 1, fp);

	    /*
	     * Now, we generate the diff itself. The algorithm used is very
	     * straight-forward: we just generate a copy command for matching
	     * lines from each file, then attempt to 'sync' on non-comment
	     * blocks when a difference is encountered. During the sync (which
	     * is a fairly crude unsophisticated sliding match) insertions,
	     * deletions and changes (which is a deletion followed by an
	     * insertion) are automagically detected and output. Lines inserted
	     * into the new list are copied into the difference file after the
	     * "A" command is generated.
	     * 
	     * MAKENL is apparently a little more clever, but I've not been able
	     * to produce as nice (small) a result as it - yet.
	     */

	    rc = 0;
	    while (rc == 0 && (!wfile_eof(&F.wf[F_OLD]) || !wfile_eof(&F.wf[F_NEW]))) {
		if (diff_match(&F, 0)) {
		    int old_eof = !wfile_advance(&F.wf[F_OLD], F.len[F_OLD]);
		    int new_eof = !wfile_advance(&F.wf[F_NEW], F.len[F_NEW]);
		    /* Don't count the equality at EOF */
		    if (!old_eof && !new_eof) {
			/*
			 * Let's keep this number <= MAXINT for all platforms
			 */
			if (++matches == 32767) {
			    fprintf(fp, "C%u\r\n", matches);
			    matches = 0;
			}
		    }
		} else {
		    /*
		     * static 'difference' lengths for each file
		     */
		    unsigned difflen[2] = {0, 0};
		    /*
		     * These are save markers for the current file positions
		     */
		    WFMARK mark[2];

		    long loffset = 0, roffset = 0;

		    int left = F_OLD, rght = F_NEW;
		    if (matches) {
			/*
			 * Output matching line count as "copy"
			 */
			fprintf(fp, "C%u\r\n", matches);
			matches = 0;
		    }
		    /*
		     * Save our current positions before we go touring around
		     * the countryside looking for matching lines
		     */
		    mark[F_OLD] = wfile_getmark(&F.wf[F_OLD]);
		    mark[F_NEW] = wfile_getmark(&F.wf[F_NEW]);

		    /*
		     * The following code is responsible for the line syncing
		     * It starts out by seeking forward on the 'new' file and
		     * searching for a match for the current 'old' line
		     * (assuming that something new has been added). If it
		     * can't sync on that line, a deletion is assumed, and the
		     * transaction switches sides. This keeps going until a
		     * firm match is made on a non-comment line, at which point
		     * we can generate the deletions (if any) and output the
		     * Add command followed by any new lines inserted.
		     */

		    do {
			if (wfile_advance(&F.wf[rght], F.len[rght]))
			    ++roffset;
			else {
			    /*
			     * EOF on right, count remaining lines in left side
			     * and quit
			     */
			    while (wfile_advance(&F.wf[left], F.len[left])) {
				++loffset;
				F.sp[left] = wfile_ptr(&F.wf[left], &F.len[left]);
			    }
			    break;
			}
			/*
			 * Back left pointer up
			 */
			if (--loffset >= 0)
			    wfile_backup(&F.wf[left]);
			else {	/* We reached a divergence */
			    int i = rght;	/* Swap input files */
			    rght = left;	/* and their pointers */
			    left = i;
			    loffset = roffset;
			    roffset = 0;
			}
		    }
		    while (!diff_match(&F, 1));	/* Match on non-comment only */

		    /*
		     * Now, in case we had any trailing comments which we did
		     * not match (but in fact do), we need to backtrace over
		     * all matching comment lines to find the real end of the
		     * divergence
		     */

		    while (loffset && roffset) {
			int l;
			wfile_backup(&F.wf[left]);
			wfile_backup(&F.wf[rght]);
			/* 'A' comment diffs are treated specially */
			if (memcmp(wfile_ptr(&F.wf[left], &l), ";A", 2) == 0 ||
			    memcmp(wfile_ptr(&F.wf[rght], &l), ";A", 2) == 0)
			    break;
			if (!diff_match(&F, 0))
			    break;
			--loffset;
			--roffset;
		    }

		    /*
		     * Set length of differences before sync
		     */
		    difflen[left] = (unsigned) loffset;
		    difflen[rght] = (unsigned) roffset;

		    /*
		     * Then reset files to their original locations
		     */
		    wfile_setmark(&F.wf[F_OLD], &mark[F_OLD]);
		    wfile_setmark(&F.wf[F_NEW], &mark[F_NEW]);

		    /*
		     * At this point, we have presumably synced the files and
		     * found the first matching non-comment line after the
		     * point where they were at variance. We have also set the
		     * file and line pointers back to where the differences
		     * start, so now we must output the results. First, we
		     * handle deletions. These are represented by any
		     * non-matched lines on the F_OLD file.
		     */

		    if (difflen[F_OLD]) {
			fprintf(fp, "D%u\r\n", difflen[F_OLD]);
			while (difflen[F_OLD]--) {
			    /*
			     * Skip over deleted lines
			     */
			    F.sp[F_OLD] = wfile_ptr(&F.wf[F_OLD], &F.len[F_OLD]);
			    wfile_advance(&F.wf[F_OLD], F.len[F_OLD]);
			}
		    }
		    if (difflen[F_NEW]) {
			fprintf(fp, "A%u\r\n", difflen[F_NEW]);
			while (difflen[F_NEW]--) {
			    /*
			     * Output added lines
			     */
			    F.sp[F_NEW] = wfile_ptr(&F.wf[F_NEW], &F.len[F_NEW]);
			    fwrite(F.sp[F_NEW], F.len[F_NEW], 1, fp);
			    wfile_advance(&F.wf[F_NEW], F.len[F_NEW]);
			}
		    }
		}
		/*
		 * The stream's error indicator is sticky, so one check per
		 * pass is enough to stop as soon as any write above failed
		 * instead of grinding through the rest of both files.
		 */
		if (ferror(fp))
		    rc = -1;
	    }

	    if (rc == 0 && matches) {	/* Output matching line count as "copy" */
		fprintf(fp, "C%u\r\n", matches);
		matches = 0;
	    }
#if 0				/* Surprisingly, MAKENL doesn't actually do
				 * this */
	    /*
	     * Assume we succeeded, and write a ^Z to the end of the generated
	     * diff. Ah, don't you just love redundancy!?
	     */
	    fwrite("\x1a", 1, 1, fp);
#endif
	    /*
	     * Report any failed write (including the id line and the final
	     * copy command) to the caller rather than returning success
	     * for a truncated difference file.
	     */
	    if (ferror(fp)) {
		logit(LOG_ERROR, "write error generating difference file");
		rc = -1;
	    }
	    wfile_close(&F.wf[F_NEW]);
	}
	wfile_close(&F.wf[F_OLD]);
    }
    return rc;
}


/* diff_match() - test whether the current lines of both files match
 *
 * Refreshes F->sp[] and F->len[] for both files from wfile_ptr(); a
 * NULL line pointer is taken to mean that file is at EOF. Every side
 * that still has a line is passed through wfile_analyse() first
 * (presumably so that wfile_isacomment() can be asked about it).
 *
 * Returns 1 if both files are at EOF, or if both current lines have
 * the same length and identical bytes. Returns 0 otherwise, and also
 * when 'nocmmnt' is non-zero and the old file's line is a comment.
 */

static int
diff_match(FDIFF * F, int nocmmnt)
{
    int old_done, new_done;

    F->sp[F_OLD] = wfile_ptr(&F->wf[F_OLD], &F->len[F_OLD]);
    old_done = (F->sp[F_OLD] == NULL);
    F->sp[F_NEW] = wfile_ptr(&F->wf[F_NEW], &F->len[F_NEW]);
    new_done = (F->sp[F_NEW] == NULL);

    /* Two exhausted files always compare equal */
    if (old_done && new_done)
	return 1;

    if (!old_done)
	wfile_analyse(&F->wf[F_OLD], F->sp[F_OLD], F->len[F_OLD]);
    if (!new_done)
	wfile_analyse(&F->wf[F_NEW], F->sp[F_NEW], F->len[F_NEW]);

    /* Exactly one side ran out: there is nothing to compare against */
    if (old_done || new_done)
	return 0;

    /* Caller asked for non-comment matches only */
    if (nocmmnt && wfile_isacomment(&F->wf[F_OLD]))
	return 0;

    /* Lines of different length cannot be equal */
    if (F->len[F_OLD] != F->len[F_NEW])
	return 0;

    return memcmp(F->sp[F_OLD], F->sp[F_NEW], F->len[F_OLD]) == 0;
}


/* diff_name() - derive the file name of a difference file
 *
 * dest        - receives the resulting name; also the return value
 * target_list - name of the target nodelist
 * diffname    - optional root name for the difference file; may be
 *               NULL or empty
 *
 * When 'diffname' is supplied it is combined with its own directory
 * (or the target's, if it has none) and given the target's extension.
 * If that name differs from 'target_list' (ignoring case) it is used
 * unchanged. Otherwise - or when no 'diffname' was supplied - the
 * last path component is lower-cased and the first "list" in it is
 * replaced by "diff"; failing that, the extension is made to start
 * with 'D'.
 *
 * NOTE(review): nothing here bounds the copies into the _MAX_PATH
 * work buffer or into 'dest'; callers are assumed to pass names that
 * fit - confirm.
 */

char *
diff_name(char *dest, char const * target_list, char const * diffname)
{
    char work[_MAX_PATH];
    char *leaf;
    char *hit;

    if (diffname == NULL || *diffname == '\0') {
	/* No root name given: start from a copy of the target name */
	leaf = strlwr(last_component(strcpy(work, target_list)));
    } else {
	/* Use the directory of diffname, else that of the target */
	if (!*get_path(work, diffname))
	    get_path(work, target_list);
	strcpy(work, build_path(NULL, work, diffname, NULL));
	/*
	 * Fix extension to be the same as target
	 */
	add_ext(work, path_ext((char *) target_list));
	/* A name distinct from the target can be used as it stands */
	if (stricmp(work, target_list) != 0)
	    return strcpy(dest, work);
	leaf = strlwr(last_component(work));
    }

    hit = strstr(leaf, "list");
    if (hit != NULL) {
	/*
	 * Change "list" for "diff" in title
	 */
	memcpy(hit, "diff", 4);
    } else {
	/*
	 * Else, put 'D' in extension for diff
	 */
	char *ext = path_ext(leaf);
	if (*ext == '\0')
	    strcpy(ext, "D");
	else
	    *ext = 'D';
    }

    strcpy(dest, work);
    return dest;
}
