/*
 *	$Source: /site/Don/track/RCS/stamp.c,v $
 *	$Header: stamp.c,v 4.8 88/06/21 19:44:36 don Locked $
 *
 *	$Log:	stamp.c,v $
 * Revision 4.8  88/06/21  19:44:36  don
 * fixed a bug in the prior version's dec_entry. don't use the prior
 * version.
 * 
 * Revision 4.7  88/06/20  18:57:38  don
 * support for stamp.c version 4.4 :
 * this version's dec_entry has less static state, because it
 * detects that the entry hasn't been processed before, in a better way.
 * 
 * Revision 4.6  88/06/10  15:58:39  don
 * changed DEV usage to RDEV.
 * 
 * Revision 4.5  88/06/10  14:29:57  don
 * better error-handling on encountering sockets.
 * 
 * Revision 4.4  88/05/26  13:59:55  don
 * cosmetics in sort_entries(): fixed indentation, and added comments.
 * 
 * Revision 4.3  88/05/25  21:25:02  don
 * fixed a bug in sort_entries: only one descendant was getting added
 * to each entry's exception-list.
 * 
 * Revision 4.2  88/05/24  17:41:08  don
 * beefed up garbled-statfile messages, to aid in finding a bug.
 * the bug is a garbled-statfile message at the end of an update.
 * this is intermittent.
 * 
 * Revision 4.1  88/05/04  18:14:26  shanzer
 * fixed a bug in sort_entries(); its augmentation of a parent-entry's
 * exception-list didn't always work.	-don
 * 
 * Revision 4.0  88/04/14  16:43:00  don
 * this version is not compatible with prior versions.
 * it offers, chiefly, link-exporting, i.e., "->" syntax in exception-lists.
 * it also offers sped-up exception-checking, via hash-tables.
 * a bug remains in -nopullflag support: if the entry's to-name top-level
 * dir doesn't exist, update_file doesn't get over it.
 * the fix should be put into the updated() routine, or possibly dec_entry().
 * 
 * Revision 3.0  88/03/09  13:17:47  don
 * this version is incompatible with prior versions. it offers:
 * 1) checksum-handling for regular files, to detect filesystem corruption.
 * 2) more concise & readable "updating" messages & error messages.
 * 3) better update-simulation when nopullflag is set.
 * 4) more support for non-default comparison-files.
 * finally, the "currentness" data-structure has replaced the statbufs
 * used before, so that the notion of currency is more readily extensible.
 * note: the statfile format has been changed.
 * 
 * Revision 2.5  88/02/23  19:21:36  don
 * fixed pushpath() & poppath() so that pushing "" onto a path-stack
 * doesn't push  a '/' as well. for example, pushpath( "/bin", "") should
 * yield "/bin", instead of "/bin/". this was causing findparent() to fail
 * when track needs to create bin under the subscriber's mount-point.
 * 
 * Revision 2.4  88/01/29  18:24:02  don
 * bug fixes. also, now track can update the root.
 * 
 * Revision 2.3  87/12/03  19:50:02  don
 * moved SIGN macro to track.h.
 * 
 * Revision 2.2  87/12/03  17:31:15  don
 * fixed rt-port bug in dec_statfile's use of sscanf():
 * can't fill a short int directly, because sscanf will interpret it
 * as a long-int, and on the rt, short* gets converted to int* via
 * truncation!
 * 
 * Revision 2.1  87/12/01  16:44:54  don
 * fixed bugs in readstat's traversal of entries] and statfile:
 * cur_ent is no longer global, but is now part of get_next_match's
 * state. also, last_match() was causing entries[]'s last element to be
 * skipped.
 * 
 * Revision 2.0  87/11/30  15:19:43  don
 * general rewrite; got rid of stamp data-type, with its attendant garbage,
 * cleaned up pathname-handling. readstat & writestat now sort everything
 * by pathname, which simplifies traversals/lookup. should be comprehensible
 * now.
 * 
 * Revision 1.1  87/02/12  21:15:36  rfrench
 * Initial revision
 * 
 */

#ifndef lint
/* RCS identification string for this file; compiled out when `lint' is defined. */
static char *rcsid_header_h = "$Header: stamp.c,v 4.8 88/06/21 19:44:36 don Locked $";
#endif /* lint */

#include "mit-copyright.h"

#include "track.h"

/* XXX
 * convert right-shifted st_mode type-bits to corresponding formats:
 * S_IFCHR = 0020000 gets mapped to elt 1 of the array,
 * S_IFDIR = 0040000 => elt 2 of the array,
 * S_IFBLK = 0060000 => elt 3 of the array,
 * S_IFREG = 0100000 => elt 4 of the array,
 * S_IFLNK = 0120000 => elt 5 of the array,
 * S_IFSOCK= 0140000 => elt 6 of the array, ( only for error messages),
 * S_IFMT  = 0170000 => elt 7 of the array, ( dropping 1 bit).
 */
/* sprintf formats for one statfile line, indexed by ( type-bits >> 13).
 * write_statline() always passes the same seven arguments:
 * name, uid, gid, mode, time, content, same-name flag.
 * the %c conversions print fields that don't apply to the type.
 */
static char *write_formats[] = {
	"*ERROR (write_statline): %s's file type is 0.\n",
	"c %s u%d g%d m%o %c d%d %c\n",		/* S_IFCHR */
	"d %s u%d g%d m%o %c %c %c\n",		/* S_IFDIR */
	"b %s u%d g%d m%o %c d%d %c\n",		/* S_IFBLK */
	"f %s u%d g%d m%o t%ld s%x %c\n",	/* S_IFREG */
	"l %s %c %c %c %c ->%s %c\n",		/* S_IFLNK */
	"*ERROR (write_statline): can't track socket %s.\n",
	"*ERROR (write_statline): bad type S_IFMT %s.\n"
};

/* sscanf formats for the part of a statfile line that follows the
 * type-character and pathname ( dec_statfile() parses those two by hand).
 * indexed by ( st_mode >> 13), like write_formats[].
 * dec_statfile() skips the sscanf call when the selected format is "".
 */
static char *read_formats[] = {
	"",
	"u%d g%d m%o %1s d%d %1s\n",	/* S_IFCHR */
	"u%d g%d m%o %1s %1s %1s\n",	/* S_IFDIR */
	"u%d g%d m%o %1s d%d %1s\n",	/* S_IFBLK */
	"u%d g%d m%o t%ld s%x %1s\n",	/* S_IFREG */
	"%1s %1s %1s %1s ->%s %1s\n",	/* S_IFLNK */
	"",				/* S_IFSOCK */
	""				/* S_IFMT */
};

/* one display character per ( type-bits >> 13) value;
 * write_statline() uses it to report an unrecognized file type.
 */
char type_char[] = " cdbfls*89ABCDEF";

/* depth counter of path-stack p: the int that p[ CNT] points at. */
#define COUNT(p) (*(int*)p[CNT])

/*
 * Record one statfile line for path in statfilebufs[ cur_line], in the form
 *       type name uid gid mode time content same-name-flag
 * or a suitable derivative thereof: fields that don't apply to the
 * file's type are written as the single character '.'.
 *
 * path: path-stack of the file being recorded. path[ NAME] is the name
 *	 written on the line, path[ ROOT] is handed to (*statf)() and used
 *	 in error messages, and the top element supplies the sortkey.
 * c:	 currency ( stat data, checksum, link-text) to record.
 *
 * returns the st_mode type-bits that selected the line's format.
 * a socket is griped about and skipped: no line is recorded for it.
 * unknown types, allocation failure, and (*statf)() failure panic.
 */

write_statline( path, c)
char **path; struct currentness *c;
{
	char  *format, *linebuf, *name;
	char same_name = '=';
	unsigned int type;
	struct stat fromstat, *s;
	static struct stat dot_stat;
	unsigned size, content, time;

	/* grow the line-buffer array by MAXLINES entries when it's full:
	 */
	if ( cur_line >= maxlines) {
		maxlines += MAXLINES;
		size = maxlines * sizeof statfilebufs[0];
		statfilebufs =
			(Statline *) ( cur_line ?
				       realloc( (char *) statfilebufs, size)
				      : malloc( size));
		if ( ! statfilebufs) {
			sprintf( errmsg, "alloc failed: %d statfile lines\n",
				 cur_line);
			do_panic();
		}
		/* XXX: setup at alloc-time, so it's infrequently done.
		 * sprintf needs these fields to be exactly one character long.
		 */
		UID( dot_stat) = GID( dot_stat) = RDEV( dot_stat) =
		     dot_stat.st_mode = ( unsigned short) '.';
		TIME(dot_stat) = ( unsigned int) '.';
	}
	/*
	 * set up type-dependent currency data:
	 */

	s = &c->sbuf;

	switch( type = TYPE( *s)) {
	case S_IFREG:
		content = c->cksum;
		time = TIME( *s);
		break;
	case S_IFLNK:
		/* NOTE(review): the link-text pointer travels through an
		 * unsigned int on its way to the "->%s" conversion;
		 * this assumes pointers fit in an unsigned int.
		 */
		content = (unsigned int) c->link;
		time = TIME( dot_stat);
		s = &dot_stat;
		break;
	case S_IFDIR:
		content = time = TIME( dot_stat);
		break;
	case S_IFBLK:
	case S_IFCHR:
		time = TIME( dot_stat);
		content = RDEV( *s);
		break;
	case S_IFSOCK:
		sprintf( errmsg, "can't track socket %s.\n", path[ ROOT]);
		do_gripe();
		return( type);
	case S_IFMT:
	default:
		sprintf( errmsg,
		"bad type for inode %d, pathname %s.\n\tapparent type = %c\n",
		    c->sbuf.st_ino, path[ ROOT], type_char[ type >> 13]);
		do_panic();
	}
	/* set up name & sortkey:
	 * root is a special case:
	 * its "relative path" is "", which dec_statfile() can't read.
	 */
	name = path[ NAME];
	if ( !*name) name = "/";

	strcpy( statfilebufs[ cur_line].sortkey, path[ COUNT( path)]);

	linebuf = statfilebufs[ cur_line].line;

	if	( ! strcmp( path[ NAME], c->name));
	else if ( (*statf)( path[ ROOT], &fromstat)) {
		sprintf( errmsg, "(write_statline) can't %s %s\n",
			 statn, path[ ROOT]);
		do_panic();
	}
	else {
		/* this entry's fromfile != cmpfile,
		 * so the subscribing machine needs to know:
		 * the line takes the fromfile's type, and its flag
		 * becomes '~' ( an assignment; "==" would leave it '=').
		 */
		type = TYPE( fromstat);
		same_name = '~';
	}

	/* to choose printing format, convert type-bits to array-index:
	 * the formats specify 3-7 arguments, according to type:
	 */
	format = write_formats[ type >> 13];

	sprintf( linebuf, format, name, UID( *s), GID( *s), MODE( *s),
		 time, content, same_name);

	cur_line++;

	if ( verboseflag)
		fputs( linebuf, stderr);

	return( type);
}

fake_link( root, name, c)
char *root, *name;
struct currentness *c;
{
	/* build a currency that describes a symbolic link to root/name.
	 *
	 * three consumers have to be satisfied at the same time:
	 * write_statline() can't be handed an ordinary currency here,
	 *	since it has no way to know the subscriber's fromroot;
	 * curr_diff() must see what an ordinary link's currency looks
	 *	like, or it will trigger unnecessary link-updates;
	 * update_file() is happy to make a link from an ordinary currency.
	 */

	/* dec_statfile() passes c->name itself; don't copy it onto itself. */
	if ( c->name != name)
		strcpy( c->name, name);

	/* an empty root yields an empty link-text,
	 * which is the special case write_statline() relies on.
	 */
	if ( '\0' == *root)
		c->link[ 0] = '\0';
	else
		sprintf( c->link, "%s/%s", root, name);

	/* no checksum, blank stat data, type-bits of a symbolic link: */
	c->cksum = 0;
	clear_stat( &c->sbuf);
	c->sbuf.st_mode = S_IFLNK;
}

sort_stat( i)
int i;
{
	/* sort the recorded statfile lines from index i up to cur_line.
	 * strcmp serves as the comparison function, which relies on each
	 * statfilebufs[] element beginning with its sortkey string.
	 * with no lines recorded, there is nothing to do.
	 */
	if ( cur_line)
		qsort( (char *) ( statfilebufs + i), cur_line - i,
		       sizeof statfilebufs[ 0], strcmp);
}

/* sort entries[ 1 .. entrycnt-1] by sortkey, then link each entry to
 * the entries nested beneath it: every such descendant C gets
 * C->parent set to the index of an enclosing entry A, and the part of
 * C's fromfile that follows A's key is added to A's exception-names
 * as a DONT_TRACK element.
 * panics if a freshly accumulated list meets an entry that already
 * has a names-table.
 */
sort_entries() {
	char *tail;
	Table list;
	Entry *A, *C;
	int i, j;

	/* list collects descendants for an entry that has no names-table yet: */
	list.table = NULL;
	list.shift = 0;

	/* NOTE: we assume that each entry begins with a sortkey string.
         * don't include entries[ 0] in the sort:
         */
        qsort( (char *)& entries[ 1], entrycnt - 1,
               sizeof(   entries[ 1]), strcmp);

	/* for each entry's fromfile (call it A),
	 * look for A's children amongst the subsequent entries,
	 * and add any that you find to A's exception-table.
	 * note that this may overfill the hash-table; in this event,
	 * we accept the performance-hit, and don't try to rehash.
	 */
	for (     A = &entries[ i = 1  ]; i < entrycnt; A = &entries[ ++i]) {
	    for ( C = &entries[ j = i+1]; j < entrycnt; C = &entries[ ++j]) {
		/* keyncmp() yields 0 when A's key is a leading subpath
		 * of C's sortkey, 1 when C sorts beyond A's subtree:
		 */
		switch(  keyncmp( C->sortkey, i)) {
		case 1:  break;			/* get next A */
		case 0:  C->parent = i;
			 /* tail = C's name relative to A, sans leading slashes */
			 tail = C->fromfile + A->keylen;
			 while( '/' == *tail) tail++;
			 if ( A->names.table)
			     store( add_list_elt( tail, DONT_TRACK, NULL),
				    &A->names);
			 else add_list_elt( tail, DONT_TRACK, LIST( list));
			 /* deliberate fall-through: go on to the next C */
		case -1: continue;	/* unlikely */
		}
		break; /* get next A */
	    }
	    /* if A doesn't already have an exception-list of names,
	     * then we've accumulated the list of descendant-entries
	     * in the list{} structure; convert it to a hash-table for A:
	     */
	    if ( ! list.table);
	    else if ( ! A->names.table) {
		     A->names.table = list.table;
		     A->names.shift = list.shift;
		     /* hand the list over to A, and start afresh: */
		     list.table = NULL;
		     list.shift = 0;
		     list2hashtable( &A->names);
	    }
	    else {
		     sprintf(errmsg, "sort_entries: internal error\n");
		     do_panic();
	    }
	}
}

/*
 * Decode a statfile line into its individual fields.
 * setup TYPE(), UID(), GID(), MODE(), TIME(), & RDEV() contents.
 *
 * line: one statfile line, as written by write_statline().
 *	 it is modified in place: the pathname gets NUL-terminated,
 *	 and the root's "/" is rewritten to "".
 * c:	 currency that receives the decoded stat data, checksum,
 *	 link-text, and name.
 *
 * returns a pointer to the pathname inside line.
 * panics if the line has no second field, if its first character
 * isn't one of [fldbc], or if its equality flag isn't '=' or '~'.
 */

char *
dec_statfile( line, c)
char *line; struct currentness *c;
{
	struct stat *s;
	char dummy[12], *format, *path, *start = line, type;
	/* the equality flag is scanned with %1s, which stores a character
	 * plus a terminating NUL, and may also be patched below;
	 * so it needs writable storage, not a string literal:
	 */
	char flagbuf[ 2], *same_name = flagbuf;
	int *content = (int *)&dummy[4], *time = (int *)&dummy[4];
	int d = 0, u = 0, g = 0, m = 0;

	flagbuf[ 0] = flagbuf[ 1] = '\0';

	/* these long-int temps are necessary for pc/rt compatibility:
	 * sscanf cannot scan into a short, though it may sometimes succeed
	 * in doing so. the difficulty is that it can't know about the
	 * target-integer's length, so it assumes that it's long.
	 * since the rt will truncate a short's addr, in order to treat
	 * it as a long, sscanf's data will often get lost.
	 * the solution is to give scanf longs, and then to convert these
	 * longs to shorts, explicitly.
	 */

	/* for speed, we laboriously parse the type-independent part
	 * of the statline without calling sscanf().
	 * thus, we avoid copying the pathname strings around,
	 * since the caller can re-use the originals,
	 * once they're broken out of the line-format.
	 */
	type = *line;
	path = line += 2;
	line = index( line, ' ');
	if ( ! line) {
		/* line is NULL here, so report the text we were handed;
		 * gripe first, so that the second message doesn't
		 * overwrite the first before anyone sees it.
		 */
		sprintf( errmsg, "garbled statfile: bad line =\n%s\n", start);
		do_gripe();
		sprintf( errmsg, "line has only one field\n");
		do_panic();
	}
	*line++ = '\0';

	/* in the  statfile, which contains only relative pathnames,
	 * "/" is the only pathname which can begin with a slash.
	 * in entries[], the root appears as "", which is more natural,
	 * because "" is "/"'s pathname relative to the mount-point fromroot.
	 * and because pushpath() prepends slashes in the right places, anyway.
	 * "" is hard to read with sscanf(), so we handle "/" specially:
	 */
	if ( '/' != *path);
	else if ( ! path[ 1]) *path = '\0';
	else {
		sprintf(errmsg, "statfile passed an absolute pathname: %s\n",
			path);
		do_gripe();
	}
	*c->link = '\0';
	c->cksum = 0;

	/*
	 * set up scanf arg's for type-dependent currency-data:
	 * time & content point into dummy[] unless the type has
	 * a real destination for them.
	 */

	s = &c->sbuf;
	clear_stat( s);

	switch( type) {
	case 'f':
		s->st_mode = S_IFREG;
		content = (int*) &c->cksum;
		time =    (int*) &s->st_mtime;
		break;
	case 'l':	/* more common than dir's */
		s->st_mode = S_IFLNK;
		content = (int*) c->link;
		break;
	case 'd':
		s->st_mode = S_IFDIR;
		content = (int *)&dummy[4];
		break;
	case 'b':
		s->st_mode = S_IFBLK;
		content = &d;
		break;
	case 'c':
		s->st_mode = S_IFCHR;
		content = &d;
		break;
	default:
		sprintf( errmsg, "garbled statfile: bad line =\n%s\n", line);
		do_gripe();
		sprintf( errmsg, "first char isn't a file-type [fldbc]\n");
		do_panic();
	}
	/* scan the rest of the line with the type's format;
	 * an empty read_formats[] entry means there's nothing to scan:
	 */
	if ( *(format = read_formats[ s->st_mode >> 13]))
		sscanf( line, format, &u, &g, &m, time, content, same_name);

	s->st_uid   = (short) u;
	s->st_gid   = (short) g;
	s->st_mode |= (short) m & 07777;
	s->st_rdev  = (short) d;

	/* a link-line whose flag didn't get scanned:
	 * take the flag from the first character of the scanned link-text,
	 * and rebuild the currency as a link under fromroot.
	 * NOTE(review): presumably this is the empty-link line that
	 * write_statline() emits for fake_link()'s special case - confirm.
	 * NOTE(review): c->name is passed before this line's path has been
	 * copied into it - confirm that callers rely on this.
	 */
	if ( type == 'l' && ! *same_name) {
	    *same_name = *(char*)content;
	    fake_link( fromroot, c->name, c);
	}
	/* the subscriber needs to know whether the currency-data
	 * came from the remote fromfile or from the remote cmpfile.
	 */
	switch( *same_name) {
	case '=': strcpy( c->name, path);
		  break;
	case '~': *c->name = '\0';
		  break;
	default:
		sprintf( errmsg, "garbled statfile: bad line =\n%s\n", line);
		do_gripe();
		sprintf( errmsg, "bad equality flag = %s\n", same_name);
		do_panic();
	}
	return( path);
}

/* the match abstractions implement a synchronized traversal
 * of the entries[] array & the statfile. both sets of data
 * are sorted by strcmp() on their sortkeys, which are made
 * via the KEYCPY macro.
 */

/* XXX: not the best, but not the worst approach either.
 * this  allows main() to call readstat() twice, with correct initialization.
 */
/* index of the entry that dec_entry() decoded last; 0 means "none yet". */
int prev_ent = 0;

/* forget the last-decoded entry, so that the next dec_entry() call
 * sets its paths up afresh. the ent argument is accepted but not used.
 * always returns 0.
 */
int
init_entry( ent) int ent; {
	prev_ent = 0;
	return( 0);
}

/* small_ancestor:
 * even when we know that path begins with an entry's fromfile,
 * path may nevertheless match other entries.
 * for example, the path /a/b/c matches the entries /a & /a/b.
 * we want the longest entry that matches.
 * look-ahead to find last sub-list entry that matches
 * the current pathname: this works because by keyncmp,
 *	    /	is greater than		/Z...
 *	   |	matches			/a
 *	   |	is greater than		/a/a
 *	   \	matches			/a/b
 * /a/b/c  <	is greater than		/a/b/a
 *	   /	matches			/a/b/c
 *	   |	is less than		/a/b/c/a
 *	   |	is less than		/a/b/d
 *	    \	is less than		/a/c...
 * NOTE that the right-hand column is sorted by key,
 * and that the last match is the longest.
 */
int
small_ancestor( path, great)
char *path;
int *great;
{
	char key[ LINELEN];
	int cmp, idx, best;

	KEYCPY( key, path);

	/* step *great forward past every entry that sorts before path.
	 * if path sorts before the entry at *great, no entry covers it:
	 * return 0, so that the caller advances path instead.
	 */
	while ( *great < entrycnt) {
		cmp = keyncmp( key, *great);
		if ( -1 == cmp)
			return( 0);
		if ( 1 != cmp)
			break;		/* entry *great matches path */
		(*great)++;
	}
	if ( entrycnt == *great)
		return( -1);		/* ran out of entries */

	/* *great is now path's greatest ancestor.
	 * look further down the table for the last entry that still
	 * matches path; the entries are sorted, so that's the longest one.
	 */
	best = *great;
	for ( idx = best + 1; idx < entrycnt; idx++) {
		cmp = keyncmp( key, idx);
		if ( 0 == cmp)
			best = idx;	/* a closer ancestor, e.g. /a/b */
		else if ( 1 != cmp)
			break;		/* beyond path, e.g. /a/b/d */
		/* cmp == 1: an unrelated sibling like /a/a; keep looking */
	}
	return( best);
}
/* climb the parent links that sort_entries() recorded:
 * starting at ent, follow .parent until an entry has none ( parent == 0),
 * and return that topmost entry's index.
 */
int
great_ancestor( ent)
int ent;
{
	int up;

	while ( 0 != ( up = entries[ ent].parent))
		ent = up;

	return( ent);
}


/* match the path r against the sortkey of entries[ i]:
 * returns 0 when the key is a subpath of r ( a match),
 * 1 when the key sorts before r, and -1 when it sorts after r.
 */
int
keyncmp( r, i)
char *r;
int i;
{
	char *key = entries[ i].sortkey;
	int keylen = entries[ i].keylen;
	int order = SIGN( strncmp( r, key, keylen));

	/* r and the key differ within the key's length: */
	if ( order)
		return( order);

	/* a zero-length key is the root, which matches everything: */
	if ( ! keylen)
		return( 0);

	/* the key is a prefix of r. it's a match only if r ends here,
	 * or continues with '\001'; anything greater means that
	 * r merely shares a prefix with the key, and sorts after it.
	 */
	return( (unsigned) r[ keylen] > '\001');   /* XXX */
}

/* dec_entry:
 * load entries[ ent]'s fromfile, tofile & cmpfile onto the path-stacks
 * fr, to & cmp, and return the currency to compare against.
 *
 * ent:	 index into entries[].
 * fr, to, cmp: path-stacks; when ent differs from the previous call's,
 *	 their top elements are popped and replaced by ent's pathnames.
 * tail: pathname relative to the entry, or NULL / "" for the entry itself.
 *
 * returns the entry's own currency when tail is empty, or when the
 * entry's cmpfile isn't a directory; otherwise the currency of cmp/tail,
 * held in a static buffer that the next call overwrites.
 * side effects: sets prev_ent and the globals statf & statn.
 */
struct currentness *
dec_entry( ent, fr, to, cmp, tail)
int ent; char *fr[], *to[], *cmp[], *tail; {
        static struct currentness currency_buf, *entry_currency;
	int i;
	Entry *e;
	static int xref_flag = 0;

	/* this routine's main purpose is to transfer ent's contents
	 * to the paths fr, to, & cmp.
	 * for efficiency and data-hiding reasons, we maintain various static
	 * data, including the currentness data for readstat's last update.
	 */

	/* we avoid calling get_currentness(cmp) redundantly, but not just
	 * for efficiency reasons: this helps our update-simulation for
	 * nopullflag-support. see update_file().
	 * each cmpfile may get updated at most once;
	 * if it does, update_file() will mark its currency "out-of-date".
	 * xref_flag is set if any entry uses another entry's file
	 * as a cmpfile. in this case, we have to search entries[]
	 * to see if what we've modified is another entry's cmpfile.
	 * if so, propagate the "out-of-date" mark to that entry.
	 */
	if ( xref_flag && updated( &currency_buf, NULL)) {
		/* the valid entries are entries[ 1 .. entrycnt-1], as in
		 * sort_entries() & small_ancestor(); stop before entrycnt.
		 */
		for ( e = &entries[ i = 1]; i < entrycnt; e = &entries[ ++i]) {
			if ( updated( &e->currency, NULL) ||
			     strcmp(   e->cmpfile,   currency_buf.name));
			else updated( &e->currency, &currency_buf);
		}
	}
	currency_buf.sbuf.st_mode = S_IFMT;	/* kill short-term data. */

	if (    prev_ent != ent) {
		prev_ent =  ent;

		/* a subtler, longer search would set this flag less often.
		 */
		if ( ! writeflag)
			xref_flag = strncmp( entries[ ent].cmpfile,
					     entries[ ent].tofile,
				     strlen( entries[ ent].tofile));

		poppath( fr); pushpath( fr,  entries[ ent].fromfile);
		poppath( to); pushpath( to,  entries[ ent].tofile);
		poppath(cmp); pushpath( cmp, entries[ ent].cmpfile);
		
		/* this function-var is global, and used generally.
		 */
		statf = entries[ ent].followlink ?  stat  :  lstat;
		statn = entries[ ent].followlink ? "stat" : "lstat";

		entry_currency = &entries[ ent].currency;
	}
	/* refresh the entry's currency if it's been marked out-of-date: */
	if ( updated( entry_currency, NULL))
		get_currentness( cmp, entry_currency);

	if ( ! tail || ! *tail )
		return( entry_currency);

	if ( S_IFDIR == TYPE( entry_currency->sbuf)) { /* usual case */
		pushpath( cmp, tail);
		get_currentness( cmp, &currency_buf);
		poppath( cmp);
		return( &currency_buf);
	}
	/* rarely, a directory may have a non-dir as its cmpfile.
	 * if the entry's cmpfile isn't a dir, then cmp[ ROOT],
	 * is the comparison-file for each of the tofile's dependents.
	 */
	return( entry_currency);
}

/* these routines handle a stack of pointers into a character-string,
 * which typically is a UNIX pathname.
 * the first element CNT of the stack points to a depth-counter.
 * the second element ROOT points to the beginning of the pathname string.
 * subsequent elements point to terminal substrings of the pathname.
 * a slash precedes each element.
 */
/* initpath:
 * allocate a path-stack, and make name its root element.
 *
 * name: the initial pathname; it's copied into the stack's own buffer.
 *
 * returns the new stack, with a depth-count of 1, p[ ROOT] at the start
 * of the pathname and p[ NAME] at its end; or NULL if memory can't be
 * had ( pushpath() & poppath() both accept a NULL stack).
 */
char **
initpath( name) char *name; {
	char **p;

	/* pushpath() stores into p[ COUNT( p) + 1] with the count as high
	 * as stackmax - 1, so the stack needs stackmax + 1 pointers.
	 * note that these are char-pointers; NULL needn't be the same size.
	 */
	p = (char **) malloc( ( stackmax + 1) * sizeof( char *));
	if ( ! p)
		return( (char **) 0);

	/* one buffer holds the depth-counter and then the pathname:
	 * room for name itself, plus 15 chars for each stack-element.
	 */
	p[ CNT] = (char *) malloc( sizeof( int) + strlen( name) + 1
				   + stackmax * 15);
	if ( ! p[ CNT]) {
		free( (char *) p);
		return( (char **) 0);
	}
	COUNT( p) = 1;
	p[ ROOT] = p[ CNT] + sizeof( int);
	strcpy( p[ ROOT], name);
	p[ NAME] = p[ ROOT] + strlen( name);
	return( p);
}
/* pushpath:
 * append name to the pathname held by path-stack p, as a new element.
 * a slash is put in front of the element, unless name is "".
 * returns the new depth-count, or -1 if p is NULL.
 * panics when the depth-count reaches stackmax.
 */
int
pushpath( p, name)
char **p;
char *name;
{
	char *top;
	int depth;

	if ( ! p)
		return( -1);

	depth = ++COUNT( p);
	if ( depth >= stackmax) {
		sprintf( errmsg, "%s\n%s\n%s %d.\n",
			"path stack overflow: directory too deep:", p[ ROOT],
			"use -S option, with value >", stackmax);
		do_panic();
	}
	/* p[ depth] marks the end of the pathname so far.
	 * the new element's pointer has to land beyond its slash:
	 */
	top = p[ depth];
	if ( *name) {
		*top++ = '/';
		p[ depth] = top;
	}
	strcpy( top, name);

	/* note where the next element will begin: */
	p[ depth + 1] = top + strlen( top);
	return( depth);
}
poppath( p) char **p; {
	if ( ! p) return;
	else if ( 1 >= COUNT( p)) {
		sprintf(errmsg,"can't pop root from path-stack");
		do_panic();
	}
	else if ( *p[ COUNT( p)]) p[ COUNT( p)]--;
		/* non-null last elt; remove its initial slash */
	*p[  COUNT( p)--] = '\0';
	return;
}
