
/*
**
**	Copyright (c) 1988, Robert L. McQueer
**		All Rights Reserved
**
** Permission granted for use, modification and redistribution of this
** software provided that no use is made for commercial gain without the
** written consent of the author, that all copyright notices remain intact,
** and that all changes are clearly documented.  No warranty of any kind
** concerning any use which may be made of this software is offered or implied.
**
*/

#include <stdio.h>
#include "node.h"
#include "config.h"

/*
** Globals defined elsewhere.  RatList is indexed in this file by FNAME
** (the list of file nodes) and DEP (the list of dependency edges).
** Verbosity gates the diagnostic output written to stderr.  Zflag is the
** -z option: when set, the sorted list is reversed at the end.
*/
extern NODE *RatList[];
extern int Verbosity;
extern int Zflag;

/*
** Forward declaration of the local cycle finder.  It must be declared
** static here because the definition further down is static - giving one
** identifier both external and internal linkage in the same translation
** unit is undefined, and is rejected outright by current compilers.
*/
static NODE *path_find();

/*
** rat_top_sort - topologically sort the file list, RatList[FNAME].
**
** The refcounts are used up (destroyed) by the sort; on entry they are
** assumed to accurately reflect the DEP records.  The resulting order
** puts files containing a definition ahead of the files which depend on
** them, ie. it is the inclusion order for the files.  If you picture
** arrows drawn from referring file to defining file, this is a
** "backwards" sort which finds the terminal nodes first - draw the
** arrows the other way and you are finding the originating nodes first,
** but then the arrows are "backwards" :-).
**
** Given the conventions of the data structures this is the textbook
** algorithm: pick a node with no successors (predecessors, for the
** other arrow direction), emit it, decrement the successor
** (predecessor) counts of all its predecessors (successors), repeat.
** Not being able to find such a node means there is a circularity.
**
** Circular references are resolved by finding a cycle, arbitrarily
** taking one of its nodes, and zapping one of the cycle's dependencies
** from consideration in future cycle detection.  I THINK the set of
** cycles we wind up with is a fundamental set.
**
** No arguments, no meaningful return value.  On exit RatList[FNAME]
** is the sorted list (reversed if the -z option was given).
*/
rat_top_sort()
{
	NODE *sorted, *last;	/* head and tail of the output list */
	NODE *cur, *prev;	/* node being picked, and its list predecessor */
	NODE *node, *nxt;	/* scratch for the list reversals */
	NODE *dep;		/* walks the DEP ("edge") list */
	NODE *edge;		/* closing edge of a detected cycle */
	char bufr[CATBUFFER+40]; /* see path_find() */

	/*
	** The incoming list is in the reverse of the user's original
	** order.  Put it back the right way round so the sort follows
	** the user's order as closely as it can - except with the
	** z-option, where the reversal is done at the very end instead.
	*/
	if (!Zflag)
	{
		sorted = NULL;
		node = RatList[FNAME];
		while (node != NULL)
		{
			nxt = node->next;
			node->next = sorted;
			sorted = node;
			node = nxt;
		}
		RatList[FNAME] = sorted;
	}

	/* no edge starts out erased */
	dep = RatList[DEP];
	while (dep != NULL)
	{
		dep->d.dep.erase = 0;
		dep = dep->next;
	}

	/*
	** Nodes migrate one at a time from RatList[FNAME] onto the
	** sorted list until the former is empty.
	*/
	sorted = NULL;
	last = NULL;
	while (RatList[FNAME] != NULL)
	{
		/*
		** Look for the first node nobody is holding up.  Unless
		** there are circular references, one has to exist.
		*/
		prev = NULL;
		cur = RatList[FNAME];
		while (cur != NULL && cur->d.fname.refcount != 0)
		{
			prev = cur;
			cur = cur->next;
		}

		/*
		** Every refcount is non-zero: look for a cyclical
		** reference, ie. a node with a path leading back to
		** itself, and arbitrarily settle for that node.
		*/
		if (cur == NULL)
		{
			if (Verbosity > 2)
				fprintf(stderr,
				"NO UNREF'ED files - finding a cycle\n");

			prev = NULL;
			cur = RatList[FNAME];
			while (cur != NULL)
			{
				edge = path_find(cur,cur,bufr,0);
				if (edge != NULL)
					break;
				prev = cur;
				cur = cur->next;
			}
			if (cur == NULL)
				fatal("logic error - topological sort");

			/* record the cycle text path_find left in bufr */
			new_cycle(bufr);

			/*
			** The tricky part.  We are going to use this node
			** regardless, but the marks need care.  What
			** path_find handed back is the last edge of the
			** cycle; marking it erased keeps it out of future
			** cycle detections.  By construction of the path,
			** that edge belongs to a file which depends upon
			** cur, so its refcount gets handled by the
			** decrement pass below.  cur itself leaves the
			** list still carrying a positive refcount, which
			** is cleared out when the appropriate neighbor in
			** the cycle is picked up.  We have to be very
			** careful here: there may be lots of overlapping
			** cycles, and we must not pick the same one up
			** over again.
			*/
			edge->d.dep.erase = 1;
			if (Verbosity > 2)
				fprintf(stderr,"cycle edge: ref %s, def %s\n",
					(edge->d.dep.rfile)->key.name,
					(edge->d.dep.dfile)->key.name);
		}

		/* unlink the chosen node from RatList[FNAME] */
		if (prev != NULL)
		{
			prev->next = cur->next;
			if (Verbosity > 2)
				fprintf(stderr,"found %s after %s\n",
						cur->key.name,prev->key.name);
		}
		else
		{
			RatList[FNAME] = cur->next;
			if (Verbosity > 2)
				fprintf(stderr,"found list head: %s\n",
							cur->key.name);
		}

		/* and hang it on the end of the sorted list */
		cur->next = NULL;
		if (sorted != NULL)
			last->next = cur;
		else
			sorted = cur;
		last = cur;

		/*
		** Walk the dependency records (really our edge list):
		** each file referring to the node just emitted has one
		** less thing holding it up.
		*/
		for (dep = RatList[DEP]; dep != NULL; dep = dep->next)
		{
			if (dep->d.dep.dfile == cur)
			{
				if (Verbosity > 3)
					fprintf(stderr,"decrement %s ref count\n",
						(dep->d.dep.rfile)->key.name);
				--((dep->d.dep.rfile)->d.fname.refcount);
			}
		}
	}

	/* the -z option wants the result in the opposite order */
	if (Zflag)
	{
		node = sorted;
		sorted = NULL;
		while (node != NULL)
		{
			nxt = node->next;
			node->next = sorted;
			sorted = node;
			node = nxt;
		}
	}

	/* the sorted list takes the place of the original */
	RatList[FNAME] = sorted;
}

/*
** path_find routine is a recursive DFS to find a path.  Used only to
** find cycles.
**
**	from	FNAME node the search starts (or continues) from.
**	to	FNAME node we are trying to reach; may be the same node
**		as from.
**	buf	where the next piece of path text is to be placed.  The
**		top-level caller passes the start of a buffer which must
**		hold at least CATBUFFER+40 characters.
**	count	0 on the top-level call, flagging top entry.  On the
**		recursive calls, a running (slightly pessimistic) count of
**		the characters accounted for so far; always positive.
**
** Returns NULL if no path.  Otherwise returns the DEP node for the final
** path edge (the one whose dfile is "to") and leaves the path in the
** buffer as
**
**	name (sym) name (sym) name ...
**
** Text which would take the path beyond CATBUFFER is dropped, and " ...."
** is placed at the point where the text was cut off.
**
** Side effects: the top-level call clears d.fname.mark on every node on
** RatList[FNAME]; every node visited has its mark set.  Edges which have
** d.dep.erase set are never followed.
*/
static NODE *
path_find(from,to,buf,count)
NODE *from;
NODE *to;
char *buf;
int count;
{
	NODE *ptr, *p2;
	char *str1;
	char *str2;
	char *nbuf;
	int len;

	/* on top-level call, initialize marks, put "from" in buffer */
	if (count <= 0)
	{
		for (ptr = RatList[FNAME]; ptr != NULL; ptr = ptr->next)
			ptr->d.fname.mark = 0;

		/*
		** bound the copy - an over-long name must not run off the
		** end of the buffer, and must leave the slack beyond
		** CATBUFFER free for the " ...." truncation marker.
		*/
		sprintf(buf,"%.*s",(int)CATBUFFER,from->key.name);
		buf += (count = strlen(buf));
	}

	/* for space character, and to assure positive */
	++count;

	/* mark that we've been here */
	from->d.fname.mark = 1;

	/*
	** we WILL look for a direct path first
	** - IGNORE mark because original from may = to, and has
	** already been marked
	*/
	for (ptr = from->d.fname.dep; ptr != NULL; ptr = ptr->d.dep.next)
	{
		/* if "erased" edge, ignore */
		if (ptr->d.dep.erase)
			continue;

		if (ptr->d.dep.dfile == to)
		{
			str1 = to->key.name;
			str2 = ptr->d.dep.sym;

			/* " (" + sym + ") " + name = 4 extra characters */
			len = strlen(str1) + strlen(str2) + 4;
			if ((count += len) > CATBUFFER)
				strcpy(buf," ....");
			else
				sprintf(buf," (%s) %s",str2,str1);
			return (ptr);
		}
	}

	/* USE mark on recursive call */
	for (ptr = from->d.fname.dep; ptr != NULL; ptr = ptr->d.dep.next)
	{
		/* if "erased" edge, ignore */
		if (ptr->d.dep.erase)
			continue;

		/* if visited node, ignore */
		if ((ptr->d.dep.dfile)->d.fname.mark)
			continue;

		str1 = (ptr->d.dep.dfile)->key.name;
		str2 = ptr->d.dep.sym;
		len = strlen(str1)+strlen(str2)+4;

		/*
		** Only move the write position forward if this segment
		** was actually written.  Once text is being dropped, the
		** position must stay at the cut-off point: count is now
		** >= CATBUFFER, so no deeper call will write a segment,
		** and the " ...." which ends a successful path lands right
		** behind the last text written.  Advancing regardless
		** would walk the pointer off the end of the caller's
		** buffer and leave the marker beyond the terminator.
		*/
		if ((count += len) < CATBUFFER)
		{
			sprintf(buf," (%s) %s",str2,str1);
			nbuf = buf + len;
		}
		else
		{
			nbuf = buf;
		}

		p2 = path_find(ptr->d.dep.dfile, to, nbuf, count);
		if (p2 != NULL)
			return(p2);

		/* dead end - back this segment out of the running count */
		count -= len;
	}

	return(NULL);
}
