/************************************************************************
 *									*
 *			Copyright (c) 1982, Fred Fish			*
 *			    All Rights Reserved				*
 *									*
 *	This software and/or documentation is released for public	*
 *	distribution for personal, non-commercial use only.		*
 *	Limited rights to use, modify, and redistribute are hereby	*
 *	granted for non-commercial purposes, provided that all		*
 *	copyright notices remain intact and all changes are clearly	*
 *	documented.  The author makes no warranty of any kind with	*
 *	respect to this product and explicitly disclaims any implied	*
 *	warranties of merchantability or fitness for any particular	*
 *	purpose.							*
 *									*
 ************************************************************************
 */


/*
 *  FILE
 *
 *	hashtbl.c   generic hash table utility routines
 *
 *  KEYWORDS
 *
 *	hash table
 *	hash function
 *
 *  DESCRIPTION
 *
 *	In many programming applications, a need exists for
 *	a relatively simple set of utility routines for
 *	managing a lookup table.  Typically, information
 *	is accessed by a single "key", such as a person's
 *	name, a programming variable character string, etc.
 *
 *	These utility routines provide useful table manipulation
 *	routines for small to medium sized tables.  The method
 *	used is a hash table, with all entries hashing to a
 *	particular value stored in a linked list.
 *
 *	Each table entry points to an arbitrary structure (tbl_data)
 *	defined externally, which contains the actual information
 *	associated with that entry (including the name of the entry).
 *	To keep these routines generic, only pointers to the
 *	structure are manipulated.  Specifically note that the
 *	actual storage associated with the information in each
 *	entry must be allocated external to these routines.
 *
 *  FUNCTIONS
 *
 *	tbl_find   find a table entry and return pointer
 *	tbl_add    add an entry to the table
 *	dump_tbl   dump the hash table for debugging
 *
 *  BUGS
 *
 *	Current hash function is very primitive and may give
 *	a very unbalanced table for some classes of strings.
 *
 *  AUTHOR
 *
 *	Fred Fish
 *
 */

#include <stdio.h>
#include "hashtbl.h"

struct tbl_entry {		/* Structure for each table entry */
    char *name;			/* Table entry name */
    struct tbl_data *tdp;	/* Structure defined in "hashtbl.h" */
    struct tbl_entry *next;	/* Next entry with same hash value */
};

#define SCR_WIDTH 80		/* Screen width for table dump */
#define SMATCH 0		/* Value when strings are same */

extern int debug;		/* Debug flag */
extern int trace;		/* Trace flag */

static struct tbl_entry *hash_table[HASH_SIZE];


/*
 *  FUNCTION
 *
 *	tbl_find   find a table entry and return struct pointer
 *
 *  KEY WORDS
 *
 *	table lookup
 *	hash table
 *
 *  SYNOPSIS
 *
 *	struct tbl_data *tbl_find(name)
 *	char *name;
 *
 *  DESCRIPTION
 *
 *	Given the name of an entry in the table, this routine
 *	attempts to look it up and return a pointer to it's
 *	associated structure.   If no entry of the specified
 *	name is found then a NULL is returned.
 *
 *	The table is structured as a number of linked lists 
 *	with the pointer to the first link stored in a hash table.
 *	Each entry in a specific linked list hashes to the same
 *	value.  For example:
 * 
 *	    	HASH TABLE      LINKED LIST
 *	    	----------	-----------------------
 *
 *	    	   .
 *	    	   .
 *	    	  NULL
 *	    	  "12"	   =>	name1 => name2 => name3 
 *	    	  NULL
 *	    	  "31"	   =>	name4
 *	    	  "32"	   =>	name5 => name6
 *	    	   .
 *	    	   .
 *	    	
 *  RETURNS
 *
 *	Returns pointer to structure if the entry is found.  Returns
 *	NULL if no entry has specified name.
 *
 */

/*
 *  PSEUDO CODE
 *
 *	Begin tbl_find.
 *	    If pointer to name is not NULL then
 *	        Get pointer to first entry for current hash value.
 *	        While there is a table entry to test for a name match
 *	            If the names match then
 *	    	        Return pointer to the structure for the entry.
 *	            Else
 *	    	        Lookup the next table entry for the hash value.
 *	            End if
 *	        End while
 *	    End if
 *	    Return NULL for no entry found.
 *	End tbl_find.
 *
 */
 
struct tbl_data *tbl_find(name)
char *name;
{
    struct tbl_entry *lookup;

    if(trace) {printf("beginning table_find\n");}
    if (name != NULL) {
        lookup = hash_table[hash(name)];
        while (lookup != NULL) {
	    if (strcmp(lookup->name,name) == SMATCH) {
	        return (lookup->tdp);
	    } else {
	        lookup = lookup->next;
	    }
	}
    }
    return ((struct tbl_data *)NULL);
}

/*
 *  FUNCTION
 *
 *	tbl_add   add an entry to the table
 *
 *  KEY WORDS
 *
 *	hash table
 *	table insertions
 *
 *  SYNOPSIS
 *
 *	struct tbl_data *tbl_add(new_data)
 *	struct tbl_data *new_data;
 *
 *  DESCRIPTION
 *
 *	This routine is responsible for adding entries to the table.
 *	Note that to avoid any possibility of duplicate entries, 
 *	tbl_add first calls tbl_find to see if an entry exists.
 *	This usually results in duplication of effort since most
 *	calls to tbl_add are already preceded by a call to
 *	tbl_find.  However duplicate entries are a serious matter
 *	and every effort should be made to avoid them.
 *
 *  RETURNS
 *
 *	Returns pointer to the table data structure if add is
 *	successful, NULL otherwise (such as duplicate entry).
 *
 */

/*
 *  PSEUDO CODE
 *
 *	Begin tbl_add.
 *	    If the pointer to the entry name is not valid then
 *	        Return a NULL.
 *	    Else if the entry already exists then
 *		Return a NULL.
 *	    Else
 *	        Allocate new memory for the new table entry.
 *		If allocation fails then
 *		    Return a NULL.
 *		Else
 *	            Save pointer to the entry name in the table entry.
 *	            Save the structure pointer in the table entry.
 *	            Initialize the pointer to the next table entry to NULL.
 *	            Compute the hash value for the entrie's name.
 *	            If the hash table currently contains no entry for value
 *	    	        Start a new table entry chain with hash value.
 *	            Else
 *	    	        While there is another link in the current chain
 *	    	            Skip over current and go to the next link.
 *	    	        End while
 *	    	        Add the new link at the end of the chain.
 *	            End if
 *		    Return pointer to struct (for success).
 *		End if
 *	    End if
 *	End tbl_add
 *
 */


struct tbl_data *tbl_add(new_data)
struct tbl_data *new_data;
{
    struct tbl_entry *new, *scan;
    int hash_value;
    char *get_memory();

    if(trace) {printf("beginning tbl_add\n");}
    if (new_data->name == NULL) {
	return((struct tbl_data *)NULL);
    } else if (tbl_find(new_data->name) != NULL) {
	return((struct tbl_data *)NULL);
    } else {
        new = (struct tbl_entry *) get_memory(sizeof(struct tbl_entry));
	if (new == NULL) {
	    return((struct tbl_data *)NULL);
	} else {
	    new->name = new_data->name;
	    new->tdp = new_data;
	    new->next = NULL;
	    hash_value = hash(new->name);
	    if ((scan = hash_table[hash_value]) == NULL) {
	        hash_table[hash_value] = new;
	    } else {
	        while (scan->next != NULL) {
		    scan = scan->next;
	        }
	        scan->next = new;
	    }
	    return(new_data);
	}
    }
}

/*
 *  FUNCTION
 *
 *	hash   compute hash value for a string
 *
 *  KEY WORDS
 *
 *	hash table
 *	hash function
 *
 *  SYNOPSIS
 *
 *	int hash(string)
 *	char *string;
 *
 *  DESCRIPTION
 *
 *	Takes an input string and returns a hash value
 *	for that string.  Note that the algorithm used has the merit
 *	of extreme simplicity but is by no means the best.
 *
 *  RETURNS
 *
 *	Hash value of string.
 *
 */

/*
 *  PSEUDO CODE
 *
 *	Begin hash.
 *	    Initialize hash value to be zero.
 *	    While there is a character left in input string.
 *	        Sum the character's value into the hash value.
 *	    End while
 *	    Return the value (modulo the hash table size).
 *	End hash.
 *
 */

static int hash(string)
char *string;
{
    register int hash_value;

    if(trace) {printf("beginning hash\n");}
    hash_value = 0;
    while (*string != NULL) {
	hash_value += *string++;
    }
    hash_value %= HASH_SIZE;
    return (hash_value);
}

/*
 *  FUNCTION
 *
 *	dump_tbl   dump hash table for debugging purposes
 *
 *  KEY WORDS
 *
 *	hash table
 *
 *  SYNOPSIS
 *
 *	dump_tbl()
 *
 *  DESCRIPTION
 *
 *	Prints the table contents on the standard output
 *	in the format:
 *
 *		nnn:    name : name : name : name ...
 *
 *	Where "nnn" is the hash value (index into the table) and
 *	"name" is the name of an entry.  The names are printed
 *	in the same order they are encountered in the linked list, and
 *	all have the same hash value.
 *
 *	Note that the distribution of names can give some indication of
 *	the suitability of the particular hash function used.
 *
 */

/*
 *  PSEUDO CODE
 *
 *	Begin dump_tbl
 *	    For each hash table entry in the table
 *		If the hash table entry points to a chain then
 *		    Initialize output buffer with hash value output.
 *		    For each entry in the linked list chain
 *			If adding name would overflow buffer then
 *			    Print the buffer.
 *			    Reinitialize buffer for additional line.
 *			End if
 *			Add entry name to buffer
 *		    End for
 *		    Print the buffer.
 *		End if
 *	    End for
 *	End dump_tbl
 *
 */

dump_tbl()
{
    char buffer[SCR_WIDTH];
    int hash_index;
    struct tbl_entry *tp;

    for (hash_index=0; hash_index<HASH_SIZE; hash_index++) {
	if (hash_table[hash_index] != NULL) {
	    sprintf(buffer,"%d ",hash_index);
	    for (tp=hash_table[hash_index]; tp != NULL; tp=tp->next) {
		if ((strlen(tp->name) + strlen(buffer) + 4) > SCR_WIDTH) {
		    printf("%s\n",buffer);
		    sprintf(buffer,"\t");
		}
		strcat(buffer," : ");
		strcat(buffer,tp->name);
	    }
            printf("%s\n",buffer);
	}
    }
}

/*
 *  FUNCTION
 *
 *	get_memory   allocate more main memory and zero it
 *
 *  KEY WORDS
 *
 *	memory allocator
 *
 *  SYNOPSIS
 *
 *	char *get_memory(how_much)
 *	unsigned int how_much;
 *
 *  DESCRIPTION
 *
 *	This routine is responsible for all memory allocation 
 *	requests.  If the amount of memory requested cannot be 
 *	allocated for any reason, an error message is printed
 *	and NULL is returned.
 *
 *	To avoid any question about the contents of the newly
 *	allocated memory (some systems zero it, others don't),
 *	we explicitly zero the memory.
 *
 *  RETURNS
 *
 *	Pointer to allocated memory or NULL if memory allocation
 *	fails.
 *
 */

/*
 *  PSEUDO CODE
 *
 *	Begin get_memory
 *	    Attempt to allocate the requested number of bytes.
 *	    If allocation attempt failed then
 *	        Print memory allocation failure message.
 *		Return NULL.
 *	    Else
 *	        For each byte in new memory.
 *	    	    Zero the byte.
 *	        End for
 *	    End if
 *	    Return pointer to the memory which was allocated.
 *	End get_memory
 *
 */

char *get_memory(how_much)
unsigned int how_much;
{
    char *memory, *mem;
    char *malloc();
    int count;

    if(trace) {printf("beginning get_memory\n");}
    memory = malloc(how_much);
    if (memory == NULL) {
	fprintf(stderr,"?hashtbl: unable to allocate more memory!\n");
	return(NULL);
    } else {
	mem = memory;
	for (count=0; count<how_much; count++) {
	    *mem++ = 0;
	}
    }
    return(memory);
}
