/************************** SOUNDEX4.C *************************
 * Optimized Soundex Algorithm (Algorithm #4)
 * Author: Joe Celko
 * Compilers: Turbo C 2.0, Microsoft C 5.0
 *
 * Compile time switches:
 *  TEST to get a test driver
 *
 * Source code may be freely used if source is acknowledged
 * Object code may be freely used
 */

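/* Uppercases the name, folds the vowels AEIOUY to 'A', applies the
 * prefix, infix and suffix letter substitutions, strips the vowels
 * after the first character, collapses repeated letters, and returns
 * the first n characters of what is left.  The code is made of
 * letters, not classic Soundex digits, and characters that are not
 * letters are passed through rather than dropped.
 * Many of the steps here could be combined into the same loop,
 * but they are kept separate for clarity and to give the user
 * a chance to experiment with changes.
 */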

#define TEST 100

#include <ctype.h>
#include <string.h>

/* Rewrite every occurrence of `pat` that starts after the first letter
 * of `buf` as `repl`, which must have the same length as `pat`.
 * The scan restarts from the front after each hit so that a rewrite
 * which creates a new match further left is still picked up.  Each
 * rule must destroy its own match, otherwise the loop would not end.
 */
static void sx4_infix(char *buf, const char *pat, const char *repl)
{
	size_t width = strlen(repl);
	char *hit;

	if (buf[0] == '\0')
		return;         /* nothing after the first letter */
	while ((hit = strstr(buf + 1, pat)) != NULL)
		memcpy(hit, repl, width);
}

/* Compute the letter-based phonetic code of a name.
 *
 * inname   name to encode; only its first 99 characters are used.
 *          A NULL pointer is treated as an empty name.
 * outcode  receives the code.  It must have room for n + 1 bytes:
 *          the first n bytes are filled (zero padded when the code is
 *          shorter than n) and outcode[n] is always set to '\0'.
 * n        maximum number of characters of code to return.
 *
 * Nothing is written when outcode is NULL or n is negative.
 */
void soundex4 (char *inname,  /* name to be transformed */
          char *outcode,      /* where to put soundex code */
          int n               /* length of soundex code */
		 )
{
	/* infix rules, applied in this order; later rules rely on the
	 * output of earlier ones (e.g. D has become T before NST is tried)
	 */
	static const char *const infix[][2] = {
		{ "DG",   "GG"   },
		{ "CAAN", "TAAN" },
		{ "D",    "T"    },
		{ "NST",  "NSS"  },
		{ "AV",   "AF"   },
		{ "Q",    "G"    },
		{ "Z",    "S"    },
		{ "M",    "N"    },
		{ "KN",   "NN"   },
		{ "K",    "C"    },
		{ "AH",   "AA"   },
		{ "HA",   "AA"   },
		{ "AW",   "AA"   },
		{ "PH",   "FF"   },
		{ "SCH",  "SSS"  }
	};
	char workbuf[100];  /* longer names are truncated to 99 characters */
	char *src, *dst;
	char prior;         /* last letter kept, for duplicate checking */
	size_t len, end, r;

	/* a negative n would turn into a huge size_t below */
	if (outcode == NULL || n < 0)
		return;

	/* make an uppercase working copy and convert all vowels to A */
	workbuf[0] = '\0';
	if (inname != NULL)
		strncpy(workbuf, inname, sizeof workbuf - 1);
	workbuf[sizeof workbuf - 1] = '\0'; /* strncpy may not terminate */
	for (src = workbuf; *src != '\0'; src++) {
		/* toupper() is only defined for unsigned char values */
		*src = (char)toupper((unsigned char)*src);
		if (strchr("AEIOUY", *src) != NULL)
			*src = 'A';
	}

	/* prefix transformations: done only once on the front of a name.
	 * Vowels are already 'A' here, so "MAC" also covers MEC, MIC, ...
	 */
	if (strncmp(workbuf, "MAC", 3) == 0) {          /* MAC to MCC */
		workbuf[1] = 'C';
	} else if (strncmp(workbuf, "KN", 2) == 0) {    /* KN to NN */
		workbuf[0] = 'N';
	} else if (workbuf[0] == 'K') {                 /* K to C */
		workbuf[0] = 'C';
	} else if (strncmp(workbuf, "PF", 2) == 0) {    /* PF to FF */
		workbuf[0] = 'F';
	} else if (strncmp(workbuf, "SCH", 3) == 0) {   /* SCH to SSS */
		workbuf[1] = 'S';
		workbuf[2] = 'S';
	}

	/* infix transformations: done after the first letter,
	 * and are from left to right on the name
	 */
	for (r = 0; r < sizeof infix / sizeof infix[0]; r++)
		sx4_infix(workbuf, infix[r][0], infix[r][1]);

	/* suffix transformations: done on the end of the word,
	 * going right to left
	 */

	/* (1) remove terminal A's and S's, but never the first letter */
	len = strlen(workbuf);
	while (len > 1 && (workbuf[len - 1] == 'S' || workbuf[len - 1] == 'A'))
		workbuf[--len] = '\0';

	/* (2) terminal NT to TT; repeats so that NNT ends up as TTT */
	for (end = len;
	     end > 1 && workbuf[end - 2] == 'N' && workbuf[end - 1] == 'T';
	     end--)
		workbuf[end - 2] = 'T';

	/* now strip out all vowels except a leading one (remember that
	 * all vowels were transformed into 'A's earlier).
	 */
	src = dst = workbuf;
	while ((*dst++ = *src++) != '\0') {
		while (*src == 'A')
			src++;
	}

	/* Collapse runs of the same letter into one.  This runs over the
	 * whole buffer, first letter included, because the letter
	 * transforms can create duplicates at the front of the name.
	 */
	prior = '\0';
	for (src = dst = workbuf; *src != '\0'; src++) {
		if (*src != prior) {
			*dst++ = *src;
			prior = *src;
		}
	}
	*dst = '\0';

	strncpy(outcode, workbuf, (size_t)n);
	outcode[n] = '\0'; /* strncpy does not terminate a full-length code */
}

#if defined (TEST)

#include <stdio.h>
#include <stdlib.h>
/* Test driver: soundex4 name length
 * Prints the code of `name`, cut to at most `length` characters.
 * Returns EXIT_FAILURE on a usage error or an unusable length.
 */
int main(int argc, char **argv)
{
    char outbuf[100];   /* soundex4 never produces more than 99 characters */
    char *stop;
    long length;

    if (argc != 3) {
        fputs("Usage: soundex4 name length\n", stderr);
        return EXIT_FAILURE;
    }

    /* soundex4 writes length + 1 bytes, so the length has to fit outbuf */
    length = strtol(argv[2], &stop, 10);
    if (stop == argv[2] || *stop != '\0' ||
        length < 0 || length >= (long)sizeof outbuf) {
        fprintf(stderr, "soundex4: length must be a number from 0 to %d\n",
                (int)sizeof outbuf - 1);
        return EXIT_FAILURE;
    }

    soundex4(argv[1], outbuf, (int)length);

    printf(" Result: %s\n", outbuf);
    return EXIT_SUCCESS;
}
#endif