/* Program Name: soundex4.c
*  Author: C. FORSYTH
*
*  (c) 1988 by Charles F. Forsyth
*
*  Created: 5/23/1988 at 11:27                              
*                                                          
* Revision: 1.2 Last Revised: 6/29/1988 at 14:22                         *
*                                                          
*                                                          
*************************** ALL RIGHTS RESERVED **************************
*
*          You are free to copy and distribute this source code
*          as well as incorporate it in your own programs as long
*          as the copyright notice is retained and the resulting
*          application is not for commercial use.
*          You may contact the author at (602) 995-4244
*          or address mail to C. Forsyth, 7760 N. 19th Dr.,
*          Phoenix, AZ 85021
*
*
****************************************************************************
*            Change history:
*  Rev. 1.2 6-29-88
*  Revised code to correct for problem encountered when only a single
*  character was passed as the string for which a soundex code was to
*  be generated. Also changed the algorithm so that an invalid
*  string (control characters, etc.) would result in a return code
*  of as many spaces as the requested number of numerics.
****************************************************************************
*
*	SOUNDEX ALGORITHM in C	
*			 				  
*                Variations on a theme
*
*					  
*	The difference between this function and a 'plain'
*  soundex function is that this one allows variations
*  of the return code as well as the ability for the caller
*  to specify the number of ascii numeric codes to be allowed
*  in the return string (3 is the default, 5 is maximum as coded here)
*
*	SYNTAX:   soundex4( <exp1C>,[<exp2N>],[<exp3N>] )
*  Where:    <exp1C> is the only required parameter -- the input
*                    string for which the soundex code is wanted
*
*            <exp2N> is the variation as described in the table below
*
*            <exp3N> is the number of ascii numeric codes desired
*	
*	Variation #:
*                  
*     1 - default  -- first letter of input string is             
*         returned as the first character of the             
*         return code. The numeric code for the first
*         character is not included in the return code.
*         This is essentially the "Soundex" method
*         described by Knuth in his book, "The Art of
*         Computer Programming, Volume 3, Sorting and
*         Searching", page 392. The Tom Rettig and Nantucket
*         implementations of the algorithm differ in that
*         the Rettig and Nantucket versions do not disallow
*         repeating sound codes for adjacent characters.
*         Rettig points this out in his examples using
*         "Rettig" (R320) and "Reddick" (R322). Variation
*         1 of this function would return R320 for both
*         these words. This variation differs slightly from
*         that described by Knuth since it disallows the
*         repetition of a numeric code in the second position
*         if it is the same as that of the first letter.
*         This means that a name entered as "Schutts" will
*         have the same sound code as "Shutts".
*
*                  
*     2 - first letter of the input string is returned             
*         as the last character of the return code.
*         you might find this a very valuable variation
*         for use with indexes.
*                  
*     3 - return code is all numeric -- no alpha characters.
*         this variation can be very helpful when one is
*         dealing with words that begin with 'P' or 'B' or 'V'.
*
*     4 - first letter of input string is returned as
*         first character of the return code followed
*         by the numeric codes for entire string. 
*
*     5 - additional checking is done for 'silent pair' consonants
*         and a conversion to 'F' is done for the letter pair 'PH'.
*         Otherwise, this is much the same as variation 4. This
*         variation will allow embedded zeroes since they are
*         significant when part of a pair of consonants where the
*         leading consonant is silent. This variation might prove
*         to be the most valuable.
*
****************************************************************************/
#include "nandef.h"
#include "extend.h"

/*
 * Local, ASCII-only replacements for the usual character-class helpers.
 * All of them are macros that may evaluate their argument more than once,
 * so never pass an expression with side effects (e.g. *p++).
 * TRUE and FALSE are not defined in this file; they are expected to come
 * from the headers included above.
 */

/* distance between a lower case letter and its upper case counterpart */
#define UPPER        ('a' - 'A')

 
/* TRUE if p is in 'A'..'Z' or 'a'..'z', otherwise FALSE */
#define isalpha(p) ((('A'<=(p)&&(p)<= 'Z')||('a'<= (p)&&(p)<='z'))\
                                                     ? TRUE : FALSE) 
/* TRUE if p is in '0'..'9', otherwise FALSE */
#define isdigit(p) (('0'<=(p)&&(p)<='9') ? TRUE : FALSE)

/* TRUE if p is a blank, newline, tab, form feed or carriage return */
#define isspace(p) ((((p) == ' ')||((p) == '\n')||((p) == '\t')||\
                   ((p) == '\f')||((p) == '\r')) ? TRUE : FALSE)

/* non-zero if p is in 'a'..'z' (plain C truth value, not TRUE/FALSE) */
#define islower(p) ( 'a' <= (p) && (p) <= 'z' )

/* upper case equivalent of p if p is a lower case letter, else p unchanged */
#define toupper(p) ( islower(p) ? ((p) - UPPER) : ((p)) )


void soundex4()                    /* get soundex code for a string */
                                      /* with variations allowed       */
{
   int silent_pr();
   char *in_str;
   static char soundexbuf[7];         /* set up the return string buffer */

               /* ABCDEFGHIJKLMNOPQRSTUVWXYZ */
               /* :::::::::::::::::::::::::: */
   char *table = "01230120022455012623010202";

   char *sdx = soundexbuf, lastcode = ' '; /* initialize the return     */
                                           /* string pointer and the    */
                                           /* ascii numeric code holder */

   char first_ltr = ' ';                   /* initialize first letter   */
                                           /* holder                    */   

   register int count = 0;          /* initialize the counter for the   */
   register int ch = 0;
   int max_count = 0;               /* number of numeric codes to be    */
   int variation = 0;               /* allowed in the return string     */ 
   int cp = 0;
                                    /* and the number for the maximum   */
                                    /* number of numeric codes to be    */
                                    /* allowed and the variation type   */
                                    /* of code to be returned           */
    int num_params = 0;
    int valid_str = FALSE;

    num_params = PCOUNT;
    variation = ( num_params > 1 ? _parni(2) : 1 );
    max_count = ( num_params > 2 ? _parni(3) : 3 );
    variation = ( ((variation > 0) && (variation < 6)) ? variation : 1);
    if (num_params >0 && ISCHAR(1) )/* if there is at least one parameter  */
    {                               /* and it is a char variable           */                           
        in_str = _parc(1);          /* assign the address to in_str        */  
        
        valid_str = ( isalpha(*in_str) ? TRUE : FALSE );

        ch = toupper( *in_str );
        if(( num_params == 1 )||( variation == 1 )|| (variation == 4 )|| (variation == 5 ))
        {
        if (variation == 5)
         {
            cp = ( silent_pr(in_str) );
            if( cp )
            {
            ch = cp;
            in_str++;
            }
            {
            if ( (ch == 'P') && (toupper(*(in_str+1)) == 'H'))
               ch = 'F';
               in_str++;            /* skip the 'H'                        */
            }
         }
        *sdx++ = ch;                /* put the first letter in the code    */
        }                            /* making sure it is upper case       */
        else
        {                           /* otherwise save it for possible use  */
                                    /* later                               */ 
        first_ltr = ch ;
        }

        lastcode = *(table + ch - 'A'); 

                                    /* save the ascii numeric code         */
                                    /* so that we can compare it to the    */
                                    /* code for the next character         */

        if(( variation > 1 ))
          *sdx++ = lastcode;

    }
    else                                     
    {
         _retc("ERROR");               /* return an error */      
         return;                       /* return to program */     
    }

   max_count = ( ((max_count <= 6) && (max_count >= 1)) ? max_count : 3 );   
   max_count = ( variation >= 4 && valid_str ? --max_count : max_count);

   while ((ch = toupper(*in_str)) && (count < max_count) && valid_str )
      {
      if (isalpha(ch) && ( ch != lastcode)) 
         {
         *sdx = *(table + ch - 'A');
                                 /* get the ascii numeric code         */ 

         if ((*sdx != '0') && (*sdx != lastcode))

        /* if the ascii numeric code is not zero(indicating that it is */
        /* valid) and if the code is not the same as that of the       */
        /* previous input character                                    */
            {
            if (variation == 5) 
               {
               cp = silent_pr(in_str);
               if ( cp )
                  {
                  ch = cp;         /* discard leading consonants of */
                                   /* silent pairs by incrementing  */ 
                                   /* the input string pointer      */

                  *sdx = *(table + ch - 'A');
                  in_str++;        
                  }
               if(( ch  == 'P') && toupper(* (in_str+1)) == 'H')
                  *sdx = '1';   /* numeric code for 'F'       */
               }
                                /* save the numeric code      */
            lastcode = *sdx++;  /* and increment the counter  */
                                /* as well as the output      */
            count++;            /* string pointer             */
                                
            
            }
            else                /*     otherwise...           */        
            lastcode = *sdx;    /* just save the numeric code */  
                                                                 
         }
         
      in_str++;             /* increment the input string pointer */
      
      }
      if( valid_str )        /* if we had a valid input string     */
      {
      while ( count++ < max_count ) 
         /* fill remaining positions with zeros */
         {
         *sdx++ = '0';
         }
      
         if( variation == 2 )
           *( sdx-1 ) = first_ltr;
      }
      else
         {
         sdx = soundexbuf;
         for ( ch = 0; ch < max_count; ch++)
            *sdx++ = ' ';
         }

   *sdx = '\0';                 /* add the terminating null */
   _retc(soundexbuf);	        /* to return string         */
   return;                      /* and return to caller     */
}

/*
 * silent_pr( str_ )
 *
 * Tests whether the first two characters of str_ form one of the
 * consonant pairs whose leading consonant is silent
 * (TS TZ GH KN PN PT PF PK).  The comparison ignores case.
 *
 * Returns the upper case second consonant of the pair, or 0 when the
 * string does not start with such a pair.  An empty string yields 0
 * without looking at the byte behind the terminator.
 */
int silent_pr(str_)
   char *str_;
   {
   /* pairs are stored back to back, two characters each */
   static char *sil_cons_pairs = "TSTZGHKNPNPTPFPK";
   char *pair;
   int   c;
   int   next;                      /* character following c              */

   c = toupper( *str_ );
   if ( c == 0 )
      return(0);                    /* nothing follows the terminator     */

   next = toupper( *(str_ + 1) );

   for ( pair = sil_cons_pairs; pair[0] != 0 && pair[1] != 0; pair += 2 )
      {
      if ( pair[0] == c && pair[1] == next )
         return( pair[1] );
      }

   return(0);
   }
