/* unjumble.c - try to make words from a jumbled string of letters.

                                Ron Charlton
                             9002 Balcor Circle
                             Knoxville, TN 37923
   
                             Phone: (615)694-0800
   
                              PLINK: R*CHARLTON
                           BINTNET: charltr@utkvx1

                                 05-Jul-90

This program is in the public domain.  It may be used for any purpose.

Algorithm:  Generate all of the permutations of the letters in a string
and test each one for its "trigram weight".  (A trigram is a group of three
letters, e.g., AAA, EWE, QQQ.)	Use a table of trigram weights (frequencies)
derived from a 38,500 word dictionary.	Calculate an n-letter string's
trigram weight as the sum of the weights of its (n - 2) trigrams.  A
trigram's weight is the log base 2 of its frequency of occurrence in
the dictionary.

Keep a table of the twenty highest weights while avoiding duplicates due to
multiple occurrences of a single letter.  Print out only the non-zero
entries in the table.  This all works because some trigrams (such as "ING")
are more likely than others (such as "QQQ").

The trigram data are stored on disk as a nybble_table and a bit_table for
compactness, then converted to an array (nn[][][]) for execution speed.
The string to be unjumbled is adjusted by making 'A' 1, 'B' 2, ... 'Z' 26
so that the characters can be used as indices into nn[][][].
This allows nn[][][] to be as small as possible (leaving '\0' available as
the end-of-string indicator).


history:
v1.3 - don't include any 'word' with any individual trigram weight of 0
v1.4 - general cleanup (& change to strupr)
v1.5 - added exit() from main() & editorial comments & misc. tweakings
v1.6 - changed to structure for table of words and weights
v1.7 - changed do{}while() to for() in print_table(); READ_NYBBLE(); test_bit
v1.8 - TEST_BIT() as macro; removed unused variables in main()
v1.9 - rearranged main(), changed function names, modified intro(), strlwr()
v1.10  removed strlwr() definition
v1.11  changed temp variable to static in insert()
v1.12  eliminated second strcpy() in permute() (faster!!)
v1.13  allow 8-letter words, add 'nothing found'
v1.14  adjust MAXLEN, restored strlwr() definition, conbined printf's in intro()
*/

char version[] = "v1.14";

#include <stdio.h>
#include <ctype.h>

#define CSI 0x9b
#define CLS 0x0c

#define FALSE 0
#define TRUE (!FALSE)

#define MAPVALUE ('a'-1)        /* 'A' for uppercase, 'a' for lowercase */

#define MAXLEN 10		/* max. length of string to unjumble */
#define TABLESIZE 20		/* size of table of guesses */

/* read a nybble from a table of nybbles (32-bit longs in table) */
#define READ_NYBBLE( nyb_number ) \
( (nybble_table [ nyb_number >> 3 ] >> ((nyb_number & 7L) << 2)) & 0xFL )

/* test bit n in table of 32-bit ints */
#define TEST_BIT( n ) \
	( bit_table [ n >> 5 ] & ( 1L << (n & 0x1FL) ) ? 1 : 0 )

/* the follow two lines are associated with tables.c */
extern int bit_table_size, nybble_table_size;
extern unsigned long bit_table[], nybble_table[];

/* structure array to hold candidate words and their weights */
struct {
    char word [ MAXLEN + 1 ];
    int weight;
       }  table [ TABLESIZE ];

unsigned char nn[27][27][27];	/* array of probabilities of trigrams */
int min_weight; 		/* used in insert() for minimum weight */
int min_weight_ptr;		/* used in insert() to point at minimum */

char *strlwr();


/*------------------------------------------------------------------------*/

main()
{
char buf [ 255+1 ];
int length;

intro();

get_weights();			     /* read weights into nn[][][] */

for (;;)
  {
  init_table();
  printf ( "\tEnter 3 to 8 letters ( <RETURN> to quit ): ");

  if ( ! gets ( buf ) )
    break;

  length = strlen ( buf );

  if ( length == 0 )
    break;

  else if ( !all_letters ( buf ) )
    printf ( "\tYou may only use letters (a-z; A-Z).\n\n" );

  else if ( length > 8 )
    printf ( "\tToo many letters.\n\n" );

  else if ( length < 3 )
    printf ( "\tToo few letters.\n\n" );

  else
    {
    /* do the unjumbling */
    strlwr ( buf );
    map ( buf );			/* convert to array indices */
    permute ( buf, 0, length ); 	/* do all permutations */
    print_table();			/* display the results */
    }
  }
  exit ( 0 );
}

/*------------------------------------------------------------------------*/

intro()
{
#ifdef MCH_AMIGA
printf ( "%c", CLS );                   /* clear screen */
printf ( "%c0;33;40m", CSI );           /* color 3,0 */
#endif
printf ( "\n\t\t\t      Unjumble %s\n\n"
	   "\t\t\t           by\n\n"
	   "\t\t\t      Ron Charlton\n"
	   "\t\t\t   9002 Balcor Circle\n"
	   "\t\t\t   Knoxville, TN 37923\n"
	   "\t\t\t    Plink: R*CHARLTON\n"
	   "\t\t\t  BITNET: charltr@utkvx1\n"
	   "\t\t    Internet: charltr@utkvx1.utk.edu\n\n", version );
#ifdef MCH_AMIGA
printf ( "%c0;31;40m", CSI );           /* color 1,0 */
#endif

printf ( "\tUnjumble tries to create words out of a group of letters\n"
	 "\tthat you enter.  It will print out up to twenty guesses in\n"
	 "\torder with each guess' weight before it.  Three to 8 letters at\n"
	 "\ta time are accepted.  Eight letters requires several seconds of \n"
	 "\tcalculation.  More letters are harder to unjumble.\n\n" );
}
/*------------------------------------------------------------------------*/

/* test for all letters in a string */
all_letters ( str )
  char *str;
  {
  while ( isalpha ( *str++ ) )
      ;
  return ( ! *--str );	/* pointing at '\0' if all alpha */
  }

/*------------------------------------------------------------------------*/

/* convert letters to array indices (A-->1, B-->2 ... Z-->26) */
map ( str )
  char *str;
  {
  while ( * str )
    * str++ -= MAPVALUE;
  }

/*------------------------------------------------------------------------*/

/* find sum of weights of trigrams in a string. */
weigh ( str )
  char *str;
  {
  register int weight = 0, len, pos, triweight;
  if ( (len = strlen ( str ) ) >= 3 )
    for ( pos = 0; pos <= len - 3; pos++)
      {
      triweight = nn [ str [pos] ] [ str [pos+1] ] [ str [pos+2] ];
      if ( triweight > 0 )
	  weight += triweight;
      else
	  {
	  /* eliminate those with any trigram of 0 weight */
	  weight = 0;
	  break;
	  }
      }
  return ( weight );
  }

/*------------------------------------------------------------------------*/

/* find all permutations of a string */
permute ( a, k, n )
char *a;
int k, n;
{
int i;
char b [ MAXLEN ];
static char temp;
if (k == n)
    insert ( a );			/* insert into table if weighty */
else
    {
    strcpy(b, a);
    for (i = k; i < n; i++)
	{
	temp = b [ k ];
	b [ k ] = b [ i ];
	b [ i ] = temp;
	permute ( b, k+1, n );
	}
    }
}

/*------------------------------------------------------------------------*/

/* convert array indices to alphabetic 1-->A, 2-->B ... 26-->Z */
unmap ( str )
  char *str;
  {
  while ( * str )
    *str++ += MAPVALUE;
  }

/*------------------------------------------------------------------------*/

/* initialize the guess table to empty */
init_table()
  {
  int i;
  min_weight = 0;
  min_weight_ptr = 0;
  for ( i = 0; i < TABLESIZE; i++)
    {
    table[i].weight = 0;
    table[i].word[0] = '\0';
    }
  }

/*------------------------------------------------------------------------*/

/* if this is a weighty word, insert it in the table */
insert ( str )
  char *str;
  {
  register int weight, i;
  
  weight = weigh ( str );
  if ( weight > min_weight ) /* does this word weigh more than table min.? */
    {
    for ( i = 0; i < TABLESIZE; i++ ) /* avoid duplicates (BIGgER BIgGER) */
      if ( ! strcmp ( table[i].word, str ) )
	return; 		  /* it's already in the table */

    /* put this one in table in place of old minimum */
    table [ min_weight_ptr ].weight = weight;
    strcpy ( table [ min_weight_ptr ].word, str );

    /* search for new minimum in the table */
    min_weight_ptr = 0;
    min_weight = table [ 0 ].weight;
    for ( i = 1; i < TABLESIZE; i++ )
      if ( table [ i ].weight < min_weight )
	{
	min_weight_ptr = i;
	min_weight = table [ i ].weight;
	}
    }
  }

/*------------------------------------------------------------------------*/

/* print the non-zero table entries in descending order. */
/* this is a bogus, but adequate, sort */
print_table()
  {
  int max_entry, i, first_pass = TRUE;

  for(;;)
    {
    max_entry = 0;			/* point at maximum weight */

    for ( i = 1; i < TABLESIZE; i++ )
      if ( table [ i ].weight > table [ max_entry ].weight )
	max_entry = i;

    if ( first_pass && table [ max_entry ].weight == 0 )
	{
	printf ( "\t\tNothing found.\n" );
	break;
	}

    if ( table [ max_entry ].weight )
      {
      unmap ( table [ max_entry ].word );
      printf("\t\t%5d %s\n", table [ max_entry ].weight,
		table [ max_entry ].word );
      table [ max_entry ].weight = 0;	/* 'delete' from table */
      }
    else
      break;

    first_pass = FALSE;
    }
}

/*------------------------------------------------------------------------*/

/* get trigram weights from nybble_table and bit_table into nn[][][] */
get_weights()
  {
  register int i, j, k, m = 0, ptr = 0;
  for ( i = 1; i <= 26; i++ )
    for ( j = 1; j <= 26; j++ )
      for ( k = 1; k <= 26; k++ )
	{
	/* beware of ++x side effects */
	if ( TEST_BIT ( m ) )
	  {
	  nn [ i ] [ j ] [ k ] = READ_NYBBLE ( ptr );
	  ++ptr;
	  }
	++m;
	}
  }

/*------------------------------------------------------------------------*/
/*
 * Convert a string to lower case in place and return a pointer
 * to the resulting string.
 */

char *
strlwr ( char *str )
    {
    register char *s = str;
    while ( *s )
	{
	if (*s >= 'A' && *s <= 'Z') 
		*s = *s - 'A' + 'a';
	++s;
	}

    return ( str );
    }

/*-------------------------- END OF FILE ---------------------------------*/
