/*

Title:    LGIHPWD
Author:   Shawn A. Clifford (sysop@robot.nuceng.ufl.edu)
Date:     19-FEB-1993
Purpose:  Portable C version of the last 3 encryption methods for DEC's
          password hashing algorithms.

Usage:    status = lgihpwd(&out_buf,&pwd_buf,encrypt,salt,&unam_buf);

          int lgihpwd(dscdescriptor *output_hash,
                      dscdescriptor *password,
                      int encrypt,
                      word salt,
                      dscdescriptor *username)

          Where:
                      output_hash = 8 byte output buffer descriptor
                         password = n character password string descriptor
                          encrypt = 1 byte; value determines algorithm to use
                                    0 -> CRC algorithm   (not supported)
                                    1 -> Purdy algorithm
                                    2 -> Purdy_V
                                    3 -> Purdy_S (Hickory algorithm)
                             salt = 2 byte random number
                         username = up to 31 character username descriptor
*/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "mytypes.h"
#include "hpwd.h"

/* 2^64 - 59 is the biggest quadword prime */

#define A 59
#define P_D_LOW  (DWORD_MAX-A+1UL)
#define P_D_HIGH DWORD_MAX

/* These exponents are prime, but this is      */
/* not required by the algorithm               */
/* N0 = 2^24 - 3;  N1 = 2^24 - 63;  N0-N1 = 60 */

#define N0 0xFFFFFDUL
#define N1 0xFFFFC1UL

#define MASK 0x7  /* mask for COLLAPSE_R2 */

/************
   PQMOD_R0
 ************/

/*
   U : output buffer (quadword)

   This routine replaces the quadword U with U MOD P, where P is of the form
   P = 2^64 - a                    (RELATIVELY GLOBAL a = 59)
     = FFFFFFFF.FFFFFFFF - 3B + 1  (DWORD_MAX = FFFFFFFF = 4,294,967,295)
     = FFFFFFFF.FFFFFFC5

   Method:  Since P is very nearly the maximum integer you can specify in a
   quadword (ie. P = FFFFFFFFFFFFFFC5, there will be only 58 choices for
   U that are larger than P (ie. U MOD P > 0).  So we check the high longword
   in the quadword and see if all its bits are set (-1).  If not, then U can
   not possibly be larger than P, and U MOD P = U.  If U is larger than
   DWORD_MAX - 59,then U MOD P is the differential between (DWORD_MAX - 59)
   and U, else U MOD P = U. If U equals P, then U MOD P = 0 = (P + 59).

   Optimized by Mario Ambrogetti

*/

#define PQMOD_R0(U) if ((U).d_high==P_D_HIGH && (U).d_low>=P_D_LOW) {  \
                      (U).d_low+=A;(U).d_high=0UL;}

/** Function prototypes **/

#ifdef USE386

  #define NO_EMULQ
  #define NO_PQADD_R0

  #ifdef __WATCOMC__

    #define USE_QROL1

    #pragma aux QROL1 = 0xD1 0x00       /* rol dword ptr [eax],1   */  \
                        0xD1 0x40 0x04  /* rol dword ptr [eax+4],1 */  \
                        parm [eax];

    #pragma aux EMULQ = 0xF7 0xE2        /* mul  edx         */  \
                        0x89 0x03        /* mov  [ebx],eax   */  \
                        0x89 0x53 0x04   /* mov  [ebx+4],edx */  \
                        parm [eax] [edx] [ebx] modify [eax edx];

    #pragma aux PQADD_R0 = 0x8B 0x03        /* mov  eax,[ebx]   */  \
                           0x8B 0x53 0x04   /* mov  edx,[ebx+4] */  \
                           0x03 0x01	    /* add  eax,[ecx]   */  \
                           0x13 0x51 0x04   /* adc  edx,[ecx+4] */  \
                           0x73 0x06	    /* jnc  NC          */  \
                           0x83 0xC0 A      /* add  eax,A       */  \
                           0x83 0xD2 0x00   /* adc  edx,0       */  \
                           0x89 0x06        /* NC:mov [esi],eax */  \
                           0x89 0x56 0x04   /* mov  [esi+4],edx */  \
                           parm [ebx] [ecx] [esi] modify [eax edx];

  #elif defined(__BORLANDC__)

    #define USE_EMULQ_386
    #define USE_PQADD_R0_386

    #define EMULQ    emulq_386
    #define PQADD_R0 pqadd_r0_386

    static void emulq_386(dword,dword,qword *);
    static void pqadd_r0_386(qword *,qword *,qword *);

  #else

    #error Error USE386

  #endif

#else

  #define EMULQ    emulq
  #define PQADD_R0 pqadd_r0

  static void emulq(dword,dword,qword *);           /* (a * b)       */
  static void pqadd_r0(qword *,qword *,qword *);    /* (U + Y) MOD P */

#endif

static void COLLAPSE_R2(dscdescriptor *,char *,int);
static void Purdy(qword *);





static void PQEXP_R3 (qword *U,
                      unsigned long n,
                      qword *result);            /* U^n MOD P     */
static void PQMUL_R2 (qword *U,
                      qword *Y,
                      qword *result);            /* U * Y MOD P   */
static void PQLSH_R0 (qword *U,
                      qword *result);            /* 2^32*U MOD P  */

/** RELATIVELY GLOBAL variables **/

/*
 *   The following table of coefficients is used by the Purdy polynmial
 *   algorithm.  They are prime, but the algorithm does not require this.
 */

static qword C1={0xFFFFFFADUL,0xFFFFFFFFUL};
static qword C2={0xFFFFFF4DUL,0xFFFFFFFFUL};
static qword C3={0xFFFFFEFFUL,0xFFFFFFFFUL};
static qword C4={0xFFFFFEBDUL,0xFFFFFFFFUL};
static qword C5={0xFFFFFE95UL,0xFFFFFFFFUL};

/** LGIHPWD entry point **/

/*  Optimized by Mario Ambrogetti */

void lgihpwd(dscdescriptor *output_hash,
             dscdescriptor *password,
             int encrypt,
             word salt,
             dscdescriptor *username)
{
  static qword   *U;          /* Points to start of output buffer   */
  static qword   *r4;         /* Address of the output buffer       */
  static int      r7;         /* Flag for encryption method # 3     */
  char   uname[13];

  /* Setup pointer references */

  U=r4=(qword *)output_hash->dsca_pointer;
  r7=(encrypt==3);

  /* Clear the output buffer (zero the quadword) */

  U->d_low=U->d_high=0UL;

  switch (encrypt) {

    /* CRC algorithm with Autodin II poly */

    case UAIC_AD_II:

      /* As yet unsupported */

      return;

    /* Purdy algorithm */

    case UAIC_PURDY:

      /* Use a blank padded username */

      strcpy(uname,"            ");
      strncpy(uname,username->dsca_pointer,username->dscw_length);
      username->dsca_pointer=uname;
      username->dscw_length=12;

      break;

    /* Purdy with blanks stripped               */
    /* Hickory algorithm; Purdy_V with rotation */

    case UAIC_PURDY_V:
    case UAIC_PURDY_S:

      /* If Purdy_S:  Bytes 0-1 => plaintext length */

      if (r7)
        U->d_low=password->dscw_length;

    break;

  }  /* switch */

  /* Collapse the password to a quadword U; buffer pointed to by r4 */
  /* password already points to password descriptor                 */

  COLLAPSE_R2(password,(char *)r4,r7);

  /* Add random salt into the middle of U                                     */

  *(word *)(((char *)r4)+3)+=salt;

  /* Collapse the username into the quadword point */
  /* password to the valid username desriptor      */

  COLLAPSE_R2(username,(char *)r4,r7);

  /* Run U through the polynomial mod P */

  Purdy(U);

} /* LGIHPWD */

/***************
   COLLAPSE_R2
 ***************/

/*
   r3 :  input string descriptor
   r4 :  output buffer (quadword)
   r7 :  flag (1) if using Hickory method (encrypt = 3)

   This routine takes a string of bytes (the descriptor for which is pointed
   to by r3) and collapses them into a quadword (pointed to by r4).  It does
   this by cycling around the bytes of the output buffer adding in the bytes
   of the input string.  Additionally, after every 8 characters, each
   longword in the resultant hash is rotated by one bit (PURDY_S only).

   Optimized by Mario Ambrogetti
*/

static void COLLAPSE_R2(dscdescriptor *r3,char *r4,int r7)
{
#ifndef USE_QROL1

  register dword tmp;
  register dword rotate;

#endif

  int r0;
  int r1;
  char *r2;

/* ------------------------------------------------------------------------- */

  r0=r3->dscw_length;  /* Obtain the number of input bytes */

  /* Loop until input string exhausted */

  for (r2=r3->dsca_pointer;r0!=0;r0--) {

    *(r4+(r1=r0 & MASK))+=*r2++;

    /* If Purdy_S and last byte ... */

    if (r7 && (r1==7)) {

#ifdef USE_QROL1

      QROL1((dword *)r4);
#else
      /* Rotate first longword one bit */

      rotate=((tmp=((qword *)r4)->d_low)>>31) & 1;
      ((qword *)r4)->d_low=(tmp<<1) | rotate;

      /* Rotate second longword one bit */

      rotate=((tmp=((qword *)r4)->d_high)>>31) & 1;
      ((qword *)r4)->d_high=(tmp<<1) | rotate;

#endif

    } /* if Purdy_S */

  } /* for loop */

} /* COLLAPSE_R2 */

/*********
   Purdy
 *********/

/*
   U : output buffer (quadword)

   This routine computes  f(U) = p(U) MOD P.  Where P is a prime of the form
   P = 2^64 - a.  The function p is the following polynomial:

                   X^n0 + X^n1*C1 + X^3*C2 + X^2*C3 + X*C4 + C5
                   ^^^^^^^^^^^^^^   ^^^^^^^^^^^^^^^^^^^^^^^^^^^
                       part1                 part2

   Note:  Part1 is calculated by its congruence  (X^(n0-n1) + C1)*X^n1
          finding X^n1, X^(n0-n1), X^(n0-n1)+C1, then
          (X^(n0-n1) + C1)*X^n1  which equals  X^n0 + X^n1*C1

   The input U is an unsigned quadword.
*/

static void Purdy(qword *U)
{
  qword X;                /* The variable X in the polynomial            */
  qword stack;            /* Returned quadword on stack                  */
  qword part1;            /* Collect the polynomial ...                  */
  qword part2;            /* ... in two parts                            */

/* ------------------------------------------------------------------------- */

  /* Ensure U less than P by taking U MOD P  */
  /* Save copy of result:  X = U MOD P       */

  X.d_low =U->d_low;
  X.d_high=U->d_high;

  PQMOD_R0(X);                          /* Make sure U is less than P   */

  PQEXP_R3 (&X, N1, &stack);            /* Calculate X^N1               */

  PQEXP_R3 (&X, (N0-N1), &part1);       /* Calculate X^(N0-N1)          */

  PQADD_R0(&part1,&C1,&part2);          /* X^(n0-n1) + C1               */

  PQMUL_R2 (&stack, &part2, &part1);    /* X^n0 + X^n1*C1               */

  /* Part 1 complete */

  PQMUL_R2(&X,&C2,&stack);              /* X*C2                         */

  PQADD_R0(&stack,&C3,&part2);          /* X*C2 + C3                    */

  PQMUL_R2 (&X, &part2, &stack);        /* X^2*C2 + X*C3                */

  PQADD_R0(&stack,&C4,&part2);          /* X^2*C2 + X*C3 + C4           */

  PQMUL_R2 (&X, &part2, &stack);        /* X^3*C2 + X^2*C3 + X*C4       */

  PQADD_R0(&stack,&C5,&part2);          /* X^3*C2 + X^2*C3 + X*C4 + C5  */

  /* Part 2 complete */


  /* Add in the high order terms.  Replace U with f(x) */

  PQADD_R0(&part1,&part2,U);

} /* Purdy */


/*********
   EMULQ
 *********/

/*
   a, b   : longwords that we want to multiply (quadword result)
   result : the quadword result

   This routine knows how to multiply two unsigned longwords, returning the
   unsigned quadword product.

   Optimized by Mario Ambrogetti
*/

#ifndef NO_EMULQ

static void emulq(dword a,dword b,qword *result)
{
  register dword temp;

  /* c->d_low=a.w_low*b.w_low */

  result->d_low =(dword)(((dword_t *)&a)->w_low)  * (dword)(((dword_t *)&b)->w_low);

  /* c->d_high=a.w_high*b.w_high */

  result->d_high=(dword)(((dword_t *)&a)->w_high) * (dword)(((dword_t *)&b)->w_high);

  /* temp=a.w_high*b.w_low */

  temp=(dword)(((dword_t *)&a)->w_high) * (dword)(((dword_t *)&b)->w_low);

  /* checking for carry out */

  if (~temp<((qword_t *)result)->l_mid)
    ((qword_t *)result)->w3++;

  /* c->>l_mid=c->>l_mid+a.w_high*b.w_low */

  ((qword_t *)result)->l_mid+=temp;

  /* temp=a.w_low*b.w_high */

  temp=(dword)(((dword_t *)&a)->w_low) * (dword)(((dword_t *)&b)->w_high);

  /* checking for carry out */

  if (~temp<((qword_t *)result)->l_mid)
    ((qword_t *)result)->w3++;

  /* c->>l_mid=c->>l_mid+a.w_low*b.w_high */

  ((qword_t *)result)->l_mid+=temp;

} /* EMULQ */

#endif /* NO_EMULQ */

#ifdef USE_EMULQ_386

static void emulq_386(dword a,dword b,qword *c)
{
  asm {

    db 0x66
    mov   ax,word ptr [a]      /* mov eax,dword ptr [a] */

    db 0x66
    mul   word ptr [b]         /* mul dword ptr [b] */

#if __HUGE__ || __LARGE__ || __COMPACT__
    les   bx,[c]

    db 0x66
    mov   es:[bx],ax           /* mov es:[bx],eax */

    db 0x66
    mov   es:[bx+4],dx         /* mov es:[bx+4],edx */
#else
    mov   bx,[c]

    db 0x66
    mov   [bx],ax              /* mov [bx],eax */

    db 0x66
    mov   [bx+4],dx            /* mov [bx+4],edx */
#endif

  }
}

#endif /* USE_EMULQ_386 */

/************
   PQEXP_R3
 ************/

static void PQEXP_R3 (qword *U, unsigned long n,qword *result)
/*
U     : pointer to output buffer (quadword)
n     : unsigned longword (exponent for U)

The routine returns U^n MOD P where P is of the form P = 2^64-a.
U is a quadword, n is an unsigned longword, P is a RELATIVELY GLOBAL quad.

The method comes from Knuth, "The Art of Computer Programing, Vol. 2", section
4.6.3, "Evaluation of Powers."  This algorithm is for calculating U^n with
fewer than (n-1) multiplies.  The result is U^n MOD P only because the
multiplication routine is MOD P.  Knuth's example is from Pingala's Hindu
algorthim in the Chandah-sutra.
*/

{
  qword Y, Z, Z1;     /* Intermediate factors for U */
  dword odd;          /* Test for n odd/even        */

  /* Initialize */

  Y.d_low =1UL;   /* Y = 1 */
  Y.d_high=0UL;

  Z = *U;

  /* Loop until n is zero */

  while (n != 0) {

    /* Test n */

    odd = n & 1;

    /* Halve n */

    n>>=1;

    /* If n was odd, then we need an extra x (U) */

    if (odd)     /* n was odd */ {

      PQMUL_R2 (&Y, &Z, result);

      if (n == 0)
        return;                 /* This is the stopping condition */
                                /* Disregard the exponent, which  */
                                /* would be the final Z^2.        */
      Y = *result;              /* Copy for next pass */
    }

    /* Square Z; this squares the current power for Z */

    Z1.d_low =Z.d_low;
    Z1.d_high=Z.d_high;

    PQMUL_R2 (&Z1, &Z1, &Z);

  }

} /* PQEXP_R3 */



/************
   PQMUL_R2
 ************/

static void PQMUL_R2 (qword *U, qword *Y, qword *result)

/*
U, Y : quadwords we want to multiply

Computes the product U*Y MOD P where P is of the form P = 2^64 - a.
U, Y are quadwords less than P.  The product replaces U and Y on the stack.

The product may be formed as the sum of four longword multiplications
which are scaled by powers of 2^32 by evaluating:

        2^64*v*z + 2^32*(v*y + u*z) + u*y
        ^^^^^^^^   ^^^^^^^^^^^^^^^^   ^^^
        part1      part2 & part3      part4

The result is computed such that division by the modulus P is avoided.

u is the low longword of  U;    u = U.l_low
v is the high longword of U;    v = U.l_high
y is the low longword of  Y;    y = Y.l_low
z is the high longword of Y;    z = Y.l_high
*/

{
  qword stack;
  qword part1;
  qword part2;
  qword part3;

  EMULQ(U->d_high,Y->d_high,&stack);        /* Multiply   v*z      */

  PQMOD_R0(stack);                          /* Get   (v*z) MOD P   */

  /*** 1st term ***/

  PQLSH_R0(&stack, &part1);                 /* Get   2^32*(v*z) MOD P  */

  EMULQ(U->d_high,Y->d_low,&stack);         /* Multiply   v*y      */

  PQMOD_R0(stack);                          /* Get   (v*y) MOD P   */

  EMULQ(U->d_low,Y->d_high,&part2);         /* Multiply   u*z      */

  PQMOD_R0(part2);                          /* Get   (u*z) MOD P   */

  PQADD_R0(&stack,&part2,&part3);           /* Get   (v*y + u*z)   */

  PQADD_R0(&part1,&part3,&stack);           /* Get   2^32*(v*z) + (v*y + u*z) */

  /*** 1st & 2nd terms ***/

  PQLSH_R0(&stack, &part1);       /* Get   2^64*(v*z) + 2^32*(v*y + u*z) */

  EMULQ(U->d_low,Y->d_low,&stack);          /* Multiply   u*y      */

  /*** Last term ***/

  PQMOD_R0(stack);

  PQADD_R0(&part1,&stack,result);           /* Whole thing */

} /* PQMUL_R2 */



/************
   PQLSH_R0
 ************/

static void PQLSH_R0 (qword *U, qword *result)

/*
Computes the product 2^32*U MOD P where P is of the form P = 2^64 - a.
U is a quadword less than P.

This routine is used by PQMUL in the formation of quadword products in
such a way as to avoid division by modulus the P.
The product 2^64*v + 2^32*u is congruent a*v + 2^32*U MOD P.

u is the low longword in U
v is the high longword in U
*/

{
  qword stack;

  /* Get  a*v   */

  EMULQ(U->d_high,A,&stack);

  /* Form  Y = 2^32*u  */

  U->d_high=U->d_low;
  U->d_low =0UL;

  /* Get  U + Y MOD P  */

  PQADD_R0(U,&stack,result);

} /* PQLSH_R0 */


/************
   PQADD_R0
 ************/

/*
   U, Y : quadwords that we want to add


   Computes the sum (U + Y) MOD P where P is of the form P = 2^64 - a.
   U, Y are quadwords less than P.

   Fixed with the help of the code written by Terence Lee (DEC/HKO).

   Optimized by Mario Ambrogetti
*/

#ifndef NO_PQADD_R0

static void pqadd_r0(qword *U,qword *Y,qword *result)
{
  register dword carry;

  /* Add the low longwords */

  result->d_low=U->d_low+Y->d_low;

  /* Add the high longwords, checking for carry out */

  result->d_high=U->d_high+Y->d_high+(carry=(~U->d_low<Y->d_low ));

  /* Check if we have to MOD P the result */

  if ((~U->d_high >= (Y->d_high+carry)) && (Y->d_high != DWORD_MAX))
    return;      /* Outta here? */

  result->d_low +=A;               /* U + Y MOD P */
  result->d_high+=(Y->d_low > ~A);

} /* PQADD_R0 */

#endif /* NO_PQADD_R0 */

#ifdef USE_PQADD_R0_386

static void pqadd_r0_386(qword *U,qword *Y,qword *result)
{
  asm {

    mov bx,[U]

    db 0x66
    mov   ax,[bx]              /* mov eax,[bx] */

    db 0x66
    mov   dx,[bx+4]            /* mov edx,[bx+4] */

    mov   bx,[Y]

    db 0x66
    add   ax,[bx]              /* add eax,[bx] */

    db 0x66
    adc   dx,[bx+4]            /* adc edx,[bx+4] */

    jnc   NoCarry

    db 0x66
    add   ax,A
    dw 0x0000                  /* add eax,A */

    db 0x66
    adc   dx,0x0000            /* adc edx,0 */

  }

NoCarry:

  asm {

    mov   bx,[result]

    db 0x66
    mov   [bx],ax              /* mov [bx],eax */

    db 0x66
    mov   [bx+4],dx            /* mov [bx+4],edx */
  }
}

#endif /* USE_PQADD_R0_386 */
