(*
This program demonstrates the use of the assembly language implemention
of the Shell-Metzner sort algorithm.  The shell sort is ideally suited
for sorting Pascal data structures for three reasons:

           1. It is much(!) faster than a bubble sort
           2. Unlike a quick sort, it is even faster if
              the data are partially ordered.
           3. It is relatively simple to implement in
              8086/8088 assembly language.

Sort times will depend primarily upon three main factors:

           1. Length of the key sort field.
           2. Size of the record structure.
           3. Number of records in the structure.

This routine has been developed so that a user may use it to sort any (well,
almost any!) size array of Turbo Pascal records.  The records can be of any
desirable structure but the key field must be a string, char array, or byte
type.  Integers are stored internally with the LSB first, so this routine
will not properly sort on an integer field.  The routine is modifiable,
however, and may be adapted to sort on integers or even reals.  If it is
desired to use a string type as a key field, two things are important to
note.  First, initialize the array with zeros before filling the array so
that the unused field slots are all the same for a proper comparison.  The
Turbo FILLCHAR(A,SIZEOF(X),0) procedure is best for this.  Second, be sure
to increment the offset of the key field by one to set the pointers at the
first character of the string and not at the string length byte.  This
program illustrates how string fields are properly set up and sorted.

The routine uses the parametric values of the key field location and length
and the record size to compare fields in accordance with the shell algorithm
and then exchange records based upon the comparison.  It might be speeded up
a hair with more efficient register utilization, but I doubt it.  If anyone
does speed it up significantly, I'd appreciate knowing about it.  By the
way, the times below were derived on a Leading Edge Model "M" running at
8 Mhz and are accordingly less than will be realized on a stock PC.

For those desiring to implement this routine in Turbo inline code, I strongly
suggest you get a copy of David Baldwin's outstanding(!!!) inline assembler
(located in DL1) and modify the MASM code in the routine to assemble to
inline code (but get rid of the underscores, Baldwin's assembler chokes on
them).

COPYRIGHT (C) 1986 by John J. Newlin.  The 8086/8088 assembly code and
Turbo Pascal code supplied here is intended for the private use of those
acquiring it.  It may be freely copied and distributed but it may not be
utilized in any IBM PC software marketed for profit.  Direct questions,
comments, or complaints to me at 71535,665 on CIS.


=========================SHELL SORT ROUTINE================================

;Assemble to SHELSORT.EXE, then use EXE2BIN to convert to .COM file
;declare in TURBO PASCAL source file as below

;procedure shellsort(len,field,entries,size:integer; var struc);

;len     = the length of the key (sort) field
;field   = offset of the field within the record (add 1 for string fields)
;entries = number of records in the array
;struc   = the declared name of the array

code         segment
             assume cs:code

;use equates to keep things straight

STRUC        equ [bp+4]
SIZE         equ [bp+8]
ENTRIES      equ [bp+10]
FIELD        equ [bp+12]
LEN          equ [bp+14]
N            equ [bp-2]
JUMP         equ [bp-4]
N_JUMP       equ [bp-6]


sort:        push bp                ;save bp
             mov bp,sp              ;reference the stack with bp
             sub sp,10              ;make some working space for local vars
             push ds                ;preserve ds
             push es                ;and es as well (although not necessary)
             les di,STRUC           ;load es with struc seg - di with struc ofs
             lds si,STRUC           ;same with ds
             jmp sortem             ;goto main body

compare:     push si                ;save the pointers
             push di
             push cx                ;save the counter
             mov cx,LEN             ;no of bytes to scan
             add si,word ptr FIELD  ;bump si by key field length
             add di,word ptr FIELD  ;bump di by key field length
             repz cmpsb             ;compare em!
             pop cx                 ;flag will be set accordingly
             pop di                 ;restore regs
             pop si
             ret                    ;and return

swap:        push si                ;save the pointers
             push di
             push cx                ;save the counter
             push ax                ;will use ax, so save it
             cld                    ;move is forward
again1:      mov al,byte ptr[di]    ;save one byte
             movsb                  ;move one bye
             mov byte ptr es:[si-1],al ;move saved byte
             loop again1            ;continue for length of record
             pop ax                 ;restore regs
             pop cx
             pop di
             pop si
             ret                    ;and return

sortem:      mov cx,ENTRIES         ;no. of entries
             mov dx,SIZE            ;size of record
             mov N,cx               ;store N
             mov JUMP,cx            ;store JUMP (JUMP = N)
             dec word ptr N         ;N = N - 1

loop1:       cmp word ptr JUMP,1    ;is JUMP > 1?
             jbe exit               ;no - sort complete
             shr word ptr JUMP,1    ;JUMP = JUMP DIV 2

loop2:       mov bl,1               ;set DONE = TRUE
             mov ax,N               ;get N
             sub ax,word ptr JUMP   ;compute N - JUMP
             mov N_JUMP,ax          ;store N - JUMP
             mov cx,0
                                    ;for J = 1 to N - JUMP DO
loop3:       push si                ;save pointer to record
             push di                ;save pointer to record
             mov ax,SIZE            ;get rec size
             mul cx                 ;multipy by J
             add si,ax              ;j = si, so a[j] = a[si]
             mov ax,SIZE            ;get rec size
             mul word ptr JUMP      ;multiply by JUMP
             add ax,si              ;offset from si (j)
             mov di,ax              ;i = di, so a[i] = a[di]
             call compare           ;compare fields
             jbe no_swap            ;no swap
             push cx                ;save loop counter
             mov cx,SIZE            ;SWAP needs size of record
             call swap              ;do it!
             pop cx                 ;restore loop counter
             mov bl,0               ;set DONE = FALSE
no_swap:     cmp cx,word ptr N_JUMP ;is cx = N - JUMP?
             pop di                 ;restore pointer
             pop si                 ;restore pointer
             inc cx                 ;bump the counter
             jb loop3               ;if cycle not complete, go again
             cmp bl,0               ;is DONE = FALSE
             je loop2               ;no, another cycle
             jmp loop1              ;keep going until sort is complete
exit:        pop es                 ;restore es reg
             pop ds                 ;restore ds reg
             mov sp,bp              ;restore original sp
             pop bp                 ;restore original bp
             ret 12                 ;clean up stack for TURBO
code         ends
             end sort



=====================SORT DEMONSTRATION PROGRAM=============================
*)

{$U+}
const
  recs = 15;  {CHANGE THIS VALUE AS DESIRED}
(*  SORT PERFORMANCE USING A 12 BYTE KEY FIELD IN A 24 BYTE RECORD

    NO. RECS                 BUBBLE           SHELL
    --------                ---------        --------
      50                     00:00.50        00:00.17
     100                     00:01.00        00:00.49
     150                     00:04.00        00:00.77
     200                     00:05.00        00:01.16
     250                     00:08.00        00:01.59
     300                     00:12.00        00:01.86
     350                     00:17.00        00:02.09
     400                     00:22.00        00:03.18
     500                     00:33.00        00:03.79
     750                     01:15.00        00:06.00
    1000                     02:15.00        00:08.79

Note: the bubble sort used for this test was also an assembly language
      routine.
*)

type
   Regtype     = record Ax,Bx,Cx,Dx,Bp,Si,Di,Ds,Es,Flags:integer  end;
   a_type = string[12];
   x_type = record
              a : integer;
              b : a_type;
              c : integer;
              d : byte;
              e : array[1..6] of byte;
             end;
   str12 = string[8];

var
  rgs : regtype;
  x : x_type;
  i,j,n : integer;
  temp : a_type;
  k,ch : char;
  a : array[1..recs] of x_type;

function time : str12;
var
  m,h,x,s,timestr : str12;
  i : integer;
begin
  rgs.ax := $2C00;
  msdos(rgs);
  str(lo(rgs.cx):2,m);
  str(hi(rgs.dx):2,s);
  str(lo(rgs.dx):2,h);
  timestr := m + ':' + s + '.' + h;
  for i := 1 to 8 do if timestr[i] = #32 then timestr[i] := '0';
  time := timestr;
end;

procedure shellsort(len,field,entries,size:integer; var struc);
external 'shelsort.bin';

begin
  fillchar(a,sizeof(a),0);
  for i := 1 to recs do
    begin
      n := random(11) + 1;
      temp[0] := chr(n);
      for j := 1 to n do temp[j] := chr(random(26) + 65);
      a[i].b := temp;
      a[i].a := i;
    end;
  writeln('STARTED SORT',^g);
  writeln(time);
  shellsort(12,3,recs,sizeof(x_type),a);
  writeln(time);
  writeln('ENDED SORT',^g);
  for i := 1 to 15 do  writeln(a[i].b); {REMOVE, IF DESIRED, FOR LONGER SORTS}
end.
