TITLE fpu087.asm (C) 1989, Mark C. Peterson, CompuServe [70441,3353] SUBTTL All rights reserved. ; ; Code may be used in any program provided the author is credited ; either during program execution or in the documentation. Source ; code may be distributed only in combination with public domain or ; shareware source code. Source code may be modified provided the ; copyright notice and this message is left unchanged and all ; modifications are clearly documented. ; ; I would appreciate a copy of any work which incorporates this code. ; ; Mark C. Peterson ; 405-C Queen St., Suite #181 ; Southington, CT 06489 ; (203) 276-9721 ; ; References: ; The VNR Concise Encyclopedia of Mathematics ; by W. Gellert, H. Hustner, M. Hellwich, and H. Kastner ; Published by Van Nostrand Reinhold Comp, 1975 ; ; 80386/80286 Assembly Language Programming ; by William H. Murray, III and Chris H. Pappas ; Published by Osborne McGraw-Hill, 1986 ; ; History since Fractint 16.3: ; CJLT changed e2x and Log086 algorithms for more speed ; CJLT corrected SinCos086 for -ve angles in 2nd and 4th quadrants ; CJLT speeded up SinCos086 for angles >45 degrees in any quadrant ; (See comments containing the string `CJLT') ; 14 Aug 91 CJLT removed r16Mul - not called from anywhere ; 21 Aug 91 CJLT corrected Table[1] from 6 to b ; improved bx factors in Log086 for more accuracy ; corrected Exp086 overflow detection to 15 from 16 bits. ; ; CJLT= Chris J Lusby Taylor ; 32 Turnpike Road ; Newbury, England (where's that?) ; Contactable via Compuserve user Stan Chelchowski [100016,351] ; or Tel 011 44 635 33270 (home) ; ; ;PROCs in this module: ;FPUcplxmul PROC x:word, y:word, z:word ;FPUcplxdiv PROC x:word, y:word, z:word ;FPUcplxlog PROC x:word, z:word ;FPUsinhcosh PROC x:word, sinh:word, cosh:word ;FPUsincos PROC x:word, sinx:word, cosx:word ;r16Mul PROC uses si di, x1:word, x2:word, y1:word, y2:word ;RegFloat2Fg PROC x1:word, x2:word, Fudge:word ;RegFg2Float PROC x1:word, x2:word, FudgeFact:byte ;ExpFudged PROC uses si, x_low:word, x_high:word, Fudge:word ;LogFudged PROC uses si di, x_low:word, x_high:word, Fudge:word ;LogFloat14 PROC x1:word, x2:word ;RegSftFloat PROC x1:word, x2:word, Shift:byte ;RegDivFloat PROC uses si di, x1:word, x2:word, y1:word, y2:word ;SinCos086 PROC uses si di, LoNum:WORD, HiNum:WORD, SinAddr:WORD, \ ;_e2xLT PROC ;argument in dx.ax (bitshift=16 is hard coded here) ;Exp086 PROC uses si di, LoNum:WORD, HiNum:WORD ;SinhCosh086 PROC uses si di, LoNum:WORD, HiNum:WORD, SinhAddr:WORD, \ ;Log086 PROC uses si di, LoNum:WORD, HiNum:WORD, Fudge:WORD IFDEF ??version MASM51 QUIRKS EMUL ENDIF .model medium, c extrn cos:far extrn _Loaded387sincos:far extrn compiled_by_turboc:word .data extrn cpu:WORD PUBLIC TrigLimit, TrigOverflow PiFg13 dw 6487h InvPiFg33 dd 0a2f9836eh InvPiFg16 dw 517ch Ln2Fg16 dw 0b172h ;ln(2) * 2^16 . True value is b172.18 Ln2Fg15 dw 058b9h ;used by e2xLT. True value is 58b9.0C TrigOverflow dw 0 TrigLimit dd 0 ; ;Table of 2^(n/16) for n=0 to 15. All entries fg15. ;Used by e2xLT ; Table dw 08000h,085abh,08b96h,091c4h dw 09838h,09ef5h,0a5ffh,0ad58h dw 0b505h,0bd09h,0c567h,0ce25h dw 0d745h,0e0cdh,0eac1h,0f525h one dw ? expSign dw ? a dw ? SinNeg dw ? ;CJLT - not now needed by SinCos086, but by CosNeg dw ? ; ArcTan086 Ans dq ? fake_es dw ? ; Windows can't use ES for storage TaylorTerm MACRO LOCAL Ratio add Factorial, one jnc SHORT Ratio rcr Factorial, 1 shr Num, 1 shr one, 1 Ratio: mul Num div Factorial ENDM _4_ dq 4.0 _2_ dq 2.0 _1_ dq 1.0 PointFive dq 0.5 temp dq ? Sign dw ? extrn fpu:word .code FPUcplxmul PROC x:word, y:word, z:word mov bx, x fld QWORD PTR [bx] ; x.x fld QWORD PTR [bx+8] ; x.y, x.x mov bx, y fld QWORD PTR [bx] ; y.x, x.y, x.x fld QWORD PTR [bx+8] ; y.y, y.x, x.y, x.x mov bx, z fld st ; y.y, y.y, y.x, x.y, x.x fmul st, st(3) ; y.y*x.y, y.y. y.x, x.y, x.x fld st(2) ; y.x, y.y*x.y, y.y, y.x, x.y, x.x fmul st, st(5) ; y.x*x.x, y.y*x.y, y.y, y.x, x.y, x.x fsubr ; y.x*x.x - y.y*x.y, y.y, y.x, x.y, x.x fstp QWORD PTR [bx] ; y.y, y.x, x.y, x.x fmulp st(3), st ; y.x, x.y, x.x*y.y fmul ; y.x*x.y, x.x*y.y fadd ; y.x*x.y + x.x*y.y fstp QWORD PTR [bx+8] ret FPUcplxmul ENDP FPUcplxdiv PROC x:word, y:word, z:word mov bx, x fld QWORD PTR [bx] ; x.x fld QWORD PTR [bx+8] ; x.y, x.x mov bx, y fld QWORD PTR [bx] ; y.x, x.y, x.x fld QWORD PTR [bx+8] ; y.y, y.x, x.y, x.x fld st ; y.y, y.y, y.x, x.y, x.x fmul st, st ; y.y*y.y, y.y, y.x, x.y, x.x fld st(2) ; y.x, y.y*y.y, y.y, y.x, x.y, x.x fmul st, st ; y.x*y.x, y.y*y.y, y.y, y.x, x.y, x.x fadd ; mod, y.y, y.x, x.y, x.x fdiv st(1), st ; mod, y.y=y.y/mod, y.x, x.y, x.x fdivp st(2), st ; y.y, y.x=y.x/mod, x.y, x.x mov bx, z fld st ; y.y, y.y, y.x, x.y, x.x fmul st, st(3) ; y.y*x.y, y.y. y.x, x.y, x.x fld st(2) ; y.x, y.y*x.y, y.y, y.x, x.y, x.x fmul st, st(5) ; y.x*x.x, y.y*x.y, y.y, y.x, x.y, x.x fadd ; y.x*x.x - y.y*x.y, y.y, y.x, x.y, x.x fstp QWORD PTR [bx] ; y.y, y.x, x.y, x.x fmulp st(3), st ; y.x, x.y, x.x*y.y fmul ; y.x*x.y, x.x*y.y fsubr ; y.x*x.y + x.x*y.y fstp QWORD PTR [bx+8] ret FPUcplxdiv ENDP FPUcplxlog PROC x:word, z:word LOCAL Status:word mov bx, x fld QWORD PTR [bx+8] ; x.y fld QWORD PTR [bx] ; x.x, x.y mov bx, z fldln2 ; ln2, x.x, x.y fdiv _2_ ; ln2/2, x.x, x.y fld st(2) ; x.y, ln2/2, x.x, x.y fmul st, st ; sqr(x.y), ln2/2, x.x, x.y fld st(2) ; x.x, sqr(x.y), ln2/2, x.x, x.y fmul st, st ; sqr(x.x), sqr(x.y), ln2/2, x.x, x.y fadd ; mod, ln2/2, x.x, x.y fyl2x ; z.x, x.x, x.y fstp QWORD PTR [bx] ; x.x, x.y cmp fpu, 387 jne Restricted fpatan jmp StoreZX Restricted: mov bx, x mov dh, BYTE PTR [bx+7] or dh, dh jns ChkYSign fchs ; |x.x|, x.y ChkYSign: mov dl, BYTE PTR [bx+8+7] or dl, dl jns ChkMagnitudes fxch ; x.y, |x.x| fchs ; |x.y|, |x.x| fxch ; |x.x|, |x.y| ChkMagnitudes: fcom st(1) ; x.x, x.y fstsw Status ; x.x, x.y test Status, 4500h jz XisGTY test Status, 4000h jz XneY fstp st ; x.y fstp st ; fldpi ; Pi fdiv _4_ ; Pi/4 jmp ChkSignZ XneY: fxch ; x.y, x.x fpatan ; Pi/2 - Angle fldpi ; Pi, Pi/2 - Angle fdiv _2_ ; Pi/2, Pi/2 - Angle fsubr ; Angle jmp ChkSignZ XisGTY: fpatan ChkSignZ: or dh, dh js NegX or dl, dl jns StoreZX fchs jmp StoreZX NegX: or dl, dl js QuadIII fldpi fsubr jmp StoreZX QuadIII: fldpi fsubr fchs StoreZX: mov bx, z fstp QWORD PTR [bx+8] ; ret FPUcplxlog ENDP FPUsinhcosh PROC x:word, sinh:word, cosh:word LOCAL Control:word fstcw Control push Control ; Save control word on the stack or Control, 0000110000000000b fldcw Control ; Set control to round towards zero mov Sign, 0 ; Assume the sign is positive mov bx, x fldln2 ; ln(2) fdivr QWORD PTR [bx] ; x/ln(2) cmp BYTE PTR [bx+7], 0 jns DuplicateX fchs ; x = |x| DuplicateX: fld st ; x/ln(2), x/ln(2) frndint ; int = integer(|x|/ln(2)), x/ln(2) fxch ; x/ln(2), int fsub st, st(1) ; rem < 1.0, int fdiv _2_ ; rem/2 < 0.5, int f2xm1 ; (2**rem/2)-1, int fadd _1_ ; 2**rem/2, int fmul st, st ; 2**rem, int fscale ; e**|x|, int fstp st(1) ; e**|x| cmp BYTE PTR [bx+7], 0 jns ExitFexp fdivr _1_ ; e**x ExitFexp: fld st ; e**x, e**x fdivr PointFive ; e**-x/2, e**x fld st ; e**-x/2, e**-x/2, e**x fxch st(2) ; e**x, e**-x/2, e**-x/2 fdiv _2_ ; e**x/2, e**-x/2, e**-x/2 fadd st(2), st ; e**x/2, e**-x/2, cosh(x) fsubr ; sinh(x), cosh(x) mov bx, sinh ; sinh, cosh fstp QWORD PTR [bx] ; cosh mov bx, cosh fstp QWORD PTR [bx] ; pop Control fldcw Control ; Restore control word ret FPUsinhcosh ENDP FPUsincos PROC x:word, sinx:word, cosx:word LOCAL Status:word mov bx, x fld QWORD PTR [bx] ; x cmp fpu, 387 jne Use387FPUsincos call _Loaded387sincos ; cos(x), sin(x) mov bx, cosx fstp QWORD PTR [bx] ; sin(x) mov bx, sinx fstp QWORD PTR [bx] ; ret Use387FPUsincos: sub sp, 8 ; save 'x' on the CPU stack mov bx, sp fstp QWORD PTR [bx] ; FPU stack: call cos add sp, 8 ; take 'cos(x)' off the CPU stack mov bx, ax cmp compiled_by_turboc,0 jne turbo_c1 fld QWORD PTR [bx] ; FPU stack: cos(x) turbo_c1: fld st ; FPU stack: cos(x), cos(x) fmul st, st ; cos(x)**2, cos(x) fsubr _1_ ; sin(x)**2, cos(x) fsqrt ; +/-sin(x), cos(x) mov bx, x fld QWORD PTR [bx] ; x, +/-sin(x), cos(x) fldpi ; Pi, x, +/-sin(x), cos(x) fadd st, st ; 2Pi, x, +/-sin(x), cos(x) fxch ; |x|, 2Pi, +/-sin(x), cos(x) fprem ; Angle, 2Pi, +/-sin(x), cos(x) fstp st(1) ; Angle, +/-sin(x), cos(x) fldpi ; Pi, Angle, +/-sin(x), cos(x) cmp BYTE PTR [bx+7], 0 jns SignAlignedPi fchs ; -Pi, Angle, +/-sin(x), cos(x) SignAlignedPi: fcompp ; +/-sin(x), cos(x) fstsw Status ; +/-sin(x), cos(x) mov ax, Status and ah, 1 jz StoreSinCos ; Angle <= Pi fchs ; sin(x), cos(x) StoreSinCos: mov bx, sinx fstp QWORD PTR [bx] ; cos(x) mov bx, cosx fstp QWORD PTR [bx] ; ret FPUsincos ENDP PUBLIC r16Mul r16Mul PROC uses si di, x1:word, x2:word, y1:word, y2:word mov si, x1 mov bx, x2 mov di, y1 mov cx, y2 xor ax, ax shl bx, 1 jz Exitr16Mult ; Destination is zero rcl ah, 1 shl cx, 1 jnz Chkr16Exp xor bx, bx ; Source is zero xor si, si jmp Exitr16Mult Chkr16Exp: rcl al, 1 xor ah, al ; Resulting sign in ah stc ; Put 'one' bit back into number rcr bl, 1 stc rcr cl, 1 sub ch, 127 ; Determine resulting exponent add bh, ch mov al, bh mov fake_es, ax ; es has the resulting exponent and sign mov ax, di mov al, ah mov ah, cl mov dx, si mov dl, dh mov dh, bl mul dx mov cx, fake_es shl ax, 1 rcl dx, 1 jnc Remr16MulOneBit ; 'One' bit is the next bit over inc cl ; 'One' bit removed with previous shift jmp Afterr16MulNorm Remr16MulOneBit: shl ax, 1 rcl dx, 1 Afterr16MulNorm: mov bl, dh ; Perform remaining 8 bit shift mov dh, dl mov dl, ah mov si, dx mov bh, cl ; Put in the exponent rcr ch, 1 ; Get the sign rcr bx, 1 ; Normalize the result rcr si, 1 Exitr16Mult: mov ax, si mov dx, bx ret r16Mul ENDP PUBLIC RegFloat2Fg RegFloat2Fg PROC x1:word, x2:word, Fudge:word mov ax, WORD PTR x1 mov dx, WORD PTR x2 mov bx, ax or bx, dx jz ExitRegFloat2Fg xor bx, bx mov cx, bx shl ax, 1 rcl dx, 1 rcl bx, 1 ; bx contains the sign xchg cl, dh ; cx contains the exponent stc ; Put in the One bit rcr dl, 1 rcr ax, 1 sub cx, 127 + 23 add cx, Fudge jz ChkFgSign jns ShiftFgLeft neg cx ShiftFgRight: shr dx, 1 rcr ax, 1 loop ShiftFgRight jmp ChkFgSign ShiftFgLeft: shl ax, 1 rcl dx, 1 loop ShiftFgLeft ChkFgSign: or bx, bx jz ExitRegFloat2Fg not ax not dx add ax, 1 adc dx, 0 ExitRegFloat2Fg: ret RegFloat2Fg ENDP PUBLIC RegFg2Float RegFg2Float PROC x1:word, x2:word, FudgeFact:byte mov ax, x1 mov dx, x2 mov cx, ax or cx, dx jz ExitFudgedToRegFloat mov ch, 127 + 32 sub ch, FudgeFact xor cl, cl shl ax, 1 ; Get the sign bit rcl dx, 1 jnc FindOneBit inc cl ; Fudged < 0, convert to postive not ax not dx add ax, 1 adc dx, 0 FindOneBit: shl ax, 1 rcl dx, 1 dec ch jnc FindOneBit dec ch mov al, ah mov ah, dl mov dl, dh mov dh, ch shr cl, 1 ; Put sign bit in rcr dx, 1 rcr ax, 1 ExitFudgedToRegFloat: ret RegFg2Float ENDP PUBLIC ExpFudged ExpFudged PROC uses si, x_low:word, x_high:word, Fudge:word LOCAL exp:WORD xor ax, ax mov WORD PTR Ans, ax mov WORD PTR Ans + 2, ax mov ax, WORD PTR x_low mov dx, WORD PTR x_high or dx, dx js NegativeExp div Ln2Fg16 mov exp, ax or dx, dx jz Raiseexp mov ax, dx mov si, dx mov bx, 1 PosExpLoop: add WORD PTR Ans, ax adc WORD PTR Ans + 2, 0 inc bx mul si mov ax, dx xor dx, dx div bx or ax, ax jnz PosExpLoop Raiseexp: inc WORD PTR Ans + 2 mov ax, WORD PTR Ans mov dx, WORD PTR Ans + 2 mov cx, -16 add cx, Fudge add cx, exp or cx, cx jz ExitExpFudged jns LeftShift neg cx RightShift: shr dx, 1 rcr ax, 1 loop RightShift jmp ExitExpFudged NegativeExp: not ax not dx add ax, 1 adc dx, 0 div Ln2Fg16 neg ax mov exp, ax or dx, dx jz Raiseexp mov ax, dx mov si, dx mov bx, 1 NegExpLoop: sub WORD PTR Ans, ax sbb WORD PTR Ans + 2, 0 inc bx mul si mov ax, dx xor dx, dx div bx or ax, ax jz Raiseexp add WORD PTR Ans, ax adc WORD PTR Ans + 2, 0 inc bx mul si mov ax, dx xor dx, dx div bx or ax, ax jnz NegExpLoop jmp Raiseexp LeftShift: shl ax, 1 rcl dx, 1 loop LeftShift ExitExpFudged: ret ExpFudged ENDP PUBLIC LogFudged LogFudged PROC uses si di, x_low:word, x_high:word, Fudge:word LOCAL exp:WORD xor bx, bx mov cx, 16 sub cx, Fudge mov ax, x_low mov dx, x_high or dx, dx jz ChkLowWord Incexp: shr dx, 1 jz DetermineOper rcr ax, 1 inc cx jmp Incexp ChkLowWord: or ax, ax jnz Decexp jmp ExitLogFudged Decexp: dec cx ; Determine power of two shl ax, 1 jnc Decexp DetermineOper: mov exp, cx mov si, ax ; si =: x + 1 shr si, 1 stc rcr si, 1 mov dx, ax xor ax, ax shr dx, 1 rcr ax, 1 shr dx, 1 rcr ax, 1 ; dx:ax = x - 1 div si mov bx, ax ; ax, Fudged 16, max of 0.3333333 shl ax, 1 ; x = (x - 1) / (x + 1), Fudged 16 mul ax shl ax, 1 rcl dx, 1 mov ax, dx ; dx:ax, Fudged 35, max = 0.1111111 mov si, ax ; si = (ax * ax), Fudged 19 mov ax, bx ; bx is the accumulator, First term is x mul si ; dx:ax, Fudged 35, max of 0.037037 mov fake_es, dx ; Save high word, Fudged (35 - 16) = 19 mov di, 0c000h ; di, 3 Fudged 14 div di ; ax, Fudged (36 - 14) = 21 or ax, ax jz Addexp mov cl, 5 shr ax, cl add bx, ax ; bx, max of 0.345679 ; x = x + x**3/3 mov ax, fake_es ; ax, Fudged 19 mul si ; dx:ax, Fudged 38, max of 0.004115 mov fake_es, dx ; Save high word, Fudged (38 - 16) = 22 mov di, 0a000h ; di, 5 Fudged 13 div di ; ax, Fudged (38 - 13) = 25 or ax, ax jz Addexp mov cl, 9 shr ax, cl add bx, ax ; x = x + x**3/3 + x**5/5 mov ax, fake_es ; ax, Fudged 22 mul si ; dx:ax, Fudged 41, max of 0.0004572 mov di, 0e000h ; di, 7 Fudged 13 div di ; ax, Fudged (41 - 13) = 28 mov cl, 12 shr ax, cl add bx, ax Addexp: shl bx, 1 ; bx *= 2, Fudged 16, max of 0.693147 ; x = 2 * (x + x**3/3 + x**5/5 + x**7/7) mov cx, exp mov ax, Ln2Fg16 ; Answer += exp * Ln2Fg16 or cx, cx js SubFromAns mul cx add ax, bx adc dx, 0 jmp ExitLogFudged SubFromAns: neg cx mul cx xor cx, cx xchg cx, dx xchg bx, ax sub ax, bx sbb dx, cx ExitLogFudged: ; x = 2 * (x + x**3/3 + x**5/5 + x**7/7) + (exp * Ln2Fg16) ret LogFudged ENDP PUBLIC LogFloat14 LogFloat14 PROC x1:word, x2:word mov ax, WORD PTR x1 mov dx, WORD PTR x2 shl ax, 1 rcl dx, 1 xor cx, cx xchg cl, dh stc rcr dl, 1 rcr ax, 1 sub cx, 127 + 23 neg cx push cx push dx push ax call LogFudged add sp, 6 ret LogFloat14 ENDP PUBLIC RegSftFloat RegSftFloat PROC x1:word, x2:word, Shift:byte mov ax, x1 mov dx, x2 shl dx, 1 rcl cl, 1 add dh, Shift shr cl, 1 rcr dx, 1 ret RegSftFloat ENDP PUBLIC RegDivFloat RegDivFloat PROC uses si di, x1:word, x2:word, y1:word, y2:word mov si, x1 mov bx, x2 mov di, y1 mov cx, y2 xor ax, ax shl bx, 1 jnz ChkOtherOp jmp ExitRegDiv ; Destination is zero ChkOtherOp: rcl ah, 1 shl cx, 1 jnz ChkDivExp xor bx, bx ; Source is zero xor si, si jmp ExitRegDiv ChkDivExp: rcl al, 1 xor ah, al ; Resulting sign in ah stc ; Put 'one' bit back into number rcr bl, 1 stc rcr cl, 1 sub ch, 127 ; Determine resulting exponent sub bh, ch mov al, bh mov fake_es, ax ; es has the resulting exponent and sign mov ax, si ; 8 bit shift, bx:si moved to dx:ax mov dh, bl mov dl, ah mov ah, al xor al, al mov bh, cl ; 8 bit shift, cx:di moved to bx:cx mov cx, di mov bl, ch mov ch, cl xor cl, cl shr dx, 1 rcr ax, 1 div bx mov si, dx ; Save (and shift) remainder mov dx, cx ; Save the quess mov cx, ax mul dx ; Mult quess times low word xor di, di sub di, ax ; Determine remainder sbb si, dx mov ax, di mov dx, si jc RemainderNeg xor di, di jmp GetNextDigit RemainderNeg: mov di, 1 ; Flag digit as negative not ax ; Convert remainder to positive not dx add ax, 1 adc dx, 0 GetNextDigit: shr dx, 1 rcr ax, 1 div bx xor bx, bx shl dx, 1 rcl ax, 1 rcl bl, 1 ; Save high bit mov dx, cx ; Retrieve first digit or di, di jz RemoveDivOneBit neg ax ; Digit was negative neg bx dec dx RemoveDivOneBit: add dx, bx mov cx, fake_es shl ax, 1 rcl dx, 1 jc AfterDivNorm dec cl shl ax, 1 rcl dx, 1 AfterDivNorm: mov bl, dh ; Perform remaining 8 bit shift mov dh, dl mov dl, ah mov si, dx mov bh, cl ; Put in the exponent shr ch, 1 ; Get the sign rcr bx, 1 ; Normalize the result rcr si, 1 ExitRegDiv: mov ax, si mov dx, bx ret RegDivFloat ENDP Term equ Num equ Factorial equ Sin equ Cos equ e equ Inve equ SinCos086 PROC uses si di, LoNum:WORD, HiNum:WORD, SinAddr:WORD, \ CosAddr:WORD mov ax, LoNum mov dx, HiNum xor cx, cx ; mov SinNeg, cx ;CJLT - not needed now ; mov CosNeg, cx ;CJLT - not needed now mov a, cx ;CJLT - Not needed by the original code, but it ; is now! or dx, dx jns AnglePositive not ax not dx add ax, 1 adc dx, cx ;conveniently zero mov a,8 ;a now has 4 bits: Sign+quadrant+octant AnglePositive: mov si, ax mov di, dx mul WORD PTR InvPiFg33 mov bx, dx mov ax, di mul WORD PTR InvPiFg33 add bx, ax adc cx, dx mov ax, si mul WORD PTR InvPiFg33 + 2 add bx, ax adc cx, dx mov ax, di mul WORD PTR InvPiFg33 + 2 add ax, cx adc dx, 0 and dx, 3 ;Remove multiples of 2 pi shl dx, 1 ;move over to make space for octant number ; ;CJLT - new code to reduce angle to: 0 <= angle <= 45 ; or ax, ax jns Lessthan45 inc dx ;octant number not ax ;angle=90-angle if it was >45 degrees Lessthan45: add a, dx ;remember octant and Sign in a mov Num, ax ;ax=Term, now ; ; We do the Taylor series with all numbers scaled by pi/2 ; so InvPiFg33 represents one. Truly. ; mov Factorial, WORD PTR InvPiFg33 + 2 mov one, Factorial mov Cos, Factorial ; Cos = 1 mov Sin, Num ; Sin = Num LoopIntSinCos: TaylorTerm ; ax=Term = Num * (Num/2) * (Num/3) * . . . sub Cos, Term ; Cos = 1 - Num*(Num/2) + (Num**4)/4! - . . . cmp Term, WORD PTR TrigLimit jbe SHORT ExitIntSinCos TaylorTerm sub Sin, Term ; Sin = Num - Num*(Num/2)*(Num/3) + (Num**5)/5! - . . . cmp Term, WORD PTR TrigLimit jbe SHORT ExitIntSinCos TaylorTerm add Cos, Term cmp Term, WORD PTR TrigLimit jbe SHORT ExitIntSinCos TaylorTerm ; Term = Num * (x/2) * (x/3) * . . . add Sin, Term cmp Term, WORD PTR TrigLimit jnbe LoopIntSinCos ExitIntSinCos: xor ax, ax mov cx, ax cmp Cos, WORD PTR InvPiFg33 + 2 jb CosDivide ; Cos < 1.0 inc cx ; Cos == 1.0 jmp StoreCos CosDivide: mov dx, Cos div WORD PTR InvPiFg33 + 2 StoreCos: mov Cos, ax ; cx:Cos xor ax, ax mov bx, ax cmp Sin, WORD PTR InvPiFg33 + 2 jb SinDivide ; Sin < 1.0 inc bx ; Sin == 1.0 jmp StoreSin SinDivide: mov dx, Sin div WORD PTR InvPiFg33 + 2 StoreSin: mov Sin, ax ; bx:Sin ; CJLT again. New tests are needed to correct signs and exchange sin/cos values mov ax, a inc al ;forces bit 1 to xor of previous bits 0 and 1 test al, 2 jz ChkNegCos xchg cx, bx xchg Sin, Cos ; mov ax, SinNeg commented out by CJLT. This was a bug. ; xchg ax, CosNeg ; mov CosNeg, ax and this was meant to be mov SinNeg,ax ChkNegCos: inc al ;forces bit 2 to xor of original bits 1 and 2 test al, 4 jz ChkNegSin not Cos ;negate cos if quadrant 2 or 3 not cx add Cos, 1 adc cx, 0 ChkNegSin: inc al inc al ;forces bit 3 to xor of original bits 2 and 3 test al, 8 jz CorrectQuad not Sin not bx add Sin, 1 adc bx, 0 CorrectQuad: CosPolarized: mov dx, bx mov bx, CosAddr mov WORD PTR [bx], Cos mov WORD PTR [bx+2], cx SinPolarized: mov bx, SinAddr mov WORD PTR [bx], Sin mov WORD PTR [bx+2], dx ret SinCos086 ENDP PUBLIC ArcTan086 ; ;Used to calculate logs of complex numbers, since log(R,theta)=log(R)+i.theta ; in polar coordinates. ; ;In C we need the prototype: ;long ArcTan086(long, long) ;The parameters are x and y, the returned value arctan(y/x) in the range 0..2pi. ArcTan086 PROC uses si di, x1:word, x2:word, y1:word, y2:word xor ax, ax ;ax=0 mov si, x1 ;Lo mov bx, x2 ;Hi or bx, bx ;Sign set ? jns xplus inc ax not si not bx add bx, 1 adc si, 0 ;We have abs(x) now xplus: mov di, y1 ;Lo mov cx, y2 ;Hi or cx, cx ;Sign set? jns yplus inc ax inc ax ;Set bit 1 of ax shl ax, 1 ; and shift it all left not di not cx add di, 1 adc cx, 0 ;We have abs(y) now yplus: cmp bx, cx ;y>x? jl no_xchg jg xchg_xy cmp si, di ;Hi words zero. What about lo words? jle no_xchg xchg_xy: ;Exchange them inc ax ;Flag the exchange xchg si, di xchg bx, cx no_xchg: mov SinNeg, ax ;Remember signs of x and y, and whether exchanged or cx, cx ;y=1.0 ? jz ynorm ;no, so continue normy: ;yes, normalise by reducing y to 16 bits max. rcr cx, 1 ; (We don't really lose any precision) rcr di, 1 clc rcr bx, 1 rcr si, 1 or cx, cx jnz normy ynorm: ret ArcTan086 ENDP ; ;There now follows Chris Lusby Taylor's novel (such modesty!) method ;of calculating exp(x). Originally, I was going to do it by decomposing x ;into a sum of terms of the form: ; ; th -i ; i term=ln(1+2 ) ; -i ;If x>term[i] we subtract term[i] from x and multiply the answer by 1 + 2 ; ;We can multiply by that factor by shifting and adding. Neat eh! ; ;Unfortunately, however, for 16 bit accuracy, that method turns out to be ;slower than what follows, which is also novel. Here, I first divide x not ;by ln(2) but ln(2)/16 so that we can look up an initial answer in a table of ;2^(n/16). This leaves the remainder so small that the polynomial Taylor series ;converges in just 1+x+x^2/2 which is calculated as 1+x(1+x/2). ; _e2xLT PROC ;argument in dx.ax (bitshift=16 is hard coded here) or dx, dx jns CalcExpLT not ax ; if negative, calc exp(abs(x)) not dx add ax, 1 adc dx, 0 CalcExpLT: shl ax, 1 rcl dx, 1 shl ax, 1 rcl dx, 1 shl ax, 1 rcl dx, 1 ;x is now in fg19 form for accuracy ; so, relative to fg16, we have: div Ln2Fg15 ; 8x==(a * Ln(2)/2) + Rem ; x=(a * Ln(2)/16) + Rem/8 ;so exp(x)=exp((a * Ln(2)/16) + Rem/8) ; =exp(a/16 * Ln(2)) * exp(Rem/8) ; =2^(a/16) * exp(Rem) ;a mod 16 will give us an initial estimate mov cl, 4 mov di, ax ;Remember original shr ax, cl mov a, ax ;a/16 will give us a bit shift mov ax, di and ax, 0000fh ;a mod 16 shl ax, 1 ;used as an index into 2 byte per element Table mov di, ax dec cx ;3, please add dx, 4 ;to control rounding up/down shr dx, cl ;Num=Rem/8 (convert back to fg16) ; mov ax, dx shr ax, 1 ;Num/2 fg 16 inc ax ;rounding control stc rcr ax, 1 ;1+Num/2 fg15 mul dx ;dx:ax=Num(1+Num/2) fg31, so dx alone is fg15 shl ax, 1 ;more rounding control adc dx, 8000H ;dx=1+Num(1+Num/2) fg15 mov ax, Table[di] ;Get table entry fg15 mul dx ;Only two multiplys used! fg14, now (15+15-16) shl ax, 1 rcl dx, 1 ;fg15 mov e, dx ret ; return e=e**x * (2**15), 1 < e**x < 2 _e2xLT ENDP ; a= bitshift needed Exp086 PROC uses si di, LoNum:WORD, HiNum:WORD mov ax, LoNum mov dx, HiNum mov TrigOverflow, 0 call _e2xLT ;Use Chris Lusby Taylor's e2x cmp a, 15 ;CJLT - was 16 jae Overflow ; cmp expSign, 0 ; jnz NegNumber cmp HiNum, 0 ;CJLT - we don't really need expSign jl NegNumber mov ax, e mov dx, ax inc a mov cx, 16 sub cx, a shr dx, cl mov cx, a shl ax, cl jmp ExitExp086 Overflow: xor ax, ax xor dx, dx mov TrigOverflow, 1 jmp ExitExp086 NegNumber: cmp e, 8000h jne DivideE mov ax, e dec a jmp ShiftE DivideE: xor ax, ax mov dx, ax stc rcr dx, 1 div e ShiftE: xor dx, dx mov cx, a shr ax, cl ExitExp086: ret Exp086 ENDP SinhCosh086 PROC uses si di, LoNum:WORD, HiNum:WORD, SinhAddr:WORD, \ CoshAddr:WORD mov ax, LoNum mov dx, HiNum call _e2xLT ;calculate exp(|x|) fg 15 ;answer is e*2^a cmp e, 8000h ;e==1 ? jne InvertE ; e > 1, so we can invert it. mov dx, 1 xor ax, ax cmp a, 0 jne Shiftone mov e, ax mov cx, ax jmp ChkSinhSign Shiftone: mov cx, a shl dx, cl dec cx shr e, cl shr dx, 1 shr e, 1 mov cx, dx sub ax, e sbb dx, 0 xchg ax, e xchg dx, cx jmp ChkSinhSign InvertE: xor ax, ax ; calc 1/e mov dx, 8000h div e mov Inve, ax ShiftE: mov cx, a shr Inve, cl inc cl mov dx, e shl e, cl neg cl add cl, 16 shr dx, cl mov cx, dx ; cx:e == e**Exp mov ax, e ; dx:e == e**Exp add ax, Inve adc dx, 0 shr dx, 1 rcr ax, 1 ; cosh(Num) = (e**Exp + 1/e**Exp) / 2 sub e, Inve sbb cx, 0 sar cx, 1 rcr e, 1 ChkSinhSign: or HiNum, 0 jns StoreHyperbolics not e not cx add e, 1 adc cx, 0 StoreHyperbolics: mov bx, CoshAddr mov WORD PTR [bx], ax mov WORD PTR [bx+2], dx mov bx, SinhAddr mov WORD PTR [bx], e mov WORD PTR [bx+2], cx ret SinhCosh086 ENDP Log086 PROC uses si di, LoNum:WORD, HiNum:WORD, Fudge:WORD LOCAL Exp:WORD, Accum:WORD, LoAns:WORD, HiAns:WORD ;NOTE: CJLT's method does not need LoAns, HiAns, but he hasn't yet ;taken them out xor bx, bx mov cx, Fudge mov ax, LoNum mov dx, HiNum mov TrigOverflow, 0 or dx, dx js Overflow jnz ShiftUp or ax, ax jnz ShiftUp Overflow: mov TrigOverflow, 1 jmp ExitLog086 ShiftUp: inc cx ; cx = Exp shl ax, 1 rcl dx, 1 or dx, dx jns ShiftUp ;shift until dx in fg15 form neg cx add cx, 31 mov Exp, cx ;CJLT method starts here. We try to reduce x to make it very close to 1 ;store LoAns in bx for now (fg 16; initially 0) mov cl,2 ;shift count redoterm2: cmp dx, 0AAABH ;x > 4/3 ? jb doterm3 mov ax, dx shr ax, cl sub dx, ax ;x:=x(1-1/4) add bx, 49a5H ;ln(4/3) fg 15 jmp redoterm2 doterm3: inc cl ;count=3 redoterm3: cmp dx, 9249H ;x > 8/7 ? jb doterm4 mov ax, dx shr ax, cl sub dx, ax ;x:=x(1-1/8) add bx, 222eH ;ln(8/7) jmp redoterm3 doterm4: inc cl ;count=4 cmp dx, 8889H ;x > 16/15 ? jb skipterm4 mov ax, dx shr ax, cl sub dx, ax ;x:=x(1-1/16) add bx, 1085h ;ln(16/15) ;No need to retry term4 as error is acceptably low if we ignore it skipterm4: ;Now we have reduced x to the range 1 <=x <1.072 ; ;Now we continue with the conventional series, but x is so small we ;can ignore all terms except the first! ;i.e.: ;ln(x)=2(x-1)/(x+1) sub dx, 8000h ; dx= x-1, fg 15 mov cx, dx stc rcr cx, 1 ; cx = 1 + (x-1)/2 fg 15 ; = 1+x fg 14 mov ax,4000H ;rounding control (Trust me) div cx ;ax=ln(x) add bx, ax ;so add it to the rest of the Ans. No carry MultExp: mov cx, Exp mov ax, Ln2Fg16 or cx, cx js SubFromAns mul cx ; cx = Exp * Lg2Fg16, fg 16 add ax, bx ;add bx part of answer adc dx, 0 jmp ExitLog086 SubFromAns: inc bx ;Somewhat artificial, but we need to add 1 somewhere neg cx mul cx not ax not dx add ax, bx adc dx, 0 ExitLog086: ret Log086 ENDP END