;MPQSHD2P.ASM 3.2
;	MACHINE LANGUAGE SUBROUTINE
;	FOR MULTIPLE POLYNOMIAL QUADRATIC SIEVE
;	PROTECT MODE VERSION
;	Hard Disk version
;	1989/90 by YUJI KIDA
;
.386P
pmode	segment para public use32
	assume	cs:pmode,ds:pmode

	include	mpqshd.h

	org	100h
start:

pmodesetR1R2:
	mov	ax,10h
	mov	ds,ax
	mov	es,ax
	mov	fs,ax
	mov	gs,ax

	; loop for primes

	mov	esi,primeadr+2*primeunitbytes	;skip sign & 2
	xor	ecx,ecx
	mov	cx,ds:[_primes]
	sub	cx,2		;skip sign & 2
	mov	ebx,2
align 4
R1R2loop:
	push	ecx
	xor	eax,eax
	mov	ax,[esi]
	add	ebx,eax
	call	setparam
	add	esi,primeunitbytes
	pop	ecx
	loop	R1R2loop

	; return to real mode

	db	0eah
	dd	104h		;32 bit offset
	dw	18h		;gdt_code16 - gdttbl


	;data structure
	; prime diff	2bytes
	; log(P) 	2bytes(higher=0)
	; sqrt(WN) @ P	4bytes
	; start1	4 bytes
	; start2	4 bytes
	; ( total	10h bytes=primeunitbytes)

	; calc parameters for each prime
setparam:
	push	esi
	mov	esi,_A2
	call	longmod_bx
	mov	eax,edx
	call	ax_modinv_bx
	mov	ds:[_MIA2P],eax	;set 2A @ P

	mov	esi,_B
	call	longmod_bx		;edx = B @ P
	pop	esi

	mov	eax,[esi+4]
	add	edx,eax
	cmp	edx,ebx
	jbe	setparamj1
	sub	edx,ebx
setparamj1:
	mov	eax,ebx
	sub	eax,edx			;(-B-R)@P

	mul	dword ptr ds:[_MIA2P]
	div	ebx			;edx = (-B-R)@P/2A@P

	mov	eax,edx
	add	eax,ds:[_sievewidth]
	xor	edx,edx
	div	ebx
	mov	[esi+8],edx		;((-B-R)@P/2A+M)@P

	mov	eax,[esi+4]
	shl	eax,1
	mul	dword ptr ds:[_MIA2P]	;2R/2A
	div	ebx			;edx = 2R/2A@P
	add	edx,[esi+8]
	cmp	edx,ebx
	jb	setparamj3
	sub	edx,ebx
setparamj3:
	add	edx,ds:[_sievetop]
	mov	[esi+12],edx		;((-B+R)@P/2A+M)@P
	mov	eax,[esi+8]
	add	eax,ds:[_sievetop]
	mov	[esi+8],eax
	ret


	org	280h

	; * sieve process

pmodesieve:
	mov	ax,10h
	mov	ds,ax
	mov	es,ax

	; set initial value

	push	ebp
	mov	ebp,ds:[_sieveBsize]

	mov	edi,ds:[_sievetop]
	mov	ecx,ebp
	shr	ecx,2
	mov	eax,ds:[_inilog]
	rep	stosd

	mov	edx,ds:[_sieveover]

	; sieve main1 start

	mov	esi,primeadr+2*primeunitbytes	;skip sign & 2
;	xor	ecx,ecx
	movzx	ecx,word ptr ds:[_primes]
	sub	cx,ds:[_primes1]	;cut primes >= sieveXsize
	sub	cx,ds:[_primes2]	;cut primes >= sieveBsize
	sub	cx,2			;skip sign & 2
	mov	ebx,2
align 4
primeloop:
	push	ecx
;	xor	eax,eax
	movzx	eax,word ptr [esi]
	add	ebx,eax

	mov	eax,ebx
	shl	eax,sieveRepLog
	sub	eax,ebx
	mov	ecx,edx
	sub	ecx,eax		;ecx = sieveover - (sieveRep-1)*prime

	mov	al,[esi+2]		;log

	mov	edi,[esi+8]
align 4
setlogplp1:
  rept	sieveRep
	sub	[edi],al
	add	edi,ebx
  endm
	cmp	edi,ecx
	jb	setlogplp1

  rept	sieveRep-1
	cmp	edi,edx
	jae	short setlogpskip1
	sub	[edi],al
	add	edi,ebx
  endm
align 4
setlogpskip1:
	sub	edi,ebp			;ds:[_sieveBsize]
	mov	[esi+8],edi

	mov	edi,[esi+12]
align 4
setlogplp2:
  rept	sieveRep
	sub	[edi],al
	add	edi,ebx
  endm
	cmp	edi,ecx
	jb	setlogplp2

  rept	sieveRep-1
	cmp	edi,edx
	jae	short setlogpskip2
	sub	[edi],al
	add	edi,ebx
  endm
align 4
setlogpskip2:
	sub	edi,ebp			;ds:[_sieveBsize]
	mov	[esi+12],edi

	add	esi,primeunitbytes
	pop	ecx
	dec	ecx
	jnz	primeloop

	; sieve main1 end

	; sieve main2 start

	mov	cx,ds:[_primes1]
	or	cx,cx
	jz	sievemain3
align 4
primeloop2:
;	xor	eax,eax
	movzx	eax,word ptr [esi]
	add	ebx,eax

	mov	al,[esi+2]		;log

	mov	edi,[esi+8]
  rept	sieveRep-1
	sub	[edi],al
	add	edi,ebx
	cmp	edi,edx
	jae	short setlogpskip3
  endm
	sub	[edi],al
	add	edi,ebx
align 4
setlogpskip3:
	sub	edi,ebp			;ds:[_sieveBsize]
	mov	[esi+8],edi

	mov	edi,[esi+12]

  rept	sieveRep-1
	sub	[edi],al
	add	edi,ebx
	cmp	edi,edx
	jae	short setlogpskip4
  endm
	sub	[edi],al
	add	edi,ebx
align 4
setlogpskip4:
	sub	edi,ebp			;ds:[_sieveBsize]
	mov	[esi+12],edi

	add	esi,primeunitbytes
	dec	ecx
	jnz	primeloop2


	; sieve main2 end

	; sieve main3 start

sievemain3:
	mov	cx,ds:[_primes2]
	jcxz	short sievedone
align 4
primeloop3:
;	xor	eax,eax
	movzx	eax,word ptr [esi]
	add	ebx,eax

	mov	edi,[esi+8]
	cmp	edi,edx
	jae	short setlogpskip5
	mov	al,[esi+2]		;log
	sub	[edi],al
	add	edi,ebx
setlogpskip5:
	sub	edi,ebp			;ds:[_sieveBsize]
	mov	[esi+8],edi

	mov	edi,[esi+12]
	cmp	edi,edx
	jae	short setlogpskip6
	mov	al,[esi+2]		;log
	sub	[edi],al
	add	edi,ebx
setlogpskip6:
	sub	edi,ebp			;ds:[_sieveBsize]
	mov	[esi+12],edi

	add	esi,primeunitbytes
	loop	primeloop3

	; sieve main3 end

sievedone:
	mov	al,ds:[_cutlog]
	mov	edx,ds:[_sievetop]

	mov	ebx,sieveansarea
	mov	word ptr [ebx],0
	mov	esi,sieveansarea+4
	mov	edi,edx			;ds:[_sievetop]
	mov	ecx,ebp			;ds:[_sieveBsize]
align 4
sieveanslp:
	scasb
	ja	short sieveansfind
sieveansnext:
	loop	sieveanslp

	; return to real mode
sieveansret:
	pop	ebp

	db	0eah
	dd	104h		;32 bit offset
	dw	18h		;gdt_code16 - gdttbl


sieveansfind:
	sub	edi,edx		;ds:[_sievetop]
	dec	edi
	mov	[esi],edi	;result
	add	edi,edx		;ds:[_sievetop]
	inc	edi
	add	esi,4
	inc	word ptr [ebx]	;# of results
	jmp	sieveansnext



	org	700h
decompose:
	mov	ax,10h
	mov	ds,ax
	mov	es,ax

	push	ebp

;	xor	eax,eax
	movzx	eax,word ptr ds:[_absQ+2]
	shl	eax,4
;	xor	esi,esi
	movzx	esi,word ptr ds:[_absQ]
	add	esi,eax		;absolute adr of W#
	push	esi		;*

	mov	ebx,_W
	mov	edi,ebx		;set in _W by dword format
	xor	eax,eax
	lodsw
	mov	ecx,eax
	inc	eax
	shr	eax,1
	stosd
	rep	movsw
	xor	eax,eax
	stosw

	; divide by p=2
decomp2:
	cmp	dword ptr [ebx+4],0
	jne	short decomp4
	mov	ecx,[ebx]
	dec	ecx
	mov	[ebx],ecx
	lea	edi,[ebx+4]
	lea	esi,[edi+4]
	rep	movsd
	jmp	decomp2
decomp4:
	std

	mov	eax,[ebx+4]
	mov	ebp,1
decomp5:
	shl	ebp,1
	shr	eax,1
	jnc	decomp5
	shr	ebp,1
	cmp	ebp,1
	je	short decomp8

	mov	esi,ebx
	mov	eax,[esi]
	mov	ecx,eax
	shl	eax,2
	add	esi,eax		;highest dword adr
	xor	edx,edx
	lodsd
	div	ebp
	push	eax		;new highest value
	jmp	short decomp7
align 4
decomp6:
	lodsd
	div	ebp
decomp7:
	mov	[esi+4],eax
	loop	decomp6
	pop	eax
	or	eax,eax
	jnz	short decomp75
	dec	dword ptr [ebx]
decomp75:
	mov	eax,[ebx]
	or	eax,[ebx+4]
	dec	eax
	jz	short decompout	;if result=1

	; divide by odd primes
decomp8:
	mov	eax,ds:[_sieveConst]
	sub	eax,ds:[_result]
	mov	ds:[_result],ebx
	mov	ebx,eax

	mov	edi,primeadr+2*primeunitbytes	;skip sign & 2
	mov	ecx,ds:[_primes]
	sub	ecx,2		;skip sign & 2
	mov	ebp,2

decomp10:
;	xor	eax,eax
	movzx	eax,word ptr [edi]
	add	ebp,eax

	xor	edx,edx
	mov	eax,[edi+8]
	add	eax,ebx
	div	ebp
	or	edx,edx
	jz	short godecomp	;divide exactly

	xor	edx,edx
	mov	eax,[edi+12]
	add	eax,ebx
	div	ebp
	or	edx,edx
	jz	short godecomp	;divide exactly
decomp40:
	add	edi,primeunitbytes
	loop	decomp10
	mov	ebx,ds:[_result]

decompout:
	cld

	mov	esi,ebx
	pop	ebx		;*transfer to 16bit format
	mov	edi,ebx
	lodsd
	shl	eax,1
	mov	ecx,eax		;word length
	stosw
	rep	movsw
	cmp	word ptr [edi-2],0
	jne	short decomp50		;if highest not 0
	dec	word ptr [ebx]
decomp50:
	pop	ebp

	; return to real mode

	db	0eah
	dd	104h		;32 bit offset
	dw	18h		;gdt_code16 - gdttbl


decomp90:
	mov	esi,ebx
	mov	eax,[esi]
	mov	ecx,eax
	shl	eax,2
	add	esi,eax		;highest dword adr
	xor	edx,edx
align 4
decomp95:
	lodsd
	div	ebp
	loop	decomp95
	or	edx,edx
	jz	short decomp100
	pop	ecx
	pop	ebx
	jmp	decomp40
align 4
godecomp:
	push	ebx
	push	ecx
	mov	ebx,ds:[_result]
decomp100:
	mov	esi,ebx
	mov	eax,[esi]
	mov	ecx,eax
	shl	eax,2
	add	esi,eax		;highest dword adr
	xor	edx,edx
	lodsd
	div	ebp
	push	eax		;new highest value
	jmp	short decomp120
align 4
decomp110:
	lodsd
	div	ebp
decomp120:
	mov	[esi+4],eax
	loop	decomp110
	pop	eax
	or	eax,eax
	jnz	short decomp130	;if same length
	dec	dword ptr [ebx]
decomp130:
	mov	eax,[ebx]
	or	eax,[ebx+4]
	dec	eax
	jz	short decomp140	;if result=1
	cmp	ebp,1024
	jbe	decomp90	;check divisible ^2,^3 ?
	pop	ecx
	pop	ebx
	jmp	decomp40
decomp140:
	pop	eax		;dummy
	pop	eax		;dummy
	jmp	decompout	;if result=1

	public	decompend
decompend:

	org	900h
;
; quick sort
;
_s	equ	0
_e	equ	2
_i	equ	4
_j	equ	6
_pivot	equ	8		;4bytes

qsort:
	mov	ax,10h
	mov	ds,ax
	mov	es,ax

	mov	bx,1
	call	qsortmain

	; return to real mode

	db	0eah
	dd	104h		;32 bit offset
	dw	18h		;gdt_code16 - gdttbl


	org	0a00h
;
; insertion sort
;
_s	equ	0
_e	equ	2
_i	equ	4
_j	equ	6
_ww	equ	8	;4 bytes
_m1	equ	12	;4 bytes

isort:
	mov	ax,10h
	mov	ds,ax
	mov	es,ax

	push	bp
	sub	sp,16
	mov	bp,sp
	mov	bx,1
	mov	[bp+_s],bx
	mov	[bp+_e],cx	;passed from real mode
	mov	ax,bx
	dec	ax
	call	getlpv
	mov	[bp+_m1],eax

	mov	cx,[bp+_e]
	mov	ax,[bp+_s]
	sub	cx,ax
	inc	ax
	mov	[bp+_i],ax
isort10:
	push	cx
	mov	ax,[bp+_i]
	call	getlpv
	mov	edx,eax
	mov	[bp+_ww],eax
	mov	ax,[bp+_s]
	dec	ax
	call	putlpv

	mov	ax,[bp+_i]
	dec	ax
	mov	[bp+_j],ax

  ;while
isort40:
	mov	ax,[bp+_j]
	call	getlpv
	cmp	eax,[bp+_ww]
	jbe	isort50

	xor	eax,eax
	mov	ax,[bp+_j]
	shl	eax,1
	add	eax,ds:[_lpvindexadr]
	mov	esi,eax
	lea	edi,[esi+2]
	mov	ax,[esi]
	xchg	ax,[edi]
	mov	[esi],ax
	dec	word ptr [bp+_j]
	jmp	isort40
  ;wend

isort50:
	pop	cx
	inc	word ptr [bp+_i]
	loop	isort10

	mov	ax,[bp+_s]
	dec	ax
	mov	edx,[bp+_m1]
	call	putlpv

	add	sp,16
	pop	bp

	; return to real mode

	db	0eah
	dd	104h		;32 bit offset
	dw	18h		;gdt_code16 - gdttbl


;
; initialize for INIT
;

	org	0b00h

	mov	ax,10h
	mov	fs,ax
	mov	gs,ax

	; return to real mode

	db	0eah
	dd	104h		;32 bit offset
	dw	18h		;gdt_code16 - gdttbl


;
;* subroutines
;
getlpv:
	and	eax,0000ffffh
	shl	eax,1
	add	eax,ds:[_lpvindexadr]
	mov	ebx,eax
	xor	eax,eax
	mov	ax,[ebx]
	mov	edx,t_byte
	mul	edx
	add	eax,ds:[_lpvdataadr]
	mov	esi,eax
	mov	eax,[esi]
	ret

putlpv:
	push	edx
	and	eax,0000ffffh
	shl	eax,1
	add	eax,ds:[_lpvindexadr]
	mov	ebx,eax
	xor	eax,eax
	mov	ax,[ebx]
	mov	edx,t_byte
	mul	edx
	add	eax,ds:[_lpvdataadr]
	mov	esi,eax
	pop	eax
	mov	[esi],eax
	ret

;
;  long integer @ 32bit integer
;  edx = [esi] @ ebx
;  [esi] > 0 must not be 0
; destroy : eax,ecx,edx

longmod_bx:
	mov	eax,[esi]
	and	eax,07fffh
	mov	ecx,eax
	shl	eax,2
	add	esi,eax
	xor	edx,edx
	std
align 4
longmodlp:
	lodsd
	div	ebx
	loop	longmodlp
	cld
	ret

;
; inverse modulo prime
; 
; inp : eax
; out : eax = 1/eax mod ebx
; destroy : ecx,esi,edi

ax_modinv_bx:
	cmp	eax,1
	je	modinvret
	mov	ecx,ebx
	cmp	eax,ebx
	jb	modinv10
	xor	edx,edx
	div	ebx
	mov	eax,edx
	cmp	eax,1
	je	modinvret
modinv10:
	xchg	eax,ecx
	xor	esi,esi		;coef1
	mov	edi,1		;coef2
modinv20:
	xor	edx,edx
	div	ecx
	push	edx		;remainder
	push	edi		;coef2
	mul	edi
	div	ebx
	mov	eax,esi
	sub	eax,edx
	jae	modinv30
	add	eax,ebx
modinv30:
	mov	edi,eax		;new coef2=coef1-quotient*coef2
	pop	esi		;new coef1=old coef2
	mov	eax,ecx
	pop	ecx
	cmp	ecx,1
	jne	modinv20	;GCD must 1 otherwise endlessloop
	mov	eax,edi
modinvret:
	ret


qsortmain:				;bx=S,cx=E
	push	ebp
	sub	sp,12

	mov	bp,sp
	mov	[bp+_s],bx
	mov	[bp+_i],bx
	mov	[bp+_e],cx
	mov	[bp+_j],cx
	mov	ax,bx
	add	ax,10
	cmp	ax,cx
	jae	qsortret
	sub	ax,10
	add	ax,cx
	shr	ax,1		;ax=(E+S)/2
	call	getlpv
	mov	[bp+_pivot],eax
qsort10:
	mov	ax,[bp+_i]
	call	getlpv
	cmp	eax,[bp+_pivot]
	jnb	qsort20
	inc	word ptr [bp+_i]
	jmp	qsort10
qsort20:			
	mov	ax,[bp+_j]
	call	getlpv
	cmp	eax,[bp+_pivot]
	jna	qsort30
	dec	word ptr [bp+_j]
	jmp	qsort20
qsort30:
	mov	ax,[bp+_i]
	cmp	ax,[bp+_j]
	ja	qsort50
	and	eax,0000ffffh
	shl	eax,1
	add	eax,ds:[_lpvindexadr]
	mov	esi,eax

	mov	ax,[bp+_j]
	and	eax,0000ffffh
	shl	eax,1
	add	eax,ds:[_lpvindexadr]
	mov	edi,eax

	mov	ax,[esi]
	xchg	ax,[edi]
	mov	[esi],ax
	inc	word ptr [bp+_i]
	dec	word ptr [bp+_j]
qsort50:
	mov	ax,[bp+_i]
	cmp	ax,[bp+_j]
	jbe	qsort10
	mov	bx,[bp+_s]
	mov	cx,[bp+_j]
	call	qsortmain
	mov	bx,[bp+_i]
	mov	cx,[bp+_e]
	call	qsortmain
qsortret:
	add	sp,12
	pop	ebp
	ret

pmode	ends


end	start
