	opt	w-,l-,d-		* Warnings off, linkable output off (plain executable), debug info off

  ***************************************************************
* *                                                       	* *
* *                                                       	* *
* * 		   Speed 2.00, by Jez San			* *
* * 		(c)1989 Argonaut Software Ltd.			* *
* *                                                       	* *
* *		Mill Hill, London...   ENGLAND			* *
* *                                                       	* *
* *    FrillFree Shareware Software For Amiga Computers    	* *
* *                                                       	* *
* *                                                      	* *
  ***************************************************************


*
* Blatant ad:
*
* This program was assembled using ArgAsm, a new assembler coming soon from
* Argonaut Software.  It is compatible with its contemporaries, but at
* least 10 times faster than the fastest competition.  We're talking up
* to 1 million lines per minute on a standard Amiga... typically 250,000.
* ArgAsm v1 assembles this file in 0.22 secs, Devpac v2 in 2.50 secs,
* and Cape v2 in 4.60 secs; all figures were measured on a standard A2000
* with no turbo cards fitted!   ArgAsm is The Fastest Assembler, Bar None!
*
* We ship ArgAsm v1.0 on September 1st.
*
* Orders are being taken right now for 60 pounds UK STERLING, ie:
* US $85 including tax, postage etc.  Write with cheque or money
* to Argonaut, 7 Millway, Mill Hill, London NW7 3QR, ENGLAND.
* or tel: (01) 906 3744, international 011 +44 1 906 3744
*
* Foreign (US & Europe) distributors sought.
*

	output	speed			* ArgAsm & Devpac allow filenames!

*
* To save time having to use include files, and for the benefit of
* those programmers who don't have access to the Commodore includes :-
*

* Absolute address read to obtain the Exec library base (see startup).
_SysBase	equ	4

* Jump-table offsets called through the Exec base in a6.
* Only OpenLibrary and CloseLibrary are referenced by the code below.
_LVOOpenLibrary	equ	-552		* library offsets from _SysBase
_LVOCloseLibrary	equ	-414
_LVOGetMsg	equ	-372
_LVOReplyMsg	equ	-378
_LVOWaitPort	equ	-384
_LVOFindPort	equ	-390

* Window open/close offsets (presumably intuition.library - no code
* below opens that library or uses these two).
_LVOCloseWindow	equ	-72
_LVOOpenWindow	equ	-204

* Jump-table offsets called through _DosBase in a6.
_LVOInput	equ	-54
_LVOOutput	equ	-60
_LVOWrite	equ	-48
_LVODateStamp	equ	-192

* Field offsets from the Exec base.
* AttnFlags is tested bit by bit in the processor detection code;
* TDNestCnt is the byte bumped by the forbid/permit macros.
AttnFlags	equ	$00000128	* Word Off of Execbase
TDNestCnt	equ	$00000127	* Byte

* forbid - bump Exec's TDNestCnt byte directly instead of calling into
* the library.  Leaves the Exec base in a6 (a6 is clobbered).
forbid	macro
	movea.l	_SysBase,a6
	addq.b	#1,TDNestCnt(a6)
	endm

* permit - undo one forbid by dropping TDNestCnt again.  This only
* decrements the counter; nothing else is done here.
* Leaves the Exec base in a6 (a6 is clobbered).
permit	macro
	movea.l	_SysBase,a6
	subq.b	#1,TDNestCnt(a6)
	endm

*
* Program entry point.
*
* Hops over the shareware notice embedded at the very start of the code,
* then opens dos.library (any version) and stores its base in _DosBase.
* If the open fails we leave through openerr without touching anything.
*

	bra.s	startup

	dc.b	12,10,"* THIS IS SHAREWARE !*",0
	even				* the notice is 25 bytes long, so without this
					* the code below would start on an odd address
					* unless the assembler realigns it for us

startup	move.l	_SysBase,a6	* Pointer to Exec Library
	lea	thedos(pc),a1	* Point to title of DOS
	moveq	#0,d0		* Version number (0 = any)
	jsr	_LVOOpenLibrary(a6)
	move.l	d0,_DosBase	* DOS library Pointer (the move sets Z for the test)
	beq	openerr		* Error, can't open library!

* Open Console for I/O.
*
* Fetches both standard handles from DOS.  Note the variable names: the
* Input() handle is stored in ohandle (which nothing below reads again),
* the Output() handle is stored in chandle, which is the handle every
* print routine in this file writes to.
*
* The banner text is emitted with p: the NUL terminated string sits
* directly after the bsr and p returns to the first even address past it.

	move.l	_DosBase,a6
	jsr	_LVOInput(a6)
	move.l	d0,ohandle

	jsr	_LVOOutput(a6)
	move.l	d0,chandle

	bsr	p
	dc.b	12,"SPEED 2.00 - Amiga Performance Analyser by Jez San (c)1989 Argonaut Software.",10,10
	dc.b	"This tests two aspects of your Amiga system :-",10
	dc.b	"a) The approximate clockspeed the processor is running at.",10
	dc.b	"b) The approximate system performance in the form of a benchmark.",10
	dc.b	10
	dc.b	"Both figures a & b are important for different reasons eg: Despite",10
	dc.b	"having a fast clockspeed, your machine may not run at its full",10
	dc.b	"system performance, due to Waitstates, Cache's, Burstfetches etc",10
	dc.b	"Hence the need for a good measure of both clockspeed AND realworld",10
	dc.b	"performance when compared to a normally aspirated Amiga.",10
	dc.b	10

	dc.b	0
	even

*
* First determine which processor we are running on: 68000, 68010, 68020,
* 68030 or 68040.
*
* The AttnFlags word of the Exec base is tested from the highest processor
* bit downwards (bit 3 = 68040, bit 2 = 68030, bit 1 = 68020, bit 0 = 68010),
* so the first bit found set names the processor; no bit set means a 68000.
*
* Each case prints the processor name and stores a calibration constant in
* procrate.  The clockspeed section later divides procrate by the number of
* ticks lotsadivs took, and prints the result as the clock speed in MHz.
*

	bsr	p
	dc.b	"The Amiga EXEC reckons that a ",0
	even

	move.l	4,a6
	moveq	#0,d0
	move.w	AttnFlags(a6),d0

	move.l	_DosBase,a6

	btst	#3,d0			* 68040?
	beq.s	trynext0

	bsr	p
	dc.b	"68040",0
	even
	move.l	#30*20,procrate		* Tick rate * Mhz (dead) <<-- GUESSWORK!
	bra.s	definiteproccy

trynext0
	btst	#2,d0			* 68030?
	beq.s	trynext1

	bsr	p
	dc.b	"68030",0
	even
	move.l	#37*20,procrate		* Tick rate * Mhz (approx)
	bra.s	definiteproccy

trynext1
	btst	#1,d0
	beq.s	trynext2

	bsr	p			* 68020
	dc.b	"68020",0
	even
	move.l	#53*14+17+3,procrate	* Tickrate * Mhz + frac
	bra.s	definiteproccy


trynext2
	btst	#0,d0
	beq.s	trynext3

	bsr	p			* 68010
	dc.b	"68010",0
	even
	move.l	#242*7,procrate		* (at a guess!)
	bra.s	definiteproccy

trynext3
	bsr	p			* 68000
	dc.b	"68000",0
	even
	move.l	#242*7+38,procrate	* Tickrate * Mhz + frac

definiteproccy
	bsr	p
	dc.b	" processor is installed in this system.",10,0
	even

*
* next, Determine clockspeed...
*
* Times one run of lotsadivs with task switching forbidden, prints the
* elapsed tick count, then prints procrate / ticks as the clock speed.
* A run that registers no ticks at all cannot be turned into a figure, so
* it is reported through closeerr instead of being fed to printfrac as a
* zero divisor.
*

	bsr	p
	dc.b	"Analysing clockspeed... ",0
	even

	bsr	gettimer
	move.l	d0,-(sp)		* start time

	forbid
	bsr	lotsadivs
	permit

	bsr	gettimer
	sub.l	(sp)+,d0		* d0 = elapsed ticks, stack is level again

	tst.w	d0			* printfrac divides by the low word only
	beq	closeerr		* nothing measured - say so and quit

	move.l	d0,-(sp)		* keep the tick count across the prints

	bsr	p
	dc.b	"Took ",0
	even

	move.l	(sp),d0
	bsr	printbin2dec

	bsr	p
	dc.b	" ticks, which is ",0
	even

	move.l	(sp)+,d6		* divisor = ticks taken
	move.l	procrate,d0		* dividend = per-processor calibration value
	move.l	d0,d5			* (copy kept as before; printfrac does not read d5)
	bsr	printfrac

	bsr	p
	dc.b	" Mhz +-2% (fig A)",10,10,0
	even


*
* Next, determine system throughput... which is a real-world comparative
* figure of Amiga to Amiga performance.   Ie: Tests processor speed, ram
* fetch speed, and execution speed in an aggregate benchmark of integer
* performance that real world programs should realise.
*
* Times SlightlyMoistSmallRocks with task switching forbidden, prints the
* tick count, then prints 702/ticks and 234/ticks as the two ratios.  The
* integer part of the second ratio is left in 'integer' by printfrac and
* is what the rating text that follows is chosen from.
* A run that registers no ticks is reported through closeerr rather than
* being used as a zero divisor.
*

	bsr	gettimer
	move.l	d0,-(sp)		* start time

	bsr	p
	dc.b	"Computing 10,000 SlightlyMoistSmallRocks in... ",0
	even

	forbid
	bsr	SlightlyMoistSmallRocks		* YAB
	permit

	bsr	gettimer
	sub.l	(sp)+,d0		* d0 = elapsed ticks, stack is level again
	move.l	d0,d6

	tst.w	d6			* printfrac divides by the low word only
	beq	closeerr		* nothing measured - say so and quit

	move.l	d6,-(sp)		* keep the tick count for both ratios

	bsr	printbin2dec

	bsr	p
	dc.b	" ticks.",10,10,0
	even

	move.l	(sp),d6
	move.l	#702,d0
	move.l	d0,d5			* (copy kept as before; printfrac does not read d5)

	bsr	printfrac

	bsr	p
	dc.b	" times (fig B) the speed of a normal Amiga at 7.159 Mhz +-2%.",10,0
	even

	move.l	(sp)+,d6
	move.l	#234,d0
	move.l	d0,d5
	bsr	printfrac

	bsr	p
	dc.b	" times the speed of an A2620 at 14.318 Mhz +-2%.",10
	dc.b	10
	dc.b	"Rating against an A2620 : ",0
	even

* Is it fast enough to be worth commenting about...?
*
* 'integer' holds the whole-number part stored by the most recent
* printfrac call.  The chain below counts it down one step per dbra: each
* dbra decrements d0 and branches on to the next test unless d0 has just
* gone to -1.  So a value of 0 stops at the first message, 1 at the
* second, and so on; anything from 8 upwards reaches the last message.
* Every case ends at arnold, the common exit.

	move.w	integer,d0
	dbra	d0,notworth1
	bsr	p
	dc.b	"Yawn.",10,0
	even

	bra	arnold

notworth1
	dbra	d0,notworth2

	bsr	p
	dc.b	"About the same.",10,0
	even

	bra	arnold

notworth2
	dbra	d0,notworth3

	bsr	p
	dc.b	"Quite fast.",10,0
	even

	bra	arnold

notworth3
	dbra	d0,notworth4

	bsr	p
	dc.b	"Pretty fast.",10,0
	even

	bra	arnold

notworth4
	dbra	d0,notworth5

	bsr	p
	dc.b	"Quite Impressive.",10,0
	even

	bra	arnold


notworth5
	dbra	d0,notworth6

	bsr	p
	dc.b	"Mighty Impressive.",10,0
	even

	bra	arnold

notworth6
	dbra	d0,notworth7

	bsr	p
	dc.b	"Like wow man!",10,0
	even

	bra	arnold

notworth7
	dbra	d0,notworth8

	bsr	p
	dc.b	"This is my kinda machine!!",10,0
	even

	bra	arnold

notworth8
	bsr	p
	dc.b	"Off the scale, this machine has some serious speed!",10,0
	even


* Terminator
*
* arnold/close2: common exit - close dos.library and return.
* openerr: exit taken when dos.library could not be opened, so there is
* nothing to close.
*
* d0 is set explicitly on both paths: it is what the program hands back
* to whoever started it, and after CloseLibrary its contents are not
* something to rely on.  0 = finished normally, 20 = could not start.
arnold

close2	move.l	_SysBase,a6
	move.l	_DosBase,a1	 * Close the DOS library
	jsr	_LVOCloseLibrary(a6)
	moveq	#0,d0		 * exit code: OK
	rts

openerr	moveq	#20,d0		 * exit code: failure
	rts

* Error exit: report that a timing came out as zero ticks, then leave
* through close2 so dos.library is still closed.
* Must be entered with the stack at the level it had on program entry,
* because close2 ends in a plain rts.
closeerr
	bsr	p
	dc.b	"Hmmm, Error in Calculation (Zero time taken!).",10,0
	even
	bra.s	close2


*
* This entire loop takes up about 50000 * 5 * 62 = 15,500,000 cycles
* ie: 0.78 seconds at 20 mhz on a 68030 system
*
* The use of registerised Divides is to reduce ramfetching to a minimum
* and really give the processor something to think about, hence useful
* for calculating raw clockspeed as opposed to system performance!
*
* Takes no parameters and returns nothing; clobbers d0-d3 and d7.
*
* The longword alignment is applied to the entry point rather than to the
* loop label.  Padding placed between the set-up instructions and the
* loop would sit in the execution path and be run as code on the way in.
* The three set-up instructions are 4+6+6 = 16 bytes, so aligning the
* entry leaves the loop itself on exactly the same alignment as before.
*

	cnop	0,4

lotsadivs
	move.w	#50000,d7		* Loop counter (dbra runs the body count+1 times)
	move.l	#65535,d2		* Preload the parameters to ensure fair timings
	move.l	#65535,d3

* This loop will be cached perfectly and shouldnt have much overhead
* so is a good test of clockspeed rather than system throughput.

	cnop	0,4			* already aligned - emits nothing; documents the intent

lotsadivs2
	move.l	d2,d0			* Total loop time is 5* 62 cycles
	move.l	d3,d1
	divs	d0,d1

	move.l	d2,d0
	move.l	d3,d1
	divs	d0,d1

	move.l	d2,d0
	move.l	d3,d1
	divs	d0,d1

	move.l	d2,d0
	move.l	d3,d1
	divs	d0,d1

	move.l	d2,d0
	move.l	d3,d1
	divs	d0,d1

	dbra	d7,lotsadivs2
	rts


*
* SlightlyMoistSmallRocks:-
* The Performance test benchmark consists of some commonly used math, logic
* and general instructions in a mishmash that hopefully represents a reasonable
* average program in genre mix.
*
* ie: 1 mul, 1 div, 2 shifts, 5 logic, and 5 general instructions in a mix.
* Also note the General instructions are opened out to ensure that a
* 256 byte cache will miss 50% thus having to fetch from RAM.  On a 68040
* the cache will catch it all unless we open it out to 8k
*
* This benchmark tests general system performance rather than mere clockspeed!
*
* Takes no parameters and returns nothing.  d7 is the loop counter; the
* fragments called from the loop use d0, d1 and a0 as scratch.
* The instruction sequence is what is being timed, so it must not be
* rearranged: the constants the results are compared against were
* presumably measured with exactly this code.
*

SlightlyMoistSmallRocks
	move.w	#10000,d7		* Number of times to do this benchmark loop

* One pass = 1 x domuls, 1 x dodivs, 2 x doshifts, 5 x dologic, 5 x dogeneral
benchmark
	bsr	domuls
	bsr	dodivs
	bsr	doshifts
	bsr	doshifts
	bsr	dologic
	bsr	dologic
	bsr	dologic
	bsr	dologic
	bsr	dologic
	bsr	dogeneral
	bsr	dogeneral
	bsr	dogeneral
	bsr	dogeneral
	bsr	dogeneral

	dbra	d7,benchmark		* dbra falls through once d7 reaches -1
	rts

	cnop	0,4

* Benchmark frags...

* domuls - one signed and one unsigned 16x16 multiply on fixed operands.
* The products are not used; only the time taken matters.
* Clobbers d0 and d1.
domuls	move.l	#32767,d0
	move.l	#32767,d1
	muls	d0,d1			* 32767 * 32767, signed
	move.l	#32767,d0
	moveq	#-1,d1			* low word $FFFF as the unsigned operand
	mulu	d0,d1
	rts

	cnop	0,4

* dodivs - one signed and one unsigned 32/16 divide on fixed operands
* (65535 / 32767 each time).  The results are not used.
* Clobbers d0 and d1.
dodivs	move.l	#65535,d0
	move.l	#32767,d1
	divs	d1,d0
	move.l	#65535,d0
	move.l	#32767,d1
	divu	d1,d0
	rts

	cnop	0,4

* doshifts - four 8-bit long shifts/rotates applied to whatever value the
* previous fragment left in d0.  Only d0 is changed.
doshifts
	asl.l	#8,d0
	asr.l	#8,d0
	rol.l	#8,d0
	ror.l	#8,d0
	rts

	cnop	0,4

*
* dogeneral - the "general instruction" part of the benchmark.
*
* The routine is eight back-to-back copies of the same 18-instruction
* slice, written out in full in the object code (no loop) so that it
* occupies a long stretch of memory.  The slice is defined once here as a
* macro and expanded eight times.  Defining the macro emits no bytes, so
* dogeneral keeps the alignment set by the cnop in front of it.
*
* Each slice does: four doublings, an add/sub pair of an immediate, three
* table reads through a0, a call to a bare rts (dogen2), sign extensions,
* an immediate load, swap, exg, clr and neg.  The values computed are
* thrown away.  Clobbers d0, d1 and a0.
*

genmix	macro
	add.l	d0,d0
	add.l	d0,d0
	add.l	d0,d0
	add.l	d0,d0
	add.l	#12345,d0
	sub.l	#12345,d0
	lea	tentable(pc),a0
	move.l	4(a0),d0
	move.l	8(a0),d0
	move.l	12(a0),d0
	bsr	dogen2
	ext.w	d0
	ext.l	d0
	move.l	#12345,d0
	swap	d0
	exg	d0,d1
	clr.l	d1
	neg.l	d0
	endm

dogeneral
	genmix				* slice 1
	genmix				* slice 2
	genmix				* slice 3
	genmix				* slice 4
	genmix				* slice 5
	genmix				* slice 6
	genmix				* slice 7
	genmix				* slice 8

	bra	dogen1			* taken branch over the nop
	nop
dogen1	move.w	#123,d1
dogen2	rts				* also the target of every bsr in the slices

	cnop	0,4

* dologic - five long logical operations applied to whatever the previous
* fragment left in d0 (d1 is read but not changed).  Result is not used.
dologic	and.l	#65535,d0
	or.l	#65535,d0
	eor.l	#65535,d0
	and.l	d1,d0
	not.l	d0
	rts


*
* gettimer - read the DOS clock and return it as a single tick count.
*
* Out:	d0 = minutes * 3000 + ticks, built from the low words of the
*	     second and third long words that DateStamp stores in datevals
*	d1 = the tick field (low word, zero extended)
*	a0 = datevals, a6 = _DosBase
*
* The first long word of datevals is ignored, so two readings are only
* comparable while the minutes field has not started again from zero.
*

gettimer
	movea.l	_DosBase,a6		* Get DOS's 1/50th sec timestamp
	lea	datevals(pc),a0
	move.l	a0,d1			* d1 = where DateStamp should store its result
	jsr	_LVODateStamp(a6)

	lea	datevals(pc),a0		* reload: a0 is not kept across the call

	moveq	#0,d0
	move.w	4+2(a0),d0		* Minutes (low word of the long at offset 4)
	mulu	#60*50,d0		* Find it in fiftieths of Seconds!

	moveq	#0,d1
	move.w	8+2(a0),d1		* Ticks, 50 per second! (low word of the long at offset 8)

	add.l	d1,d0

	rts




*
* fastish print string..like JSR p on the old beebon
* all registers unaffected.
*
* Enter with text immediately following the BSR P subroutine call.
* This then prints the text out, and returns to the PC immediate after
* where P was called from.  Note: Some debuggers have trouble single stepping
* this code.  Their fault, not ours!
*
* In:	NUL terminated text placed directly after the bsr
*	chandle = handle to write to, _DosBase = dos.library base
* Out:	execution resumes at the first even address after the NUL
*	every register is restored (condition codes are not preserved)
*
* Everything that p itself or the Write call may alter (d0-d3, a0, a1,
* a3, a6) is saved on entry and restored on exit, so the "all registers
* unaffected" promise above really holds.  The new return address is
* worked out in d0 before the registers are restored, so no caller
* register is used as scratch after the restore.
*

p	movem.l	d0-d3/a0/a1/a3/a6,-(a7)	* 8 registers = 32 bytes
	move.l	32(a7),a3		* return address = posn. of text to be printed
	move.l	a3,d2
p1	tst.b	(a3)+			* } find length of text
	bne.s	p1			* }

	move.l	a3,d3			* a3 = one past the NUL
	subq.l	#1,d3			* dont include NULL
	sub.l	d2,d3			* now., d3=length of text
	move.l	chandle,d1		* now, d1=handle, d2=posn of text, d3=length
	move.l	_DosBase,a6
	jsr	_LVOWrite(a6)		* result in d0 is ignored

	move.l	a3,d0			* round the resume address up to the
	addq.l	#1,d0			* next even address: (a3+1) with bit 0
	bclr	#0,d0			* cleared
	move.l	d0,32(a7)		* stick next instruction posn back on stack

	movem.l	(a7)+,d0-d3/a0/a1/a3/a6	* restore regs..
	rts				* finish!


*
* Prints out the time in a nn.nn quantity
*
* Uses fixed point 'Rithmetic.
*
* In:	d0 = dividend (long), d6 = divisor (low word only)
* Out:	"<d0/d6>.<two decimals>" written to chandle
*	integer = whole-number part of the quotient (0 if d6 was zero)
*	returns through Write (tail call), so d0 is Write's result
* Uses:	d0-d4, a4, a5, a6 plus whatever Write alters
*
* The quotient must fit in 16 bits; the callers in this file only pass
* small dividends.
*
* A zero divisor is printed as "0.0".  The output cursor a4 is set up
* before that test, so every path that appends a character has a valid
* buffer pointer and the length handed to Write is always meaningful.
*

printfrac
	lea	printbuffer(pc),a4	* output cursor - needed by every path below

	tst.w	d6
	bne.s	pfdivide

	clr.w	integer			* nothing to divide by: report "0.0"
	move.b	#"0",(a4)+
	move.b	#".",(a4)+
	bra.s	nodiv3

pfdivide
	divu	d6,d0			* d0 = remainder:quotient

	move.l	d0,d4			* Save off remainder into d4
	clr.w	d4
	swap	d4

	move.w	d0,integer

	bsr	bin2decw
	move.b	#".",(a4)+		* Decimal point

	mulu	#100,d4			* Calculate n/100th Fractional part
	divu	d6,d4			* (d6 is known to be non-zero here)

	cmp.w	#100,d4			* Error check... Greater than 100/100 ?
	bge.s	nodiv3			* Sounds fishy to me!

	cmp.w	#10,d4			* Single lone decimal digit
	bge.s	nolead0

	move.b	#"0",(a4)+		* ...needs leading 0

nolead0	move.l	d4,d0			* Display it
	bsr	bin2decw
	bra.s	nodiv4

nodiv3	move.b	#"0",(a4)+		* Force .0 if integer is whole

nodiv4	lea	printbuffer(pc),a5	* Okeydoke, now prindit
	move.l	a5,d2			* d2 = Address
	sub.l	a5,a4
	move.l	a4,d3			* d3 = length
	move.l	chandle,d1		* now, d1=handle, d2=posn of text, d3=length
	move.l	_DosBase,a6
	jmp	_LVOWrite(a6)		* Write's rts returns to our caller


*
* printbin2dec - print the number in d0 in decimal on chandle.
*
* In:	d0 = value; it is converted by bin2decw, so only the low word is
*	     printed
* Out:	d0 preserved
* Uses:	d1-d3, a4, a5, a6 plus whatever Write alters
*

printbin2dec
	move.l	d0,-(sp)		* caller gets d0 back

	lea	printbuffer(pc),a4	* convert into the shared buffer
	bsr	bin2decw		* a4 = end of the text

	move.l	_DosBase,a6
	move.l	chandle,d1		* d1 = handle
	lea	printbuffer(pc),a5
	suba.l	a5,a4			* a4 = number of characters produced
	move.l	a4,d3			* d3 = length
	move.l	a5,d2			* d2 = Address
	jsr	_LVOWrite(a6)

	move.l	(sp)+,d0
	rts


	cnop	0,4

* Power of Ten table! (for binary/ascii routines!)
*
* Ten long words, 10^0 up to 10^9, in ascending order.
* The conversion code walks the table downwards with -(a0), so a routine
* that loads the address of one of the inner labels starts at the entry
* just BEFORE that label: loading tenbckw gives 10000 as the first value
* read, loading tenback gives 100000000.
* dogeneral also reads the entries at offsets 4, 8 and 12 as dummy loads.

tentable
	dc.l	 1			* P
	dc.l	 10			*  r
	dc.l	 100			*   e
	dc.l	 1000			*    t
	dc.l	 10000			*     t
tenbckw	dc.l	 100000			*      y
	dc.l	 1000000		*       
	dc.l	 10000000		*       h
	dc.l	 100000000		*        u
tenback	dc.l	 1000000000		*         h
					*          ?

*
* bin2decw / bin2decl - append a signed decimal number to a text buffer.
*
* In:	d0 = value (bin2decw: low word only, bin2decl: whole long word)
*	a4 = where the first character goes
* Out:	a4 = one past the last character written; a NUL is stored there
*	     but a4 does not step over it, so it is never counted as output
*	all other registers preserved
*
* Negative values get a leading '-'.  Each digit is found by repeatedly
* subtracting a power of ten from tentable, largest first; d4 becomes
* non-zero once a digit has been stored, which suppresses leading zeros.
*
* bin2decl starts from the last table entry (1000000000) and produces up
* to ten digits, so the full long word range prints correctly.
*

* Print a number that is contained in binary in d0
bin2decw	movem.l	d0-d7/a0,-(sp)
	and.l	#$ffff,d0
	clr.w	d4
	lea	tenbckw(pc),a0		* first -(a0) fetches 10000
	moveq	#4,d3			* 5 digit positions
	tst.w	d0
	beq.s	binzero
	bpl.s	bindecl
	neg.w	d0			* high word is already zero
	bra.s	bindecw

* Print up a Long Word in binary
bin2decl	movem.l	d0-d7/a0,-(sp)
	clr.w	d4
	lea	tenback+4(pc),a0	* first -(a0) fetches 1000000000
	moveq	#9,d3			* 10 digit positions
	tst.l	d0
	beq.s	binzero
	bpl.s	bindecl
	neg.l	d0

bindecw	move.b	#'-',(a4)+

bindecl	move.l	-(a0),d1		* d1 = next lower power of ten
	clr.w	d2			* d2 = digit being counted

bindec1	sub.l	d1,d0
	bmi.s	bindec2
	addq.w	#1,d2
	bra.s	bindec1

bindec2	add.l	d1,d0			* undo the subtraction that went negative

	tst.w	d2
	bne.s	bindecz
	tst.w	d4			* a zero digit: skip it unless a digit
	beq.s	bindec3			* has already been stored

bindecz	move.w	d2,d1
	add.w	#$30,d1
	move.b	d1,(a4)+
	moveq	#1,d4

bindec3	dbra	d3,bindecl

binexit	clr.b	(a4)			* terminate, but leave a4 on the NUL
	movem.l	(sp)+,d0-d7/a0
	rts

* Special Case of Zero

binzero	move.b	#'0',(a4)+
	bra.s	binexit




* Library name handed to OpenLibrary at startup.
thedos	dc.b	"dos.library",0

* Whole-number part of the last value printed by printfrac; read by the
* rating code to pick its message.
integer		ds.w	1

	cnop	0,4

_DosBase	ds.l	1		* dos.library base, set at startup
chandle		ds.l	1		* result of Output() - every print goes here
ohandle		ds.l	1		* result of Input() (despite the name); stored but never read
procrate	ds.l	1		* per-processor constant set by the detection code


* Filled in by DateStamp; gettimer reads the long words at offsets 4 and 8.
datevals	ds.l	4

* 40 byte scratch buffer shared by printfrac and printbin2dec.
printbuffer	ds.l	10


	end

