; Tiny Truecolor VBE2.0+ interface
; (c) 1999 Peter Korsgaard aka jacmet (jacmet@kom.auc.dk)
; -------------------------------------------------------
; Purpose:
; --------
; Easy, simple interface. The user requests a resolution, and a screen mode of the same or higher resolution is set
; with a color depth of 32, 24, 16 or 15 bits. The screen can be updated by calling gfx_flip with the address of
; a 32 bit BGRA buffer of the requested dimensions.
;
; History:
; --------
; [25.07.99]: First try. Basic structures set up, but nothing much.
; [26.07.99]: Vesa detection added. It uses the DJGPP (go32) transfer buffer in realmode.
; [27.07.99]: Modelist sorting added.
; [25.08.99]: Back from holidays. Clean up of sourcecode (trying to understand it ;)
; [28.08.99]: System up and running (32bit BGRA/RGBA, 24bit BGR/RGB and 16bit BGR)
; [01.09.99]: Working on vga flip
; [13.09.99]: Added vga palette
; [14.09.99]: Implemented vga flip routine
; [17.09.99]: Fixed a bug in centering code + singlebuffer.
; [19.09.99]: Added a check for width too - now you can ask for screens with strange dimensions like 1280x2

; Interface:
; ----------
; void gfx_init();			// initialize system
; void gfx_open(int width,int height);	// open requested screenmode (try 32,24,16,15 in a screen with yres>=height and xres>=width)
; void gfx_flip(int *source);		// flip image to screen 
; void gfx_close();			// close screenmode and return to textmode
; void gfx_deinit();			// deinitialize system
; int vesa_detected;			// is vbe2+ detected (1=yes,0=no)
; mode modelist[];			// modelist (see mode structure) terminated with dimensions=-1
; int framecounter;			// counts how many frames have been displayed since start
; void* screenaddress;			// a pointer to the framebuffer
;
; Example:
; --------
;
; void main()
; {
;   int buffer[320*200];
;   gfx_init();
;   gfx_open(320,200);
;
;   while (1==1)
;    {
;      // fill buffer
;      gfx_flip(buffer);
;    }    
;
;   gfx_close();
;   gfx_deinit();
; }

[BITS 32]

; Defines:
; --------

; Number of mode entries to reserve space for in the mode list. gfx_init
; probes the VBE mode numbers 0x100..0x1ff but only records the direct color
; ones, so the list is unlikely to ever fill up completely.
; The size of the mode list array is derived from this constant.
max_number_of_modes	equ	256

; Value stored in vesa_detected when a VBE 2.0+ BIOS has been found.
true			equ	1

; Structures:
; -----------

; mode structure. Used internally to keep track of the available gfx modes.
; One entry is two dwords. The first dword is a packed key; comparing it as
; an unsigned number orders modes by width, then height, then depth code:
;   bits 0..1  : depth code (00b=32, 01b=24, 10b=16, 11b=15 bits per pixel)
;   bits 2..17 : height
;   bits 18..31: width
; The second dword (bits 32..63 of the entry) is the VBE mode number.
struc mode
	.dimensions:	resd 1	; +0 packed width | height | depth code
	.number:	resd 1	; +4 VBE mode number
endstruc

; DPMI real mode register block. This is the structure passed in es:edi to
; DPMI function 0x0300 (simulate real mode interrupt), which this file uses
; for the VESA BIOS calls. Offsets are given in the comments.
struc dpmi_realmoderegister
	.edi:		resd 1	; +0x00
	.esi:		resd 1	; +0x04
	.ebp:		resd 1	; +0x08
	.reserved:	resd 1	; +0x0c
	.ebx:		resd 1	; +0x10
	.edx:		resd 1	; +0x14
	.ecx:		resd 1	; +0x18
	.eax:		resd 1	; +0x1c
	.flags:		resw 1	; +0x20
	.es:		resw 1	; +0x22
	.ds:		resw 1	; +0x24
	.fs:		resw 1	; +0x26
	.gs:		resw 1	; +0x28
	.ip:		resw 1	; +0x2a
	.cs:		resw 1	; +0x2c
	.sp:		resw 1	; +0x2e  sp and ss are adjacent, so one dword
	.ss:		resw 1	; +0x30  store at .sp clears both
endstruc

; go32 extender info block (layout of the external __go32_info_block).
; Only .transfer_buffer is read by this file: it is the linear address of the
; real mode transfer buffer that receives the results of the VBE calls
; 0x4f00 and 0x4f01.
struc go32_infoblock
	.size_of:		resd 1	; +0x00
	.primary_screen:	resd 1	; +0x04
	.secondary_screen:	resd 1	; +0x08
	.transfer_buffer:	resd 1	; +0x0c linear address of the transfer buffer
	.size_of_tb:		resd 1	; +0x10
	.pid:			resd 1	; +0x14
	.master_intctrl:	resb 1	; +0x18
	.slave_intctrl:		resb 1	; +0x19
	.dos_selector:		resw 1	; +0x1a
	.stub_info:		resd 1	; +0x1c
	.psp:			resd 1	; +0x20
	.run_mode:		resw 1	; +0x24
	.run_mode_info:		resw 1	; +0x26
endstruc

; VESA controller information block, as filled in by VBE call 0x4f00.
; 512 bytes in total. This file uses .signature, .version and .memory.
struc vbe_controllerinfo
	.signature:		resb 4		; +0x00 'VESA' ('VBE2' is written before the call)
	.version:		resw 1		; +0x04 0x0200 = version 2.00
	.oem_str:		resd 1		; +0x06
	.capabilities:		resd 1		; +0x0a
	.mode_pointer:		resd 1		; +0x0e
	.memory:		resw 1		; +0x12 video memory in 64 kb blocks
	.software_ver:		resw 1		; +0x14
	.vendor_str:		resd 1		; +0x16
	.product_str:		resd 1		; +0x1a
	.product_ver_str:	resd 1		; +0x1e
	.filler:		resb 222	; +0x22
	.oemdata:		resb 256	; +0x100
endstruc

; VESA mode information block, as filled in by VBE call 0x4f01.
; 256 bytes in total. Fields read by this file: .pitch, .x_res, .y_res,
; .z_res (bits per pixel), .memory_model, .n_images, .red_field_pos and
; .physical_base.
struc vbe_modeinfo
	.mode_attr:		resw 1		; +0x00
	.wina_attr:		resb 1		; +0x02
	.winb_attr:		resb 1		; +0x03
	.win_granularity:	resw 1		; +0x04
	.win_size:		resw 1		; +0x06
	.wina_segment:		resw 1		; +0x08
	.winb_segment:		resw 1		; +0x0a
	.win_function_ptr:	resd 1		; +0x0c
	.pitch:			resw 1		; +0x10 bytes per scanline
	.x_res:			resw 1		; +0x12 x_res and y_res are adjacent and are
	.y_res:			resw 1		; +0x14 fetched together as one dword by gfx_init
	.x_charsize:		resb 1		; +0x16
	.y_charsize:		resb 1		; +0x17
	.n_planes:		resb 1		; +0x18
	.z_res:			resb 1		; +0x19 bits per pixel
	.n_banks:		resb 1		; +0x1a
	.memory_model:		resb 1		; +0x1b 6 = direct color
	.bank_size:		resb 1		; +0x1c
	.n_images:		resb 1		; +0x1d
	.reserved:		resb 1		; +0x1e
	.red_mask_size:		resb 1		; +0x1f
	.red_field_pos:		resb 1		; +0x20
	.green_mask_size:	resb 1		; +0x21
	.green_field_pos:	resb 1		; +0x22
	.blue_mask_size:	resb 1		; +0x23
	.blue_field_pos:	resb 1		; +0x24
	.rsvd_mask_size:	resb 1		; +0x25
	.rsvd_field_pos:	resb 1		; +0x26
	.dir_color_info:	resb 1		; +0x27
	.physical_base:		resd 1		; +0x28 physical address of the linear framebuffer
	.offscreen_ofs:		resd 1		; +0x2c
	.offscreen_size:	resw 1		; +0x30
	.filler:		resb 206	; +0x32
endstruc

[SECTION .text]

; External references:
; --------------------
extern __go32_info_block
extern ___djgpp_base_address
extern ___djgpp_nearptr_enable
extern ___djgpp_nearptr_disable

; void gfx_init(); - detects vbe version and fills modelist with available modes.
;
; Steps:
;   1. enable djgpp near pointers (the transfer buffer is accessed through ds)
;   2. VBE call 0x4f00 (controller info) into the go32 transfer buffer
;   3. if a VBE 2.0+ BIOS answered, probe mode numbers 0x100..0x1ff with
;      getmodeinfo and record every usable direct color mode in _modelist
;   4. terminate the list with dimensions = -1 and sort it ascending on the
;      packed dimensions key
; The controller info occupies the first vbe_controllerinfo_size bytes of the
; transfer buffer; the mode info block is placed directly behind it.
; On any failure _vesa_detected is left untouched and the function returns.
; All registers are preserved (pushad/popad).
[GLOBAL _gfx_init]
_gfx_init:
	pushad

	call	___djgpp_nearptr_enable
	test	eax,eax							; 0 = near pointers could not be enabled
	jz	near .cleanupstack					; then the transfer buffer can't be reached through ds

; use go32 transfer buffer (in realmode space) as buffer for vesa 0x4f00 call
	mov 	eax,[__go32_info_block+go32_infoblock.transfer_buffer]
	mov	esi,eax
	mov 	ebx,eax
	sub	esi,[___djgpp_base_address]				; calculate address relative to ds
	mov	dword [esi+vbe_controllerinfo.signature],'VBE2'		; we want vbe2.0+ information
	mov 	dword [realmoderegs+dpmi_realmoderegister.sp],0		; ss:sp=0 -> use dpmi server's stack
	mov	word [realmoderegs+dpmi_realmoderegister.flags],0	; start the call with clean flags
	mov	dword [realmoderegs+dpmi_realmoderegister.eax],0x4f00	; vesa get controllerinfo call
	shr	eax,4							; eax = realmode segment of transfer buffer
	mov 	word [realmoderegs+dpmi_realmoderegister.es],ax
	and	ebx,0xf							; ebx = realmode offset of transfer buffer
	mov	dword [realmoderegs+dpmi_realmoderegister.edi],ebx

	mov 	eax,0x300						; simulate realmode int dpmi call
	mov	ebx,0x10
	xor	ecx,ecx							; don't copy stack
	lea	edi,[realmoderegs]
	int	0x31
	jc 	near .cleanupstack					; did it fail?

; the dpmi call itself worked - now make sure a vbe bios really answered.
; Without these checks the version word below could be stale data that
; happened to be in the transfer buffer.
	cmp	word [realmoderegs+dpmi_realmoderegister.eax],0x004f	; al=0x4f: supported, ah=0: success
	jne	near .cleanupstack
	cmp	dword [esi+vbe_controllerinfo.signature],'VESA'		; the bios replaces 'VBE2' with 'VESA'
	jne	near .cleanupstack

	add 	word [realmoderegs+dpmi_realmoderegister.es],(vbe_controllerinfo_size+15)/16 ; point realmode regs to modeinfo structure
	
	cmp 	word [esi+vbe_controllerinfo.version],0x200		; is it version 2.00+ ?
	jb	near .cleanupstack					; no, quit

	mov	byte [_vesa_detected],true				; great, we got vesa 2.00+, now detect modes available

	lea	edx,[_modelist]						; point edx to modelist
	mov	ebx,0xff						; first mode to check (-1, incremented before use)

.fillmodelist:  ; search through all modes and fill modelist with the detected direct color modes
	inc 	ebx							; next mode
	cmp 	ebx,0x200						; are we finished?
	je	.terminatemodelist

	call	getmodeinfo

	cmp	word [realmoderegs+dpmi_realmoderegister.eax],0x004f	; did the bios report success for this mode?
	jne	.fillmodelist						; no, try next mode

; gfx_open always sets modes with the linear framebuffer bit, so only list
; modes that are supported by the hardware (bit 0) and offer a linear
; framebuffer (bit 7).
	mov	ax,[esi+vbe_controllerinfo_size+vbe_modeinfo.mode_attr]
	and	ax,0x0081
	cmp	ax,0x0081
	jne	.fillmodelist						; not usable, try next mode

direct_color	equ 6
	cmp	byte [esi+vbe_controllerinfo_size+vbe_modeinfo.memory_model],direct_color ; is it a direct(true)color mode?
	jne	.fillmodelist						; no, try next mode
	
; mode is supported and a direct color type - now insert it in the modelist
	mov	ecx,dword [esi+vbe_controllerinfo_size+vbe_modeinfo.x_res] ; ecx = yres(16..31) | xres(0..15)
	ror	ecx,14							; ecx = xres(18..31)| yres(2..17)
	mov	al,39							; convert zres (15,16,24,32) to 2 bits
	sub	al,[esi+vbe_controllerinfo_size+vbe_modeinfo.z_res]	; 32=00b,24=01b,16=10b and 15=11b
	shr	al,3
	add	cl,al							; ecx = xres(18..31)| yres(2..17)| zres(0..1)
	
	mov	dword [edx+mode.dimensions],ecx				; fill dimensions
	mov	dword [edx+mode.number],ebx
	
	add	edx,mode_size						; point edx to next mode
	jmp	short .fillmodelist
	
.terminatemodelist:
	mov	dword [edx+mode.dimensions],-1				; terminate with dimensions = -1

	jmp	short .sortmodelist

.cleanupstack:
	popad
	ret	

.sortmodelist:
; sort the entries in front of the terminator (edx) ascending on dimensions.
; edi = slot being filled, esi = candidate further down the list; whenever
; the candidate is not larger than the slot the two entries are exchanged.
	lea	edi,[_modelist-mode_size]				; point edi to just before modelist
.sort:
	add	edi,mode_size
	cmp	edi,edx							; are we finished sorting?
	je	.cleanupstack

	mov	esi,edi							; esi = pointer into modelist (from index)
.sort_inner:
	add	esi,mode_size						; next mode
	cmp	esi,edx							; have we reached the end of the modelist
	je	.sort							; yes, continue with next index

	mov	eax,[esi+mode.dimensions]
	mov	ebx,[edi+mode.dimensions]
	cmp	eax,ebx							; are the dimensions of this mode larger than the first one?
	ja	.sort_inner						; yes, try next mode
        ; switch
	mov	[edi+mode.dimensions],eax
	mov	[esi+mode.dimensions],ebx
	mov	eax,[esi+mode.number]
	mov	ebx,[edi+mode.number]
	mov	[edi+mode.number],eax
	mov	[esi+mode.number],ebx

	jmp	.sort_inner

; void gfx_open(int width,int height);
;
; Opens a screen mode for a width x height source image.
;  - If gfx_init detected VBE 2.0+, the first entry of the (sorted) modelist
;    that is at least as wide and as high as requested is set with a linear
;    framebuffer, the framebuffer is mapped through DPMI and the flip routine
;    matching the pixel format is installed in _flipper. The image is centered
;    on the screen; _address1/_address2 and _pos1/_pos2 describe the one or two
;    pages that gfx_flip alternates between.
;  - Otherwise (no VBE, no fitting mode, set mode failed or the framebuffer
;    could not be mapped) VGA mode 13h is set with a 192 entry grey palette
;    and _flipvga is installed.
; Arguments are read relative to esp after pushad (hence the +32).
; NOTE(review): the video memory size is read from the controller info left
; in the go32 transfer buffer by gfx_init; that presumably requires that
; nothing else has used the transfer buffer in between - confirm.
; All registers are preserved (pushad/popad).
[GLOBAL _gfx_open]
_gfx_open:
width	equ	4+32
height	equ	8+32
	pushad

	mov	eax,[esp+width]						; store width and height (for flips)
	mov	[xres],eax
	mov	eax,[esp+height]
	mov	[yres],eax
		
	cmp	[_vesa_detected],byte true				; did we detect vbe (2.0+)?
	je 	.usevesa						; yes, use vesa

.usevgainstead:		
	mov	ax,0x13
	int	0x10							; set mode 13h

; grey palette: indices 191..0, three consecutive indices share one grey
; level (63..0). _flipvga uses (B+G+R)>>2 as index, which is at most 191.
	xor	ecx,ecx
	mov	dx,0x3c8						; vga palette index register
	mov	al,191							; al = palette index
	mov	bl,63							; bl = grey level
.setpalette1:
	mov	cl,3
.setpalette2:
	out	dx,al							; port[0x3c8] = index

	xchg	eax,ebx							; al = grey level
	inc	edx

	out	dx,al							; port[0x3c9] = r
	out	dx,al							; port[0x3c9] = g
	out	dx,al							; port[0x3c9] = b

	dec	edx							; update port (0x3c8)
	xchg	eax,ebx							; al = index again

	dec	eax							; next (lower) index

	loop	.setpalette2

	dec	bl
	jns	.setpalette1

	; set up variables for flipper (width,height)	
.setupvariables:
	mov 	dword [_flipper],_flipvga
	mov	eax,0xa0000
	sub	eax,[___djgpp_base_address]				; vga memory relative to ds
	lea	edi,[_address1]
	mov	[edi],eax						; address1 and address2 are adjacent:
	mov	[edi+4],eax						; both pages point to the vga memory

	xor	edx,edx
	mov	eax,[esp+width]
	shl	eax,8
	mov	ebx,(320/4)
	div	ebx
	mov	[_deltax],eax						; 24.8 source byte step per screen pixel (4 bytes per source pixel)
	
	xor	edx,edx							; div uses edx:eax - drop the remainder of the division above
	mov	eax,[esp+height]
	shl	eax,8
	mov	ebx,200
	div	ebx
	mov	[_deltay],eax						; 24.8 source line step per screen line
	
	popad
	ret

.usevesa:
	mov	eax,[esp+width]
	shl	eax,16
	add	eax,[esp+height]
	shl	eax,2							; eax = |X|Y|Z with Z=0, same packing as mode.dimensions

	lea	esi,[_modelist]						; point esi to modelist

; the list is sorted ascending, so the first entry whose key is not below the
; requested one is wide enough. The terminator (-1) always matches, which
; ends the search.
.searchmodes:
	cmp	[esi+mode.dimensions],eax
	jnb	.foundmode
.continuesearch:
	add	esi,mode_size						; advance pointer to next mode
	jmp	.searchmodes
	
.foundmode:
	cmp	[esi+mode.dimensions],ax				; 16 bit compare of height|depth: is it high enough too?
	jb	.continuesearch	
	cmp	[esi+mode.dimensions],dword -1				; did we reach end of modelist?
	je	.tovga      						; yes, use vga instead

	mov	ebx,[esi+mode.number]
	call	getmodeinfo						; get more information about mode

	; set mode
	or	bx,0x4000						; use LFB
	mov	ax,0x4f02
	int	0x10							; set mode
	cmp	ax,0x004f						; al=0x4f: supported, ah=0: success
	je	.keepvesa
.tovga:
	jmp	.usevgainstead                                          ; we are out of range
.keepvesa:
	; get baseaddress
	mov 	edx,[__go32_info_block+go32_infoblock.transfer_buffer]
	sub	edx,[___djgpp_base_address]

	; map memory through dpmi
	mov	ax,0x800
	mov	ecx,[edx+vbe_controllerinfo_size+vbe_modeinfo.physical_base]
	mov	ebx,ecx
	shr	ebx,16							; bx:cx = 32bit physical address
	
	mov	esi,[edx+vbe_controllerinfo.memory]			; si = video memory in 64 kb blocks
	xor	edi,edi							; si:di = size in bytes
	
	int	0x31							; map physical memory
	jc	near .usevgainstead					; no mapping -> no framebuffer, fall back to vga

	shl	ebx,16
	add	bx,cx				
	sub	ebx,[___djgpp_base_address]				; ebx = mapped address relative to ds
	mov	[_screenaddress],ebx

; set right flipper: "rgb" flippers are used when the red field starts at
; bit 0, "bgr" flippers otherwise. ebx receives the bytes per pixel.
	
	xor	ebx,ebx
	mov	al,[edx+vbe_controllerinfo_size+vbe_modeinfo.z_res]
	mov	ah,[edx+vbe_controllerinfo_size+vbe_modeinfo.red_field_pos]
	test	ah,ah								; is it bgr?
	je	.itsrgb								; no, then it's rgb
.itsbgr:

	cmp	al,32								; is it a 32bit mode?
	jne	.try24bgr							; no, try 24bit

	mov	bl,4								; ebx = bytesperpixel
	mov	eax,_flip32bgr							; set right flipper
	jmp	short .calculate_rest

.try24bgr:
	cmp	al,24								; is it a 24bit mode?
	jne	.try16bgr							; no, try 16bit

	mov	bl,3								; ebx = bytesperpixel
	mov	eax,_flip24bgr							; set right flipper
	jmp	short .calculate_rest

.try16bgr:
	mov	bl,2								; ebx = bytesperpixel
	cmp	al,16								; is it a 16bit mode?
	jne	.its15bgr							; no, then it's 15bit

	mov	eax,_flip16bgr							; set right flipper
	jmp	short .calculate_rest

.its15bgr:
	mov	eax,_flip15bgr							; set right flipper
	jmp	short .calculate_rest

.itsrgb:
	cmp	al,32								; is it a 32bit mode?
	jne	.try24rgb							; no, try 24bit

	mov	bl,4								; ebx = bytesperpixel
	mov	eax,_flip32rgb							; set right flipper
	jmp	short .calculate_rest

.try24rgb:
	cmp	al,24								; is it a 24bit mode?
	jne	.try16rgb							; no, try 16bit

	mov	bl,3								; ebx = bytesperpixel
	mov	eax,_flip24rgb							; set right flipper
	jmp	short .calculate_rest

.try16rgb:
	mov	bl,2								; ebx = bytesperpixel
	cmp	al,16								; is it a 16bit mode?
	jne	.its15rgb							; no, then it's 15bit

	mov	eax,_flip16rgb							; set right flipper
	jmp	short .calculate_rest

.its15rgb:
	mov	eax,_flip15rgb							; set right flipper

; x1,y1 = requested size, x2,y2 = size of the mode that was set.
;   deltaline = pitch - x1*bytesperpixel       (skip from end of one image line to the next)
;   address1  = screenaddress + ((y2-y1)/2)*pitch + ((x2-x1)/2)*bytesperpixel
;   address2  = address1 + y2*pitch, pos2 = y2  (second page, only if the mode has one)
.calculate_rest:
	mov	[_flipper],eax							; set flipper

	mov	edi,edx				
	movzx	ecx,word [edi+vbe_controllerinfo_size+vbe_modeinfo.pitch]	; ecx = pitch
	push	ecx								; stack = pitch
	movzx	eax,word [edi+vbe_controllerinfo_size+vbe_modeinfo.y_res]	; eax = y2
	push	eax								; stack = y2,pitch
	sub	eax,[esp+height+8]						; eax = y2-y1
	shr	eax,1								; eax = (y2-y1)/2
	mul	ecx								; edx = 0, eax = ((y2-y1)/2)*pitch
	push 	eax								; stack = ((y2-y1)/2)*pitch,y2,pitch
	mov	eax,ebx								; eax = bytesperpixel
	mov	esi,[esp+width+12]						; esi = x1
	mul	esi								; edx = 0, eax = x1*bytesperpixel
	sub	ecx,eax								; ecx = deltaline
	mov	[_deltaline],ecx						; [deltaline] = deltaline	
	movzx	eax,word [edi+vbe_controllerinfo_size+vbe_modeinfo.x_res]	; eax = x2
	sub	eax,esi								; eax = x2-x1
	shr	eax,1								; eax = (x2-x1)/2
	mul	ebx								; edx = 0, eax = ((x2-x1)/2)*bytesperpixel
	pop	ecx								; ecx = ((y2-y1)/2)*pitch, stack = y2,pitch
	add	eax,ecx								; eax = offset
	add	eax,[_screenaddress]						; eax = screenaddress + offset
	mov	[_address1],eax							; [address1] = address1
	mov	[_pos1],edx							; [pos1] = 0 (pos1)	
	cmp	[edi+vbe_controllerinfo_size+vbe_modeinfo.n_images],dl		; could we use doublebuffer?
	je	.usesinglebuffer

	mov	ebx,eax								; ebx = address1
	pop	eax								; eax = y2, stack = pitch
	mov	esi,eax								; esi = y2
	pop	edx								; edx = pitch, stack =
	mul	edx								; edx = 0, eax = y2*pitch
	add	eax,ebx								; eax = address2
	mov	edx,esi								; edx = y2 (pos2)
	jmp	short .filladdress2
		
.usesinglebuffer:
	add	esp,8								; remove variables from stack
										; eax = address1, edx = 0: both pages are the same
.filladdress2:
	mov	[_address2],eax
	mov	[_pos2],edx

	popad
	ret

; void gfx_flip(int *source);
; Counts the frame, picks the destination page from the low bit of the new
; frame counter (_address1 for even, _address2 for odd counts) and jumps to
; the installed flip routine with esi = source and edi = destination.
; The registers saved by pushad here are restored by the flip routine, which
; also performs the return to the caller.
[GLOBAL _gfx_flip]
_gfx_flip:
source	equ	4+32
	pushad
	mov	esi,[esp+source]					; esi = source buffer
	inc	dword [_framecounter]					; update framecounter
	mov	eax,[_framecounter]
	and	eax,byte 1						; eax = page index (0 or 1)
	mov	edi,[_address1+eax*4]					; edi = destination page

	jmp	dword [_flipper]					; make flip
		
; void gfx_close();
; Releases the DPMI mapping of the framebuffer (only if a vesa flipper is
; installed) and returns to text mode 3.
; The function is called from C, so all registers are saved and restored;
; the DPMI call needs bx:cx and the interrupts may change further registers.
[GLOBAL _gfx_close]
_gfx_close:
	pushad
	mov	eax,[_flipper]
	test	eax,eax							; no flipper installed -> gfx_open was never called,
	jz	.settextmode						; so there is no mapping to free
	cmp	eax,_flipvga
	je	.settextmode						; vga mode: nothing was mapped
	mov	ebx,[_screenaddress]
	add	ebx,[___djgpp_base_address]				; back from ds relative to linear address
	mov	ecx,ebx
	shr	ebx,16							; bx:cx = screenaddress

	mov	ax,0x801
	int	0x31							; free memory mapping
	; no need to check return value
	
.settextmode:
	mov	ax,0x3
	int	0x10							; set text mode 3
	popad
	ret
	
; void gfx_deinit();
; Undoes the ___djgpp_nearptr_enable call made by gfx_init. Nothing is left
; to do afterwards, so the call is made as a tail jump: the callee returns
; straight to our caller.
[GLOBAL _gfx_deinit]
_gfx_deinit:
	jmp	___djgpp_nearptr_disable

getmodeinfo:
; Fills the vbe_modeinfo structure with mode info (VBE call 0x4f01 through
; DPMI function 0x0300).
; input : ebx = mode
;         realmoderegs es:edi = realmode address of the mode info buffer
;         (set up by gfx_init; this routine does not change them)
; output: success = carry clear (carry comes from the dpmi call; popad does
;         not touch the flags). The VBE status is returned in the eax field
;         of realmoderegs.
; All registers are preserved.
	pushad
	mov	dword [realmoderegs+dpmi_realmoderegister.eax],0x4f01	; whole dword: no stale upper half
	mov	[realmoderegs+dpmi_realmoderegister.ecx],ebx		; cx = mode number
	; the register block is reused for every call and is written back by
	; the dpmi server, so request the server's own stack and clean flags
	; again instead of relying on what the previous call left behind.
	mov	dword [realmoderegs+dpmi_realmoderegister.sp],0		; ss:sp=0 -> use dpmi server's stack
	mov	word [realmoderegs+dpmi_realmoderegister.flags],0
	lea	edi,[realmoderegs]
	mov	eax,0x300						; simulate realmode int
	mov	ebx,0x10
	xor	ecx,ecx							; don't copy stack
	int	0x31
	popad
	ret

; vesaupdate: common tail of the vesa flip routines.
; Makes the page that was just drawn visible with VBE call 0x4f07 (set
; display start): cx = first pixel (0), dx = first scanline (_pos1 or _pos2,
; chosen by the low bit of the frame counter), bl = 0x80.
; Then restores the registers saved by gfx_flip and returns to its caller.
vesaupdate:
	mov	edx,[_framecounter]
	and	edx,byte 1					; page index (0 or 1)
	mov	edx,[_pos1+edx*4]				; edx = first scanline of that page
	xor	ecx,ecx						; first pixel in scanline = 0
	mov	ebx,0x80					; set during vertical retrace
	mov	eax,0x4f07
	int	0x10

	popad
	ret
			
; flips: they are entered by a jump from gfx_flip with esi = source,
; edi = destination and the pushad frame of gfx_flip still on the stack.
;
; _flip32bgr: the source format equals the screen format, so every scanline
; is a plain dword copy of xres pixels followed by a skip of deltaline bytes.
[GLOBAL _flip32bgr]
_flip32bgr:
	mov	ebp,[yres]					; ebp = scanlines left
	mov	ebx,[_deltaline]				; ebx = skip to the next screen line
	mov	edx,[xres]					; edx = pixels per scanline

.nextline:
	mov	ecx,edx
	rep	movsd						; copy one scanline
	add	edi,ebx
	dec	ebp
	jnz	.nextline
	jmp	short vesaupdate	

; __flip32bgr: alternative 32 bit copy flip that moves four pixels per
; iteration with explicit loads and stores instead of rep movsd.
; It is not declared GLOBAL and nothing in this file refers to it.
; input : esi = source, edi = destination, pushad frame on the stack
; output: leaves through vesaupdate
__flip32bgr:
	push	dword [yres]					; [esp] = scanlines left
	sub	edi,4						; bias edi by -4 so the stores below can use [edi-12]..[edi]
.outer:	
	mov	ebp,[xres]
	shr	ebp,2						; ebp = number of 4 pixel groups per scanline
	cmp	ebp,0
	jz	.endofline					; less than 4 pixels: only the tail copy is needed
	
.inner:
	add	edi,byte 16					; update destination index
	
	mov	eax,[esi]					; eax = 1st
	mov	ebx,[esi+4]					; ebx = 2nd

	mov	ecx,[esi+8]					; ecx = 3rd
	mov	edx,[esi+12]					; edx = 4th

	add	esi,byte 16					; update source index

	mov	[edi-12],eax
	mov	[edi-8],ebx

	mov	[edi-4],ecx
	mov	[edi],edx
	
	dec	ebp						; next 4 pixels
	jnz	.inner

.endofline:
	mov	ecx,[xres]					; if xres%4 > 0 then plot these last few pixels
	and	ecx,byte 11b
	jz	.nextloop

	add	edi,4						; undo the bias for the string copy
	rep	movsd	
	sub	edi,4						; and apply it again
	
.nextloop:
	add	edi,[_deltaline]				; skip to the start of the next screen line
	dec	dword [esp]					; next scanline
	jnz	.outer
	
	add	esp,4						; remove ycounter from stack		
	jmp	vesaupdate

; _flip32rgb: 32 bit flip for screens with red in the lowest byte.
; Every source pixel (bytes B,G,R,A in memory) is stored as R,G,B,A:
; bswap turns B,G,R,A into A,R,G,B and ror 8 rotates that to R,G,B,A.
; Four pixels are converted per iteration; the last xres%4 pixels of a
; scanline are converted one at a time.
; input : esi = source, edi = destination, pushad frame on the stack
; output: leaves through vesaupdate
[GLOBAL _flip32rgb]
_flip32rgb:
	push	dword [yres]					; [esp] = scanlines left
.outer:	
	mov	ebp,[xres]
	shr	ebp,2						; ebp = number of 4 pixel groups
	jz	.endofline					; less than 4 pixels: only the tail loop runs
	
.inner:
	mov	eax,[esi]					; eax = [B1][G1][R1][A1]
	mov	ebx,[esi+4]					; ebx = [B2][G2][R2][A2]

	bswap	eax						; eax = [A1][R1][G1][B1]
	mov	ecx,[esi+8]					; ecx = [B3][G3][R3][A3]

	bswap	ebx						; ebx = [A2][R2][G2][B2]
	mov	edx,[esi+12]					; edx = [B4][G4][R4][A4]

	ror	eax,8						; eax = [R1][G1][B1][A1]
	bswap	ecx						; ecx = [A3][R3][G3][B3]

	add	esi,byte 16					; update source index
	bswap	edx						; edx = [A4][R4][G4][B4]

	ror	ebx,8						; ebx = [R2][G2][B2][A2]
	mov	[edi],eax

	ror	ecx,8						; ecx = [R3][G3][B3][A3]
	mov	[edi+4],ebx
	
	ror	edx,8						; edx = [R4][G4][B4][A4]
	mov	[edi+8],ecx

	mov	[edi+12],edx
	add	edi,byte 16					; update destination index
	
	dec	ebp
	jnz	.inner

.endofline:
	mov	ecx,[xres]					; convert the last xres%4 pixels
	and	ecx,byte 11b
	jz	.nextloop
.finishline:
	lodsd
	bswap	eax
	ror	eax,8
	stosd
	dec	ecx
	jnz	.finishline

.nextloop:
	add	edi,[_deltaline]				; skip to the start of the next screen line
	dec	dword [esp]					; next scanline
	jnz	.outer
	
	add	esp,4						; remove ycounter from stack
	jmp	vesaupdate

; _flip24bgr: 24 bit flip, screen bytes are B,G,R per pixel.
; Four source pixels (16 bytes, B,G,R,A each) are packed into three dwords:
;   d0 = [B1][G1][R1][B2]   d1 = [G2][R2][B3][G3]   d2 = [R3][B4][G4][R4]
; The alpha bytes are masked away before the parts are combined, so their
; contents do not matter. The last xres%4 pixels are copied byte by byte.
; input : esi = source, edi = destination, pushad frame on the stack
; output: leaves through vesaupdate
[GLOBAL _flip24bgr]	
_flip24bgr:
	push	dword [yres]					; [esp] = scanlines left
.outer:	
	mov	ebp,[xres]
	shr	ebp,2						; ebp = number of 4 pixel groups
	jz	.endofline					; less than 4 pixels: only the tail loop runs
	
.inner:
	mov	eax,[esi]					; eax = [B1][G1][R1][A1]
	mov	ecx,[esi+4]					; ecx = [B2][G2][R2][A2]

	and	eax,0x00ffffff					; eax = [B1][G1][R1][ 0]
	mov	ebx,ecx						; ebx = [B2][G2][R2][A2]

	shl	ecx,24						; ecx = [ 0][ 0][ 0][B2]
	shr	ebx,8						; ebx = [G2][R2][A2][ 0]

	or	eax,ecx						; eax = [B1][G1][R1][B2] ok
	mov	edx,[esi+8]					; edx = [B3][G3][R3][A3]

	mov	[edi],eax
	and	ebx,0x0000ffff					; ebx = [G2][R2][ 0][ 0]

	mov	eax,edx						; eax = [B3][G3][R3][A3]
	shl	edx,16						; edx = [ 0][ 0][B3][G3]

	shr	eax,16						; eax = [R3][A3][ 0][ 0]
	or	ebx,edx						; ebx = [G2][R2][B3][G3] ok

	mov	ecx,[esi+12]					; ecx = [B4][G4][R4][A4]
	mov	[edi+4],ebx

	and	eax,0x000000ff					; eax = [R3][ 0][ 0][ 0]
	shl	ecx,8						; ecx = [ 0][B4][G4][R4]

	or	eax,ecx						; eax = [R3][B4][G4][R4] ok
	mov	[edi+8],eax

	add	esi,byte 16					; update source index
	add	edi,byte 12					; update destination index

	dec	ebp
	jnz	.inner

.endofline:
	mov	ecx,[xres]					; copy the last xres%4 pixels
	and	ecx,byte 11b
	jz	.nextloop

.finishline:
	mov	al,[esi]					; B
	mov	bl,[esi+1]					; G
	mov	dl,[esi+2]					; R
	add	esi,byte 4

	mov	[edi],al
	mov	[edi+1],bl	
	mov	[edi+2],dl
	add	edi,byte 3
		
	dec	ecx
	jnz	.finishline

.nextloop:
	add	edi,[_deltaline]				; skip to the start of the next screen line
	dec	dword [esp]					; next scanline
	jnz	.outer
	
	add	esp,4						; remove ycounter from stack
	jmp	vesaupdate

; _flip24rgb: 24 bit flip, screen bytes are R,G,B per pixel.
; Each source pixel (bytes B,G,R,A) is first turned into a clean 24 bit
; value t = [R][G][B][0] with bswap + shr 8. Four of those are packed into
; three dwords:
;   d0 = t1 | t2<<24       = [R1][G1][B1][R2]
;   d1 = t2>>8 | t3<<16    = [G2][B2][R3][G3]
;   d2 = t3>>16 | t4<<8    = [B3][R4][G4][B4]
; The alpha bytes never reach the output. The last xres%4 pixels are copied
; byte by byte.
; input : esi = source, edi = destination, pushad frame on the stack
; output: leaves through vesaupdate
[GLOBAL _flip24rgb]
_flip24rgb:
	push	dword [yres]					; [esp] = scanlines left
.outer:	
	mov	ebp,[xres]
	shr	ebp,2						; ebp = number of 4 pixel groups
	jz	.endofline					; less than 4 pixels: only the tail loop runs
	
.inner:
	mov	eax,[esi]					; eax = [B1][G1][R1][A1]
	mov	ebx,[esi+4]					; ebx = [B2][G2][R2][A2]

	bswap	eax						; eax = [A1][R1][G1][B1]
	bswap	ebx						; ebx = [A2][R2][G2][B2]

	shr	eax,8						; eax = [R1][G1][B1][ 0] (t1)
	shr	ebx,8						; ebx = [R2][G2][B2][ 0] (t2)

	mov	ecx,ebx
	shl	ecx,24						; ecx = [ 0][ 0][ 0][R2]

	or	eax,ecx						; eax = [R1][G1][B1][R2] ok
	mov	[edi],eax

	shr	ebx,8						; ebx = [G2][B2][ 0][ 0]
	mov	eax,[esi+8]					; eax = [B3][G3][R3][A3]

	bswap	eax						; eax = [A3][R3][G3][B3]
	shr	eax,8						; eax = [R3][G3][B3][ 0] (t3)

	mov	ecx,eax
	shl	ecx,16						; ecx = [ 0][ 0][R3][G3]

	or	ebx,ecx						; ebx = [G2][B2][R3][G3] ok
	mov	[edi+4],ebx

	shr	eax,16						; eax = [B3][ 0][ 0][ 0]
	mov	edx,[esi+12]					; edx = [B4][G4][R4][A4]

	bswap	edx						; edx = [A4][R4][G4][B4]
	and	edx,0xffffff00					; edx = [ 0][R4][G4][B4]

	or	eax,edx						; eax = [B3][R4][G4][B4] ok
	mov	[edi+8],eax

	add	esi,byte 16					; update source index
	add	edi,byte 12					; update destination index

	dec	ebp
	jnz	.inner

.endofline:
	mov	ecx,[xres]					; copy the last xres%4 pixels
	and	ecx,byte 11b
	jz	.nextloop

.finishline:
	mov	al,[esi]					; B
	mov	bl,[esi+1]					; G
	mov	dl,[esi+2]					; R
	add	esi,byte 4

	mov	[edi],dl					; stored as R,G,B
	mov	[edi+1],bl	
	mov	[edi+2],al
	add	edi,byte 3
		
	dec	ecx
	jnz	.finishline

.nextloop:
	add	edi,[_deltaline]				; skip to the start of the next screen line
	dec	dword [esp]					; next scanline
	jnz	.outer
	
	add	esp,4						; remove ycounter from stack
	jmp	vesaupdate

; _flip16bgr: 16 bit (5:6:5) flip with blue in the low bits:
;   pixel = (R>>3)<<11 | (G>>2)<<5 | (B>>3)
; Two source pixels are converted per iteration and stored as one dword.
; If xres is odd, the last pixel of a scanline is converted on its own.
; input : esi = source, edi = destination, pushad frame on the stack
; output: leaves through vesaupdate
[GLOBAL _flip16bgr]
_flip16bgr:
	push	dword [yres]					; [esp] = scanlines left
.outer:	
	mov	ebp,[xres]
	shr	ebp,1						; ebp = number of pixel pairs
	jz	.endofline					; less than 2 pixels: only the single pixel code runs
	
.inner:
	mov	eax,[esi]					; eax = 1st pixel

        shr	ah,2						; G1>>2 in bits 8..13
        mov	ebx,[esi+4]					; ebx = 2nd pixel

        shr	eax,3						; B1>>3 in bits 0..4, G1>>2 in bits 5..10
        mov	edx,ebx

        shr	bh,2						; G2>>2 in bits 8..13
        mov	dl,[esi+2]					; edx = [R1][G2][R2][A2]

        shl	ebx,13						; B2>>3 in bits 16..20, G2>>2 in bits 21..26
        and	eax,0x7FF					; eax = green+blue of 1st pixel
        
        shl	edx,8						; R1 in bits 8..15, R2 in bits 24..31
        and	ebx,0x7FF0000					; ebx = green+blue of 2nd pixel (upper word)

        and	edx,0xF800F800					; keep the upper 5 bits of both reds
        add	eax,ebx

	add	esi,byte 8					; update source index
        add 	eax,edx						; eax = both 16 bit pixels
	
	mov	[edi],eax
	add	edi,4						; update destination index

	dec	ebp
	jnz	.inner

.endofline:
	mov	ecx,[xres]					; the loop above handles pairs, so at most
	and	ecx,byte 1					; one pixel is left (xres odd)
	jz	.nextloop

.finishline:
	mov bl,[esi+0]    ; blue
	mov al,[esi+1]    ; green
	mov ah,[esi+2]    ; red
	shr ah,3          ; R>>3 in bits 8..12
	and al,11111100b  ; drop the lowest 2 bits of green
	shl eax,3         ; ax = (R>>3)<<11 | (G>>2)<<5
	shr bl,3
	add al,bl         ; | B>>3
	mov [edi+0],al
	mov [edi+1],ah
	add esi,byte 4
	add edi,byte 2
		
.nextloop:
	add	edi,[_deltaline]				; skip to the start of the next screen line
	dec	dword [esp]					; next scanline
	jnz	.outer
	
	add	esp,4						; remove ycounter from stack
	jmp	vesaupdate
		
; _flip16rgb: 16 bit flip for screens with red in the low bits.
; gfx_open installs this routine for 16 bit modes whose red field starts at
; bit 0. The pixel is built as
;   pixel = (B>>3)<<11 | (G>>2)<<5 | (R>>3)
; NOTE(review): assumes the usual 5:6:5 field sizes, mirrored from
; _flip16bgr - confirm against the mask sizes reported by the bios.
; One pixel is converted per iteration.
; input : esi = source, edi = destination, pushad frame on the stack
; output: leaves through vesaupdate
[GLOBAL _flip16rgb]
_flip16rgb:
	mov	ebp,[yres]					; ebp = scanlines left
.outer:	
	mov	ecx,[xres]					; ecx = pixels left on this scanline
	test	ecx,ecx
	jz	.nextloop
	
.inner:
	mov	eax,[esi]					; eax = 0xAARRGGBB
	add	esi,byte 4					; update source index

	mov	ebx,eax
	mov	edx,eax

	shr	eax,19						; R>>3 in bits 0..4 (alpha above)
	shr	ebx,5						; G>>2 in bits 5..10
	shl	edx,8						; B>>3 in bits 11..15

	and	eax,0x001f
	and	ebx,0x07e0
	and	edx,0xf800

	or	eax,ebx
	or	eax,edx						; ax = 16 bit pixel

	mov	[edi],ax
	add	edi,byte 2					; update destination index

	dec	ecx
	jnz	.inner

.nextloop:
	add	edi,[_deltaline]				; skip to the start of the next screen line
	dec	ebp						; next scanline
	jnz	.outer

	jmp	vesaupdate


; _flip15bgr / _flip15rgb: placeholders for the 15 bit modes.
; Nothing is drawn: the registers saved by gfx_flip are restored and control
; returns to its caller. Both entry points share the same code.
[GLOBAL _flip15bgr]
[GLOBAL _flip15rgb]
_flip15bgr:
_flip15rgb:
	popad
	ret

; _flipvga: flip for the VGA mode 13h fallback (320x200, 256 colors).
; The source image is point sampled to 320x200 and every pixel is reduced to
; the grey index (B+G+R)>>2, which matches the palette set up by gfx_open.
;   _deltax = 24.8 fixed point source byte step per screen pixel
;   _deltay = 24.8 fixed point source line step per screen line
; The source position is kept as a row pointer plus a 24.8 byte offset within
; the row, and the offset is rounded down to a whole pixel (4 bytes) before
; it is used. No wait for the vertical retrace is done.
; input : esi = source, edi = destination, pushad frame on the stack
; output: restores the registers saved by gfx_flip and returns to its caller
[GLOBAL _flipvga]
_flipvga:
; local variables, relative to esp
ycount	equ	0			; screen lines left
ypos	equ	4			; source line in 24.8
pitch	equ	8			; bytes per source line
base	equ	12			; address of the source buffer

	push	esi			; base
	mov	eax,[xres]
	shl	eax,2			; xres*4 because of 32bit bgr
	push	eax			; pitch

	mov	ebp,[_deltax]

	push	dword 0			; ypos
	push	dword 200		; ycount
	
.outer:
	xor	edx,edx			; edx = xpos, 24.8 byte offset within the source line
	xor	eax,eax			; upper bits of eax stay zero during the line

	mov	ecx,320			; xcount
	add	edi,320
	neg	ecx			; ecx counts -320..-1, edi+ecx = current screen pixel
.inner:
	mov	ebx,edx
	shr	ebx,8			; ebx = (int)xpos
	and	ebx,byte -4		; round down to the start of a source pixel
	add	edx,ebp			; xpos+=deltax

	mov	al,[esi+ebx]		; B (ah is 0 here)
	add	al,[esi+ebx+1]		; B+G
	adc	ah,0
	add	al,[esi+ebx+2]		; B+G+R
	adc	ah,0			; ax = sum (0..765)
	shr	eax,2			; eax = grey index (0..191)

	mov	[edi+ecx],al
	inc	ecx
	jnz	.inner

	mov	eax,[esp+ypos]		
	add	eax,[_deltay]		; ypos+=deltay
	mov	[esp+ypos],eax

	shr	eax,8			; integer part of ypos
	mul	dword [esp+pitch]	; (int)ypos * pitch
	add	eax,[esp+base]
	mov	esi,eax			; esi = start of the next source line
	
	dec	dword [esp+ycount]
	jnz	.outer

	add	esp,16			; remove local variables
	popad
	ret
	
	
[SECTION .data]

; initialized, exported variables
[GLOBAL _vesa_detected]
[GLOBAL _framecounter]

_vesa_detected:	dd 0	; set to true by gfx_init when a VBE 2.0+ bios answers
_framecounter:	dd 0	; incremented by every gfx_flip call

[SECTION .bss]

; address of the flip routine selected by gfx_open (gfx_flip jumps through it)
[GLOBAL _flipper]
_flipper			:	resd 1

; mode list filled by gfx_init. One entry more than max_number_of_modes is
; reserved so that the terminating entry (dimensions = -1) always fits.
[GLOBAL _modelist]
_modelist			:	resb (max_number_of_modes+1)*mode_size

; start of the mapped framebuffer, relative to ds
[GLOBAL _screenaddress]
_screenaddress			:	resd 1

; destination addresses of the two pages. _address1 and _address2 must stay
; adjacent: they are indexed as [_address1+page*4].
[GLOBAL _address1]
_address1			:	resd 1

[GLOBAL _address2]
_address2			:	resd 1

; first displayed scanline of the two pages. _pos1 and _pos2 must stay
; adjacent: they are indexed as [_pos1+page*4].
[GLOBAL _pos1]
_pos1				:	resd 1

[GLOBAL _pos2]
_pos2				:	resd 1

; bytes to skip from the end of one image line to the start of the next
[GLOBAL _deltaline]
_deltaline			:	resd 1

; dimensions requested in gfx_open
xres				:	resd 1

yres				:	resd 1

; 24.8 fixed point scaling steps used by the vga flip
[GLOBAL _deltax]
_deltax				:	resd 1

[GLOBAL _deltay]
_deltay				:	resd 1

; register block for the dpmi "simulate realmode interrupt" calls.
; The struc name itself evaluates to 0, the size is in ..._size.
realmoderegs			:	resb dpmi_realmoderegister_size
