; PowerPC optimized drawing methods for Goom
; © 2003 Guillaume Borios
; This library is free software; you can redistribute it and/or
; modify it under the terms of the GNU Library General Public
; License as published by the Free Software Foundation; either
; version 2 of the License, or (at your option) any later version.
;
; This library is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
; Library General Public License for more details.
;
; You should have received a copy of the GNU Library General Public
; License along with this library; if not, write to the
; Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
; Boston, MA 02110-1301, USA.

; Change log :
; 30 May 2003 : File creation

; Section selection : the code that follows is assembled into the read-only
; __TEXT,__text section (regular, pure instructions), which is the section
; the .text directive selects.
.text

; --------------------------------------------------------------------------------------
; Single 32-bit pixel drawing macros
; Usage :
; 	DRAWMETHOD_XXXX_MACRO *pixelIN, *pixelOUT, COLOR, WR1, WR2, WR3, WR4
;	Inside the macro bodies these arguments are $0 .. $6, in that order.
;	Only the work registers (WR) may be modified by the macros; a method
;	that needs fewer than four work registers ignores the extra ones.
;
; Available methods :
;	DRAWMETHOD_DFLT_MACRO : Default drawing method (expands to PLUS)
;	DRAWMETHOD_PLUS_MACRO : RGB saturated per-channel addition (SLOWEST)
;	DRAWMETHOD_HALF_MACRO : 50% transparency color drawing
;	DRAWMETHOD_OVRW_MACRO : Direct COLOR drawing (FASTEST)
;	DRAWMETHOD_B_OR_MACRO : Bitwise OR
;	DRAWMETHOD_BAND_MACRO : Bitwise AND
;	DRAWMETHOD_BXOR_MACRO : Bitwise XOR
;	DRAWMETHOD_BNOT_MACRO : Bitwise NOT
; --------------------------------------------------------------------------------------

; DRAWMETHOD_OVRW_MACRO pixelIN, pixelOUT, COLOR
;   Overwrite: stores COLOR ($2) to the word at pixelOUT ($1).
;   The input pixel ($0) is never read and no work register is used.
.macro DRAWMETHOD_OVRW_MACRO
    stw		$2,0($1)		;; *pixelOUT = COLOR
.endmacro

; DRAWMETHOD_B_OR_MACRO pixelIN, pixelOUT, COLOR, WR1
;   *pixelOUT = *pixelIN | COLOR.  Only WR1 ($3) is modified.
.macro DRAWMETHOD_B_OR_MACRO
    lwz		$3,0($0)		;; WR1 = *pixelIN
    or		$3,$2,$3		;; WR1 = COLOR | WR1
    stw		$3,0($1)		;; *pixelOUT = WR1
.endmacro

; DRAWMETHOD_BAND_MACRO pixelIN, pixelOUT, COLOR, WR1
;   *pixelOUT = *pixelIN & COLOR.  Only WR1 ($3) is modified.
.macro DRAWMETHOD_BAND_MACRO
    lwz		$3,0($0)		;; WR1 = *pixelIN
    and		$3,$2,$3		;; WR1 = COLOR & WR1
    stw		$3,0($1)		;; *pixelOUT = WR1
.endmacro

; DRAWMETHOD_BXOR_MACRO pixelIN, pixelOUT, COLOR, WR1
;   *pixelOUT = *pixelIN ^ COLOR.  Only WR1 ($3) is modified.
.macro DRAWMETHOD_BXOR_MACRO
    lwz		$3,0($0)		;; WR1 = *pixelIN
    xor		$3,$2,$3		;; WR1 = COLOR ^ WR1
    stw		$3,0($1)		;; *pixelOUT = WR1
.endmacro

; DRAWMETHOD_BNOT_MACRO pixelIN, pixelOUT, COLOR, WR1
;   *pixelOUT = ~*pixelIN.  COLOR ($2) is not used; only WR1 ($3) is modified.
.macro DRAWMETHOD_BNOT_MACRO
    lwz		$3,0($0)		;; WR1 = *pixelIN
    nor		$3,$3,$3		;; WR1 = ~(WR1 | WR1) = ~WR1
    stw		$3,0($1)		;; *pixelOUT = WR1
.endmacro

; DRAWMETHOD_PLUS_MACRO pixelIN, pixelOUT, COLOR, WR1, WR2, WR3, WR4
;   Per-channel saturated addition of COLOR ($2) and *pixelIN ($0), stored to
;   *pixelOUT ($1).  The three low bytes (mask 0x00FF0000, 0x0000FF00 and
;   0x000000FF) are each added independently and clamped to 0xFF on overflow.
;   The top byte of the stored word is 0xFF when the 0x00FF0000 channel
;   overflowed and 0x00 otherwise.
;
;   Modifies only WR1..WR4 ($3..$6): the masks are applied with rlwinm/ori
;   rather than andi./addi so that CR0 is left untouched and so that any
;   GPR (including r0) may be used as a work register.
;   pixelIN and pixelOUT may be the same register; being lwz/stw base
;   registers they must not be r0.
;
;   Bit positions in the comments use the PowerPC convention (bit 0 = MSB).
.macro DRAWMETHOD_PLUS_MACRO
    lwz		$4,0($0)		;; WR2 = *pixelIN

    ;; --- 0x0000FF00 channel, handled on its own in WR1 ---
    rlwinm	$3,$4,0,16,23		;; WR1 = WR2   & 0x0000FF00
    rlwinm	$5,$2,0,16,23		;; WR3 = COLOR & 0x0000FF00
    add		$3,$3,$5		;; WR1 = channel sum, carry lands in bit 15 (0x00010000)
    rlwinm	$5,$3,15,0,0		;; WR3 = carry moved to the sign bit, everything else 0
    srawi	$5,$5,23		;; WR3 = 0xFFFFFF00 if carry, else 0
    or		$3,$3,$5		;; force the channel to 0xFF on overflow

    ;; --- 0x00FF0000 and 0x000000FF channels, added together in WR2 ---
    lis		$5,0xFF			;; WR3 = 0x00FF0000
    ori		$5,$5,0xFF		;; WR3 = 0x00FF00FF
    and		$4,$4,$5		;; WR2 = *pixelIN & 0x00FF00FF
    and		$6,$2,$5		;; WR4 = COLOR    & 0x00FF00FF
    add		$4,$4,$6		;; WR2 = both sums; carries in bit 7 (0x01000000) and bit 23 (0x00000100)
    rlwinm	$6,$4,7,0,0		;; WR4 = high-channel carry moved to the sign bit
    srawi	$6,$6,15		;; WR4 = 0xFFFF0000 if carry, else 0
    rlwinm	$5,$4,23,0,0		;; WR3 = low-channel carry moved to the sign bit
    srawi	$5,$5,31		;; WR3 = 0xFFFFFFFF if carry, else 0
    rlwimi	$6,$5,0,24,31		;; WR4[24..31] = WR3[24..31]  (low-byte saturation mask)
    or		$4,$4,$6		;; apply both saturation masks

    ;; --- merge and store ---
    rlwimi	$4,$3,0,16,23		;; WR2[16..23] = WR1[16..23]  (insert the 0x0000FF00 channel)
    stw		$4,0($1)		;; *pixelOUT = WR2
.endmacro

; DRAWMETHOD_HALF_MACRO pixelIN, pixelOUT, COLOR, WR1, WR2, WR3
;   50% blend: each of the three low bytes of the result is
;   (channel(*pixelIN) + channel(COLOR)) >> 1, stored to *pixelOUT ($1).
;   The top byte of the stored word is always 0.
;
;   Modifies only WR1..WR3 ($3..$5): the masks are applied with rlwinm/ori
;   rather than andi./addi so that CR0 is left untouched and so that any
;   GPR (including r0) may be used as a work register.
;   pixelIN and pixelOUT may be the same register; being lwz/stw base
;   registers they must not be r0.
;
;   Bit positions in the comments use the PowerPC convention (bit 0 = MSB).
.macro	DRAWMETHOD_HALF_MACRO
    lwz		$4,0($0)		;; WR2 = *pixelIN

    ;; --- 0x0000FF00 channel: 9-bit sum kept in WR1, halved when merged ---
    rlwinm	$3,$4,0,16,23		;; WR1 = WR2   & 0x0000FF00
    rlwinm	$5,$2,0,16,23		;; WR3 = COLOR & 0x0000FF00
    add		$3,$3,$5		;; WR1 = channel sum (bits 15..23)

    ;; --- 0x00FF0000 and 0x000000FF channels: summed and halved in WR2 ---
    lis		$5,0xFF			;; WR3 = 0x00FF0000
    ori		$5,$5,0xFF		;; WR3 = 0x00FF00FF
    and		$4,$4,$5		;; WR2 = *pixelIN & 0x00FF00FF
    and		$5,$2,$5		;; WR3 = COLOR    & 0x00FF00FF
    add		$4,$4,$5		;; WR2 = both 9-bit sums
    srwi	$4,$4,1			;; halve both; the stray bit that falls into
					;; bit 16 is overwritten by the insert below

    ;; --- merge and store ---
    rlwimi	$4,$3,31,16,23		;; WR2[16..23] = WR1[15..22]  (= sum >> 1)
    stw		$4,0($1)		;; *pixelOUT = WR2
.endmacro

; DRAWMETHOD_DFLT_MACRO pixelIN, pixelOUT, COLOR, WR1, WR2, WR3, WR4
;   Default drawing method: expands to DRAWMETHOD_PLUS_MACRO.
;   All seven arguments are forwarded explicitly; an inner macro invocation
;   without operands would expand with every $n empty.
.macro DRAWMETHOD_DFLT_MACRO
    DRAWMETHOD_PLUS_MACRO	$0,$1,$2,$3,$4,$5,$6
.endmacro

; --------------------------------------------------------------------------------------


; **************************************************************************************
; void DRAWMETHOD_PLUS_PPC(unsigned int * buf, unsigned int _col);
; void DRAWMETHOD_PLUS_2_PPC(unsigned * in, unsigned int * out, unsigned int _col);
; **************************************************************************************

; _DRAWMETHOD_PLUS_2_PPC : saturated add of _col and *in, written to *out.
;   In   : r3 = in, r4 = out, r5 = _col
;   Work : r6, r7, r8, r9 (handed to the macro as WR1..WR4)
.globl _DRAWMETHOD_PLUS_2_PPC
.align 3
_DRAWMETHOD_PLUS_2_PPC:
    DRAWMETHOD_PLUS_MACRO	r3,r4,r5,r6,r7,r8,r9
    blr					;; leaf routine: return straight to the caller

; _DRAWMETHOD_PLUS_PPC : in-place saturated add, *buf = *buf (+) _col.
;   In   : r3 = buf (used as both input and output pointer), r4 = _col
;   Work : r5, r6, r7, r9 (handed to the macro as WR1..WR4)
.globl _DRAWMETHOD_PLUS_PPC
.align 3
_DRAWMETHOD_PLUS_PPC:
    DRAWMETHOD_PLUS_MACRO	r3,r3,r4,r5,r6,r7,r9
    blr					;; leaf routine: return straight to the caller


; **************************************************************************************
; void DRAWMETHOD_HALF_PPC(unsigned int * buf, unsigned int _col);
; void DRAWMETHOD_HALF_2_PPC(unsigned * in, unsigned int * out, unsigned int _col);
; **************************************************************************************

; _DRAWMETHOD_HALF_2_PPC : 50% blend of _col and *in, written to *out.
;   In   : r3 = in, r4 = out, r5 = _col
;   Work : r6, r7, r8 (handed to the macro as WR1..WR3)
.globl _DRAWMETHOD_HALF_2_PPC
.align 3
_DRAWMETHOD_HALF_2_PPC:
    DRAWMETHOD_HALF_MACRO	r3,r4,r5,r6,r7,r8
    blr					;; leaf routine: return straight to the caller

; _DRAWMETHOD_HALF_PPC : in-place 50% blend of _col into *buf.
;   In   : r3 = buf (used as both input and output pointer), r4 = _col
;   Work : r5, r6, r7 (handed to the macro as WR1..WR3)
.globl _DRAWMETHOD_HALF_PPC
.align 3
_DRAWMETHOD_HALF_PPC:
    DRAWMETHOD_HALF_MACRO	r3,r3,r4,r5,r6,r7
    blr					;; leaf routine: return straight to the caller


; **************************************************************************************
; void DRAW_LINE_PPC(unsigned int *data, int x1, int y1, int x2, int y2, unsigned int col,
; 			unsigned int screenx, unsigned int screeny)
; **************************************************************************************

; _DRAW_LINE_PPC : placeholder entry point.
;   The symbol is exported so callers link, but the body is empty: it returns
;   immediately without reading its arguments or drawing anything.
.globl _DRAW_LINE_PPC
.align 3
_DRAW_LINE_PPC:
    ;; NOT IMPLEMENTED YET
    blr					;; return without doing any work


; **************************************************************************************
; void _ppc_brightness(Pixel * src, Pixel * dest, unsigned int size, unsigned int coeff)
;
; Three implementations with this argument list are exported below:
;	_ppc_brightness_G4, _ppc_brightness_G5 and _ppc_brightness_generic
; **************************************************************************************


; vectorZERO : read-only AltiVec constants used by the brightness routines.
;   +0        : one all-zero vector (the routines skip it and start at +16)
;   +16 .. +79: four vperm control vectors.  In each 32-bit element the three
;               0x10 selectors pick byte 0 of vperm's second source and the
;               last selector (0x00 .. 0x0F) picks one byte of the first
;               source, so with a zero second source every input byte is
;               zero-extended into its own word.
.const
.align 4					;; 2^4 = 16-byte alignment, as lvx requires
vectorZERO:
    .long 0, 0, 0, 0
    .long 0x10101000, 0x10101001, 0x10101002, 0x10101003	;; source bytes  0.. 3
    .long 0x10101004, 0x10101005, 0x10101006, 0x10101007	;; source bytes  4.. 7
    .long 0x10101008, 0x10101009, 0x1010100A, 0x1010100B	;; source bytes  8..11
    .long 0x1010100C, 0x1010100D, 0x1010100E, 0x1010100F	;; source bytes 12..15


.text

; --------------------------------------------------------------------------------------
; _ppc_brightness_G4 : AltiVec brightness scaling with data-stream prefetch.
;
;   In   : r3 = src, r4 = dest, r5 = size in 32-bit pixels, r6 = coeff
;   Does : for every byte b of every pixel, dest byte = min(255, (b * coeff) >> 8).
;          vmulouh only multiplies the low halfword of each word, so only the
;          low 16 bits of coeff take part.
;   Notes:
;     * Pixels are processed four at a time (one 16-byte vector); size is
;       rounded down to a multiple of 4 and a size below 4 returns at once
;       (a zero CTR would otherwise make bdnz iterate 2^32 times).
;     * lvx/stvx ignore the low four address bits, so src and dest are
;       effectively treated as 16-byte aligned.
;     * coeff is passed to the vector unit through the static vectortmpwork
;       buffer, so concurrent calls race on that buffer.
;     * VRSAVE is OR-ed with the mask of the vector registers used here
;       (v0-v1, v4-v13), so registers the caller already marked live stay
;       marked, and is restored before returning.
;   Uses : r0, r5-r7, r9-r12, CTR, CR0, v0, v1, v4-v13 (LR is saved in r0 and restored)
; --------------------------------------------------------------------------------------
.globl _ppc_brightness_G4
.align 3
_ppc_brightness_G4:
    srwi    r5,r5,2			;; r5 = number of 4-pixel groups
    cmplwi  r5,0
    beqlr				;; nothing to process: return before touching any state
    mtctr   r5				;; CTR = loop count

;; VRSAVE : mark the vector registers used below as live
    mfspr   r11,256			;; r11 = caller's VRSAVE (restored on exit)
    oris    r12,r11,0xCFFC		;; add v0-v1 and v4-v13 to the caller's mask
    mtspr   256,r12

;; PIC base : r10 = address of the "$pb" label, LR preserved through r0
    mflr    r0
    bcl     20,31,"L00000000001$pb"
"L00000000001$pb":
    mflr    r10
    mtlr    r0

;; v10..v13 = the four vperm control vectors stored at vectorZERO+16..+79
    addis   r9,r10,ha16(vectorZERO-"L00000000001$pb")
    addi    r9,r9,lo16(vectorZERO-"L00000000001$pb")

    vxor    v0,v0,v0			;; v0 = all zeroes (second vperm source)

    addi    r9,r9,16
    lvx     v10,0,r9
    addi    r9,r9,16
    lvx     v11,0,r9
    addi    r9,r9,16
    lvx     v12,0,r9
    addi    r9,r9,16
    lvx     v13,0,r9

;; v8 = coeff in every word, v9 = shift count 8 in every word
    addis   r9,r10,ha16(vectortmpwork-"L00000000001$pb")
    addi    r9,r9,lo16(vectortmpwork-"L00000000001$pb")
    stw     r6,0(r9)			;; word 0 of the scratch vector = coeff
    li      r6,8
    stw     r6,4(r9)			;; word 1 of the scratch vector = 8
    lvx     v9,0,r9
    vspltw  v8,v9,0			;; broadcast coeff
    vspltw  v9,v9,1			;; broadcast the shift count

;; r9 = byte offset of the current vector, r7 = dst control word
    li      r9,0
    lis     r7,0x0F01			;; 0x0F010000 : block size 15 vectors, 1 block, stride 0
    b       LG4_loop
.align 4
LG4_loop:
    lvx     v1,r9,r3			;; v1 = 4 source pixels

    vperm   v4,v1,v0,v10		;; bytes  0.. 3, each zero-extended to a word
    add     r10,r9,r3			;; r10 = current source address (for dst)
    vperm   v5,v1,v0,v11		;; bytes  4.. 7
    vperm   v6,v1,v0,v12		;; bytes  8..11
    vperm   v7,v1,v0,v13		;; bytes 12..15

    vmulouh v4,v4,v8			;; 32-bit products byte * coeff
    dst     r10,r7,3			;; prefetch from the current address on stream 3
    vmulouh v5,v5,v8
    vmulouh v6,v6,v8
    vmulouh v7,v7,v8
    vsrw    v4,v4,v9			;; product >> 8
    vsrw    v5,v5,v9
    vsrw    v6,v6,v9
    vsrw    v7,v7,v9

    vpkuwus v4,v4,v5			;; words -> halfwords, unsigned saturation
    vpkuwus v6,v6,v7
    vpkuhus v1,v4,v6			;; halfwords -> bytes, clamps each result to 255

    stvx    v1,r9,r4			;; store 4 scaled pixels to dest
    addi    r9,r9,16			;; next vector

    bdnz    LG4_loop

    mtspr   256,r11			;; restore the caller's VRSAVE
    blr


; --------------------------------------------------------------------------------------
; _ppc_brightness_G5 : AltiVec brightness scaling, no data-stream prefetch.
;
;   In   : r3 = src, r4 = dest, r5 = size in 32-bit pixels, r6 = coeff
;   Does : for every byte b of every pixel, dest byte = min(255, (b * coeff) >> 8).
;          vmulouh only multiplies the low halfword of each word, so only the
;          low 16 bits of coeff take part.
;   Notes:
;     * Pixels are processed four at a time (one 16-byte vector); size is
;       rounded down to a multiple of 4 and a size below 4 returns at once
;       (a zero CTR would otherwise make bdnz iterate 2^32 times).
;     * lvx/stvx ignore the low four address bits, so src and dest are
;       effectively treated as 16-byte aligned.
;     * coeff is passed to the vector unit through the static vectortmpwork
;       buffer, so concurrent calls race on that buffer.
;     * VRSAVE is OR-ed with the mask of the vector registers used here
;       (v0-v1, v4-v13), so registers the caller already marked live stay
;       marked, and is restored before returning.
;     * This variant issues no dst, so no prefetch address or dst control
;       word is computed.
;   Uses : r0, r5, r6, r9-r12, CTR, CR0, v0, v1, v4-v13 (LR is saved in r0 and restored)
; --------------------------------------------------------------------------------------
.globl _ppc_brightness_G5
.align 3
_ppc_brightness_G5:
    srwi    r5,r5,2			;; r5 = number of 4-pixel groups
    cmplwi  r5,0
    beqlr				;; nothing to process: return before touching any state
    mtctr   r5				;; CTR = loop count

;; VRSAVE : mark the vector registers used below as live
    mfspr   r11,256			;; r11 = caller's VRSAVE (restored on exit)
    oris    r12,r11,0xCFFC		;; add v0-v1 and v4-v13 to the caller's mask
    mtspr   256,r12

;; PIC base : r10 = address of the "$pb" label, LR preserved through r0
    mflr    r0
    bcl     20,31,"L00000000002$pb"
"L00000000002$pb":
    mflr    r10
    mtlr    r0

;; v10..v13 = the four vperm control vectors stored at vectorZERO+16..+79
    addis   r9,r10,ha16(vectorZERO-"L00000000002$pb")
    addi    r9,r9,lo16(vectorZERO-"L00000000002$pb")

    vxor    v0,v0,v0			;; v0 = all zeroes (second vperm source)

    addi    r9,r9,16
    lvx     v10,0,r9
    addi    r9,r9,16
    lvx     v11,0,r9
    addi    r9,r9,16
    lvx     v12,0,r9
    addi    r9,r9,16
    lvx     v13,0,r9

;; v8 = coeff in every word, v9 = shift count 8 in every word
    addis   r9,r10,ha16(vectortmpwork-"L00000000002$pb")
    addi    r9,r9,lo16(vectortmpwork-"L00000000002$pb")
    stw     r6,0(r9)			;; word 0 of the scratch vector = coeff
    li      r6,8
    stw     r6,4(r9)			;; word 1 of the scratch vector = 8
    lvx     v9,0,r9
    vspltw  v8,v9,0			;; broadcast coeff
    vspltw  v9,v9,1			;; broadcast the shift count

;; r9 = byte offset of the current vector
    li      r9,0
    b       LG5_loop
.align 4
LG5_loop:
    lvx     v1,r9,r3			;; v1 = 4 source pixels

    vperm   v4,v1,v0,v10		;; bytes  0.. 3, each zero-extended to a word
    vperm   v5,v1,v0,v11		;; bytes  4.. 7
    vperm   v6,v1,v0,v12		;; bytes  8..11
    vperm   v7,v1,v0,v13		;; bytes 12..15

    vmulouh v4,v4,v8			;; 32-bit products byte * coeff
    vmulouh v5,v5,v8
    vmulouh v6,v6,v8
    vmulouh v7,v7,v8
    vsrw    v4,v4,v9			;; product >> 8
    vsrw    v5,v5,v9
    vsrw    v6,v6,v9
    vsrw    v7,v7,v9

    vpkuwus v4,v4,v5			;; words -> halfwords, unsigned saturation
    vpkuwus v6,v6,v7
    vpkuhus v1,v4,v6			;; halfwords -> bytes, clamps each result to 255

    stvx    v1,r9,r4			;; store 4 scaled pixels to dest
    addi    r9,r9,16			;; next vector

    bdnz    LG5_loop

    mtspr   256,r11			;; restore the caller's VRSAVE
    blr


; --------------------------------------------------------------------------------------
; _ppc_brightness_generic : scalar (integer-only) brightness scaling.
;
;   In   : r3 = src, r4 = dest, r5 = size in 32-bit pixels, r6 = coeff
;   Does : for each pixel, the three low bytes (masks 0x00FF0000, 0x0000FF00,
;          0x000000FF) are each replaced by (byte * coeff) >> 8, forced to
;          0xFF when that value does not fit in 8 bits.  The top byte of
;          every output pixel is written as 0.
;          (Presumably the three bytes are R, G, B of an xRGB pixel - confirm
;          against the Pixel definition.)
;   Notes: a size of 0 returns immediately; loading CTR with 0 would make
;          bdnz iterate 2^32 times.
;   Uses : r3, r4 (advanced through the buffers), r7-r11, CTR, CR0
; --------------------------------------------------------------------------------------
.globl _ppc_brightness_generic
.align 3
_ppc_brightness_generic:
    cmplwi  r5,0
    beqlr				;; empty buffer: nothing to do
    subi    r3,r3,4			;; pre-decrement: the loop uses update-form
    subi    r4,r4,4			;;   accesses at 4(rX)
    mtctr   r5				;; CTR = pixel count
    b       Lgeneric_loop
.align 4
Lgeneric_loop:
    lwzu    r7,4(r3)			;; r7 = next source pixel, src += 4

    ;; extract, scale and shift the three channels (interleaved for scheduling)
    rlwinm  r8,r7,16,24,31		;; r8  = (pixel >> 16) & 0xFF
    rlwinm  r9,r7,24,24,31		;; r9  = (pixel >>  8) & 0xFF
    mullw   r8,r8,r6
    rlwinm  r10,r7,0,24,31		;; r10 =  pixel        & 0xFF
    mullw   r9,r9,r6
    srwi    r8,r8,8
    mullw   r10,r10,r6
    srwi    r9,r9,8

    ;; clamp each channel: any bit above the low byte means overflow
    rlwinm. r11,r8,0,0,23		;; r11 = r8 & 0xFFFFFF00, sets CR0
    beq     Lgeneric_hi_ok
    li      r8,0xFF
Lgeneric_hi_ok:
    srwi    r10,r10,8
    rlwinm. r11,r9,0,0,23
    beq     Lgeneric_mid_ok
    li      r9,0xFF
Lgeneric_mid_ok:
    rlwinm  r7,r8,16,8,15		;; r7 = high channel << 16, every other bit 0
    rlwinm. r11,r10,0,0,23
    beq     Lgeneric_lo_ok
    li      r10,0xFF
Lgeneric_lo_ok:
    rlwimi  r7,r9,8,16,23		;; insert the middle channel at bits 8..15 (mask 0x0000FF00)
    rlwimi  r7,r10,0,24,31		;; insert the low channel at bits 0..7  (mask 0x000000FF)

    stwu    r7,4(r4)			;; store the pixel, dest += 4
    bdnz    Lgeneric_loop

    blr



; vectortmpwork : 16-byte, 16-byte-aligned, zero-initialised writable scratch
;   vector.  The AltiVec brightness routines store their scalar parameters
;   here and reload them with lvx.
.static_data
.align 4				;; 2^4 = 16-byte alignment, as lvx requires
vectortmpwork:
    .space 16				;; four zeroed 32-bit words
