| 
		
   
| Author | 
Message | 
 
 
	
	Jamie 
		Member		 | 
	
	
	 
		I liked the discussion on 68060 texture mapping in a previous topic, and I have some ideas that I never tested.
 
 ************************************************************
 Here is a 7-cycle, 2-pixel loop with 8.8 precision for U and 8.16 for V:
 ************************************************************
 ;-----------------------------------------------------------------------
 ; .pixelLoop - texture-mapped span, 8 texels per pass (REPT 4 x 2 texels)
 ;
 ; In:    d0 = U accumulator; bits 8-15 become the low byte of the texel offset
 ;        d1 = V accumulator; bits 16-23 become the high byte of the texel offset
 ;        a0 = step added to d0 for every texel
 ;        a1 = step added to d1 for every texel
 ;        a2 = texture base used by the (a2,dn.w) lookups
 ;        a3 = destination pointer, advanced by one byte per texel
 ;        d7 = pixel count; the loop repeats while (d7 - 8) > 0, signed
 ;        d2 = index source for the FIRST texel, already assembled as
 ;             "move.l d1,d2 / move.w d0,d2" by setup code not shown here
 ; Clobb: d2, d3, d4, condition codes
 ;
 ; Notes: - The loop is software-pipelined: the last two instructions of each
 ;          texel pair build the index source consumed by the next "lsr.l".
 ;        - Texels are written in groups of 8, so a count that is not a
 ;          multiple of 8 writes past the requested span end.
 ;        - A word-sized index register is sign-extended by the CPU, so the
 ;          reachable offsets are -32768..+32767 around a2; presumably a2
 ;          points 32 KB into a 64 KB texture - TODO confirm against setup.
 ;-----------------------------------------------------------------------
 .pixelLoop:
	rept	4
	add.l	a0,d0			; cycle 1 pOEP  step U
	lsr.l	#8,d2			; cycle 1 sOEP  d2.w = V int : U int
	add.l	a1,d1			; cycle 2 pOEP  step V
	move.b	(a2,d2.w),d4		; cycle 2 sOEP  fetch texel 0
	move.l	d1,d3			; cycle 3 pOEP  d3 = V (all 32 bits)
	move.b	d4,(a3)+		; cycle 3 sOEP  store texel 0
	move.w	d0,d3			; cycle 4 pOEP  low word of d3 = U
	add.l	a0,d0			; cycle 4 sOEP  step U
	lsr.l	#8,d3			; cycle 5 pOEP  d3.w = V int : U int
	add.l	a1,d1			; cycle 5 sOEP  step V
	move.b	(a2,d3.w),d4		; cycle 6 pOEP  fetch texel 1
	move.l	d1,d2			; cycle 6 sOEP  d2 = V (all 32 bits)
	move.b	d4,(a3)+		; cycle 7 pOEP  store texel 1
	move.w	d0,d2			; cycle 7 sOEP  low word of d2 = U
	endr

	subq.l	#8,d7			; 8 texels were written by the block above
	; The unrolled body is 4 x 32 = 128 bytes and subq adds 2 more, so the
	; backward displacement is -132: out of reach for a short (.b) branch,
	; whose limit is -128. A word-sized branch is required here.
	bgt.w	.pixelLoop
	rts
 
 * I like the fact that I don't need any ror and eor operations in the outer loop to prepare an addx-based pixel loop; when you have a lot of polygons, the outer loop is really important too.
 
 * I can have multi-size texture mapping with this loop
 
 * I can't have tiled textures
 
 ************************************************************
 Here is a pixel loop (cycle count unknown) with 8.24 precision for both U and V:
 ************************************************************
 ; Per-texel step that keeps both coordinate accumulators in memory.
 ;   (a0), (a1) = 32-bit accumulators; d0, d1 = the steps added to them
 ;   a2 = texture base, a3 = destination pointer (post-incremented)
 ; The 68k is big-endian, so a byte/word read at the accumulator's own
 ; address returns its most significant byte/word.
 ; The index is used as a full long (d2.l) but only the low word of d2 is
 ; written here, so the upper word of d2 must already hold a suitable value
 ; (presumably zero - TODO confirm against the setup code).
 ; NOTE(review): no loop counter or branch back to the label is shown.
 .pixelLoop:add.l	d0,(a0)
 	add.l	d1,(a1)
 	move.w	(a1),d2	; d2.w = most significant word of the (a1) accumulator
 	move.b	(a0),d2	; low byte of d2 = most significant byte of the (a0) accumulator
 	move.b	(a2,d2.l),(a3)+	; copy one texel to the destination
 
 * It's really accurate
 * Few registers are needed
 * The texture is tiled in both U and V
 * V can be multi-size
 
 * I'm sure it's slow :)		 
	 | 
	 
 
	
	Kalms 
		Member		 | 
	
	
	 
		If you accept dropping V precision to 8.8 in your first example, you can use:
 
 	; Two texels per pass with both coordinates packed in one register.
 	;   d0 = packed coordinate accumulator, d1 = packed step
 	;   a0 = texture base, a1 = destination pointer (post-incremented)
 	;   d2, d3 = index scratch, d4, d5 = texel scratch
 	; Index build (per texel): copy d0, "lsr.w #8" moves bits 8-15 down to
 	; bits 0-7, then "rol.l #8" brings bits 24-31 in underneath, so the low
 	; word ends up as (bits 8-15 of d0) << 8 | (bits 24-31 of d0).
 	; Both texel loads use an index finished on the previous pass, so d2 and
 	; d3 must be primed before the first pass (setup not shown here).
 	; A word-sized index register is sign-extended by the CPU.
 	; Trailing column: cycle number, p = pOEP, s = sOEP.
 	add.l	d1,d0			; 1 p
 	move.b	(a0,d2.w),d4		; 1   s
 	move.l	d0,d2			; 2 p
 	lsr.w	#8,d2			; 2   s
 	add.l	d1,d0			; 3 p
 	rol.l	#8,d2			; 3   s
 	move.b	(a0,d3.w),d5		; 4 p
 	move.l	d0,d3			; 4   s
 	lsr.w	#8,d3			; 5 p
 	move.b	d4,(a1)+		; 5   s
 	rol.l	#8,d3			; 6 p
 	move.b	d5,(a1)+		; 6   s
 
 -> 6 cycles for 2 pixels.
 here, VVvv and UUuu are packed into the same register (counter in d0, gradient in d1).
 Same restrictions apply as for your suggested loop, except that you might need to do the [reasonably cheap, but still] UV-packing per line (due to too large U->V overflow errors otherwise).		 
	 | 
	 
 
	
	Jamie 
		Member		 | 
	
	
	 
		; Two texels per pass; the coordinate accumulators live in address
		; registers so that every data register is free for index building.
		;   a2, a3 = accumulators, a0, a1 = their per-texel steps
		;   a4 = texture base, a5 = destination pointer (post-incremented)
		;   d0/d1 = scratch for texel 0, d2/d3 = scratch for texel 1
		; As written: "ror.l #8" leaves bits 8-15 of the a2 value in the low
		; byte of d0/d2; "rol.l #8" leaves bits 0-7 of the a3 value in bits
		; 8-15 of d1/d3; "move.b" then merges the two, so the index word is
		; (bits 0-7 of a3 value) << 8 | (bits 8-15 of a2 value).
		; NOTE(review): taking the LOWEST byte of the a3 value as the index
		; high byte looks unintended for a fixed-point accumulator (carries
		; from a fraction cannot reach it) - confirm the intended rotate
		; counts/directions against the author's coordinate format.
		; A word-sized index register is sign-extended by the CPU.
		add.l	a0,a2
 add.l	a1,a3
 move.l	a2,d0
 move.l	a3,d1
 add.l	a0,a2
 add.l	a1,a3
 move.l	a2,d2
 move.l	a3,d3
 ror.l	#8,d0
 rol.l	#8,d1
 ror.l	#8,d2
 rol.l	#8,d3
 move.b	d0,d1
 move.b	d2,d3
 move.b	(a4,d1.w),(a5)+
 move.b	(a4,d3.w),(a5)+
 
 -> 4.5 cycles, but you get 24-bit precision, multi-size textures and tiling		 
	 | 
	 
 
	
	Jamie 
		Member		 | 
	
	
	 
		Finally: 4 cycles, with 8:16 precision for U and 8:24 for V
 
 XX U1 P1 P2
 V1 P1 P2 P3
 
 	; Two texels per pass, software-pipelined across d2 and d3: each half
 	; finishes (shifts) the index started in the other half while starting
 	; a new one, so d3 must be primed as (d1 AND d4) OR d0 before the first
 	; pass (setup not shown here).
 	;   d1 = accumulator that gets masked, a1 = its step
 	;   d0 = accumulator that gets OR-ed in, a0 = its step
 	;   d4 = AND mask applied to the copy of d1
 	;   d5 = right-shift count applied to the merged value
 	;   a2 = texture base (full 32-bit index), a3 = destination pointer
 	;   d6 = texel scratch
 	; NOTE(review): the values of d4 and d5 are not shown; with the layout
 	; given in the text above, presumably d4 = $FF000000 and d5 = 16 - TODO
 	; confirm. The OR merge also assumes the bits of d0 under the mask are
 	; zero, which nothing in this snippet enforces.
 	move.l	d1,d2
 	lsr.l	d5,d3
 	and.l	d4,d2
 	add.l	a1,d1
 	move.b	(a2,d3.l ),d6
 	or.l	d0,d2
 	move.b	d6,(a3)+
 	add.l	a0,d0
 
 	move.l	d1,d3
 	lsr.l	d5,d2
 	and.l	d4,d3
 	add.l	a1,d1
 	move.b	(a2,d2.l ),d6
 	or.l	d0,d3
 	move.b	d6,(a3)+
 	add.l	a0,d0		 
	 | 
	 
 
	
	Jamie 
		Member		 | 
	
	
	 
		Maybe an interesting solution for 16:16 UV:
 
 d0 = U1 P1 P2 V1
 d1 = P1 P2 xx xx
 
 ; One texel per pass using a 64-bit add split across d1 (low) and d0 (high).
 ;   d0 = upper half of the accumulator, d2 = upper half of the step
 ;   d1 = lower half of the accumulator, d3 = lower half of the step
 ;   a0 = texture base, a1 = destination pointer (post-incremented)
 ; "rol.l #8" moves the top byte of d0 into bits 0-7 and the former low byte
 ; into bits 8-15, which forms the index word; "ror.l #8" then restores d0.
 ; A word-sized index register is sign-extended by the CPU.
 rol.l #8,d0
 move.b (a0,d0.w),(a1)+
 ror.l #8,d0
 add.l d3,d1	; sets X on carry out of the lower half
 addx.l  d2,d0		 ; adds the upper step plus that carry
	 | 
	 
 
  
	
	
	
	
			
		 | 
		 |