diff --git a/draw.h b/draw.h index 35c8674..b678d76 100644 --- a/draw.h +++ b/draw.h @@ -398,6 +398,68 @@ typedef struct _TMPVERT gte_intpl(); \ gte_stbv(&(dst).u); +#define lerpVert2(dst, src0, src1) /* lerpVert2(dst, src0, src1): dst, src0 and src1 are pointers used as base addresses by the assembly below (the call sites pass &tmp[n], whereas lerpVert was invoked with the array elements themselves). The macro runs three passes of inline MIPS coprocessor-2 code. Each pass moves fields of src0 into cop2 data registers $9-$11 (mtc2), moves the same fields of src1 into cop2 control registers $21-$23 (ctc2), issues cop2 command 0x0980011, then reads data registers $9-$11 back (mfc2) and stores them to dst at the same byte offsets. Pass 1 handles signed halfwords at offsets 0, 2, 4; pass 2 handles unsigned bytes at offsets 8, 9; pass 3 handles unsigned bytes at offsets 10, 11, 12. No other cop2 register is written here. NOTE(review): 0x0980011 is presumably the same GTE interpolation command that gte_intpl() issues in lerpVert, with the interpolation factor preloaded elsewhere - confirm against the GTE inline header of the SDK. NOTE(review): the byte offsets are hard-coded - confirm they match the vertex struct layout. */ __asm__ volatile ( \ + "lh $12, 0( %1 );" /* pass 1: signed halfwords at byte offsets 0, 2, 4 of src0 go to cop2 data registers $9-$11 */ \ + "lh $13, 2( %1 );" \ + "lh $14, 4( %1 );" \ + "mtc2 $12, $9;" \ + "mtc2 $13, $10;" \ + "mtc2 $14, $11;" \ + "lh $12, 0( %2 );" /* the same fields of src1 go to cop2 control registers $21-$23 */ \ + "lh $13, 2( %2 );" \ + "lh $14, 4( %2 );" \ + "ctc2 $12, $21;" \ + "ctc2 $13, $22;" \ + "ctc2 $14, $23;" \ + "nop;" /* two nops separate the last ctc2 from the cop2 command; NOTE(review): presumably a required delay - confirm */ \ + "nop;" \ + "cop2 0x0980011;" /* issue cop2 command 0x0980011 */ \ + "mfc2 $12, $9;" /* read data registers $9-$11 back and store them as halfwords at dst offsets 0, 2, 4 */ \ + "mfc2 $13, $10;" \ + "mfc2 $14, $11;" \ + "sh $12, 0( %0 );" \ + "sh $13, 2( %0 );" \ + "sh $14, 4( %0 );" \ + "lbu $12, 8( %1 );" /* pass 2: unsigned bytes at offsets 8 and 9; only $9/$10 and $21/$22 are reloaded and only two results are stored */ \ + "lbu $13, 9( %1 );" \ + "mtc2 $12, $9;" \ + "mtc2 $13, $10;" \ + "lbu $12, 8( %2 );" \ + "lbu $13, 9( %2 );" \ + "ctc2 $12, $21;" \ + "ctc2 $13, $22;" \ + "nop;" \ + "nop;" \ + "cop2 0x0980011;" \ + "mfc2 $12, $9;" \ + "mfc2 $13, $10;" \ + "sb $12, 8( %0 );" \ + "sb $13, 9( %0 );" \ + "lbu $12, 10( %1 );" /* pass 3: unsigned bytes at offsets 10, 11, 12, same sequence as pass 1 but with byte loads and stores */ \ + "lbu $13, 11( %1 );" \ + "lbu $14, 12( %1 );" \ + "mtc2 $12, $9;" \ + "mtc2 $13, $10;" \ + "mtc2 $14, $11;" \ + "lbu $12, 10( %2 );" \ + "lbu $13, 11( %2 );" \ + "lbu $14, 12( %2 );" \ + "ctc2 $12, $21;" \ + "ctc2 $13, $22;" \ + "ctc2 $14, $23;" \ + "nop;" \ + "nop;" \ + "cop2 0x0980011;" \ + "mfc2 $12, $9;" \ + "mfc2 $13, $10;" \ + "mfc2 $14, $11;" \ + "sb $12, 10( %0 );" \ + "sb $13, 11( %0 );" \ + "sb $14, 12( %0 );" \ + : /* no output operands: results are written through the dst pointer */ \ + : "r"(dst), "r"(src0), "r"(src1) /* %0 = dst, %1 = src0, %2 = src1, all passed as input registers */ \ + : "$12", "$13", "$14", "memory" /* scratch registers used above; memory is declared clobbered since the asm reads src0/src1 and writes dst via plain register operands */ ) + #define blitVert(dst, i, src) \ blit32(&(dst)->x ## i, &(src).vx); \ blit32(&(dst)->r ## i, &(src).r); \ @@ -447,11 +509,11 @@ static INLINE void draw_quadstrip_tess2(const ps1bsp_vertex_t *vertices, const p copyVertFast(tmp[8], pv3, v3); // TODO Optimization: start loading vertices into GTE as soon as we're done with them - lerpVert(tmp[1], tmp[0], tmp[2]); - lerpVert(tmp[5], tmp[2], tmp[8]); // After this, 0 1 2 are ready for GTE - lerpVert(tmp[3], tmp[0], tmp[6]); - lerpVert(tmp[4], 
tmp[3], tmp[5]); // After this, 3 4 5 are ready for GTE - lerpVert(tmp[7], tmp[6], tmp[8]); + lerpVert2(&tmp[1], &tmp[0], &tmp[2]); + lerpVert2(&tmp[5], &tmp[2], &tmp[8]); // After this, 0 1 2 are ready for GTE + lerpVert2(&tmp[3], &tmp[0], &tmp[6]); + lerpVert2(&tmp[4], &tmp[3], &tmp[5]); // After this, 3 4 5 are ready for GTE + lerpVert2(&tmp[7], &tmp[6], &tmp[8]); // Transform the vertices in groups of three gte_ldv3(&tmp[0], &tmp[1], &tmp[2]);