|
|
|
@ -27,6 +27,19 @@ |
|
|
|
: "r"( r0 ) \ |
|
|
|
: "$12", "$13", "$14" ) |
|
|
|
|
|
|
|
// gte_stcuv( r0 ): read three coprocessor-2 registers and store their low
// bytes to memory starting at the address in r0.
//
//   - `mfc2` moves coprocessor-2 registers $9, $10 and $11 into CPU
//     registers $12, $13 and $14 respectively.
//   - `sb` then writes the low byte of $12 to r0+0, of $13 to r0+1, and of
//     $14 to r0+2, r0+3 and r0+4 (five bytes written in total).
//
// r0 is passed as a register input operand. $12-$14 are listed as clobbers,
// and "memory" is listed because the stores go through a pointer the
// compiler cannot see.
//
// NOTE(review): coprocessor-2 registers $9-$11 are presumably the GTE
// IR1-IR3 results of a preceding interpolation - confirm against the GTE
// register map.
// NOTE(review): the third value is written to three consecutive bytes
// (offsets 2..4). Presumably intentional (one value replicated across three
// channels) - confirm against the layout of the destination struct.
#define gte_stcuv( r0 ) __asm__ volatile ( \ |
|
|
|
"mfc2 $12, $9;" \ |
|
|
|
"mfc2 $13, $10;" \ |
|
|
|
"mfc2 $14, $11;" \ |
|
|
|
"sb $12, 0( %0 );" \ |
|
|
|
"sb $13, 1( %0 );" \ |
|
|
|
"sb $14, 2( %0 );" \ |
|
|
|
"sb $14, 3( %0 );" \ |
|
|
|
"sb $14, 4( %0 );" \ |
|
|
|
: \ |
|
|
|
: "r"( r0 ) \ |
|
|
|
: "$12", "$13", "$14", "memory" ) |
|
|
|
|
|
|
|
// Macros for quickly blitting RGB and UV values with a single copy |
|
|
|
// This is faster than copying each value individually |
|
|
|
// setColorFast(pr, r): copy one u_int from the address `r` to the address
// `pr`. Both arguments are cast to u_int*, so whatever they point at is
// read and written as a single u_int-sized word.
// NOTE(review): the expansion is a bare assignment expression without outer
// parentheses, so use it only as a complete statement.
// NOTE(review): both pointers presumably need to be aligned for u_int on the
// target - confirm for every call site.
#define setColorFast(pr, r) *((u_int*)(pr)) = *((u_int*)(r)) |
|
|
|
@ -360,16 +373,16 @@ static INLINE void draw_quadstrip_textured(const ps1bsp_vertex_t *vertices, cons |
|
|
|
// TMPVERT: temporary vertex record. draw_quadstrip_tess2 treats `scratchpad`
// as an array of these (`TMPVERT *tmp = (TMPVERT*)(scratchpad);`).
//
// Fields as listed here:
//   vx, vy, vz, vpad : four int16_t values
//   r, g, b, cpad    : four uint8_t values
//   u, v             : two uint8_t values
//   pad              : one uint16_t
//
// NOTE(review): `uint8_t r, g, b, cpad;` is listed twice in this view, once
// before and once after `u, v`. This looks like the old and new position of
// the same field group taken from a diff hunk; a real struct can only hold
// one of them. Confirm which ordering is current - it decides whether `r`
// starts on a 4-byte boundary.
typedef struct _TMPVERT |
|
|
|
{ |
|
|
|
int16_t vx, vy, vz, vpad; |
|
|
|
uint8_t r, g, b, cpad; |
|
|
|
uint8_t u, v; |
|
|
|
uint8_t r, g, b, cpad; |
|
|
|
uint16_t pad; |
|
|
|
} TMPVERT; |
|
|
|
|
|
|
|
// copyVertFast(dst, pv, v): fill one destination vertex from two sources
// using four blits:
//   - blit32 from &(pv)->r  into dst's `r`
//   - blit32 from &(v)->vx  into dst's `vx`
//   - blit32 from &(v)->vz  into dst's `vz`
//   - blit16 from &(pv)->u  into dst's `u`
// blit32 / blit16 are defined outside this view; presumably they copy 32 and
// 16 bits, so each call would also carry along the fields that follow the
// named one (vy with vx, vpad with vz, v with u, g/b/cpad with r) - confirm.
//
// NOTE(review): two variants of the body appear below. The first four lines
// access dst through a pointer (`(dst)->`), the last four access it as an
// lvalue (`(dst).`). The `blit16(&(dst)->u, ...)` line has no trailing
// backslash, so as shown the macro ends there and the `.` lines fall outside
// it. This looks like the removed and added sides of a diff hunk; the call
// sites in draw_quadstrip_tess2 likewise appear in both `&tmp[0]` and
// `tmp[0]` forms. Confirm which variant is current.
// NOTE(review): if `r` sits after `u, v` in TMPVERT (byte offset 10), the
// blit32 on `r` would not be 4-byte aligned - verify that blit32 tolerates
// this on the target.
#define copyVertFast(dst, pv, v) \ |
|
|
|
blit32(&(dst)->r, &(pv)->r); \ |
|
|
|
blit32(&(dst)->vx, &(v)->vx); \ |
|
|
|
blit32(&(dst)->vz, &(v)->vz); \ |
|
|
|
blit16(&(dst)->u, &(pv)->u); |
|
|
|
blit32(&(dst).r, &(pv)->r); \ |
|
|
|
blit32(&(dst).vx, &(v)->vx); \ |
|
|
|
blit32(&(dst).vz, &(v)->vz); \ |
|
|
|
blit16(&(dst).u, &(pv)->u); |
|
|
|
|
|
|
|
#define lerpVert(dst, src0, src1) \ |
|
|
|
gte_ld_intpol_sv1(&(src0).vx); \ |
|
|
|
@ -404,8 +417,10 @@ static INLINE void draw_quadstrip_tess2(const ps1bsp_vertex_t *vertices, const p |
|
|
|
i3 = head++; |
|
|
|
|
|
|
|
TMPVERT *tmp = (TMPVERT*)(scratchpad); |
|
|
|
const short half = ONE >> 1; |
|
|
|
|
|
|
|
// Interpolation factor will always be 0.5 so we need to set this register only once |
|
|
|
gte_lddp(ONE >> 1); |
|
|
|
|
|
|
|
// Normally a quad strip would have (N-2)/2 quads, but we might end up with a sole triangle at the end which will be drawn as a collapsed quad |
|
|
|
// NOTE: testing has shown that the PS1 is faster just rendering quads and accepting the odd collapsed quad, rather than being clever with pointer comparisons and drawing a single triangle at the end. |
|
|
|
u_char numQuads = (numVerts - 1) >> 1; |
|
|
|
@ -426,13 +441,10 @@ static INLINE void draw_quadstrip_tess2(const ps1bsp_vertex_t *vertices, const p |
|
|
|
v2 = &vertices[pv2->index]; |
|
|
|
v3 = &vertices[pv3->index]; |
|
|
|
|
|
|
|
copyVertFast(&tmp[0], pv0, v0); |
|
|
|
copyVertFast(&tmp[2], pv1, v1); |
|
|
|
copyVertFast(&tmp[6], pv2, v2); |
|
|
|
copyVertFast(&tmp[8], pv3, v3); |
|
|
|
|
|
|
|
// Interpolation factor will always be 0.5 so we need to set this register only once |
|
|
|
gte_lddp(half); |
|
|
|
copyVertFast(tmp[0], pv0, v0); |
|
|
|
copyVertFast(tmp[2], pv1, v1); |
|
|
|
copyVertFast(tmp[6], pv2, v2); |
|
|
|
copyVertFast(tmp[8], pv3, v3); |
|
|
|
|
|
|
|
// TODO Optimization: start loading vertices into GTE as soon as we're done with them |
|
|
|
lerpVert(tmp[1], tmp[0], tmp[2]); |
|
|
|
|