Browse Source

Some rearrangement that will hopefully allow single light values to be lerped right alongside UVs, saving on one lerp per vertex.

master
Nico de Poel 3 years ago
parent
commit
909b32f4c0
  1. 38
      draw.h

38
draw.h

@ -27,6 +27,19 @@
: "r"( r0 ) \
: "$12", "$13", "$14" )
// gte_stcuv( r0 ): reads three coprocessor-2 registers and stores their low bytes to the
// five consecutive bytes starting at address r0.
//   - mfc2 moves cop2 registers $9, $10 and $11 into CPU registers $12, $13 and $14.
//   - sb writes the low byte of $12 to r0[0], of $13 to r0[1], and of $14 to r0[2], r0[3] and r0[4].
// The asm has no outputs; r0 is passed in a register. CPU registers $12-$14 are clobbered and
// "memory" is declared so the compiler does not cache values across the stores.
// NOTE(review): presumably cop2 $9-$11 are the GTE IR1-IR3 interpolation results, written as
// u, v and a single light value replicated into r, g, b - confirm against the GTE register map
// and the layout of the structure r0 points into.
#define gte_stcuv( r0 ) __asm__ volatile ( \
"mfc2 $12, $9;" \
"mfc2 $13, $10;" \
"mfc2 $14, $11;" \
"sb $12, 0( %0 );" \
"sb $13, 1( %0 );" \
"sb $14, 2( %0 );" \
"sb $14, 3( %0 );" \
"sb $14, 4( %0 );" \
: \
: "r"( r0 ) \
: "$12", "$13", "$14", "memory" )
// Macros for quickly blitting RGB and UV values with a single copy
// This is faster than copying each value individually
//
// setColorFast(pr, r): copies one u_int-sized word from address r to address pr.
// Both arguments are cast to u_int* and dereferenced, so each must point to storage that is
// valid and suitably aligned for a u_int access.
// The whole expansion is parenthesized so the macro behaves as a single expression when it is
// used inside a larger one (e.g. in a comparison); it evaluates to the value that was copied.
#define setColorFast(pr, r) (*((u_int*)(pr)) = *((u_int*)(r)))
@ -360,16 +373,16 @@ static INLINE void draw_quadstrip_textured(const ps1bsp_vertex_t *vertices, cons
// Temporary vertex record. draw_quadstrip_tess2 casts the scratchpad to an array of these
// and fills the entries via copyVertFast / lerpVert.
// NOTE(review): the "r, g, b, cpad" member line appears twice in this listing, once before and
// once after "u, v". This looks like the removed and the added line of a diff shown together;
// a real struct can only contain one of them - confirm which ordering is current.
typedef struct _TMPVERT
{
int16_t vx, vy, vz, vpad; // presumably vertex position; vpad keeps the group at four 16-bit values
uint8_t r, g, b, cpad;
uint8_t u, v; // presumably texture coordinates
uint8_t r, g, b, cpad;
uint16_t pad;
} TMPVERT;
// copyVertFast(dst, pv, v): fills one temporary vertex from two sources using blit32/blit16:
//   - a 32-bit blit starting at pv->r into dst's r,
//   - two 32-bit blits starting at v->vx and v->vz into dst's vx and vz,
//   - a 16-bit blit starting at pv->u into dst's u.
// NOTE(review): two variants of the blit lines appear in this listing - one taking dst as a
// pointer ("(dst)->") and one taking it as an lvalue ("(dst)."). The call sites likewise appear
// both as copyVertFast(&tmp[0], ...) and copyVertFast(tmp[0], ...); this looks like old and new
// diff lines shown together - confirm which form is current.
// NOTE(review): blit32/blit16 are defined elsewhere; if blit32 performs a word access and the
// destination struct places u, v ahead of r, then r sits at byte offset 10, which is not
// 4-byte aligned - confirm that this is handled.
#define copyVertFast(dst, pv, v) \
blit32(&(dst)->r, &(pv)->r); \
blit32(&(dst)->vx, &(v)->vx); \
blit32(&(dst)->vz, &(v)->vz); \
blit16(&(dst)->u, &(pv)->u);
blit32(&(dst).r, &(pv)->r); \
blit32(&(dst).vx, &(v)->vx); \
blit32(&(dst).vz, &(v)->vz); \
blit16(&(dst).u, &(pv)->u);
#define lerpVert(dst, src0, src1) \
gte_ld_intpol_sv1(&(src0).vx); \
@ -404,8 +417,10 @@ static INLINE void draw_quadstrip_tess2(const ps1bsp_vertex_t *vertices, const p
i3 = head++;
TMPVERT *tmp = (TMPVERT*)(scratchpad);
const short half = ONE >> 1;
// Interpolation factor will always be 0.5 so we need to set this register only once
gte_lddp(ONE >> 1);
// Normally a quad strip would have (N-2)/2 quads, but we might end up with a sole triangle at the end which will be drawn as a collapsed quad
// NOTE: testing has shown that the PS1 is faster just rendering quads and accepting the odd collapsed quad, rather than being clever with pointer comparisons and drawing a single triangle at the end.
u_char numQuads = (numVerts - 1) >> 1;
@ -426,13 +441,10 @@ static INLINE void draw_quadstrip_tess2(const ps1bsp_vertex_t *vertices, const p
v2 = &vertices[pv2->index];
v3 = &vertices[pv3->index];
copyVertFast(&tmp[0], pv0, v0);
copyVertFast(&tmp[2], pv1, v1);
copyVertFast(&tmp[6], pv2, v2);
copyVertFast(&tmp[8], pv3, v3);
// Interpolation factor will always be 0.5 so we need to set this register only once
gte_lddp(half);
copyVertFast(tmp[0], pv0, v0);
copyVertFast(tmp[2], pv1, v1);
copyVertFast(tmp[6], pv2, v2);
copyVertFast(tmp[8], pv3, v3);
// TODO Optimization: start loading vertices into GTE as soon as we're done with them
lerpVert(tmp[1], tmp[0], tmp[2]);

Loading…
Cancel
Save