Browse Source

First successful experiment with vertex interpolation using GTE intpl instructions.

master
Nico de Poel 3 years ago
parent
commit
cdc205f59a
  1. 31
      draw.h

31
draw.h

@ -350,14 +350,15 @@ typedef struct _TMPVERT
blit16(&(dst)->u, &(pv)->u);
#define lerpVert(dst, src0, src1) \
(dst)->vx = (int16_t)(((int32_t)(src0)->vx + (int32_t)(src1)->vx) >> 1); \
(dst)->vy = (int16_t)(((int32_t)(src0)->vy + (int32_t)(src1)->vy) >> 1); \
(dst)->vz = (int16_t)(((int32_t)(src0)->vz + (int32_t)(src1)->vz) >> 1); \
(dst)->r = (uint8_t)(((uint16_t)(src0)->r + (uint16_t)(src1)->r) >> 1); \
(dst)->g = (uint8_t)(((uint16_t)(src0)->g + (uint16_t)(src1)->g) >> 1); \
(dst)->b = (uint8_t)(((uint16_t)(src0)->b + (uint16_t)(src1)->b) >> 1); \
(dst)->u = (uint8_t)(((uint16_t)(src0)->u + (uint16_t)(src1)->u) >> 1); \
(dst)->v = (uint8_t)(((uint16_t)(src0)->v + (uint16_t)(src1)->v) >> 1);
gte_ld_intpol_sv1(&(src0).vx); \
gte_ld_intpol_sv0(&(src1).vx); \
gte_intpl(); \
gte_stsv(&(dst).vx); \
(dst).r = (uint8_t)(((uint16_t)(src0).r + (uint16_t)(src1).r) >> 1); \
(dst).g = (uint8_t)(((uint16_t)(src0).g + (uint16_t)(src1).g) >> 1); \
(dst).b = (uint8_t)(((uint16_t)(src0).b + (uint16_t)(src1).b) >> 1); \
(dst).u = (uint8_t)(((uint16_t)(src0).u + (uint16_t)(src1).u) >> 1); \
(dst).v = (uint8_t)(((uint16_t)(src0).v + (uint16_t)(src1).v) >> 1);
#define blitVert(dst, i, src) \
blit32(&(dst)->x ## i, &(src).vx); \
@ -378,6 +379,7 @@ static INLINE void draw_quadstrip_tess2(const ps1bsp_vertex_t *vertices, const p
i3 = head++;
TMPVERT *tmp = (TMPVERT*)(scratchpad);
const short half = ONE >> 1;
// Normally a quad strip would have (N-2)/2 quads, but we might end up with a sole triangle at the end, which will be drawn as a collapsed quad.
// NOTE: testing has shown that the PS1 is faster when just rendering quads and accepting the odd collapsed quad, rather than being clever with pointer comparisons and drawing a single triangle at the end.
@ -404,12 +406,15 @@ static INLINE void draw_quadstrip_tess2(const ps1bsp_vertex_t *vertices, const p
copyVertFast(&tmp[6], pv2, v2);
copyVertFast(&tmp[8], pv3, v3);
// Interpolation factor will always be 0.5, so we need to set this register only once.
gte_lddp(half);
// TODO Optimization: start loading vertices into GTE as soon as we're done with them
lerpVert(&tmp[1], &tmp[0], &tmp[2]);
lerpVert(&tmp[5], &tmp[2], &tmp[8]); // After this, 0 1 2 are ready for GTE
lerpVert(&tmp[3], &tmp[0], &tmp[6]);
lerpVert(&tmp[4], &tmp[3], &tmp[5]); // After this, 3 4 5 are ready for GTE
lerpVert(&tmp[7], &tmp[6], &tmp[8]);
lerpVert(tmp[1], tmp[0], tmp[2]);
lerpVert(tmp[5], tmp[2], tmp[8]); // After this, 0 1 2 are ready for GTE
lerpVert(tmp[3], tmp[0], tmp[6]);
lerpVert(tmp[4], tmp[3], tmp[5]); // After this, 3 4 5 are ready for GTE
lerpVert(tmp[7], tmp[6], tmp[8]);
// Transform the vertices in groups of three
gte_ldv3(&tmp[0], &tmp[1], &tmp[2]);

Loading…
Cancel
Save