From cdc205f59afa557e4d7f1fbf5f246106589bd348 Mon Sep 17 00:00:00 2001
From: Nico de Poel
Date: Fri, 10 Feb 2023 12:25:13 +0100
Subject: [PATCH] First successful experiment with vertex interpolation using GTE intpl instructions.

---
Reviewer notes (this area is not part of the commit message):
 * Line structure was restored from a whitespace-collapsed copy of this
   patch. Line counts were checked against every @@ header, but the leading
   indentation of the C lines is reconstructed; if context fails to match,
   apply with `git apply --ignore-whitespace`.
 * lerpVert now takes TMPVERT lvalues instead of pointers; all five call
   sites touched by this patch are updated to match.
 * lerpVert expands to several statements and is not wrapped in
   do { } while (0), so it is only safe as a full statement of its own
   (which is how every call site in this patch uses it).
 * lerpVert itself never loads the interpolation factor; it presumably
   relies on the single gte_lddp(half) added in draw_quadstrip_tess2 having
   run first -- confirm before reusing the macro elsewhere.

 draw.h | 31 ++++++++++++++++++-------------
 1 file changed, 18 insertions(+), 13 deletions(-)

diff --git a/draw.h b/draw.h
index adb2c2a..03a2582 100644
--- a/draw.h
+++ b/draw.h
@@ -350,14 +350,15 @@ typedef struct _TMPVERT
     blit16(&(dst)->u, &(pv)->u);
 
 #define lerpVert(dst, src0, src1) \
-    (dst)->vx = (int16_t)(((int32_t)(src0)->vx + (int32_t)(src1)->vx) >> 1); \
-    (dst)->vy = (int16_t)(((int32_t)(src0)->vy + (int32_t)(src1)->vy) >> 1); \
-    (dst)->vz = (int16_t)(((int32_t)(src0)->vz + (int32_t)(src1)->vz) >> 1); \
-    (dst)->r = (uint8_t)(((uint16_t)(src0)->r + (uint16_t)(src1)->r) >> 1); \
-    (dst)->g = (uint8_t)(((uint16_t)(src0)->g + (uint16_t)(src1)->g) >> 1); \
-    (dst)->b = (uint8_t)(((uint16_t)(src0)->b + (uint16_t)(src1)->b) >> 1); \
-    (dst)->u = (uint8_t)(((uint16_t)(src0)->u + (uint16_t)(src1)->u) >> 1); \
-    (dst)->v = (uint8_t)(((uint16_t)(src0)->v + (uint16_t)(src1)->v) >> 1);
+    gte_ld_intpol_sv1(&(src0).vx); \
+    gte_ld_intpol_sv0(&(src1).vx); \
+    gte_intpl(); \
+    gte_stsv(&(dst).vx); \
+    (dst).r = (uint8_t)(((uint16_t)(src0).r + (uint16_t)(src1).r) >> 1); \
+    (dst).g = (uint8_t)(((uint16_t)(src0).g + (uint16_t)(src1).g) >> 1); \
+    (dst).b = (uint8_t)(((uint16_t)(src0).b + (uint16_t)(src1).b) >> 1); \
+    (dst).u = (uint8_t)(((uint16_t)(src0).u + (uint16_t)(src1).u) >> 1); \
+    (dst).v = (uint8_t)(((uint16_t)(src0).v + (uint16_t)(src1).v) >> 1);
 
 #define blitVert(dst, i, src) \
     blit32(&(dst)->x ## i, &(src).vx); \
@@ -378,6 +379,7 @@ static INLINE void draw_quadstrip_tess2(const ps1bsp_vertex_t *vertices, const p
     i3 = head++;
 
     TMPVERT *tmp = (TMPVERT*)(scratchpad);
+    const short half = ONE >> 1;
 
     // Normally a quad strip would have (N-2)/2 quads, but we might end up with a sole triangle at the end which will be drawn as a collapsed quad
     // NOTE: testing has shown that the PS1 is faster just rendering quads and accepting the odd collapsed quad, rather than being clever with pointer comparisons and drawing a single triangle at the end.
@@ -404,12 +406,15 @@ static INLINE void draw_quadstrip_tess2(const ps1bsp_vertex_t *vertices, const p
         copyVertFast(&tmp[6], pv2, v2);
         copyVertFast(&tmp[8], pv3, v3);
 
+        // Interpolation factor will always be 0.5 so we need to set this register only once
+        gte_lddp(half);
+
         // TODO Optimization: start loading vertices into GTE as soon as we're done with them
-        lerpVert(&tmp[1], &tmp[0], &tmp[2]);
-        lerpVert(&tmp[5], &tmp[2], &tmp[8]); // After this, 0 1 2 are ready for GTE
-        lerpVert(&tmp[3], &tmp[0], &tmp[6]);
-        lerpVert(&tmp[4], &tmp[3], &tmp[5]); // After this, 3 4 5 are ready for GTE
-        lerpVert(&tmp[7], &tmp[6], &tmp[8]);
+        lerpVert(tmp[1], tmp[0], tmp[2]);
+        lerpVert(tmp[5], tmp[2], tmp[8]); // After this, 0 1 2 are ready for GTE
+        lerpVert(tmp[3], tmp[0], tmp[6]);
+        lerpVert(tmp[4], tmp[3], tmp[5]); // After this, 3 4 5 are ready for GTE
+        lerpVert(tmp[7], tmp[6], tmp[8]);
 
         // Transform the vertices in groups of three
         gte_ldv3(&tmp[0], &tmp[1], &tmp[2]);