From e0e43d6e39ee12b51d47e67c1db3d00e37971bf1 Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Thu, 9 Feb 2023 14:01:57 +0100 Subject: [PATCH] Reordered some vertex code to allow for optimized GTE load/stores --- draw.h | 80 ++++++++++++++++++++++++++++++---------------------------- 1 file changed, 42 insertions(+), 38 deletions(-) diff --git a/draw.h b/draw.h index 559afe1..adb2c2a 100644 --- a/draw.h +++ b/draw.h @@ -368,6 +368,7 @@ static INLINE void draw_quadstrip_tess2(const ps1bsp_vertex_t *vertices, const p { const ps1bsp_polyvertex_t *pv0, *pv1, *pv2, *pv3; const ps1bsp_vertex_t *v0, *v1, *v2, *v3; + POLY_GT4 *p0, *p1, *p2, *p3; u_char i0, i1, i2, i3; u_char head = 0; u_char tail = numVerts; @@ -403,11 +404,12 @@ static INLINE void draw_quadstrip_tess2(const ps1bsp_vertex_t *vertices, const p copyVertFast(&tmp[6], pv2, v2); copyVertFast(&tmp[8], pv3, v3); + // TODO Optimization: start loading vertices into GTE as soon as we're done with them lerpVert(&tmp[1], &tmp[0], &tmp[2]); + lerpVert(&tmp[5], &tmp[2], &tmp[8]); // After this, 0 1 2 are ready for GTE lerpVert(&tmp[3], &tmp[0], &tmp[6]); - lerpVert(&tmp[5], &tmp[2], &tmp[8]); + lerpVert(&tmp[4], &tmp[3], &tmp[5]); // After this, 3 4 5 are ready for GTE lerpVert(&tmp[7], &tmp[6], &tmp[8]); - lerpVert(&tmp[4], &tmp[3], &tmp[5]); // Transform the vertices in groups of three gte_ldv3(&tmp[0], &tmp[1], &tmp[2]); @@ -428,49 +430,51 @@ static INLINE void draw_quadstrip_tess2(const ps1bsp_vertex_t *vertices, const p gte_stsxy1(&tmp[7].vx); gte_stsxy2(&tmp[8].vx); + // TODO Optimization: intersperse quad building instructions while waiting on GTE store/load delays + // Draw the first quad - POLY_GT4 *poly = (POLY_GT4*)mem_prim(sizeof(POLY_GT4)); - blitVert(poly, 0, tmp[0]); - poly->clut = quake_clut; - blitVert(poly, 1, tmp[3]); - poly->tpage = tpage; - blitVert(poly, 2, tmp[1]); - blitVert(poly, 3, tmp[4]); - setPolyGT4(poly); - addPrim(ot, poly); + p0 = (POLY_GT4*)mem_prim(sizeof(POLY_GT4)); + blitVert(p0, 0, 
tmp[0]); + p0->clut = quake_clut; + blitVert(p0, 1, tmp[3]); + p0->tpage = tpage; + blitVert(p0, 2, tmp[1]); + blitVert(p0, 3, tmp[4]); + setPolyGT4(p0); + addPrim(ot, p0); // Second quad - poly = (POLY_GT4*)mem_prim(sizeof(POLY_GT4)); - blitVert(poly, 0, tmp[1]); - poly->clut = quake_clut; - blitVert(poly, 1, tmp[4]); - poly->tpage = tpage; - blitVert(poly, 2, tmp[2]); - blitVert(poly, 3, tmp[5]); - setPolyGT4(poly); - addPrim(ot, poly); + p1 = (POLY_GT4*)mem_prim(sizeof(POLY_GT4)); + blitVert(p1, 0, tmp[1]); + p1->clut = quake_clut; + blitVert(p1, 1, tmp[4]); + p1->tpage = tpage; + blitVert(p1, 2, tmp[2]); + blitVert(p1, 3, tmp[5]); + setPolyGT4(p1); + addPrim(ot, p1); // Third quad - poly = (POLY_GT4*)mem_prim(sizeof(POLY_GT4)); - blitVert(poly, 0, tmp[3]); - poly->clut = quake_clut; - blitVert(poly, 1, tmp[6]); - poly->tpage = tpage; - blitVert(poly, 2, tmp[4]); - blitVert(poly, 3, tmp[7]); - setPolyGT4(poly); - addPrim(ot, poly); + p2 = (POLY_GT4*)mem_prim(sizeof(POLY_GT4)); + blitVert(p2, 0, tmp[3]); + p2->clut = quake_clut; + blitVert(p2, 1, tmp[6]); + p2->tpage = tpage; + blitVert(p2, 2, tmp[4]); + blitVert(p2, 3, tmp[7]); + setPolyGT4(p2); + addPrim(ot, p2); // Fourth quad - poly = (POLY_GT4*)mem_prim(sizeof(POLY_GT4)); - blitVert(poly, 0, tmp[4]); - poly->clut = quake_clut; - blitVert(poly, 1, tmp[7]); - poly->tpage = tpage; - blitVert(poly, 2, tmp[5]); - blitVert(poly, 3, tmp[8]); - setPolyGT4(poly); - addPrim(ot, poly); + p3 = (POLY_GT4*)mem_prim(sizeof(POLY_GT4)); + blitVert(p3, 0, tmp[4]); + p3->clut = quake_clut; + blitVert(p3, 1, tmp[7]); + p3->tpage = tpage; + blitVert(p3, 2, tmp[5]); + blitVert(p3, 3, tmp[8]); + setPolyGT4(p3); + addPrim(ot, p3); polyCount += 4; }