Reordered some vertex code to allow for optimized GTE load/stores

3 years ago · e0e43d6e39
1 changed files with 42 additions and 38 deletions
--- a/draw.h
+++ b/draw.h
@@ -368,6 +368,7 @@ static INLINE void draw_quadstrip_tess2(const ps1bsp_vertex_t *vertices, const p
 {
    const ps1bsp_polyvertex_t *pv0, *pv1, *pv2, *pv3;
    const ps1bsp_vertex_t *v0, *v1, *v2, *v3;
+    POLY_GT4 *p0, *p1, *p2, *p3;
    u_char i0, i1, i2, i3;
    u_char head = 0;
    u_char tail = numVerts;
@@ -403,11 +404,12 @@ static INLINE void draw_quadstrip_tess2(const ps1bsp_vertex_t *vertices, const p
        copyVertFast(&tmp[6], pv2, v2);
        copyVertFast(&tmp[8], pv3, v3);

+        // TODO Optimization: start loading vertices into GTE as soon as we're done with them
        lerpVert(&tmp[1], &tmp[0], &tmp[2]);
+        lerpVert(&tmp[5], &tmp[2], &tmp[8]);    // After this, 0 1 2 are ready for GTE
        lerpVert(&tmp[3], &tmp[0], &tmp[6]);
-        lerpVert(&tmp[5], &tmp[2], &tmp[8]);
+        lerpVert(&tmp[4], &tmp[3], &tmp[5]);    // After this, 3 4 5 are ready for GTE
        lerpVert(&tmp[7], &tmp[6], &tmp[8]);
-        lerpVert(&tmp[4], &tmp[3], &tmp[5]);

        // Transform the vertices in groups of three
        gte_ldv3(&tmp[0], &tmp[1], &tmp[2]);
@@ -428,49 +430,51 @@ static INLINE void draw_quadstrip_tess2(const ps1bsp_vertex_t *vertices, const p
        gte_stsxy1(&tmp[7].vx);
        gte_stsxy2(&tmp[8].vx);

+        // TODO Optimization: intersperse quad building instructions while waiting on GTE store/load delays
+
        // Draw the first quad
-        POLY_GT4 *poly = (POLY_GT4*)mem_prim(sizeof(POLY_GT4));
-        blitVert(poly, 0, tmp[0]);
-        poly->clut = quake_clut;
-        blitVert(poly, 1, tmp[3]);
-        poly->tpage = tpage;
-        blitVert(poly, 2, tmp[1]);
-        blitVert(poly, 3, tmp[4]);
-        setPolyGT4(poly);
-        addPrim(ot, poly);
+        p0 = (POLY_GT4*)mem_prim(sizeof(POLY_GT4));
+        blitVert(p0, 0, tmp[0]);
+        p0->clut = quake_clut;
+        blitVert(p0, 1, tmp[3]);
+        p0->tpage = tpage;
+        blitVert(p0, 2, tmp[1]);
+        blitVert(p0, 3, tmp[4]);
+        setPolyGT4(p0);
+        addPrim(ot, p0);

        // Second quad
-        poly = (POLY_GT4*)mem_prim(sizeof(POLY_GT4));
-        blitVert(poly, 0, tmp[1]);
-        poly->clut = quake_clut;
-        blitVert(poly, 1, tmp[4]);
-        poly->tpage = tpage;
-        blitVert(poly, 2, tmp[2]);
-        blitVert(poly, 3, tmp[5]);
-        setPolyGT4(poly);
-        addPrim(ot, poly);
+        p1 = (POLY_GT4*)mem_prim(sizeof(POLY_GT4));
+        blitVert(p1, 0, tmp[1]);
+        p1->clut = quake_clut;
+        blitVert(p1, 1, tmp[4]);
+        p1->tpage = tpage;
+        blitVert(p1, 2, tmp[2]);
+        blitVert(p1, 3, tmp[5]);
+        setPolyGT4(p1);
+        addPrim(ot, p1);

        // Third quad
-        poly = (POLY_GT4*)mem_prim(sizeof(POLY_GT4));
-        blitVert(poly, 0, tmp[3]);
-        poly->clut = quake_clut;
-        blitVert(poly, 1, tmp[6]);
-        poly->tpage = tpage;
-        blitVert(poly, 2, tmp[4]);
-        blitVert(poly, 3, tmp[7]);
-        setPolyGT4(poly);
-        addPrim(ot, poly);
+        p2 = (POLY_GT4*)mem_prim(sizeof(POLY_GT4));
+        blitVert(p2, 0, tmp[3]);
+        p2->clut = quake_clut;
+        blitVert(p2, 1, tmp[6]);
+        p2->tpage = tpage;
+        blitVert(p2, 2, tmp[4]);
+        blitVert(p2, 3, tmp[7]);
+        setPolyGT4(p2);
+        addPrim(ot, p2);

        // Fourth quad
-        poly = (POLY_GT4*)mem_prim(sizeof(POLY_GT4));
-        blitVert(poly, 0, tmp[4]);
-        poly->clut = quake_clut;
-        blitVert(poly, 1, tmp[7]);
-        poly->tpage = tpage;
-        blitVert(poly, 2, tmp[5]);
-        blitVert(poly, 3, tmp[8]);
-        setPolyGT4(poly);
-        addPrim(ot, poly);
+        p3 = (POLY_GT4*)mem_prim(sizeof(POLY_GT4));
+        blitVert(p3, 0, tmp[4]);
+        p3->clut = quake_clut;
+        blitVert(p3, 1, tmp[7]);
+        p3->tpage = tpage;
+        blitVert(p3, 2, tmp[5]);
+        blitVert(p3, 3, tmp[8]);
+        setPolyGT4(p3);
+        addPrim(ot, p3);

        polyCount += 4;
    }