Browse Source

Unrolled the quad strip drawing loop to process up to three quads per iteration, sharing transformed vertices between them.

Not actually a performance improvement and I'm not sure why. Sidelining this for now.
unrollquadloop
Nico de Poel 3 years ago
parent
commit
2344e8ba42
  1. 167
      draw.h

167
draw.h

@ -209,66 +209,153 @@ static INLINE void draw_triangle_textured(STVECTOR *verts, u_short tpage, u_long
// Draws a polygon as a strip of gouraud-shaded, textured quads (POLY_GT4).
//
// Vertices are picked alternately from the front (head cursor) and the back
// (tail cursor) of polyVerts, and neighbouring quads share two vertices.
// Each vertex is loaded and transformed with the gte_* macros; the results are
// stored directly into primitives obtained from mem_prim(), which are then
// passed to addPrim() together with the ordering table.
//
// vertices:  array indexed by the `index` field of each polyVerts entry
// polyVerts: per-polygon vertex records; their `index`, `r` and `u` fields are read here
// numVerts:  initial value of the tail cursor (presumably the number of entries in polyVerts - confirm with caller)
// tpage:     written to the tpage field of every quad
// ot:        passed to addPrim() for every finished quad
//
// NOTE(review): this listing looks like a rendered diff in which removed and added
// lines are interleaved without +/- markers: v0/v1/v2 are declared twice, both
// `poly` and `p0` are allocated and filled, and a counted `for` header is directly
// followed by a second quadIdx declaration and `for (;;)`. Confirm against the real
// draw.h before treating this text as one compilable function.
static INLINE void draw_quadstrip_textured(const ps1bsp_vertex_t *vertices, const ps1bsp_polyvertex_t *polyVerts, u_char numVerts, u_short tpage, u_long *ot)
{
const ps1bsp_polyvertex_t *v0, *v1, *v2, *v3;
u_char i0, i1, i2, i3;
POLY_GT4 *p0, *p1, *p2;
const ps1bsp_polyvertex_t *v0, *v1, *v2;
// head walks forward from the first entry, tail walks backward from the last entry
u_char head = 0;
u_char tail = numVerts;
// Initialize the first two vertices
i2 = --tail;
i3 = head++;
// Initialize the first vertices
v0 = &polyVerts[head++];
v1 = &polyVerts[--tail];
v2 = &polyVerts[head++];
// Transform the first three vertices
gte_ldv3(&vertices[v0->index], &vertices[v1->index], &vertices[v2->index]);
gte_rtpt();
// Normally a quad strip would have (N-2)/2 quads, but we might end up with a sole triangle at the end which will be drawn as a collapsed quad
// NOTE: testing has shown that the PS1 is faster just rendering quads and accepting the odd collapsed quad, rather than being clever with pointer comparisons and drawing a single triangle at the end.
// NOTE(review): numQuads is a u_char, so numVerts == 0 would not yield 0 quads here - presumably callers never pass that; confirm.
u_char numQuads = (numVerts - 1) >> 1;
for (u_char quadIdx = 0; quadIdx < numQuads; ++quadIdx)
u_char quadIdx = 0;
for (;;)
{
// The previous quad's last two vertices become this quad's first two
i0 = i2;
i1 = i3;
i2 = --tail;
i3 = head++;
v0 = &polyVerts[i0];
v1 = &polyVerts[i1];
v2 = &polyVerts[i2];
v3 = &polyVerts[i3];
// Transform the first three vertices
gte_ldv3(&vertices[v0->index], &vertices[v1->index], &vertices[v2->index]);
gte_rtpt(); // Rotation, translation, perspective projection
// Stop once numQuads quads have been counted; the test itself advances the counter
if (quadIdx++ == numQuads)
return;
// Draw a gouraud shaded textured quad
POLY_GT4 *poly = (POLY_GT4*)mem_prim(sizeof(POLY_GT4));
p0 = (POLY_GT4*)mem_prim(sizeof(POLY_GT4));
// Fill out the quad's data fields in struct order, to optimize data access
// First vertex and texture CLUT
setColorFast(&poly->r0, &v0->r);
setUVFast(&poly->u0, &v0->u);
gte_stsxy0(&poly->x0);
poly->clut = quake_clut;
setColorFast(&p0->r0, &v0->r);
setUVFast(&p0->u0, &v0->u);
gte_stsxy0(&p0->x0);
p0->clut = quake_clut;
// Second vertex and texture page
setColorFast(&poly->r1, &v1->r);
gte_stsxy1(&poly->x1);
setUVFast(&poly->u1, &v1->u);
poly->tpage = tpage;
setColorFast(&p0->r1, &v1->r);
gte_stsxy1(&p0->x1);
setUVFast(&p0->u1, &v1->u);
p0->tpage = tpage;
// Third vertex
setColorFast(&poly->r2, &v2->r);
gte_stsxy2(&poly->x2);
setUVFast(&poly->u2, &v2->u);
setColorFast(&p0->r2, &v2->r);
gte_stsxy2(&p0->x2);
setUVFast(&p0->u2, &v2->u);
// Transform the fourth vertex to complete the quad
gte_ldv0(&vertices[v3->index]);
gte_rtps();
// Last quad of the strip: only one more vertex is needed, so it is fetched and transformed on its own
if (quadIdx++ == numQuads)
{
v0 = &polyVerts[--tail];
// Transform the fourth vertex to complete the quad
gte_ldv0(&vertices[v0->index]);
gte_rtps();
// Fourth vertex
setColorFast(&p0->r3, &v0->r);
setUVFast(&p0->u3, &v0->u);
gte_stsxy(&p0->x3);
// Finalize the quad
setPolyGT4(p0);
addPrim(ot, p0);
++polyCount;
return;
}
// Start the second quad, sharing two vertices with the first quad
// (its first vertex must be stored now, before the next gte_ldv3/gte_rtpt produces new results)
p1 = (POLY_GT4*)mem_prim(sizeof(POLY_GT4));
setColorFast(&p1->r0, &v2->r);
gte_stsxy2(&p1->x0);
setUVFast(&p1->u0, &v2->u);
p1->clut = quake_clut;
// Transform the next three vertices
v0 = &polyVerts[--tail];
v1 = &polyVerts[head++];
v2 = &polyVerts[--tail];
gte_ldv3(&vertices[v0->index], &vertices[v1->index], &vertices[v2->index]);
gte_rtpt();
// Fourth vertex of the first quad
setColorFast(&p0->r3, &v0->r);
setUVFast(&p0->u3, &v0->u);
gte_stsxy0(&p0->x3);
// Finalize the first quad
setPolyGT4(p0);
addPrim(ot, p0);
++polyCount;
// Second vertex of the second quad
setColorFast(&p1->r1, &v0->r);
setUVFast(&p1->u1, &v0->u);
gte_stsxy0(&p1->x1);
p1->tpage = tpage;
// Third vertex
setColorFast(&p1->r2, &v1->r);
gte_stsxy1(&p1->x2);
setUVFast(&p1->u2, &v1->u);
// Fourth vertex
setColorFast(&poly->r3, &v3->r);
setUVFast(&poly->u3, &v3->u);
gte_stsxy(&poly->x3);
setColorFast(&p1->r3, &v2->r);
gte_stsxy2(&p1->x3);
setUVFast(&p1->u3, &v2->u);
setPolyGT4(poly);
addPrim(ot, poly);
// Finalize the second quad
setPolyGT4(p1);
addPrim(ot, p1);
++polyCount;
// Stop here if the second quad was the last one
if (quadIdx++ == numQuads)
return;
// Start the third quad, sharing two vertices with the second quad and two with the next quad
p2 = (POLY_GT4*)mem_prim(sizeof(POLY_GT4));
setColorFast(&p2->r0, &v1->r);
gte_stsxy1(&p2->x0);
setUVFast(&p2->u0, &v1->u);
p2->clut = quake_clut;
// Second vertex and texture page
setColorFast(&p2->r1, &v2->r);
setUVFast(&p2->u1, &v2->u);
gte_stsxy2(&p2->x1);
p2->tpage = tpage;
// Transform the next three vertices
// (v0, v1 and v2 stay loaded and transformed for the first quad of the next loop iteration)
v0 = &polyVerts[head++];
v1 = &polyVerts[--tail];
v2 = &polyVerts[head++];
gte_ldv3(&vertices[v0->index], &vertices[v1->index], &vertices[v2->index]);
gte_rtpt();
// Third vertex
setColorFast(&p2->r2, &v0->r);
gte_stsxy0(&p2->x2);
setUVFast(&p2->u2, &v0->u);
// Fourth vertex
setColorFast(&p2->r3, &v1->r);
gte_stsxy1(&p2->x3);
setUVFast(&p2->u3, &v1->u);
// Finalize the third quad
setPolyGT4(p2);
addPrim(ot, p2);
++polyCount;
}
}

Loading…
Cancel
Save