Browse Source

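Found a 10% performance boost by simply reordering data access so that polygon data is filled in struct order, and by blitting each vertex's RGB values as a single 32-bit copy and its UV values as a single 16-bit copy instead of copying each component individually.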

unrollquadloop
Nico de Poel 3 years ago
parent
commit
74a36f0938
  1. 45
      draw.h
  2. 4
      ps1bsp.h
  3. BIN
      test.ps1bsp

45
draw.h

@ -112,10 +112,10 @@ static INLINE void draw_quadstrip_lit(const ps1bsp_vertex_t *vertices, const ps1
gte_rtps(); gte_rtps();
gte_stsxy(&poly->x3); gte_stsxy(&poly->x3);
poly->r0 = poly->g0 = poly->b0 = (uint8_t)v0->light;
poly->r1 = poly->g1 = poly->b1 = (uint8_t)v1->light;
poly->r2 = poly->g2 = poly->b2 = (uint8_t)v2->light;
poly->r3 = poly->g3 = poly->b3 = (uint8_t)v3->light;
poly->r0 = poly->g0 = poly->b0 = (uint8_t)v0->r;
poly->r1 = poly->g1 = poly->b1 = (uint8_t)v1->r;
poly->r2 = poly->g2 = poly->b2 = (uint8_t)v2->r;
poly->r3 = poly->g3 = poly->b3 = (uint8_t)v3->r;
addPrim(ot, poly); addPrim(ot, poly);
++polyCount; ++polyCount;
@ -232,25 +232,36 @@ static INLINE void draw_quadstrip_textured(const ps1bsp_vertex_t *vertices, cons
// Draw a gouraud shaded textured quad // Draw a gouraud shaded textured quad
POLY_GT4 *poly = (POLY_GT4*)mem_prim(sizeof(POLY_GT4)); POLY_GT4 *poly = (POLY_GT4*)mem_prim(sizeof(POLY_GT4));
setPolyGT4(poly);
gte_stsxy3_gt3(poly);
// Fill out the quad's data fields in struct order, to optimize data access
// RGB and UV fields are blitted as a single value, which is faster than copying each value individually
// First vertex and texture CLUT
*((u_int*)&poly->r0) = *((u_int*)&v0->r);
gte_stsxy0(&poly->x0);
*((u_short*)&poly->u0) = *((u_short*)&v0->u);
poly->clut = quake_clut;
// Second vertex and texture page
*((u_int*)&poly->r1) = *((u_int*)&v1->r);
gte_stsxy1(&poly->x1);
*((u_short*)&poly->u1) = *((u_short*)&v1->u);
poly->tpage = tpage;
// Third vertex
*((u_int*)&poly->r2) = *((u_int*)&v2->r);
gte_stsxy2(&poly->x2);
*((u_short*)&poly->u2) = *((u_short*)&v2->u);
// Transform the fourth vertex to complete the quad // Transform the fourth vertex to complete the quad
gte_ldv0(&vertices[v3->index]); gte_ldv0(&vertices[v3->index]);
gte_rtps(); gte_rtps();
gte_stsxy(&poly->x3);
// Texture UVs
setUV4(poly, v0->u, v0->v, v1->u, v1->v, v2->u, v2->v, v3->u, v3->v);
poly->clut = quake_clut;
poly->tpage = tpage;
// Vertex color lighting
poly->r0 = poly->g0 = poly->b0 = (uint8_t)v0->light;
poly->r1 = poly->g1 = poly->b1 = (uint8_t)v1->light;
poly->r2 = poly->g2 = poly->b2 = (uint8_t)v2->light;
poly->r3 = poly->g3 = poly->b3 = (uint8_t)v3->light;
// Fourth vertex
*((u_int*)&poly->r3) = *((u_int*)&v3->r);
gte_stsxy(&poly->x3);
*((u_short*)&poly->u3) = *((u_short*)&v3->u);
setPolyGT4(poly);
addPrim(ot, poly); addPrim(ot, poly);
++polyCount; ++polyCount;
} }

4
ps1bsp.h

@ -60,8 +60,8 @@ typedef struct
typedef struct typedef struct
{ {
unsigned short index; unsigned short index;
unsigned short light; // Can be made into u_char if we need to store more data; currently u_short for 32-bit alignment purposes
unsigned short u, v; // Can be made into u_char if we need to store more data; currently u_short for 32-bit alignment purposes
unsigned char u, v;
unsigned char r, g, b, pad;
} ps1bsp_polyvertex_t; } ps1bsp_polyvertex_t;
// Faces are broken up into one or more polygons, each of which can be drawn as a quad/triangle strip with a single texture. // Faces are broken up into one or more polygons, each of which can be drawn as a quad/triangle strip with a single texture.

BIN
test.ps1bsp

Loading…
Cancel
Save