Browse Source

First pass at leaf-based rendering order:

- Sort leafs first into a linked list, before drawing their faces
- Depth value for faces is determined by the leaf order; the OT position is calculated once per leaf
- Removes the need for per-primitive depth calculation & check, so those are removed
- Moved primitive buffer check out of the primitive drawing routines and to the start of each face drawing function
- Triangle drawing routines now only draw a single triangle, no loops involved
- Simplified drawing routines where possible
- Face drawing still happens in back-to-front order, so this needs another good look
unrollquadloop
Nico de Poel 3 years ago
parent
commit
a8d3bf567b
  1. 141
      draw.h
  2. 19
      ps1bsp.h
  3. BIN
      test.ps1bsp
  4. 124
      world.c

141
draw.h

@ -5,32 +5,17 @@
#include "display.h"
#include <inline_c.h>
static INLINE void draw_trianglefan_lit(SVECTOR *verts, u_char numVerts)
static INLINE void draw_triangle_lit(SVECTOR *verts, u_long *ot)
{
int p;
if (!mem_checkprim(sizeof(POLY_G3), numVerts - 2))
return;
// Draw the face as a triangle fan
u_char maxVert = numVerts - 1;
for (u_char vertIdx = 1; vertIdx < maxVert; ++vertIdx)
{
// Draw a single triangle
const SVECTOR *v0 = &verts[0];
const SVECTOR *v1 = &verts[vertIdx];
const SVECTOR *v2 = &verts[vertIdx + 1];
const SVECTOR *v1 = &verts[1];
const SVECTOR *v2 = &verts[2];
// Naively draw the triangle with GTE, nothing special or optimized about this
gte_ldv3(v0, v1, v2);
gte_rtpt(); // Rotation, translation, perspective projection
// Average Z for depth sorting and culling
gte_avsz3();
gte_stotz(&p);
short depth = p >> 2;
if (depth <= 0 || depth >= OTLEN)
continue;
// Draw a flat-shaded untextured colored triangle
POLY_G3 *poly = (POLY_G3*)mem_prim(sizeof(POLY_G3));
setPolyG3(poly);
@ -40,19 +25,12 @@ static INLINE void draw_trianglefan_lit(SVECTOR *verts, u_char numVerts)
poly->r1 = poly->g1 = poly->b1 = (uint8_t)v1->pad;
poly->r2 = poly->g2 = poly->b2 = (uint8_t)v2->pad;
addPrim(curOT + depth, poly);
addPrim(ot, poly);
++polyCount;
}
}
static INLINE void draw_trianglestrip_lit(SVECTOR *verts, u_char numVerts)
static INLINE void draw_trianglestrip_lit(SVECTOR *verts, u_char numVerts, u_long *ot)
{
int p;
u_char numTris = numVerts - 2;
if (!mem_checkprim(sizeof(POLY_G3), numTris))
return;
// Draw the face as a triangle strip
const SVECTOR *v0, *v1, *v2;
const SVECTOR *head = verts;
@ -61,6 +39,7 @@ static INLINE void draw_trianglestrip_lit(SVECTOR *verts, u_char numVerts)
v2 = head++; // Initialize first vertex to index 0 and set head to index 1
u_char numTris = numVerts - 2;
for (u_char triIdx = 0; triIdx < numTris; ++triIdx)
{
if (reverse ^= 1)
@ -80,13 +59,6 @@ static INLINE void draw_trianglestrip_lit(SVECTOR *verts, u_char numVerts)
gte_ldv3(v0, v1, v2);
gte_rtpt(); // Rotation, translation, perspective projection
// Average Z for depth sorting and culling
gte_avsz3();
gte_stotz(&p);
short depth = p >> 2;
if (depth <= 0 || depth >= OTLEN)
continue;
// Draw a flat-shaded untextured colored triangle
POLY_G3 *poly = (POLY_G3*)mem_prim(sizeof(POLY_G3));
setPolyG3(poly);
@ -96,19 +68,13 @@ static INLINE void draw_trianglestrip_lit(SVECTOR *verts, u_char numVerts)
poly->r1 = poly->g1 = poly->b1 = (uint8_t)v1->pad;
poly->r2 = poly->g2 = poly->b2 = (uint8_t)v2->pad;
addPrim(curOT + depth, poly);
addPrim(ot, poly);
++polyCount;
}
}
static INLINE void draw_quadstrip_lit(SVECTOR *verts, u_char numVerts)
static INLINE void draw_quadstrip_lit(SVECTOR *verts, u_char numVerts, u_long *ot)
{
int p;
u_char numQuads = (numVerts - 1) >> 1;
if (!mem_checkprim(sizeof(POLY_G4), numQuads))
return;
// Draw the face as a quad strip
const SVECTOR *v0, *v1, *v2, *v3;
const SVECTOR *head = verts;
@ -119,6 +85,7 @@ static INLINE void draw_quadstrip_lit(SVECTOR *verts, u_char numVerts)
v3 = head++;
// Normally a quad strip would have (N-2)/2 quads, but we might end up with a sole triangle at the end which will be drawn as a collapsed quad
u_char numQuads = (numVerts - 1) >> 1;
for (u_char quadIdx = 0; quadIdx < numQuads; ++quadIdx)
{
v0 = v2;
@ -130,19 +97,10 @@ static INLINE void draw_quadstrip_lit(SVECTOR *verts, u_char numVerts)
gte_ldv3(v0, v1, v2);
gte_rtpt(); // Rotation, translation, perspective projection
// Average Z for depth sorting and culling
gte_avsz3();
gte_stotz(&p);
short depth = p >> 2;
if (depth <= 0 || depth >= OTLEN)
continue;
// Draw a flat-shaded untextured colored quad
POLY_G4 *poly = (POLY_G4*)mem_prim(sizeof(POLY_G4));
setPolyG4(poly);
gte_stsxy0(&poly->x0);
gte_stsxy1(&poly->x1);
gte_stsxy2(&poly->x2);
gte_stsxy3_g3(poly);
// Transform the fourth vertex to complete the quad
gte_ldv0(v3);
@ -154,22 +112,44 @@ static INLINE void draw_quadstrip_lit(SVECTOR *verts, u_char numVerts)
poly->r2 = poly->g2 = poly->b2 = (uint8_t)v2->pad;
poly->r3 = poly->g3 = poly->b3 = (uint8_t)v3->pad;
addPrim(curOT + depth, poly);
addPrim(ot, poly);
++polyCount;
}
}
static INLINE void draw_quadstrip_textured(STVECTOR *verts, u_char numVerts, u_short tpage)
static INLINE void draw_triangle_textured(STVECTOR *verts, u_short tpage, u_long *ot)
{
int p;
// Draw a single textured triangle
const STVECTOR *v0 = &verts[0];
const STVECTOR *v1 = &verts[1];
const STVECTOR *v2 = &verts[2];
// Normally a quad strip would have (N-2)/2 quads, but we might end up with a sole triangle at the end which will be drawn as a collapsed quad
// NOTE: testing has shown that the PS1 is faster just rendering quads and accepting the odd collapsed quad, rather than being clever with pointer comparisons and drawing a single triangle at the end.
u_char numQuads = (numVerts - 1) >> 1;
if (!mem_checkprim(sizeof(POLY_GT4), numQuads))
return;
// Naively draw the triangle with GTE, nothing special or optimized about this
gte_ldv3(v0, v1, v2);
gte_rtpt(); // Rotation, translation, perspective projection
// Draw a gouraud shaded textured triangle
POLY_GT3 *poly = (POLY_GT3*)mem_prim(sizeof(POLY_GT3));
setPolyGT3(poly);
gte_stsxy3_gt3(poly);
// Texture UVs
setUV3(poly, v0->u, v0->v, v1->u, v1->v, v2->u, v2->v);
poly->clut = quake_clut;
poly->tpage = tpage;
poly->r0 = poly->g0 = poly->b0 = (uint8_t)v0->pad;
poly->r1 = poly->g1 = poly->b1 = (uint8_t)v1->pad;
poly->r2 = poly->g2 = poly->b2 = (uint8_t)v2->pad;
addPrim(ot, poly);
++polyCount;
}
static INLINE void draw_quadstrip_textured(STVECTOR *verts, u_char numVerts, u_short tpage, u_long *ot)
{
// Draw the face as a quad strip
STVECTOR *v0, *v1, *v2, *v3;
u_char i0, i1, i2, i3;
u_char head = 0;
u_char tail = numVerts;
@ -178,7 +158,9 @@ static INLINE void draw_quadstrip_textured(STVECTOR *verts, u_char numVerts, u_s
i2 = --tail;
i3 = head++;
STVECTOR *v0, *v1, *v2, *v3;
// Normally a quad strip would have (N-2)/2 quads, but we might end up with a sole triangle at the end which will be drawn as a collapsed quad
// NOTE: testing has shown that the PS1 is faster just rendering quads and accepting the odd collapsed quad, rather than being clever with pointer comparisons and drawing a single triangle at the end.
u_char numQuads = (numVerts - 1) >> 1;
for (u_char quadIdx = 0; quadIdx < numQuads; ++quadIdx)
{
i0 = i2;
@ -194,19 +176,10 @@ static INLINE void draw_quadstrip_textured(STVECTOR *verts, u_char numVerts, u_s
gte_ldv3(v0, v1, v2);
gte_rtpt(); // Rotation, translation, perspective projection
// Average Z for depth sorting and culling
gte_avsz3();
gte_stotz(&p);
short depth = p >> 2;
if (depth <= 0 || depth >= OTLEN)
return;
// Draw a flat-shaded untextured colored quad
POLY_GT4 *poly = (POLY_GT4*)mem_prim(sizeof(POLY_GT4));
setPolyGT4(poly);
gte_stsxy0(&poly->x0);
gte_stsxy1(&poly->x1);
gte_stsxy2(&poly->x2);
gte_stsxy3_gt3(poly);
v3 = &verts[i3];
@ -226,21 +199,15 @@ static INLINE void draw_quadstrip_textured(STVECTOR *verts, u_char numVerts, u_s
poly->r2 = poly->g2 = poly->b2 = (uint8_t)v2->pad;
poly->r3 = poly->g3 = poly->b3 = (uint8_t)v3->pad;
addPrim(curOT + depth, poly);
addPrim(ot, poly);
++polyCount;
}
}
static INLINE void draw_quadstrip_water(STVECTOR *verts, u_char numVerts, u_short tpage)
static INLINE void draw_quadstrip_water(STVECTOR *verts, u_char numVerts, u_short tpage, u_long *ot)
{
int p;
// Normally a quad strip would have (N-2)/2 quads, but we might end up with a sole triangle at the end which will be drawn as a collapsed quad
u_char numQuads = (numVerts - 1) >> 1;
if (!mem_checkprim(sizeof(POLY_FT4), numQuads))
return;
// Draw the face as a quad strip
const STVECTOR *v0, *v1, *v2, *v3;
u_char i0, i1, i2, i3;
u_char head = 0;
u_char tail = numVerts;
@ -249,7 +216,8 @@ static INLINE void draw_quadstrip_water(STVECTOR *verts, u_char numVerts, u_shor
i2 = --tail;
i3 = head++;
const STVECTOR *v0, *v1, *v2, *v3;
// Normally a quad strip would have (N-2)/2 quads, but we might end up with a sole triangle at the end which will be drawn as a collapsed quad
u_char numQuads = (numVerts - 1) >> 1;
for (u_char quadIdx = 0; quadIdx < numQuads; ++quadIdx)
{
i0 = i2;
@ -265,13 +233,6 @@ static INLINE void draw_quadstrip_water(STVECTOR *verts, u_char numVerts, u_shor
gte_ldv3(v0, v1, v2);
gte_rtpt(); // Rotation, translation, perspective projection
// Average Z for depth sorting and culling
gte_avsz3();
gte_stotz(&p);
short depth = p >> 2;
if (depth <= 0 || depth >= OTLEN)
continue;
// Draw a flat-shaded untextured colored quad
POLY_FT4 *poly = (POLY_FT4*)mem_prim(sizeof(POLY_FT4));
setPolyFT4(poly);
@ -294,7 +255,7 @@ static INLINE void draw_quadstrip_water(STVECTOR *verts, u_char numVerts, u_shor
// Unlit
poly->r0 = poly->g0 = poly->b0 = 255;
addPrim(curOT + depth, poly);
addPrim(ot, poly);
++polyCount;
}
}

19
ps1bsp.h

@ -84,19 +84,9 @@ typedef struct
unsigned char b : 5;
} ps1bsp_facevertex_t;
#define SURF_PLANEBACK 0x2
#define SURF_DRAWSKY 0x4
#define SURF_DRAWSPRITE 0x8
#define SURF_DRAWTURB 0x10
#define SURF_DRAWTILED 0x20
#define SURF_DRAWBACKGROUND 0x40
#define SURF_UNDERWATER 0x80
#define SURF_NOTEXTURE 0x100
#define SURF_DRAWFENCE 0x200
#define SURF_DRAWLAVA 0x400
#define SURF_DRAWSLIME 0x800
#define SURF_DRAWTELE 0x1000
#define SURF_DRAWWATER 0x2000
#define SURF_DRAWSKY 0x2
#define SURF_DRAWTURB 0x4
#define SURF_DRAWWATER 0x8
// High quality: Face -> polygons -> polygon vertex indices (index + UV + light) -> vertices
// Low quality: Face -> face vertex indices (index + color) -> vertices
@ -108,13 +98,14 @@ typedef struct
// Used for high-quality tesselated textured drawing
unsigned short firstPolygon;
unsigned char numPolygons;
unsigned char totalQuads;
// Used for low-quality untextured vertex colored drawing
unsigned short firstFaceVertex;
unsigned char numFaceVertices;
unsigned char textureId;
unsigned short flags;
unsigned char flags;
// Used for backface culling
SVECTOR center;

BIN
test.ps1bsp

124
world.c

@ -92,9 +92,11 @@ static INLINE char world_cull_backface(const world_t *world, const ps1bsp_face_t
return camDot < 0;
}
static void world_drawface_fast(const world_t *world, const ps1bsp_face_t *face)
static void world_drawface_fast(const world_t *world, const ps1bsp_face_t *face, u_long *ot)
{
// TODO: early primitive buffer check (POLY_G4)
// Early primitive buffer check
if (!mem_checkprim(sizeof(POLY_G4), face->totalQuads))
return;
short dot;
if (world_cull_backface(world, face, &dot))
@ -114,14 +116,16 @@ static void world_drawface_fast(const world_t *world, const ps1bsp_face_t *face)
}
if (face->numFaceVertices == 3)
draw_trianglefan_lit(verts, 3);
draw_triangle_lit(verts, ot);
else
draw_quadstrip_lit(verts, face->numFaceVertices);
draw_quadstrip_lit(verts, face->numFaceVertices, ot);
}
static void world_drawface_lit(const world_t *world, const ps1bsp_face_t *face)
static void world_drawface_lit(const world_t *world, const ps1bsp_face_t *face, u_long *ot)
{
// TODO: early primitive buffer check (POLY_G4)
// Early primitive buffer check
if (!mem_checkprim(sizeof(POLY_G4), face->totalQuads))
return;
short dot;
if (world_cull_backface(world, face, &dot))
@ -144,28 +148,50 @@ static void world_drawface_lit(const world_t *world, const ps1bsp_face_t *face)
}
if (poly->numPolyVertices == 3)
draw_trianglefan_lit(verts, 3);
draw_triangle_lit(verts, ot);
else
draw_quadstrip_lit(verts, poly->numPolyVertices);
draw_quadstrip_lit(verts, poly->numPolyVertices, ot);
}
}
static void world_drawface_textured(const world_t *world, const ps1bsp_face_t *face)
static void world_drawface_textured(const world_t *world, const ps1bsp_face_t *face, u_long *ot)
{
// Early primitive buffer check
if (!mem_checkprim(sizeof(POLY_GT4), face->totalQuads))
return;
// NOTE: this value could be REALLY useful for determining the tessellation subdivisions. It has camera distance *and* angle in it.
// Just include the face size/area for an approximate screen size. Maybe also separate x/y/z for angle-dependent tessellation.
short dot;
if (world_cull_backface(world, face, &dot))
return;
// TODO: do an early primitive buffer check here (POLY_GT4), so we can skip vertex copying if it's already full
// When doing tessellation, we need the above dot product to decide how many polys to draw, so we can only safely check the primbuffer size here
// Though since we're drawing front-to-back and tessellation will only happen close to the camera, I suppose we could get away with just checking for non-tessellated polycounts
// Draw textured, vertex colored polygons
STVECTOR *verts = (STVECTOR*)(scratchpad + 256);
ps1bsp_texture_t *faceTexture = &world->textures[face->textureId];
const ps1bsp_polygon_t* poly = &world->polygons[face->firstPolygon];
if (face->flags & SURF_DRAWWATER)
{
for (u_char polyIdx = 0; polyIdx < face->numPolygons; ++polyIdx, ++poly)
{
ps1bsp_polyvertex_t *polyVertex = &world->polyVertices[poly->firstPolyVertex];
STVECTOR *curVert = verts;
for (u_char vertIdx = 0; vertIdx < poly->numPolyVertices; ++vertIdx, ++polyVertex, ++curVert)
{
const ps1bsp_vertex_t *vert = &world->vertices[polyVertex->index];
curVert->vx = vert->x;
curVert->vy = vert->y;
curVert->vz = vert->z;
curVert->u = (u_short)polyVertex->u;
curVert->v = (u_short)polyVertex->v;
}
draw_quadstrip_water(verts, poly->numPolyVertices, faceTexture->tpage, ot);
}
}
else
{
for (u_char polyIdx = 0; polyIdx < face->numPolygons; ++polyIdx, ++poly)
{
ps1bsp_polyvertex_t *polyVertex = &world->polyVertices[poly->firstPolyVertex];
@ -181,44 +207,64 @@ static void world_drawface_textured(const world_t *world, const ps1bsp_face_t *f
curVert->pad = polyVertex->light;
}
if (face->flags & SURF_DRAWWATER)
draw_quadstrip_water(verts, poly->numPolyVertices, faceTexture->tpage);
if (poly->numPolyVertices == 3)
draw_triangle_textured(verts, faceTexture->tpage, ot);
else
draw_quadstrip_textured(verts, poly->numPolyVertices, faceTexture->tpage);
draw_quadstrip_textured(verts, poly->numPolyVertices, faceTexture->tpage, ot);
}
}
}
static void (*world_drawface)(const world_t*, const ps1bsp_face_t*) = &world_drawface_fast;
static void world_drawnode(const world_t *world, short nodeIdx, u_char *pvs)
{
if (nodeIdx < 0) // Leaf node
{
// Check if this leaf is visible from the current camera position
u_short test = ~nodeIdx - 1;
if ((pvs[test >> 3] & (1 << (test & 0x7))) == 0)
return;
static void (*world_drawface)(const world_t*, const ps1bsp_face_t*, u_long *ot) = &world_drawface_fast;
const ps1bsp_leaf_t *leaf = &world->leaves[~nodeIdx];
// if (!frustum_boxInside(&leaf->mins, &leaf->maxs)) // TODO: these additional frustum checks actually make things slower, probably not worth it
// return;
static ps1bsp_leaf_t *firstLeaf = NULL;
static u_short leafDepth = 0;
static void world_drawLeafs(const world_t *world)
{
u_long frameNum = time_getFrameNumber();
const u_short *leafFace = &world->leafFaces[leaf->firstLeafFace];
for (u_short leafFaceIdx = 0; leafFaceIdx < leaf->numLeafFaces; ++leafFaceIdx, ++leafFace)
// Draw each visible leaf's faces in front-to-back order
// This way if we run out of primitive buffer space, we will stop drawing at far-away faces
for (const ps1bsp_leaf_t *leaf = firstLeaf; leaf != NULL; leaf = leaf->nextLeaf)
{
ps1bsp_face_t *face = &world->faces[*leafFace];
u_long *ot = curOT + leaf->leafDepth;
const u_short *leafFaces = &world->leafFaces[leaf->firstLeafFace];
for (u_short leafFaceIdx = 0; leafFaceIdx < leaf->numLeafFaces; ++leafFaceIdx)
{
ps1bsp_face_t *face = &world->faces[leafFaces[leafFaceIdx]];
// Check if we've already drawn this face on the current frame
// NOTE: this can cause sorting issues when rendering front-to-back with leaf-based depth
if (face->drawFrame == frameNum)
continue;
world_drawface(world, face);
world_drawface(world, face, ot);
face->drawFrame = frameNum;
}
}
}
static void world_sortLeafs(const world_t *world, short nodeIdx, u_char *pvs)
{
if (nodeIdx < 0) // Leaf node
{
u_short leafIdx = ~nodeIdx;
if (leafIdx == 0) // Leaf 0 should not be drawn
return;
// PVS culling
u_short test = leafIdx - 1;
if ((pvs[test >> 3] & (1 << (test & 0x7))) == 0)
return;
// Add the leaf to the sorted linked list
// Since we're traversing the BSP tree front-to-back, adding each leaf at the start sorts the list in back-to-front order
ps1bsp_leaf_t *leaf = (ps1bsp_leaf_t*)&world->leaves[leafIdx];
leaf->nextLeaf = firstLeaf;
leaf->leafDepth = leafDepth++;
firstLeaf = leaf;
return;
}
@ -230,11 +276,10 @@ static void world_drawnode(const world_t *world, short nodeIdx, u_char *pvs)
const ps1bsp_plane_t *plane = &world->planes[node->planeId];
short dist = world_pointPlaneDist(&cam_pos, plane);
// Draw child nodes in front-to-back order; adding faces to the OT will reverse the drawing order
// PLAN: traverse back-to-front to determine leaf order & depth, then draw faces front-to-back with primbuf checks
// Sort leafs in front-to-back order, so that we can draw their faces at the correct depths
char order = dist < 0;
world_drawnode(world, node->children[order], pvs);
world_drawnode(world, node->children[order ^ 1], pvs);
world_sortLeafs(world, node->children[order], pvs);
world_sortLeafs(world, node->children[order ^ 1], pvs);
}
// Decompress PVS data for the given leaf ID and store it in RAM at the given buffer pointer location.
@ -315,5 +360,8 @@ void world_draw(const world_t *world)
else
world_drawface = &world_drawface_lit;
world_drawnode(world, 0, pvs);
firstLeaf = NULL;
leafDepth = 1;
world_sortLeafs(world, 0, pvs);
world_drawLeafs(world);
}
Loading…
Cancel
Save