diff --git a/memory.c b/memory.c index aa15432..c907400 100644 --- a/memory.c +++ b/memory.c @@ -7,7 +7,7 @@ static char primbuff[2][PRIMBUFLEN]; // Primitive buffer, just a raw buffer of b static char *nextpri; static char *primbuff_bounds; -char* const scratchpad = (char*)0x1F800000; +u_char* const scratchpad_root = (char*)0x1F800000; void *mem_scratch(char **scratch_offset, unsigned short size) { diff --git a/memory.h b/memory.h index 12b6768..21b4d94 100644 --- a/memory.h +++ b/memory.h @@ -1,7 +1,7 @@ #ifndef __MEMORY_H__ #define __MEMORY_H__ -extern char* const scratchpad; // Starting address of scratchpad memory +extern u_char* const scratchpad_root; // Starting address of scratchpad memory /** * @brief Allocate memory in the fast scratchpad RAM buffer. diff --git a/ps1bsp.h b/ps1bsp.h index a8ae1a0..a071133 100755 --- a/ps1bsp.h +++ b/ps1bsp.h @@ -35,6 +35,7 @@ typedef struct ps1bsp_dentry_t nodes; ps1bsp_dentry_t leaves; ps1bsp_dentry_t leafFaces; + ps1bsp_dentry_t visData; } ps1bsp_header_t; typedef struct diff --git a/qmath.h b/qmath.h index a4e4edb..d004af8 100644 --- a/qmath.h +++ b/qmath.h @@ -8,12 +8,12 @@ INLINE short m_dot12(const SVECTOR *a, const SVECTOR *b) return ((a->vx * b->vx) >> 12) + ((a->vy * b->vy) >> 12) + ((a->vz * b->vz) >> 12); } -INLINE short m_pointPlaneDist4(const VECTOR *point4, const SVECTOR *normal12, short dist4) +INLINE short m_pointPlaneDist2(const VECTOR *point2, const SVECTOR *normal12, short dist2) { - int x = ((int)point4->vx * normal12->vx) >> 12; - int y = ((int)point4->vy * normal12->vy) >> 12; - int z = ((int)point4->vz * normal12->vz) >> 12; - return (short)(x + y + z - dist4); + int x = ((int)point2->vx * normal12->vx) >> 12; + int y = ((int)point2->vy * normal12->vy) >> 12; + int z = ((int)point2->vz * normal12->vz) >> 12; + return (short)(x + y + z - dist2); } #endif // __QMATH_H__ diff --git a/test.ps1bsp b/test.ps1bsp index cc30168..f2ba1c8 100755 Binary files a/test.ps1bsp and b/test.ps1bsp differ diff --git a/world.c b/world.c index 4c861c6..79d214e 100644 --- a/world.c +++ b/world.c @@ -48,6 +48,9 @@ void world_load(const u_long *data, world_t *world) world->leafFaces = (u_short*)(bytes + header->leafFaces.offset); world->numLeafFaces = header->leafFaces.size / sizeof(u_short); + + world->visData = (u_char*)(bytes + header->visData.offset); + world->numVisData = header->visData.size / sizeof(u_char); } static INLINE void drawface_triangle_fan(const ps1bsp_face_t *face, SVECTOR *vecs) @@ -75,7 +78,7 @@ static INLINE void drawface_triangle_fan(const ps1bsp_face_t *face, SVECTOR *vec // Average Z for depth sorting and culling gte_avsz3(); gte_stotz(&p); - unsigned short depth = p >> 2; + short depth = p >> 2; if (depth <= 0 || depth >= OTLEN) continue; @@ -137,7 +140,7 @@ static INLINE void drawface_triangle_strip(const ps1bsp_face_t *face, SVECTOR *v // Average Z for depth sorting and culling gte_avsz3(); gte_stotz(&p); - unsigned short depth = p >> 2; + short depth = p >> 2; if (depth <= 0 || depth >= OTLEN) continue; @@ -193,7 +196,7 @@ static INLINE void drawface_quad_strip(const ps1bsp_face_t *face, SVECTOR *vecs) // Average Z for depth sorting and culling gte_avsz3(); gte_stotz(&p); - unsigned short depth = p >> 2; + short depth = p >> 2; if (depth <= 0 || depth >= OTLEN) continue; @@ -222,13 +225,15 @@ static INLINE void drawface_quad_strip(const ps1bsp_face_t *face, SVECTOR *vecs) } } -static void world_drawface(const world_t *world, const ps1bsp_face_t *face, char *scratchptr) +static void world_drawface(const world_t *world, const ps1bsp_face_t *face, u_char *scratchptr) { const CVECTOR *col = &colors[(u_long)face % numColors]; - SVECTOR *vecs = (SVECTOR*)mem_scratch(&scratchptr, sizeof(SVECTOR) * face->numFaceVertices); + SVECTOR *vecs = (SVECTOR*)scratchptr; + // scratchptr += sizeof(SVECTOR) * face->numFaceVertices; // No need to move the scratchpad pointer right now // Copy this face's vertices into scratch RAM for fast reuse + // TODO: this is the main performance bottleneck right now! ps1bsp_facevertex_t *faceVertex = &world->faceVertices[face->firstFaceVertex]; for (int vertIdx = 0; vertIdx < face->numFaceVertices; ++vertIdx, ++faceVertex) { @@ -243,12 +248,17 @@ static void world_drawface(const world_t *world, const ps1bsp_face_t *face, char drawface_quad_strip(face, vecs); } -static void world_drawnode(const world_t *world, short nodeIdx, char *scratchptr) +static void world_drawnode(const world_t *world, short nodeIdx, u_char *pvs, u_char *scratchptr) { u_long frameNum = time_getFrameNumber(); if (nodeIdx < 0) // Leaf node { + // Check if this leaf is visible from the current camera position + u_short test = ~nodeIdx - 1; + if ((pvs[test >> 3] & (1 << (test & 0x7))) == 0) + return; + const ps1bsp_leaf_t *leaf = &world->leaves[~nodeIdx]; const u_short *leafFace = &world->leafFaces[leaf->firstLeafFace]; @@ -280,9 +290,53 @@ static void world_drawnode(const world_t *world, short nodeIdx, char *scratchpt // world_drawface(world, face, scratchptr); // face->drawFrame = frameNum; // } + + const ps1bsp_plane_t *plane = &world->planes[node->planeId]; + short dist = m_pointPlaneDist2(&cam_pos, &plane->normal, plane->dist); + + // Draw child nodes in front-to-back order; adding faces to the OT will reverse the drawing order + if (dist > 0) + { + world_drawnode(world, node->front, pvs, scratchptr); + world_drawnode(world, node->back, pvs, scratchptr); + } + else + { + world_drawnode(world, node->back, pvs, scratchptr); + world_drawnode(world, node->front, pvs, scratchptr); + } +} + +// Decompress PVS data for the given leaf ID and store it in scratch RAM at the given scratch pointer location. +// Returns the memory location of decompressed PVS data, and moves the scratch pointer forward. +static u_char *world_loadVisData(const world_t *world, u_short leafIdx, u_char **scratchptr) +{ + u_char *head = *scratchptr; + u_char *tail = head; + + const ps1bsp_leaf_t *leaf = &world->leaves[leafIdx]; - world_drawnode(world, node->front, scratchptr); - world_drawnode(world, node->back, scratchptr); + const u_char *v = &world->visData[leaf->vislist]; + for (int l = 1; l < world->numLeaves; ) + { + u_char bits = *v++; + if (bits) + { + *tail++ = bits; + l += 8; + } + else + { + u_char skip = *v++; + for (u_char i = 0; i < skip; ++i, l += 8) + { + *tail++ = 0; + } + } + } + + *scratchptr = tail; + return head; } static u_short world_leafAtPoint(const world_t *world, const VECTOR *point) @@ -294,9 +348,9 @@ static u_short world_leafAtPoint(const world_t *world, const VECTOR *point) const ps1bsp_plane_t *plane = &world->planes[node->planeId]; // TODO: can be optimized for axis-aligned planes, no need for a dot product there - short dist = m_pointPlaneDist4(point, &plane->normal, plane->dist); + short dist = m_pointPlaneDist2(point, &plane->normal, plane->dist); - nodeIdx = dist < 0 ? node->back : node->front; // TODO: this can be done branchless with (dist < 0)^1 + nodeIdx = dist > 0 ? node->front : node->back; // TODO: this can be done branchless with (dist < 0)^1 } return ~nodeIdx; @@ -312,5 +366,8 @@ void world_draw(const world_t *world) cam_leaf = world_leafAtPoint(world, &cam_pos); - world_drawnode(world, 0, scratchpad); + u_char *scratchptr = scratchpad_root; + u_char *pvs = world_loadVisData(world, cam_leaf, &scratchptr); + + world_drawnode(world, 0, pvs, scratchptr); } diff --git a/world.h b/world.h index d9bbabd..fe23b3a 100644 --- a/world.h +++ b/world.h @@ -25,6 +25,9 @@ typedef struct u_short numLeafFaces; u_short *leafFaces; + + u_short numVisData; + u_char *visData; } world_t; void world_load(const u_long *data, world_t *world);