From a05a800da09c54ace19bc5232553c2917ca4c3bd Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Tue, 17 Jan 2023 21:28:56 +0100 Subject: [PATCH] Implemented PVS decompression, leaf visibility check using PVS data, and front-to-back traversal of the BSP tree. --- memory.c | 2 +- memory.h | 2 +- ps1bsp.h | 1 + qmath.h | 10 +++---- test.ps1bsp | Bin 120050 -> 136909 bytes world.c | 79 ++++++++++++++++++++++++++++++++++++++++++++-------- world.h | 3 ++ 7 files changed, 79 insertions(+), 18 deletions(-) diff --git a/memory.c b/memory.c index aa15432..c907400 100644 --- a/memory.c +++ b/memory.c @@ -7,7 +7,7 @@ static char primbuff[2][PRIMBUFLEN]; // Primitive buffer, just a raw buffer of b static char *nextpri; static char *primbuff_bounds; -char* const scratchpad = (char*)0x1F800000; +u_char* const scratchpad_root = (char*)0x1F800000; void *mem_scratch(char **scratch_offset, unsigned short size) { diff --git a/memory.h b/memory.h index 12b6768..21b4d94 100644 --- a/memory.h +++ b/memory.h @@ -1,7 +1,7 @@ #ifndef __MEMORY_H__ #define __MEMORY_H__ -extern char* const scratchpad; // Starting address of scratchpad memory +extern u_char* const scratchpad_root; // Starting address of scratchpad memory /** * @brief Allocate memory in the fast scratchpad RAM buffer. diff --git a/ps1bsp.h b/ps1bsp.h index a8ae1a0..a071133 100755 --- a/ps1bsp.h +++ b/ps1bsp.h @@ -35,6 +35,7 @@ typedef struct ps1bsp_dentry_t nodes; ps1bsp_dentry_t leaves; ps1bsp_dentry_t leafFaces; + ps1bsp_dentry_t visData; } ps1bsp_header_t; typedef struct diff --git a/qmath.h b/qmath.h index a4e4edb..d004af8 100644 --- a/qmath.h +++ b/qmath.h @@ -8,12 +8,12 @@ INLINE short m_dot12(const SVECTOR *a, const SVECTOR *b) return ((a->vx * b->vx) >> 12) + ((a->vy * b->vy) >> 12) + ((a->vz * b->vz) >> 12); } -INLINE short m_pointPlaneDist4(const VECTOR *point4, const SVECTOR *normal12, short dist4) +INLINE short m_pointPlaneDist2(const VECTOR *point2, const SVECTOR *normal12, short dist2) { - int x = ((int)point4->vx * normal12->vx) >> 12; - int y = ((int)point4->vy * normal12->vy) >> 12; - int z = ((int)point4->vz * normal12->vz) >> 12; - return (short)(x + y + z - dist4); + int x = ((int)point2->vx * normal12->vx) >> 12; + int y = ((int)point2->vy * normal12->vy) >> 12; + int z = ((int)point2->vz * normal12->vz) >> 12; + return (short)(x + y + z - dist2); } #endif // __QMATH_H__ diff --git a/test.ps1bsp b/test.ps1bsp index cc301685539f60ab88b4bd2521c11998a4280bca..f2ba1c8c53a9e0de7bf07b25ad458cf6c64cb451 100755 GIT binary patch delta 17064 zcmb_jeT-b!Rlo1eeoS_~o~@G9i;cV+8E7Putbs_0O4OO4R1_7-u7X7VDE0Co5!F$; zi(9EO@xT3e^y=GE(Iv&t zZ}nZ?)bt4ffXlwq&%ECA=DpSBUDL@eGbZ}m5p-+xx1ssg7TmfeMSoS?A%8Ct+a^S` zZB*I9Z%u^OY z{WOc3DAjD-CkCGBMJhe*)gH8aucOs3S-E|oxnhQbutq1-WmmWKqrR{ zVP#^M7@Src3Prja5;fHMZA=qWYVQ<-*Rd9eD!D`jLqt_t4L>d-a4a~6SPSjDH<|tBi@kKWxhuEsm=of}7D;MNaJ!#Z`Oz-R$iycXwcAGSSpd zQB)S}*VeZy1nzxzRa)>3$2kw|<;uIenTK^cUCq1e#F82CSW4>k%-v$@t_tr)FbLfp z>~m}PG|0NXE6Gt%3JKZOT!LI9>*WqMHXNVA#{k2pDJiNP^IMSi2a4`z&=o(m=7^Vtw zsHIaVMPut9)gv+Za&zWqG|@;6iQ7OqtlR8of_AfsJ-=0khFW7GgVEqeT_Wv0uSjxdK63Pbpmm1xepSRz zil{20B;Er_$0zaYAFKH~ZrN*4P+Mxc(mpW;YwLa#1iH(#-rqi{0N$+oF0(XyH=5bX z&CzGE%ihHceVKj{(R0+^(!%CDk;FtIJAW%xheUrQ3Kd9NI~$iCuC4IaXe3-!T12e0 z{W-HtX(50t$CHoL-jz(gYCw3aBj`!MkqV(mK0-;y=>ZXa>?dm{2~k&whi2m??c6j- zn}eEe>a>tIx;@toZ=%$-bW_OP`Krn#r1YZlA1s+m`TbN6P5$InA!tW3;x`Cx^471b za@W0v1eckUgv_O637#QQZ&EPyudJ#Z!f|MCYXtIWT7l_h_#^Q(FJKhw)jn_j}&fz#MN zshr~z0Q;Ci`%+zPpz-SGemuuKmKvOJ)ybNtRecwgZ%8kDE$;x|qFxr&zUT5ANdb$# zt`03kq94@^)XTd3qrNp3_$SadI`*7t8E1ohpzF2K7f1+|5IL1iJ*}Oo;DsaX7QP*>aRQ3w!P0sx`l}Bs&;$!gB|=B_>!QGYB;X#*9SM zy7EcfI_$;kO58ETur1M1sCCft`I1s#?W?JZK=khFR8U|7R#h!{o~IdDx}<-CtDy~Y zT&8}b)H_ZR7_zWxkPm30tY-u-D|t%2b|;o$yN1b7ccDuWeztk&lo+6iNIJk*M_~P^ zAYsokcHruRYW^4vBnw=wNMu*ntlghZHrpsvD@k*DK_88w+y$o`49UuoEI}HP3EWQ` z8_D?rrusK+0%>a}TMECe5L-&%5n@ZD+8npg@XJh9tUj%JtMb`1`q$&a}gzCn+8ja^B-;4k0XxJ1*QM`|Y-U00h(`K#Cm zXvD$RT!=A(va}}(dx~BBt_M$=aOI!PqNvv>^0a2dtXV4F70jJN4pNKLgT=6t9P2y2 zPLD}ec5*B?F`;UeF7Q^t1WsUVPmN0_Vv?A$NMB5@uDwTtV>gB4Ii2Jw9ENaubcfN{ zC+L^blN>SEG5<3kU-lp0rjK^iceG6O1EWg8fbrbmu_1OoxnzEJt+rg4oU5(WAx5G_ z;a7wh+z8C3gh&;cLA|qF*aOr7kd>8MoGD#r)Vf4L5Hud#R=ZC`BN&^IsMQ$XkrouH z^aCkqwK3AU9yAEDN|l04Qc6o+KJ_vvCMrkE;%+ffontoR)&2av^d!$XpTjqrA&x?r z3WPbTsYoZKK15grcA(H)E}Wjzp_vfHd3r&g1VYri>PIf%K>L|*EDTi^<@Wdu^a%TA z8x&5b%tVr@&$)0Sm6_{PEZf9U{qAKO+z#1tM212E5j-cw_*eE9ei6UZ`!4hTvv|KF zs(q*qDP&Ew2*Z<9>I<_CXmTpl1beX7L=9-ykX(ha+CADhBln>b;4hZ%=(Bx1eC3o7 zGJaM>!FT#fvg?cyF;18t#n-Lcm1QBj!mMGMA2Sdfk6om5%0jUH{NukJ&2S>~Z0Zxsmxn(EbP zWw4=VL>wcOoQM%gl~S7er8yBG!N0@%P(pAu)ns@?H6i>|b!u+vJKdX+o@LVSW0G0e z7jLwv`nZUrsn2q;Tx9?1JkXR00O!jYzYziDA>=5;zuGu5g<#fELm-x^ z7sz1(;TUwa2Pc3OGfII7c1zTBbvi5FRp9)U)`>Yy!{AP(4!;7bF{WkG5qpF!?opCC zB>ZY#ayV78#%q^n{yN`DHZ{P=7+lhWK(hg6do!D-Hiih4GG3x?&8gG1DU7s+&=V?S zhYnCH1=<>BV=EXL1Rk3q>1a{2X6T59NYqEso;V*aB4?g)%u##KGllVL{sJ(wA4stjwlX#-DTr60%{^9bde66@hfR&~;uO8&(!{#(?P2npGv$EKfDWvvRE-JAI zdQYJ&XwH{q6ScXR?m8WiI6=n^>Q!?hnjodm#&>cW^qRzGTX!RKHOA@u#OOD;`#N0dW~~mU!R9tiep%{{( zF)XiCI?c^{lXveUY!Chrx6?K+s2D90hxZ*DwGjquB<<$z`<1*t6mSSf8wcDO^w_4a zG{+Fn#C>9v%5>N$64<4#py%C#`D0h53@kQP7O4683U$hqgN}p$z~)nb)E4n~ZKEUS zR^F=Q_NHrJ5zvs*Zj zg&4#L23Hn@MY5M?^ma^)O311P_#q^MqhCFQaaG2t<{v^k9GQl6`>h=UjiiNw(gkFIfyZ4Ize(dwOyZXVWF?-Up#Q^0II-9aq zum}45L!D-%F%n;l>5hf9WQG$zz`lKBAzoMV{J^i!py=VorNzQM`2$L8 zxN{ouJ&hP6QxgJlAk`($g=|P801O;4PmEr`>${WQM!Q7z$1I`x$^bocZ&Ahl{t=Iy+7j5jl3z`x`2N%vh_ zdk#_lh$t*RCg2i+FV|r=db)+b>eyHqf^&?iF)!1Oe~^-Hy3vIbP?nRG z4%Mntx;m|IE^H{w=T8yUg;^>Hc+NpqSllO~kkJWA2!5dtRNUahsi8>03Xv&y}1+&^f)n47`3GLyZmw&jbRIjXXUcq5^s@7aa^dz4Gg8 z6QMthqz0VxwLcet3^5whNYZJXc3L11Gtd^L2R;ZU!y$uE6362}2R7<$uwZ)av^)m& ztVqX6XQHhp zr0XOKs#x;Lf2R)tqM8lZ&=bz*T>%2PYzLv6gID16wCaw<2}v*%sEEhq0f0G7Bbze$7qB2GAudNE;{A)%pl%el zs`(txL4@fv^;r?=40K^~iB`sSI>O*zYExYfGjPu2tV3jY1W}S!+*hg=6iyJlZl6yl z`IKcrfB$3x%QHzxI_i+IEZwKp49GYyln@>6&Z3z;@Bz_a+_5br zHlGO-9xBT?RFXnZkU}#5tS8;c4gOhI-X8Z>S`%TP6Q@9Nb!n%fqF{ t8+>s4b(`|Oo9ygYn?8<92rxX9kMX|?=VwnFm(I*}%{tp!-2M7QF delta 72 zcmX@RmgCb(b}L2(1_m1-UQo-xa0iHQ*fTI}*~`H2V-^F$ng~XQRdNgrVdabrYxEfy an3gj#ypd#Jcz1x2fkmXzX{!_Cjw=A*6ck?o diff --git a/world.c b/world.c index 4c861c6..79d214e 100644 --- a/world.c +++ b/world.c @@ -48,6 +48,9 @@ void world_load(const u_long *data, world_t *world) world->leafFaces = (u_short*)(bytes + header->leafFaces.offset); world->numLeafFaces = header->leafFaces.size / sizeof(u_short); + + world->visData = (u_char*)(bytes + header->visData.offset); + world->numVisData = header->visData.size / sizeof(u_char); } static INLINE void drawface_triangle_fan(const ps1bsp_face_t *face, SVECTOR *vecs) @@ -75,7 +78,7 @@ static INLINE void drawface_triangle_fan(const ps1bsp_face_t *face, SVECTOR *vec // Average Z for depth sorting and culling gte_avsz3(); gte_stotz(&p); - unsigned short depth = p >> 2; + short depth = p >> 2; if (depth <= 0 || depth >= OTLEN) continue; @@ -137,7 +140,7 @@ static INLINE void drawface_triangle_strip(const ps1bsp_face_t *face, SVECTOR *v // Average Z for depth sorting and culling gte_avsz3(); gte_stotz(&p); - unsigned short depth = p >> 2; + short depth = p >> 2; if (depth <= 0 || depth >= OTLEN) continue; @@ -193,7 +196,7 @@ static INLINE void drawface_quad_strip(const ps1bsp_face_t *face, SVECTOR *vecs) // Average Z for depth sorting and culling gte_avsz3(); gte_stotz(&p); - unsigned short depth = p >> 2; + short depth = p >> 2; if (depth <= 0 || depth >= OTLEN) continue; @@ -222,13 +225,15 @@ static INLINE void drawface_quad_strip(const ps1bsp_face_t *face, SVECTOR *vecs) } } -static void world_drawface(const world_t *world, const ps1bsp_face_t *face, char *scratchptr) +static void world_drawface(const world_t *world, const ps1bsp_face_t *face, u_char *scratchptr) { const CVECTOR *col = &colors[(u_long)face % numColors]; - SVECTOR *vecs = (SVECTOR*)mem_scratch(&scratchptr, sizeof(SVECTOR) * face->numFaceVertices); + SVECTOR *vecs = (SVECTOR*)scratchptr; + // scratchptr += sizeof(SVECTOR) * face->numFaceVertices; // No need to move the scratchpad pointer right now // Copy this face's vertices into scratch RAM for fast reuse + // TODO: this is the main performance bottleneck right now! ps1bsp_facevertex_t *faceVertex = &world->faceVertices[face->firstFaceVertex]; for (int vertIdx = 0; vertIdx < face->numFaceVertices; ++vertIdx, ++faceVertex) { @@ -243,12 +248,17 @@ static void world_drawface(const world_t *world, const ps1bsp_face_t *face, char drawface_quad_strip(face, vecs); } -static void world_drawnode(const world_t *world, short nodeIdx, char *scratchptr) +static void world_drawnode(const world_t *world, short nodeIdx, u_char *pvs, u_char *scratchptr) { u_long frameNum = time_getFrameNumber(); if (nodeIdx < 0) // Leaf node { + // Check if this leaf is visible from the current camera position + u_short test = ~nodeIdx - 1; + if ((pvs[test >> 3] & (1 << (test & 0x7))) == 0) + return; + const ps1bsp_leaf_t *leaf = &world->leaves[~nodeIdx]; const u_short *leafFace = &world->leafFaces[leaf->firstLeafFace]; @@ -280,9 +290,53 @@ static void world_drawnode(const world_t *world, short nodeIdx, char *scratchpt // world_drawface(world, face, scratchptr); // face->drawFrame = frameNum; // } + + const ps1bsp_plane_t *plane = &world->planes[node->planeId]; + short dist = m_pointPlaneDist2(&cam_pos, &plane->normal, plane->dist); + + // Draw child nodes in front-to-back order; adding faces to the OT will reverse the drawing order + if (dist > 0) + { + world_drawnode(world, node->front, pvs, scratchptr); + world_drawnode(world, node->back, pvs, scratchptr); + } + else + { + world_drawnode(world, node->back, pvs, scratchptr); + world_drawnode(world, node->front, pvs, scratchptr); + } +} + +// Decompress PVS data for the given leaf ID and store it in scratch RAM at the given scratch pointer location. +// Returns the memory location of decompressed PVS data, and moves the scratch pointer forward. +static u_char *world_loadVisData(const world_t *world, u_short leafIdx, u_char **scratchptr) +{ + u_char *head = *scratchptr; + u_char *tail = head; + + const ps1bsp_leaf_t *leaf = &world->leaves[leafIdx]; - world_drawnode(world, node->front, scratchptr); - world_drawnode(world, node->back, scratchptr); + const u_char *v = &world->visData[leaf->vislist]; + for (int l = 1; l < world->numLeaves; ) + { + u_char bits = *v++; + if (bits) + { + *tail++ = bits; + l += 8; + } + else + { + u_char skip = *v++; + for (u_char i = 0; i < skip; ++i, l += 8) + { + *tail++ = 0; + } + } + } + + *scratchptr = tail; + return head; } static u_short world_leafAtPoint(const world_t *world, const VECTOR *point) @@ -294,9 +348,9 @@ static u_short world_leafAtPoint(const world_t *world, const VECTOR *point) const ps1bsp_plane_t *plane = &world->planes[node->planeId]; // TODO: can be optimized for axis-aligned planes, no need for a dot product there - short dist = m_pointPlaneDist4(point, &plane->normal, plane->dist); + short dist = m_pointPlaneDist2(point, &plane->normal, plane->dist); - nodeIdx = dist < 0 ? node->back : node->front; // TODO: this can be done branchless with (dist < 0)^1 + nodeIdx = dist > 0 ? node->front : node->back; // TODO: this can be done branchless with (dist < 0)^1 } return ~nodeIdx; @@ -312,5 +366,8 @@ void world_draw(const world_t *world) cam_leaf = world_leafAtPoint(world, &cam_pos); - world_drawnode(world, 0, scratchpad); + u_char *scratchptr = scratchpad_root; + u_char *pvs = world_loadVisData(world, cam_leaf, &scratchptr); + + world_drawnode(world, 0, pvs, scratchptr); } diff --git a/world.h b/world.h index d9bbabd..fe23b3a 100644 --- a/world.h +++ b/world.h @@ -25,6 +25,9 @@ typedef struct u_short numLeafFaces; u_short *leafFaces; + + u_short numVisData; + u_char *visData; } world_t; void world_load(const u_long *data, world_t *world);