Quake BSP renderer for PS1
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 

547 lines
18 KiB

#ifndef __DRAW_H__
#define __DRAW_H__
#include "common.h"
#include "display.h"
#include <inline_c.h>
// Single-access copy helpers: each macro moves one word from the source
// address to the destination address with a single load and a single store.
// This is faster than copying packed fields (RGB bytes, a UV pair, an XY pair)
// one value at a time.
//
//   setColorFast(pr, r)   copies one u_int     (e.g. r, g, b plus the byte after them)
//   setUVFast(pu, u)      copies one u_short   (e.g. a u, v pair)
//   blit16/32/64(dst,src) copy one uint16_t / uint32_t / uint64_t
//
// Usage notes:
//  - Both pointers must be suitably aligned for the access width.
//  - The whole word is copied, so bytes that share the word with the named
//    field are overwritten in the destination as well.
//  - Every expansion is a fully parenthesized assignment expression, so the
//    macros compose safely with surrounding operators (without the outer
//    parentheses, `blit32(a, b) == x` would parse as `*a = (*b == x)`).
//  - The source is read through a pointer-to-const, so passing a
//    const-qualified source does not silently cast its qualifier away.
#define setColorFast(pr, r) (*((u_int*)(pr)) = *((const u_int*)(r)))
#define setUVFast(pu, u) (*((u_short*)(pu)) = *((const u_short*)(u)))
#define blit16(dst, src) (*((uint16_t*)(dst)) = *((const uint16_t*)(src)))
#define blit32(dst, src) (*((uint32_t*)(dst)) = *((const uint32_t*)(src)))
#define blit64(dst, src) (*((uint64_t*)(dst)) = *((const uint64_t*)(src)))
// White with R, G and B all at 255, packed into the low 24 bits of one word
// (top byte zero) so it can be copied in a single access with setColorFast.
static u_int color_white = 0x00FFFFFFu;
// Draws a single untextured, Gouraud-shaded triangle.
//
//   verts  Three consecutive vertices. The low 8 bits of each vertex's `pad`
//          field are used as that corner's grey level (written to R, G and B).
//   ot     Ordering table entry the finished primitive is added to.
//
// The vertices are transformed as-is; no clipping or visibility test happens here.
static INLINE void draw_triangle_lit(SVECTOR *verts, u_long *ot)
{
    // Load all three vertices into the GTE and transform them together:
    // rotation, translation and perspective projection
    gte_ldv3(&verts[0], &verts[1], &verts[2]);
    gte_rtpt();

    // Allocate the primitive and store the three projected screen positions in it
    POLY_G3 *tri = (POLY_G3*)mem_prim(sizeof(POLY_G3));
    setPolyG3(tri);
    gte_stsxy3_g3(tri);

    // One grey level per corner; the GPU interpolates between them
    const uint8_t lum0 = (uint8_t)verts[0].pad;
    const uint8_t lum1 = (uint8_t)verts[1].pad;
    const uint8_t lum2 = (uint8_t)verts[2].pad;
    tri->r0 = lum0; tri->g0 = lum0; tri->b0 = lum0;
    tri->r1 = lum1; tri->g1 = lum1; tri->b1 = lum1;
    tri->r2 = lum2; tri->g2 = lum2; tri->b2 = lum2;

    addPrim(ot, tri);
    ++polyCount;
}
// Draws a convex face as a strip of untextured, Gouraud-shaded triangles.
//
//   verts     The face's vertices. The low 8 bits of each vertex's `pad` field
//             are used as that corner's grey level (written to R, G and B).
//   numVerts  Number of entries in verts. Faces with fewer than 3 vertices
//             are skipped.
//   ot        Ordering table entry the primitives are added to.
//
// The strip zig-zags between the start and the end of the vertex list and
// works inwards: (0, 1, n-1), (1, 2, n-1), (n-1, 2, n-2), (2, 3, n-2), ...
// which yields numVerts - 2 triangles.
static INLINE void draw_trianglestrip_lit(SVECTOR *verts, u_char numVerts, u_long *ot)
{
    // Fewer than three vertices cannot form a triangle. Without this guard,
    // (u_char)(numVerts - 2) wraps to 254/255 for a numVerts of 0/1 and the
    // loop below would read far outside the vertex array.
    if (numVerts < 3)
        return;

    const SVECTOR *head = verts + 1;        // next unused vertex from the front
    const SVECTOR *tail = verts + numVerts; // one past the next unused vertex from the back
    const SVECTOR *v1 = head;               // always overwritten before first use; initialized for clarity
    const SVECTOR *v2 = verts;              // first triangle starts at vertex 0
    u_char reverse = 0;

    const u_char numTris = numVerts - 2;
    for (u_char triIdx = 0; triIdx < numTris; ++triIdx)
    {
        const SVECTOR *v0;

        // Alternate between taking a new vertex from the back and from the front
        reverse ^= 1;
        if (reverse)
        {
            v0 = v2;
            v1 = head;
            v2 = --tail;
        }
        else
        {
            v0 = v1;
            v1 = ++head;
            v2 = tail;
        }

        // Transform the triangle with the GTE: rotation, translation, perspective projection
        gte_ldv3(v0, v1, v2);
        gte_rtpt();

        // Build a Gouraud-shaded untextured triangle from the projected positions
        POLY_G3 *poly = (POLY_G3*)mem_prim(sizeof(POLY_G3));
        setPolyG3(poly);
        gte_stsxy3_g3(poly);

        poly->r0 = poly->g0 = poly->b0 = (uint8_t)v0->pad;
        poly->r1 = poly->g1 = poly->b1 = (uint8_t)v1->pad;
        poly->r2 = poly->g2 = poly->b2 = (uint8_t)v2->pad;

        addPrim(ot, poly);
        ++polyCount;
    }
}
// Draws a convex face as a strip of untextured quads in one solid colour.
//
//   vertices   Vertex array that the face vertices index into.
//   faceVerts  The face's vertices; only `index` is read here.
//   numVerts   Number of entries in faceVerts. Expected to be at least 3; a
//              value of 0 wraps the 8-bit counters below and is not handled.
//   color      Colour word copied into all four corners of every quad.
//   ot         Ordering table entry the primitives are added to.
//
// The strip alternates between the end and the start of the vertex list and
// works inwards (last, first, last-1, first+1, ...). With an odd vertex count
// the final quad has its last two corners on the same vertex, i.e. it
// collapses into a triangle.
static INLINE void draw_quadstrip_flat(const ps1bsp_vertex_t *vertices, const ps1bsp_facevertex_t *faceVerts, u_char numVerts, P_COLOR *color, u_long *ot)
{
    u_char hi = numVerts - 1; // walks backwards from the last vertex
    u_char lo = 0;            // walks forwards from the first vertex

    // (N-1)/2 quads instead of (N-2)/2, so that a leftover triangle is still drawn (as a collapsed quad)
    for (u_char remaining = (numVerts - 1) >> 1; remaining != 0; --remaining)
    {
        // Leading edge of this quad is the trailing edge of the previous one
        const ps1bsp_facevertex_t *a = &faceVerts[hi];
        const ps1bsp_facevertex_t *b = &faceVerts[lo];
        --hi;
        ++lo;
        const ps1bsp_facevertex_t *c = &faceVerts[hi];
        const ps1bsp_facevertex_t *d = &faceVerts[lo];

        // Rotate, translate and project the first three corners together
        gte_ldv3(&vertices[a->index], &vertices[b->index], &vertices[c->index]);
        gte_rtpt();

        POLY_G4 *quad = (POLY_G4*)mem_prim(sizeof(POLY_G4));
        gte_stsxy3_g3(quad);

        // The fourth corner goes through the GTE on its own
        gte_ldv0(&vertices[d->index]);
        gte_rtps();
        gte_stsxy(&quad->x3);

        // Same colour on every corner gives a flat look on a Gouraud primitive
        setColorFast(&quad->r0, color);
        setColorFast(&quad->r1, color);
        setColorFast(&quad->r2, color);
        setColorFast(&quad->r3, color);

        // Primitive header is set after the word-sized colour copies above.
        // NOTE(review): presumably because the copy to r0 also covers the code byte - confirm against the POLY_G4 layout.
        setPolyG4(quad);
        addPrim(ot, quad);
        ++polyCount;
    }
}
// Draws a convex face as a strip of untextured, Gouraud-shaded greyscale quads.
//
//   vertices   Vertex array that the poly vertices index into.
//   polyVerts  The face's vertices; `index` selects the position and `r` is
//              used as the grey level for that corner (written to R, G and B).
//   numVerts   Number of entries in polyVerts. Expected to be at least 3; a
//              value of 0 wraps the 8-bit counters below and is not handled.
//   ot         Ordering table entry the primitives are added to.
//
// The strip alternates between the end and the start of the vertex list and
// works inwards (last, first, last-1, first+1, ...). With an odd vertex count
// the final quad collapses into a triangle.
static INLINE void draw_quadstrip_lit(const ps1bsp_vertex_t *vertices, const ps1bsp_polyvertex_t *polyVerts, u_char numVerts, u_long *ot)
{
    u_char hi = numVerts - 1; // walks backwards from the last vertex
    u_char lo = 0;            // walks forwards from the first vertex

    // (N-1)/2 quads instead of (N-2)/2, so that a leftover triangle is still drawn (as a collapsed quad)
    for (u_char remaining = (numVerts - 1) >> 1; remaining != 0; --remaining)
    {
        // Leading edge of this quad is the trailing edge of the previous one
        const ps1bsp_polyvertex_t *a = &polyVerts[hi];
        const ps1bsp_polyvertex_t *b = &polyVerts[lo];
        --hi;
        ++lo;
        const ps1bsp_polyvertex_t *c = &polyVerts[hi];
        const ps1bsp_polyvertex_t *d = &polyVerts[lo];

        // Rotate, translate and project the first three corners together
        gte_ldv3(&vertices[a->index], &vertices[b->index], &vertices[c->index]);
        gte_rtpt();

        POLY_G4 *quad = (POLY_G4*)mem_prim(sizeof(POLY_G4));
        setPolyG4(quad);
        gte_stsxy3_g3(quad);

        // The fourth corner goes through the GTE on its own
        gte_ldv0(&vertices[d->index]);
        gte_rtps();
        gte_stsxy(&quad->x3);

        // One grey level per corner
        const uint8_t lumA = (uint8_t)a->r;
        const uint8_t lumB = (uint8_t)b->r;
        const uint8_t lumC = (uint8_t)c->r;
        const uint8_t lumD = (uint8_t)d->r;
        quad->r0 = lumA; quad->g0 = lumA; quad->b0 = lumA;
        quad->r1 = lumB; quad->g1 = lumB; quad->b1 = lumB;
        quad->r2 = lumC; quad->g2 = lumC; quad->b2 = lumC;
        quad->r3 = lumD; quad->g3 = lumD; quad->b3 = lumD;

        addPrim(ot, quad);
        ++polyCount;
    }
}
// Draws a convex face as a strip of untextured, Gouraud-shaded coloured quads.
//
//   vertices   Vertex array that the face vertices index into.
//   faceVerts  The face's vertices; `index` selects the position and r/g/b
//              give the corner colour, each shifted left by 3 bits before
//              being written to the primitive.
//   numVerts   Number of entries in faceVerts. Expected to be at least 3; a
//              value of 0 wraps the 8-bit counters below and is not handled.
//   ot         Ordering table entry the primitives are added to.
//
// The strip alternates between the end and the start of the vertex list and
// works inwards (last, first, last-1, first+1, ...). With an odd vertex count
// the final quad collapses into a triangle.
static INLINE void draw_quadstrip_colored(const ps1bsp_vertex_t *vertices, const ps1bsp_facevertex_t *faceVerts, u_char numVerts, u_long *ot)
{
    u_char hi = numVerts - 1; // walks backwards from the last vertex
    u_char lo = 0;            // walks forwards from the first vertex

    // (N-1)/2 quads instead of (N-2)/2, so that a leftover triangle is still drawn (as a collapsed quad)
    for (u_char remaining = (numVerts - 1) >> 1; remaining != 0; --remaining)
    {
        // Leading edge of this quad is the trailing edge of the previous one
        const ps1bsp_facevertex_t *a = &faceVerts[hi];
        const ps1bsp_facevertex_t *b = &faceVerts[lo];
        --hi;
        ++lo;
        const ps1bsp_facevertex_t *c = &faceVerts[hi];
        const ps1bsp_facevertex_t *d = &faceVerts[lo];

        // Rotate, translate and project the first three corners together
        gte_ldv3(&vertices[a->index], &vertices[b->index], &vertices[c->index]);
        gte_rtpt();

        POLY_G4 *quad = (POLY_G4*)mem_prim(sizeof(POLY_G4));
        setPolyG4(quad);
        gte_stsxy3_g3(quad);

        // The fourth corner goes through the GTE on its own
        gte_ldv0(&vertices[d->index]);
        gte_rtps();
        gte_stsxy(&quad->x3);

        // Scale the stored colour channels up by a factor of 8 for the primitive
        quad->r0 = a->r << 3;
        quad->g0 = a->g << 3;
        quad->b0 = a->b << 3;

        quad->r1 = b->r << 3;
        quad->g1 = b->g << 3;
        quad->b1 = b->b << 3;

        quad->r2 = c->r << 3;
        quad->g2 = c->g << 3;
        quad->b2 = c->b << 3;

        quad->r3 = d->r << 3;
        quad->g3 = d->g << 3;
        quad->b3 = d->b << 3;

        addPrim(ot, quad);
        ++polyCount;
    }
}
// Draws a single Gouraud-shaded, textured triangle.
//
//   vertices   Vertex array that the poly vertices index into.
//   polyVerts  Three consecutive poly vertices; `index` selects the position,
//              the word at `r` supplies the corner colour and the half-word
//              at `u` supplies the texture coordinates.
//   tpage      Texture page value stored in the primitive.
//   ot         Ordering table entry the finished primitive is added to.
//
// The texture CLUT is always quake_clut.
static INLINE void draw_triangle_textured(const ps1bsp_vertex_t *vertices, const ps1bsp_polyvertex_t *polyVerts, u_short tpage, u_long *ot)
{
    const ps1bsp_polyvertex_t *a = &polyVerts[0];
    const ps1bsp_polyvertex_t *b = &polyVerts[1];
    const ps1bsp_polyvertex_t *c = &polyVerts[2];

    // Rotate, translate and project all three corners together
    gte_ldv3(&vertices[a->index], &vertices[b->index], &vertices[c->index]);
    gte_rtpt();

    POLY_GT3 *tri = (POLY_GT3*)mem_prim(sizeof(POLY_GT3));

    // The fields are written roughly in the order they appear in the
    // primitive, one corner at a time, to keep the memory accesses close together.

    // Corner 0, followed by the CLUT
    setColorFast(&tri->r0, &a->r);
    setUVFast(&tri->u0, &a->u);
    gte_stsxy0(&tri->x0);
    tri->clut = quake_clut;

    // Corner 1, followed by the texture page
    setColorFast(&tri->r1, &b->r);
    gte_stsxy1(&tri->x1);
    setUVFast(&tri->u1, &b->u);
    tri->tpage = tpage;

    // Corner 2
    setColorFast(&tri->r2, &c->r);
    gte_stsxy2(&tri->x2);
    setUVFast(&tri->u2, &c->u);

    // The primitive header is set last, after the word-sized colour copies
    setPolyGT3(tri);
    addPrim(ot, tri);
    ++polyCount;
}
// Draws a convex face as a strip of Gouraud-shaded, textured quads.
//
//   vertices   Vertex array that the poly vertices index into.
//   polyVerts  The face's vertices; `index` selects the position, the word at
//              `r` supplies the corner colour and the half-word at `u`
//              supplies the texture coordinates.
//   numVerts   Number of entries in polyVerts. Expected to be at least 3; a
//              value of 0 wraps the 8-bit counters below and is not handled.
//   tpage      Texture page value stored in every primitive.
//   ot         Ordering table entry the primitives are added to.
//
// The strip alternates between the end and the start of the vertex list and
// works inwards (last, first, last-1, first+1, ...). The texture CLUT is
// always quake_clut.
static INLINE void draw_quadstrip_textured(const ps1bsp_vertex_t *vertices, const ps1bsp_polyvertex_t *polyVerts, u_char numVerts, u_short tpage, u_long *ot)
{
    u_char hi = numVerts - 1; // walks backwards from the last vertex
    u_char lo = 0;            // walks forwards from the first vertex

    // (N-1)/2 quads instead of (N-2)/2: with an odd vertex count the leftover
    // triangle is drawn as a collapsed quad.
    // NOTE: testing has shown that the PS1 is faster when it simply renders
    // quads and accepts the odd collapsed one, compared to special-casing a
    // single trailing triangle.
    for (u_char remaining = (numVerts - 1) >> 1; remaining != 0; --remaining)
    {
        // Leading edge of this quad is the trailing edge of the previous one
        const ps1bsp_polyvertex_t *a = &polyVerts[hi];
        const ps1bsp_polyvertex_t *b = &polyVerts[lo];
        --hi;
        ++lo;
        const ps1bsp_polyvertex_t *c = &polyVerts[hi];
        const ps1bsp_polyvertex_t *d = &polyVerts[lo];

        // Rotate, translate and project the first three corners together
        gte_ldv3(&vertices[a->index], &vertices[b->index], &vertices[c->index]);
        gte_rtpt();

        POLY_GT4 *quad = (POLY_GT4*)mem_prim(sizeof(POLY_GT4));

        // The fields are written roughly in the order they appear in the
        // primitive, one corner at a time, to keep the memory accesses close together.

        // Corner 0, followed by the CLUT
        setColorFast(&quad->r0, &a->r);
        setUVFast(&quad->u0, &a->u);
        gte_stsxy0(&quad->x0);
        quad->clut = quake_clut;

        // Corner 1, followed by the texture page
        setColorFast(&quad->r1, &b->r);
        gte_stsxy1(&quad->x1);
        setUVFast(&quad->u1, &b->u);
        quad->tpage = tpage;

        // Corner 2
        setColorFast(&quad->r2, &c->r);
        gte_stsxy2(&quad->x2);
        setUVFast(&quad->u2, &c->u);

        // The first three results have been stored, so the GTE is free to
        // process the fourth corner on its own
        gte_ldv0(&vertices[d->index]);
        gte_rtps();

        // Corner 3
        setColorFast(&quad->r3, &d->r);
        setUVFast(&quad->u3, &d->u);
        gte_stsxy(&quad->x3);

        // The primitive header is set last, after the word-sized colour copies
        setPolyGT4(quad);
        addPrim(ot, quad);
        ++polyCount;
    }
}
// Temporary vertex used while tessellating a quad: position, colour and
// texture coordinates in 16 bytes. The fields are grouped so that the
// word-sized copy macros (blit16/blit32) can move each group in one access:
//   bytes  0..3   vx, vy
//   bytes  4..7   vz, vpad
//   bytes  8..11  r, g, b, cpad
//   bytes 12..13  u, v
typedef struct _TMPVERT
{
    int16_t  vx;
    int16_t  vy;
    int16_t  vz;
    int16_t  vpad; // fills the second half of the word that starts at vz
    uint8_t  r;
    uint8_t  g;
    uint8_t  b;
    uint8_t  cpad; // fills the last byte of the colour word
    uint8_t  u;
    uint8_t  v;
    uint16_t pad;  // rounds the struct up to 16 bytes
} TMPVERT;
// Fills a temporary vertex from a poly vertex and the position it refers to,
// using word-sized copies:
//   32 bits at pv->r  -> dst->r   (r, g, b and the byte that follows them)
//   32 bits at v->vx  -> dst->vx  (vx and the 16 bits that follow it)
//   32 bits at v->vz  -> dst->vz  (vz and the 16 bits that follow it)
//   16 bits at pv->u  -> dst->u   (u and the byte that follows it)
//
// dst, pv and v are pointers. dst and pv are evaluated more than once, so all
// arguments should be free of side effects.
//
// The body is wrapped in do { } while (0) so that the macro expands to exactly
// one statement and stays correct inside an unbraced if/else. Invoke it like a
// function call, with a trailing semicolon.
#define copyVertFast(dst, pv, v) \
    do { \
        blit32(&(dst)->r, &(pv)->r); \
        blit32(&(dst)->vx, &(v)->vx); \
        blit32(&(dst)->vz, &(v)->vz); \
        blit16(&(dst)->u, &(pv)->u); \
    } while (0)
// Writes the midpoint of two vertices into dst: position (vx, vy, vz), colour
// (r, g, b) and texture coordinates (u, v) are each averaged as
// (src0 + src1) >> 1. The sums are computed in a wider type so they cannot
// overflow before the shift. No other field of dst is touched.
//
// dst, src0 and src1 are pointers and are evaluated several times, so they
// should be free of side effects.
//
// The body is wrapped in do { } while (0) so that the macro expands to exactly
// one statement and stays correct inside an unbraced if/else. Invoke it like a
// function call, with a trailing semicolon.
#define lerpVert(dst, src0, src1) \
    do { \
        (dst)->vx = (int16_t)(((int32_t)(src0)->vx + (int32_t)(src1)->vx) >> 1); \
        (dst)->vy = (int16_t)(((int32_t)(src0)->vy + (int32_t)(src1)->vy) >> 1); \
        (dst)->vz = (int16_t)(((int32_t)(src0)->vz + (int32_t)(src1)->vz) >> 1); \
        (dst)->r = (uint8_t)(((uint16_t)(src0)->r + (uint16_t)(src1)->r) >> 1); \
        (dst)->g = (uint8_t)(((uint16_t)(src0)->g + (uint16_t)(src1)->g) >> 1); \
        (dst)->b = (uint8_t)(((uint16_t)(src0)->b + (uint16_t)(src1)->b) >> 1); \
        (dst)->u = (uint8_t)(((uint16_t)(src0)->u + (uint16_t)(src1)->u) >> 1); \
        (dst)->v = (uint8_t)(((uint16_t)(src0)->v + (uint16_t)(src1)->v) >> 1); \
    } while (0)
// Copies one temporary vertex into corner `i` of a primitive, using word-sized
// copies:
//   32 bits at src.vx -> dst->x<i>  (vx and the 16 bits that follow it)
//   32 bits at src.r  -> dst->r<i>  (r, g, b and the byte that follows them)
//   16 bits at src.u  -> dst->u<i>  (u and the byte that follows it)
//
// dst is a pointer to the primitive, i is the literal corner number that gets
// pasted onto the field names, and src is the vertex itself (an lvalue, not a
// pointer).
//
// The body is wrapped in do { } while (0) so that the macro expands to exactly
// one statement and stays correct inside an unbraced if/else. Invoke it like a
// function call, with a trailing semicolon.
#define blitVert(dst, i, src) \
    do { \
        blit32(&(dst)->x ## i, &(src).vx); \
        blit32(&(dst)->r ## i, &(src).r); \
        blit16(&(dst)->u ## i, &(src).u); \
    } while (0)
// Draws a convex face as a strip of Gouraud-shaded, textured quads, with each
// strip quad subdivided into 2x2 smaller quads.
//
//   vertices   Vertex array that the poly vertices index into.
//   polyVerts  The face's vertices (position index, colour, texture coordinates).
//   numVerts   Number of entries in polyVerts. Expected to be at least 3; a
//              value of 0 wraps the 8-bit counters below and is not handled.
//   tpage      Texture page value stored in every primitive.
//   ot         Ordering table entry the primitives are added to.
//
// For every strip quad the four corners are copied into a 3x3 grid of
// temporary vertices located at `scratchpad`; the five missing grid points are
// midpoints (position, colour and UV averaged). All nine points are then
// projected and four primitives are emitted. The texture CLUT is always
// quake_clut.
static INLINE void draw_quadstrip_tess2(const ps1bsp_vertex_t *vertices, const ps1bsp_polyvertex_t *polyVerts, u_char numVerts, u_short tpage, u_long *ot)
{
    // Grid layout (indices into grid[]):
    //   0 1 2
    //   3 4 5
    //   6 7 8
    TMPVERT *grid = (TMPVERT*)(scratchpad);

// Emits one sub-quad whose corners 0..3 are grid points a, b, c, d. The
// primitive header is set after the word-sized vertex copies.
#define TESS2_EMIT_QUAD(a, b, c, d) \
    do { \
        POLY_GT4 *sub = (POLY_GT4*)mem_prim(sizeof(POLY_GT4)); \
        blitVert(sub, 0, grid[a]); \
        sub->clut = quake_clut; \
        blitVert(sub, 1, grid[b]); \
        sub->tpage = tpage; \
        blitVert(sub, 2, grid[c]); \
        blitVert(sub, 3, grid[d]); \
        setPolyGT4(sub); \
        addPrim(ot, sub); \
    } while (0)

    u_char hi = numVerts - 1; // walks backwards from the last vertex
    u_char lo = 0;            // walks forwards from the first vertex

    // (N-1)/2 quads instead of (N-2)/2: with an odd vertex count the leftover
    // triangle is drawn as a collapsed quad.
    // NOTE: testing has shown that the PS1 is faster when it simply renders
    // quads and accepts the odd collapsed one, compared to special-casing a
    // single trailing triangle.
    for (u_char remaining = (numVerts - 1) >> 1; remaining != 0; --remaining)
    {
        // Leading edge of this quad is the trailing edge of the previous one
        const ps1bsp_polyvertex_t *pvA = &polyVerts[hi];
        const ps1bsp_polyvertex_t *pvB = &polyVerts[lo];
        --hi;
        ++lo;
        const ps1bsp_polyvertex_t *pvC = &polyVerts[hi];
        const ps1bsp_polyvertex_t *pvD = &polyVerts[lo];

        const ps1bsp_vertex_t *posA = &vertices[pvA->index];
        const ps1bsp_vertex_t *posB = &vertices[pvB->index];
        const ps1bsp_vertex_t *posC = &vertices[pvC->index];
        const ps1bsp_vertex_t *posD = &vertices[pvD->index];

        // Original corners go into the four grid corners
        copyVertFast(&grid[0], pvA, posA);
        copyVertFast(&grid[2], pvB, posB);
        copyVertFast(&grid[6], pvC, posC);
        copyVertFast(&grid[8], pvD, posD);

        // Edge midpoints, plus the centre as the midpoint of two opposite edge midpoints
        // TODO Optimization: start loading vertices into the GTE as soon as they are complete
        lerpVert(&grid[1], &grid[0], &grid[2]);
        lerpVert(&grid[5], &grid[2], &grid[8]); // row 0 1 2 is complete after this
        lerpVert(&grid[3], &grid[0], &grid[6]);
        lerpVert(&grid[4], &grid[3], &grid[5]); // row 3 4 5 is complete after this
        lerpVert(&grid[7], &grid[6], &grid[8]);

        // Project the grid one row at a time; the screen coordinates replace
        // vx/vy of each temporary vertex in place
        gte_ldv3(&grid[0], &grid[1], &grid[2]);
        gte_rtpt();
        gte_stsxy0(&grid[0].vx);
        gte_stsxy1(&grid[1].vx);
        gte_stsxy2(&grid[2].vx);

        gte_ldv3(&grid[3], &grid[4], &grid[5]);
        gte_rtpt();
        gte_stsxy0(&grid[3].vx);
        gte_stsxy1(&grid[4].vx);
        gte_stsxy2(&grid[5].vx);

        gte_ldv3(&grid[6], &grid[7], &grid[8]);
        gte_rtpt();
        gte_stsxy0(&grid[6].vx);
        gte_stsxy1(&grid[7].vx);
        gte_stsxy2(&grid[8].vx);

        // TODO Optimization: intersperse quad building with the GTE work to hide store/load delays
        TESS2_EMIT_QUAD(0, 3, 1, 4);
        TESS2_EMIT_QUAD(1, 4, 2, 5);
        TESS2_EMIT_QUAD(3, 6, 4, 7);
        TESS2_EMIT_QUAD(4, 7, 5, 8);

        polyCount += 4;
    }

#undef TESS2_EMIT_QUAD
}
// Draws a convex face as a strip of flat-shaded, textured quads using the
// water CLUT, optionally semi-transparent.
//
//   vertices   Vertex array that the poly vertices index into.
//   polyVerts  The face's vertices; `index` selects the position and the
//              half-word at `u` supplies the texture coordinates. Vertex
//              colours are not used.
//   numVerts   Number of entries in polyVerts. Expected to be at least 3; a
//              value of 0 wraps the 8-bit counters below and is not handled.
//   tpage      Texture page value stored in every primitive.
//   semiTrans  Non-zero sets bit 1 of the primitive's code byte.
//   ot         Ordering table entry the primitives are added to.
//
// The strip alternates between the end and the start of the vertex list and
// works inwards (last, first, last-1, first+1, ...). With an odd vertex count
// the final quad collapses into a triangle.
static INLINE void draw_quadstrip_water(const ps1bsp_vertex_t *vertices, const ps1bsp_polyvertex_t *polyVerts, u_char numVerts, u_short tpage, u_char semiTrans, u_long *ot)
{
    u_char hi = numVerts - 1; // walks backwards from the last vertex
    u_char lo = 0;            // walks forwards from the first vertex

    // (N-1)/2 quads instead of (N-2)/2, so that a leftover triangle is still drawn (as a collapsed quad)
    for (u_char remaining = (numVerts - 1) >> 1; remaining != 0; --remaining)
    {
        // Leading edge of this quad is the trailing edge of the previous one
        const ps1bsp_polyvertex_t *a = &polyVerts[hi];
        const ps1bsp_polyvertex_t *b = &polyVerts[lo];
        --hi;
        ++lo;
        const ps1bsp_polyvertex_t *c = &polyVerts[hi];
        const ps1bsp_polyvertex_t *d = &polyVerts[lo];

        // Rotate, translate and project the first three corners together
        gte_ldv3(&vertices[a->index], &vertices[b->index], &vertices[c->index]);
        gte_rtpt();

        POLY_FT4 *quad = (POLY_FT4*)mem_prim(sizeof(POLY_FT4));

        // The fields are written roughly in the order they appear in the
        // primitive, to keep the memory accesses close together.

        // Single colour for the whole quad (plain white), corner 0 and the CLUT
        setColorFast(&quad->r0, &color_white);
        setUVFast(&quad->u0, &a->u);
        gte_stsxy0(&quad->x0);
        quad->clut = water_clut;

        // Corner 1 and the texture page
        setUVFast(&quad->u1, &b->u);
        quad->tpage = tpage;
        gte_stsxy1(&quad->x1);

        // Corner 2
        setUVFast(&quad->u2, &c->u);
        gte_stsxy2(&quad->x2);

        // The first three results have been stored, so the GTE is free to
        // process the fourth corner on its own
        gte_ldv0(&vertices[d->index]);
        gte_rtps();

        // Corner 3
        setUVFast(&quad->u3, &d->u);
        gte_stsxy(&quad->x3);

        // The primitive header is set after the word-sized colour copy; the
        // semi-transparency bit is then added on top of the fresh code value
        setPolyFT4(quad);
        quad->code |= (semiTrans != 0) << 1;

        addPrim(ot, quad);
        ++polyCount;
    }
}
#endif // __DRAW_H__