diff --git a/CMakeLists.txt b/CMakeLists.txt
index 84c4fc9..e1d8e77 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -15,7 +15,7 @@ file(GLOB _sources *.c)
 psn00bsdk_add_executable(ps1bsp STATIC ${_sources})
 
 psn00bsdk_target_incbin(ps1bsp PRIVATE tim_e1m1 atlas-e1m1.tim)
-psn00bsdk_target_incbin(ps1bsp PRIVATE tim_e2m2 atlas-e2m2.tim)
+psn00bsdk_target_incbin(ps1bsp PRIVATE bsp_test test.ps1bsp)
 
 psn00bsdk_add_cd_image(
 	iso      # Target name
diff --git a/common.h b/common.h
index 5603df1..3bdbc3c 100644
--- a/common.h
+++ b/common.h
@@ -8,7 +8,7 @@
 #include <psxgte.h>
 #include <psxgpu.h>
 
-#define RotMatrixQ  RotMatrix   // TODO: temporary hack to allow Quake-specific code without implementation
+#include "qmath.h"
 
 extern VECTOR cam_pos;
 extern SVECTOR cam_rot;
diff --git a/display.c b/display.c
index b690d49..d16daad 100644
--- a/display.c
+++ b/display.c
@@ -8,11 +8,13 @@ static DISPENV disp[2];
 static DRAWENV draw[2];
 static int db;
 
-u_long ot[2][OTLEN];	// Ordering tables, two arrays for double buffering. These are basically the buckets for bucket sorting of polygons.
+#define PRIMBUFLEN  131072
+
+static u_long ot[2][OTLEN];	// Ordering tables, two arrays for double buffering. These are basically the buckets for bucket sorting of polygons.
 						// You can also see them as "layers" in the modern 3D graphics sense, but they serve a more immediate role for polygon ordering.
 						// Layer 0 is free for overlays, as polygons at depth 0 will be clipped.
-char primbuff[2][65536];	// Primitive buffer, just a raw buffer of bytes to use as a pool for primitives
-char *nextpri;
+static char primbuff[2][PRIMBUFLEN];	// Primitive buffer, just a raw buffer of bytes to use as a pool for primitives
+static char *nextpri;
 
 const MATRIX identity = {
 	ONE, 0, 0,
@@ -41,6 +43,9 @@ MATRIX light_dirs = {
 	0, 0, 0
 };
 
+MATRIX vp_matrix;
+u_long *curOT;
+
 // Scale X coordinates to correct the aspect ratio for the chosen resolution
 VECTOR aspect_scale = { SCREENWIDTH * ONE / 320 , ONE, ONE };
 
@@ -76,12 +81,10 @@ void display_init()
 	PutDrawEnv(&draw[db]);
 
     // Load test font
-	FntLoad(960, 448);
+	FntLoad(960, 0);
 	
 	// Open up a test font text stream
-	FntOpen(0, 8, SCREENWIDTH, SCREENHEIGHT, 0, 200);
-
-    // TODO: OT initialization
+	FntOpen(0, 8, SCREENWIDTH, SCREENHEIGHT, 0, 512);
 
     // Initialize GTE
     InitGeom();
@@ -91,7 +94,10 @@ void display_init()
 
 void display_start()
 {
-	ClearOTagR(ot[db], OTLEN);
+    curOT = ot[db];
+	ClearOTagR(curOT, OTLEN);
+
+    nextpri = primbuff[db];
 
 	gte_SetBackColor(48, 48, 48);		// Ambient light color
 	gte_SetColorMatrix(&light_cols);	// Light color (up to three different lights)
@@ -107,14 +113,16 @@ void display_start()
 	TransMatrix(&view_matrix, &tpos);	// Apply transformed position to the translation part of the view matrix
 
 	// Compose view and projection matrices to obtain a combined view-projection matrix
-	CompMatrixLV(&proj_matrix, &view_matrix, &view_matrix);
+	CompMatrixLV(&proj_matrix, &view_matrix, &vp_matrix);
 }
 
 void display_finish()
 {
+    DrawOTag(curOT + OTLEN - 1);	// This performs a DMA transfer to quickly send all the primitives off to the GPU
+
     // Flip buffer index
 	db = !db;
-	
+
 	// Wait for all drawing to complete
 	DrawSync(0);
 	
@@ -129,3 +137,14 @@ void display_finish()
 	// Enable display output, ResetGraph() disables it by default
 	SetDispMask(1);
 }
+
+void *display_allocPrim(size_t size)
+{
+    // Bump-allocates size bytes from the active primitive buffer; returns NULL when the request does not fit.
+    // The test works on the space that is left instead of forming nextpri + size, which could land past the array.
+    char *end = primbuff[db] + PRIMBUFLEN;
+    if (nextpri < primbuff[db] || nextpri > end || size > (size_t)(end - nextpri))
+        return NULL;
+    void *prim = nextpri;
+    nextpri += size;
+    return prim;
+}
diff --git a/display.h b/display.h
index 15d7f21..5b88d96 100644
--- a/display.h
+++ b/display.h
@@ -6,8 +6,13 @@
 
 #define OTLEN	1024
 
+extern MATRIX vp_matrix;
+extern u_long *curOT;
+
 void display_init();
 void display_start();
 void display_finish();
 
+void *display_allocPrim(size_t size);
+
 #endif  // __DISPLAY_H__
diff --git a/main.c b/main.c
index f15b675..2613dc3 100644
--- a/main.c
+++ b/main.c
@@ -3,12 +3,16 @@
 #include "display.h"
 #include "time.h"
 #include "asset.h"
+#include "world.h"
 
 extern u_long tim_e1m1[];
-extern u_long tim_e2m2[];
+extern u_long bsp_test[];
 
-VECTOR cam_pos = { 0, -400, 100 };
-SVECTOR cam_rot = { 0 };
+VECTOR cam_pos = { 2176, 1152, 128 };	// START
+//VECTOR cam_pos = { 1920, -1408, 352 };	// E1M1
+SVECTOR cam_rot = { 0, 0, 0 };
+
+world_t world;
 
 // BSP face rendering:
 // - Gather vertex data from face start index + length
@@ -24,8 +28,9 @@ void init(void)
 	input_init();
 	display_init();
 
-	asset_loadTexture(tim_e1m1, NULL);
-	asset_loadTexture(tim_e2m2, NULL);
+	//asset_loadTexture(tim_e1m1, NULL);
+
+	world_load(bsp_test, &world);
 }
 
 // Main function, program entrypoint
@@ -40,8 +45,10 @@ int main(int argc, const char *argv[])
 		input_process();
 
 		display_start();
-	
+
 		// Draw stuff
+		world_draw(&world);
+	
 		FntPrint(-1, "Camera pos = (%d, %d, %d) rot = (%d, %d, %d)\n", cam_pos.vx, cam_pos.vy, cam_pos.vz, cam_rot.vx, cam_rot.vy, cam_rot.vz);
 		FntFlush(-1);
 		
diff --git a/ps1bsp.h b/ps1bsp.h
new file mode 100755
index 0000000..e795033
--- /dev/null
+++ b/ps1bsp.h
@@ -0,0 +1,97 @@
+#ifndef __PS1BSP_H__
+#define __PS1BSP_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+Probable rendering process:
+- Determine visible leaves based on PVS and frustum culling, the usual.
+- Chain together faces/polygons to be drawn. Possibly grouped by texture ID?
+  Texture chaining might improve performance by making better use of texture cache, but we'll still be separating polygons based on depth anyway, so the advantage is questionable.
+  Texture chaining should probably be the last optimization we try.
+- Tessellate polygons close to the camera by recursively cutting edges in two, up to X times based on camera distance/screen size. Possibly GTE can aid with averaging coordinates? => look at GPF/GPL (general purpose interpolation)
+- Collect all vertices that need to be transformed, put them through GTE, update lighting values if needed, and cache the results.
+  (It may not be worth it to collect and precalculate vertices, as keeping track of all the administration also comes at a considerable cost.)
+- Draw all the (tessellated) polygons using the precalculated vertex positions. Use GTE to calculate average depth and order polygons.
+  Note: we may not have to calculate average depth for BSP polygons, as the leafs already provide an ordering, and leafs are convex so there is no need to sort the polygons within.
+  We do however need some kind of depth value per leaf to insert alias models at the correct positions in the ordering table.
+*/
+
+typedef struct	// File header; world_load() reads it from the very start of the data blob
+{
+	unsigned short numVertices;		// Number of ps1bsp_vertex_t entries stored right after the header
+	unsigned short numTriangles;	// Number of ps1bsp_triangle_t entries stored after the vertices
+	unsigned short numFaces;		// Number of ps1bsp_face_t entries stored after the triangles
+} ps1bsp_header_t;
+
+typedef struct	// Describes the size and VRAM placement of a single texture
+{
+	unsigned char w, h;			// These may be necessary for scaling UVs, especially since we use a mix of mip0 and mip1 textures
+	int tpage;					// Texture page in PS1 VRAM (precalculated when generating the texture atlas)
+	short uoffs, voffs;			// Texture coordinate offset within the texture page
+	unsigned short nextframe;	// If non-zero, the texture is animated and this points to the next texture in the sequence
+} ps1bsp_texture_t;
+
+// The first 8 bytes (x, y, z and the two light bytes) line up with the SVECTOR data type, with the light values sitting where SVECTOR has its padding.
+// The full range and precision required cannot be stored in just shorts, so we make use of a floating origin stored in the BSP leafs.
+// With this the higher-order bits of each vertex position are calculated into the model-view matrix, giving good precision for polygons near the camera.
+typedef struct
+{
+	short x;
+	short y;
+	short z;
+	unsigned char baseLight, finalLight;	// Used for gouraud shading based on static lightmap data
+	// NOTE(review): the bit-fields below follow those 8 bytes, so the struct is larger than an SVECTOR; with an unsigned char base type they presumably take 3 bytes rather than 2 - confirm against the exporter.
+	// Sampled color value from the face texture, for untextured gouraud shaded drawing
+	unsigned char a : 1;	// 0 = opaque, 1 = semi-transparent
+	unsigned char r : 5;
+	unsigned char g : 5;
+	unsigned char b : 5;
+} ps1bsp_vertex_t;
+
+// Instead of edges as in the original BSP format, we store triangles for easy consumption by the PS1
+// Note: it may actually be more efficient to render quads for faces with 4+ vertices
+typedef struct
+{
+	unsigned short vertex0;		// Index into the vertex array
+	unsigned short vertex1;		// Index into the vertex array
+	unsigned short vertex2;		// Index into the vertex array
+} ps1bsp_triangle_t;
+
+typedef struct	// One BSP face, stored as a run of consecutive entries in the triangle array
+{
+	unsigned short firstTriangleId;	// Index of the face's first triangle. TODO: could also just do first-index, num-indices here. No real need for a triangle_t struct.
+	unsigned short numTriangles;	// How many triangles, starting at firstTriangleId, belong to this face
+
+	unsigned short firstQuadId;		// For if/when we decide to add quads to the mix
+	unsigned short numQuads;
+} ps1bsp_face_t;
+
+// Pre-parsed and encoded entity data (this runs the risk of becoming too bloated)
+typedef struct	// NOTE(review): world_load() does not read any entity data yet, as far as this change shows
+{
+	unsigned short classtype;	// Hash of the original classname
+	short angle[3];				// Can store both mangle (all axes) and just angle (Z-axis rotation only)
+	int origin[3];				// In 12-bit fixed point coordinates
+	unsigned int spawnflags;
+	unsigned short messageId;	// Index into a pool of pre-defined messages
+} ps1bsp_entity_t;
+
+typedef struct	// Variable-sized record: the text is stored inline directly after the length field
+{
+	unsigned short length;	// Presumably the number of bytes in message[] - confirm against the exporter
+	char message[];			// Flexible array member, so sizeof(ps1bsp_message_t) does not include the text
+} ps1bsp_message_t;
+
+typedef struct	// Placeholder. NOTE(review): a struct without members is not valid ISO C and only builds as a compiler extension
+{
+	// TODO: add floating origin position, so face vertices can be moved relative to the camera position
+} ps1bsp_leaf_t;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif	// __PS1BSP_H__
diff --git a/qmath.c b/qmath.c
new file mode 100644
index 0000000..2ffb5e9
--- /dev/null
+++ b/qmath.c
@@ -0,0 +1,34 @@
+#include "common.h"
+
+MATRIX *RotMatrixQ(SVECTOR *r, MATRIX *m)
+{
+	short s[3],c[3];
+	MATRIX tm[3];
+	// Fills the 3x3 rotation part of m from the angles in r and returns m; the translation m->t is never written here.
+	s[0] = isin(r->vx);		s[1] = isin(r->vy);		s[2] = isin(r->vz);
+	c[0] = icos(r->vx);		c[1] = icos(r->vy);		c[2] = icos(r->vz);
+
+	// mY (roll): rotation by r->vy, written straight into m
+	m->m[0][0] = c[1];	m->m[0][1] = 0;		m->m[0][2] = s[1];
+	m->m[1][0] = 0;		m->m[1][1] = ONE;	m->m[1][2] = 0;
+	m->m[2][0] = -s[1];	m->m[2][1] = 0;		m->m[2][2] = c[1];
+
+	// mX (pitch): rotation by r->vx, kept in tm[0]
+	tm[0].m[0][0] = ONE;		tm[0].m[0][1] = 0;			tm[0].m[0][2] = 0;
+	tm[0].m[1][0] = 0;			tm[0].m[1][1] = c[0];		tm[0].m[1][2] = -s[0];
+	tm[0].m[2][0] = 0;			tm[0].m[2][1] = s[0];		tm[0].m[2][2] = c[0];
+
+    // mZ (yaw): rotation by r->vz, kept in tm[1]
+	tm[1].m[0][0] = c[2];	tm[1].m[0][1] = -s[2];	tm[1].m[0][2] = 0;
+	tm[1].m[1][0] = s[2];	tm[1].m[1][1] = c[2];	tm[1].m[1][2] = 0;
+	tm[1].m[2][0] = 0;		tm[1].m[2][1] = 0;		tm[1].m[2][2] = ONE;
+	// NOTE(review): the Push/PopMatrix pair presumably preserves GTE matrix state that MulMatrix0 overwrites - confirm against the SDK.
+	PushMatrix();
+	// Assuming MulMatrix0(a, b, out) stores a * b in out, the result is m = (mY * mZ) * mX - TODO confirm operand order.
+	MulMatrix0( m, &tm[1], &tm[2] );
+	MulMatrix0( &tm[2], &tm[0], m );
+
+	PopMatrix();
+
+	return m;
+}
diff --git a/qmath.h b/qmath.h
new file mode 100644
index 0000000..8724f54
--- /dev/null
+++ b/qmath.h
@@ -0,0 +1,6 @@
+#ifndef __QMATH_H__
+#define __QMATH_H__
+
+MATRIX *RotMatrixQ(SVECTOR *r, MATRIX *m);	// Writes a rotation built from the angles in r into m->m and returns m. MATRIX and SVECTOR must already be declared by the includer.
+
+#endif // __QMATH_H__
diff --git a/test.ps1bsp b/test.ps1bsp
new file mode 100755
index 0000000..9497db6
Binary files /dev/null and b/test.ps1bsp differ
diff --git a/world.c b/world.c
new file mode 100644
index 0000000..f87da95
--- /dev/null
+++ b/world.c
@@ -0,0 +1,95 @@
+#include "common.h"
+#include "world.h"
+#include "display.h"
+
+#include <inline_c.h>
+
+static const CVECTOR colors[] =	// Debug palette; world_draw() cycles through it per face. Never written, hence const.
+{
+    { 255, 0, 0 },
+    { 0, 255, 0 },
+    { 0, 0, 255 },
+    { 255, 255, 0 },
+    { 255, 0, 255 },
+    { 0, 255, 255 },
+    { 128, 255, 0 },
+    { 255, 128, 0 },
+    { 128, 0, 255 },
+    { 255, 0, 128 },
+    { 0, 128, 255 },
+    { 0, 255, 128 },
+};
+static const int numColors = sizeof(colors) / sizeof(colors[0]);	// Entry count derived from the table itself
+
+void world_load(const u_long *data, world_t *world)
+{
+    // The blob is a header followed by back-to-back vertex, triangle and face arrays; nothing is copied, world only points into data.
+    const char *cursor = (const char*)data;
+    ps1bsp_header_t *hdr = (ps1bsp_header_t*)cursor;
+    world->header = hdr;
+    cursor += sizeof(*hdr);
+
+    world->vertices = (ps1bsp_vertex_t*)cursor;
+    cursor += hdr->numVertices * sizeof(ps1bsp_vertex_t);
+
+    world->triangles = (ps1bsp_triangle_t*)cursor;
+    cursor += hdr->numTriangles * sizeof(ps1bsp_triangle_t);
+
+    world->faces = (ps1bsp_face_t*)cursor;
+}
+
+void world_draw(const world_t *world)
+{
+    // Draws every triangle of every face as a flat-colored POLY_F3, depth-sorted into the current ordering table (curOT).
+    int p;  // Scratch for GTE outputs: first the normal-clip result, then the average Z
+    // The world doesn't move, so we just set the camera view-projection matrix
+    gte_SetRotMatrix(&vp_matrix);
+    gte_SetTransMatrix(&vp_matrix);
+
+    for (int faceIdx = 0; faceIdx < world->header->numFaces; ++faceIdx)
+    {
+        const ps1bsp_face_t *face = &world->faces[faceIdx];
+        const CVECTOR *col = &colors[faceIdx % numColors];
+
+        for (int triangleIdx = 0; triangleIdx < face->numTriangles; ++triangleIdx)
+        {
+            const ps1bsp_triangle_t *tri = &world->triangles[face->firstTriangleId + triangleIdx];
+
+            // Naively draw the triangle with GTE, nothing special or optimized about this
+            SVECTOR *v0 = (SVECTOR*)&world->vertices[tri->vertex0];
+            SVECTOR *v1 = (SVECTOR*)&world->vertices[tri->vertex1];
+            SVECTOR *v2 = (SVECTOR*)&world->vertices[tri->vertex2];
+
+            gte_ldv3(v0, v1, v2);
+            gte_rtpt();     // Rotation, translation, perspective projection
+
+            // Normal clipping for backface culling
+            gte_nclip();
+            gte_stopz(&p);
+            if (p < 0)
+                continue;
+
+            // Average Z for depth sorting and culling
+            gte_avsz3();
+            gte_stotz(&p);
+            int depth = p >> 2;     // Kept as int so an out-of-range value cannot be truncated into a valid slot
+            if (depth <= 0 || depth >= OTLEN)
+                continue;
+
+            // Draw a flat-shaded untextured colored triangle
+            POLY_F3 *poly = (POLY_F3*)display_allocPrim(sizeof(POLY_F3));
+            if (poly == NULL)
+                return;     // Primitive buffer is full: every later POLY_F3 request this frame would fail as well
+
+            setPolyF3(poly);
+
+            gte_stsxy3_f3(poly);
+
+            poly->r0 = col->r;
+            poly->g0 = col->g;
+            poly->b0 = col->b;
+
+            addPrim(curOT + depth, poly);
+        }
+    }
+}
diff --git a/world.h b/world.h
new file mode 100644
index 0000000..af210cd
--- /dev/null
+++ b/world.h
@@ -0,0 +1,17 @@
+#ifndef __WORLD_H__
+#define __WORLD_H__
+
+#include "ps1bsp.h"
+
+typedef struct	// Views into a loaded .ps1bsp blob; world_load() points these into the caller's data, nothing is copied
+{
+    ps1bsp_header_t *header;        // Start of the blob
+    ps1bsp_vertex_t *vertices;      // header->numVertices entries
+    ps1bsp_triangle_t *triangles;   // header->numTriangles entries
+    ps1bsp_face_t *faces;           // header->numFaces entries
+} world_t;
+
+void world_load(const u_long *data, world_t *world);    // Points world's members into data; data must stay valid while world is in use
+void world_draw(const world_t *world);                   // Queues the world's triangles into the current ordering table
+
+#endif  // __WORLD_H__