From 6cbd201b63efa46d68d58df183007fdfb5438753 Mon Sep 17 00:00:00 2001 From: Mid <> Date: Sun, 12 Oct 2025 20:46:56 +0300 Subject: [PATCH] Huge load 1. k3Update added, which must be called per frame. 2. Added GPU timers for profiling. 3. Added ARB_direct_state_access support because Mesa is being a bitch again. 4. Cached uniform locations in an open-addressing hash table. Unfortunately, I'm pretty sure there was no performance increase, at least on my development machine, but it shouldn't hurt anywhere else. --- src/k3.c | 356 ++++++++++++++++++++++++++++++---------------- src/k3.h | 4 +- src/k3_internal.h | 52 ++++++- src/k3batch.c | 16 ++- 4 files changed, 293 insertions(+), 135 deletions(-) diff --git a/src/k3.c b/src/k3.c index 44dd7e8..07635bb 100644 --- a/src/k3.c +++ b/src/k3.c @@ -1,6 +1,7 @@ #include"k3_internal.h" #include"gl.h" +#include"komihash.h" #include #include @@ -691,7 +692,7 @@ struct Renderable { struct k3Mdl *mdl; struct k3Mesh *mesh; struct k3AnimationBone *bones; - GLhandleARB glsl; + struct k3GLSLP *glslp; GLuint arbvp; GLuint arbfp; } __attribute__((aligned(16))); @@ -732,7 +733,7 @@ static intmax_t rblecompar1(const void *a, const void *b) { } if(i == 0) { - i = ((const struct Renderable*) a)->glsl - ((const struct Renderable*) b)->glsl; + i = ((const struct Renderable*) a)->glslp->handle - ((const struct Renderable*) b)->glslp->handle; } if(i == 0) { @@ -769,7 +770,7 @@ void k3Batch(struct k3Mdl *mdl, mat4 modelmat, struct k3AnimationBone *bones) { r->mesh = &mdl->meshes[mesh]; glm_mat4_copy(modelmat, r->modelmat); r->bones = bones; - r->glsl = GL_FROM_K3GLSL(r->mesh->mat.passes[0].glsl.hp); + r->glslp = r->mesh->mat.passes[0].glsl.hp; r->arbvp = GL_FROM_K3ARBVP(r->mesh->mat.passes[0].arbvp.vp); r->arbfp = GL_FROM_K3ARBFP(r->mesh->mat.passes[0].arbfp.fp); } @@ -783,9 +784,9 @@ static void setup_ff_projection(mat4 proj) { } } -static void setup_core_projection(GLuint prog, mat4 proj) { +static void setup_core_projection(struct k3GLSLP *p, mat4 proj) { if(k3IsCore) { - GLint u = glGetUniformLocation(prog, "u_projection"); + GLint u = k3ProgramGetUId(p, "u_projection"); if(u != -1) { glUniformMatrix4fv(u, 1, GL_FALSE, (float*) proj); @@ -844,31 +845,29 @@ static void setup_arbprog_globals() { } } -static void setup_glsl_globals(GLuint bound, mat4 view) { +static void setup_glsl_globals(struct k3GLSLP *p, mat4 view) { if(!k3IsCore) { - glUniform1fARB(glGetUniformLocationARB(bound, "u_time"), Time); - glUniform3fARB(glGetUniformLocationARB(bound, "u_cam"), CamMat[3][0], CamMat[3][1], CamMat[3][2]); - glUniformMatrix4fvARB(glGetUniformLocationARB(bound, "u_view"), 1, GL_FALSE, (float*) view); + glUniform1fARB(k3ProgramGetUId(p, "u_time"), Time); + glUniform3fARB(k3ProgramGetUId(p, "u_cam"), CamMat[3][0], CamMat[3][1], CamMat[3][2]); + glUniformMatrix4fvARB(k3ProgramGetUId(p, "u_view"), 1, GL_FALSE, (float*) view); } else { - glUniform1f(glGetUniformLocationARB(bound, "u_time"), Time); - glUniform3f(glGetUniformLocationARB(bound, "u_cam"), CamMat[3][0], CamMat[3][1], CamMat[3][2]); - glUniformMatrix4fv(glGetUniformLocationARB(bound, "u_view"), 1, GL_FALSE, (float*) view); + glUniform1f(k3ProgramGetUId(p, "u_time"), Time); + glUniform3f(k3ProgramGetUId(p, "u_cam"), CamMat[3][0], CamMat[3][1], CamMat[3][2]); + glUniformMatrix4fv(k3ProgramGetUId(p, "u_view"), 1, GL_FALSE, (float*) view); } } -static void setup_glsl_mat_uniforms(GLhandleARB bound, struct k3Mat *mat, int pass) { +static void setup_glsl_mat_uniforms(struct k3GLSLP *p, struct k3Mat *mat, int pass) { for(int u = 0; u < mat->passes[pass].glsl.uCount; u++) { + GLuint id = k3ProgramGetUId(p, mat->passes[pass].glsl.u[u].name); + if(mat->passes[pass].glsl.u[u].type == k3_MAT_UNIFORM_I1) { - GLuint id = glGetUniformLocationARB(bound, mat->passes[pass].glsl.u[u].name); - if(!k3IsCore) { glUniform1iARB(id, mat->passes[pass].glsl.u[u].i1); } else { glUniform1i(id, mat->passes[pass].glsl.u[u].i1); } } else if(mat->passes[pass].glsl.u[u].type == k3_MAT_UNIFORM_F1) { - GLuint id = glGetUniformLocationARB(bound, mat->passes[pass].glsl.u[u].name); - if(!k3IsCore) { glUniform1fARB(id, mat->passes[pass].glsl.u[u].f1); } else { @@ -878,20 +877,20 @@ static void setup_glsl_mat_uniforms(GLhandleARB bound, struct k3Mat *mat, int pa } } -static void setup_glsl_model_uniforms(GLuint bound, float *modelmat) { +static void setup_glsl_model_uniforms(struct k3GLSLP *p, float *modelmat) { mat4 invmodel; glm_mat4_inv(modelmat, invmodel); if(!k3IsCore) { - glUniformMatrix4fvARB(glGetUniformLocationARB(bound, "u_model"), 1, GL_FALSE, (float*) modelmat); - glUniformMatrix4fvARB(glGetUniformLocationARB(bound, "u_imodel"), 1, GL_FALSE, (float*) invmodel); + glUniformMatrix4fvARB(k3ProgramGetUId(p, "u_model"), 1, GL_FALSE, (float*) modelmat); + glUniformMatrix4fvARB(k3ProgramGetUId(p, "u_imodel"), 1, GL_FALSE, (float*) invmodel); } else { - glUniformMatrix4fv(glGetUniformLocationARB(bound, "u_model"), 1, GL_FALSE, (float*) modelmat); - glUniformMatrix4fv(glGetUniformLocationARB(bound, "u_imodel"), 1, GL_FALSE, (float*) invmodel); + glUniformMatrix4fv(k3ProgramGetUId(p, "u_model"), 1, GL_FALSE, (float*) modelmat); + glUniformMatrix4fv(k3ProgramGetUId(p, "u_imodel"), 1, GL_FALSE, (float*) invmodel); } } -static void setup_glsl_lighting_uniforms(GLuint bound, int lightsStart, int lightsCount) { +static void setup_glsl_lighting_uniforms(struct k3GLSLP *p, int lightsStart, int lightsCount) { if(lightsCount > 4) { lightsCount = 4; k3Log(k3_ERR, "Max 4 lights per pass"); @@ -954,19 +953,19 @@ static void setup_glsl_lighting_uniforms(GLuint bound, int lightsStart, int ligh } if(!k3IsCore) { - glUniform4fvARB(glGetUniformLocationARB(bound, "u_BaseLightSettings1"), 4, (float*) settings1); - glUniform4fvARB(glGetUniformLocationARB(bound, "u_BaseLightSettings2"), 4, (float*) settings2); - glUniform4fvARB(glGetUniformLocationARB(bound, "u_BaseLightColors"), 4, (float*) colors); - glUniform4fvARB(glGetUniformLocationARB(bound, "u_AmbientLight"), 1, (float*) ambient); + glUniform4fvARB(k3ProgramGetUId(p, "u_BaseLightSettings1"), 4, (float*) settings1); + glUniform4fvARB(k3ProgramGetUId(p, "u_BaseLightSettings2"), 4, (float*) settings2); + glUniform4fvARB(k3ProgramGetUId(p, "u_BaseLightColors"), 4, (float*) colors); + glUniform4fvARB(k3ProgramGetUId(p, "u_AmbientLight"), 1, (float*) ambient); } else { - glUniform4fv(glGetUniformLocationARB(bound, "u_BaseLightSettings1"), 4, (float*) settings1); - glUniform4fv(glGetUniformLocationARB(bound, "u_BaseLightSettings2"), 4, (float*) settings2); - glUniform4fv(glGetUniformLocationARB(bound, "u_BaseLightColors"), 4, (float*) colors); - glUniform4fv(glGetUniformLocationARB(bound, "u_AmbientLight"), 1, (float*) ambient); + glUniform4fv(k3ProgramGetUId(p, "u_BaseLightSettings1"), 4, (float*) settings1); + glUniform4fv(k3ProgramGetUId(p, "u_BaseLightSettings2"), 4, (float*) settings2); + glUniform4fv(k3ProgramGetUId(p, "u_BaseLightColors"), 4, (float*) colors); + glUniform4fv(k3ProgramGetUId(p, "u_AmbientLight"), 1, (float*) ambient); } } -static void setup_glsl_shadow_uniforms(GLuint bound, int atlasUnit, int lightsStart, int lightsCount) { +static void setup_glsl_shadow_uniforms(struct k3GLSLP *p, int atlasUnit, int lightsStart, int lightsCount) { if(lightsCount > 4) { lightsCount = 4; k3Log(k3_ERR, "Max 4 lights per pass"); @@ -977,7 +976,7 @@ static void setup_glsl_shadow_uniforms(GLuint bound, int atlasUnit, int lightsSt if(LightShadowIrregularMode) { assert(k3IsCore); - glUniform1i(glGetUniformLocation(bound, "u_pixelsinshadow"), 0); + glUniform1i(k3ProgramGetUId(p, "u_pixelsinshadow"), 0); } else { size_t vpi = 0; mat4 m[6]; @@ -1004,27 +1003,43 @@ static void setup_glsl_shadow_uniforms(GLuint bound, int atlasUnit, int lightsSt } if(!k3IsCore) { - glUniformMatrix4fvARB(glGetUniformLocationARB(bound, "u_shadows0vp"), vpi, GL_FALSE, (float*) m); - glUniform4fvARB(glGetUniformLocationARB(bound, "u_shadows0seg"), 4, (float*) seg); + glUniformMatrix4fvARB(k3ProgramGetUId(p, "u_shadows0vp"), vpi, GL_FALSE, (float*) m); + glUniform4fvARB(k3ProgramGetUId(p, "u_shadows0seg"), 4, (float*) seg); - glUniform1iARB(glGetUniformLocationARB(bound, "u_shadows0atlas"), atlasUnit); + glUniform1iARB(k3ProgramGetUId(p, "u_shadows0atlas"), atlasUnit); } else { - glUniformMatrix4fv(glGetUniformLocationARB(bound, "u_shadows0vp"), vpi, GL_FALSE, (float*) m); - glUniform4fv(glGetUniformLocationARB(bound, "u_shadows0seg"), 4, (float*) seg); + glUniformMatrix4fv(k3ProgramGetUId(p, "u_shadows0vp"), vpi, GL_FALSE, (float*) m); + glUniform4fv(k3ProgramGetUId(p, "u_shadows0seg"), 4, (float*) seg); - glUniform1i(glGetUniformLocationARB(bound, "u_shadows0atlas"), atlasUnit); + glUniform1i(k3ProgramGetUId(p, "u_shadows0atlas"), atlasUnit); } } } static int bind_mat_textures(struct k3Mat *mat, int pass) { int i; - if(GLAD_GL_EXT_direct_state_access) { + if(GLAD_GL_ARB_direct_state_access) { for(i = 0; i < k3_MAX_GLSL_UNITS; i++) { - glBindMultiTextureEXT(GL_TEXTURE0 + i, GL_TEXTURE_2D, GL_FROM_K3TEX(mat->passes[pass].units[i])); + GLuint tex = GL_FROM_K3TEX(mat->passes[pass].units[i]); + if(tex == 0) { + break; + } + glBindTextureUnit(i, tex); + } + } else if(GLAD_GL_EXT_direct_state_access) { + for(i = 0; i < k3_MAX_GLSL_UNITS; i++) { + GLuint tex = GL_FROM_K3TEX(mat->passes[pass].units[i]); + if(tex == 0) { + break; + } + glBindMultiTextureEXT(GL_TEXTURE0 + i, GL_TEXTURE_2D, tex); } } else { for(i = 0; i < k3_MAX_GLSL_UNITS; i++) { + GLuint tex = GL_FROM_K3TEX(mat->passes[pass].units[i]); + if(tex == 0) { + break; + } glActiveTexture(GL_TEXTURE0 + i); glBindTexture(GL_TEXTURE_2D, GL_FROM_K3TEX(mat->passes[pass].units[i])); } @@ -1041,7 +1056,9 @@ static int bind_shadow_texture(int textureUnit) { } return textureUnit; } else { - if(GLAD_GL_EXT_direct_state_access) { + if(GLAD_GL_ARB_direct_state_access) { + glBindTextureUnit(textureUnit, GL_FROM_K3TEX(ShadowAtlas->depth)); + } else if(GLAD_GL_EXT_direct_state_access) { glBindMultiTextureEXT(GL_TEXTURE0 + textureUnit, GL_TEXTURE_2D, GL_FROM_K3TEX(ShadowAtlas->depth)); } else { glActiveTexture(GL_TEXTURE0 + textureUnit); @@ -1051,16 +1068,16 @@ static int bind_shadow_texture(int textureUnit) { } } -static void enable_glsl_bones(GLuint bound, struct k3Mdl *mdl, struct k3AnimationBone *bones) { +static void enable_glsl_bones(struct k3GLSLP *p, struct k3Mdl *mdl, struct k3AnimationBone *bones) { GLint a0; GLint a1; if(!k3IsCore) { - a0 = glGetAttribLocationARB(bound, "a_boneids"); - a1 = glGetAttribLocationARB(bound, "a_boneweights"); + a0 = glGetAttribLocationARB(p->handle, "a_boneids"); + a1 = glGetAttribLocationARB(p->handle, "a_boneweights"); } else { - a0 = glGetAttribLocation(bound, "a_boneids"); - a1 = glGetAttribLocation(bound, "a_boneweights"); + a0 = glGetAttribLocation(p->handle, "a_boneids"); + a1 = glGetAttribLocation(p->handle, "a_boneweights"); } if((a0 == -1) != (a1 == -1)) { @@ -1093,9 +1110,9 @@ static void enable_glsl_bones(GLuint bound, struct k3Mdl *mdl, struct k3Animatio if(bones) { if(!k3IsCore) { - glUniform4fvARB(glGetUniformLocationARB(bound, "u_bonedata"), 2 * mdl->boneCount, (float*) bones); + glUniform4fvARB(k3ProgramGetUId(p, "u_bonedata"), 2 * mdl->boneCount, (float*) bones); } else { - glUniform4fv(glGetUniformLocationARB(bound, "u_bonedata"), 2 * mdl->boneCount, (float*) bones); + glUniform4fv(k3ProgramGetUId(p, "u_bonedata"), 2 * mdl->boneCount, (float*) bones); } } else { vec4 data[48] = {}; @@ -1104,19 +1121,19 @@ static void enable_glsl_bones(GLuint bound, struct k3Mdl *mdl, struct k3Animatio } if(!k3IsCore) { - glUniform4fvARB(glGetUniformLocationARB(bound, "u_bonedata"), 48, (float*) data); + glUniform4fvARB(k3ProgramGetUId(p, "u_bonedata"), 48, (float*) data); } else { - glUniform4fv(glGetUniformLocationARB(bound, "u_bonedata"), 48, (float*) data); + glUniform4fv(k3ProgramGetUId(p, "u_bonedata"), 48, (float*) data); } } } -static void disable_glsl_bones(struct k3Mdl *mdl, GLuint bound) { +static void disable_glsl_bones(struct k3Mdl *mdl, struct k3GLSLP *p) { GLint a0; GLint a1; - a0 = glGetAttribLocationARB(bound, "a_boneids"); - a1 = glGetAttribLocationARB(bound, "a_boneweights"); + a0 = glGetAttribLocationARB(p->handle, "a_boneids"); + a1 = glGetAttribLocationARB(p->handle, "a_boneweights"); if(a0 != -1) { if(!k3IsCore) { @@ -1129,7 +1146,7 @@ static void disable_glsl_bones(struct k3Mdl *mdl, GLuint bound) { } } -static void enable_glsl_tangents(GLuint bound, struct k3Mdl *mdl) { +static void enable_glsl_tangents(struct k3GLSLP *p, struct k3Mdl *mdl) { if(mdl->offT == -1) { return; } @@ -1137,9 +1154,9 @@ static void enable_glsl_tangents(GLuint bound, struct k3Mdl *mdl) { GLint a; if(!k3IsCore) { - a = glGetAttribLocationARB(bound, "a_tangent"); + a = glGetAttribLocationARB(p->handle, "a_tangent"); } else { - a = glGetAttribLocation(bound, "a_tangent"); + a = glGetAttribLocation(p->handle, "a_tangent"); } if(a == -1) { @@ -1155,7 +1172,7 @@ static void enable_glsl_tangents(GLuint bound, struct k3Mdl *mdl) { } } -static void disable_glsl_tangents(GLuint bound, struct k3Mdl *mdl) { +static void disable_glsl_tangents(struct k3GLSLP *p, struct k3Mdl *mdl) { if(mdl->offT == -1) { return; } @@ -1163,9 +1180,9 @@ static void disable_glsl_tangents(GLuint bound, struct k3Mdl *mdl) { GLint a; if(!k3IsCore) { - a = glGetAttribLocationARB(bound, "a_tangent"); + a = glGetAttribLocationARB(p->handle, "a_tangent"); } else { - a = glGetAttribLocation(bound, "a_tangent"); + a = glGetAttribLocation(p->handle, "a_tangent"); } if(a == -1) { @@ -1179,7 +1196,7 @@ static void disable_glsl_tangents(GLuint bound, struct k3Mdl *mdl) { } } -static void enable_vertex_buffers(struct k3Mdl *mdl, GLuint prog) { +static void enable_vertex_buffers(struct k3Mdl *mdl, struct k3GLSLP *p) { if(!k3IsCore) { glEnableClientState(GL_VERTEX_ARRAY); glEnableClientState(GL_NORMAL_ARRAY); @@ -1195,11 +1212,11 @@ static void enable_vertex_buffers(struct k3Mdl *mdl, GLuint prog) { } else { glColor4f(1, 1, 1, 1); } - } else if(prog) { - GLint aPos = glGetAttribLocation(prog, "a_pos"); - GLint aNormal = glGetAttribLocation(prog, "a_normal"); - GLint aUv = glGetAttribLocation(prog, "a_uv"); - GLint aColor = glGetAttribLocation(prog, "a_color"); + } else if(p) { + GLint aPos = glGetAttribLocation(p->handle, "a_pos"); + GLint aNormal = glGetAttribLocation(p->handle, "a_normal"); + GLint aUv = glGetAttribLocation(p->handle, "a_uv"); + GLint aColor = glGetAttribLocation(p->handle, "a_color"); if(aPos != -1) { glEnableVertexAttribArray(aPos); @@ -1225,7 +1242,7 @@ static void enable_vertex_buffers(struct k3Mdl *mdl, GLuint prog) { } } -static void disable_vertex_buffers(struct k3Mdl *mdl, GLuint prog) { +static void disable_vertex_buffers(struct k3Mdl *mdl, struct k3GLSLP *p) { if(!k3IsCore) { glDisableClientState(GL_VERTEX_ARRAY); glDisableClientState(GL_NORMAL_ARRAY); @@ -1234,11 +1251,11 @@ static void disable_vertex_buffers(struct k3Mdl *mdl, GLuint prog) { if(mdl->offC != -1) { glDisableClientState(GL_COLOR_ARRAY); } - } else if(prog) { - GLint aPos = glGetAttribLocation(prog, "a_pos"); - GLint aNormal = glGetAttribLocation(prog, "a_normal"); - GLint aUv = glGetAttribLocation(prog, "a_uv"); - GLint aColor = glGetAttribLocation(prog, "a_color"); + } else if(p) { + GLint aPos = glGetAttribLocation(p->handle, "a_pos"); + GLint aNormal = glGetAttribLocation(p->handle, "a_normal"); + GLint aUv = glGetAttribLocation(p->handle, "a_uv"); + GLint aColor = glGetAttribLocation(p->handle, "a_color"); if(aPos != -1) { glDisableVertexAttribArray(aPos); @@ -1387,6 +1404,8 @@ static bool outside_frustum(vec3 *aabb, float *modelmat, vec4 *frustum) { } static void forward_subpass(mat4 projection, mat4 view, int transparent, int lightsStart, int lightsCount, size_t rbleStart, size_t rbleEnd) { + struct k3Timer timer = k3StartTimer("forward_subpass"); + setup_ff_lights(view, lightsStart, lightsCount); mat4 viewProj; @@ -1395,7 +1414,7 @@ static void forward_subpass(mat4 projection, mat4 view, int transparent, int lig vec4 cameraFrustum[6]; glm_frustum_planes(viewProj, cameraFrustum); - GLhandleARB lastGLSL = -1; + struct k3GLSLP *lastGLSLP = NULL; GLuint lastVP = 0, lastFP = 0; struct k3Mat *lastMaterial = NULL; int lastAdditive = -1; @@ -1406,7 +1425,7 @@ static void forward_subpass(mat4 projection, mat4 view, int transparent, int lig struct k3Mesh *mesh = renderQueue[rble].mesh; float *modelmat = (float*) renderQueue[rble].modelmat; struct k3AnimationBone *bones = renderQueue[rble].bones; - GLhandleARB glsl = renderQueue[rble].glsl; + struct k3GLSLP *glslp = renderQueue[rble].glslp; GLuint arbvp = renderQueue[rble].arbvp; GLuint arbfp = renderQueue[rble].arbfp; @@ -1459,35 +1478,35 @@ static void forward_subpass(mat4 projection, mat4 view, int transparent, int lig } } - if(glsl) { - if(lastGLSL != glsl) { + if(glslp) { + if(lastGLSLP != glslp) { if(k3IsCore) - glUseProgram(glsl); + glUseProgram(glslp->handle); else - glUseProgramObjectARB(glsl); + glUseProgramObjectARB(glslp->handle); - lastGLSL = glsl; + lastGLSLP = glslp; - setup_glsl_globals(glsl, view); + setup_glsl_globals(glslp, view); } - setup_core_projection(glsl, ProjMat); + setup_core_projection(glslp, ProjMat); - setup_glsl_mat_uniforms(glsl, mat, 0); - setup_glsl_shadow_uniforms(glsl, mat->passes[0].unitsUsed, lightsStart, lightsCount); - setup_glsl_lighting_uniforms(glsl, lightsStart, lightsCount); + setup_glsl_mat_uniforms(glslp, mat, 0); + setup_glsl_shadow_uniforms(glslp, mat->passes[0].unitsUsed, lightsStart, lightsCount); + setup_glsl_lighting_uniforms(glslp, lightsStart, lightsCount); - setup_glsl_model_uniforms(glsl, modelmat); + setup_glsl_model_uniforms(glslp, modelmat); if(mat != lastMaterial) { bind_mat_textures(mat, 0); bind_shadow_texture(mat->passes[0].unitsUsed); } } else if(!k3IsCore) { - if(lastGLSL && GLAD_GL_ARB_shading_language_100) { + if(lastGLSLP && GLAD_GL_ARB_shading_language_100) { glUseProgramObjectARB(0); } - lastGLSL = 0; + lastGLSLP = NULL; if(lastVP != arbvp) { if(lastVP && !arbvp) { @@ -1563,9 +1582,9 @@ static void forward_subpass(mat4 projection, mat4 view, int transparent, int lig glBindBufferARB(GL_ARRAY_BUFFER_ARB, mdl->vstore->gl); glBindBufferARB(GL_ELEMENT_ARRAY_BUFFER_ARB, mdl->estore->gl); - if(glsl) { - enable_glsl_bones(glsl, mdl, bones); - enable_glsl_tangents(glsl, mdl); + if(glslp) { + enable_glsl_bones(glslp, mdl, bones); + enable_glsl_tangents(glslp, mdl); } if(k3IsSoftSkinning && bones) { @@ -1573,23 +1592,27 @@ static void forward_subpass(mat4 projection, mat4 view, int transparent, int lig } if(isnanf(mat->passes[0].aabb)) { - enable_vertex_buffers(mdl, glsl); + enable_vertex_buffers(mdl, glslp); glDrawElements(GL_TRIANGLES, mesh->idxNumber, GL_UNSIGNED_SHORT, (void*) (mesh->idxStart * 2)); - disable_vertex_buffers(mdl, glsl); + disable_vertex_buffers(mdl, glslp); } else { push_aabb(mat, 0, mdl); } - if(glsl) { - disable_glsl_tangents(glsl, mdl); - disable_glsl_bones(mdl, glsl); + if(glslp) { + disable_glsl_tangents(glslp, mdl); + disable_glsl_bones(mdl, glslp); } } + + k3EndTimer(timer); } void k3PassForward(mat4 projection, mat4 cam) { + struct k3Timer timer = k3StartTimer("k3PassForward"); + glm_mat4_copy(cam, CamMat); glm_mat4_copy(projection, ProjMat); queuesort(); @@ -1646,9 +1669,13 @@ void k3PassForward(mat4 projection, mat4 cam) { } } } + + k3EndTimer(timer); } void k3PassDepthOnly(mat4 projection, mat4 cam, int clear, int cull) { + struct k3Timer timer = k3StartTimer("k3PassDepthOnly"); + glm_mat4_copy(cam, CamMat); glm_mat4_copy(projection, ProjMat); queuesort(); @@ -1688,7 +1715,7 @@ void k3PassDepthOnly(mat4 projection, mat4 cam, int clear, int cull) { } GLuint lastVP = 0; - GLhandleARB lastGLSL = -1; + struct k3GLSLP *lastGLSLP = NULL; for(size_t rble = 0; rble < renderQueueSize; rble++) { struct k3Mdl *mdl = renderQueue[rble].mdl; @@ -1705,34 +1732,34 @@ void k3PassDepthOnly(mat4 projection, mat4 cam, int clear, int cull) { continue; } - GLhandleARB glsl = renderQueue[rble].glsl; + struct k3GLSLP *glslp = renderQueue[rble].glslp; GLuint arbvp = renderQueue[rble].arbvp; - if(glsl) { - if(lastGLSL != glsl) { + if(glslp) { + if(lastGLSLP != glslp) { if(k3IsCore) - glUseProgram(glsl); + glUseProgram(glslp->handle); else - glUseProgramObjectARB(glsl); + glUseProgramObjectARB(glslp->handle); - lastGLSL = glsl; + lastGLSLP = glslp; - setup_glsl_globals(glsl, view); + setup_glsl_globals(glslp, view); } - setup_core_projection(glsl, projection); + setup_core_projection(glslp, projection); - setup_glsl_mat_uniforms(glsl, mat, 0); + setup_glsl_mat_uniforms(glslp, mat, 0); - setup_glsl_model_uniforms(glsl, modelmat); + setup_glsl_model_uniforms(glslp, modelmat); } else if(!k3IsCore) { - if(lastGLSL && GLAD_GL_ARB_shading_language_100) { + if(lastGLSLP && GLAD_GL_ARB_shading_language_100) { if(k3IsCore) glUseProgram(0); else glUseProgramObjectARB(0); } - lastGLSL = 0; + lastGLSLP = NULL; if(arbvp != lastVP) { if(arbvp && !lastVP) { @@ -1759,8 +1786,8 @@ void k3PassDepthOnly(mat4 projection, mat4 cam, int clear, int cull) { glBindBufferARB(GL_ARRAY_BUFFER_ARB, mdl->vstore->gl); glBindBufferARB(GL_ELEMENT_ARRAY_BUFFER_ARB, mdl->estore->gl); - if(glsl) { - enable_glsl_bones(glsl, mdl, bones); + if(glslp) { + enable_glsl_bones(glslp, mdl, bones); } if(k3IsSoftSkinning && bones) { @@ -1768,21 +1795,23 @@ void k3PassDepthOnly(mat4 projection, mat4 cam, int clear, int cull) { } if(isnanf(mat->passes[0].aabb)) { - enable_vertex_buffers(mdl, glsl); + enable_vertex_buffers(mdl, glslp); glDrawElements(GL_TRIANGLES, mesh->idxNumber, GL_UNSIGNED_SHORT, (void*) (mesh->idxStart * 2)); - disable_vertex_buffers(mdl, glsl); + disable_vertex_buffers(mdl, glslp); } else { push_aabb(mat, 0, mdl); } - if(glsl) { - disable_glsl_bones(mdl, glsl); + if(glslp) { + disable_glsl_bones(mdl, glslp); } } glFrontFace(GL_CCW); + + k3EndTimer(timer); } static void split_frustum(mat4 proj, int cascades, mat4 croppeds[]) { @@ -2005,8 +2034,8 @@ static void pass_irregular(int passnum, mat4 mainproj, mat4 maincam, mat4 lightp glBindBufferARB(GL_ARRAY_BUFFER_ARB, mdl->vstore->gl); glBindBufferARB(GL_ELEMENT_ARRAY_BUFFER_ARB, mdl->estore->gl); - if(glsl) { - enable_glsl_bones(glsl, mdl, bones); + if(glslp) { + enable_glsl_bones(glslp, mdl, bones); } if(k3IsSoftSkinning && bones) { @@ -2014,17 +2043,17 @@ static void pass_irregular(int passnum, mat4 mainproj, mat4 maincam, mat4 lightp } if(isnanf(mat->passes[0].aabb)) { - enable_vertex_buffers(mdl, glsl); + enable_vertex_buffers(mdl, glslp); glDrawElements(GL_TRIANGLES, mesh->idxNumber, GL_UNSIGNED_SHORT, (void*) (mesh->idxStart * 2)); - disable_vertex_buffers(mdl, glsl); + disable_vertex_buffers(mdl, glslp); } else { push_aabb(mat, 0, mdl); } - if(glsl) { - disable_glsl_bones(mdl, glsl); + if(glslp) { + disable_glsl_bones(mdl, glslp); } } } @@ -2151,6 +2180,8 @@ void k3PassIrregular(struct k3Offscreen *mainview, mat4 mainproj, mat4 maincam) // Constructs shadowmap atlas, saves `offscr` for own use void k3PassShadowmap(mat4 projection, mat4 cam, struct k3Offscreen *offscr, float cellSizeLimit) { + struct k3Timer timer = k3StartTimer("k3PassShadowmap"); + glm_mat4_copy(projection, ProjMat); glm_mat4_copy(cam, CamMat); @@ -2235,6 +2266,8 @@ void k3PassShadowmap(mat4 projection, mat4 cam, struct k3Offscreen *offscr, floa } } k3EndOffscreen(offscr); + + k3EndTimer(timer); } void k3BatchClear() { @@ -2701,6 +2734,8 @@ struct k3GLSLG *k3ShaderGLSLG(const char *src_, const char*(*ldr)(const char *fn struct k3GLSLP *k3ProgramGLSL(struct k3GLSLV *vs, struct k3GLSLF *fs, struct k3GLSLG *gs) { GLhandleARB prog; + struct k3GLSLP *ret = calloc(1, sizeof(*ret)); + if(!k3IsCore) { prog = glCreateProgramObjectARB(); @@ -2730,13 +2765,31 @@ struct k3GLSLP *k3ProgramGLSL(struct k3GLSLV *vs, struct k3GLSLF *fs, struct k3G GLint uniformCount; glGetObjectParameterivARB(prog, GL_OBJECT_ACTIVE_UNIFORMS_ARB, &uniformCount); + ret->handle = prog; + ret->ucount = uniformCount; + ret->uloc = calloc(uniformCount, sizeof(*ret->uloc)); + ret->uname = calloc(uniformCount, sizeof(*ret->uname)); for(i = 0; i < uniformCount; i++) { int size; int type; glGetActiveUniformARB(prog, i, maxLength, NULL, &size, &type, name); - k3Log(k3_DEBUG, "%i %s", size, name); + if(strchr(name, '[')) { + *strchr(name, '[') = '\0'; + } + + uint64_t idx = komihash(name, strlen(name), 0); + for(size_t iter = 0; iter < uniformCount; iter++, idx++) { + idx = idx % uniformCount; + if(ret->uname[idx] == NULL) { + ret->uname[idx] = strdup(name); + ret->uloc[idx] = glGetUniformLocationARB(prog, name); + break; + } + } + + k3Log(k3_DEBUG, "%i %s @ %i", size, name, ret->uloc[idx]); } } else { prog = glCreateProgram(); @@ -2767,20 +2820,47 @@ struct k3GLSLP *k3ProgramGLSL(struct k3GLSLV *vs, struct k3GLSLF *fs, struct k3G GLint uniformCount; glGetProgramiv(prog, GL_ACTIVE_UNIFORMS, &uniformCount); + ret->handle = prog; + ret->ucount = uniformCount; + ret->uloc = calloc(uniformCount, sizeof(*ret->uloc)); + ret->uname = calloc(uniformCount, sizeof(*ret->uname)); + for(i = 0; i < uniformCount; i++) { int size; int type; glGetActiveUniform(prog, i, maxLength, NULL, &size, &type, name); - k3Log(k3_DEBUG, "%i %s", size, name); + if(strchr(name, '[')) { + *strchr(name, '[') = '\0'; + } + + uint64_t idx = komihash(name, strlen(name), 0); + for(size_t iter = 0; iter < uniformCount; iter++, idx++) { + idx = idx % uniformCount; + if(ret->uname[idx] == NULL) { + ret->uname[idx] = strdup(name); + ret->uloc[idx] = glGetUniformLocation(prog, name); + break; + } + } + + k3Log(k3_DEBUG, "%i %s @ %i", size, name, ret->uloc[idx]); } } - return (struct k3GLSLP*) (uintptr_t) prog; + return ret; } -uint16_t k3ProgramGetUId(struct k3GLSLP *p, const char *key) { - return glGetUniformLocationARB(GL_FROM_K3GLSL(p), key); +int16_t k3ProgramGetUId(struct k3GLSLP *p, const char *key) { + uint64_t idx = komihash(key, strlen(key), 0); + for(size_t i = 0; i < p->ucount; i++, idx++) { + idx = idx % p->ucount; + + if(!strcmp(p->uname[idx], key)) { + return p->uloc[idx]; + } + } + return -1; } struct k3ARBVP *k3ProgramARBVP(const char *src) { @@ -3083,3 +3163,29 @@ uint16_t k3TexSzMax() { return i; } + +struct k3Timer *k3Timers; +size_t k3TimerCount; +void k3Update() { + for(size_t ti = 0; ti < k3TimerCount;) { + struct k3Timer *t = &k3Timers[ti]; + + GLint b1 = 0, b2 = 0; + glGetQueryObjectiv(t->qStart, GL_QUERY_RESULT_AVAILABLE, &b1); + glGetQueryObjectiv(t->qEnd, GL_QUERY_RESULT_AVAILABLE, &b2); + + if(b1 && b2) { + uint64_t t1, t2; + glGetQueryObjectui64v(t->qStart, GL_QUERY_RESULT, &t1); + glGetQueryObjectui64v(t->qEnd, GL_QUERY_RESULT, &t2); + + k3Log(k3_TRACE, "Routine %s took %lu us", t->name, (t2 - t1) / 1000); + + glDeleteQueries(2, (GLuint*) t); + memmove(t, t + 1, sizeof(*t) * (k3TimerCount - ti - 1)); + k3TimerCount--; + } else { + ti++; + } + } +} diff --git a/src/k3.h b/src/k3.h index cfe5287..0f425a7 100644 --- a/src/k3.h +++ b/src/k3.h @@ -40,7 +40,7 @@ struct k3GLSLG *k3ShaderGLSLG(const char *src_, const char*(*ldr)(const char *fn struct k3GLSLP; struct k3GLSLP *k3ProgramGLSL(struct k3GLSLV*, struct k3GLSLF*, struct k3GLSLG*); -uint16_t k3ProgramGetUId(struct k3GLSLP*, const char *key); +int16_t k3ProgramGetUId(struct k3GLSLP*, const char *key); struct k3ARBVP; struct k3ARBVP *k3ProgramARBVP(const char *src); @@ -205,7 +205,7 @@ int k3CubemapTraditional(struct k3Tex*, mat4 proj, mat4 cam); void k3SetTime(float t); enum k3LogLevel { - k3_DEBUG, k3_INFO, k3_WARN, k3_ERR + k3_TRACE, k3_DEBUG, k3_INFO, k3_WARN, k3_ERR }; typedef void(*k3LogCallback)(enum k3LogLevel, const char *str, size_t len); void k3SetLogCallback(k3LogCallback); diff --git a/src/k3_internal.h b/src/k3_internal.h index 6b90155..3de6961 100644 --- a/src/k3_internal.h +++ b/src/k3_internal.h @@ -7,12 +7,13 @@ #include #include #include +#include #define GL_FROM_K3TEX(k3t) ((k3t) ? (k3t)->tex : 0) #define GL_FROM_K3MARCHER(k3m) ((GLuint) (uintptr_t) (k3m)) #define GL_FROM_K3ARBVP(k3m) ((GLuint) (uintptr_t) (k3m)) #define GL_FROM_K3ARBFP(k3m) ((GLuint) (uintptr_t) (k3m)) -#define GL_FROM_K3GLSL(k3m) ((GLuint) (uintptr_t) (k3m)) +#define GL_FROM_K3GLSL(k3m) (((struct k3GLSLP*) k3m)->handle) extern bool k3IsCore; @@ -84,3 +85,52 @@ struct k3Mdl { const char *debugname; }; + +struct k3GLSLP { + GLhandleARB handle; + + size_t ucount; + char **uname; + GLint *uloc; +}; + +struct k3Timer { + GLuint qStart; + GLuint qEnd; + char name[64]; +}; +extern struct k3Timer *k3Timers; +extern size_t k3TimerCount; +static inline struct k3Timer k3StartTimer(char *name) { + struct k3Timer t = {}; + + if(!GLAD_GL_ARB_timer_query) { + return; + } + + glGenQueries(2, (GLuint*) &t); + + strncpy(t.name, name, sizeof(t.name)); + + glQueryCounter(t.qStart, GL_TIMESTAMP); + + if(GLAD_GL_KHR_debug) { + glPushDebugGroup(GL_DEBUG_SOURCE_APPLICATION, 0, -1, name); + } + + return t; +} +static inline void k3EndTimer(struct k3Timer t) { + if(!GLAD_GL_ARB_timer_query) { + return; + } + + glQueryCounter(t.qEnd, GL_TIMESTAMP); + + if(GLAD_GL_KHR_debug) { + glPopDebugGroup(); + } + + k3Timers = realloc(k3Timers, sizeof(*k3Timers) * (k3TimerCount + 1)); + k3Timers[k3TimerCount++] = t; +} diff --git a/src/k3batch.c b/src/k3batch.c index c696843..92baac5 100644 --- a/src/k3batch.c +++ b/src/k3batch.c @@ -132,7 +132,9 @@ void k3BatchFlush() { } if(k3IsCore) { - glUseProgram((GLuint) coreProg); + GLuint handle = GL_FROM_K3GLSL(coreProg); + + glUseProgram(handle); glUniform2f(coreUResolution, ResolutionX, ResolutionY); float *farr = alloca(SCount * 60 * sizeof(*farr)); @@ -211,14 +213,14 @@ void k3BatchFlush() { glBindBufferARB(GL_ARRAY_BUFFER_ARB, coreVBO); glBufferDataARB(GL_ARRAY_BUFFER_ARB, SCount * 60 * sizeof(*farr), farr, GL_DYNAMIC_DRAW); - glUniform1f(glGetUniformLocation((GLuint) coreProg, "u_texuse"), !!activeTex); + glUniform1f(k3ProgramGetUId(coreProg, "u_texuse"), !!activeTex); - glUniform1f(glGetUniformLocation((GLuint) coreProg, "u_borderradius"), activeBorderRadius); + glUniform1f(k3ProgramGetUId(coreProg, "u_borderradius"), activeBorderRadius); - GLint aPos = glGetAttribLocation((GLuint) coreProg, "a_pos"); - GLint aUv = glGetAttribLocation((GLuint) coreProg, "a_uv"); - GLint aColor = glGetAttribLocation((GLuint) coreProg, "a_color"); - GLint aSize = glGetAttribLocation((GLuint) coreProg, "a_size"); + GLint aPos = glGetAttribLocation(handle, "a_pos"); + GLint aUv = glGetAttribLocation(handle, "a_uv"); + GLint aColor = glGetAttribLocation(handle, "a_color"); + GLint aSize = glGetAttribLocation(handle, "a_size"); glEnableVertexAttribArray(aPos); glEnableVertexAttribArray(aUv);