Huge load

1. k3Update added, which must be called per frame.
2. Added GPU timers for profiling.
3. Added ARB_direct_state_access support because Mesa is being difficult
again.
4. Cached uniform locations in an open-addressing hash table.
Unfortunately, I'm pretty sure there was no performance increase, at
least on my development machine, but it shouldn't hurt anywhere else.
This commit is contained in:
Mid 2025-10-12 20:46:56 +03:00
parent 2ebab9358d
commit 6cbd201b63
4 changed files with 293 additions and 135 deletions

356
src/k3.c
View File

@ -1,6 +1,7 @@
#include"k3_internal.h"
#include"gl.h"
#include"komihash.h"
#include<stdlib.h>
#include<string.h>
@ -691,7 +692,7 @@ struct Renderable {
struct k3Mdl *mdl;
struct k3Mesh *mesh;
struct k3AnimationBone *bones;
GLhandleARB glsl;
struct k3GLSLP *glslp;
GLuint arbvp;
GLuint arbfp;
} __attribute__((aligned(16)));
@ -732,7 +733,7 @@ static intmax_t rblecompar1(const void *a, const void *b) {
}
if(i == 0) {
i = ((const struct Renderable*) a)->glsl - ((const struct Renderable*) b)->glsl;
i = ((const struct Renderable*) a)->glslp->handle - ((const struct Renderable*) b)->glslp->handle;
}
if(i == 0) {
@ -769,7 +770,7 @@ void k3Batch(struct k3Mdl *mdl, mat4 modelmat, struct k3AnimationBone *bones) {
r->mesh = &mdl->meshes[mesh];
glm_mat4_copy(modelmat, r->modelmat);
r->bones = bones;
r->glsl = GL_FROM_K3GLSL(r->mesh->mat.passes[0].glsl.hp);
r->glslp = r->mesh->mat.passes[0].glsl.hp;
r->arbvp = GL_FROM_K3ARBVP(r->mesh->mat.passes[0].arbvp.vp);
r->arbfp = GL_FROM_K3ARBFP(r->mesh->mat.passes[0].arbfp.fp);
}
@ -783,9 +784,9 @@ static void setup_ff_projection(mat4 proj) {
}
}
static void setup_core_projection(GLuint prog, mat4 proj) {
static void setup_core_projection(struct k3GLSLP *p, mat4 proj) {
if(k3IsCore) {
GLint u = glGetUniformLocation(prog, "u_projection");
GLint u = k3ProgramGetUId(p, "u_projection");
if(u != -1) {
glUniformMatrix4fv(u, 1, GL_FALSE, (float*) proj);
@ -844,31 +845,29 @@ static void setup_arbprog_globals() {
}
}
static void setup_glsl_globals(GLuint bound, mat4 view) {
static void setup_glsl_globals(struct k3GLSLP *p, mat4 view) {
if(!k3IsCore) {
glUniform1fARB(glGetUniformLocationARB(bound, "u_time"), Time);
glUniform3fARB(glGetUniformLocationARB(bound, "u_cam"), CamMat[3][0], CamMat[3][1], CamMat[3][2]);
glUniformMatrix4fvARB(glGetUniformLocationARB(bound, "u_view"), 1, GL_FALSE, (float*) view);
glUniform1fARB(k3ProgramGetUId(p, "u_time"), Time);
glUniform3fARB(k3ProgramGetUId(p, "u_cam"), CamMat[3][0], CamMat[3][1], CamMat[3][2]);
glUniformMatrix4fvARB(k3ProgramGetUId(p, "u_view"), 1, GL_FALSE, (float*) view);
} else {
glUniform1f(glGetUniformLocationARB(bound, "u_time"), Time);
glUniform3f(glGetUniformLocationARB(bound, "u_cam"), CamMat[3][0], CamMat[3][1], CamMat[3][2]);
glUniformMatrix4fv(glGetUniformLocationARB(bound, "u_view"), 1, GL_FALSE, (float*) view);
glUniform1f(k3ProgramGetUId(p, "u_time"), Time);
glUniform3f(k3ProgramGetUId(p, "u_cam"), CamMat[3][0], CamMat[3][1], CamMat[3][2]);
glUniformMatrix4fv(k3ProgramGetUId(p, "u_view"), 1, GL_FALSE, (float*) view);
}
}
static void setup_glsl_mat_uniforms(GLhandleARB bound, struct k3Mat *mat, int pass) {
static void setup_glsl_mat_uniforms(struct k3GLSLP *p, struct k3Mat *mat, int pass) {
for(int u = 0; u < mat->passes[pass].glsl.uCount; u++) {
GLuint id = k3ProgramGetUId(p, mat->passes[pass].glsl.u[u].name);
if(mat->passes[pass].glsl.u[u].type == k3_MAT_UNIFORM_I1) {
GLuint id = glGetUniformLocationARB(bound, mat->passes[pass].glsl.u[u].name);
if(!k3IsCore) {
glUniform1iARB(id, mat->passes[pass].glsl.u[u].i1);
} else {
glUniform1i(id, mat->passes[pass].glsl.u[u].i1);
}
} else if(mat->passes[pass].glsl.u[u].type == k3_MAT_UNIFORM_F1) {
GLuint id = glGetUniformLocationARB(bound, mat->passes[pass].glsl.u[u].name);
if(!k3IsCore) {
glUniform1fARB(id, mat->passes[pass].glsl.u[u].f1);
} else {
@ -878,20 +877,20 @@ static void setup_glsl_mat_uniforms(GLhandleARB bound, struct k3Mat *mat, int pa
}
}
static void setup_glsl_model_uniforms(GLuint bound, float *modelmat) {
static void setup_glsl_model_uniforms(struct k3GLSLP *p, float *modelmat) {
mat4 invmodel;
glm_mat4_inv(modelmat, invmodel);
if(!k3IsCore) {
glUniformMatrix4fvARB(glGetUniformLocationARB(bound, "u_model"), 1, GL_FALSE, (float*) modelmat);
glUniformMatrix4fvARB(glGetUniformLocationARB(bound, "u_imodel"), 1, GL_FALSE, (float*) invmodel);
glUniformMatrix4fvARB(k3ProgramGetUId(p, "u_model"), 1, GL_FALSE, (float*) modelmat);
glUniformMatrix4fvARB(k3ProgramGetUId(p, "u_imodel"), 1, GL_FALSE, (float*) invmodel);
} else {
glUniformMatrix4fv(glGetUniformLocationARB(bound, "u_model"), 1, GL_FALSE, (float*) modelmat);
glUniformMatrix4fv(glGetUniformLocationARB(bound, "u_imodel"), 1, GL_FALSE, (float*) invmodel);
glUniformMatrix4fv(k3ProgramGetUId(p, "u_model"), 1, GL_FALSE, (float*) modelmat);
glUniformMatrix4fv(k3ProgramGetUId(p, "u_imodel"), 1, GL_FALSE, (float*) invmodel);
}
}
static void setup_glsl_lighting_uniforms(GLuint bound, int lightsStart, int lightsCount) {
static void setup_glsl_lighting_uniforms(struct k3GLSLP *p, int lightsStart, int lightsCount) {
if(lightsCount > 4) {
lightsCount = 4;
k3Log(k3_ERR, "Max 4 lights per pass");
@ -954,19 +953,19 @@ static void setup_glsl_lighting_uniforms(GLuint bound, int lightsStart, int ligh
}
if(!k3IsCore) {
glUniform4fvARB(glGetUniformLocationARB(bound, "u_BaseLightSettings1"), 4, (float*) settings1);
glUniform4fvARB(glGetUniformLocationARB(bound, "u_BaseLightSettings2"), 4, (float*) settings2);
glUniform4fvARB(glGetUniformLocationARB(bound, "u_BaseLightColors"), 4, (float*) colors);
glUniform4fvARB(glGetUniformLocationARB(bound, "u_AmbientLight"), 1, (float*) ambient);
glUniform4fvARB(k3ProgramGetUId(p, "u_BaseLightSettings1"), 4, (float*) settings1);
glUniform4fvARB(k3ProgramGetUId(p, "u_BaseLightSettings2"), 4, (float*) settings2);
glUniform4fvARB(k3ProgramGetUId(p, "u_BaseLightColors"), 4, (float*) colors);
glUniform4fvARB(k3ProgramGetUId(p, "u_AmbientLight"), 1, (float*) ambient);
} else {
glUniform4fv(glGetUniformLocationARB(bound, "u_BaseLightSettings1"), 4, (float*) settings1);
glUniform4fv(glGetUniformLocationARB(bound, "u_BaseLightSettings2"), 4, (float*) settings2);
glUniform4fv(glGetUniformLocationARB(bound, "u_BaseLightColors"), 4, (float*) colors);
glUniform4fv(glGetUniformLocationARB(bound, "u_AmbientLight"), 1, (float*) ambient);
glUniform4fv(k3ProgramGetUId(p, "u_BaseLightSettings1"), 4, (float*) settings1);
glUniform4fv(k3ProgramGetUId(p, "u_BaseLightSettings2"), 4, (float*) settings2);
glUniform4fv(k3ProgramGetUId(p, "u_BaseLightColors"), 4, (float*) colors);
glUniform4fv(k3ProgramGetUId(p, "u_AmbientLight"), 1, (float*) ambient);
}
}
static void setup_glsl_shadow_uniforms(GLuint bound, int atlasUnit, int lightsStart, int lightsCount) {
static void setup_glsl_shadow_uniforms(struct k3GLSLP *p, int atlasUnit, int lightsStart, int lightsCount) {
if(lightsCount > 4) {
lightsCount = 4;
k3Log(k3_ERR, "Max 4 lights per pass");
@ -977,7 +976,7 @@ static void setup_glsl_shadow_uniforms(GLuint bound, int atlasUnit, int lightsSt
if(LightShadowIrregularMode) {
assert(k3IsCore);
glUniform1i(glGetUniformLocation(bound, "u_pixelsinshadow"), 0);
glUniform1i(k3ProgramGetUId(p, "u_pixelsinshadow"), 0);
} else {
size_t vpi = 0;
mat4 m[6];
@ -1004,27 +1003,43 @@ static void setup_glsl_shadow_uniforms(GLuint bound, int atlasUnit, int lightsSt
}
if(!k3IsCore) {
glUniformMatrix4fvARB(glGetUniformLocationARB(bound, "u_shadows0vp"), vpi, GL_FALSE, (float*) m);
glUniform4fvARB(glGetUniformLocationARB(bound, "u_shadows0seg"), 4, (float*) seg);
glUniformMatrix4fvARB(k3ProgramGetUId(p, "u_shadows0vp"), vpi, GL_FALSE, (float*) m);
glUniform4fvARB(k3ProgramGetUId(p, "u_shadows0seg"), 4, (float*) seg);
glUniform1iARB(glGetUniformLocationARB(bound, "u_shadows0atlas"), atlasUnit);
glUniform1iARB(k3ProgramGetUId(p, "u_shadows0atlas"), atlasUnit);
} else {
glUniformMatrix4fv(glGetUniformLocationARB(bound, "u_shadows0vp"), vpi, GL_FALSE, (float*) m);
glUniform4fv(glGetUniformLocationARB(bound, "u_shadows0seg"), 4, (float*) seg);
glUniformMatrix4fv(k3ProgramGetUId(p, "u_shadows0vp"), vpi, GL_FALSE, (float*) m);
glUniform4fv(k3ProgramGetUId(p, "u_shadows0seg"), 4, (float*) seg);
glUniform1i(glGetUniformLocationARB(bound, "u_shadows0atlas"), atlasUnit);
glUniform1i(k3ProgramGetUId(p, "u_shadows0atlas"), atlasUnit);
}
}
}
static int bind_mat_textures(struct k3Mat *mat, int pass) {
int i;
if(GLAD_GL_EXT_direct_state_access) {
if(GLAD_GL_ARB_direct_state_access) {
for(i = 0; i < k3_MAX_GLSL_UNITS; i++) {
glBindMultiTextureEXT(GL_TEXTURE0 + i, GL_TEXTURE_2D, GL_FROM_K3TEX(mat->passes[pass].units[i]));
GLuint tex = GL_FROM_K3TEX(mat->passes[pass].units[i]);
if(tex == 0) {
break;
}
glBindTextureUnit(i, tex);
}
} else if(GLAD_GL_EXT_direct_state_access) {
for(i = 0; i < k3_MAX_GLSL_UNITS; i++) {
GLuint tex = GL_FROM_K3TEX(mat->passes[pass].units[i]);
if(tex == 0) {
break;
}
glBindMultiTextureEXT(GL_TEXTURE0 + i, GL_TEXTURE_2D, tex);
}
} else {
for(i = 0; i < k3_MAX_GLSL_UNITS; i++) {
GLuint tex = GL_FROM_K3TEX(mat->passes[pass].units[i]);
if(tex == 0) {
break;
}
glActiveTexture(GL_TEXTURE0 + i);
glBindTexture(GL_TEXTURE_2D, GL_FROM_K3TEX(mat->passes[pass].units[i]));
}
@ -1041,7 +1056,9 @@ static int bind_shadow_texture(int textureUnit) {
}
return textureUnit;
} else {
if(GLAD_GL_EXT_direct_state_access) {
if(GLAD_GL_ARB_direct_state_access) {
glBindTextureUnit(textureUnit, GL_FROM_K3TEX(ShadowAtlas->depth));
} else if(GLAD_GL_EXT_direct_state_access) {
glBindMultiTextureEXT(GL_TEXTURE0 + textureUnit, GL_TEXTURE_2D, GL_FROM_K3TEX(ShadowAtlas->depth));
} else {
glActiveTexture(GL_TEXTURE0 + textureUnit);
@ -1051,16 +1068,16 @@ static int bind_shadow_texture(int textureUnit) {
}
}
static void enable_glsl_bones(GLuint bound, struct k3Mdl *mdl, struct k3AnimationBone *bones) {
static void enable_glsl_bones(struct k3GLSLP *p, struct k3Mdl *mdl, struct k3AnimationBone *bones) {
GLint a0;
GLint a1;
if(!k3IsCore) {
a0 = glGetAttribLocationARB(bound, "a_boneids");
a1 = glGetAttribLocationARB(bound, "a_boneweights");
a0 = glGetAttribLocationARB(p->handle, "a_boneids");
a1 = glGetAttribLocationARB(p->handle, "a_boneweights");
} else {
a0 = glGetAttribLocation(bound, "a_boneids");
a1 = glGetAttribLocation(bound, "a_boneweights");
a0 = glGetAttribLocation(p->handle, "a_boneids");
a1 = glGetAttribLocation(p->handle, "a_boneweights");
}
if((a0 == -1) != (a1 == -1)) {
@ -1093,9 +1110,9 @@ static void enable_glsl_bones(GLuint bound, struct k3Mdl *mdl, struct k3Animatio
if(bones) {
if(!k3IsCore) {
glUniform4fvARB(glGetUniformLocationARB(bound, "u_bonedata"), 2 * mdl->boneCount, (float*) bones);
glUniform4fvARB(k3ProgramGetUId(p, "u_bonedata"), 2 * mdl->boneCount, (float*) bones);
} else {
glUniform4fv(glGetUniformLocationARB(bound, "u_bonedata"), 2 * mdl->boneCount, (float*) bones);
glUniform4fv(k3ProgramGetUId(p, "u_bonedata"), 2 * mdl->boneCount, (float*) bones);
}
} else {
vec4 data[48] = {};
@ -1104,19 +1121,19 @@ static void enable_glsl_bones(GLuint bound, struct k3Mdl *mdl, struct k3Animatio
}
if(!k3IsCore) {
glUniform4fvARB(glGetUniformLocationARB(bound, "u_bonedata"), 48, (float*) data);
glUniform4fvARB(k3ProgramGetUId(p, "u_bonedata"), 48, (float*) data);
} else {
glUniform4fv(glGetUniformLocationARB(bound, "u_bonedata"), 48, (float*) data);
glUniform4fv(k3ProgramGetUId(p, "u_bonedata"), 48, (float*) data);
}
}
}
static void disable_glsl_bones(struct k3Mdl *mdl, GLuint bound) {
static void disable_glsl_bones(struct k3Mdl *mdl, struct k3GLSLP *p) {
GLint a0;
GLint a1;
a0 = glGetAttribLocationARB(bound, "a_boneids");
a1 = glGetAttribLocationARB(bound, "a_boneweights");
a0 = glGetAttribLocationARB(p->handle, "a_boneids");
a1 = glGetAttribLocationARB(p->handle, "a_boneweights");
if(a0 != -1) {
if(!k3IsCore) {
@ -1129,7 +1146,7 @@ static void disable_glsl_bones(struct k3Mdl *mdl, GLuint bound) {
}
}
static void enable_glsl_tangents(GLuint bound, struct k3Mdl *mdl) {
static void enable_glsl_tangents(struct k3GLSLP *p, struct k3Mdl *mdl) {
if(mdl->offT == -1) {
return;
}
@ -1137,9 +1154,9 @@ static void enable_glsl_tangents(GLuint bound, struct k3Mdl *mdl) {
GLint a;
if(!k3IsCore) {
a = glGetAttribLocationARB(bound, "a_tangent");
a = glGetAttribLocationARB(p->handle, "a_tangent");
} else {
a = glGetAttribLocation(bound, "a_tangent");
a = glGetAttribLocation(p->handle, "a_tangent");
}
if(a == -1) {
@ -1155,7 +1172,7 @@ static void enable_glsl_tangents(GLuint bound, struct k3Mdl *mdl) {
}
}
static void disable_glsl_tangents(GLuint bound, struct k3Mdl *mdl) {
static void disable_glsl_tangents(struct k3GLSLP *p, struct k3Mdl *mdl) {
if(mdl->offT == -1) {
return;
}
@ -1163,9 +1180,9 @@ static void disable_glsl_tangents(GLuint bound, struct k3Mdl *mdl) {
GLint a;
if(!k3IsCore) {
a = glGetAttribLocationARB(bound, "a_tangent");
a = glGetAttribLocationARB(p->handle, "a_tangent");
} else {
a = glGetAttribLocation(bound, "a_tangent");
a = glGetAttribLocation(p->handle, "a_tangent");
}
if(a == -1) {
@ -1179,7 +1196,7 @@ static void disable_glsl_tangents(GLuint bound, struct k3Mdl *mdl) {
}
}
static void enable_vertex_buffers(struct k3Mdl *mdl, GLuint prog) {
static void enable_vertex_buffers(struct k3Mdl *mdl, struct k3GLSLP *p) {
if(!k3IsCore) {
glEnableClientState(GL_VERTEX_ARRAY);
glEnableClientState(GL_NORMAL_ARRAY);
@ -1195,11 +1212,11 @@ static void enable_vertex_buffers(struct k3Mdl *mdl, GLuint prog) {
} else {
glColor4f(1, 1, 1, 1);
}
} else if(prog) {
GLint aPos = glGetAttribLocation(prog, "a_pos");
GLint aNormal = glGetAttribLocation(prog, "a_normal");
GLint aUv = glGetAttribLocation(prog, "a_uv");
GLint aColor = glGetAttribLocation(prog, "a_color");
} else if(p) {
GLint aPos = glGetAttribLocation(p->handle, "a_pos");
GLint aNormal = glGetAttribLocation(p->handle, "a_normal");
GLint aUv = glGetAttribLocation(p->handle, "a_uv");
GLint aColor = glGetAttribLocation(p->handle, "a_color");
if(aPos != -1) {
glEnableVertexAttribArray(aPos);
@ -1225,7 +1242,7 @@ static void enable_vertex_buffers(struct k3Mdl *mdl, GLuint prog) {
}
}
static void disable_vertex_buffers(struct k3Mdl *mdl, GLuint prog) {
static void disable_vertex_buffers(struct k3Mdl *mdl, struct k3GLSLP *p) {
if(!k3IsCore) {
glDisableClientState(GL_VERTEX_ARRAY);
glDisableClientState(GL_NORMAL_ARRAY);
@ -1234,11 +1251,11 @@ static void disable_vertex_buffers(struct k3Mdl *mdl, GLuint prog) {
if(mdl->offC != -1) {
glDisableClientState(GL_COLOR_ARRAY);
}
} else if(prog) {
GLint aPos = glGetAttribLocation(prog, "a_pos");
GLint aNormal = glGetAttribLocation(prog, "a_normal");
GLint aUv = glGetAttribLocation(prog, "a_uv");
GLint aColor = glGetAttribLocation(prog, "a_color");
} else if(p) {
GLint aPos = glGetAttribLocation(p->handle, "a_pos");
GLint aNormal = glGetAttribLocation(p->handle, "a_normal");
GLint aUv = glGetAttribLocation(p->handle, "a_uv");
GLint aColor = glGetAttribLocation(p->handle, "a_color");
if(aPos != -1) {
glDisableVertexAttribArray(aPos);
@ -1387,6 +1404,8 @@ static bool outside_frustum(vec3 *aabb, float *modelmat, vec4 *frustum) {
}
static void forward_subpass(mat4 projection, mat4 view, int transparent, int lightsStart, int lightsCount, size_t rbleStart, size_t rbleEnd) {
struct k3Timer timer = k3StartTimer("forward_subpass");
setup_ff_lights(view, lightsStart, lightsCount);
mat4 viewProj;
@ -1395,7 +1414,7 @@ static void forward_subpass(mat4 projection, mat4 view, int transparent, int lig
vec4 cameraFrustum[6];
glm_frustum_planes(viewProj, cameraFrustum);
GLhandleARB lastGLSL = -1;
struct k3GLSLP *lastGLSLP = NULL;
GLuint lastVP = 0, lastFP = 0;
struct k3Mat *lastMaterial = NULL;
int lastAdditive = -1;
@ -1406,7 +1425,7 @@ static void forward_subpass(mat4 projection, mat4 view, int transparent, int lig
struct k3Mesh *mesh = renderQueue[rble].mesh;
float *modelmat = (float*) renderQueue[rble].modelmat;
struct k3AnimationBone *bones = renderQueue[rble].bones;
GLhandleARB glsl = renderQueue[rble].glsl;
struct k3GLSLP *glslp = renderQueue[rble].glslp;
GLuint arbvp = renderQueue[rble].arbvp;
GLuint arbfp = renderQueue[rble].arbfp;
@ -1459,35 +1478,35 @@ static void forward_subpass(mat4 projection, mat4 view, int transparent, int lig
}
}
if(glsl) {
if(lastGLSL != glsl) {
if(glslp) {
if(lastGLSLP != glslp) {
if(k3IsCore)
glUseProgram(glsl);
glUseProgram(glslp->handle);
else
glUseProgramObjectARB(glsl);
glUseProgramObjectARB(glslp->handle);
lastGLSL = glsl;
lastGLSLP = glslp;
setup_glsl_globals(glsl, view);
setup_glsl_globals(glslp, view);
}
setup_core_projection(glsl, ProjMat);
setup_core_projection(glslp, ProjMat);
setup_glsl_mat_uniforms(glsl, mat, 0);
setup_glsl_shadow_uniforms(glsl, mat->passes[0].unitsUsed, lightsStart, lightsCount);
setup_glsl_lighting_uniforms(glsl, lightsStart, lightsCount);
setup_glsl_mat_uniforms(glslp, mat, 0);
setup_glsl_shadow_uniforms(glslp, mat->passes[0].unitsUsed, lightsStart, lightsCount);
setup_glsl_lighting_uniforms(glslp, lightsStart, lightsCount);
setup_glsl_model_uniforms(glsl, modelmat);
setup_glsl_model_uniforms(glslp, modelmat);
if(mat != lastMaterial) {
bind_mat_textures(mat, 0);
bind_shadow_texture(mat->passes[0].unitsUsed);
}
} else if(!k3IsCore) {
if(lastGLSL && GLAD_GL_ARB_shading_language_100) {
if(lastGLSLP && GLAD_GL_ARB_shading_language_100) {
glUseProgramObjectARB(0);
}
lastGLSL = 0;
lastGLSLP = NULL;
if(lastVP != arbvp) {
if(lastVP && !arbvp) {
@ -1563,9 +1582,9 @@ static void forward_subpass(mat4 projection, mat4 view, int transparent, int lig
glBindBufferARB(GL_ARRAY_BUFFER_ARB, mdl->vstore->gl);
glBindBufferARB(GL_ELEMENT_ARRAY_BUFFER_ARB, mdl->estore->gl);
if(glsl) {
enable_glsl_bones(glsl, mdl, bones);
enable_glsl_tangents(glsl, mdl);
if(glslp) {
enable_glsl_bones(glslp, mdl, bones);
enable_glsl_tangents(glslp, mdl);
}
if(k3IsSoftSkinning && bones) {
@ -1573,23 +1592,27 @@ static void forward_subpass(mat4 projection, mat4 view, int transparent, int lig
}
if(isnanf(mat->passes[0].aabb)) {
enable_vertex_buffers(mdl, glsl);
enable_vertex_buffers(mdl, glslp);
glDrawElements(GL_TRIANGLES, mesh->idxNumber, GL_UNSIGNED_SHORT, (void*) (mesh->idxStart * 2));
disable_vertex_buffers(mdl, glsl);
disable_vertex_buffers(mdl, glslp);
} else {
push_aabb(mat, 0, mdl);
}
if(glsl) {
disable_glsl_tangents(glsl, mdl);
disable_glsl_bones(mdl, glsl);
if(glslp) {
disable_glsl_tangents(glslp, mdl);
disable_glsl_bones(mdl, glslp);
}
}
k3EndTimer(timer);
}
void k3PassForward(mat4 projection, mat4 cam) {
struct k3Timer timer = k3StartTimer("k3PassForward");
glm_mat4_copy(cam, CamMat);
glm_mat4_copy(projection, ProjMat);
queuesort();
@ -1646,9 +1669,13 @@ void k3PassForward(mat4 projection, mat4 cam) {
}
}
}
k3EndTimer(timer);
}
void k3PassDepthOnly(mat4 projection, mat4 cam, int clear, int cull) {
struct k3Timer timer = k3StartTimer("k3PassDepthOnly");
glm_mat4_copy(cam, CamMat);
glm_mat4_copy(projection, ProjMat);
queuesort();
@ -1688,7 +1715,7 @@ void k3PassDepthOnly(mat4 projection, mat4 cam, int clear, int cull) {
}
GLuint lastVP = 0;
GLhandleARB lastGLSL = -1;
struct k3GLSLP *lastGLSLP = NULL;
for(size_t rble = 0; rble < renderQueueSize; rble++) {
struct k3Mdl *mdl = renderQueue[rble].mdl;
@ -1705,34 +1732,34 @@ void k3PassDepthOnly(mat4 projection, mat4 cam, int clear, int cull) {
continue;
}
GLhandleARB glsl = renderQueue[rble].glsl;
struct k3GLSLP *glslp = renderQueue[rble].glslp;
GLuint arbvp = renderQueue[rble].arbvp;
if(glsl) {
if(lastGLSL != glsl) {
if(glslp) {
if(lastGLSLP != glslp) {
if(k3IsCore)
glUseProgram(glsl);
glUseProgram(glslp->handle);
else
glUseProgramObjectARB(glsl);
glUseProgramObjectARB(glslp->handle);
lastGLSL = glsl;
lastGLSLP = glslp;
setup_glsl_globals(glsl, view);
setup_glsl_globals(glslp, view);
}
setup_core_projection(glsl, projection);
setup_core_projection(glslp, projection);
setup_glsl_mat_uniforms(glsl, mat, 0);
setup_glsl_mat_uniforms(glslp, mat, 0);
setup_glsl_model_uniforms(glsl, modelmat);
setup_glsl_model_uniforms(glslp, modelmat);
} else if(!k3IsCore) {
if(lastGLSL && GLAD_GL_ARB_shading_language_100) {
if(lastGLSLP && GLAD_GL_ARB_shading_language_100) {
if(k3IsCore)
glUseProgram(0);
else
glUseProgramObjectARB(0);
}
lastGLSL = 0;
lastGLSLP = NULL;
if(arbvp != lastVP) {
if(arbvp && !lastVP) {
@ -1759,8 +1786,8 @@ void k3PassDepthOnly(mat4 projection, mat4 cam, int clear, int cull) {
glBindBufferARB(GL_ARRAY_BUFFER_ARB, mdl->vstore->gl);
glBindBufferARB(GL_ELEMENT_ARRAY_BUFFER_ARB, mdl->estore->gl);
if(glsl) {
enable_glsl_bones(glsl, mdl, bones);
if(glslp) {
enable_glsl_bones(glslp, mdl, bones);
}
if(k3IsSoftSkinning && bones) {
@ -1768,21 +1795,23 @@ void k3PassDepthOnly(mat4 projection, mat4 cam, int clear, int cull) {
}
if(isnanf(mat->passes[0].aabb)) {
enable_vertex_buffers(mdl, glsl);
enable_vertex_buffers(mdl, glslp);
glDrawElements(GL_TRIANGLES, mesh->idxNumber, GL_UNSIGNED_SHORT, (void*) (mesh->idxStart * 2));
disable_vertex_buffers(mdl, glsl);
disable_vertex_buffers(mdl, glslp);
} else {
push_aabb(mat, 0, mdl);
}
if(glsl) {
disable_glsl_bones(mdl, glsl);
if(glslp) {
disable_glsl_bones(mdl, glslp);
}
}
glFrontFace(GL_CCW);
k3EndTimer(timer);
}
static void split_frustum(mat4 proj, int cascades, mat4 croppeds[]) {
@ -2005,8 +2034,8 @@ static void pass_irregular(int passnum, mat4 mainproj, mat4 maincam, mat4 lightp
glBindBufferARB(GL_ARRAY_BUFFER_ARB, mdl->vstore->gl);
glBindBufferARB(GL_ELEMENT_ARRAY_BUFFER_ARB, mdl->estore->gl);
if(glsl) {
enable_glsl_bones(glsl, mdl, bones);
if(glslp) {
enable_glsl_bones(glslp, mdl, bones);
}
if(k3IsSoftSkinning && bones) {
@ -2014,17 +2043,17 @@ static void pass_irregular(int passnum, mat4 mainproj, mat4 maincam, mat4 lightp
}
if(isnanf(mat->passes[0].aabb)) {
enable_vertex_buffers(mdl, glsl);
enable_vertex_buffers(mdl, glslp);
glDrawElements(GL_TRIANGLES, mesh->idxNumber, GL_UNSIGNED_SHORT, (void*) (mesh->idxStart * 2));
disable_vertex_buffers(mdl, glsl);
disable_vertex_buffers(mdl, glslp);
} else {
push_aabb(mat, 0, mdl);
}
if(glsl) {
disable_glsl_bones(mdl, glsl);
if(glslp) {
disable_glsl_bones(mdl, glslp);
}
}
}
@ -2151,6 +2180,8 @@ void k3PassIrregular(struct k3Offscreen *mainview, mat4 mainproj, mat4 maincam)
// Constructs shadowmap atlas, saves `offscr` for own use
void k3PassShadowmap(mat4 projection, mat4 cam, struct k3Offscreen *offscr, float cellSizeLimit) {
struct k3Timer timer = k3StartTimer("k3PassShadowmap");
glm_mat4_copy(projection, ProjMat);
glm_mat4_copy(cam, CamMat);
@ -2235,6 +2266,8 @@ void k3PassShadowmap(mat4 projection, mat4 cam, struct k3Offscreen *offscr, floa
}
}
k3EndOffscreen(offscr);
k3EndTimer(timer);
}
void k3BatchClear() {
@ -2701,6 +2734,8 @@ struct k3GLSLG *k3ShaderGLSLG(const char *src_, const char*(*ldr)(const char *fn
struct k3GLSLP *k3ProgramGLSL(struct k3GLSLV *vs, struct k3GLSLF *fs, struct k3GLSLG *gs) {
GLhandleARB prog;
struct k3GLSLP *ret = calloc(1, sizeof(*ret));
if(!k3IsCore) {
prog = glCreateProgramObjectARB();
@ -2730,13 +2765,31 @@ struct k3GLSLP *k3ProgramGLSL(struct k3GLSLV *vs, struct k3GLSLF *fs, struct k3G
GLint uniformCount;
glGetObjectParameterivARB(prog, GL_OBJECT_ACTIVE_UNIFORMS_ARB, &uniformCount);
ret->handle = prog;
ret->ucount = uniformCount;
ret->uloc = calloc(uniformCount, sizeof(*ret->uloc));
ret->uname = calloc(uniformCount, sizeof(*ret->uname));
for(i = 0; i < uniformCount; i++) {
int size;
int type;
glGetActiveUniformARB(prog, i, maxLength, NULL, &size, &type, name);
k3Log(k3_DEBUG, "%i %s", size, name);
if(strchr(name, '[')) {
*strchr(name, '[') = '\0';
}
uint64_t idx = komihash(name, strlen(name), 0);
for(size_t iter = 0; iter < uniformCount; iter++, idx++) {
idx = idx % uniformCount;
if(ret->uname[idx] == NULL) {
ret->uname[idx] = strdup(name);
ret->uloc[idx] = glGetUniformLocationARB(prog, name);
break;
}
}
k3Log(k3_DEBUG, "%i %s @ %i", size, name, ret->uloc[idx]);
}
} else {
prog = glCreateProgram();
@ -2767,20 +2820,47 @@ struct k3GLSLP *k3ProgramGLSL(struct k3GLSLV *vs, struct k3GLSLF *fs, struct k3G
GLint uniformCount;
glGetProgramiv(prog, GL_ACTIVE_UNIFORMS, &uniformCount);
ret->handle = prog;
ret->ucount = uniformCount;
ret->uloc = calloc(uniformCount, sizeof(*ret->uloc));
ret->uname = calloc(uniformCount, sizeof(*ret->uname));
for(i = 0; i < uniformCount; i++) {
int size;
int type;
glGetActiveUniform(prog, i, maxLength, NULL, &size, &type, name);
k3Log(k3_DEBUG, "%i %s", size, name);
if(strchr(name, '[')) {
*strchr(name, '[') = '\0';
}
uint64_t idx = komihash(name, strlen(name), 0);
for(size_t iter = 0; iter < uniformCount; iter++, idx++) {
idx = idx % uniformCount;
if(ret->uname[idx] == NULL) {
ret->uname[idx] = strdup(name);
ret->uloc[idx] = glGetUniformLocation(prog, name);
break;
}
}
k3Log(k3_DEBUG, "%i %s @ %i", size, name, ret->uloc[idx]);
}
}
return (struct k3GLSLP*) (uintptr_t) prog;
return ret;
}
uint16_t k3ProgramGetUId(struct k3GLSLP *p, const char *key) {
return glGetUniformLocationARB(GL_FROM_K3GLSL(p), key);
/*
 * Returns the cached location of uniform `key` in program `p`.
 *
 * The cache built by k3ProgramGLSL is an open-addressing table with linear
 * probing: the probe starts at komihash(key) % ucount and visits at most
 * `ucount` slots.
 *
 * Returns -1 when `p` or `key` is NULL, when the program has no cached
 * uniforms, or when `key` is not present in the table.
 */
int16_t k3ProgramGetUId(struct k3GLSLP *p, const char *key) {
	if(!p || !key || p->ucount == 0 || !p->uname || !p->uloc) {
		return -1;
	}

	uint64_t idx = komihash(key, strlen(key), 0);

	for(size_t i = 0; i < p->ucount; i++, idx++) {
		idx = idx % p->ucount;

		/* Insertion fills the first empty slot of the same probe sequence and
		 * the visible code never clears a slot, so an empty slot means the key
		 * was never stored. It also must not be handed to strcmp. */
		if(p->uname[idx] == NULL) {
			break;
		}

		if(!strcmp(p->uname[idx], key)) {
			return p->uloc[idx];
		}
	}

	return -1;
}
struct k3ARBVP *k3ProgramARBVP(const char *src) {
@ -3083,3 +3163,29 @@ uint16_t k3TexSzMax() {
return i;
}
struct k3Timer *k3Timers;
size_t k3TimerCount;
void k3Update() {
for(size_t ti = 0; ti < k3TimerCount;) {
struct k3Timer *t = &k3Timers[ti];
GLint b1 = 0, b2 = 0;
glGetQueryObjectiv(t->qStart, GL_QUERY_RESULT_AVAILABLE, &b1);
glGetQueryObjectiv(t->qEnd, GL_QUERY_RESULT_AVAILABLE, &b2);
if(b1 && b2) {
uint64_t t1, t2;
glGetQueryObjectui64v(t->qStart, GL_QUERY_RESULT, &t1);
glGetQueryObjectui64v(t->qEnd, GL_QUERY_RESULT, &t2);
k3Log(k3_TRACE, "Routine %s took %lu us", t->name, (t2 - t1) / 1000);
glDeleteQueries(2, (GLuint*) t);
memmove(t, t + 1, sizeof(*t) * (k3TimerCount - ti - 1));
k3TimerCount--;
} else {
ti++;
}
}
}

View File

@ -40,7 +40,7 @@ struct k3GLSLG *k3ShaderGLSLG(const char *src_, const char*(*ldr)(const char *fn
struct k3GLSLP;
struct k3GLSLP *k3ProgramGLSL(struct k3GLSLV*, struct k3GLSLF*, struct k3GLSLG*);
uint16_t k3ProgramGetUId(struct k3GLSLP*, const char *key);
int16_t k3ProgramGetUId(struct k3GLSLP*, const char *key);
struct k3ARBVP;
struct k3ARBVP *k3ProgramARBVP(const char *src);
@ -205,7 +205,7 @@ int k3CubemapTraditional(struct k3Tex*, mat4 proj, mat4 cam);
void k3SetTime(float t);
enum k3LogLevel {
k3_DEBUG, k3_INFO, k3_WARN, k3_ERR
k3_TRACE, k3_DEBUG, k3_INFO, k3_WARN, k3_ERR
};
typedef void(*k3LogCallback)(enum k3LogLevel, const char *str, size_t len);
void k3SetLogCallback(k3LogCallback);

View File

@ -7,12 +7,13 @@
#include<cglm/vec2.h>
#include<cglm/frustum.h>
#include<cglm/cam.h>
#include<string.h>
#define GL_FROM_K3TEX(k3t) ((k3t) ? (k3t)->tex : 0)
#define GL_FROM_K3MARCHER(k3m) ((GLuint) (uintptr_t) (k3m))
#define GL_FROM_K3ARBVP(k3m) ((GLuint) (uintptr_t) (k3m))
#define GL_FROM_K3ARBFP(k3m) ((GLuint) (uintptr_t) (k3m))
#define GL_FROM_K3GLSL(k3m) ((GLuint) (uintptr_t) (k3m))
/* Yields the GL program handle of a k3GLSLP, or 0 for a NULL program (mirrors GL_FROM_K3TEX). Evaluates its argument twice. */
#define GL_FROM_K3GLSL(k3m) ((k3m) ? ((struct k3GLSLP*) (k3m))->handle : 0)
extern bool k3IsCore;
@ -84,3 +85,52 @@ struct k3Mdl {
const char *debugname;
};
/*
 * A linked GLSL program together with a cache of its uniform locations.
 *
 * `uname` and `uloc` are parallel arrays of `ucount` slots forming an
 * open-addressing hash table: k3ProgramGLSL places each uniform at
 * komihash(name) % ucount and probes linearly to the first empty slot;
 * k3ProgramGetUId repeats the same probe to find it.
 */
struct k3GLSLP {
/* GL program object created by k3ProgramGLSL. */
GLhandleARB handle;
/* Active uniform count reported by GL; also the number of table slots. */
size_t ucount;
/* Slot -> strdup'd uniform name, cut at the first '[' if any; NULL while the slot is empty. */
char **uname;
/* Slot -> uniform location queried for the name stored in the same slot. */
GLint *uloc;
};
/*
 * One GPU timing sample: a pair of timestamp queries plus a label.
 *
 * NOTE: qStart and qEnd must stay the first two members, in this order.
 * k3StartTimer and k3Update pass a pointer to the struct to glGenQueries /
 * glDeleteQueries as if it were a GLuint[2].
 */
struct k3Timer {
/* Query that receives the timestamp issued by k3StartTimer. */
GLuint qStart;
/* Query that receives the timestamp issued by k3EndTimer. */
GLuint qEnd;
/* Label copied from the k3StartTimer argument; printed by k3Update. */
char name[64];
};
extern struct k3Timer *k3Timers;
extern size_t k3TimerCount;
static inline struct k3Timer k3StartTimer(char *name) {
struct k3Timer t = {};
if(!GLAD_GL_ARB_timer_query) {
return;
}
glGenQueries(2, (GLuint*) &t);
strncpy(t.name, name, sizeof(t.name));
glQueryCounter(t.qStart, GL_TIMESTAMP);
if(GLAD_GL_KHR_debug) {
glPushDebugGroup(GL_DEBUG_SOURCE_APPLICATION, 0, -1, name);
}
return t;
}
/*
 * Ends the GPU timing region started by k3StartTimer.
 *
 * Records a GL_TIMESTAMP into the timer's end query, closes the debug group
 * opened by k3StartTimer (when KHR_debug is available) and appends the timer
 * to the global k3Timers list, from which k3Update later reads the result.
 *
 * Does nothing when ARB_timer_query is unavailable.
 */
static inline void k3EndTimer(struct k3Timer t) {
	if(!GLAD_GL_ARB_timer_query) {
		return;
	}

	glQueryCounter(t.qEnd, GL_TIMESTAMP);

	if(GLAD_GL_KHR_debug) {
		glPopDebugGroup();
	}

	/* Grow through a temporary: assigning realloc's result straight to
	 * k3Timers would lose the existing list (and then write through NULL)
	 * if the allocation fails. */
	struct k3Timer *grown = realloc(k3Timers, sizeof(*k3Timers) * (k3TimerCount + 1));
	if(!grown) {
		/* Out of memory: keep the existing list intact and drop this sample.
		 * Its two query objects are not released on this path. */
		return;
	}

	k3Timers = grown;
	k3Timers[k3TimerCount++] = t;
}

View File

@ -132,7 +132,9 @@ void k3BatchFlush() {
}
if(k3IsCore) {
glUseProgram((GLuint) coreProg);
GLuint handle = GL_FROM_K3GLSL(coreProg);
glUseProgram(handle);
glUniform2f(coreUResolution, ResolutionX, ResolutionY);
float *farr = alloca(SCount * 60 * sizeof(*farr));
@ -211,14 +213,14 @@ void k3BatchFlush() {
glBindBufferARB(GL_ARRAY_BUFFER_ARB, coreVBO);
glBufferDataARB(GL_ARRAY_BUFFER_ARB, SCount * 60 * sizeof(*farr), farr, GL_DYNAMIC_DRAW);
glUniform1f(glGetUniformLocation((GLuint) coreProg, "u_texuse"), !!activeTex);
glUniform1f(k3ProgramGetUId(coreProg, "u_texuse"), !!activeTex);
glUniform1f(glGetUniformLocation((GLuint) coreProg, "u_borderradius"), activeBorderRadius);
glUniform1f(k3ProgramGetUId(coreProg, "u_borderradius"), activeBorderRadius);
GLint aPos = glGetAttribLocation((GLuint) coreProg, "a_pos");
GLint aUv = glGetAttribLocation((GLuint) coreProg, "a_uv");
GLint aColor = glGetAttribLocation((GLuint) coreProg, "a_color");
GLint aSize = glGetAttribLocation((GLuint) coreProg, "a_size");
GLint aPos = glGetAttribLocation(handle, "a_pos");
GLint aUv = glGetAttribLocation(handle, "a_uv");
GLint aColor = glGetAttribLocation(handle, "a_color");
GLint aSize = glGetAttribLocation(handle, "a_size");
glEnableVertexAttribArray(aPos);
glEnableVertexAttribArray(aUv);