k3/src/k3.c

3202 lines
86 KiB
C

#include"k3_internal.h"
#include"gl.h"
#include"komihash.h"
#include<stdlib.h>
#include<string.h>
#include<stdio.h>
#include<stdarg.h>
#include<limits.h>
#include<assert.h>
#include<cglm/vec2.h>
#include<cglm/frustum.h>
#include<cglm/cam.h>
#include<cglm/box.h>
#include"ssort.h"
// RAST_* are three consecutive indices (0..2) followed by their count.
// NOTE(review): nothing in the visible part of the file uses them; the names
// suggest diffuse/normal/displacement slots -- confirm against the users.
#define RAST_DIFF 0
#define RAST_NORM 1
#define RAST_DISP 2
#define RAST_LEVELS 3
// When set, k3MdlCreate keeps CPU-side copies of positions, bone ids and bone weights.
bool k3IsSoftSkinning = 0;
// Selects between the core-profile GL entry points (true) and the legacy/ARB ones (false) throughout this file.
bool k3IsCore = 0;
// NOTE(review): not referenced in the visible part of the file; presumably shader programs for blitting -- confirm.
static struct k3GLSLP *basicBlitProgram;
static struct k3GLSLP *basicCubemapProgram;
// CPU-side texture compressor callback. k3TexUpdate hands the returned pointer and
// *compressedSize to glCompressedTexImage2D and then free()s the pointer.
typedef void*(*TextureOfflineCompressorFunc)(const void *pixels, uint16_t width, uint16_t height, GLenum externalFmt, GLenum intype, size_t *compressedSize);
// Texture compression settings consulted by k3TexUpdate for normal, diffuse and emission textures.
static bool TextureCompressionEnabled;
static GLenum TextureCompressionRGBA;
static GLenum TextureCompressionSRGBA;
// Optional; when NULL, k3TexUpdate uploads uncompressed pixels through glTexImage2D.
static TextureOfflineCompressorFunc TextureOfflineCompressor;
// When false, k3TexUpdate forces mipmapping off for every texture type.
static bool MipmappingEnabled;
/* Takes one additional reference on the storage object. */
void k3StorageRef(struct k3Storage *s) {
	s->ref += 1;
}
/*
 * Drops one reference. When the count reaches zero the storage's own
 * free callback runs first, then the object itself is released.
 */
void k3StorageUnref(struct k3Storage *s) {
	s->ref -= 1;
	if(s->ref != 0) {
		return;
	}
	s->free(s);
	free(s);
}
/* Free callback of k3StorageBasic: deletes the GL buffer object it wraps. */
static void k3StorageBasicFree(struct k3Storage *s) {
	struct k3StorageBasic *basic = (struct k3StorageBasic*) s;
	glDeleteBuffersARB(1, &basic->gl);
}
/*
 * Creates a storage object backed by a freshly generated GL buffer.
 * The returned object starts with a reference count of 1 and is released
 * through k3StorageUnref.
 */
struct k3StorageBasic *k3StorageBasic() {
	struct k3StorageBasic *storage = calloc(1, sizeof(*storage));
	storage->free = k3StorageBasicFree;
	storage->ref = 1;
	glGenBuffersARB(1, &storage->gl);
	return storage;
}
/* Size last reported through k3Resize. */
static uint16_t MainWidth, MainHeight;
/* Records the new main width and height; issues no GL calls itself. */
void k3Resize(uint16_t width, uint16_t height) {
	MainHeight = height;
	MainWidth = width;
}
/* Time value forwarded to shaders (u_time) and ARB programs (env parameter 0). */
static float Time;
/* Stores the time value that later uniform/parameter setup will upload. */
void k3SetTime(float t) {
	Time = t;
}
/*
 * Recomputes mdl->aabb (index 0 = minimum corner, index 1 = maximum corner)
 * from mdl->verts positions. Without usable positions the box collapses
 * to the origin.
 */
static void update_aabb(struct k3Mdl *mdl, vec3 *pos) {
	bool usable = pos != k3_ATTRIB_NONE && pos != k3_ATTRIB_EMPTY && mdl->verts != 0;
	if(!usable) {
		glm_vec3_zero(mdl->aabb[0]);
		glm_vec3_zero(mdl->aabb[1]);
		return;
	}

	// Seed both corners with the first vertex, then widen with the rest.
	glm_vec3_copy(pos[0], mdl->aabb[0]);
	glm_vec3_copy(pos[0], mdl->aabb[1]);
	for(size_t v = 1; v < mdl->verts; v++) {
		glm_vec3_minv(pos[v], mdl->aabb[0], mdl->aabb[0]);
		glm_vec3_maxv(pos[v], mdl->aabb[1], mdl->aabb[1]);
	}
}
/*
 * Computes one tangent per vertex from triangle positions and UVs.
 *
 * For every triangle the direction of increasing U is derived from the
 * position and UV deltas and accumulated on its three vertices. Each
 * accumulated vector is then made orthogonal to the vertex normal,
 * normalised and quantised to signed bytes (scale 127).
 *
 * verts   - number of vertices (length of pos/uvs, nrm holds 3 bytes each)
 * indices - number of entries in inds; consumed in groups of three
 * Returns a malloc'ed array of verts * 3 bytes; the caller frees it.
 *
 * Triangles whose UV mapping is degenerate (zero determinant) are skipped
 * after a single warning; previously they divided by zero and pushed
 * inf/NaN into the byte conversion.
 */
static int8_t *generate_tangents(size_t verts, size_t indices, uint16_t *inds, vec3 *pos, int8_t *nrm, vec2 *uvs) {
	// vec4 storage, but only the xyz part of each entry is used.
	vec4 *accum = _mm_malloc(verts * sizeof(*accum), 16);
	memset(accum, 0, sizeof(*accum) * verts);

	bool warningBadTangents = 0;

	// "i + 2 < indices" keeps a trailing partial triangle from reading past inds.
	for(size_t i = 0; i + 2 < indices; i += 3) {
		uint16_t ia = inds[i + 0];
		uint16_t ib = inds[i + 1];
		uint16_t ic = inds[i + 2];

		float x1 = pos[ib][0] - pos[ia][0];
		float x2 = pos[ic][0] - pos[ia][0];
		float y1 = pos[ib][1] - pos[ia][1];
		float y2 = pos[ic][1] - pos[ia][1];
		float z1 = pos[ib][2] - pos[ia][2];
		float z2 = pos[ic][2] - pos[ia][2];

		float s1 = uvs[ib][0] - uvs[ia][0];
		float s2 = uvs[ic][0] - uvs[ia][0];
		float t1 = uvs[ib][1] - uvs[ia][1];
		float t2 = uvs[ic][1] - uvs[ia][1];

		float det = s1 * t2 - s2 * t1;
		if(det == 0) {
			if(!warningBadTangents) {
				warningBadTangents = true;
				k3Log(k3_WARN, "Bad tangents. Is mesh properly UV-mapped?");
			}
			continue;
		}

		float r = 1.f / det;
		vec3 sdir = {(t2 * x1 - t1 * x2) * r, (t2 * y1 - t1 * y2) * r, (t2 * z1 - t1 * z2) * r};

		glm_vec3_add(accum[ia], sdir, accum[ia]);
		glm_vec3_add(accum[ib], sdir, accum[ib]);
		glm_vec3_add(accum[ic], sdir, accum[ic]);
	}

	int8_t *ret = calloc(verts, 3);

	for(size_t i = 0; i < verts; i++) {
		vec3 n = {nrm[i * 3 + 0], nrm[i * 3 + 1], nrm[i * 3 + 2]};
		glm_vec3_normalize(n);

		// Gram-Schmidt: remove the component of the tangent along the normal.
		glm_vec3_scale(n, glm_vec3_dot(n, accum[i]), n);
		vec3 t;
		glm_vec3_sub(accum[i], n, t);
		glm_vec3_normalize(t);

		ret[i * 3 + 0] = t[0] * 127;
		ret[i * 3 + 1] = t[1] * 127;
		ret[i * 3 + 2] = t[2] * 127;
	}

	// The accumulator used to be leaked on every call.
	_mm_free(accum);

	return ret;
}
/*
 * Creates a model and uploads its vertex attributes into one GL array buffer
 * and its indices into one GL element buffer.
 *
 * Attributes are stored as consecutive, non-interleaved arrays in this order:
 * positions (12 bytes/vertex), normals (3), UVs (8), generated tangents (3),
 * colors (4), bone ids (4) followed by bone weights (8). The byte offset of
 * each array is recorded in offV/offN/offU/offT/offC/offB, with -1 meaning
 * "attribute absent".
 *
 * A NULL attribute pointer means the attribute is absent; k3_ATTRIB_EMPTY
 * reserves space for the attribute without uploading any data.
 *
 * invBind and boneParents are stored as given (not copied).
 * The model starts with no meshes, no animations and the debug name "Unnamed".
 */
struct k3Mdl *k3MdlCreate(size_t verts, size_t indices, size_t boneCount, vec3 *pos, uint8_t *nrm, float *uvs, uint8_t *cols, uint8_t *boneids, uint16_t *boneweights, uint16_t *inds, mat4 *invBind, uint8_t *boneParents) {
struct k3Mdl *ret = calloc(1, sizeof(*ret));
ret->verts = verts;
ret->meshCount = 0;
ret->meshes = NULL;
ret->boneCount = boneCount;
ret->invBind = invBind;
ret->boneParents = boneParents;
ret->animCount = 0;
ret->anims = NULL;
// Software skinning keeps its own CPU copies of the data it needs.
// NOTE(review): the pointers are only tested for non-NULL here, not against k3_ATTRIB_EMPTY -- confirm callers never pass EMPTY with soft skinning.
if(k3IsSoftSkinning && pos && boneids && boneweights) {
ret->cpuSkinning.pos = _mm_malloc(sizeof(*ret->cpuSkinning.pos) * verts, 16);
memcpy(ret->cpuSkinning.pos, pos, sizeof(*ret->cpuSkinning.pos) * verts);
ret->cpuSkinning.boneids = malloc(sizeof(*ret->cpuSkinning.boneids) * verts * 4);
memcpy(ret->cpuSkinning.boneids, boneids, sizeof(*ret->cpuSkinning.boneids) * verts * 4);
ret->cpuSkinning.boneweights = malloc(sizeof(*ret->cpuSkinning.boneweights) * verts * 4);
memcpy(ret->cpuSkinning.boneweights, boneweights, sizeof(*ret->cpuSkinning.boneweights) * verts * 4);
}
ret->vstore = k3StorageBasic();
ret->estore = k3StorageBasic();
glBindBufferARB(GL_ARRAY_BUFFER_ARB, ret->vstore->gl);
// Room for positions, normals, UVs and tangents is always reserved, even when some of them are absent.
size_t size = verts * (12 + 3 + 8 + 3);
if(cols) {
size += verts * 4;
}
if(boneCount) {
size += verts * (4 + 8);
}
glBufferDataARB(GL_ARRAY_BUFFER_ARB, size, NULL, GL_STREAM_DRAW_ARB);
// Running byte offset into the vertex buffer.
int o = 0;
ret->offV = o;
// NOTE(review): positions are uploaded for any pointer other than k3_ATTRIB_EMPTY, so they appear to be mandatory -- confirm.
if(pos != k3_ATTRIB_EMPTY) {
glBufferSubDataARB(GL_ARRAY_BUFFER_ARB, o, 12 * verts, pos);
}
o += 12 * verts;
if(nrm) {
ret->offN = o;
if(nrm != k3_ATTRIB_EMPTY) {
glBufferSubDataARB(GL_ARRAY_BUFFER_ARB, o, 3 * verts, nrm);
}
o += 3 * verts;
} else ret->offN = -1;
if(uvs) {
ret->offU = o;
if(uvs != k3_ATTRIB_EMPTY) {
glBufferSubDataARB(GL_ARRAY_BUFFER_ARB, o, 8 * verts, uvs);
}
o += 8 * verts;
} else ret->offU = -1;
// Tangents exist whenever both normals and UVs exist; they are generated here rather than supplied.
if(nrm && uvs) {
ret->offT = o;
if(nrm != k3_ATTRIB_EMPTY && uvs != k3_ATTRIB_EMPTY) {
// NOTE(review): generate_tangents also reads pos and inds, which are not checked against NULL/k3_ATTRIB_EMPTY here.
// NOTE(review): nrm (uint8_t*) and uvs (float*) are passed to parameters declared int8_t* and vec2*.
int8_t *tans = generate_tangents(verts, indices, inds, pos, nrm, uvs);
glBufferSubDataARB(GL_ARRAY_BUFFER_ARB, o, 3 * verts, tans);
free(tans);
}
o += 3 * verts;
} else ret->offT = -1;
if(cols) {
ret->offC = o;
if(cols != k3_ATTRIB_EMPTY) {
glBufferSubDataARB(GL_ARRAY_BUFFER_ARB, o, 4 * verts, cols);
}
o += 4 * verts;
} else ret->offC = -1;
// Bone ids and weights are uploaded unconditionally when the model has bones.
if(boneCount) {
ret->offB = o;
glBufferSubDataARB(GL_ARRAY_BUFFER_ARB, o, 4 * verts, boneids);
o += 4 * verts;
glBufferSubDataARB(GL_ARRAY_BUFFER_ARB, o, 8 * verts, boneweights);
o += 8 * verts;
} else ret->offB = -1;
glBindBufferARB(GL_ELEMENT_ARRAY_BUFFER_ARB, ret->estore->gl);
// With NULL or k3_ATTRIB_EMPTY indices the element buffer gets no data store at all.
if(inds && inds != k3_ATTRIB_EMPTY) {
glBufferDataARB(GL_ELEMENT_ARRAY_BUFFER_ARB, indices * sizeof(uint16_t), inds, GL_STATIC_DRAW_ARB);
}
update_aabb(ret, pos);
ret->debugname = strdup("Unnamed");
return ret;
}
/*
 * Replaces the model's vertex positions with pos (mdl->verts entries),
 * refreshing the bounding box and the GL vertex buffer.
 * Logs an error and does nothing if the model has no position attribute.
 */
void k3MdlUpdatePos(struct k3Mdl *mdl, vec3 *pos) {
	if(mdl->offV != -1) {
		update_aabb(mdl, pos);

		glBindBufferARB(GL_ARRAY_BUFFER_ARB, mdl->vstore->gl);
		glBufferSubDataARB(GL_ARRAY_BUFFER_ARB, mdl->offV, 12 * mdl->verts, pos);
		return;
	}

	k3Log(k3_ERR, "Missing vertex position attribute. Cannot update.");
}
/*
 * Re-uploads the model's normals (3 bytes per vertex) into the GL vertex buffer.
 * Logs an error and does nothing if the model has no normal attribute.
 */
void k3MdlUpdateNrm(struct k3Mdl *mdl, uint8_t *nrm) {
	if(mdl->offN != -1) {
		glBindBufferARB(GL_ARRAY_BUFFER_ARB, mdl->vstore->gl);
		glBufferSubDataARB(GL_ARRAY_BUFFER_ARB, mdl->offN, 3 * mdl->verts, nrm);
		return;
	}

	k3Log(k3_ERR, "Missing vertex normal attribute. Cannot update.");
}
void k3MdlAddMesh(struct k3Mdl *mdl, struct k3Mat *mat, uint32_t idxStart, uint32_t idxNumber) {
mdl->meshes = realloc(mdl->meshes, sizeof(*mdl->meshes) * (mdl->meshCount + 1));
struct k3Mesh *mehs = &mdl->meshes[mdl->meshCount++];
mehs->idxStart = idxStart;
mehs->idxNumber = idxNumber;
memcpy(&mehs->mat, mat, sizeof(*mat));
}
/*
 * Returns the model's mesh array and writes its length to *count.
 * The array stays owned by the model.
 */
struct k3Mesh *k3MdlGetMeshes(struct k3Mdl *mdl, size_t *count) {
	struct k3Mesh *meshes = mdl->meshes;
	*count = mdl->meshCount;
	return meshes;
}
void k3MdlAddAnim(struct k3Mdl *mdl, struct k3AnimationFountain *anim) {
mdl->anims = realloc(mdl->anims, sizeof(*mdl->anims) * (mdl->animCount + 1));
mdl->anims[mdl->animCount++] = anim;
}
/*
 * Looks up an animation of the model by its id.
 * Returns the first animation whose id matches, or NULL when there is none.
 * (The not-found path used to fall off the end of the function without a
 * return value, which is undefined behaviour once the caller uses it.)
 */
struct k3AnimationFountain *k3MdlGetAnim(struct k3Mdl *mdl, uint16_t id) {
	for(size_t i = 0; i < mdl->animCount; i++) {
		if(mdl->anims[i]->id == id) {
			return mdl->anims[i];
		}
	}
	return NULL;
}
/* Returns the number of bones in the model's skeleton. */
size_t k3MdlGetBoneCount(struct k3Mdl *mdl) {
	size_t bones = mdl->boneCount;
	return bones;
}
/*
 * Returns the total size in bytes of a packed bone-name list: boneCount
 * NUL-terminated strings stored back to back. Every terminator is counted.
 *
 * The index is a size_t so it matches boneCount (it used to be a signed int
 * compared against a size_t), and each name is measured only once.
 */
static size_t bonenames_len(size_t boneCount, const char *names) {
	const char *cursor = names;
	for(size_t b = 0; b < boneCount; b++) {
		cursor += strlen(cursor) + 1;
	}
	return (size_t) (cursor - names);
}
/*
 * Creates a new model that shares the source's GPU buffers (an extra
 * reference is taken on both storages) but owns private copies of the mesh
 * list, inverse bind matrices, bone parents, bone names, animation pointer
 * list and debug name.
 *
 * The CPU-skinning arrays are shared by pointer, not copied.
 * NOTE(review): confirm which model is expected to free them.
 *
 * Empty arrays are now left NULL instead of being passed to memcpy; a model
 * without meshes or animations has NULL source arrays, and memcpy with a
 * NULL pointer is undefined even for a length of zero.
 */
struct k3Mdl *k3MdlCopySubs(struct k3Mdl *src) {
	struct k3Mdl *dst = calloc(1, sizeof(*dst));

	dst->cpuSkinning.pos = src->cpuSkinning.pos;
	dst->cpuSkinning.boneids = src->cpuSkinning.boneids;
	dst->cpuSkinning.boneweights = src->cpuSkinning.boneweights;

	dst->verts = src->verts;

	dst->vstore = src->vstore;
	dst->estore = src->estore;
	k3StorageRef((void*) dst->vstore);
	k3StorageRef((void*) dst->estore);

	dst->meshCount = src->meshCount;
	if(dst->meshCount && src->meshes) {
		dst->meshes = malloc(sizeof(*dst->meshes) * dst->meshCount);
		memcpy(dst->meshes, src->meshes, sizeof(*dst->meshes) * dst->meshCount);
	}

	dst->boneCount = src->boneCount;
	if(dst->boneCount && src->invBind) {
		dst->invBind = _mm_malloc(sizeof(*dst->invBind) * dst->boneCount, 16);
		memcpy(dst->invBind, src->invBind, sizeof(*dst->invBind) * dst->boneCount);
	}
	if(dst->boneCount && src->boneParents) {
		dst->boneParents = malloc(sizeof(*dst->boneParents) * dst->boneCount);
		memcpy(dst->boneParents, src->boneParents, sizeof(*dst->boneParents) * dst->boneCount);
	}
	if(src->boneNames) {
		size_t sz = bonenames_len(src->boneCount, src->boneNames);
		dst->boneNames = calloc(1, sz);
		memcpy(dst->boneNames, src->boneNames, sz);
	}

	dst->animCount = src->animCount;
	if(dst->animCount && src->anims) {
		dst->anims = malloc(sizeof(*dst->anims) * dst->animCount);
		memcpy(dst->anims, src->anims, sizeof(*dst->anims) * dst->animCount);
	}

	memcpy(dst->aabb, src->aabb, sizeof(dst->aabb));

	dst->offB = src->offB;
	dst->offC = src->offC;
	dst->offN = src->offN;
	dst->offT = src->offT;
	dst->offU = src->offU;
	dst->offV = src->offV;

	// strdup(NULL) is undefined; a source without a name yields a copy without one.
	dst->debugname = src->debugname ? strdup(src->debugname) : NULL;

	return dst;
}
/*
 * Replaces the model's debug name with a private copy of name and logs it
 * at debug level.
 */
void k3MdlSetDebugName(struct k3Mdl *mdl, const char *name) {
	char *previous = mdl->debugname;
	free(previous);

	mdl->debugname = strdup(name);

	k3Log(k3_DEBUG, "Model name: %s", name);
}
/*
 * Recursive helper of k3MdlQueryWeights: sets output[c] = 1 for every
 * descendant c of bone b. Only bones with an index greater than their
 * parent's are visited, so children are expected to come after parents.
 */
static void qwfun(struct k3Mdl *mdl, size_t b, float *output) {
	size_t child = b + 1;
	while(child < mdl->boneCount) {
		if(mdl->boneParents[child] == b) {
			output[child] = 1;
			qwfun(mdl, child, output);
		}
		child++;
	}
}
/*
 * Fills output (mdl->boneCount floats) with 1 for every descendant of
 * boneIdx and 0 everywhere else, including boneIdx itself.
 * An out-of-range boneIdx yields an all-zero array.
 */
void k3MdlQueryWeights(struct k3Mdl *mdl, size_t boneIdx, float *output) {
	memset(output, 0, sizeof(*output) * mdl->boneCount);

	if(boneIdx >= mdl->boneCount) {
		return;
	}

	qwfun(mdl, boneIdx, output);
}
/*
 * Finds a bone by name in the model's packed name list (NUL-terminated
 * strings stored back to back, one per bone).
 * Returns the bone index, or -1 if the model has no names or none matches.
 */
intmax_t k3MdlGetBoneFromName(struct k3Mdl *mdl, const char *targetName) {
	const char *cursor = mdl->boneNames;
	if(cursor == NULL) {
		return -1;
	}

	size_t bone = 0;
	while(bone < mdl->boneCount) {
		if(strcmp(cursor, targetName) == 0) {
			return (intmax_t) bone;
		}
		// Step over this name and its terminator.
		cursor += strlen(cursor) + 1;
		bone++;
	}

	return -1;
}
void k3MdlSetBoneNames(struct k3Mdl *mdl, const char *names) {
size_t sz = bonenames_len(mdl->boneCount, names);
mdl->boneNames = malloc(sz);
memcpy(mdl->boneNames, names, sz);
}
void k3AnimatorInit(struct k3Animator *this, struct k3Mdl *mdl) {
this->mdl = mdl;
size_t boneCount = k3MdlGetBoneCount(this->mdl);
this->bones = _mm_malloc(boneCount * sizeof(*this->bones), 16);
for(size_t b = 0; b < boneCount; b++) {
this->bones[b].translation[0] = 0;
this->bones[b].translation[1] = 0;
this->bones[b].translation[2] = 0;
this->bones[b].translation[3] = 1;
this->bones[b].rotation[0] = 0;
this->bones[b].rotation[1] = 0;
this->bones[b].rotation[2] = 0;
this->bones[b].rotation[3] = 1;
}
}
/*
 * Moves the animator to an absolute time and recomputes its bone data.
 *
 * The animation source is updated first. If it exposes a per-bone cache,
 * each bone's local transform is built from the cached rotation and
 * translation, chained onto its parent's transform (0xFF = no parent),
 * multiplied by the model's inverse bind matrix, and finally decomposed
 * back into the translation + rotation pair stored in anim->bones.
 *
 * Parents are read from the scratch array while it is being filled, so a
 * parent must have a lower index than its children.
 */
void k3AnimatorSet(struct k3Animator *anim, float time) {
	anim->time = time;

	k3AnimationUpdate(anim->anim, anim->time);

	const size_t boneCount = anim->anim->bones;

	if(anim->bones == NULL) {
		anim->bones = _mm_malloc(sizeof(*anim->bones) * boneCount, 16);
		memset(anim->bones, 0, sizeof(*anim->bones) * boneCount);
	}

	if(anim->anim->cache == NULL) {
		return;
	}

	mat4 *world = alloca(sizeof(*world) * boneCount);
	memset(world, 0, sizeof(*world) * boneCount);

	// Pass 1: accumulate transforms down the hierarchy.
	for(size_t bone = 0; bone < boneCount; bone++) {
		mat4 local = GLM_MAT4_IDENTITY_INIT;
		glm_quat_mat4(anim->anim->cache[bone].rotation, local);
		glm_vec4_copy(anim->anim->cache[bone].translation, local[3]);

		uint8_t parent = anim->mdl->boneParents[bone];
		if(parent == 0xFF) {
			glm_mat4_copy(local, world[bone]);
		} else {
			glm_mat4_mul(world[parent], local, world[bone]);
		}
	}

	// Pass 2: apply the inverse bind pose, then split into translation + rotation.
	// This must stay separate from pass 1, which needs the parents' matrices
	// before the inverse bind is applied.
	for(size_t bone = 0; bone < boneCount; bone++) {
		glm_mat4_mul(world[bone], anim->mdl->invBind[bone], world[bone]);

		vec4 translation;
		mat4 rotation;
		vec3 scale;
		glm_decompose(world[bone], translation, rotation, scale);

		translation[3] = 1;
		glm_vec4_copy(translation, anim->bones[bone].translation);

		versor orientation;
		glm_mat4_quat(rotation, orientation);
		glm_quat_copy(orientation, anim->bones[bone].rotation);
	}
}
/* Advances the animator by dt relative to its current time. */
void k3AnimatorStep(struct k3Animator *anim, float dt) {
	float target = anim->time + dt;
	k3AnimatorSet(anim, target);
}
/*
 * Allocates an empty texture object. No GL texture is created here; that
 * happens on the first k3TexUpdate. Only k3_CUBEMAP affects the result:
 * it marks the texture as a cubemap.
 */
struct k3Tex *k3TexCreate(enum k3TexType type) {
	struct k3Tex *tex = calloc(1, sizeof(*tex));
	tex->cubemap = (type == k3_CUBEMAP);
	return tex;
}
void k3TexUpdate(struct k3Tex *tex, enum k3TexType type, int index, uint16_t width, uint16_t height, void *data) {
GLenum target = tex->cubemap ? GL_TEXTURE_CUBE_MAP : GL_TEXTURE_2D;
/*
The Intel 4500MHD driver causes broken textures upon calling glTexImage2D a second time to resize them.
Specifically, the textures appear to be read as if all zeroes.
And they return to normal once you come back to the original size!?!
For this reason, each k3TexUpdate call should destroy the texture name, and create a new one.
*/
if(tex->tex) {
// Assume cubemaps don't get reuploaded because I'm lazy..
if(!tex->cubemap) {
glDeleteTextures(1, &tex->tex);
tex->tex = 0;
}
} else {
k3Log(k3_INFO, "Init type %i tex (%u,%u,%u)", type, width, height, 0);
}
GLint internalFmt, externalFmt, intype;
int linearFilter, mipmap;
int compressed = 0;
switch(type) {
case k3_RAWCOLOR:
internalFmt = GLAD_GL_ARB_texture_float ? GL_RGBA32F_ARB : GL_RGB10_A2;
externalFmt = GL_RGBA;
intype = GL_UNSIGNED_BYTE;
linearFilter = 1;
mipmap = 1;
break;
case k3_NORMAL:
compressed = TextureCompressionEnabled;
internalFmt = compressed ? TextureCompressionRGBA : GL_RGBA;
externalFmt = GL_RGBA;
intype = GL_UNSIGNED_BYTE;
linearFilter = 1;
mipmap = 1;
break;
case k3_DIFFUSE:
case k3_EMISSION:
compressed = TextureCompressionEnabled;
internalFmt = compressed
? (GLAD_GL_EXT_texture_sRGB ? TextureCompressionSRGBA : TextureCompressionRGBA)
: (GLAD_GL_EXT_texture_sRGB ? GL_SRGB8_ALPHA8_EXT : GL_RGBA8);
externalFmt = GL_RGBA;
intype = GL_UNSIGNED_BYTE;
linearFilter = 1;
mipmap = 1;
break;
case k3_DISPLACEMENT:
case k3_ROUGHNESS:
internalFmt = !k3IsCore ? GL_LUMINANCE8 : GL_RED;
externalFmt = GL_RED;
intype = GL_UNSIGNED_BYTE;
linearFilter = 1;
mipmap = 1;
break;
case k3_ALPHA:
internalFmt = !k3IsCore ? GL_ALPHA8 : GL_RED;
externalFmt = !k3IsCore ? GL_ALPHA : GL_RED;
intype = GL_UNSIGNED_BYTE;
linearFilter = 1;
mipmap = 1;
break;
case k3_DEPTH:
internalFmt = GL_DEPTH_COMPONENT32F;
externalFmt = GL_DEPTH_COMPONENT;
intype = GL_UNSIGNED_INT;
if(!k3IsCore) glTexParameteri(target, GL_DEPTH_TEXTURE_MODE, GL_LUMINANCE);
linearFilter = 1;
mipmap = 0;
break;
default:
abort();
break;
}
if(!MipmappingEnabled) {
mipmap = 0;
}
if(!tex->tex) {
glGenTextures(1, &tex->tex);
glBindTexture(target, tex->tex);
if(tex->cubemap) {
glTexParameteri(target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
glTexParameteri(target, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glTexParameteri(target, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
} else {
if(!k3IsCore && mipmap) {
glTexParameteri(target, GL_GENERATE_MIPMAP, GL_TRUE);
}
}
if(type == k3_DEPTH) {
glTexParameterfv(GL_TEXTURE_2D, GL_TEXTURE_BORDER_COLOR, (const float[]) {1, 1, 1, 1});
glTexParameteri(target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_BORDER);
glTexParameteri(target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_BORDER);
}
if(!tex->cubemap && mipmap) {
glTexParameteri(target, GL_TEXTURE_MIN_FILTER, linearFilter ? GL_LINEAR_MIPMAP_LINEAR : GL_NEAREST_MIPMAP_LINEAR);
glTexParameteri(target, GL_TEXTURE_MAG_FILTER, linearFilter ? GL_LINEAR : GL_NEAREST);
} else {
glTexParameteri(target, GL_TEXTURE_MIN_FILTER, linearFilter ? GL_LINEAR : GL_NEAREST);
glTexParameteri(target, GL_TEXTURE_MAG_FILTER, linearFilter ? GL_LINEAR : GL_NEAREST);
}
} else {
glBindTexture(target, tex->tex);
}
if(k3IsCore && type == k3_ALPHA) {
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_SWIZZLE_R, GL_ONE);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_SWIZZLE_G, GL_ONE);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_SWIZZLE_B, GL_ONE);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_SWIZZLE_A, GL_RED);
}
if(tex->cubemap) {
target = GL_TEXTURE_CUBE_MAP_POSITIVE_X + index;
} else if(index) {
k3Log(k3_WARN, "Non-zero index passed for non-cubemap texture");
}
if(compressed && TextureOfflineCompressor) {
size_t len;
data = TextureOfflineCompressor(data, width, height, externalFmt, intype, &len);
glCompressedTexImage2D(target, 0, internalFmt, width, height, 0, len, data);
free(data);
} else {
glTexImage2D(target, 0, internalFmt, width, height, 0, externalFmt, intype, data);
}
if(!tex->cubemap && k3IsCore && mipmap) {
glGenerateMipmap(target);
}
tex->szX = width;
tex->szY = height;
tex->szZ = 0;
tex->glInternalFormat = internalFmt;
}
/* Width recorded by the last k3TexUpdate. */
uint32_t k3TexSzX(struct k3Tex *this) {
	uint32_t width = this->szX;
	return width;
}
/* Height recorded by the last k3TexUpdate. */
uint32_t k3TexSzY(struct k3Tex *this) {
	uint32_t height = this->szY;
	return height;
}
/* Depth recorded by the last k3TexUpdate (that function always stores 0). */
uint32_t k3TexSzZ(struct k3Tex *this) {
	uint32_t depth = this->szZ;
	return depth;
}
static size_t LightCountOld;
/* Current light list as handed to k3SetLights; the array is borrowed, not copied. */
static size_t LightCount;
static struct k3Light *Lights;
/*
 * Installs the active light list. The array is kept by pointer and is
 * modified in place: the cascade count of every directional light is
 * clamped to the range 1..3.
 */
void k3SetLights(size_t count, struct k3Light *lightsNew) {
	Lights = lightsNew;
	LightCount = count;

	for(int idx = 0; idx < count; idx++) {
		struct k3Light *light = &Lights[idx];

		if(light->type != k3_DIRECTIONAL) {
			continue;
		}

		if(light->dir.cascadeCount == 0) {
			light->dir.cascadeCount = 1;
		} else if(light->dir.cascadeCount > 3) {
			light->dir.cascadeCount = 3;
		}
	}
}
/* Returns the array last given to k3SetLights and writes its length to *a. */
struct k3Light *k3GetLights(size_t *a) {
	struct k3Light *current = Lights;
	*a = LightCount;
	return current;
}
/*
 * Clears the colour and depth buffers of the current framebuffer to dark
 * grey. Depth and colour writes are re-enabled first, because glClear
 * honours the write masks.
 */
void k3Clear() {
	const float grey = 0.2;

	glDepthMask(GL_TRUE);
	glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);

	glClearColor(grey, grey, grey, 1);
	glClear(GL_DEPTH_BUFFER_BIT | GL_COLOR_BUFFER_BIT);
}
// NOTE(review): not used in the visible part of the file; meaning to be confirmed.
uint8_t k3GraphicalReduction;
// One queued draw: a single mesh of a model together with its transform.
// Entries are filled in by k3Batch and ordered by rblecompar1.
struct Renderable {
mat4 modelmat;
struct k3Mdl *mdl;
struct k3Mesh *mesh;
struct k3AnimationBone *bones;
// Programs of the mesh material's pass 0, cached by k3Batch for sorting.
struct k3GLSLP *glslp;
GLuint arbvp;
GLuint arbfp;
} __attribute__((aligned(16)));
// Render queue; k3Batch grows it in steps of 64 entries.
static struct Renderable *renderQueue = NULL;
static size_t renderQueueSize, renderQueueCapacity = 0;
// CamMat[3] is read as the camera position (sorting distance and the u_cam uniform).
static mat4 CamMat;
static mat4 ProjMat;
// Shadow data for one light: up to 6 view-projection matrices and a vec4
// uploaded as that light's entry of u_shadows0seg.
struct LightShadow {
uint8_t vpCount;
mat4 vp[6];
vec4 atlasSegment;
#ifdef k3_IRREGULAR_SHADOWS
GLuint multimapEls; // buffer
GLuint multimapHeads; // image
#endif
} __attribute__((aligned(16)));
// Indexed by light index in setup_glsl_shadow_uniforms.
static struct LightShadow *LightShadows;
static size_t LightShadowsCount = 0, LightShadowsCapacity = 0;
// Its depth texture is what bind_shadow_texture binds in the regular mode.
static struct k3Offscreen *ShadowAtlas;
// When true, shadows go through an image binding (IrregPixelsInShadow, R32UI) instead of the atlas texture.
static bool LightShadowIrregularMode = false;
static GLuint IrregPixelsInShadow;
/*
 * Ordering of the render queue.
 *
 * 1. Opaque entries before transparent ones (pass 0 of the material).
 * 2. By squared distance from the camera: opaque front to back,
 *    transparent back to front.
 * 3. Ties are broken by GLSL program handle, then ARB fragment program,
 *    then ARB vertex program, so entries sharing state end up adjacent.
 *
 * Returns negative, zero or positive like a qsort comparator.
 *
 * The distance step used copysignf(1, diff), which is never zero: two
 * entries at the same distance compared as "greater" in both directions,
 * and the tie-breakers were unreachable. A three-way comparison is used
 * instead. Handles are compared rather than subtracted to avoid unsigned
 * wrap-around, and a missing GLSL program counts as handle 0.
 */
static intmax_t rblecompar1(const void *a, const void *b) {
	const struct Renderable *ra = a;
	const struct Renderable *rb = b;

	int order = ra->mesh->mat.passes[0].transparent - rb->mesh->mat.passes[0].transparent;
	if(order != 0) {
		return order;
	}

	float dist2A = glm_vec3_distance2((float*) ra->modelmat[3], CamMat[3]);
	float dist2B = glm_vec3_distance2((float*) rb->modelmat[3], CamMat[3]);
	order = (dist2A > dist2B) - (dist2A < dist2B);
	if(ra->mesh->mat.passes[0].transparent) {
		order = -order;
	}
	if(order != 0) {
		return order;
	}

	uintptr_t progA = ra->glslp ? (uintptr_t) ra->glslp->handle : 0;
	uintptr_t progB = rb->glslp ? (uintptr_t) rb->glslp->handle : 0;
	if(progA != progB) {
		return progA < progB ? -1 : 1;
	}

	if(ra->arbfp != rb->arbfp) {
		return ra->arbfp < rb->arbfp ? -1 : 1;
	}

	if(ra->arbvp != rb->arbvp) {
		return ra->arbvp < rb->arbvp ? -1 : 1;
	}

	return 0;
}
/* Sorts the render queue in place using rblecompar1. */
static void queuesort() {
	ssort(renderQueue, renderQueueSize, sizeof(*renderQueue), rblecompar1);
}
/*
 * Queues every mesh of a model for rendering with the given model matrix
 * and optional bone data. The matrix is copied; mdl and bones are stored by
 * pointer. The queue grows by 64 entries whenever it is full.
 */
void k3Batch(struct k3Mdl *mdl, mat4 modelmat, struct k3AnimationBone *bones) {
	for(size_t m = 0; m < mdl->meshCount; m++) {
		if(renderQueueSize == renderQueueCapacity) {
			// 16-byte aligned storage, so grow by allocate + copy rather than realloc.
			size_t grownCapacity = renderQueueCapacity + 64;
			struct Renderable *grown = _mm_malloc(sizeof(*grown) * grownCapacity, 16);

			if(renderQueue != NULL) {
				memcpy(grown, renderQueue, sizeof(*grown) * renderQueueCapacity);
				_mm_free(renderQueue);
			}

			renderQueue = grown;
			renderQueueCapacity = grownCapacity;
		}

		struct k3Mesh *mesh = &mdl->meshes[m];
		struct Renderable *slot = &renderQueue[renderQueueSize];
		renderQueueSize++;

		slot->mdl = mdl;
		slot->mesh = mesh;
		glm_mat4_copy(modelmat, slot->modelmat);
		slot->bones = bones;

		// Cache the programs of pass 0; the sort compares them.
		slot->glslp = slot->mesh->mat.passes[0].glsl.hp;
		slot->arbvp = GL_FROM_K3ARBVP(slot->mesh->mat.passes[0].arbvp.vp);
		slot->arbfp = GL_FROM_K3ARBFP(slot->mesh->mat.passes[0].arbfp.fp);
	}
}
/* Legacy contexts only: loads proj into the fixed-function projection matrix. */
static void setup_ff_projection(mat4 proj) {
	if(k3IsCore) {
		return;
	}

	glMatrixMode(GL_PROJECTION);
	glLoadIdentity();
	glLoadMatrixf((float*) proj);
}
/* Core contexts only: uploads proj to the program's u_projection uniform, if it has one. */
static void setup_core_projection(struct k3GLSLP *p, mat4 proj) {
	if(!k3IsCore) {
		return;
	}

	GLint location = k3ProgramGetUId(p, "u_projection");
	if(location == -1) {
		return;
	}

	glUniformMatrix4fv(location, 1, GL_FALSE, (float*) proj);
}
/*
 * Legacy contexts only: configures fixed-function lights GL_LIGHT0.. from
 * Lights[lightStart .. lightStart + lightCount).
 *
 * The number of enabled GL lights is remembered between calls so that only
 * the difference is enabled or disabled. The view matrix is loaded as the
 * modelview matrix before the lights are specified.
 */
static void setup_ff_lights(mat4 view, int lightStart, int lightCount) {
	// Some drivers crash on glEnable(GL_LIGHTING) *AND* glDisable(GL_LIGHTING) for some reason
	// Just don't do it unless fixed-function is explicitly on
	if(k3IsCore) {
		return;
	}

	static int enabledLights = 0;

	// At most one of these two loops does anything.
	for(int n = enabledLights; n < lightCount; n++) {
		glEnable(GL_LIGHT0 + n);
	}
	for(int n = lightCount; n < enabledLights; n++) {
		glDisable(GL_LIGHT0 + n);
	}
	enabledLights = lightCount;

	glMatrixMode(GL_MODELVIEW);
	glLoadMatrixf((float*) view); // Lighting in eye-space

	// Note: this tests the global light count, not the lightCount argument.
	if(LightCount) {
		glEnable(GL_LIGHTING);
	} else {
		glDisable(GL_LIGHTING);
	}

	for(int n = 0; n < lightCount; n++) {
		struct k3Light *light = &Lights[lightStart + n];
		GLenum slot = GL_LIGHT0 + n;

		glLightfv(slot, GL_POSITION, light->dir.direction);
		glLightfv(slot, GL_DIFFUSE, light->color);

		// Fixed-function does not support a light cutoff radius, so approximate, assuming 0.01 is black
		glLightf(slot, GL_CONSTANT_ATTENUATION, 1);
		glLightf(slot, GL_LINEAR_ATTENUATION, 0);
		glLightf(slot, GL_QUADRATIC_ATTENUATION, 1.f / (light->radius * light->radius * 0.01));
	}
}
/*
 * Publishes the current time as environment parameter 0 (x component) of
 * ARB vertex and fragment programs, for whichever extension is available.
 */
static void setup_arbprog_globals() {
	const float now = Time;

	if(GLAD_GL_ARB_fragment_program) {
		glProgramEnvParameter4fARB(GL_FRAGMENT_PROGRAM_ARB, 0, now, 0, 0, 0);
	}

	if(GLAD_GL_ARB_vertex_program) {
		glProgramEnvParameter4fARB(GL_VERTEX_PROGRAM_ARB, 0, now, 0, 0, 0);
	}
}
/*
 * Uploads the per-frame uniforms of a GLSL program: u_time, u_cam (taken
 * from CamMat[3]) and u_view.
 */
static void setup_glsl_globals(struct k3GLSLP *p, mat4 view) {
	GLint timeLoc = k3ProgramGetUId(p, "u_time");
	GLint camLoc = k3ProgramGetUId(p, "u_cam");
	GLint viewLoc = k3ProgramGetUId(p, "u_view");

	if(k3IsCore) {
		glUniform1f(timeLoc, Time);
		glUniform3f(camLoc, CamMat[3][0], CamMat[3][1], CamMat[3][2]);
		glUniformMatrix4fv(viewLoc, 1, GL_FALSE, (float*) view);
	} else {
		glUniform1fARB(timeLoc, Time);
		glUniform3fARB(camLoc, CamMat[3][0], CamMat[3][1], CamMat[3][2]);
		glUniformMatrix4fvARB(viewLoc, 1, GL_FALSE, (float*) view);
	}
}
/*
 * Uploads the user-defined uniforms of one material pass. Only the
 * single-int (k3_MAT_UNIFORM_I1) and single-float (k3_MAT_UNIFORM_F1)
 * kinds are handled; any other kind is skipped.
 */
static void setup_glsl_mat_uniforms(struct k3GLSLP *p, struct k3Mat *mat, int pass) {
	for(int idx = 0; idx < mat->passes[pass].glsl.uCount; idx++) {
		GLuint location = k3ProgramGetUId(p, mat->passes[pass].glsl.u[idx].name);

		if(mat->passes[pass].glsl.u[idx].type == k3_MAT_UNIFORM_I1) {
			if(k3IsCore) {
				glUniform1i(location, mat->passes[pass].glsl.u[idx].i1);
			} else {
				glUniform1iARB(location, mat->passes[pass].glsl.u[idx].i1);
			}
			continue;
		}

		if(mat->passes[pass].glsl.u[idx].type == k3_MAT_UNIFORM_F1) {
			if(k3IsCore) {
				glUniform1f(location, mat->passes[pass].glsl.u[idx].f1);
			} else {
				glUniform1fARB(location, mat->passes[pass].glsl.u[idx].f1);
			}
		}
	}
}
static void setup_glsl_model_uniforms(struct k3GLSLP *p, float *modelmat) {
mat4 invmodel;
glm_mat4_inv(modelmat, invmodel);
if(!k3IsCore) {
glUniformMatrix4fvARB(k3ProgramGetUId(p, "u_model"), 1, GL_FALSE, (float*) modelmat);
glUniformMatrix4fvARB(k3ProgramGetUId(p, "u_imodel"), 1, GL_FALSE, (float*) invmodel);
} else {
glUniformMatrix4fv(k3ProgramGetUId(p, "u_model"), 1, GL_FALSE, (float*) modelmat);
glUniformMatrix4fv(k3ProgramGetUId(p, "u_imodel"), 1, GL_FALSE, (float*) invmodel);
}
}
static void setup_glsl_lighting_uniforms(struct k3GLSLP *p, int lightsStart, int lightsCount) {
if(lightsCount > 4) {
lightsCount = 4;
k3Log(k3_ERR, "Max 4 lights per pass");
}
if(lightsStart >= LightCount) {
lightsStart = 0;
lightsCount = 0;
}
if(lightsStart + lightsCount > LightCount) {
lightsCount = LightCount - lightsStart;
}
if(lightsCount) {
// raise(SIGINT);
}
vec4 settings1[4] = {};
vec4 settings2[4] = {};
vec4 colors[4] = {};
for(int i = 0; i < lightsCount; i++) {
struct k3Light *l = &Lights[i + lightsStart];
if(l->type == k3_DIRECTIONAL) {
glm_vec4_copy(l->dir.direction, settings1[i]);
settings2[i][0] = l->dir.cascadeCount;
} else if(l->type == k3_SPOT) {
glm_vec4_copy(l->spot.position, settings1[i]);
float inclination = acosf(l->spot.direction[1]);
float azimuth = atan2f(l->spot.direction[2], l->spot.direction[0]);
settings2[i][0] = inclination;
settings2[i][1] = azimuth;
settings2[i][2] = cosf(l->spot.angle);
settings2[i][3] = l->radius;
} else if(l->type == k3_HALF_OMNI) {
glm_vec4_copy(l->omni.position, settings1[i]);
settings1[i][3] = 2;
settings2[i][3] = l->radius;
} else if(l->type == k3_OMNI) {
glm_vec4_copy(l->omni.position, settings1[i]);
settings1[i][3] = 3;
settings2[i][3] = l->radius;
}
memcpy(colors[i], Lights[lightsStart + i].color, sizeof(vec4));
}
vec4 ambient = {0, 0, 0, 0};
if(lightsStart == 0) {
glm_vec4_copy((vec4) {0.018, 0.018, 0.040, 0}, ambient);
}
if(!k3IsCore) {
glUniform4fvARB(k3ProgramGetUId(p, "u_BaseLightSettings1"), 4, (float*) settings1);
glUniform4fvARB(k3ProgramGetUId(p, "u_BaseLightSettings2"), 4, (float*) settings2);
glUniform4fvARB(k3ProgramGetUId(p, "u_BaseLightColors"), 4, (float*) colors);
glUniform4fvARB(k3ProgramGetUId(p, "u_AmbientLight"), 1, (float*) ambient);
} else {
glUniform4fv(k3ProgramGetUId(p, "u_BaseLightSettings1"), 4, (float*) settings1);
glUniform4fv(k3ProgramGetUId(p, "u_BaseLightSettings2"), 4, (float*) settings2);
glUniform4fv(k3ProgramGetUId(p, "u_BaseLightColors"), 4, (float*) colors);
glUniform4fv(k3ProgramGetUId(p, "u_AmbientLight"), 1, (float*) ambient);
}
}
static void setup_glsl_shadow_uniforms(struct k3GLSLP *p, int atlasUnit, int lightsStart, int lightsCount) {
if(lightsCount > 4) {
lightsCount = 4;
k3Log(k3_ERR, "Max 4 lights per pass");
}
if(!ShadowAtlas) return;
if(LightShadowIrregularMode) {
assert(k3IsCore);
glUniform1i(k3ProgramGetUId(p, "u_pixelsinshadow"), 0);
} else {
size_t vpi = 0;
mat4 m[6];
memset(m, 0, sizeof(m));
vec4 seg[4];
for(int i = 0; i < lightsCount; i++) {
for(int z = 0; z < LightShadows[i + lightsStart].vpCount; z++) {
glm_mat4_copy(LightShadows[i + lightsStart].vp[z], m[vpi]);
vpi++;
}
glm_vec4_copy(LightShadows[i + lightsStart].atlasSegment, seg[i]);
}
// Make it look outside the atlas completely
for(int i = lightsCount; i < 4; i++) {
seg[i][0] = -1;
seg[i][1] = -1;
seg[i][2] = -1;
seg[i][3] = -1;
}
if(!k3IsCore) {
glUniformMatrix4fvARB(k3ProgramGetUId(p, "u_shadows0vp"), vpi, GL_FALSE, (float*) m);
glUniform4fvARB(k3ProgramGetUId(p, "u_shadows0seg"), 4, (float*) seg);
glUniform1iARB(k3ProgramGetUId(p, "u_shadows0atlas"), atlasUnit);
} else {
glUniformMatrix4fv(k3ProgramGetUId(p, "u_shadows0vp"), vpi, GL_FALSE, (float*) m);
glUniform4fv(k3ProgramGetUId(p, "u_shadows0seg"), 4, (float*) seg);
glUniform1i(k3ProgramGetUId(p, "u_shadows0atlas"), atlasUnit);
}
}
}
/*
 * Binds the textures of a material pass to consecutive texture units
 * starting at unit 0, stopping at the first empty slot.
 * Uses direct state access (ARB, then EXT) when available and falls back
 * to glActiveTexture + glBindTexture otherwise.
 * Returns the number of units bound, i.e. the first free unit.
 */
static int bind_mat_textures(struct k3Mat *mat, int pass) {
	int unit = 0;

	while(unit < k3_MAX_GLSL_UNITS) {
		GLuint tex = GL_FROM_K3TEX(mat->passes[pass].units[unit]);
		if(tex == 0) {
			break;
		}

		if(GLAD_GL_ARB_direct_state_access) {
			glBindTextureUnit(unit, tex);
		} else if(GLAD_GL_EXT_direct_state_access) {
			glBindMultiTextureEXT(GL_TEXTURE0 + unit, GL_TEXTURE_2D, tex);
		} else {
			glActiveTexture(GL_TEXTURE0 + unit);
			glBindTexture(GL_TEXTURE_2D, tex);
		}

		unit++;
	}

	return unit;
}
static int bind_shadow_texture(int textureUnit) {
if(LightShadowIrregularMode) {
if(glBindImageTexture) {
glBindImageTexture(0, IrregPixelsInShadow, 0, GL_FALSE, 0, GL_READ_WRITE_ARB, GL_R32UI);
} else {
glBindImageTextureEXT(0, IrregPixelsInShadow, 0, GL_FALSE, 0, GL_READ_WRITE_ARB, GL_R32UI);
}
return textureUnit;
} else {
if(GLAD_GL_ARB_direct_state_access) {
glBindTextureUnit(textureUnit, GL_FROM_K3TEX(ShadowAtlas->depth));
} else if(GLAD_GL_EXT_direct_state_access) {
glBindMultiTextureEXT(GL_TEXTURE0 + textureUnit, GL_TEXTURE_2D, GL_FROM_K3TEX(ShadowAtlas->depth));
} else {
glActiveTexture(GL_TEXTURE0 + textureUnit);
glBindTexture(GL_TEXTURE_2D, GL_FROM_K3TEX(ShadowAtlas->depth));
}
return ++textureUnit;
}
}
/*
 * Prepares a GLSL program for skinned (or unskinned) drawing of a model.
 *
 * Vertex attributes a_boneids / a_boneweights:
 *   - with bones: sourced from the model's vertex buffer, ids at offB and
 *     weights 4 bytes per vertex after that;
 *   - without bones: set to the constants (0,0,0,0) and (1,0,0,0).
 * Uniform u_bonedata:
 *   - with bones: 2 vec4 per bone taken straight from bones;
 *   - without bones: 48 vec4 of (0,0,0,1).
 *
 * An error is logged if the program declares only one of the two attributes.
 */
static void enable_glsl_bones(struct k3GLSLP *p, struct k3Mdl *mdl, struct k3AnimationBone *bones) {
	const bool legacy = !k3IsCore;

	GLint idsLoc = legacy ? glGetAttribLocationARB(p->handle, "a_boneids") : glGetAttribLocation(p->handle, "a_boneids");
	GLint weightsLoc = legacy ? glGetAttribLocationARB(p->handle, "a_boneweights") : glGetAttribLocation(p->handle, "a_boneweights");

	if((idsLoc == -1) != (weightsLoc == -1)) {
		k3Log(k3_ERR, "a_boneids and a_boneweights must be both null or non-null");
	}

	if(idsLoc != -1) {
		if(bones) {
			void *idsOffset = (void*) (mdl->offB + 0 * mdl->verts);
			void *weightsOffset = (void*) (mdl->offB + 4 * mdl->verts);

			if(legacy) {
				glEnableVertexAttribArrayARB(idsLoc);
				glEnableVertexAttribArrayARB(weightsLoc);
				glVertexAttribPointerARB(idsLoc, 4, GL_UNSIGNED_BYTE, GL_FALSE, 0, idsOffset);
				glVertexAttribPointerARB(weightsLoc, 4, GL_UNSIGNED_SHORT, GL_TRUE, 0, weightsOffset);
			} else {
				glEnableVertexAttribArray(idsLoc);
				glEnableVertexAttribArray(weightsLoc);
				glVertexAttribPointer(idsLoc, 4, GL_UNSIGNED_BYTE, GL_FALSE, 0, idsOffset);
				glVertexAttribPointer(weightsLoc, 4, GL_UNSIGNED_SHORT, GL_TRUE, 0, weightsOffset);
			}
		} else if(legacy) {
			glVertexAttrib4fARB(idsLoc, 0, 0, 0, 0);
			glVertexAttrib4fARB(weightsLoc, 1, 0, 0, 0);
		} else {
			glVertexAttrib4f(idsLoc, 0, 0, 0, 0);
			glVertexAttrib4f(weightsLoc, 1, 0, 0, 0);
		}
	}

	GLint boneDataLoc = k3ProgramGetUId(p, "u_bonedata");

	if(bones) {
		if(legacy) {
			glUniform4fvARB(boneDataLoc, 2 * mdl->boneCount, (float*) bones);
		} else {
			glUniform4fv(boneDataLoc, 2 * mdl->boneCount, (float*) bones);
		}
		return;
	}

	// No animation: every vec4 is (0,0,0,1).
	vec4 rest[48] = {0};
	for(int i = 0; i < 48; i++) {
		rest[i][3] = 1;
	}

	if(legacy) {
		glUniform4fvARB(boneDataLoc, 48, (float*) rest);
	} else {
		glUniform4fv(boneDataLoc, 48, (float*) rest);
	}
}
/*
 * Counterpart of enable_glsl_bones: disables the a_boneids / a_boneweights
 * vertex attribute arrays of the program, if it declares them.
 *
 * The attribute lookup now follows the same core/ARB split as
 * enable_glsl_bones; it used to call the ARB entry point unconditionally,
 * even on core contexts. a_boneweights is skipped when the program does
 * not declare it.
 */
static void disable_glsl_bones(struct k3Mdl *mdl, struct k3GLSLP *p) {
	(void) mdl;

	GLint a0;
	GLint a1;
	if(!k3IsCore) {
		a0 = glGetAttribLocationARB(p->handle, "a_boneids");
		a1 = glGetAttribLocationARB(p->handle, "a_boneweights");
	} else {
		a0 = glGetAttribLocation(p->handle, "a_boneids");
		a1 = glGetAttribLocation(p->handle, "a_boneweights");
	}

	if(a0 == -1) {
		return;
	}

	if(!k3IsCore) {
		glDisableVertexAttribArrayARB(a0);
		if(a1 != -1) {
			glDisableVertexAttribArrayARB(a1);
		}
	} else {
		glDisableVertexAttribArray(a0);
		if(a1 != -1) {
			glDisableVertexAttribArray(a1);
		}
	}
}
/*
 * Points the program's a_tangent attribute at the model's tangent array
 * (3 normalised signed bytes per vertex). Does nothing if the model has no
 * tangents or the program does not declare the attribute.
 */
static void enable_glsl_tangents(struct k3GLSLP *p, struct k3Mdl *mdl) {
	if(mdl->offT == -1) {
		return;
	}

	GLint location = k3IsCore
		? glGetAttribLocation(p->handle, "a_tangent")
		: glGetAttribLocationARB(p->handle, "a_tangent");
	if(location == -1) {
		return;
	}

	void *offset = (void*) (uintptr_t) mdl->offT;

	if(k3IsCore) {
		glEnableVertexAttribArray(location);
		glVertexAttribPointer(location, 3, GL_BYTE, GL_TRUE, 0, offset);
	} else {
		glEnableVertexAttribArrayARB(location);
		glVertexAttribPointerARB(location, 3, GL_BYTE, GL_TRUE, 0, offset);
	}
}
/*
 * Counterpart of enable_glsl_tangents: disables the a_tangent attribute
 * array under the same conditions it would have been enabled.
 */
static void disable_glsl_tangents(struct k3GLSLP *p, struct k3Mdl *mdl) {
	if(mdl->offT == -1) {
		return;
	}

	GLint location = k3IsCore
		? glGetAttribLocation(p->handle, "a_tangent")
		: glGetAttribLocationARB(p->handle, "a_tangent");
	if(location == -1) {
		return;
	}

	if(k3IsCore) {
		glDisableVertexAttribArray(location);
	} else {
		glDisableVertexAttribArrayARB(location);
	}
}
static void enable_vertex_buffers(struct k3Mdl *mdl, struct k3GLSLP *p) {
if(!k3IsCore) {
glEnableClientState(GL_VERTEX_ARRAY);
glEnableClientState(GL_NORMAL_ARRAY);
glEnableClientState(GL_TEXTURE_COORD_ARRAY);
glVertexPointer(3, GL_FLOAT, 0, (void*) (uintptr_t) mdl->offV);
glNormalPointer(GL_BYTE, 0, (void*) (uintptr_t) mdl->offN);
glTexCoordPointer(2, GL_FLOAT, 0, (void*) (uintptr_t) mdl->offU);
if(mdl->offC != -1) {
glEnableClientState(GL_COLOR_ARRAY);
glColorPointer(4, GL_UNSIGNED_BYTE, 0, (void*) (uintptr_t) mdl->offC);
} else {
glColor4f(1, 1, 1, 1);
}
} else if(p) {
GLint aPos = glGetAttribLocation(p->handle, "a_pos");
GLint aNormal = glGetAttribLocation(p->handle, "a_normal");
GLint aUv = glGetAttribLocation(p->handle, "a_uv");
GLint aColor = glGetAttribLocation(p->handle, "a_color");
if(aPos != -1) {
glEnableVertexAttribArray(aPos);
glVertexAttribPointer(aPos, 3, GL_FLOAT, GL_FALSE, 0, (void*) (uintptr_t) mdl->offV);
}
if(aNormal != -1) {
glEnableVertexAttribArray(aNormal);
glVertexAttribPointer(aNormal, 3, GL_BYTE, GL_TRUE, 0, (void*) (uintptr_t) mdl->offN);
}
if(aUv != -1) {
glEnableVertexAttribArray(aUv);
glVertexAttribPointer(aUv, 2, GL_FLOAT, GL_FALSE, 0, (void*) (uintptr_t) mdl->offU);
}
if(mdl->offC != -1 && aColor != -1) {
glEnableVertexAttribArray(aColor);
glVertexAttribPointer(aColor, 4, GL_UNSIGNED_BYTE, GL_TRUE, 0, (void*) (uintptr_t) mdl->offC);
} else if(aColor != -1) {
glVertexAttrib4f(aColor, 1, 1, 1, 1);
}
}
}
static void disable_vertex_buffers(struct k3Mdl *mdl, struct k3GLSLP *p) {
if(!k3IsCore) {
glDisableClientState(GL_VERTEX_ARRAY);
glDisableClientState(GL_NORMAL_ARRAY);
glDisableClientState(GL_TEXTURE_COORD_ARRAY);
if(mdl->offC != -1) {
glDisableClientState(GL_COLOR_ARRAY);
}
} else if(p) {
GLint aPos = glGetAttribLocation(p->handle, "a_pos");
GLint aNormal = glGetAttribLocation(p->handle, "a_normal");
GLint aUv = glGetAttribLocation(p->handle, "a_uv");
GLint aColor = glGetAttribLocation(p->handle, "a_color");
if(aPos != -1) {
glDisableVertexAttribArray(aPos);
}
if(aNormal != -1) {
glDisableVertexAttribArray(aNormal);
}
if(aUv != -1) {
glDisableVertexAttribArray(aUv);
}
if(mdl->offC != -1 && aColor != -1) {
glDisableVertexAttribArray(aColor);
}
}
}
// Draws the model's axis-aligned bounding box, grown by the per-pass padding
// mat->passes[pass].aabb on every side, as 12 triangles (36 vertices).
//
// mat:  material; supplies the padding and, on the core profile, the program
//       whose "a_pos" attribute receives the positions.
// pass: index into mat->passes. Both the padding and the program are taken from
//       this pass (the program used to be read from pass 0 unconditionally).
// mdl:  model whose aabb[0] (min corner) / aabb[1] (max corner) is drawn.
//
// Compatibility profile: immediate mode. Core profile: the vertices are
// streamed into a function-local VBO, which stays bound to GL_ARRAY_BUFFER on
// return. Nothing is drawn on core when the program has no "a_pos" attribute.
static void push_aabb(struct k3Mat *mat, int pass, struct k3Mdl *mdl) {
	const float aabb = mat->passes[pass].aabb;
	const float x0 = -aabb + mdl->aabb[0][0], x1 = +aabb + mdl->aabb[1][0];
	const float y0 = -aabb + mdl->aabb[0][1], y1 = +aabb + mdl->aabb[1][1];
	const float z0 = -aabb + mdl->aabb[0][2], z1 = +aabb + mdl->aabb[1][2];
	vec3 verts[] = {
		// z = z0 face
		{x0, y0, z0}, {x0, y1, z0}, {x1, y0, z0},
		{x0, y1, z0}, {x1, y1, z0}, {x1, y0, z0},
		// z = z1 face
		{x0, y0, z1}, {x1, y0, z1}, {x0, y1, z1},
		{x1, y0, z1}, {x1, y1, z1}, {x0, y1, z1},
		// x = x0 face
		{x0, y0, z1}, {x0, y1, z1}, {x0, y0, z0},
		{x0, y1, z1}, {x0, y1, z0}, {x0, y0, z0},
		// x = x1 face
		{x1, y0, z1}, {x1, y0, z0}, {x1, y1, z1},
		{x1, y0, z0}, {x1, y1, z0}, {x1, y1, z1},
		// y = y1 face
		{x0, y1, z1}, {x1, y1, z1}, {x0, y1, z0},
		{x1, y1, z1}, {x1, y1, z0}, {x0, y1, z0},
		// y = y0 face
		{x0, y0, z1}, {x0, y0, z0}, {x1, y0, z1},
		{x0, y0, z0}, {x1, y0, z0}, {x1, y0, z1},
	};
	const size_t vertCount = sizeof(verts) / sizeof(*verts);
	if(!k3IsCore) {
		glBegin(GL_TRIANGLES);
		for(size_t i = 0; i < vertCount; i++) {
			glVertex3f(verts[i][0], verts[i][1], verts[i][2]);
		}
		glEnd();
	} else {
		static GLuint vbo;
		if(!vbo) {
			glGenBuffers(1, &vbo);
			glBindBuffer(GL_ARRAY_BUFFER, vbo);
			glBufferData(GL_ARRAY_BUFFER, sizeof(verts), verts, GL_DYNAMIC_DRAW);
		} else {
			glBindBuffer(GL_ARRAY_BUFFER, vbo);
			glBufferSubData(GL_ARRAY_BUFFER, 0, sizeof(verts), verts);
		}
		GLint aPos = glGetAttribLocation(GL_FROM_K3GLSL(mat->passes[pass].glsl.hp), "a_pos");
		if(aPos == -1) {
			// -1 is not a valid attribute index; enabling it would only raise GL_INVALID_VALUE.
			return;
		}
		glEnableVertexAttribArray(aPos);
		glVertexAttribPointer(aPos, 3, GL_FLOAT, GL_FALSE, 0, (void*) 0);
		glDrawArrays(GL_TRIANGLES, 0, vertCount);
		glDisableVertexAttribArray(aPos);
	}
}
static void apply_cpu_skinning(struct k3Mdl *mdl, struct k3AnimationBone *bones) {
if(mdl->boneCount == 0) {
return;
}
vec3 *boned = alloca(mdl->verts * sizeof(*boned));
for(size_t i = 0; i < mdl->verts; i++) {
glm_vec3_zero(boned[i]);
for(size_t boner = 0; boner < 4; boner++) {
int bone = mdl->cpuSkinning.boneids[i * 4 + boner];
float weight = mdl->cpuSkinning.boneweights[i * 4 + boner] / 65535.f;
vec3 a;
glm_vec3_scale((float*) bones[bone].rotation, 2 * glm_vec3_dot(mdl->cpuSkinning.pos[i], (float*) bones[bone].rotation), a);
vec3 b;
glm_vec3_scale(mdl->cpuSkinning.pos[i], bones[bone].rotation[3] * bones[bone].rotation[3] - glm_vec3_dot((float*) bones[bone].rotation, (float*) bones[bone].rotation), b);
vec3 c;
glm_vec3_cross((float*) bones[bone].rotation, mdl->cpuSkinning.pos[i], c);
glm_vec3_scale(c, 2 * bones[bone].rotation[3], c);
vec3 v;
glm_vec3_add(a, b, v);
glm_vec3_add(v, c, v);
glm_vec3_add(v, (float*) bones[bone].translation, v);
glm_vec3_muladds(v, weight, boned[i]);
}
}
glBufferSubDataARB(GL_ARRAY_BUFFER_ARB, mdl->offV, mdl->verts * sizeof(*boned), boned);
}
// Returns true when `aabb`, after being transformed by `modelmat`, fails the
// cglm box-versus-frustum test against the six planes in `frustum`.
static bool outside_frustum(vec3 *aabb, float *modelmat, vec4 *frustum) {
	vec3 worldBox[2];
	glm_aabb_transform(aabb, modelmat, worldBox);
	const bool visible = glm_aabb_frustum(worldBox, frustum);
	return !visible;
}
// Draws render-queue entries [rbleStart, rbleEnd) once, lit by the batch of
// lights [lightsStart, lightsStart + lightsCount).
//
// projection, view: camera matrices; used here for the culling frustum, the
//                   fixed-function modelview matrix and the GLSL globals.
// transparent:      only entries whose pass-0 `transparent` flag equals this
//                   value are drawn.
// lightsStart:      index of the first light of the batch. A value other than 0
//                   marks a follow-up batch: additive materials are skipped and
//                   non-additive ones are blended on top of the first batch.
// lightsCount:      number of lights in the batch.
//
// Per entry the function selects one of three shading paths: a GLSL program
// (`glslp`), ARB vertex/fragment programs (`arbvp` / `arbfp`) or plain fixed
// function, then draws either the mesh or, when the pass-0 `aabb` value is not
// NaN, the padded bounding box.
//
// NOTE(review): lastGLSLP / lastVP / lastFP start out as "nothing bound" on
// every call, but the GL program and enable state left behind by a previous
// call is not reset here - confirm that mixing shading paths across calls
// cannot leave a stale program active.
static void forward_subpass(mat4 projection, mat4 view, int transparent, int lightsStart, int lightsCount, size_t rbleStart, size_t rbleEnd) {
struct k3Timer timer = k3StartTimer("forward_subpass");
setup_ff_lights(view, lightsStart, lightsCount);
// Frustum planes of the camera, used to cull whole models below.
mat4 viewProj;
glm_mat4_mul(projection, view, viewProj);
vec4 cameraFrustum[6];
glm_frustum_planes(viewProj, cameraFrustum);
// Redundant-state filters: remember what the previous entry used so that
// unchanged state is not set again.
struct k3GLSLP *lastGLSLP = NULL;
GLuint lastVP = 0, lastFP = 0;
struct k3Mat *lastMaterial = NULL;
int lastAdditive = -1;
int lastDepthwrite = -1;
for(size_t rble = rbleStart; rble < rbleEnd; rble++) {
struct k3Mdl *mdl = renderQueue[rble].mdl;
struct k3Mesh *mesh = renderQueue[rble].mesh;
float *modelmat = (float*) renderQueue[rble].modelmat;
struct k3AnimationBone *bones = renderQueue[rble].bones;
struct k3GLSLP *glslp = renderQueue[rble].glslp;
GLuint arbvp = renderQueue[rble].arbvp;
GLuint arbfp = renderQueue[rble].arbfp;
if(outside_frustum(mdl->aabb, modelmat, cameraFrustum)) {
continue;
}
struct k3Mat *mat = &mesh->mat;
if(mat->passes[0].additive && lightsStart != 0) {
// Additive materials aren't affected by lighting. Do not draw more than once.
continue;
}
if(mat->passes[0].transparent != transparent) {
continue;
}
// Blend function and depth mask depend on additive / transparent and on
// whether this is the first light batch (lightsStart == 0).
if(lastAdditive != mat->passes[0].additive) {
if(mat->passes[0].additive) {
glDepthMask(GL_FALSE);
glBlendFuncSeparate(GL_ONE, GL_ONE, GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
} else {
glDepthMask(GL_TRUE);
if(transparent) {
if(lightsStart == 0) {
glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
} else {
glBlendFunc(GL_SRC_ALPHA, GL_ONE);
}
} else {
if(lightsStart == 0) {
glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
} else {
glBlendFunc(GL_ONE, GL_ONE);
}
}
}
lastAdditive = mat->passes[0].additive;
}
// NOTE(review): this block and the additive block above both write the depth
// mask, each guarded by its own "changed" check, so the final mask depends on
// which of the two flags changed last - confirm this is intended.
if(lastDepthwrite != mat->passes[0].depthwrite) {
lastDepthwrite = mat->passes[0].depthwrite;
if(lastDepthwrite) {
glDepthMask(GL_TRUE);
} else {
glDepthMask(GL_FALSE);
}
}
if(glslp) {
// GLSL path.
if(lastGLSLP != glslp) {
if(k3IsCore)
glUseProgram(glslp->handle);
else
glUseProgramObjectARB(glslp->handle);
lastGLSLP = glslp;
setup_glsl_globals(glslp, view);
}
// Uses the global ProjMat rather than the `projection` parameter.
setup_core_projection(glslp, ProjMat);
setup_glsl_mat_uniforms(glslp, mat, 0);
setup_glsl_shadow_uniforms(glslp, mat->passes[0].unitsUsed, lightsStart, lightsCount);
setup_glsl_lighting_uniforms(glslp, lightsStart, lightsCount);
setup_glsl_model_uniforms(glslp, modelmat);
if(mat != lastMaterial) {
bind_mat_textures(mat, 0);
bind_shadow_texture(mat->passes[0].unitsUsed);
}
} else if(!k3IsCore) {
// ARB program / fixed-function path (compatibility profile only).
if(lastGLSLP && GLAD_GL_ARB_shading_language_100) {
glUseProgramObjectARB(0);
}
lastGLSLP = NULL;
if(lastVP != arbvp) {
if(lastVP && !arbvp) {
glDisable(GL_VERTEX_PROGRAM_ARB);
} else if(!lastVP && arbvp) {
glEnable(GL_VERTEX_PROGRAM_ARB);
}
if(arbvp) {
glBindProgramARB(GL_VERTEX_PROGRAM_ARB, arbvp);
}
lastVP = arbvp;
}
if(lastFP != arbfp) {
if(lastFP && !arbfp) {
glDisable(GL_FRAGMENT_PROGRAM_ARB);
} else if(!lastFP && arbfp) {
glEnable(GL_FRAGMENT_PROGRAM_ARB);
}
if(arbfp) {
glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, arbfp);
// NOTE(review): textures are only rebound when the fragment program
// changes; two materials sharing one fragment program would keep the
// first material's textures - confirm.
if(mat != lastMaterial) {
bind_mat_textures(mat, 0);
}
}
lastFP = arbfp;
}
if(!k3IsCore && !arbfp) {
// No fragment program: configure fixed-function texturing and material
// colours from the material's primitive parameters.
glActiveTexture(GL_TEXTURE0);
if(mat->passes[0].units[0]) {
glEnable(GL_TEXTURE_2D);
glBindTexture(GL_TEXTURE_2D, GL_FROM_K3TEX(mat->passes[0].units[0]));
glMaterialfv(GL_FRONT_AND_BACK, GL_AMBIENT_AND_DIFFUSE, (float[]) {1, 1, 1, 1});
} else {
glDisable(GL_TEXTURE_2D);
glMaterialfv(GL_FRONT_AND_BACK, GL_AMBIENT_AND_DIFFUSE, mat->primitive.diffuse);
}
glMaterialfv(GL_FRONT_AND_BACK, GL_SPECULAR, mat->primitive.specular);
glMaterialfv(GL_FRONT_AND_BACK, GL_EMISSION, mat->primitive.emission);
glMateriali(GL_FRONT_AND_BACK, GL_SHININESS, mat->primitive.shininess);
}
}
lastMaterial = mat;
if(mat->passes[0].nocull) {
glDisable(GL_CULL_FACE);
} else {
glEnable(GL_CULL_FACE);
}
if(!k3IsCore) {
if(mat->passes[0].alphatest) {
glEnable(GL_ALPHA_TEST);
glAlphaFunc(GL_GREATER, 0.9f);
} else {
glDisable(GL_ALPHA_TEST);
}
}
if(!k3IsCore) {
// Fixed-function modelview = view * model.
glMatrixMode(GL_MODELVIEW);
mat4 modelview;
glm_mat4_mul(view, modelmat, modelview);
glLoadMatrixf((float*) modelview);
}
glBindBufferARB(GL_ARRAY_BUFFER_ARB, mdl->vstore->gl);
glBindBufferARB(GL_ELEMENT_ARRAY_BUFFER_ARB, mdl->estore->gl);
if(glslp) {
enable_glsl_bones(glslp, mdl, bones);
enable_glsl_tangents(glslp, mdl);
}
if(k3IsSoftSkinning && bones) {
apply_cpu_skinning(mdl, bones);
}
// A NaN pass-0 `aabb` value selects the regular mesh draw; any other value
// draws the padded bounding box instead.
if(isnanf(mat->passes[0].aabb)) {
enable_vertex_buffers(mdl, glslp);
// Indices are 16-bit, hence the byte offset idxStart * 2.
glDrawElements(GL_TRIANGLES, mesh->idxNumber, GL_UNSIGNED_SHORT, (void*) (mesh->idxStart * 2));
disable_vertex_buffers(mdl, glslp);
} else {
push_aabb(mat, 0, mdl);
}
if(glslp) {
disable_glsl_tangents(glslp, mdl);
disable_glsl_bones(mdl, glslp);
}
}
k3EndTimer(timer);
}
// Runs forward_subpass over render-queue entries [rbleStart, rbleEnd) once per
// batch of up to four lights. At least one sub-pass is always issued, even when
// there are no lights at all.
static void forward_light_batches(mat4 projection, mat4 view, int transparent, size_t rbleStart, size_t rbleEnd) {
	int firstLight = 0;
	int remaining = LightCount;
	do {
		forward_subpass(projection, view, transparent, firstLight, remaining > 4 ? 4 : remaining, rbleStart, rbleEnd);
		firstLight += 4;
		remaining -= 4;
	} while(remaining > 0);
}
// Forward-renders the current render queue as seen from camera matrix `cam`
// with projection matrix `projection`.
//
// Both matrices are stored in the CamMat / ProjMat globals, the queue is sorted
// and the common GL state is set up. All opaque entries are then drawn light
// batch by light batch; afterwards every queue entry is visited on its own with
// the transparent filter, running through all of its light batches before
// moving on to the next entry.
void k3PassForward(mat4 projection, mat4 cam) {
	struct k3Timer timer = k3StartTimer("k3PassForward");

	glm_mat4_copy(cam, CamMat);
	glm_mat4_copy(projection, ProjMat);

	queuesort();

	mat4 view;
	glm_mat4_inv_fast(cam, view);

	setup_ff_projection(projection);
	setup_arbprog_globals();

	glEnable(GL_DEPTH_TEST);
	glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
	glDepthFunc(GL_LEQUAL);
	if(GLAD_GL_EXT_framebuffer_sRGB) {
		glEnable(GL_FRAMEBUFFER_SRGB_EXT);
	}
	glEnable(GL_BLEND);
	glEnable(GL_MULTISAMPLE);
	glEnable(GL_CULL_FACE);
	glFrontFace(GL_CCW);
	if(!k3IsCore) {
		glEnable(GL_NORMALIZE);
		glEnable(GL_COLOR_MATERIAL);
		glColorMaterial(GL_FRONT_AND_BACK, GL_AMBIENT_AND_DIFFUSE);
	}

	// Opaque geometry: the whole queue per light batch.
	forward_light_batches(projection, view, 0, 0, renderQueueSize);

	// Transparent geometry: one entry at a time. Horribly inefficient.
	for(size_t entry = 0; entry < renderQueueSize; entry++) {
		forward_light_batches(projection, view, 1, entry, entry + 1);
	}

	k3EndTimer(timer);
}
// Renders the depth of every opaque, non-additive render-queue entry as seen
// from camera matrix `cam` with projection matrix `projection`. Colour writes
// are disabled and stay disabled on return.
//
// projection, cam: also stored in the ProjMat / CamMat globals.
// clear:           non-zero clears the depth buffer first.
// cull:            non-zero enables face culling, zero disables it.
//
// Entries outside the camera frustum are skipped. Fragment programs are turned
// off for the whole pass; per entry either the GLSL program or the ARB vertex
// program of the queue entry is used.
void k3PassDepthOnly(mat4 projection, mat4 cam, int clear, int cull) {
	struct k3Timer timer = k3StartTimer("k3PassDepthOnly");
	glm_mat4_copy(cam, CamMat);
	glm_mat4_copy(projection, ProjMat);
	queuesort();
	mat4 view;
	glm_mat4_inv_fast(cam, view);
	setup_ff_projection(projection);
	setup_arbprog_globals();
	mat4 viewProj;
	glm_mat4_mul(projection, view, viewProj);
	vec4 cameraFrustum[6];
	glm_frustum_planes(viewProj, cameraFrustum);
	glEnable(GL_DEPTH_TEST);
	glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE);
	glDepthMask(GL_TRUE);
	glDepthFunc(GL_LESS);
	if(clear) {
		glClear(GL_DEPTH_BUFFER_BIT);
	}
	// Start from a known state so that lastVP == 0 below really means
	// "vertex programs disabled".
	if(GLAD_GL_ARB_fragment_program) {
		glDisable(GL_FRAGMENT_PROGRAM_ARB);
	}
	if(GLAD_GL_ARB_vertex_program) {
		glDisable(GL_VERTEX_PROGRAM_ARB);
	}
	glFrontFace(GL_CCW);
	if(cull) {
		glEnable(GL_CULL_FACE);
	} else {
		glDisable(GL_CULL_FACE);
	}
	GLuint lastVP = 0;
	struct k3GLSLP *lastGLSLP = NULL;
	for(size_t rble = 0; rble < renderQueueSize; rble++) {
		struct k3Mdl *mdl = renderQueue[rble].mdl;
		struct k3Mesh *mesh = renderQueue[rble].mesh;
		float *modelmat = (float*) renderQueue[rble].modelmat;
		struct k3Mat *mat = &mesh->mat;
		struct k3AnimationBone *bones = renderQueue[rble].bones;
		// Blended geometry does not contribute to the depth pass.
		if(mat->passes[0].additive || mat->passes[0].transparent) {
			continue;
		}
		if(outside_frustum(mdl->aabb, modelmat, cameraFrustum)) {
			continue;
		}
		struct k3GLSLP *glslp = renderQueue[rble].glslp;
		GLuint arbvp = renderQueue[rble].arbvp;
		if(glslp) {
			if(lastGLSLP != glslp) {
				if(k3IsCore) {
					glUseProgram(glslp->handle);
				} else {
					glUseProgramObjectARB(glslp->handle);
				}
				lastGLSLP = glslp;
				setup_glsl_globals(glslp, view);
			}
			setup_core_projection(glslp, projection);
			bind_mat_textures(mat, 0);
			setup_glsl_mat_uniforms(glslp, mat, 0);
			setup_glsl_model_uniforms(glslp, modelmat);
		} else if(!k3IsCore) {
			// Compatibility profile only, so the ARB entry point is the only
			// one that can apply when unbinding the previous GLSL program.
			if(lastGLSLP && GLAD_GL_ARB_shading_language_100) {
				glUseProgramObjectARB(0);
			}
			lastGLSLP = NULL;
			if(arbvp != lastVP) {
				if(arbvp && !lastVP) {
					glEnable(GL_VERTEX_PROGRAM_ARB);
				} else if(!arbvp && lastVP) {
					glDisable(GL_VERTEX_PROGRAM_ARB);
				}
				if(arbvp) {
					glBindProgramARB(GL_VERTEX_PROGRAM_ARB, arbvp);
				}
				lastVP = arbvp;
			}
		}
		if(!k3IsCore) {
			// Fixed-function modelview = view * model.
			glMatrixMode(GL_MODELVIEW);
			mat4 modelview;
			glm_mat4_mul(view, modelmat, modelview);
			glLoadMatrixf((float*) modelview);
			if(mat->passes[0].alphatest) {
				glEnable(GL_ALPHA_TEST);
				glAlphaFunc(GL_GREATER, 0.9f);
			} else {
				glDisable(GL_ALPHA_TEST);
			}
		}
		glBindBufferARB(GL_ARRAY_BUFFER_ARB, mdl->vstore->gl);
		glBindBufferARB(GL_ELEMENT_ARRAY_BUFFER_ARB, mdl->estore->gl);
		if(glslp) {
			enable_glsl_bones(glslp, mdl, bones);
		}
		if(k3IsSoftSkinning && bones) {
			apply_cpu_skinning(mdl, bones);
		}
		// A NaN pass-0 `aabb` value selects the regular mesh draw; any other
		// value draws the padded bounding box instead.
		if(isnanf(mat->passes[0].aabb)) {
			enable_vertex_buffers(mdl, glslp);
			// Indices are 16-bit, hence the byte offset idxStart * 2.
			glDrawElements(GL_TRIANGLES, mesh->idxNumber, GL_UNSIGNED_SHORT, (void*) (mesh->idxStart * 2));
			disable_vertex_buffers(mdl, glslp);
		} else {
			push_aabb(mat, 0, mdl);
		}
		if(glslp) {
			disable_glsl_bones(mdl, glslp);
		}
	}
	glFrontFace(GL_CCW);
	k3EndTimer(timer);
}
// Splits the perspective projection `proj` into `cascades` projections that
// share its field of view and aspect ratio and each cover an equally deep slice
// of the original near..far range. The results go to croppeds[0..cascades-1],
// ordered from nearest to farthest.
static void split_frustum(mat4 proj, int cascades, mat4 croppeds[]) {
	const float fovy = glm_persp_fovy(proj);
	const float aspect = glm_persp_aspect(proj);

	float zNear;
	float zFar;
	glm_persp_decomp_near(proj, &zNear);
	glm_persp_decomp_far(proj, &zFar);

	const float sliceDepth = (zFar - zNear) / cascades;
	int slice = 0;
	while(slice < cascades) {
		const float sliceNear = zNear + slice * sliceDepth;
		const float sliceFar = zNear + (slice + 1) * sliceDepth;
		glm_perspective(fovy, aspect, sliceNear, sliceFar, croppeds[slice]);
		slice++;
	}
}
// Set of shadow views computed for one light by compute_light_views.
struct LightView {
// Number of valid entries in `c`.
uint8_t cams;
// Number of valid entries in `p`. When this is 1, p[0] is shared by every camera.
uint8_t projs;
// Camera matrices, i.e. inverted view matrices (up to six: the omni light case).
mat4 c[6];
// Projection matrices (up to three: one per directional-light cascade).
mat4 p[3];
};
// Fills lv[i] with the shadow cameras and projections of light i, for every
// light in Lights[0..LightCount), using the ProjMat / CamMat globals as the
// main camera.
//
//  - Directional: one orthographic view per cascade, fitted around the
//    corresponding slice of the main frustum. The cascade count is clamped to
//    the capacity of LightView::p.
//  - Spot / half-omni: a single perspective view.
//  - Omni: six views (one per axis direction) sharing one projection.
//  - Any other type: no views.
//
// Returns the total number of cameras over all lights, i.e. how many shadow
// atlas tiles are needed.
static size_t compute_light_views(struct LightView lv[]) {
	enum { MAX_CASCADES = sizeof(lv->p) / sizeof(lv->p[0]) };
	size_t totalTilesUsed = 0;
	for(int i = 0; i < LightCount; i++) {
		struct k3Light *l = &Lights[i];
		// The caller's array is uninitialised; a light of an unhandled type must
		// count as zero tiles rather than whatever happens to be in memory.
		lv[i].projs = 0;
		lv[i].cams = 0;
		if(l->type == k3_DIRECTIONAL) {
			size_t cascadeCount = l->dir.cascadeCount;
			if(cascadeCount > MAX_CASCADES) {
				// More cascades than LightView can hold would overflow `p`.
				cascadeCount = MAX_CASCADES;
			}
			lv[i].projs = cascadeCount;
			lv[i].cams = cascadeCount;
			mat4 croppeds[MAX_CASCADES];
			if(cascadeCount > 0) {
				split_frustum(ProjMat, (int) cascadeCount, croppeds);
			}
			for(size_t cascade = 0; cascade < cascadeCount; cascade++) {
				// World-space corners of this slice of the main frustum.
				mat4 invmainproj;
				glm_mat4_inv_fast(croppeds[cascade], invmainproj);
				mat4 frustummat;
				glm_mat4_mul(CamMat, invmainproj, frustummat);
				vec4 corners[8];
				glm_frustum_corners(frustummat, corners);
				vec4 viewcenter;
				glm_frustum_center(corners, viewcenter);
				// Look along the light direction from the slice centre and take the
				// light-space bounding box of the corners as the orthographic volume.
				mat4 lightview;
				glm_look_anyup(viewcenter, l->dir.direction, lightview);
				vec4 minaabb = {+HUGE_VALF, +HUGE_VALF, +HUGE_VALF};
				vec4 maxaabb = {-HUGE_VALF, -HUGE_VALF, -HUGE_VALF};
				for(int c = 0; c < 8; c++) {
					vec4 lightspaceCorner;
					glm_mat4_mulv(lightview, corners[c], lightspaceCorner);
					glm_vec4_minv(minaabb, lightspaceCorner, minaabb);
					glm_vec4_maxv(maxaabb, lightspaceCorner, maxaabb);
				}
				glm_ortho(minaabb[0], maxaabb[0], minaabb[1], maxaabb[1], minaabb[2] - 50, maxaabb[2], lv[i].p[cascade]);
				glm_mat4_inv_fast(lightview, lv[i].c[cascade]);
			}
		} else if(l->type == k3_SPOT || l->type == k3_HALF_OMNI) {
			lv[i].projs = 1;
			lv[i].cams = 1;
			glm_perspective(l->spot.angle, 1, 0.1, l->radius, lv[i].p[0]);
			mat4 view;
			glm_look_anyup(l->spot.position, l->spot.direction, view);
			glm_mat4_inv_fast(view, lv[i].c[0]);
		} else if(l->type == k3_OMNI) {
			static const vec3 dirs[] = {{0, 0, -1}, {0, 0, 1}, {-1, 0, 0}, {1, 0, 0}, {0, -1, 0}, {0, 1, 0}};
			static const vec3 ups[] = {{0, 1, 0}, {0, 1, 0}, {0, 1, 0}, {0, 1, 0}, {0, 0, 1}, {0, 0, -1}};
			lv[i].projs = 1;
			lv[i].cams = 6;
			mat4 proj;
			glm_perspective(glm_rad(120), 1, 0.1, l->radius, proj);
			glm_mat4_copy(proj, lv[i].p[0]);
			for(int d = 0; d < 6; d++) {
				mat4 view;
				glm_look(l->omni.position, dirs[d], ups[d], view);
				glm_mat4_inv_fast(view, lv[i].c[d]);
			}
		}
		totalTilesUsed += lv[i].cams;
	}
	return totalTilesUsed;
}
#ifdef k3_IRREGULAR_SHADOWS
// One pass of the irregular shadow algorithm over the whole render queue
// (only compiled when k3_IRREGULAR_SHADOWS is defined).
//
// passnum:     1 renders from the main camera (mainproj / maincam) with the
//              material's `hpIrreg1` program; 2 renders from the light
//              (lightproj / lightcam) with `hpIrreg2`.
// lightshadow: index into LightShadows whose multimap buffers are bound.
// layer:       layer of the multimap heads texture to bind; the value 4 also
//              turns colour writes on (debugging aid).
//
// NOTE(review): this function looks stale relative to the rest of the file:
// `glslp` is used below but not declared in this function, the raw GL handle
// `glsl` is handed to helpers that receive a struct k3GLSLP* elsewhere in this
// file, and `glsl` stays uninitialised when passnum is neither 1 nor 2.
// Confirm before enabling k3_IRREGULAR_SHADOWS.
static void pass_irregular(int passnum, mat4 mainproj, mat4 maincam, mat4 lightproj, mat4 lightcam, int lightshadow, int layer) {
glm_mat4_copy(passnum == 1 ? maincam : lightcam, CamMat);
glm_mat4_copy(passnum == 1 ? mainproj : lightproj, ProjMat);
queuesort();
mat4 mainview;
glm_mat4_inv_fast(maincam, mainview);
mat4 lightview;
glm_mat4_inv_fast(lightcam, lightview);
// Light view-projection, uploaded as "u_lightvp" for both passes.
mat4 lightvp;
glm_mat4_mul(lightproj, lightview, lightvp);
setup_ff_projection(passnum == 1 ? mainproj : lightproj);
setup_arbprog_globals();
glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE);
// For debugging
if(layer == 4) {
glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
}
glEnable(GL_DEPTH_TEST);
// Neither pass writes depth. Pass 1 only touches fragments whose depth equals
// the existing depth buffer value; pass 2 draws both faces with LEQUAL.
if(passnum == 1) {
glDepthMask(GL_FALSE);
glDepthFunc(GL_EQUAL);
glEnable(GL_CULL_FACE);
} else {
glDepthMask(GL_FALSE);
glDepthFunc(GL_LEQUAL);
glDisable(GL_CULL_FACE);
}
glFrontFace(GL_CCW);
if(GLAD_GL_ARB_fragment_program)
glDisable(GL_FRAGMENT_PROGRAM_ARB);
if(GLAD_GL_ARB_vertex_program)
glDisable(GL_VERTEX_PROGRAM_ARB);
GLuint lastVP = 0;
GLhandleARB lastGLSL = -1;
for(size_t rble = 0; rble < renderQueueSize; rble++) {
struct k3Mdl *mdl = renderQueue[rble].mdl;
struct k3Mesh *mesh = renderQueue[rble].mesh;
float *modelmat = (float*) renderQueue[rble].modelmat;
struct k3Mat *mat = &mesh->mat;
struct k3AnimationBone *bones = renderQueue[rble].bones;
if(mat->passes[0].additive || mat->passes[0].transparent) {
continue;
}
GLhandleARB glsl;
if(passnum == 1) {
glsl = GL_FROM_K3GLSL(mat->passes[0].glsl.hpIrreg1);
} else if(passnum == 2) {
glsl = GL_FROM_K3GLSL(mat->passes[0].glsl.hpIrreg2);
}
if(glsl) {
if(lastGLSL != glsl) {
if(k3IsCore)
glUseProgram(glsl);
else
glUseProgramObjectARB(glsl);
lastGLSL = glsl;
setup_glsl_globals(glsl, passnum == 1 ? mainview : lightview);
}
setup_core_projection(glsl, passnum == 1 ? mainproj : lightproj);
setup_glsl_mat_uniforms(glsl, mat, 0);
// Binding 0: the light's multimap element buffer.
glBindBufferBaseEXT(GL_SHADER_STORAGE_BUFFER, 0, LightShadows[lightshadow].multimapEls);
if(passnum == 2) {
glUniform1iARB(glGetUniformLocationARB(glsl, "u_lightnum"), lightshadow);
}
// Image unit 0: multimap heads (selected layer); image unit 1: IrregPixelsInShadow.
// Falls back to the EXT entry point when glBindImageTexture is not loaded.
if(glBindImageTexture) {
glBindImageTexture(0, LightShadows[lightshadow].multimapHeads, 0, GL_FALSE, layer, GL_READ_WRITE_ARB, GL_R32UI);
glBindImageTexture(1, IrregPixelsInShadow, 0, GL_FALSE, 0, GL_READ_WRITE_ARB, GL_R32UI);
} else {
glBindImageTextureEXT(0, LightShadows[lightshadow].multimapHeads, 0, GL_FALSE, layer, GL_READ_WRITE_ARB, GL_R32UI);
glBindImageTextureEXT(1, IrregPixelsInShadow, 0, GL_FALSE, 0, GL_READ_WRITE_ARB, GL_R32UI);
}
glUniformMatrix4fvARB(glGetUniformLocationARB(glsl, "u_lightvp"), 1, GL_FALSE, (float*) lightvp);
setup_glsl_model_uniforms(glsl, modelmat);
} else {
k3Log(k3_WARN, "Missing irregular shadow pass %i shader", passnum);
continue;
}
if(!k3IsCore) {
glMatrixMode(GL_MODELVIEW);
mat4 modelview;
glm_mat4_mul(passnum == 1 ? mainview : lightview, modelmat, modelview);
glLoadMatrixf((float*) modelview);
}
glBindBufferARB(GL_ARRAY_BUFFER_ARB, mdl->vstore->gl);
glBindBufferARB(GL_ELEMENT_ARRAY_BUFFER_ARB, mdl->estore->gl);
if(glslp) {
enable_glsl_bones(glslp, mdl, bones);
}
if(k3IsSoftSkinning && bones) {
apply_cpu_skinning(mdl, bones);
}
// A NaN pass-0 `aabb` value selects the regular mesh draw; any other value
// draws the padded bounding box instead.
if(isnanf(mat->passes[0].aabb)) {
enable_vertex_buffers(mdl, glslp);
glDrawElements(GL_TRIANGLES, mesh->idxNumber, GL_UNSIGNED_SHORT, (void*) (mesh->idxStart * 2));
disable_vertex_buffers(mdl, glslp);
} else {
push_aabb(mat, 0, mdl);
}
if(glslp) {
disable_glsl_bones(mdl, glslp);
}
}
}
// Width and height, in texels, of each layer of a light's multimap heads texture.
#define MULTIMAP_HEADS_SIZE 64
// Irregular shadow pass driver (only compiled when k3_IRREGULAR_SHADOWS is
// defined). For every light view it resets the light's multimap buffers and
// runs pass_irregular pass 1 from the main camera, then runs pass 2 for every
// light view at MULTIMAP_HEADS_SIZE resolution, with conservative rasterisation
// when the Intel or NV extension is available.
//
// mainview: offscreen target whose diffuse texture size defines the main view
//           resolution, or NULL to use the size given to k3Resize.
//
// NOTE(review): this function looks stale relative to the rest of the file:
// compute_light_views is called with three out-parameters although the
// definition earlier in this file takes a single struct LightView array, and
// `count` is used without a declaration in this function. Confirm before
// enabling k3_IRREGULAR_SHADOWS.
void k3PassIrregular(struct k3Offscreen *mainview, mat4 mainproj, mat4 maincam) {
static size_t cachedMainViewResolution = 0;
size_t mainViewWidth = mainview ? k3TexSzX(mainview->diffuse) : MainWidth;
size_t mainViewHeight = mainview ? k3TexSzY(mainview->diffuse) : MainHeight;
size_t mainViewResolution = mainViewWidth * mainViewHeight;
// (Re)allocate the per-pixel texture on first use or when the pixel count of
// the main view changed.
if(!IrregPixelsInShadow || cachedMainViewResolution != mainViewResolution) {
if(!IrregPixelsInShadow) {
glGenTextures(1, &IrregPixelsInShadow);
}
glBindTexture(GL_TEXTURE_2D, IrregPixelsInShadow);
glTexImage2D(GL_TEXTURE_2D, 0, GL_R32UI, mainViewWidth, mainViewHeight, 0, GL_RED_INTEGER_EXT, GL_UNSIGNED_INT, NULL);
}
mat4 *projs;
mat4 *cams;
char *isCube;
count = compute_light_views(&projs, &cams, &isCube);
LightShadowsCount = count;
// Grow the LightShadows array when needed; the old contents are discarded.
if(LightShadowsCount > LightShadowsCapacity) {
_mm_free(LightShadows);
LightShadows = _mm_malloc(sizeof(*LightShadows) * LightShadowsCount, 16);
memset(LightShadows, 0, sizeof(*LightShadows) * LightShadowsCount);
LightShadowsCapacity = LightShadowsCount;
}
LightShadowIrregularMode = true;
int ls = -1, layer = 0;
// TODO: document :)
// As written: a view flagged in isCube becomes the next layer of the current
// light shadow; any other view starts a new light shadow at layer 0.
for(int lv = 0; lv < count; lv++) {
if(isCube[lv]) {
layer++;
} else {
layer = 0;
ls++;
}
if(!isCube[lv]) {
if(!LightShadows[ls].multimapEls || cachedMainViewResolution != mainViewResolution) {
if(!LightShadows[ls].multimapEls) {
glGenBuffersARB(1, &LightShadows[ls].multimapEls);
}
glBindBufferARB(GL_SHADER_STORAGE_BUFFER, LightShadows[ls].multimapEls);
glBufferDataARB(GL_SHADER_STORAGE_BUFFER, 4 + 24 * (1500000), NULL, GL_STREAM_DRAW_ARB);
}
if(!LightShadows[ls].multimapHeads) {
glGenTextures(1, &LightShadows[ls].multimapHeads);
glBindTexture(GL_TEXTURE_2D_ARRAY_EXT, LightShadows[ls].multimapHeads);
glTexImage3D(GL_TEXTURE_2D_ARRAY_EXT, 0, GL_R32UI, MULTIMAP_HEADS_SIZE, MULTIMAP_HEADS_SIZE, 6, 0, GL_RED_INTEGER_EXT, GL_UNSIGNED_INT, NULL);
}
// Set multimap heads to all -1, to signify end of linked lists
glBindTexture(GL_TEXTURE_2D_ARRAY_EXT, LightShadows[ls].multimapHeads);
void *allones = malloc(MULTIMAP_HEADS_SIZE * MULTIMAP_HEADS_SIZE * sizeof(GLuint) * 6);
memset(allones, 0xFF, sizeof(GLuint) * MULTIMAP_HEADS_SIZE * MULTIMAP_HEADS_SIZE * 6);
glTexSubImage3D(GL_TEXTURE_2D_ARRAY_EXT, 0, 0, 0, 0, MULTIMAP_HEADS_SIZE, MULTIMAP_HEADS_SIZE, 6, GL_RED_INTEGER_EXT, GL_UNSIGNED_INT, allones);
free(allones);
// Set next free index to 0
glBindBufferARB(GL_SHADER_STORAGE_BUFFER, LightShadows[ls].multimapEls);
glBufferSubDataARB(GL_SHADER_STORAGE_BUFFER, 0, sizeof(GLuint), &(GLuint) {0});
}
pass_irregular(1, mainproj, maincam, projs[lv], cams[lv], ls, layer);
}
cachedMainViewResolution = mainViewResolution;
// Barrier between the pass-1 writes and pass 2.
glMemoryBarrier(GL_ALL_BARRIER_BITS);
glViewport(0, 0, MULTIMAP_HEADS_SIZE, MULTIMAP_HEADS_SIZE);
if(GLAD_GL_INTEL_conservative_rasterization) {
glEnable(GL_CONSERVATIVE_RASTERIZATION_INTEL);
} else if(GLAD_GL_NV_conservative_raster) {
glEnable(GL_CONSERVATIVE_RASTERIZATION_NV);
}
ls = -1, layer = 0;
for(int lv = 0; lv < count; lv++) {
if(isCube[lv]) {
layer++;
} else {
layer = 0;
ls++;
}
pass_irregular(2, mainproj, maincam, projs[lv], cams[lv], ls, layer);
}
_mm_free(projs);
_mm_free(cams);
_mm_free(isCube);
if(GLAD_GL_INTEL_conservative_rasterization) {
glDisable(GL_CONSERVATIVE_RASTERIZATION_INTEL);
} else if(GLAD_GL_NV_conservative_raster) {
glDisable(GL_CONSERVATIVE_RASTERIZATION_NV);
}
// Restore the main view viewport.
glViewport(0, 0, mainViewWidth, mainViewHeight);
glMemoryBarrier(GL_ALL_BARRIER_BITS);
}
#else
// Stand-in used when the library is built without k3_IRREGULAR_SHADOWS:
// ignores its arguments and only logs a warning.
void k3PassIrregular(struct k3Offscreen *mainview, mat4 mainproj, mat4 maincam) {
	(void) mainview;
	(void) mainproj;
	(void) maincam;
	k3Log(k3_WARN, "k3_IRREGULAR_SHADOWS is off");
}
#endif
// Constructs shadowmap atlas, saves `offscr` for own use
//
// projection, cam: main camera; stored in ProjMat / CamMat and used to fit the
//                  directional-light cascades.
// offscr:          offscreen target whose (square) depth texture becomes the
//                  atlas. It is remembered in ShadowAtlas.
// cellSizeLimit:   when > 0, upper bound for the side length of one atlas tile.
//
// Every shadow camera of every light gets one tile of a square grid that is
// just large enough to hold all of them; each tile is filled with
// k3PassDepthOnly. Per light the view-projection matrices and the atlas segment
// (index of its first tile, tile size relative to the atlas) are written to
// LightShadows.
//
// The timer is closed on every exit path, including the early outs.
void k3PassShadowmap(mat4 projection, mat4 cam, struct k3Offscreen *offscr, float cellSizeLimit) {
	struct k3Timer timer = k3StartTimer("k3PassShadowmap");
	glm_mat4_copy(projection, ProjMat);
	glm_mat4_copy(cam, CamMat);
	struct LightView *views = alloca(sizeof(*views) * LightCount);
	size_t totalTilesUsed = compute_light_views(views);
	ShadowAtlas = offscr;
	LightShadowIrregularMode = false;
	LightShadowsCount = LightCount;
	// Grow the LightShadows array when needed; the old contents are discarded.
	if(LightShadowsCount > LightShadowsCapacity) {
		_mm_free(LightShadows);
		LightShadows = _mm_malloc(sizeof(*LightShadows) * LightShadowsCount, 16);
		memset(LightShadows, 0, sizeof(*LightShadows) * LightShadowsCount);
		LightShadowsCapacity = LightShadowsCount;
	}
	if(totalTilesUsed == 0) {
		k3EndTimer(timer);
		return;
	}
	if(k3TexSzX(offscr->depth) != k3TexSzY(offscr->depth)) {
		k3Log(k3_ERR, "Square texture expected for shadow map atlas");
		k3EndTimer(timer);
		return;
	}
	// Smallest n such that an n-by-n grid holds every tile.
	int cellsPerDimension = 1;
	while((size_t) cellsPerDimension * cellsPerDimension < totalTilesUsed) {
		cellsPerDimension++;
	}
	uint16_t sz = k3TexSzX(offscr->depth);
	float cellSz = (float) sz / cellsPerDimension;
	if(cellSizeLimit > 0) {
		cellSz = fminf(cellSizeLimit, cellSz);
	}
	k3BeginOffscreen(offscr);
	// Running tile index over all lights.
	size_t i = 0;
	for(size_t li = 0; li < LightCount; li++) {
		struct LightView *lv = &views[li];
		for(int camIdx = 0; camIdx < lv->cams; camIdx++) {
			int cellX = i % cellsPerDimension;
			int cellY = i / cellsPerDimension;
			int vp[] = {
				(int) roundf(cellX * cellSz),
				(int) roundf(cellY * cellSz),
				(int) cellSz,
				(int) cellSz
			};
			mat4 view = GLM_MAT4_IDENTITY_INIT;
			glm_mat4_inv(lv->c[camIdx], view);
			mat4 proj = GLM_MAT4_IDENTITY_INIT;
			// A single projection is shared by all cameras of the light.
			glm_mat4_copy(lv->projs == 1 ? lv->p[0] : lv->p[camIdx], proj);
			LightShadows[li].vpCount = lv->projs;
			if(lv->projs > 1 || camIdx == 0) {
				glm_mat4_mul(proj, view, LightShadows[li].vp[camIdx]);
			}
			if(camIdx == 0) {
				glm_vec4_copy((vec4) {i, (float) cellSz / sz, 0, 0}, LightShadows[li].atlasSegment);
			}
			glViewport(vp[0], vp[1], vp[2], vp[3]);
			// Only the very first tile clears the atlas depth buffer.
			k3PassDepthOnly(proj, lv->c[camIdx], i == 0, false);
			i++;
		}
	}
	k3EndOffscreen(offscr);
	k3EndTimer(timer);
}
// Empties the render queue by resetting its element count to zero.
void k3BatchClear() {
	renderQueueSize = 0;
}
// Compatibility-profile half of k3CubemapTraditional: draws the six faces of a
// unit cube in immediate mode, with the vertex position doubling as the cube
// map texture coordinate. Programs, lighting and culling are switched off and
// the translation of the view matrix is removed.
static void cubemap_traditional_legacy(mat4 proj, mat4 cam) {
	mat4 mv;
	glm_mat4_inv_fast(cam, mv);
	mv[3][0] = mv[3][1] = mv[3][2] = 0;

	glMatrixMode(GL_PROJECTION);
	glLoadMatrixf((float*) proj);
	glMatrixMode(GL_MODELVIEW);
	glLoadMatrixf((float*) mv);

	if(!GLAD_GL_ARB_shading_language_100) {
		glDisable(GL_LIGHTING);
	}
	glDisable(GL_CULL_FACE);
	if(GLAD_GL_ARB_shading_language_100) {
		glUseProgramObjectARB(0);
	}
	if(GLAD_GL_ARB_vertex_program) {
		glDisable(GL_VERTEX_PROGRAM_ARB);
	}
	if(GLAD_GL_ARB_fragment_program) {
		glDisable(GL_FRAGMENT_PROGRAM_ARB);
	}

	glEnable(GL_TEXTURE_CUBE_MAP);
	//glEnable(0x884F);
	glBegin(GL_QUADS);
	glColor3f(1, 1, 1);
	// Faces perpendicular to Z.
	for(int z = -1; z <= 1; z += 2) {
		glTexCoord3f(-1, -1, z);
		glVertex3f(-1, -1, z);
		glTexCoord3f(+1, -1, z);
		glVertex3f(+1, -1, z);
		glTexCoord3f(+1, +1, z);
		glVertex3f(+1, +1, z);
		glTexCoord3f(-1, +1, z);
		glVertex3f(-1, +1, z);
	}
	// Faces perpendicular to X.
	for(int x = -1; x <= 1; x += 2) {
		glTexCoord3f(x, -1, -1);
		glVertex3f(x, -1, -1);
		glTexCoord3f(x, +1, -1);
		glVertex3f(x, +1, -1);
		glTexCoord3f(x, +1, +1);
		glVertex3f(x, +1, +1);
		glTexCoord3f(x, -1, +1);
		glVertex3f(x, -1, +1);
	}
	// Faces perpendicular to Y.
	for(int y = -1; y <= 1; y += 2) {
		glTexCoord3f(-1, y, -1);
		glVertex3f(-1, y, -1);
		glTexCoord3f(+1, y, -1);
		glVertex3f(+1, y, -1);
		glTexCoord3f(+1, y, +1);
		glVertex3f(+1, y, +1);
		glTexCoord3f(-1, y, +1);
		glVertex3f(-1, y, +1);
	}
	glEnd();
	glDisable(GL_TEXTURE_CUBE_MAP);
}
// Core-profile half of k3CubemapTraditional: draws one screen-covering triangle
// with basicCubemapProgram, which derives the lookup direction from "u_vpinv"
// (translation-free camera matrix times inverse projection).
static void cubemap_traditional_core(mat4 proj, mat4 cam) {
	GLuint prog = GL_FROM_K3GLSL(basicCubemapProgram);
	glUseProgram(prog);

	mat4 cam0;
	glm_mat4_copy(cam, cam0);
	cam0[3][0] = 0;
	cam0[3][1] = 0;
	cam0[3][2] = 0;

	mat4 pinv;
	glm_mat4_inv_fast(proj, pinv);

	mat4 vpinv;
	glm_mat4_mul(cam0, pinv, vpinv);

	glUniformMatrix4fv(glGetUniformLocation(prog, "u_vpinv"), 1, GL_FALSE, (float*) vpinv);
	glDrawArrays(GL_TRIANGLES, 0, 3);
	// For some reason this is necessary?!?!?!
	// PLEASE SOMEONE FIGURE OUT
	glUseProgram(0);
}
// Draws a cube map as a background around the camera, with depth writes off.
//
// tex:  must be a cube map; otherwise an error is logged and 0 is returned
//       without touching any GL state.
// proj: projection matrix.
// cam:  camera matrix; its translation is ignored.
//
// Returns 1 after drawing. Depth writes are re-enabled before returning.
int k3CubemapTraditional(struct k3Tex *tex, mat4 proj, mat4 cam) {
	if(!tex->cubemap) {
		k3Log(k3_ERR, "k3CubemapTraditional accepts only cubemaps");
		return 0;
	}

	glDepthMask(GL_FALSE);
	glActiveTexture(GL_TEXTURE0);
	glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);

	if(k3IsCore) {
		cubemap_traditional_core(proj, cam);
	} else {
		cubemap_traditional_legacy(proj, cam);
	}

	glDepthMask(GL_TRUE);
	return 1;
}
// OpenGL debug-output callback: forwards driver messages to k3Log, mapping high
// severity to k3_ERR, medium to k3_WARN and everything else to k3_INFO.
//
// The last HISTORY_SIZE distinct messages are remembered and an incoming
// message that exactly equals one of them is dropped, so that a message
// repeated every frame does not flood the log.
//
// length/message: message text; `length` excludes any terminating NUL. An empty
// message is handled, and a negative length is treated as "NUL-terminated".
// source, type, id, userParam: unused.
void GlCallback(GLenum source, GLenum type, GLuint id, GLenum severity, GLsizei length, const GLchar *message, const void *userParam) {
	(void) source;
	(void) type;
	(void) id;
	(void) userParam;
	if(!message) {
		return;
	}
	size_t len = length < 0 ? strlen(message) : (size_t) length;
	// Never look past an embedded terminator.
	const char *nul = memchr(message, '\0', len);
	if(nul) {
		len = (size_t) (nul - message);
	}
	// Some messages end in newlines; remove.
	if(len > 0 && message[len - 1] == '\n') {
		len--;
	}
	// A history is employed to prevent spammings in the debug callback.
	#define HISTORY_SIZE 8
	static char *history[HISTORY_SIZE];
	static int historyNext = 0;
	int newish = 1;
	for(int i = 0; i < HISTORY_SIZE; i++) {
		// Compare whole strings: a mere prefix match must not suppress a message.
		if(history[i] && strlen(history[i]) == len && !memcmp(history[i], message, len)) {
			newish = 0;
			break;
		}
	}
	if(!newish) {
		return;
	}
	enum k3LogLevel lvl;
	switch(severity) {
		case GL_DEBUG_SEVERITY_HIGH_ARB:
			lvl = k3_ERR;
			break;
		case GL_DEBUG_SEVERITY_MEDIUM_ARB:
			lvl = k3_WARN;
			break;
		default:
			lvl = k3_INFO;
			break;
	}
	k3Log(lvl, "%.*s", (int) len, message);
	// Overwrite the oldest slot. On allocation failure the old entry is kept and
	// the message simply is not remembered.
	char *slot = realloc(history[historyNext], len + 1);
	if(slot) {
		memcpy(slot, message, len);
		slot[len] = 0;
		history[historyNext] = slot;
		historyNext = (historyNext + 1) % HISTORY_SIZE;
	}
}
#include"compr/bc7enc.h"
// Compresses a tightly packed 8-bit RGBA image to BC7 with bc7enc.
//
// pixels:         width * height RGBA8 pixels, rows stored top to bottom
//                 without padding.
// width, height:  image size in pixels; need not be multiples of 4.
// externalFmt:    must be GL_RGBA (asserted).
// intype:         must be GL_UNSIGNED_BYTE (asserted).
// compressedSize: receives the size of the returned buffer in bytes
//                 (16 bytes per 4x4 block), or 0 on allocation failure.
//
// Returns a buffer allocated with calloc that the caller must free, or NULL
// when the allocation fails.
//
// Blocks that overhang the right or bottom edge are padded by repeating the
// nearest edge pixel, so no memory outside the image is ever read. All offsets
// are computed in size_t so that large images cannot overflow `int`.
static void *compress_rgba_bc7(const void *pixels, uint16_t width, uint16_t height, GLenum externalFmt, GLenum intype, size_t *compressedSize) {
	static bool called = false;
	if(!called) {
		bc7enc_compress_block_init();
		called = true;
	}
	assert(externalFmt == GL_RGBA);
	assert(intype == GL_UNSIGNED_BYTE);
	const uint8_t *src = pixels;
	const size_t blockWidth = ((size_t) width + 3) / 4;
	const size_t blockHeight = ((size_t) height + 3) / 4;
	const size_t blocks = blockWidth * blockHeight;
	uint8_t *output = calloc(blocks, 16);
	if(!output) {
		*compressedSize = 0;
		return NULL;
	}
	*compressedSize = blocks * 16;
	#pragma omp parallel
	{
		bc7enc_compress_block_params p;
		bc7enc_compress_block_params_init(&p);
		#pragma omp for
		for(size_t blk = 0; blk < blocks; blk++) {
			uint8_t blockPixels[64];
			const size_t x0 = (blk % blockWidth) * 4;
			const size_t y0 = (blk / blockWidth) * 4;
			for(size_t row = 0; row < 4; row++) {
				// Clamp to the last image row for blocks overhanging the bottom edge.
				const size_t sy = y0 + row < height ? y0 + row : (size_t) height - 1;
				const uint8_t *srcRow = src + 4 * (sy * width);
				if(x0 + 4 <= width) {
					// Block row lies fully inside the image: copy four pixels at once.
					memcpy(blockPixels + row * 16, srcRow + 4 * x0, 16);
				} else {
					// Clamp to the last image column for blocks overhanging the right edge.
					for(size_t col = 0; col < 4; col++) {
						const size_t sx = x0 + col < width ? x0 + col : (size_t) width - 1;
						memcpy(blockPixels + row * 16 + col * 4, srcRow + 4 * sx, 4);
					}
				}
			}
			bc7enc_compress_block(output + blk * 16, blockPixels, &p);
		}
	}
	return output;
}
void k3Init(bool enableTextureCompression, bool enableMipmapping) {
if(GLAD_GL_ARB_debug_output) {
glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS_ARB);
glDebugMessageCallbackARB(GlCallback, NULL);
}
GLuint i;
glGetIntegerv(GL_CONTEXT_PROFILE_MASK, &i);
if(i & GL_CONTEXT_CORE_PROFILE_BIT) {
k3IsCore = 1;
} else {
k3IsCore = 0;
}
if(k3IsCore) {
GLuint lol;
glGenVertexArrays(1, &lol);
glBindVertexArray(lol);
basicBlitProgram = k3ProgramGLSL(k3ShaderGLSLV(
"#version 330\n"
"uniform vec2 u_sz;"
"const vec4 positions[3] = vec4[3] (vec4(-1, -1, 0, 1), vec4(3, -1, 0, 1), vec4(-1, 3, 0, 1));\n"
"out vec2 v_uv;\n"
"void main() {\n"
" v_uv = positions[gl_VertexID].xy * (1.0 + 1.0 / u_sz) * 0.5 + 0.5;\n"
" gl_Position = positions[gl_VertexID];\n"
"}\n",
NULL), k3ShaderGLSLF(
"#version 330\n"
"uniform sampler2D u_tex;\n"
"in vec2 v_uv;\n"
"out vec4 fragcol;\n"
"void main() {\n"
" fragcol = texture2D(u_tex, v_uv);\n"
"}\n",
NULL), NULL);
basicCubemapProgram = k3ProgramGLSL(k3ShaderGLSLV(
"#version 330\n"
"const vec4 positions[3] = vec4[3] (vec4(-1, -1, 0, 1), vec4(3, -1, 0, 1), vec4(-1, 3, 0, 1));\n"
"uniform mat4 u_vpinv;\n"
"out vec3 v_uv;\n"
"void main() {\n"
" v_uv = (u_vpinv * vec4(positions[gl_VertexID].xyz, 1)).xyz;\n"
" gl_Position = positions[gl_VertexID];\n"
"}\n"
, NULL), k3ShaderGLSLF(
"#version 330\n"
"uniform samplerCube u_tex;\n"
"in vec3 v_uv;\n"
"out vec4 fragcolor;\n"
"void main() {\n"
" fragcolor = texture(u_tex, v_uv);\n"
"}\n"
, NULL), NULL);
}
if(enableTextureCompression) {
/*if(GLAD_GL_KHR_texture_compression_astc_ldr) {
TextureCompressionEnabled = true;
} else */ if(GLAD_GL_ARB_texture_compression_bptc || GLAD_GL_VERSION_4_2) {
TextureCompressionEnabled = true;
TextureCompressionSRGBA = GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM;
TextureCompressionRGBA = GL_COMPRESSED_RGBA_BPTC_UNORM;
TextureOfflineCompressor = compress_rgba_bc7;
} else if(GL_EXT_texture_compression_s3tc) {
TextureCompressionEnabled = true;
TextureCompressionSRGBA = GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT;
TextureCompressionRGBA = GL_COMPRESSED_RGBA_S3TC_DXT5_EXT;
}
} else {
TextureCompressionEnabled = false;
}
MipmappingEnabled = enableMipmapping;
}
// Splits shader source into a list of strings, expanding "#include <name>"
// lines through the loader callback.
//
// src    mutable, NUL-terminated shader source. It is edited in place (include
//        lines are cut out with NUL bytes) and the returned list points into it.
// ldr    maps an include name to its NUL-terminated contents; may be NULL, in
//        which case include lines are reported and left in the source.
// strs   receives a calloc'ed array of string pointers; caller frees the array.
// sizes  receives a calloc'ed array with the length of each string; caller
//        frees the array.
//
// Returns the number of entries in *strs / *sizes.
//
// An include directive is only recognised at the very start of a line. Text
// returned by the loader is scanned for further includes as well, which means
// it is written to in the same way as src.
static size_t preprocess(char *src, const char*(*ldr)(const char *fn), const char ***strs, GLuint **sizes) {
	static const char directive[] = "#include ";
	const size_t directiveLen = sizeof(directive) - 1;

	size_t len = 1;
	*strs = calloc(len, sizeof(**strs));
	*sizes = calloc(len, sizeof(**sizes));
	(*strs)[0] = src;
	(*sizes)[0] = strlen(src);

	for(size_t i = 0; i < len; i++) {
		char *base = (char*) (*strs)[i];

		// Find the first occurrence that starts a line; an earlier occurrence
		// in the middle of a line must not hide a later real directive.
		char *include = strstr(base, directive);
		while(include && include != base && include[-1] != '\n') {
			include = strstr(include + 1, directive);
		}
		if(!include) {
			continue;
		}

		// Terminate the file name. The directive may be the last line of the
		// source and lack a trailing newline.
		char *end = strchr(include, '\n');
		const bool hasNewline = end != NULL;
		if(hasNewline) {
			*end = 0;
		} else {
			end = include + strlen(include);
		}
		const char *filename = include + directiveLen;

		if(!ldr) {
			k3Log(k3_ERR, "Cannot include %s without a loader.", filename);
			// Undo the cut so the source is passed on without an embedded NUL.
			if(hasNewline) {
				*end = '\n';
			}
			continue;
		}

		const char *included = ldr(filename);
		if(!included) {
			k3Log(k3_ERR, "Failed to load include %s.", filename);
			included = "";
		}

		// Replace entry i with three entries: text before the directive, the
		// included text, and the text after the directive line.
		len += 2;
		*strs = realloc(*strs, sizeof(**strs) * (len));
		*sizes = realloc(*sizes, sizeof(**sizes) * (len));
		memmove(*strs + i + 3, *strs + i + 1, sizeof(**strs) * (len - i - 3));
		memmove(*sizes + i + 3, *sizes + i + 1, sizeof(**sizes) * (len - i - 3));

		(*sizes)[i] = include - base;
		(*strs)[i + 1] = included;
		(*sizes)[i + 1] = strlen(included);
		(*strs)[i + 2] = hasNewline ? end + 1 : end;
		(*sizes)[i + 2] = strlen((*strs)[i + 2]);
	}

	return len;
}
// Compiles a GLSL vertex shader from src_, expanding "#include " lines via ldr.
// Uses the core entry points when k3IsCore is set and the ARB shader-object
// entry points otherwise. A compile failure is logged; the shader handle is
// returned in either case, packed into an opaque pointer.
struct k3GLSLV *k3ShaderGLSLV(const char *src_, const char*(*ldr)(const char *fn)) {
	// preprocess() edits the text it is given, so work on a private copy.
	char *source = strdup(src_);
	const char **pieces;
	GLuint *pieceLens;
	size_t pieceCount = preprocess(source, ldr, &pieces, &pieceLens);

	GLhandleARB shader;
	int status;
	char infoLog[256];
	GLsizei infoLen;

	if(k3IsCore) {
		shader = glCreateShader(GL_VERTEX_SHADER);
		glShaderSource(shader, pieceCount, pieces, pieceLens);
		glCompileShader(shader);
		glGetShaderiv(shader, GL_COMPILE_STATUS, &status);
		if(status == GL_FALSE) {
			glGetShaderInfoLog(shader, sizeof(infoLog), &infoLen, infoLog);
			k3Log(k3_ERR, "Failed to compile GLSL vertex shader: %.*s", infoLen, infoLog);
		}
	} else {
		shader = glCreateShaderObjectARB(GL_VERTEX_SHADER_ARB);
		glShaderSourceARB(shader, pieceCount, pieces, pieceLens);
		glCompileShaderARB(shader);
		glGetObjectParameterivARB(shader, GL_OBJECT_COMPILE_STATUS_ARB, &status);
		if(status == GL_FALSE) {
			glGetInfoLogARB(shader, sizeof(infoLog), &infoLen, infoLog);
			k3Log(k3_ERR, "Failed to compile GLSL vertex shader: %.*s", infoLen, infoLog);
		}
	}

	free(pieces);
	free(pieceLens);
	free(source);

	return (struct k3GLSLV*) (uintptr_t) shader;
}
// Compiles a GLSL fragment shader from src_, expanding "#include " lines via
// ldr. Uses the core entry points when k3IsCore is set and the ARB
// shader-object entry points otherwise. A compile failure is logged; the
// shader handle is returned in either case, packed into an opaque pointer.
struct k3GLSLF *k3ShaderGLSLF(const char *src_, const char*(*ldr)(const char *fn)) {
	// preprocess() edits the text it is given, so work on a private copy.
	char *source = strdup(src_);
	const char **pieces;
	GLuint *pieceLens;
	size_t pieceCount = preprocess(source, ldr, &pieces, &pieceLens);

	GLhandleARB shader;
	int status;
	char infoLog[256];
	GLsizei infoLen;

	if(k3IsCore) {
		shader = glCreateShader(GL_FRAGMENT_SHADER);
		glShaderSource(shader, pieceCount, pieces, pieceLens);
		glCompileShader(shader);
		glGetShaderiv(shader, GL_COMPILE_STATUS, &status);
		if(status == GL_FALSE) {
			glGetShaderInfoLog(shader, sizeof(infoLog), &infoLen, infoLog);
			k3Log(k3_ERR, "Failed to compile GLSL fragment shader: %.*s", infoLen, infoLog);
		}
	} else {
		shader = glCreateShaderObjectARB(GL_FRAGMENT_SHADER_ARB);
		glShaderSourceARB(shader, pieceCount, pieces, pieceLens);
		glCompileShaderARB(shader);
		glGetObjectParameterivARB(shader, GL_OBJECT_COMPILE_STATUS_ARB, &status);
		if(status == GL_FALSE) {
			glGetInfoLogARB(shader, sizeof(infoLog), &infoLen, infoLog);
			k3Log(k3_ERR, "Failed to compile GLSL fragment shader: %.*s", infoLen, infoLog);
		}
	}

	free(pieces);
	free(pieceLens);
	free(source);

	return (struct k3GLSLF*) (uintptr_t) shader;
}
struct k3GLSLG *k3ShaderGLSLG(const char *src_, const char*(*ldr)(const char *fn)) {
GLhandleARB p = glCreateShaderObjectARB(GL_GEOMETRY_SHADER_EXT);
char *src = strdup(src_);
const char **strs;
GLuint *sizes;
size_t listsz = preprocess(src, ldr, &strs, &sizes);
glShaderSourceARB(p, listsz, strs, sizes);
glCompileShaderARB(p);
int i;
glGetObjectParameterivARB(p, GL_OBJECT_COMPILE_STATUS_ARB, &i);
if(i == GL_FALSE) {
char buf[256];
GLsizei len;
glGetInfoLogARB(p, sizeof(buf), &len, buf);
k3Log(k3_ERR, "Failed to compile GLSL geometry shader: %.*s", len, buf);
}
free(strs);
free(sizes);
free(src);
return (struct k3GLSLG*) (uintptr_t) p;
}
struct k3GLSLP *k3ProgramGLSL(struct k3GLSLV *vs, struct k3GLSLF *fs, struct k3GLSLG *gs) {
GLhandleARB prog;
struct k3GLSLP *ret = calloc(1, sizeof(*ret));
if(!k3IsCore) {
prog = glCreateProgramObjectARB();
glAttachObjectARB(prog, (uintptr_t) vs);
glAttachObjectARB(prog, (uintptr_t) fs);
if(gs) glAttachObjectARB(prog, (uintptr_t) gs);
glLinkProgramARB(prog);
int i;
glGetObjectParameterivARB(prog, GL_OBJECT_LINK_STATUS_ARB, &i);
if(i == GL_FALSE) {
char buf[256];
GLsizei len;
glGetInfoLogARB(prog, sizeof(buf), &len, buf);
k3Log(k3_ERR, "Failed to link GLSL program: %.*s", len, buf);
}
GLint maxLength;
glGetObjectParameterivARB(prog, GL_OBJECT_ACTIVE_UNIFORM_MAX_LENGTH_ARB, &maxLength);
char *name = alloca(maxLength + 1);
memset(name, 0, maxLength + 1);
GLint uniformCount;
glGetObjectParameterivARB(prog, GL_OBJECT_ACTIVE_UNIFORMS_ARB, &uniformCount);
ret->handle = prog;
ret->ucount = uniformCount;
ret->uloc = calloc(uniformCount, sizeof(*ret->uloc));
ret->uname = calloc(uniformCount, sizeof(*ret->uname));
for(i = 0; i < uniformCount; i++) {
int size;
int type;
glGetActiveUniformARB(prog, i, maxLength, NULL, &size, &type, name);
if(strchr(name, '[')) {
*strchr(name, '[') = '\0';
}
uint64_t idx = komihash(name, strlen(name), 0);
for(size_t iter = 0; iter < uniformCount; iter++, idx++) {
idx = idx % uniformCount;
if(ret->uname[idx] == NULL) {
ret->uname[idx] = strdup(name);
ret->uloc[idx] = glGetUniformLocationARB(prog, name);
break;
}
}
k3Log(k3_DEBUG, "%i %s @ %i", size, name, ret->uloc[idx]);
}
} else {
prog = glCreateProgram();
glAttachShader(prog, (uintptr_t) vs);
glAttachShader(prog, (uintptr_t) fs);
if(gs) glAttachShader(prog, (uintptr_t) gs);
glLinkProgram(prog);
int i;
glGetProgramiv(prog, GL_LINK_STATUS, &i);
if(i == GL_FALSE) {
char buf[256];
GLsizei len;
glGetProgramInfoLog(prog, sizeof(buf), &len, buf);
k3Log(k3_ERR, "Failed to link GLSL program: %.*s", len, buf);
}
GLint maxLength;
glGetProgramiv(prog, GL_ACTIVE_UNIFORM_MAX_LENGTH, &maxLength);
char *name = alloca(maxLength + 1);
memset(name, 0, maxLength + 1);
GLint uniformCount;
glGetProgramiv(prog, GL_ACTIVE_UNIFORMS, &uniformCount);
ret->handle = prog;
ret->ucount = uniformCount;
ret->uloc = calloc(uniformCount, sizeof(*ret->uloc));
ret->uname = calloc(uniformCount, sizeof(*ret->uname));
for(i = 0; i < uniformCount; i++) {
int size;
int type;
glGetActiveUniform(prog, i, maxLength, NULL, &size, &type, name);
if(strchr(name, '[')) {
*strchr(name, '[') = '\0';
}
uint64_t idx = komihash(name, strlen(name), 0);
for(size_t iter = 0; iter < uniformCount; iter++, idx++) {
idx = idx % uniformCount;
if(ret->uname[idx] == NULL) {
ret->uname[idx] = strdup(name);
ret->uloc[idx] = glGetUniformLocation(prog, name);
break;
}
}
k3Log(k3_DEBUG, "%i %s @ %i", size, name, ret->uloc[idx]);
}
}
return ret;
}
// Looks up the location recorded for the uniform named `key` in program p.
// Probing starts at the komihash of the key modulo p->ucount and proceeds
// linearly, wrapping around, for at most p->ucount slots.
// Returns the stored location, or -1 if no slot holds that name.
int16_t k3ProgramGetUId(struct k3GLSLP *p, const char *key) {
	uint64_t slot = komihash(key, strlen(key), 0);
	size_t probes = 0;

	while(probes < p->ucount) {
		slot %= p->ucount;
		if(strcmp(p->uname[slot], key) == 0) {
			return p->uloc[slot];
		}
		probes++;
		slot++;
	}

	return -1;
}
struct k3ARBVP *k3ProgramARBVP(const char *src) {
if(!GLAD_GL_ARB_vertex_program) {
return NULL;
}
GLuint p;
glGenProgramsARB(1, &p);
glBindProgramARB(GL_VERTEX_PROGRAM_ARB, p);
glGetError();
glProgramStringARB(GL_VERTEX_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB, strlen(src), src);
if(glGetError() == GL_INVALID_OPERATION) {
puts(glGetString(GL_PROGRAM_ERROR_STRING_ARB));
return NULL;
}
/*printf("VP\n");
GLint i;
glGetProgramivARB(GL_VERTEX_PROGRAM_ARB, GL_PROGRAM_INSTRUCTIONS_ARB, &i);
printf("\tTotal instructions: %i\n", i);
glGetProgramivARB(GL_VERTEX_PROGRAM_ARB, GL_PROGRAM_ALU_INSTRUCTIONS_ARB, &i);
printf("\tTotal ALU instructions: %i\n", i);
glGetProgramivARB(GL_VERTEX_PROGRAM_ARB, GL_PROGRAM_TEX_INSTRUCTIONS_ARB, &i);
printf("\tTotal TEX instructions: %i\n", i);
glGetProgramivARB(GL_VERTEX_PROGRAM_ARB, GL_PROGRAM_TEX_INDIRECTIONS_ARB, &i);
printf("\tTotal TEX indirections: %i\n", i);
glGetProgramivARB(GL_VERTEX_PROGRAM_ARB, GL_PROGRAM_TEMPORARIES_ARB, &i);
printf("\tTotal temporaries: %i\n", i);
glGetProgramivARB(GL_VERTEX_PROGRAM_ARB, GL_PROGRAM_PARAMETERS_ARB, &i);
printf("\tTotal parameters: %i\n", i);
glGetProgramivARB(GL_VERTEX_PROGRAM_ARB, GL_PROGRAM_ATTRIBS_ARB, &i);
printf("\tTotal attribs: %i\n", i);
glGetProgramivARB(GL_VERTEX_PROGRAM_ARB, GL_PROGRAM_ADDRESS_REGISTERS_ARB, &i);
printf("\tTotal addresses: %i\n", i);*/
return (struct k3ARBVP*) (uintptr_t) p;
}
struct k3ARBFP *k3ProgramARBFP(const char *src) {
if(!GLAD_GL_ARB_fragment_program) {
return NULL;
}
GLuint p;
glGenProgramsARB(1, &p);
glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, p);
glGetError();
glProgramStringARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB, strlen(src), src);
if(glGetError() == GL_INVALID_OPERATION) {
puts(glGetString(GL_PROGRAM_ERROR_STRING_ARB));
return NULL;
}
/*printf("FP\n");
GLint i;
glGetProgramivARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_INSTRUCTIONS_ARB, &i);
printf("\tTotal instructions: %i\n", i);
glGetProgramivARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_ALU_INSTRUCTIONS_ARB, &i);
printf("\tTotal ALU instructions: %i\n", i);
glGetProgramivARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_TEX_INSTRUCTIONS_ARB, &i);
printf("\tTotal TEX instructions: %i\n", i);
glGetProgramivARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_TEX_INDIRECTIONS_ARB, &i);
printf("\tTotal TEX indirections: %i\n", i);
glGetProgramivARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_TEMPORARIES_ARB, &i);
printf("\tTotal temporaries: %i\n", i);
glGetProgramivARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_PARAMETERS_ARB, &i);
printf("\tTotal parameters: %i\n", i);
glGetProgramivARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_ATTRIBS_ARB, &i);
printf("\tTotal attribs: %i\n", i);*/
return (struct k3ARBFP*) (uintptr_t) p;
}
// Creates an offscreen render target backed by the given textures.
//
// diffuse  colour attachment, or NULL for a depth-only target.
// depth    depth attachment, or NULL.
// samples  requested multisample count; 0 disables multisampling. Silently
//          reduced to 0 (with a warning) when the multisample or blit
//          extensions are missing.
//
// With multisampling, a second framebuffer with multisampled renderbuffers is
// created alongside the texture-backed one; both are stored in the result.
//
// Returns NULL when EXT_framebuffer_object is unavailable. Incomplete
// framebuffers are reported as warnings but the object is still returned.
// On return the default framebuffer is bound.
struct k3Offscreen *k3OffscreenCreateMultisampled(struct k3Tex *diffuse, struct k3Tex *depth, uint8_t samples) {
	k3Log(k3_INFO, "Init %sFBO", !diffuse && depth ? "depth-only " : "");

	if(samples && (!GLAD_GL_EXT_framebuffer_multisample || !GLAD_GL_EXT_framebuffer_blit)) {
		samples = 0;
		k3Log(k3_WARN, "Multisampled offscreens not supported.");
	}

	if(!GLAD_GL_EXT_framebuffer_object) {
		k3Log(k3_ERR, "Non-FBO offscreens not implemented");
		return NULL;
	}

	// Texture-backed framebuffer.
	GLuint fbo = 0;
	glGenFramebuffersEXT(1, &fbo);
	glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, fbo);

	struct k3Offscreen *ret = calloc(1, sizeof(*ret));
	ret->fbo = fbo;
	ret->diffuse = diffuse;
	ret->depth = depth;
	ret->multisampling.samples = samples;

	if(diffuse) {
		glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT0_EXT, GL_TEXTURE_2D, GL_FROM_K3TEX(diffuse), 0);
	} else {
		// No colour attachment: colour draws and reads must be disabled.
		glDrawBuffer(GL_NONE);
		glReadBuffer(GL_NONE);
	}

	if(depth) {
		glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_DEPTH_ATTACHMENT_EXT, GL_TEXTURE_2D, GL_FROM_K3TEX(depth), 0);
	}

	if(glCheckFramebufferStatusEXT(GL_FRAMEBUFFER_EXT) != GL_FRAMEBUFFER_COMPLETE_EXT) {
		k3Log(k3_WARN, "Framebuffer incomplete");
	}

	if(samples) {
		// Multisampled framebuffer backed by renderbuffers.
		GLuint msfbo;
		glGenFramebuffersEXT(1, &msfbo);
		glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, msfbo);
		ret->multisampling.fbo = msfbo;

		if(diffuse) {
			glGenRenderbuffersEXT(1, &ret->multisampling.rboDiffuse);
			glBindRenderbufferEXT(GL_RENDERBUFFER_EXT, ret->multisampling.rboDiffuse);
			glRenderbufferStorageMultisampleEXT(GL_RENDERBUFFER_EXT, samples, diffuse->glInternalFormat, diffuse->szX, diffuse->szY);
			glFramebufferRenderbufferEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT0_EXT, GL_RENDERBUFFER_EXT, ret->multisampling.rboDiffuse);
		} else {
			// Draw/read buffer selection is per-framebuffer state, so it has
			// to be repeated for this framebuffer.
			glDrawBuffer(GL_NONE);
			glReadBuffer(GL_NONE);
		}

		if(depth) {
			glGenRenderbuffersEXT(1, &ret->multisampling.rboDepth);
			glBindRenderbufferEXT(GL_RENDERBUFFER_EXT, ret->multisampling.rboDepth);
			glRenderbufferStorageMultisampleEXT(GL_RENDERBUFFER_EXT, samples, depth->glInternalFormat, depth->szX, depth->szY);
			glFramebufferRenderbufferEXT(GL_FRAMEBUFFER_EXT, GL_DEPTH_ATTACHMENT_EXT, GL_RENDERBUFFER_EXT, ret->multisampling.rboDepth);
		}

		if(glCheckFramebufferStatusEXT(GL_FRAMEBUFFER_EXT) != GL_FRAMEBUFFER_COMPLETE_EXT) {
			k3Log(k3_WARN, "Multisampled framebuffer incomplete");
		}
	}

	glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, 0);
	glDrawBuffer(GL_BACK); // XXX: this should not be necessary

	return ret;
}
// Creates a non-multisampled offscreen; shorthand for
// k3OffscreenCreateMultisampled() with a sample count of 0.
struct k3Offscreen *k3OffscreenCreate(struct k3Tex *diffuse, struct k3Tex *depth) {
	const uint8_t noMultisampling = 0;
	return k3OffscreenCreateMultisampled(diffuse, depth, noMultisampling);
}
// Redirects rendering into the given offscreen: binds its multisampled
// framebuffer when it has samples, its texture-backed framebuffer otherwise,
// and sets the viewport to the size of its diffuse texture (or of its depth
// texture when there is no diffuse one).
void k3BeginOffscreen(struct k3Offscreen *offscr) {
	if(!GLAD_GL_EXT_framebuffer_object) {
		k3Log(k3_ERR, "Non-FBO offscreens not implemented");
		return;
	}

	GLuint target;
	if(offscr->multisampling.samples > 0) {
		target = offscr->multisampling.fbo;
	} else {
		target = offscr->fbo;
	}
	glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, target);

	struct k3Tex *sized = offscr->diffuse;
	if(!sized) {
		sized = offscr->depth;
	}
	glViewport(0, 0, k3TexSzX(sized), k3TexSzY(sized));
}
void k3EndOffscreen(struct k3Offscreen *offscr) {
if(GLAD_GL_EXT_framebuffer_object) {
if(offscr->multisampling.samples) {
glBindFramebufferEXT(GL_DRAW_FRAMEBUFFER_EXT, offscr->fbo);
glBlitFramebufferEXT(0, 0, offscr->diffuse->szX, offscr->diffuse->szY, 0, 0, offscr->diffuse->szX, offscr->diffuse->szY, GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT, GL_NEAREST);
}
glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, 0);
glViewport(0, 0, MainWidth, MainHeight);
} else {
k3Log(k3_ERR, "Non-FBO offscreens not implemented");
}
}
// Draws the offscreen to the current framebuffer without any effect program;
// shorthand for k3BlitToScreenEffect() with k3_NONE.
void k3BlitToScreen(struct k3Offscreen *offscr, int additive) {
	void *noProgram = NULL;
	void *noParams = NULL;
	k3BlitToScreenEffect(offscr, additive, k3_NONE, noProgram, noParams);
}
void k3BlitToScreenEffect(struct k3Offscreen *offscr, int additive, int effect, void *program, void *params) {
if(GLAD_GL_EXT_framebuffer_object) {
glActiveTexture(GL_TEXTURE0);
struct k3Tex *tex = offscr->diffuse ? offscr->diffuse : offscr->depth;
glBindTexture(GL_TEXTURE_2D, GL_FROM_K3TEX(tex));
if(additive) {
glEnable(GL_BLEND);
glBlendFunc(GL_ONE, GL_ONE);
} else {
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
glDisable(GL_BLEND);
}
if(!k3IsCore) {
glDisable(GL_LIGHTING);
glDisable(GL_NORMALIZE);
glMatrixMode(GL_MODELVIEW);
glLoadIdentity();
glMatrixMode(GL_PROJECTION);
glLoadIdentity();
glEnable(GL_TEXTURE_2D);
}
glDisable(GL_DEPTH_TEST);
if(GLAD_GL_ARB_vertex_program) {
glDisable(GL_VERTEX_PROGRAM_ARB);
}
if(GLAD_GL_ARB_fragment_program) {
if(effect != k3_ARBFRAG) {
glDisable(GL_FRAGMENT_PROGRAM_ARB);
} else {
glEnable(GL_FRAGMENT_PROGRAM_ARB);
glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, effect == k3_ARBFRAG ? GL_FROM_K3ARBFP(program) : 0);
}
}
if(k3IsCore) {
GLuint prog;
if(effect == k3_NONE) {
prog = GL_FROM_K3GLSL(basicBlitProgram);
} else {
assert(effect == k3_GLSL);
prog = GL_FROM_K3GLSL(program);
}
glUseProgram(prog);
GLint uSz = glGetUniformLocation(prog, "u_sz");
glUniform2f(uSz, k3TexSzX(tex), k3TexSzY(tex));
glDrawArrays(GL_TRIANGLES, 0, 3);
} else {
if(GLAD_GL_ARB_shader_objects) {
glUseProgramObjectARB(effect == k3_GLSL ? GL_FROM_K3GLSL(program) : 0);
}
float bleedW = 1.0f / k3TexSzX(tex);
float bleedH = 1.0f / k3TexSzY(tex);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
glBegin(GL_QUADS);
glColor4f(1, 1, 1, 1);
glTexCoord2f(bleedW, bleedH);
glVertex2f(-1, -1);
glTexCoord2f(1 - bleedW, bleedH);
glVertex2f(1, -1);
glTexCoord2f(1 - bleedW, 1 - bleedH);
glVertex2f(1, 1);
glTexCoord2f(bleedW, 1 - bleedH);
glVertex2f(-1, 1);
glEnd();
}
} else {
k3Log(k3_ERR, "Non-FBO offscreens not implemented");
}
}
void k3OffscreenDestroy(struct k3Offscreen *offscr) {
}
// Function that receives every message formatted by k3Log().
static k3LogCallback CallbackFunc;

// Installs `fun` as the receiver of all subsequent log messages, replacing
// any previously installed one.
void k3SetLogCallback(k3LogCallback fun) {
	CallbackFunc = fun;
}
void k3Log(enum k3LogLevel level, const char *format, ...) {
static char *buf = NULL;
static size_t bufLen = 0;
va_list vl;
va_start(vl, format);
va_list vlc;
va_copy(vlc, vl);
size_t len = vsnprintf(NULL, 0, format, vlc) + 1;
if(len > bufLen) {
buf = realloc(buf, len);
}
va_end(vlc);
vsnprintf(buf, len, format, vl);
va_end(vl);
CallbackFunc(level, buf, len);
}
// Returns the largest texture dimension reported by the GL implementation
// (GL_MAX_TEXTURE_SIZE), clamped to what fits in the uint16_t return type.
// Returns 0 if the query does not produce a value.
uint16_t k3TexSzMax() {
	// Zero-initialised: a failed query leaves the variable untouched.
	GLint maxSize = 0;
	glGetIntegerv(GL_MAX_TEXTURE_SIZE, &maxSize);

	if(maxSize < 0) {
		return 0;
	}
	// Clamp instead of truncating: a limit of 65536 would otherwise wrap to 0.
	if(maxSize > UINT16_MAX) {
		return UINT16_MAX;
	}
	return (uint16_t) maxSize;
}
// Array of timers whose GPU query results are still outstanding. k3Update()
// walks it and removes each entry once both of its query results are available.
struct k3Timer *k3Timers;
// Number of valid entries at the start of k3Timers.
size_t k3TimerCount;
void k3Update() {
for(size_t ti = 0; ti < k3TimerCount;) {
struct k3Timer *t = &k3Timers[ti];
GLint b1 = 0, b2 = 0;
glGetQueryObjectiv(t->qStart, GL_QUERY_RESULT_AVAILABLE, &b1);
glGetQueryObjectiv(t->qEnd, GL_QUERY_RESULT_AVAILABLE, &b2);
if(b1 && b2) {
uint64_t t1, t2;
glGetQueryObjectui64v(t->qStart, GL_QUERY_RESULT, &t1);
glGetQueryObjectui64v(t->qEnd, GL_QUERY_RESULT, &t2);
k3Log(k3_TRACE, "Routine %s took %lu us", t->name, (t2 - t1) / 1000);
glDeleteQueries(2, (GLuint*) t);
memmove(t, t + 1, sizeof(*t) * (k3TimerCount - ti - 1));
k3TimerCount--;
} else {
ti++;
}
}
}