#include"k3particles.h"

#include"k3_internal.h"

// State of the xorshift128 generator below. An all-zero state only ever
// produces zeros, so it must be seeded with at least one non-zero word.
struct xorshift128_state {
	uint32_t x[4];
};

// Advances `state` by one step and returns the next 32-bit pseudo-random value.
uint32_t xorshift128(struct xorshift128_state *state) {
	uint32_t t = state->x[3];
	const uint32_t s = state->x[0];

	// Shift the state words down by one slot.
	state->x[3] = state->x[2];
	state->x[2] = state->x[1];
	state->x[1] = s;

	t ^= t << 11;
	t ^= t >> 8;

	state->x[0] = t ^ s ^ (s >> 19);
	return state->x[0];
}

// Generator instance shared by every particle system in this file.
// It is fixed-seeded, and it is not synchronised, so it must only be used from
// one thread at a time.
struct xorshift128_state xs = {{1, 2, 3, 4}};

// Returns a pseudo-random integer in the inclusive range [min, max].
// Values are drawn by rejection sampling so every result is equally likely.
// If max <= min the range is degenerate and min is returned.
static uint32_t randint(uint32_t min, uint32_t max) {
	if(max <= min) {
		return min;
	}

	const uint64_t two32 = UINT64_C(0x100000000);
	const uint64_t span = (uint64_t) max - min + 1;
	// Largest multiple of `span` that fits in 2^32; draws at or above it would
	// bias the result and are rejected.
	const uint64_t limit = two32 - two32 % span;

	uint32_t x;
	do {
		x = xorshift128(&xs);
	} while(x >= limit);

	return min + (uint32_t) (x / (two32 / span));
}

// Returns a pseudo-random float in [0, 1).
static float randfloat(void) {
	// Put 23 random bits into the mantissa of a float with exponent 0, which
	// gives a value in [1, 2), then shift it down to [0, 1).
	union {
		uint32_t u32;
		float f;
	} u = { .u32 = xorshift128(&xs) >> 9 | 0x3f800000 };

	return u.f - 1.0;
}

// Creates the quad model and the per-particle arrays of a particle system.
//
// `this->capacity` must already be set by the caller; it fixes the maximum
// number of live particles. `mat` is the material used by the single mesh.
//
// NOTE(review): indices are 16 bit, so vertex indices wrap once
// capacity * 4 exceeds 65536 (capacity > 16384).
// NOTE(review): allocation failures are not reported (the function returns
// void); the arrays are left NULL in that case.
void k3CPUQuadParticlesInit(struct k3CPUQuadParticles *this, struct k3Mat *mat) {
	// Two triangles per quad: (0, 1, 2) and (0, 2, 3).
	static const uint16_t corner[6] = {0, 1, 2, 0, 2, 3};

	uint16_t *inds = calloc(this->capacity * 6, sizeof(*inds));

	for(size_t i = 0; i < this->capacity; i++) {
		for(size_t k = 0; k < 6; k++) {
			inds[i * 6 + k] = i * 4 + corner[k];
		}
	}

	this->mdl = k3MdlCreate(this->capacity * 4, this->capacity * 6, 0, k3_ATTRIB_EMPTY, k3_ATTRIB_EMPTY, k3_ATTRIB_EMPTY, k3_ATTRIB_EMPTY, NULL, NULL, inds, NULL, NULL);
	k3MdlAddMesh(this->mdl, mat, 0, this->capacity * 6);
	k3MdlSetDebugName(this->mdl, "k3CPUQuadParticles");

	free(inds);

	this->positions = calloc(this->capacity, sizeof(*this->positions));
	this->velocities = calloc(this->capacity, sizeof(*this->velocities));
	this->sizes = calloc(this->capacity, sizeof(*this->sizes));
	this->lifetimes = calloc(this->capacity, sizeof(*this->lifetimes));
}

// Writes a random unit vector into `output`, lying within `coneAngle` radians
// of the +Y axis.
static void random_cone_vector(float coneAngle, vec3 output) {
	const float minZ = cosf(coneAngle);

	// The two random draws are made in this order on purpose: changing it
	// would change the generated sequence.
	const float z = randfloat() * (1 - minZ) + minZ;
	const float phi = randfloat() * 6.2831853;

	const float radius = sqrtf(1 - z * z);

	output[0] = radius * cosf(phi);
	output[1] = z;
	output[2] = radius * sinf(phi);
}

// Converts a colour channel to a byte, clamping to [0, 1] first. Converting an
// out-of-range float to an unsigned type is undefined behaviour.
static uint8_t unorm8(float v) {
	if(!(v > 0)) {
		return 0; // also catches NaN
	}
	if(v > 1) {
		v = 1;
	}
	return (uint8_t) (v * 255);
}

// Converts a normal component to a byte holding a signed, two's-complement
// value in [-127, 127]. The conversion goes through int8_t because converting
// a negative float directly to uint8_t is undefined behaviour.
static uint8_t snorm8(float v) {
	if(v != v) {
		return 0; // NaN
	}
	if(v < -1) {
		v = -1;
	}
	if(v > 1) {
		v = 1;
	}
	return (uint8_t) (int8_t) (v * 127);
}

// Rebuilds the camera-facing quads of all live particles and uploads them.
//
// `cameraRight` and `cameraUp` span the billboard plane; `cameraFront` is
// written as the normal of every vertex. If a scratch buffer cannot be
// allocated the model is left untouched for this frame.
static void regenerate_model(struct k3CPUQuadParticles *this, vec3 cameraRight, vec3 cameraUp, vec3 cameraFront) {
	// Corner order matches the index buffer: bottom-left, bottom-right,
	// top-right, top-left.
	static const float rightSign[4] = {-1, 1, 1, -1};
	static const float upSign[4] = {-1, -1, 1, 1};
	static const float cornerUV[4][2] = {{0, 0}, {1, 0}, {1, 1}, {0, 1}};

	const size_t vertexCapacity = this->capacity * 4;

	vec3 *vpos = _mm_malloc(sizeof(*vpos) * vertexCapacity, 16);
	uint8_t *vcols = _mm_malloc(4 * vertexCapacity, 16);
	vec2 *vuvs = _mm_malloc(sizeof(*vuvs) * vertexCapacity, 16);
	uint8_t *vnrms = _mm_malloc(3 * vertexCapacity, 16);

	if(!vpos || !vcols || !vuvs || !vnrms) {
		goto cleanup;
	}

	vec3 halfRight, halfUp;
	glm_vec3_scale(cameraRight, 0.5, halfRight);
	glm_vec3_scale(cameraUp, 0.5, halfUp);

	// The normal is the same for every vertex, so encode it once.
	const uint8_t nrm[3] = {
		snorm8(cameraFront[0]),
		snorm8(cameraFront[1]),
		snorm8(cameraFront[2]),
	};

	#pragma omp parallel for
	for(size_t i = 0; i < this->count; i++) {
		const size_t base = i * 4;

		vec4 color;
		glm_vec4_lerp(this->colorEnd, this->colorStart, this->lifetimes[i] / this->particleLifetime, color);

		const uint8_t rgba[4] = {
			unorm8(color[0]),
			unorm8(color[1]),
			unorm8(color[2]),
			unorm8(color[3]),
		};

		for(size_t v = 0; v < 4; v++) {
			glm_vec3_copy(this->positions[i], vpos[base + v]);
			glm_vec3_muladds(halfRight, rightSign[v] * this->sizes[i], vpos[base + v]);
			glm_vec3_muladds(halfUp, upSign[v] * this->sizes[i], vpos[base + v]);

			vcols[(base + v) * 4 + 0] = rgba[0];
			vcols[(base + v) * 4 + 1] = rgba[1];
			vcols[(base + v) * 4 + 2] = rgba[2];
			vcols[(base + v) * 4 + 3] = rgba[3];

			vuvs[base + v][0] = cornerUV[v][0];
			vuvs[base + v][1] = cornerUV[v][1];

			vnrms[(base + v) * 3 + 0] = nrm[0];
			vnrms[(base + v) * 3 + 1] = nrm[1];
			vnrms[(base + v) * 3 + 2] = nrm[2];
		}
	}

	// k3MdlUpdatePos is given no vertex count, so it presumably reads every
	// vertex of the model. Park the unused ones on the emitter origin instead
	// of handing it uninitialised memory.
	for(size_t v = this->count * 4; v < vertexCapacity; v++) {
		glm_vec3_copy(this->origin, vpos[v]);
	}

	// This updates the AABB. Positions are presumably uploaded by this call as
	// well, since there is no separate upload to offV below - TODO confirm.
	k3MdlUpdatePos(this->mdl, vpos);

	// Only the live particles are drawn (see idxNumber), so only their
	// attributes need to be uploaded.
	const size_t liveVertices = this->count * 4;

	glBindBufferARB(GL_ARRAY_BUFFER_ARB, this->mdl->vstore->gl);
	glBufferSubDataARB(GL_ARRAY_BUFFER_ARB, this->mdl->offC, sizeof(*vcols) * liveVertices * 4, vcols);
	glBufferSubDataARB(GL_ARRAY_BUFFER_ARB, this->mdl->offU, sizeof(*vuvs) * liveVertices, vuvs);
	glBufferSubDataARB(GL_ARRAY_BUFFER_ARB, this->mdl->offN, sizeof(*vnrms) * liveVertices * 3, vnrms);

	this->mdl->meshes[0].idxNumber = this->count * 6;

cleanup:
	_mm_free(vpos);
	_mm_free(vcols);
	_mm_free(vuvs);
	_mm_free(vnrms);
}

// Copies every attribute of particle `from` over particle `to`.
static void copy_particle(struct k3CPUQuadParticles *this, size_t from, size_t to) {
	glm_vec3_copy(this->positions[from], this->positions[to]);
	glm_vec3_copy(this->velocities[from], this->velocities[to]);
	this->sizes[to] = this->sizes[from];
	this->lifetimes[to] = this->lifetimes[from];
}

// Advances the particle system by `dt` and rebuilds its model.
//
// Steps: emit new particles, integrate gravity and velocity, remove particles
// whose lifetime has run out, then regenerate the quads facing the camera
// described by `cameraRight`, `cameraUp` and `cameraFront`.
//
// NOTE(review): the number of emitted particles is dt * emissionRate
// truncated to an integer and the remainder is dropped, so nothing is emitted
// while dt * emissionRate < 1. Fixing that needs an accumulator field in the
// struct.
void k3CPUQuadParticlesUpdate(struct k3CPUQuadParticles *this, float dt, vec3 cameraRight, vec3 cameraUp, vec3 cameraFront) {
	const double wanted = dt * this->emissionRate;

	// Emission is switched off for good in the frame its lifetime runs out.
	this->emissionLifetime -= dt;
	if(this->emissionLifetime <= 0) {
		this->emissionEnabled = false;
	}

	// Clamp before converting: a negative or huge float -> size_t conversion
	// is undefined behaviour.
	const size_t room = this->capacity - this->count;
	size_t numGenerated = 0;
	if(this->emissionEnabled) {
		if(wanted >= (double) room) {
			numGenerated = room;
		} else if(wanted > 0) {
			numGenerated = (size_t) wanted;
		}
	}

	// New particles are appended after the live ones.
	for(size_t spawned = 0; spawned < numGenerated; spawned++) {
		const size_t i = this->count;

		glm_vec3_copy(this->origin, this->positions[i]);
		random_cone_vector(this->emissionConeAngle, this->velocities[i]);
		glm_vec3_scale(this->velocities[i], 2, this->velocities[i]);
		this->sizes[i] = 1;
		this->lifetimes[i] = this->particleLifetime;

		this->count++;
	}

	vec3 accdt;
	glm_vec3_scale(this->gravity, dt, accdt);

	size_t i = 0;
	while(i < this->count) {
		glm_vec3_add(this->velocities[i], accdt, this->velocities[i]);

		vec3 veldt;
		glm_vec3_scale(this->velocities[i], dt, veldt);
		glm_vec3_add(this->positions[i], veldt, this->positions[i]);

		this->lifetimes[i] -= dt;

		if(this->lifetimes[i] <= 0) {
			// Fill the hole with the last live particle. That particle has
			// not been updated yet, so slot i is processed again.
			this->count--;
			copy_particle(this, this->count, i);
		} else {
			i++;
		}
	}

	regenerate_model(this, cameraRight, cameraUp, cameraFront);
}