cuticle/hi/node.c

1579 lines
46 KiB
C

#include"node.h"
#include<stdlib.h>
#include"img.h"
#include<sail/sail.h>
#include<sail-manip/sail-manip.h>
#include<assert.h>
#include<string.h>
#include<tmmintrin.h>
#include<smmintrin.h>
#include<pango/pango.h>
#include<pango/pangoft2.h>
#include<freetype/ftbitmap.h>
#include"mode.h"
#include<math.h>
#include<sched.h>
#include<limits.h>
#include"minitrace.h"
#include"linearity.h"
/*
 * Lower-bound binary search.
 *
 * Returns the index of the first element in the array at `base` (`nmemb`
 * elements of `size` bytes each) for which `compar(element, key)` is not
 * negative. Returns `nmemb` when every element compares below `key`.
 * The array has to be ordered consistently with `compar`.
 */
static size_t bisect(const void *key, const void *base, size_t nmemb, size_t size, ssize_t(*compar)(const void*, const void*)) {
	size_t lo = 0;
	size_t hi = nmemb;
	while(lo < hi) {
		const size_t mid = (lo + hi) / 2;
		const void *candidate = (const void*) ((uintptr_t) base + size * mid);
		if(compar(candidate, key) >= 0) {
			// Candidate is at or past the key: the answer is here or to the left.
			hi = mid;
		} else {
			lo = mid + 1;
		}
	}
	return lo;
}
/*
 * Three-way comparison of two floats passed by address.
 * Yields 1 when *A > *B, -1 when *A < *B and 0 otherwise.
 */
static ssize_t float_compar(const void *A, const void *B) {
	const float lhs = *(const float*) A;
	const float rhs = *(const float*) B;
	if(lhs > rhs) {
		return 1;
	}
	if(lhs < rhs) {
		return -1;
	}
	return 0;
}
static int adjacencycmp(const void *a, const void *b) {
size_t v = (uintptr_t) ((CHiAdjacency*) a)[0] - (uintptr_t) ((CHiAdjacency*) b)[0];
return v ? v : (uintptr_t) ((CHiAdjacency*) a)[1] - (uintptr_t) ((CHiAdjacency*) b)[1];
}
/*
 * Records the edge (source, sink) in the adjacency list of the graph that
 * owns `source`, keeping the list sorted for adjacency_remove's bsearch.
 *
 * Does nothing when `source` is not attached to a graph, or when the list
 * cannot be grown (the existing list is left intact in that case).
 */
static void adjacency_add(CHiPubNode *source, CHiPubNode *sink) {
	CHiNodeGraph *ng = source->ng;
	if(!ng) {
		return;
	}
	if(ng->adjacencyCount == ng->adjacencyCapacity) {
		// Doubling a zero capacity would never grow, so start from 8.
		size_t newCapacity = ng->adjacencyCapacity ? ng->adjacencyCapacity * 2 : 8;
		CHiAdjacency *grown = realloc(ng->adjacencies, sizeof(CHiAdjacency) * newCapacity);
		if(!grown) {
			return;
		}
		ng->adjacencies = grown;
		ng->adjacencyCapacity = newCapacity;
	}
	ng->adjacencies[ng->adjacencyCount][0] = source;
	ng->adjacencies[ng->adjacencyCount][1] = sink;
	ng->adjacencyCount++;
	qsort(ng->adjacencies, ng->adjacencyCount, sizeof(CHiAdjacency), adjacencycmp);
}
/*
 * Removes the edge (source, sink) from the adjacency list of the graph that
 * owns `source`. A missing edge, or a `source` that is not attached to a
 * graph, is silently ignored.
 */
static void adjacency_remove(CHiPubNode *source, CHiPubNode *sink) {
	CHiNodeGraph *ng = source->ng;
	if(!ng) {
		return;
	}
	CHiAdjacency *adj = bsearch(&(CHiAdjacency) {source, sink}, ng->adjacencies, ng->adjacencyCount, sizeof(CHiAdjacency), adjacencycmp);
	if(!adj) {
		return;
	}
	// Close the gap by shifting every later entry down by one.
	size_t trailing = ng->adjacencyCount - (adj - ng->adjacencies) - 1;
	memmove(adj, adj + 1, sizeof(CHiAdjacency) * trailing);
	ng->adjacencyCount--;
}
/*
 * Allocates a node graph and puts it into its initial state via
 * CHi_NodeGraphReset. The first call also initialises tracing output
 * ("CHiTrace.json").
 *
 * Returns NULL when the graph cannot be allocated.
 */
CUTIVIS CHiNodeGraph *CHi_NewNodeGraph() {
	static int inited = 0;
	if(!inited) {
		inited = 1;
		mtr_init("CHiTrace.json");
	}
	CHiNodeGraph *ret = calloc(1, sizeof(*ret));
	if(!ret) {
		// CHi_NodeGraphReset dereferences its argument unconditionally.
		return NULL;
	}
	CHi_NodeGraphReset(ret);
	return ret;
}
/*
 * Destroys every registered node and returns the graph to an empty state.
 *
 * Nodes with a Destroy callback are released through it, all others with
 * free(). The frame/stop callbacks and the user data pointer survive the
 * reset; every other field is zeroed and the node and adjacency arrays are
 * re-created with room for 8 entries each.
 *
 * Returns `ng`.
 */
CUTIVIS CHiNodeGraph *CHi_NodeGraphReset(CHiNodeGraph *ng) {
	for(size_t idx = 0; idx < ng->count; idx++) {
		CHiPubNode *node = ng->nodes[idx];
		if(node->Destroy) {
			node->Destroy(node);
		} else {
			free(node);
		}
	}

	// free(NULL) is a no-op, so these need no guards.
	free(ng->nodes);
	free(ng->adjacencies);
	free(ng->keyframesList.keyframes);

	// Preserve what the embedding application installed.
	void *keptOnStop = ng->eventOnStopComplete;
	void *keptOnFrame = ng->eventOnFrameComplete;
	void *keptUd = ng->ud;

	memset(ng, 0, sizeof(*ng));

	ng->capacity = 8;
	ng->nodes = malloc(sizeof(*ng->nodes) * ng->capacity);
	ng->compilationStatus = CUTIHI_COMP_READY;
	ng->adjacencyCapacity = 8;
	ng->adjacencies = malloc(sizeof(CHiAdjacency) * ng->adjacencyCapacity);

	ng->eventOnStopComplete = keptOnStop;
	ng->eventOnFrameComplete = keptOnFrame;
	ng->ud = keptUd;
	return ng;
}
/*
 * Resolves a value to the concrete value it stands for.
 *
 * Linked values are followed to the referenced source slot of the node they
 * point at; keyed values are replaced by their keyframe set's `current`
 * value. The walk repeats until a value of any other type is reached, which
 * is returned.
 */
CUTIVIS CHiValue *CHi_Crawl(CHiValue *v) {
	for(;;) {
		if(v->type == CUTIHI_VAL_LINKED) {
			v = &v->data.linked.to->sources[v->data.linked.idx];
			continue;
		}
		if(v->type == CUTIHI_VAL_KEYED) {
			v = &v->data.keyed->current;
			continue;
		}
		return v;
	}
}
/*
 * Appends node `n` to graph `ng` and makes `ng` the node's owner.
 *
 * If a compilation is currently running, the node's Start callback is
 * invoked right away, provided it has one: the built-in nodes leave Start
 * set to NULL, so it must not be called blindly.
 *
 * If the node array cannot be grown the node is not registered and
 * `n->ng` is left untouched.
 */
CUTIVIS void CHi_RegisterNode(CHiNodeGraph* ng, CHiPubNode* n) {
	if(ng->count == ng->capacity) {
		size_t grown = ng->capacity * 3 / 2;
		if(grown <= ng->capacity) {
			// x * 3 / 2 does not grow for capacities 0 and 1.
			grown = ng->capacity + 1;
		}
		void *moved = realloc(ng->nodes, sizeof(*ng->nodes) * grown);
		if(!moved) {
			return;
		}
		ng->nodes = moved;
		ng->capacity = grown;
	}
	ng->nodes[ng->count++] = n;
	n->ng = ng;
	if(ng->compilationStatus == CUTIHI_COMP_RUNNING && n->Start) {
		n->Start(n);
	}
}
/*
 * Hook for invalidating nodes affected by a change to `n`.
 *
 * The walk over the adjacency list is in place, but the actual marking is
 * currently disabled, so this has no observable effect. A NULL graph is
 * accepted because CHi_ConfigureSink may be used on nodes that have not
 * been registered yet.
 */
CUTIVIS void CHi_MakeDirty(CHiNodeGraph *ng, CHiPubNode *n) {
	if(!ng) {
		return;
	}
	for(size_t adj = 0; adj < ng->adjacencyCount; adj++) {
		if(ng->adjacencies[adj][0] == n) {
			//n->clean = 0;
		}
	}
}
/*
 * Depth-first step of the topological sort.
 *
 * `_dfsmark` is 0 for unvisited nodes, 1 while a node's dependencies are
 * being explored and 2 once it has been emitted. Every node that `n` links
 * to through its sinks is emitted before `n` itself is appended to
 * `*result` (advancing `*resultCount`).
 *
 * Returns 1 on success, 0 when a node that is still being explored is
 * reached again, i.e. the links form a cycle.
 */
static int dfs_visit(size_t *resultCount, CHiPubNode ***result, CHiPubNode *n) {
	if(n->_dfsmark == 1) {
		return 0;
	}
	if(n->_dfsmark == 2) {
		return 1;
	}

	n->_dfsmark = 1;
	for(size_t s = 0; s < n->sinkCount; s++) {
		CHiValue *sink = &n->sinks[s];
		if(sink->type != CUTIHI_VAL_LINKED) {
			continue;
		}
		if(!dfs_visit(resultCount, result, sink->data.linked.to)) {
			return 0;
		}
	}
	n->_dfsmark = 2;

	(*result)[*resultCount] = n;
	(*resultCount)++;
	return 1;
}
/*
 * Reorders `ng->nodes` so that every node comes after the nodes its sinks
 * link to.
 *
 * Returns 1 and installs the new ordering on success. Returns 0 and leaves
 * `ng->nodes` untouched when dfs_visit reports a cycle.
 */
static int topological_sort(CHiNodeGraph *ng) {
	CHiPubNode **ordered = malloc(sizeof(*ordered) * ng->capacity);
	size_t placed = 0;

	for(size_t i = 0; i < ng->count; i++) {
		ng->nodes[i]->_dfsmark = 0;
	}

	size_t visited = 0;
	while(visited < ng->count && dfs_visit(&placed, &ordered, ng->nodes[visited])) {
		visited++;
	}
	if(visited != ng->count) {
		free(ordered);
		return 0;
	}

	assert(placed == ng->count);
	free(ng->nodes);
	ng->nodes = ordered;
	return 1;
}
/*
 * Assigns value `v` to sink `i` of node `n`.
 *
 * - The sink array is grown when `i` lies beyond it; every newly created
 *   slot is zeroed and `sinkCount` is updated to cover it.
 * - If the sink is keyframed, `v` only replaces the keyframe set's current
 *   value.
 * - A node may not be linked to itself, and a link that makes the graph
 *   cyclic (topological sort fails) is rolled back.
 *
 * Returns 1 when the value was applied, 0 when it was rejected or the sink
 * array could not be grown.
 */
CUTIVIS int CHi_ConfigureSink(CHiPubNode *n, size_t i, CHiValue v) {
	if(n->sinkCount <= i) {
		CHiValue *grown = realloc(n->sinks, sizeof(*n->sinks) * (i + 1));
		if(!grown) {
			return 0;
		}
		// Zero the whole new tail, not just slot i, so that no slot is ever
		// read uninitialised.
		memset(&grown[n->sinkCount], 0, sizeof(*grown) * (i + 1 - n->sinkCount));
		n->sinks = grown;
		n->sinkCount = i + 1;
	}
	if(n->sinks[i].type == CUTIHI_VAL_KEYED) {
		n->sinks[i].data.keyed->current = v;
		return 1;
	}
	if(v.type == CUTIHI_VAL_LINKED && n == v.data.linked.to) return 0;
	CHiValue old = n->sinks[i];
	if(old.type == CUTIHI_VAL_LINKED) {
		adjacency_remove(old.data.linked.to, n);
	}
	// Check if viable
	n->sinks[i] = v;
	if(n->ng && !topological_sort(n->ng)) {
		n->sinks[i] = old;
		if(old.type == CUTIHI_VAL_LINKED) {
			adjacency_add(old.data.linked.to, n);
		}
		return 0;
	}
	if(v.type == CUTIHI_VAL_LINKED) {
		adjacency_add(v.data.linked.to, n);
	}
	// An unregistered node has no graph to notify.
	if(n->ng) {
		CHi_MakeDirty(n->ng, n);
	}
	return 1;
}
/*
 * Creates a keyframe for sink `i` of node `n` at the graph's current time.
 *
 * If the sink is not keyframed yet, it is converted: a keyframe set holding
 * the sink's present value as its single keyframe is created, registered in
 * the graph's keyframe list and installed in the sink. Otherwise the
 * keyframe set's current value is stored at the current time, overwriting a
 * keyframe that already exists at exactly that time or inserting a new one
 * in time order.
 *
 * An out-of-range `i` is ignored.
 */
CUTIVIS void CHi_MakeKeyframe(CHiNodeGraph *ng, CHiPubNode *n, size_t i) {
	if(i >= n->sinkCount) {
		return;
	}
	if(n->sinks[i].type != CUTIHI_VAL_KEYED) {
		CHiKeyframes *kfs = calloc(1, sizeof(*kfs));
		kfs->type = n->sinks[i].type;
		kfs->count = 1;
		kfs->times = malloc(sizeof(*kfs->times));
		*kfs->times = ng->time;
		kfs->values = malloc(sizeof(*kfs->values));
		kfs->values[0] = n->sinks[i].data;
		// Copy the complete value (type tag and payload). Copying only
		// sizeof(CHiValueRaw) bytes between two CHiValue objects does not
		// match the size of what is being copied.
		kfs->current = n->sinks[i];
		kfs->node = n;
		n->sinks[i].type = CUTIHI_VAL_KEYED;
		n->sinks[i].data.keyed = kfs;
		ng->keyframesList.keyframes = realloc(ng->keyframesList.keyframes, sizeof(*ng->keyframesList.keyframes) * (++ng->keyframesList.count));
		ng->keyframesList.keyframes[ng->keyframesList.count - 1] = kfs;
	} else {
		CHiKeyframes *kfs = n->sinks[i].data.keyed;
		float now = ng->time;
		size_t idx = bisect(&now, kfs->times, kfs->count, sizeof(now), float_compar);
		if(idx < kfs->count && kfs->times[idx] == now) {
			kfs->values[idx] = kfs->current.data;
		} else {
			kfs->count++;
			kfs->values = realloc(kfs->values, sizeof(*kfs->values) * kfs->count);
			kfs->times = realloc(kfs->times, sizeof(*kfs->times) * kfs->count);
			// Open a gap at idx by shifting the tail up by one.
			memmove(kfs->values + idx + 1, kfs->values + idx, sizeof(*kfs->values) * (kfs->count - idx - 1));
			memmove(kfs->times + idx + 1, kfs->times + idx, sizeof(*kfs->times) * (kfs->count - idx - 1));
			kfs->values[idx] = kfs->current.data;
			kfs->times[idx] = now;
		}
	}
}
/*
 * Moves keyframe `idx` of `kfs` to time `to`, keeping the keyframes ordered
 * by time. Neighbouring keyframes are shifted over the moved one as needed.
 *
 * Returns the index the keyframe occupies afterwards. `ng` is not used.
 */
CUTIVIS size_t CHi_MoveKeyframe(CHiNodeGraph *ng, CHiKeyframes *kfs, size_t idx, float to) {
	const CHiValueRaw moving = kfs->values[idx];

	// Moving later: pull each successor that now precedes us down one slot.
	for(; idx < kfs->count - 1 && to > kfs->times[idx + 1]; idx++) {
		kfs->values[idx] = kfs->values[idx + 1];
		kfs->times[idx] = kfs->times[idx + 1];
	}

	// Moving earlier: push each predecessor that now follows us up one slot.
	for(; idx > 0 && to < kfs->times[idx - 1]; idx--) {
		kfs->values[idx] = kfs->values[idx - 1];
		kfs->times[idx] = kfs->times[idx - 1];
	}

	kfs->times[idx] = to;
	kfs->values[idx] = moving;
	return idx;
}
/*
 * Shifts keyframe `idx` of `kfs` by `dt` relative to its present time.
 * Returns the keyframe's index after the move (see CHi_MoveKeyframe).
 */
CUTIVIS size_t CHi_MoveKeyframeBy(CHiNodeGraph *ng, CHiKeyframes *kfs, size_t idx, float dt) {
	const float target = kfs->times[idx] + dt;
	return CHi_MoveKeyframe(ng, kfs, idx, target);
}
/*
 * Removes keyframe `idx` from `kfs`, closing the gap in both the time and
 * the value array. An index outside the keyframe set is ignored; without
 * that check the length passed to memmove would underflow. `ng` is not
 * used.
 */
CUTIVIS void CHi_DeleteKeyframe(CHiNodeGraph *ng, CHiKeyframes *kfs, size_t idx) {
	if(idx >= kfs->count) {
		return;
	}
	size_t trailing = kfs->count - idx - 1;
	memmove(&kfs->times[idx], &kfs->times[idx + 1], trailing * sizeof(*kfs->times));
	memmove(&kfs->values[idx], &kfs->values[idx + 1], trailing * sizeof(*kfs->values));
	kfs->count--;
}
/*
 * Returns the index of the keyframe in `kfs` whose time is nearest to `t`.
 * When the two neighbours are equally far away, the earlier one wins.
 * `ng` is not used.
 */
CUTIVIS size_t CHi_GetClosestKeyframe(CHiNodeGraph *ng, CHiKeyframes *kfs, float t) {
	if(kfs->count == 1) {
		return 0;
	}

	// First keyframe at or after t.
	const size_t upper = bisect(&t, kfs->times, kfs->count, sizeof(*kfs->times), float_compar);
	if(upper == 0) {
		return 0;
	}
	if(upper == kfs->count) {
		return upper - 1;
	}

	const double distAfter = fabs(kfs->times[upper] - t);
	const double distBefore = fabs(kfs->times[upper - 1] - t);
	return distAfter < distBefore ? upper : upper - 1;
}
/*
 * Sets how the keyframed sink `sinkIdx` of node `n` behaves outside its
 * keyframe range.
 *
 * `params` supplies the values copied into the keyframe set's
 * extrapolationParameter array and must provide as many floats as that
 * array holds. Passing NULL keeps the parameters that are already stored.
 * Nothing happens if the sink is not keyframed. `ng` is not used.
 */
CUTIVIS void CHi_SetExtrapolationMode(CHiNodeGraph *ng, CHiPubNode *n, size_t sinkIdx, CHiExtrapolationMode mode, float* params) {
	if(n->sinks[sinkIdx].type != CUTIHI_VAL_KEYED) {
		return;
	}
	CHiKeyframes *kfs = n->sinks[sinkIdx].data.keyed;
	kfs->extrapolationMode = mode;
	if(params) {
		memcpy(kfs->extrapolationParameter, params, sizeof(kfs->extrapolationParameter));
	}
}
/*
 * Stores the run length used by the compilation thread. That thread treats
 * a duration of -1 as "no time limit".
 */
CUTIVIS void CHi_SetDuration(CHiNodeGraph *ng, float d) {
	ng->duration = d;
}
/*
 * Evaluates `root` outside of a compilation run: every node linked into one
 * of its sinks is evaluated first (recursively), then `root` itself.
 *
 * Returns 0 without doing anything unless the owning graph is in the READY
 * state, 1 otherwise. Nodes are always re-evaluated; skipping clean nodes
 * is not implemented.
 */
CUTIVIS int CHi_Hysteresis(CHiPubNode *root) {
	if(root->ng->compilationStatus != CUTIHI_COMP_READY) {
		return 0;
	}

	for(size_t s = 0; s < root->sinkCount; s++) {
		CHiValue *input = &root->sinks[s];
		if(input->type != CUTIHI_VAL_LINKED) {
			continue;
		}
		CHi_Hysteresis(input->data.linked.to);
	}

	root->Perform(root);
	return 1;
}
/*
 * Reports whether any error slot of `n` has a different active state than
 * the one recorded by the last save_errors call.
 */
static bool error_changes(CHiPubNode *n) {
	int slot = 0;
	while(slot < CUTIHI_MAX_ERRORS && n->errors.active[slot] == n->errors.activeLast[slot]) {
		slot++;
	}
	// Stopping before the end means a differing slot was found.
	return slot < CUTIHI_MAX_ERRORS;
}
/*
 * Moves the active state of every error slot of `n` into `activeLast` and
 * clears the active flags for the upcoming step.
 */
static void save_errors(CHiPubNode *n) {
	int slot = 0;
	while(slot < CUTIHI_MAX_ERRORS) {
		n->errors.activeLast[slot] = n->errors.active[slot];
		n->errors.active[slot] = false;
		slot++;
	}
}
/*
 * Runs one evaluation pass over the graph while holding the graph mutex.
 *
 * All nodes first have their error state snapshotted and cleared. Then each
 * node is performed in array order, and eventOnError (if installed) is
 * raised for every node whose error state differs from the snapshot.
 * eventOnFrameComplete (if installed) fires once at the end.
 */
static void perform_step(CHiNodeGraph *ng) {
	pthread_mutex_lock(&ng->mut);

	for(size_t i = 0; i < ng->count; i++) {
		save_errors(ng->nodes[i]);
	}

	for(size_t i = 0; i < ng->count; i++) {
		CHiPubNode *node = ng->nodes[i];
		node->Perform(node);
		if(error_changes(node) && ng->eventOnError) {
			ng->eventOnError(ng, node);
		}
	}

	if(ng->eventOnFrameComplete) {
		ng->eventOnFrameComplete(ng);
	}

	pthread_mutex_unlock(&ng->mut);
}
/*
 * Returns true when `l` is strictly earlier than `r`: seconds are compared
 * first, nanoseconds break ties.
 */
bool timespec_less(const struct timespec l, const struct timespec r) {
	if(l.tv_sec != r.tv_sec) {
		return l.tv_sec < r.tv_sec;
	}
	return l.tv_nsec < r.tv_nsec;
}
/*
 * Computes `l - r`, borrowing one second when the nanosecond difference
 * would be negative.
 */
struct timespec timespec_sub(const struct timespec l, const struct timespec r) {
	struct timespec diff = {
		.tv_sec = l.tv_sec - r.tv_sec,
		.tv_nsec = l.tv_nsec - r.tv_nsec,
	};
	if(diff.tv_nsec < 0) {
		diff.tv_sec -= 1;
		diff.tv_nsec += 1000000000L;
	}
	return diff;
}
/*
 * Returns `l` advanced by `r` seconds.
 *
 * `r` is split into whole seconds (rounded towards negative infinity) and a
 * non-negative fraction, and both parts are added with integer arithmetic.
 * Adding them in single precision would drop whole seconds once tv_sec
 * exceeds what a float can represent exactly. The result is normalised so
 * that tv_nsec stays below one second.
 */
struct timespec timespec_addf(const struct timespec l, const float r) {
	// Integer floor of r: the cast truncates towards zero, so step down once
	// for negative non-integers.
	long whole = (long) r;
	if((float) whole > r) {
		whole--;
	}
	// Both operands are exactly representable as doubles; fraction is in [0, 1).
	const long fractionNs = (long) (((double) r - (double) whole) * 1e9);

	struct timespec ret;
	ret.tv_sec = l.tv_sec + whole;
	ret.tv_nsec = l.tv_nsec + fractionNs;
	if(ret.tv_nsec >= 1000000000L) {
		ret.tv_sec++;
		ret.tv_nsec -= 1000000000L;
	}
	return ret;
}
/*
 * Returns `l + r`, carrying into the seconds field whenever the nanosecond
 * sum reaches a full second (including exactly 1,000,000,000 ns).
 */
struct timespec timespec_add(const struct timespec l, const struct timespec r) {
	struct timespec ret;
	ret.tv_sec = l.tv_sec + r.tv_sec;
	ret.tv_nsec = l.tv_nsec + r.tv_nsec;
	if(ret.tv_nsec >= 1000000000L) {
		ret.tv_nsec -= 1000000000L;
		ret.tv_sec++;
	}
	return ret;
}
/*
 * Converts `t` to seconds as a single-precision float.
 */
float timespecToFloat(const struct timespec t) {
	const float fractional = t.tv_nsec / 1000000000.f;
	return t.tv_sec + fractional;
}
// Argument bundle for compile_thread. CHi_BeginCompilation allocates one on
// the heap per run; compile_thread frees it before returning.
struct CompileCtx {
// Graph that the compilation thread evaluates.
CHiNodeGraph *ng;
};
/*
 * Thread entry point for one compilation run. `ctx_` is a heap-allocated
 * struct CompileCtx, which is freed here.
 *
 * 1. Resets the graph time and starts every node (Start if present,
 *    otherwise Perform). If a node reports failure, eventOnError is raised
 *    for it (when installed), the nodes started before it are stopped in
 *    reverse order and the run ends without the stop-complete event.
 * 2. Live mode: steps the graph against the monotonic clock, spinning until
 *    roughly 1/30 s has passed per frame. Otherwise: steps the graph at
 *    fixed 1/30 s increments as fast as possible. Both loops end when the
 *    duration (unless it is -1) is reached or a stop was requested.
 * 3. Stops every node, raises eventOnStopComplete (when installed), marks
 *    the graph READY and flushes the trace.
 *
 * Always returns NULL.
 */
void *compile_thread(void *ctx_) {
	struct CompileCtx *ctx = ctx_;
	ctx->ng->time = ctx->ng->timedelta = 0;
	puts("START");
	{
		ssize_t nIdx;
		for(nIdx = 0; nIdx < ctx->ng->count; nIdx++) {
			bool success;
			if(ctx->ng->nodes[nIdx]->Start) {
				success = ctx->ng->nodes[nIdx]->Start(ctx->ng->nodes[nIdx]);
			} else {
				success = ctx->ng->nodes[nIdx]->Perform(ctx->ng->nodes[nIdx]);
			}
			if(!success) {
				break;
			}
		}
		if(nIdx != ctx->ng->count) {
			// Starting failed; stop all previous nodes
			if(ctx->ng->eventOnError) {
				// The callback is optional everywhere else, so it is here too.
				ctx->ng->eventOnError(ctx->ng, ctx->ng->nodes[nIdx]);
			}
			nIdx--;
			for(; nIdx >= 0; nIdx--) {
				if(ctx->ng->nodes[nIdx]->Stop) {
					ctx->ng->nodes[nIdx]->Stop(ctx->ng->nodes[nIdx]);
				}
			}
			goto stop;
		}
	}
	if(CHi_GetMode() == CUTIHI_MODE_LIVE) {
		struct timespec start;
		clock_gettime(CLOCK_MONOTONIC, &start);
		struct timespec finish = timespec_addf(start, ctx->ng->duration);
		for(size_t frm = 0; ctx->ng->compilationStatus != CUTIHI_COMP_KILL_YOURSELF; frm++) {
			struct timespec now;
			clock_gettime(CLOCK_MONOTONIC, &now);
			if(ctx->ng->duration != -1 && timespec_less(finish, now)) {
				break;
			}
			struct timespec end = timespec_addf(now, 0.033333333333333333333333);
			CHi_Time_Set(ctx->ng, timespecToFloat(timespec_sub(now, start)));
			perform_step(ctx->ng);
			// Busy-wait for the remainder of the frame slot.
			do {
				clock_gettime(CLOCK_MONOTONIC, &now);
			} while(timespec_less(now, end));
		}
	} else {
		for(uint64_t frm = 0; ctx->ng->compilationStatus != CUTIHI_COMP_KILL_YOURSELF && (ctx->ng->duration == -1 || frm < ctx->ng->duration * 30);) {
			CHi_Time_Set(ctx->ng, frm / 30.f);
			perform_step(ctx->ng);
			frm++;
		}
	}
	for(size_t nIdx = 0; nIdx < ctx->ng->count; nIdx++) {
		if(ctx->ng->nodes[nIdx]->Stop) {
			ctx->ng->nodes[nIdx]->Stop(ctx->ng->nodes[nIdx]);
		}
	}
	puts("END");
	if(ctx->ng->eventOnStopComplete) {
		ctx->ng->eventOnStopComplete(ctx->ng);
	}
stop:
	ctx->ng->compilationStatus = CUTIHI_COMP_READY;
	free(ctx);
	mtr_flush();
	return NULL;
}
/*
 * Starts a compilation run for `ng` on a background thread.
 *
 * The graph is marked RUNNING before the thread is created. The thread is
 * detached because its handle is not kept, so nothing could ever join it.
 * If the context cannot be allocated nothing happens; if the thread cannot
 * be created the graph is put back into the READY state.
 */
CUTIVIS void CHi_BeginCompilation(CHiNodeGraph *ng) {
	struct CompileCtx *ctx = calloc(1, sizeof(*ctx));
	if(!ctx) {
		return;
	}
	ctx->ng = ng;
	ng->compilationStatus = CUTIHI_COMP_RUNNING;
	pthread_t thrd;
	if(pthread_create(&thrd, NULL, &compile_thread, ctx) != 0) {
		// compile_thread never ran, so undo what it would have cleaned up.
		ng->compilationStatus = CUTIHI_COMP_READY;
		free(ctx);
		return;
	}
	pthread_detach(thrd);
}
/*
 * Asks a running compilation to end by switching the graph's status from
 * RUNNING to KILL_YOURSELF, which the compilation thread polls. Has no
 * effect in any other state and does not wait for the thread.
 */
CUTIVIS void CHi_StopCompilation(CHiNodeGraph *ng) {
	if(ng->compilationStatus != CUTIHI_COMP_RUNNING) {
		return;
	}
	ng->compilationStatus = CUTIHI_COMP_KILL_YOURSELF;
}
// Private state of the image node. Allocated by CHi_Image and accessed in
// image_perform by casting the CHiPubNode pointer, which relies on `pubn`
// being the first member.
typedef struct {
// Public node header.
CHiPubNode pubn;
// Path string compared against the requested file name to decide whether
// the cached image is stale.
char *cachePath;
// Decoded image served as the node's output; NULL until a file was loaded.
CHiImage *cacheImg;
} ImageNode;
/*
 * Perform callback of the image node.
 *
 * Loads the file named by the CUTIHI_IMAGE_IN_FILE sink, converts it to
 * 16-bit BGRA, applies gamma to the colour channels and caches the result.
 * The cache is keyed by file name, so the file is only decoded again when
 * the name changes or the previous attempt failed. The cached image is
 * exposed on source 0 while the node is active, NULL otherwise.
 *
 * An unset sink is ignored; a sink of the wrong type or an unloadable file
 * raises error slot 0. Always returns 1.
 */
static int image_perform(CHiPubNode *node) {
	ImageNode *internal = (ImageNode*) node;
	node->sources->type = CUTIHI_VAL_SAMPLE;
	CHiValue *fileSink = &node->sinks[CUTIHI_IMAGE_IN_FILE];
	// Validate the type before touching data.text: for any other type that
	// field does not hold a string pointer.
	if(fileSink->type == CUTIHI_VAL_NONE) {
		return 1;
	}
	if(fileSink->type != CUTIHI_VAL_TEXT) {
		node->errors.active[0] = true;
		strncpy(node->errors.code[0], "invalid type", CUTIHI_ERR_SIZE);
		node->errors.sink[0] = CUTIHI_IMAGE_IN_FILE;
		return 1;
	}
	const char *fn = fileSink->data.text;
	if(fn && (!internal->cachePath || strcmp(internal->cachePath, fn))) {
		if(internal->cacheImg) {
			CHi_Image_Free(internal->cacheImg);
			internal->cacheImg = NULL;
		}
		// Remember which file the cache belongs to; otherwise every call
		// would look like a file change and reload the image.
		free(internal->cachePath);
		internal->cachePath = strdup(fn);
	}
	if(!internal->cacheImg) {
		struct sail_image *simg;
		if(!fn || sail_load_from_file(fn, &simg) != SAIL_OK) {
			node->errors.active[0] = true;
			strncpy(node->errors.code[0], "invalid file", CUTIHI_ERR_SIZE);
			node->errors.sink[0] = CUTIHI_IMAGE_IN_FILE;
			return 1;
		}
		struct sail_image *cimg;
		if(sail_convert_image(simg, SAIL_PIXEL_FORMAT_BPP64_BGRA, &cimg) != SAIL_OK) {
			sail_destroy_image(simg);
			node->errors.active[0] = true;
			strncpy(node->errors.code[0], "invalid file", CUTIHI_ERR_SIZE);
			node->errors.sink[0] = CUTIHI_IMAGE_IN_FILE;
			return 1;
		}
		sail_destroy_image(simg);
		simg = NULL;
		// Row stride rounded up to 16 bytes for the aligned loads below.
		CHiImage *img = CHi_Image_New(2, 4, (cimg->bytes_per_line + 15) & ~15, cimg->width, cimg->height, NULL);
		CHi_Restride(cimg->pixels, img->data16, cimg->bytes_per_line, img->stride, img->height);
		internal->cacheImg = img;
		for(size_t y = 0; y < img->height; y++) {
			for(size_t x = 0; x < img->stride; x += 16) {
				__m128i pixels = _mm_load_si128((__m128i*) ((uintptr_t) img->data16 + y * img->stride + x));
				pixels = apply_gamma_epi16(pixels, _mm_set_ps(1.0f, 2.2f, 2.2f, 2.2f));
				_mm_stream_si128((__m128i*) ((uintptr_t) img->data16 + y * img->stride + x), pixels);
			}
		}
		sail_destroy_image(cimg);
	}
	if(CHi_Node_Active(node)) {
		node->sources->data.sample = internal->cacheImg;
	} else {
		node->sources->data.sample = NULL;
	}
	return 1;
}
/*
 * Creates an image node: one sink (the file name), one source (the decoded
 * image), no Start/Stop callbacks and an empty cached path.
 */
CUTIVIS CHiPubNode *CHi_Image() {
	ImageNode *img = calloc(1, sizeof(*img));
	CHiPubNode *n = &img->pubn;

	n->type = CUTIHI_T('CIma','ge ');
	n->Start = NULL;
	n->Stop = NULL;
	n->Perform = image_perform;

	n->sinkCount = 1;
	n->sinks = calloc(1, sizeof(*n->sinks));
	n->sourceCount = 1;
	n->sources = calloc(1, sizeof(*n->sources));

	img->cachePath = strdup("");
	return n;
}
/*
 * Perform callback of the embed node.
 *
 * Copies the main image (sink 0) into a fresh output image and alpha-blends
 * the smaller images on top of it. Small image k is read from sink 1 + 3k
 * and positioned by the first two components of sink 2 + 3k; unset small
 * images are skipped. Negative positions crop the small image.
 *
 * Without a main image the output is cleared to NULL. Always returns 1.
 */
static int embed_perform(CHiPubNode *node) {
	MTR_BEGIN("CHi", "embed_perform");
	node->sources[0].type = CUTIHI_VAL_SAMPLE;
	CHiImage *main = CHi_Crawl(&node->sinks[0])->data.sample;
	if(node->sources->data.sample) {
		CHi_Image_Free(node->sources->data.sample);
		node->sources->data.sample = NULL;
	}
	if(!main) {
		// Nothing is connected to the main input yet.
		MTR_END("CHi", "embed_perform");
		return 1;
	}
	CHiImage *dest = node->sources->data.sample = CHi_Image_New(2, 4, main->stride, main->width, main->height, NULL);
	memcpy(dest->data16, main->data16, main->stride * main->height);
	for(int sid = 0; sid < (node->sinkCount - 1) / 3; sid++) {
		CHiImage *sub = CHi_Crawl(&node->sinks[1 + sid * 3])->data.sample;
		if(!sub) continue;
		int sy = 0;
		int dy = (int16_t) CHi_Crawl(&node->sinks[2 + sid * 3])->data.vec4[1];
		if(dy < 0) {
			sy = -dy;
			dy = 0;
		}
		for(; sy < sub->height && dy < dest->height; sy++, dy++) {
			int sx = 0;
			int dx = (int16_t) CHi_Crawl(&node->sinks[2 + sid * 3])->data.vec4[0];
			if(dx < 0) {
				sx = -dx;
				dx = 0;
			}
			// Two 4x16-bit pixels per iteration.
			for(; sx < sub->width && dx < dest->width; sx += 2, dx += 2) {
				__m128i bottom = _mm_loadu_si128((__m128i*) ((uintptr_t) dest->data16 + dy * dest->stride + dx * 8));
				__m128i top = _mm_loadu_si128((__m128i*) ((uintptr_t) sub->data16 + sy * sub->stride + sx * 8));
				// Broadcast each pixel's alpha word to all four of its channels.
				__m128i alpha = _mm_shuffle_epi8(top, _mm_set_epi8(15, 14, 15, 14, 15, 14, 15, 14, 7, 6, 7, 6, 7, 6, 7, 6));
				__m128i invAlpha = _mm_sub_epi16(_mm_set1_epi16(0xFFFF), alpha);
				__m128i result = _mm_add_epi16(_mm_mulhi_epu16(top, alpha), _mm_mulhi_epu16(bottom, invAlpha));
				_mm_storeu_si128((__m128i*) ((uintptr_t) dest->data16 + dy * dest->stride + dx * 8), result);
			}
		}
	}
	MTR_END("CHi", "embed_perform");
	return 1;
}
/*
 * Creates an embed node with one main-image sink plus three sinks for each
 * of the CUTIHI_EMBED_MAX_SMALLS embeddable images, and a single source.
 */
CUTIVIS CHiPubNode *CHi_Embed() {
	CHiPubNode *n = calloc(1, sizeof(*n));
	n->type = CUTIHI_T('CEmb','ed ');
	n->Start = NULL;
	n->Stop = NULL;
	n->Perform = embed_perform;

	n->sinkCount = 1 + 3 * CUTIHI_EMBED_MAX_SMALLS;
	n->sinks = calloc(n->sinkCount, sizeof(*n->sinks));

	n->sourceCount = 1;
	n->sources = calloc(n->sourceCount, sizeof(*n->sources));
	return n;
}
/*
 * Converts a colour component to a 16-bit channel value. Components outside
 * [0, 1] (and NaN) are clamped, because converting an out-of-range float to
 * an integer type is undefined.
 */
static uint16_t constantsample_channel(float component) {
	if(!(component > 0)) {
		return 0;
	}
	if(component >= 1) {
		return 65535;
	}
	return component * 65535;
}
/*
 * Perform callback of the constant-colour node.
 *
 * Produces an opaque image filled with the colour in sink 0 (components
 * 0, 1, 2 written to channels 2, 1, 0), sized by the first two components
 * of sink 1 (each at least 1). The pixels are only filled while the node is
 * active. Always returns 1.
 */
static int constantsample_perform(CHiPubNode *node) {
	node->sources[0].type = CUTIHI_VAL_SAMPLE;
	if(node->sources->data.sample) CHi_Image_Free(node->sources->data.sample);
	CHiValue *sink = CHi_Crawl(&node->sinks[0]);
	CHiValue *sz = CHi_Crawl(&node->sinks[1]);
	size_t w = sz->data.vec4[0] < 1 ? 1 : sz->data.vec4[0];
	size_t h = sz->data.vec4[1] < 1 ? 1 : sz->data.vec4[1];
	CHiImage *img = CHi_Image_New(2, 4, 8 * w, w, h, NULL);
	if(CHi_Node_Active(node)) {
		const uint16_t c0 = constantsample_channel(sink->data.vec4[2]);
		const uint16_t c1 = constantsample_channel(sink->data.vec4[1]);
		const uint16_t c2 = constantsample_channel(sink->data.vec4[0]);
		for(size_t i = 0; i < w * h; i++) {
			img->data16[i * 4 + 0] = c0;
			img->data16[i * 4 + 1] = c1;
			img->data16[i * 4 + 2] = c2;
			img->data16[i * 4 + 3] = 65535;
		}
	}
	node->sources->data.sample = img;
	return 1;
}
/*
 * Creates a constant-colour node with two sinks and one source.
 *
 * The sink layout follows constantsample_perform: sink 0 is the colour
 * (defaults to all zeros) and sink 1 is the image size (defaults to
 * 1280 x 720). The size default has to live in sink 1; stored in sink 0 it
 * would be read as a colour far outside the valid range.
 */
CUTIVIS CHiPubNode *CHi_ConstantSample() {
	CHiPubNode *n = calloc(1, sizeof(*n));
	n->type = CUTIHI_T('CCns','tCol');
	n->Start = n->Stop = NULL;
	n->Perform = constantsample_perform;
	n->sinkCount = 2;
	n->sinks = calloc(sizeof(*n->sinks), n->sinkCount);
	n->sourceCount = 1;
	n->sources = calloc(sizeof(*n->sources), n->sourceCount);
	n->sinks[0].type = CUTIHI_VAL_VEC4;
	n->sinks[1].type = CUTIHI_VAL_VEC4;
	n->sinks[1].data.vec4[0] = 1280;
	n->sinks[1].data.vec4[1] = 720;
	return n;
}
/*
 * Lane-wise 32-bit multiply keeping the low 32 bits of each product, built
 * from SSE2 operations only. The original comment credits the technique to
 * Agner Fog's code.
 *
 * NOTE(review): this shares its name with the SSE4.1 intrinsic declared by
 * <smmintrin.h>; confirm the two do not clash in the build configuration.
 */
static __m128i _mm_mullo_epi32(__m128i a, __m128i b) {
	// _mm_mul_epu32 multiplies lanes 0 and 2 only, giving 64-bit products.
	const __m128i evenProducts = _mm_mul_epu32(a, b);                       // (-,a2*b2,-,a0*b0)
	// Shuffle 0xF5 moves lanes 1 and 3 into the even positions.
	const __m128i oddProducts = _mm_mul_epu32(_mm_shuffle_epi32(a, 0xF5),
	                                          _mm_shuffle_epi32(b, 0xF5)); // (-,a3*b3,-,a1*b1)
	// Interleave the low halves of the products back into lane order.
	const __m128i lanes01 = _mm_unpacklo_epi32(evenProducts, oddProducts);  // (-,-,a1*b1,a0*b0)
	const __m128i lanes23 = _mm_unpackhi_epi32(evenProducts, oddProducts);  // (-,-,a3*b3,a2*b2)
	return _mm_unpacklo_epi64(lanes01, lanes23);                            // (ab3,ab2,ab1,ab0)
}
/*
 * Perform callback of the modulate node.
 *
 * Reads the image from sink 0 and three scalar controls from the first
 * component of sinks 1, 2 and 3 (named V, S and H below; H is converted
 * from degrees to radians). Each pixel's colour channels are multiplied by
 * a 3x3 fixed-point matrix derived from those controls; the fourth channel
 * is passed through. The result is written to a new image on source 0.
 *
 * An unset input is ignored, a non-image input raises error slot 0, and an
 * inactive node or a missing image clears the output. Always returns 1.
 */
static int modulate_perform(CHiPubNode *node) {
	MTR_BEGIN("CHi", "modulate_perform");
	CHiValue *imgsrc = CHi_Crawl(&node->sinks[0]);
	if(!imgsrc || imgsrc->type == CUTIHI_VAL_NONE) {
		MTR_END("CHi", "modulate_perform");
		return 1;
	}
	if(imgsrc->type != CUTIHI_VAL_SAMPLE) {
		node->errors.active[0] = true;
		strncpy(node->errors.code[0], "invalid type", CUTIHI_ERR_SIZE);
		node->errors.sink[0] = 0;
		MTR_END("CHi", "modulate_perform");
		return 1;
	}
	node->sources[0].type = CUTIHI_VAL_SAMPLE;
	if(node->sources->data.sample) CHi_Image_Free(node->sources->data.sample);
	CHiImage *src = imgsrc->data.sample;
	if(!CHi_Node_Active(node) || !src) {
		// Upstream nodes publish a NULL image while they are inactive.
		node->sources->data.sample = NULL;
		MTR_END("CHi", "modulate_perform");
		return 1;
	}
	assert(src->stride % 16 == 0);
	CHiImage *dst = CHi_Image_New(2, 4, src->stride, src->width, src->height, NULL);
	node->sources->data.sample = dst;
	float V = CHi_Crawl(&node->sinks[1])->data.vec4[0];
	float S = CHi_Crawl(&node->sinks[2])->data.vec4[0];
	float H = CHi_Crawl(&node->sinks[3])->data.vec4[0] * 3.1415926535897 / 180;
	float sH = sinf(H);
	float cH = cosf(H);
	// Matrix rows in 1.15 fixed point (scaled by 32768).
	__m128i row1 = _mm_set_epi32(
		0,
		32768 * (+0.180472 * S * sH + 0.7874000 * S * cH + 0.2126 * V),
		32768 * (-0.715274 * S * cH + 0.6069280 * S * sH + 0.7152 * V),
		32768 * (-0.787400 * S * sH - 0.0721258 * S * cH + 0.0722 * V)
	);
	__m128i row2 = _mm_set_epi32(
		0,
		32768 * (-0.212585 * S * cH - 0.1472940 * S * sH + 0.2126 * V),
		32768 * (-0.095334 * S * sH + 0.2847960 * S * cH + 0.7152 * V),
		32768 * (-0.072211 * S * cH + 0.2426280 * S * sH + 0.0722 * V)
	);
	__m128i row3 = _mm_set_epi32(
		0,
		32768 * (-0.212652 * S * cH + 0.9278000 * S * sH + 0.2126 * V),
		32768 * (-0.842814 * S * sH - 0.7151480 * S * cH + 0.7152 * V),
		32768 * (-0.084987 * S * sH + 0.9278000 * S * cH + 0.0722 * V)
	);
	// Two pixels (16 bytes) per iteration; the low and the high pixel are
	// processed by the two identical blocks below, differing only in where
	// the results are placed.
	for(size_t b = 0; b < dst->stride * dst->height; b += 16) {
		__m128i rgba2U16 = _mm_load_si128((__m128i*) ((uintptr_t) src->data16 + b));
		__m128i rgba2S16 = _mm_srli_epi16(rgba2U16, 1);
		__m128i rgbaS16Lo = _mm_unpacklo_epi16(rgba2S16, _mm_setzero_si128());
		__m128i rgbaS16Hi = _mm_unpackhi_epi16(rgba2S16, _mm_setzero_si128());
		rgba2S16 = _mm_setzero_si128();
		do {
			__m128i newR = _mm_mullo_epi32(rgbaS16Lo, row1);
			__m128i newG = _mm_mullo_epi32(rgbaS16Lo, row2);
			__m128i newB = _mm_mullo_epi32(rgbaS16Lo, row3);
			__m128i newA = _mm_mullo_epi32(rgbaS16Lo, _mm_set_epi32(1, 0, 0, 0));
			newR = _mm_srai_epi32(newR, 16);
			newG = _mm_srai_epi32(newG, 16);
			newB = _mm_srai_epi32(newB, 16);
			newR = _mm_hadd_epi32(newR, _mm_setzero_si128());
			newG = _mm_hadd_epi32(newG, _mm_setzero_si128());
			newB = _mm_hadd_epi32(newB, _mm_setzero_si128());
			newA = _mm_hadd_epi32(newA, _mm_setzero_si128());
			newR = _mm_hadd_epi32(newR, _mm_setzero_si128());
			newG = _mm_hadd_epi32(newG, _mm_setzero_si128());
			newB = _mm_hadd_epi32(newB, _mm_setzero_si128());
			newA = _mm_hadd_epi32(newA, _mm_setzero_si128());
			newR = _mm_max_epi16(_mm_min_epi16(newR, _mm_set1_epi32(16383)), _mm_set1_epi32(0));
			newG = _mm_max_epi16(_mm_min_epi16(newG, _mm_set1_epi32(16383)), _mm_set1_epi32(0));
			newB = _mm_max_epi16(_mm_min_epi16(newB, _mm_set1_epi32(16383)), _mm_set1_epi32(0));
			newA = _mm_max_epi16(_mm_min_epi16(newA, _mm_set1_epi32(16383)), _mm_set1_epi32(0));
			newR = _mm_shuffle_epi8(newR, _mm_set_epi8(-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, 1, 0, -128, -128, -128, -128));
			newG = _mm_shuffle_epi8(newG, _mm_set_epi8(-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, 1, 0, -128, -128));
			newB = _mm_shuffle_epi8(newB, _mm_set_epi8(-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, 1, 0));
			newA = _mm_shuffle_epi8(newA, _mm_set_epi8(-128, -128, -128, -128, -128, -128, -128, -128, 1, 0, -128, -128, -128, -128, -128, -128));
			rgba2S16 = _mm_or_si128(rgba2S16, _mm_or_si128(_mm_or_si128(_mm_or_si128(newR, newG), newB), newA));
		} while(0);
		do {
			__m128i newR = _mm_mullo_epi32(rgbaS16Hi, row1);
			__m128i newG = _mm_mullo_epi32(rgbaS16Hi, row2);
			__m128i newB = _mm_mullo_epi32(rgbaS16Hi, row3);
			__m128i newA = _mm_mullo_epi32(rgbaS16Hi, _mm_set_epi32(1, 0, 0, 0));
			newR = _mm_srai_epi32(newR, 16);
			newG = _mm_srai_epi32(newG, 16);
			newB = _mm_srai_epi32(newB, 16);
			newR = _mm_hadd_epi32(newR, _mm_setzero_si128());
			newG = _mm_hadd_epi32(newG, _mm_setzero_si128());
			newB = _mm_hadd_epi32(newB, _mm_setzero_si128());
			newA = _mm_hadd_epi32(newA, _mm_setzero_si128());
			newR = _mm_hadd_epi32(newR, _mm_setzero_si128());
			newG = _mm_hadd_epi32(newG, _mm_setzero_si128());
			newB = _mm_hadd_epi32(newB, _mm_setzero_si128());
			newA = _mm_hadd_epi32(newA, _mm_setzero_si128());
			newR = _mm_max_epi16(_mm_min_epi16(newR, _mm_set1_epi32(16383)), _mm_set1_epi32(0));
			newG = _mm_max_epi16(_mm_min_epi16(newG, _mm_set1_epi32(16383)), _mm_set1_epi32(0));
			newB = _mm_max_epi16(_mm_min_epi16(newB, _mm_set1_epi32(16383)), _mm_set1_epi32(0));
			newA = _mm_max_epi16(_mm_min_epi16(newA, _mm_set1_epi32(16383)), _mm_set1_epi32(0));
			newR = _mm_shuffle_epi8(newR, _mm_set_epi8(-128, -128, 1, 0, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128));
			newG = _mm_shuffle_epi8(newG, _mm_set_epi8(-128, -128, -128, -128, 1, 0, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128));
			newB = _mm_shuffle_epi8(newB, _mm_set_epi8(-128, -128, -128, -128, -128, -128, 1, 0, -128, -128, -128, -128, -128, -128, -128, -128));
			newA = _mm_shuffle_epi8(newA, _mm_set_epi8(1, 0, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128));
			rgba2S16 = _mm_or_si128(rgba2S16, _mm_or_si128(_mm_or_si128(_mm_or_si128(newR, newG), newB), newA));
		} while(0);
		rgba2U16 = _mm_slli_epi16(rgba2S16, 2);
		_mm_store_si128((__m128i*) ((uintptr_t) dst->data16 + b), rgba2U16);
	}
	MTR_END("CHi", "modulate_perform");
	return 1;
}
/*
 * Creates a modulate node with four sinks and one source.
 *
 * The sink layout follows modulate_perform: sink 0 is the image input and
 * is left unset, sinks 1 and 2 are the V and S factors (both default to 1)
 * and sink 3 is the H angle in degrees (defaults to 0). Placing the scalar
 * defaults on sinks 0 and 1 would turn the unconnected image input into a
 * type error and leave S at zero.
 */
CUTIVIS CHiPubNode *CHi_Modulate() {
	CHiPubNode *n = calloc(1, sizeof(*n));
	n->type = CUTIHI_T('CMod','ulat');
	n->Start = n->Stop = NULL;
	n->Perform = modulate_perform;
	n->sinkCount = 4;
	n->sinks = calloc(sizeof(*n->sinks), n->sinkCount);
	n->sourceCount = 1;
	n->sources = calloc(sizeof(*n->sources), n->sourceCount);
	n->sinks[1].type = CUTIHI_VAL_VEC4;
	n->sinks[1].data.vec4[0] = 1;
	n->sinks[2].type = CUTIHI_VAL_VEC4;
	n->sinks[2].data.vec4[0] = 1;
	n->sinks[3].type = CUTIHI_VAL_VEC4;
	return n;
}
/*
 * Recomputes the `current` value of every keyframe set in the graph for the
 * graph's present time.
 *
 * Between two keyframes, vec4 values are interpolated linearly and all
 * other types take the value of the earlier keyframe. At or before the
 * first keyframe and after the last one, the nearest keyframe's value is
 * used; for vec4 values in CONSTANT extrapolation mode it is additionally
 * advanced by the extrapolation parameters times the distance in time.
 */
static void update_keyed_values(CHiNodeGraph *ng) {
	for(size_t k = 0; k < ng->keyframesList.count; k++) {
		CHiKeyframes *kfs = ng->keyframesList.keyframes[k];
		kfs->current.type = kfs->type;

		float now = ng->time;
		// Index of the first keyframe at or after `now`.
		size_t idx = bisect(&now, kfs->times, kfs->count, sizeof(now), float_compar);

		if(idx != 0 && idx != kfs->count) {
			// Strictly inside the keyframe range.
			if(kfs->type != CUTIHI_VAL_VEC4) {
				kfs->current.data = kfs->values[idx - 1];
				continue;
			}
			float alpha = (now - kfs->times[idx - 1]) / (kfs->times[idx] - kfs->times[idx - 1]);
			for(int c = 0; c < 4; c++) {
				kfs->current.data.vec4[c] = kfs->values[idx - 1].vec4[c] + (kfs->values[idx].vec4[c] - kfs->values[idx - 1].vec4[c]) * alpha;
			}
			continue;
		}

		// Outside the range: anchor on the first or the last keyframe.
		size_t edge = idx == 0 ? 0 : kfs->count - 1;
		kfs->current.data = kfs->values[edge];
		if(kfs->current.type == CUTIHI_VAL_VEC4 && kfs->extrapolationMode == CUTIHI_EXTRAPOLATION_CONSTANT) {
			for(int c = 0; c < 4; c++) {
				kfs->current.data.vec4[c] += (now - kfs->times[edge]) * kfs->extrapolationParameter[c];
			}
		}
	}
}
/*
 * Perform callback of the time node: publishes the graph's current time in
 * the first component of source 0. Always returns 1.
 */
static int time_perform(CHiPubNode *node) {
	CHiValue *out = &node->sources[0];
	out->type = CUTIHI_VAL_VEC4;
	out->data.vec4[0] = node->ng->time;
	return 1;
}
/*
 * Moves the graph to time `f`. The distance from the previous time is kept
 * as the time delta, and all keyframed values are re-evaluated for the new
 * time.
 */
CUTIVIS void CHi_Time_Set(CHiNodeGraph *ng, float f) {
	// The delta has to be taken before the old time is overwritten.
	ng->timedelta = f - ng->time;
	ng->time = f;

	update_keyed_values(ng);
}
/*
 * Returns the graph's current time, as last set by CHi_Time_Set.
 */
CUTIVIS float CHi_Time_Get(CHiNodeGraph *ng) {
	const float current = ng->time;
	return current;
}
/*
 * Returns the difference between the two most recent times given to
 * CHi_Time_Set.
 */
CUTIVIS float CHi_Time_GetDelta(CHiNodeGraph *ng) {
	const float delta = ng->timedelta;
	return delta;
}
/*
 * Creates a time node: no sinks, one source carrying the graph time.
 */
CUTIVIS CHiPubNode *CHi_Time() {
	CHiPubNode *n = calloc(1, sizeof(*n));
	n->type = CUTIHI_T('CTim','e ');
	n->Perform = time_perform;
	n->Start = NULL;
	n->Stop = NULL;

	n->sinks = NULL;
	n->sinkCount = 0;

	n->sources = calloc(1, sizeof(*n->sources));
	n->sourceCount = 1;
	return n;
}
// Private state of the text node. Allocated by CHi_Text and accessed in
// text_perform by casting the CHiPubNode pointer, which relies on `pubn`
// being the first member.
struct TextNode {
// Public node header.
CHiPubNode pubn;
// Pango objects created in text_perform whenever the font name changes.
PangoFontMap *pfontmap;
PangoContext *pcontext;
PangoFontDescription * pfontdesc;
PangoLayout *playout;
// Copy of the markup last handed to the layout; NULL forces it to be set again.
char *cacheText;
// Copy of the font name the Pango objects were built for.
char *cacheFontName;
};
/*
 * Perform callback of the text node.
 *
 * Sinks: 0 = markup text, 1 = colour, 2 = resolution (first component),
 * 3 = font name. The Pango font map, context, font description and layout
 * are rebuilt when the font name changes (releasing the previous set), and
 * the layout's markup is refreshed when the text changes. While the node is
 * active, the layout is rendered to an 8-bit coverage bitmap, which is
 * combined with the colour into a new image on source 0; otherwise the
 * output is cleared to NULL. Always returns 1.
 */
static int text_perform(CHiPubNode *n) {
	MTR_BEGIN("CHi", "text_perform");
	struct TextNode *this = (struct TextNode*) n;
	CHiValue *valFontName = CHi_Crawl(&n->sinks[3]);
	CHiValue *valDPI = CHi_Crawl(&n->sinks[2]);
	CHiValue *valCol = CHi_Crawl(&n->sinks[1]);
	CHiValue *valText = CHi_Crawl(&n->sinks[0]);
	if(!this->cacheFontName || strcmp(this->cacheFontName, valFontName->data.text)) {
		if(this->cacheFontName) free(this->cacheFontName);
		this->cacheFontName = strdup(valFontName->data.text);
		// Release the objects built for the previous font before replacing them.
		if(this->playout) g_object_unref(this->playout);
		if(this->pfontdesc) pango_font_description_free(this->pfontdesc);
		if(this->pcontext) g_object_unref(this->pcontext);
		if(this->pfontmap) g_object_unref(this->pfontmap);
		this->pfontmap = pango_ft2_font_map_new();
		pango_ft2_font_map_set_resolution(PANGO_FT2_FONT_MAP(this->pfontmap), 72, 72);
		this->pcontext = pango_font_map_create_context(this->pfontmap);
		pango_context_set_language(this->pcontext, pango_language_from_string("en_US"));
		pango_context_set_base_dir(this->pcontext, PANGO_DIRECTION_LTR);
		this->pfontdesc = pango_font_description_from_string(this->cacheFontName);
		this->playout = pango_layout_new(this->pcontext);
		pango_layout_set_font_description(this->playout, this->pfontdesc);
		// The new layout has no text yet; force the markup to be set below.
		free(this->cacheText);
		this->cacheText = NULL;
	}
	if(!this->cacheText || strcmp(this->cacheText, valText->data.text)) {
		if(this->cacheText) free(this->cacheText);
		this->cacheText = strdup(valText->data.text);
		pango_layout_set_markup(this->playout, valText->data.text, -1);
	}
	pango_ft2_font_map_set_resolution(PANGO_FT2_FONT_MAP(this->pfontmap), valDPI->data.vec4[0], valDPI->data.vec4[0]);
	PangoRectangle extents;
	pango_layout_get_extents(this->playout, NULL, &extents);
	n->sources[0].type = CUTIHI_VAL_SAMPLE;
	if(n->sources->data.sample) {
		CHi_Image_Free(n->sources->data.sample);
		n->sources->data.sample = NULL;
	}
	if(!CHi_Node_Active(n)) {
		MTR_END("CHi", "text_perform");
		return 1;
	}
	// Width rounded up to a multiple of 16 pixels.
	size_t width = (PANGO_PIXELS(extents.width) + 15) & ~15;
	CHiImage *chiret = CHi_Image_New(2, 4, 8 * width, width, PANGO_PIXELS(extents.height), NULL);
	n->sources->data.sample = chiret;
	FT_Bitmap bmp = {};
	FT_Bitmap_New(&bmp);
	bmp.width = chiret->width;
	bmp.rows = chiret->height;
	bmp.buffer = calloc(bmp.width, bmp.rows);
	bmp.pitch = chiret->width;
	bmp.pixel_mode = FT_PIXEL_MODE_GRAY;
	bmp.num_grays = 256;
	pango_ft2_render_layout(&bmp, this->playout, PANGO_PIXELS(extents.x) + (PANGO_PIXELS(extents.width) + 15) % 16 / 4, PANGO_PIXELS(extents.y));
	// Per-pixel multiplier template: 8-bit colour in the three colour words,
	// 0x0100 in the fourth word.
	__m128i ones = _mm_set1_epi64x(
		(((size_t) (valCol->data.vec4[2] * 255) % 256) << 0) |
		(((size_t) (valCol->data.vec4[1] * 255) % 256) << 16) |
		(((size_t) (valCol->data.vec4[0] * 255) % 256) << 32) |
		0x0100000000000000
	);
	// Two pixels per iteration.
	for(size_t p = 0; p < bmp.width * bmp.rows; p += 2) {
		__m128i alphad0 =
			_mm_mullo_epi16(ones, _mm_set_epi16(bmp.buffer[p + 1], 0xFF, 0xFF, 0xFF, bmp.buffer[p + 0], 0xFF, 0xFF, 0xFF));
		_mm_stream_si128((__m128i*) &chiret->data16[p * 4], alphad0);
	}
	free(bmp.buffer);
	MTR_END("CHi", "text_perform");
	return 1;
}
/*
 * Creates a text node with four sinks and one source. Defaults: sink 0 is
 * the text "Title Text", sink 2 a vec4 whose first component is 256 and
 * sink 3 the font name "Sans-Serif"; sink 1 is left zeroed.
 */
CUTIVIS CHiPubNode *CHi_Text() {
	struct TextNode *text = calloc(1, sizeof(*text));
	CHiPubNode *n = &text->pubn;

	n->type = CUTIHI_T('CTex','t ');
	n->Start = NULL;
	n->Stop = NULL;
	n->Perform = text_perform;

	n->sinkCount = 4;
	n->sinks = calloc(n->sinkCount, sizeof(*n->sinks));
	n->sourceCount = 1;
	n->sources = calloc(n->sourceCount, sizeof(*n->sources));

	CHiValue *textSink = &n->sinks[0];
	textSink->type = CUTIHI_VAL_TEXT;
	textSink->data.text = strdup("Title Text");

	CHiValue *dpiSink = &n->sinks[2];
	dpiSink->type = CUTIHI_VAL_VEC4;
	dpiSink->data.vec4[0] = 256;

	CHiValue *fontSink = &n->sinks[3];
	fontSink->type = CUTIHI_VAL_TEXT;
	fontSink->data.text = strdup("Sans-Serif");
	return n;
}
/*
 * Perform callback of the mixer node.
 *
 * Sums all connected sample inputs with saturating signed 16-bit addition
 * into a new single-channel output on source 0. The output takes its
 * dimensions from the first usable input; all other inputs must match
 * (asserted). Only the first `stride` bytes of each input are mixed.
 * Inputs that are unset or carry no sample data are skipped; with no usable
 * input the output stays NULL. Always returns 1.
 */
static int mixer_perform(CHiPubNode *n) {
	if(n->sinkCount == 0) {
		return 1;
	}
	n->sources[0].type = CUTIHI_VAL_SAMPLE;
	MTR_BEGIN("CHi", "mixer_perform");
	if(n->sources[0].data.sample) {
		CHi_Image_Free(n->sources[0].data.sample);
		n->sources[0].data.sample = NULL;
	}
	size_t width = 0, height = 0, stride = 0;
	for(int s = 0; s < n->sinkCount; s++) {
		CHiValue *val = CHi_Crawl(&n->sinks[s]);
		// Upstream nodes publish a NULL sample while they are inactive.
		if(val && val->type == CUTIHI_VAL_SAMPLE && val->data.sample) {
			if(width == 0 || height == 0) {
				width = val->data.sample->width;
				height = val->data.sample->height;
				stride = val->data.sample->stride;
			} else {
				assert(val->data.sample->width == width && val->data.sample->height == height);
			}
		}
	}
	if(width == 0 || height == 0) {
		MTR_END("CHi", "mixer_perform");
		return 1;
	}
	n->sources[0].data.sample = CHi_Image_New(2, 1, (stride + 15) & ~15, width, height, NULL);
	for(size_t b = 0; b < stride; b += 16) {
		__m128i sum = _mm_setzero_si128();
		for(int s = 0; s < n->sinkCount; s++) {
			CHiValue *val = CHi_Crawl(&n->sinks[s]);
			if(val && val->type == CUTIHI_VAL_SAMPLE && val->data.sample) {
				sum = _mm_adds_epi16(sum, _mm_load_si128((__m128i*) ((uintptr_t) val->data.sample->data16 + b)));
			}
		}
		_mm_stream_si128((__m128i*) ((uintptr_t) n->sources[0].data.sample->data16 + b), sum);
	}
	MTR_END("CHi", "mixer_perform");
	return 1;
}
/*
 * Creates a mixer node with two sinks and one source.
 */
CUTIVIS CHiPubNode *CHi_Mixer() {
	CHiPubNode *n = calloc(1, sizeof(*n));
	n->type = CUTIHI_T('CMix','er ');
	n->Start = NULL;
	n->Stop = NULL;
	n->Perform = mixer_perform;

	n->sinkCount = 2;
	n->sinks = calloc(n->sinkCount, sizeof(*n->sinks));

	n->sourceCount = 1;
	n->sources = calloc(n->sourceCount, sizeof(*n->sources));
	return n;
}
/*
 * Perform callback of the Preview node. It does no work of its own and
 * always returns 1.
 */
static int preview_perform(CHiPubNode *n) {
	(void) n;

	return 1;
}
/*
 * Creates a Preview node: a single sample-typed sink (initially holding no
 * sample) and no sources, performed by preview_perform.
 */
CUTIVIS CHiPubNode *CHi_Preview() {
	CHiPubNode *preview = calloc(1, sizeof(*preview));

	preview->type = CUTIHI_T('CPre','view');

	preview->Stop = NULL;
	preview->Start = NULL;
	preview->Perform = preview_perform;

	// One input, nothing published.
	preview->sinkCount = 1;
	preview->sinks = calloc(preview->sinkCount, sizeof(*preview->sinks));
	preview->sourceCount = 0;
	preview->sources = NULL;

	preview->sinks[0].type = CUTIHI_VAL_SAMPLE;
	preview->sinks[0].data.sample = NULL;

	return preview;
}
/*
 * Computes the 16-bit per-channel multipliers for one pixel.
 *
 * px holds the pixel's four channels, one per 32-bit lane (the caller has
 * already halved them). row2 / row3 are the fixed-point (<<15) chroma
 * coefficient rows; uKey / vKey are the chroma coordinates of the key colour.
 *
 * The distance between the pixel's chroma and the key's chroma is mapped
 * linearly onto 0..65535: distances up to threshold0 give 0, distances of
 * threshold1 or more give 65535.
 *
 * Returns a vector whose four low 16-bit lanes are {65535, 65535, 65535,
 * factor}; the upper 64 bits are zero.
 */
static __m128i chromakey_pixel_gain(__m128i px, __m128i row2, __m128i row3, int16_t uKey, int16_t vKey, float threshold0, float threshold1) {
	__m128i uProd = _mm_mullo_epi32(row2, px);
	__m128i vProd = _mm_mullo_epi32(row3, px);

	// Two horizontal adds collapse the four per-channel products into lane 0.
	uProd = _mm_srai_epi32(uProd, 15);
	uProd = _mm_hadd_epi32(uProd, _mm_setzero_si128());
	uProd = _mm_hadd_epi32(uProd, _mm_setzero_si128());

	vProd = _mm_srai_epi32(vProd, 15);
	vProd = _mm_hadd_epi32(vProd, _mm_setzero_si128());
	vProd = _mm_hadd_epi32(vProd, _mm_setzero_si128());

	__m128 diffU = _mm_cvtepi32_ps(_mm_sub_epi32(uProd, _mm_set1_epi32(uKey)));
	__m128 diffV = _mm_cvtepi32_ps(_mm_sub_epi32(vProd, _mm_set1_epi32(vKey)));

	__m128 distance = _mm_sqrt_ps(_mm_add_ps(_mm_mul_ps(diffU, diffU), _mm_mul_ps(diffV, diffV)));

	// Move the distance from lane 0 into lane 3 (the fourth channel's slot); the other lanes become 0.
	__m128 alpha = (__m128) _mm_bslli_si128((__m128i) distance, 12);
	alpha = _mm_sub_ps(alpha, _mm_set1_ps(threshold0));
	alpha = _mm_max_ps(alpha, _mm_set1_ps(0));
	alpha = _mm_mul_ps(alpha, _mm_set1_ps(65535 / (threshold1 - threshold0)));
	alpha = _mm_min_ps(alpha, _mm_set1_ps(65535));

	// Keep only lane 3 of the factor, put 65535 in the three other lanes, then narrow 32-bit lanes to 16-bit.
	return _mm_hadd_epi16(_mm_add_epi32(_mm_and_si128(_mm_cvtps_epi32(alpha), _mm_set_epi32(0xFFFFFFFF, 0, 0, 0)), _mm_set_epi32(0, 65535, 65535, 65535)), _mm_setzero_si128());
}

/*
 * Perform callback of the ChromaKey node.
 *
 * sinks[0] is the input sample, sinks[1] the key colour (vec4). A new sample
 * is published on sources[0] in which each pixel's fourth channel is scaled
 * by how far the pixel's chroma lies from the key colour's chroma (see
 * chromakey_pixel_gain); the first three channels are multiplied by 65535 via
 * _mm_mulhi_epu16.
 *
 * Two pixels (16 bytes of 16-bit channels) are processed per iteration.
 * NOTE(review): the coefficient rows line up with the uKey / vKey formulas
 * only if the first channel in memory is blue (BGRA order) - confirm.
 *
 * Always returns 1. If either input cannot be resolved the previous output is
 * left untouched.
 */
static int chromakey_perform(CHiPubNode *n) {
	CHiValue *sampleV = CHi_Crawl(&n->sinks[0]);
	CHiValue *colorV = CHi_Crawl(&n->sinks[1]);

	if(!sampleV || sampleV->type != CUTIHI_VAL_SAMPLE || !sampleV->data.sample) {
		return 1;
	}

	// The key colour is dereferenced below; bail out like for a missing sample.
	if(!colorV) {
		return 1;
	}

	CHiImage *src = sampleV->data.sample;

	if(n->sources[0].data.sample) {
		CHi_Image_Free(n->sources[0].data.sample);
	}
	n->sources[0].type = CUTIHI_VAL_SAMPLE;

	CHiImage *dst = n->sources[0].data.sample = CHi_Image_New(2, 4, (src->width * src->bpc * src->channels + 15) & ~15, src->width, src->height, NULL);

	// Chroma coordinates of the key colour, scaled to the signed 16-bit range.
	int16_t uKey = 32767 * (colorV->data.vec4[0] * -0.1146 + colorV->data.vec4[1] * -0.3854 + colorV->data.vec4[2] * +0.5000);
	int16_t vKey = 32767 * (colorV->data.vec4[0] * +0.5000 + colorV->data.vec4[1] * -0.4542 + colorV->data.vec4[2] * -0.0458);

	// The same chroma coefficients in <<15 fixed point, one per channel lane.
	__m128i row2 = _mm_set_epi32(0, -3755, -12628, 16384);
	__m128i row3 = _mm_set_epi32(0, 16384, -14883, -1501);

	float threshold0 = 300;
	float threshold1 = 3000;

	for(size_t y = 0; y < src->height; y++) {
		for(size_t off = 0; off < dst->stride; off += 16) {
			__m128i rgba2U16 = _mm_load_si128((__m128i*) ((uintptr_t) src->data16 + y * src->stride + off));

			// Halve so the channels fit the signed range, then widen each pixel to 32-bit lanes.
			__m128i rgba2S16 = _mm_srli_epi16(rgba2U16, 1);
			__m128i rgbaS16Lo = _mm_unpacklo_epi16(rgba2S16, _mm_setzero_si128());
			__m128i rgbaS16Hi = _mm_unpackhi_epi16(rgba2S16, _mm_setzero_si128());

			// Low 64 bits: multipliers for the first pixel; high 64 bits: for the second.
			__m128i alphas = chromakey_pixel_gain(rgbaS16Lo, row2, row3, uKey, vKey, threshold0, threshold1);
			alphas = _mm_or_si128(alphas, _mm_bslli_si128(chromakey_pixel_gain(rgbaS16Hi, row2, row3, uKey, vKey, threshold0, threshold1), 8));

			rgba2U16 = _mm_mulhi_epu16(rgba2U16, alphas);

			// Rows of dst are laid out with dst's own stride, which need not equal src's.
			_mm_stream_si128((__m128i*) ((uintptr_t) dst->data16 + y * dst->stride + off), rgba2U16);
		}
	}

	return 1;
}
/*
 * Creates a ChromaKey node: two sinks (sinks[1] is the vec4 key colour) and
 * one source, performed by chromakey_perform.
 */
CUTIVIS CHiPubNode *CHi_ChromaKey() {
	static const int defaultKey[4] = {0, 1, 0, 1}; // Default green

	CHiPubNode *keyer = calloc(1, sizeof(*keyer));

	keyer->type = CUTIHI_T('CChr','omaK');

	keyer->Stop = NULL;
	keyer->Start = NULL;
	keyer->Perform = chromakey_perform;

	keyer->sinkCount = 2;
	keyer->sinks = calloc(keyer->sinkCount, sizeof(*keyer->sinks));

	keyer->sourceCount = 1;
	keyer->sources = calloc(keyer->sourceCount, sizeof(*keyer->sources));

	keyer->sinks[1].type = CUTIHI_VAL_VEC4;
	for(int c = 0; c < 4; c++) {
		keyer->sinks[1].data.vec4[c] = defaultKey[c];
	}

	return keyer;
}
/*
 * Serialises one value through `writer`.
 *
 * Layout per type:
 *   TEXT    uint32 byte length, then the bytes (no terminator)
 *   VEC4    the raw vec4 array
 *   LINKED  uint64 index of the target node in ng->nodes, then uint16 output index
 *   KEYED   uint64 index of the keyframes object in ng->keyframesList
 * Any other type writes nothing.
 *
 * ng      graph used to turn node / keyframes pointers into indices
 * writer  output callback, called as writer(ud, bytes, size)
 * type    type of the value being written
 * data    the value itself
 * ud      opaque pointer handed to writer
 */
static void save_chival(CHiNodeGraph *ng, CHiSaveWriter writer, CHiValType type, CHiValueRaw data, void *ud) {
	if(type == CUTIHI_VAL_TEXT) {
		// A text value without a string is stored as the empty string rather than passed to strlen.
		char *text = data.text ? data.text : "";
		size_t len = strlen(text);

		writer(ud, &(uint32_t) {len}, sizeof(uint32_t));
		writer(ud, text, len);
	} else if(type == CUTIHI_VAL_VEC4) {
		writer(ud, data.vec4, sizeof(data.vec4));
	} else if(type == CUTIHI_VAL_LINKED) {
		size_t index;
		for(index = 0; index < ng->count; index++) {
			if(ng->nodes[index] == data.linked.to) {
				break;
			}
		}
		assert(index < ng->count);

		writer(ud, &(uint64_t) {index}, sizeof(uint64_t));
		writer(ud, &(uint16_t) {data.linked.idx}, sizeof(uint16_t));
	} else if(type == CUTIHI_VAL_KEYED) {
		size_t index;
		for(index = 0; index < ng->keyframesList.count; index++) {
			if(ng->keyframesList.keyframes[index] == data.keyed) {
				break;
			}
		}
		// The search ran over the keyframes list, so that is the bound to check against.
		assert(index < ng->keyframesList.count);

		writer(ud, &(uint64_t) {index}, sizeof(uint64_t));
	}
}
/*
 * Reads one value of the given type through `reader` into *data.
 *
 * Layout per type:
 *   TEXT    uint32 byte length, then the bytes; stored as a newly malloc'd
 *           NUL-terminated string
 *   VEC4    the raw vec4 array
 *   LINKED  uint64 node index, then uint16 output index
 *   KEYED   uint64 index into ng->keyframesList
 * Any other type reads nothing.
 *
 * Indices come from the stream and are range-checked: an out-of-range node or
 * keyframes index yields a NULL pointer in *data instead of an out-of-bounds
 * array read. If the text buffer cannot be allocated, data->text is left NULL
 * and the text bytes are NOT consumed; this function has no way to report
 * that to its caller.
 *
 * ng      graph whose nodes / keyframes the indices refer to
 * reader  input callback, called as reader(ud, buffer, size)
 * type    type of the value to read
 * data    destination
 * ud      opaque pointer handed to reader
 */
static void load_chival(CHiNodeGraph *ng, CHiLoadReader reader, CHiValType type, CHiValueRaw *data, void *ud) {
	if(type == CUTIHI_VAL_TEXT) {
		uint32_t len = 0;
		reader(ud, &len, sizeof(len));

		// Widen before adding the terminator: in 32-bit arithmetic len + 1 wraps to 0 for len == UINT32_MAX.
		size_t bytes = (size_t) len + 1;

		data->text = bytes > len ? malloc(bytes) : NULL;
		if(!data->text) {
			return;
		}

		reader(ud, data->text, len);
		data->text[len] = 0;
	} else if(type == CUTIHI_VAL_VEC4) {
		reader(ud, data->vec4, sizeof(data->vec4));
	} else if(type == CUTIHI_VAL_LINKED) {
		uint64_t index = 0;
		reader(ud, &index, sizeof(index));

		data->linked.to = index < ng->count ? ng->nodes[index] : NULL;

		// Always consume the output index so the stream stays in step.
		uint16_t idx = 0;
		reader(ud, &idx, sizeof(idx));

		data->linked.idx = idx;
	} else if(type == CUTIHI_VAL_KEYED) {
		uint64_t index = 0;
		reader(ud, &index, sizeof(index));

		data->keyed = index < ng->keyframesList.count ? ng->keyframesList.keyframes[index] : NULL;
	}
}
/*
 * Serialises a node graph through `writer`, called as writer(ud, bytes, size).
 *
 * Stream layout, in order:
 *   4-byte magic
 *   float duration, float time
 *   uint64 number of keyframes objects, then for each:
 *     uint16 value type, uint64 key count, the times array, each value
 *     (see save_chival), uint16 extrapolation mode, extrapolation parameter
 *   uint64 node count, then one uint64 type tag per node
 *   for each node: whatever its optional Save hook writes, uint16 sink count,
 *     then for each sink a uint16 type followed by its value
 *
 * Always returns 0.
 */
CUTIVIS int CHi_NodeGraphSave(CHiNodeGraph *ng, CHiSaveWriter writer, void *ud) {
	writer(ud, "\x71\x74\xCE\xA0", 4);

	float duration = ng->duration;
	writer(ud, &duration, sizeof(duration));

	float now = ng->time;
	writer(ud, &now, sizeof(now));

	/* Keyframes */
	uint64_t keyframesTotal = ng->keyframesList.count;
	writer(ud, &keyframesTotal, sizeof(keyframesTotal));

	for(size_t kfIdx = 0; kfIdx < ng->keyframesList.count; kfIdx++) {
		CHiKeyframes *kfs = ng->keyframesList.keyframes[kfIdx];

		uint16_t valueType = kfs->type;
		writer(ud, &valueType, sizeof(valueType));

		uint64_t keyTotal = kfs->count;
		writer(ud, &keyTotal, sizeof(keyTotal));

		writer(ud, kfs->times, sizeof(*kfs->times) * kfs->count);

		size_t k = 0;
		while(k < kfs->count) {
			save_chival(ng, writer, kfs->type, kfs->values[k], ud);
			k++;
		}

		uint16_t extrapolation = kfs->extrapolationMode;
		writer(ud, &extrapolation, sizeof(extrapolation));

		writer(ud, kfs->extrapolationParameter, sizeof(kfs->extrapolationParameter));
	}

	/* Node type tags come first, all together */
	uint64_t nodeTotal = ng->count;
	writer(ud, &nodeTotal, sizeof(nodeTotal));

	for(size_t nodeIdx = 0; nodeIdx < ng->count; nodeIdx++) {
		uint64_t tag = ng->nodes[nodeIdx]->type;
		writer(ud, &tag, sizeof(tag));
	}

	/* Then the per-node payload */
	for(size_t nodeIdx = 0; nodeIdx < ng->count; nodeIdx++) {
		CHiPubNode *node = ng->nodes[nodeIdx];

		if(node->Save) {
			node->Save(node, ud, writer);
		}

		uint16_t sinkTotal = node->sinkCount;
		writer(ud, &sinkTotal, sizeof(sinkTotal));

		for(size_t sinkIdx = 0; sinkIdx < node->sinkCount; sinkIdx++) {
			uint16_t sinkType = node->sinks[sinkIdx].type;
			writer(ud, &sinkType, sizeof(sinkType));

			save_chival(ng, writer, node->sinks[sinkIdx].type, node->sinks[sinkIdx].data, ud);
		}
	}

	return 0;
}
CUTIVIS int CHi_NodeGraphLoad(CHiNodeGraph *ng, CHiLoadReader reader, void *ud) {
{
char magic[4];
reader(ud, magic, sizeof(magic));
if(memcmp(magic, "\x71\x74\xCE\xA0", 4)) {
return 1;
}
}
CHi_NodeGraphReset(ng);
reader(ud, &ng->duration, sizeof(float));
reader(ud, &ng->time, sizeof(float));
{
uint64_t count;
reader(ud, &count, sizeof(count));
ng->keyframesList.count = count;
}
for(size_t i = 0; i < ng->keyframesList.count; i++) {
CHiKeyframes *kfs = ng->keyframesList.keyframes[i] = calloc(1, sizeof(*kfs));
{
uint16_t type;
reader(ud, &type, sizeof(type));
kfs->type = type;
}
{
uint64_t count;
reader(ud, &count, sizeof(count));
kfs->count = count;
}
kfs->times = calloc(kfs->count, sizeof(*kfs->times));
reader(ud, kfs->times, kfs->count * sizeof(*kfs->times));
for(size_t k = 0; k < kfs->count; k++) {
load_chival(ng, reader, kfs->type, &kfs->values[k], ud);
}
{
uint16_t extrap;
reader(ud, &extrap, sizeof(extrap));
kfs->extrapolationMode = extrap;
}
reader(ud, kfs->extrapolationParameter, sizeof(kfs->extrapolationParameter));
}
{
uint64_t count;
reader(ud, &count, sizeof(count));
ng->count = count;
}
ng->capacity = ng->count < 8 ? 8 : ng->count;
ng->nodes = calloc(ng->capacity, sizeof(*ng->nodes));
for(size_t i = 0; i < ng->count; i++) {
uint64_t type;
reader(ud, &type, sizeof(type));
CHiPubNode *n = NULL;
if(type == CUTIHI_T('CPre','view')) {
n = CHi_Preview();
} else if(type == CUTIHI_T('CMix','er ')) {
n = CHi_Mixer();
} else if(type == CUTIHI_T('CTex','t ')) {
n = CHi_Text();
} else if(type == CUTIHI_T('CTim','e ')) {
n = CHi_Time();
} else if(type == CUTIHI_T('CMod','ulat')) {
n = CHi_Modulate();
} else if(type == CUTIHI_T('CCns','tCol')) {
n = CHi_ConstantSample();
} else if(type == CUTIHI_T('CEmb','ed ')) {
n = CHi_Embed();
} else if(type == CUTIHI_T('CIma','ge ')) {
n = CHi_Image();
} else if(type == CUTIHI_T('CWin','dow ')) {
n = CHi_Window();
} else if(type == CUTIHI_T('CInA','udio')) {
n = CHi_Microphone();
} else if(type == CUTIHI_T('CExp','Wave')) {
n = CHi_ExportWav();
} else if(type == CUTIHI_T('CMov','ie ')) {
n = CHi_Movie();
} else if(type == CUTIHI_T('CEnc','GVP8')) {
n = CHi_EncodeVP8();
} else if(type == CUTIHI_T('CEnc','GVP9')) {
n = CHi_EncodeVP9();
} else if(type == CUTIHI_T('CExp','Webm')) {
n = CHi_MuxWebm();
} else if(type == CUTIHI_T('CKey','hook')) {
n = CHi_Keyhook();
} else if(type == CUTIHI_T('CKey','hook')) {
n = CHi_Keyhook();
} else if(type == CUTIHI_T('CEnc','Opus')) {
n = CHi_EncodeOpus();
} else if(type == CUTIHI_T('CWeb','Cam ')) {
n = CHi_Camera();
} else if(type == CUTIHI_T('CCmp','nScl')) {
n = CHi_ComponentScale();
} else if(type == CUTIHI_T('CEnc','H264')) {
n = CHi_EncodeH264();
} else if(type == CUTIHI_T('CStr','RTMP')) {
n = CHi_StreamRTMP();
} else if(type == CUTIHI_T('CEnc','AACL')) {
n = CHi_EncodeAAC();
}
n->ng = ng;
if(!n) {
CHi_NodeGraphReset(ng);
puts("Error: Unknown node type!");
return 1;
}
ng->nodes[i] = n;
}
for(size_t i = 0; i < ng->count; i++) {
CHiPubNode *n = ng->nodes[i];
{
uint16_t u16;
reader(ud, &u16, sizeof(u16));
n->sinkCount = u16;
}
n->sinks = calloc(n->sinkCount, sizeof(*n->sinks));
for(size_t s = 0; s < n->sinkCount; s++) {
{
uint16_t u16;
reader(ud, &u16, sizeof(u16));
n->sinks[s].type = u16;
}
load_chival(ng, reader, n->sinks[s].type, &n->sinks[s].data, ud);
if(n->sinks[s].type == CUTIHI_VAL_LINKED) {
adjacency_add(n->sinks[s].data.linked.to, n);
}
}
}
update_keyed_values(ng);
return 0;
}
/*
 * Tells whether the graph's current time (CHi_Time_Get) lies within the
 * node's lifespan: at or after lifespan.start and strictly before
 * lifespan.end. A lifespan.end of 0 means there is no upper bound.
 */
CUTIVIS bool CHi_Node_Active(CHiPubNode *pubn) {
	float now = CHi_Time_Get(pubn->ng);

	if(!(pubn->lifespan.start <= now)) {
		return false;
	}

	if(pubn->lifespan.end == 0) {
		return true;
	}

	return now < pubn->lifespan.end;
}