#include"node.h" #include #include"img.h" #include #include #include #include #include #include #include #include #include #include"mode.h" #include #include #include #include"minitrace.h" #include"linearity.h"
// NOTE(review): the header names of the bare `#include` directives above are
// missing in this copy of the file; restore them from the original source.

/*
 * Lower-bound binary search over a sorted array.
 *
 * Returns the index of the first element of `base` (nmemb elements of `size`
 * bytes each) for which compar(element, key) >= 0, or nmemb if every element
 * compares below the key.
 */
static size_t bisect(const void *key, const void *base, size_t nmemb, size_t size, ssize_t(*compar)(const void*, const void*)) {
	size_t low = 0, high = nmemb;

	while(low < high) {
		// low + (high - low) / 2 cannot overflow, unlike (low + high) / 2
		size_t middle = low + (high - low) / 2;

		if(compar((const void*) ((uintptr_t) base + size * middle), key) < 0) {
			low = middle + 1;
		} else {
			high = middle;
		}
	}

	return low;
}

/* Three-way comparison of two floats, for use with bisect(). */
static ssize_t float_compar(const void *A, const void *B) {
	float a = *(const float*) A;
	float b = *(const float*) B;
	return (a > b) - (a < b);
}

/*
 * qsort/bsearch comparator for CHiAdjacency entries: orders by the source
 * pointer first, then by the sink pointer.
 *
 * The entry is dereferenced before indexing so that the stored node pointers
 * are compared (not the addresses of the entries themselves), and the result
 * is a strict -1/0/1 rather than a pointer difference truncated to int.
 */
static int adjacencycmp(const void *a, const void *b) {
	const CHiAdjacency *lhs = a;
	const CHiAdjacency *rhs = b;

	for(int k = 0; k < 2; k++) {
		uintptr_t l = (uintptr_t) (*lhs)[k];
		uintptr_t r = (uintptr_t) (*rhs)[k];
		if(l != r) {
			return l < r ? -1 : 1;
		}
	}

	return 0;
}

/*
 * Records the edge source -> sink in the graph that owns `source` and keeps
 * the adjacency list sorted for adjacency_remove().
 * Does nothing if `source` is not registered in a graph, or if the list
 * cannot be grown.
 */
static void adjacency_add(CHiPubNode *source, CHiPubNode *sink) {
	CHiNodeGraph *ng = source->ng;
	if(!ng) {
		return;
	}

	if(ng->adjacencyCount == ng->adjacencyCapacity) {
		size_t newCapacity = ng->adjacencyCapacity ? ng->adjacencyCapacity * 2 : 8;
		CHiAdjacency *grown = realloc(ng->adjacencies, sizeof(CHiAdjacency) * newCapacity);
		if(!grown) {
			return; // old list is still valid; the edge is simply not recorded
		}
		ng->adjacencies = grown;
		ng->adjacencyCapacity = newCapacity;
	}

	ng->adjacencies[ng->adjacencyCount][0] = source;
	ng->adjacencies[ng->adjacencyCount][1] = sink;
	ng->adjacencyCount++;

	qsort(ng->adjacencies, ng->adjacencyCount, sizeof(CHiAdjacency), adjacencycmp);
}

/*
 * Removes the edge source -> sink from the graph that owns `source`, if it is
 * present. The list stays sorted because the tail is shifted down in place.
 */
static void adjacency_remove(CHiPubNode *source, CHiPubNode *sink) {
	CHiNodeGraph *ng = source->ng;
	if(!ng || !ng->adjacencies) {
		return;
	}

	CHiAdjacency *adj = bsearch(&(CHiAdjacency) {source, sink}, ng->adjacencies, ng->adjacencyCount, sizeof(CHiAdjacency), adjacencycmp);

	if(adj) {
		memmove(adj, adj + 1, sizeof(CHiAdjacency) * (ng->adjacencyCount - (adj - ng->adjacencies) - 1));
		ng->adjacencyCount--;
	}
}

/*
 * Allocates a zeroed node graph and resets it to its initial state.
 * The first call also initialises minitrace with "CHiTrace.json".
 * Returns NULL if the allocation fails.
 */
CUTIVIS CHiNodeGraph *CHi_NewNodeGraph() {
	static int inited = 0;
	if(!inited) {
		inited = 1;
		mtr_init("CHiTrace.json");
	}

	CHiNodeGraph *ret = calloc(1, sizeof(*ret));
	if(!ret) {
		return NULL;
	}

	CHi_NodeGraphReset(ret);
	return ret;
}

/*
 * Destroys every node of `ng`, releases its arrays and re-initialises it as an
 * empty graph in the CUTIHI_COMP_READY state. The stop/frame callbacks and the
 * user-data pointer survive the reset. Returns `ng`.
 *
 * NOTE(review): only the keyframe list array is freed here, not the entries it
 * points to, and eventOnError is not preserved like the other two callbacks —
 * confirm both are intended.
 */
CUTIVIS CHiNodeGraph *CHi_NodeGraphReset(CHiNodeGraph *ng) {
	for(size_t n = 0; n < ng->count; n++) {
		if(ng->nodes[n]->Destroy) {
			ng->nodes[n]->Destroy(ng->nodes[n]);
		} else {
			free(ng->nodes[n]);
		}
	}

	// free(NULL) is a no-op, so no guards are needed
	free(ng->nodes);
	free(ng->adjacencies);
	free(ng->keyframesList.keyframes);

	void *eOnStop = ng->eventOnStopComplete;
	void *eOnFrame = ng->eventOnFrameComplete;
	void *ud = ng->ud;

	// Everything not explicitly set below (counts, callbacks, time, ...) is zero
	memset(ng, 0, sizeof(*ng));

	ng->nodes = malloc(sizeof(*ng->nodes) * 8);
	ng->capacity = ng->nodes ? 8 : 0;

	ng->adjacencies = malloc(sizeof(CHiAdjacency) * 8);
	ng->adjacencyCapacity = ng->adjacencies ? 8 : 0;

	ng->compilationStatus = CUTIHI_COMP_READY;

	ng->eventOnStopComplete = eOnStop;
	ng->eventOnFrameComplete = eOnFrame;
	ng->ud = ud;

	return ng;
}

/*
 * Resolves a value to the one that actually holds data: linked values are
 * followed to the referenced source of the node they point to, keyed values to
 * the keyframe set's `current` value, repeatedly, until neither applies.
 */
CUTIVIS CHiValue *CHi_Crawl(CHiValue *v) {
	for(;;) {
		if(v->type == CUTIHI_VAL_LINKED) {
			v = &v->data.linked.to->sources[v->data.linked.idx];
		} else if(v->type == CUTIHI_VAL_KEYED) {
			v = &v->data.keyed->current;
		} else {
			return v;
		}
	}
}

/*
 * Appends `n` to the graph and sets its back-pointer. If a compilation is
 * currently running, the node's Start hook (when it has one) is invoked
 * immediately. If the node array cannot be grown the node is not registered.
 */
CUTIVIS void CHi_RegisterNode(CHiNodeGraph* ng, CHiPubNode* n) {
	if(ng->count == ng->capacity) {
		size_t newCapacity = ng->capacity * 3 / 2;
		if(newCapacity <= ng->capacity) {
			newCapacity = ng->capacity + 8; // x1.5 does not grow capacities 0 and 1
		}

		CHiPubNode **grown = realloc(ng->nodes, sizeof(*ng->nodes) * newCapacity);
		if(!grown) {
			return;
		}
		ng->nodes = grown;
		ng->capacity = newCapacity;
	}

	ng->nodes[ng->count++] = n;
	n->ng = ng;

	// Start is optional; the node constructors in this file leave it NULL
	if(ng->compilationStatus == CUTIHI_COMP_RUNNING && n->Start) {
		n->Start(n);
	}
}

/*
 * Walks the adjacency entries whose source is `n`. The per-edge action is
 * currently disabled, so this has no effect. Safe to call with a NULL graph.
 */
CUTIVIS void CHi_MakeDirty(CHiNodeGraph *ng, CHiPubNode *n) {
	if(!ng) {
		return;
	}

	for(size_t adj = 0; adj < ng->adjacencyCount; adj++) {
		if(ng->adjacencies[adj][0] == n) {
			//n->clean = 0;
		}
	}
}

/*
 * Depth-first visit used by the topological sort. `_dfsmark` is 0 for
 * unvisited nodes, 1 while a node is on the current path and 2 once finished.
 * A node is appended to `*result` only after every node its sinks link to, so
 * dependencies come first. Returns 0 if a cycle is found, 1 otherwise.
 */
static int dfs_visit(size_t *resultCount, CHiPubNode ***result, CHiPubNode *n) {
	if(n->_dfsmark == 2) {
		return 1;
	}
	if(n->_dfsmark == 1) {
		return 0; // reached a node that is still on the current path: cycle
	}

	n->_dfsmark = 1;

	for(size_t s = 0; s < n->sinkCount; s++) {
		if(n->sinks[s].type != CUTIHI_VAL_LINKED) {
			continue;
		}
		if(!dfs_visit(resultCount, result, n->sinks[s].data.linked.to)) {
			return 0;
		}
	}

	n->_dfsmark++;
	(*result)[(*resultCount)++] = n;
	return 1;
}

static int
topological_sort(CHiNodeGraph *ng) { size_t resultCount = 0; CHiPubNode **result = malloc(sizeof(*result) * ng->capacity); for(size_t i = 0; i < ng->count; i++) { ng->nodes[i]->_dfsmark = 0; } for(size_t i = 0; i < ng->count; i++) { if(!dfs_visit(&resultCount, &result, ng->nodes[i])) { free(result); return 0; } } assert(resultCount == ng->count); free(ng->nodes); ng->nodes = result; return 1; } CUTIVIS int CHi_ConfigureSink(CHiPubNode *n, size_t i, CHiValue v) { if(n->sinkCount <= i) { n->sinks = realloc(n->sinks, sizeof(*n->sinks) * (i + 1)); memset(&n->sinks[i], 0, sizeof(*n->sinks)); } if(n->sinks[i].type == CUTIHI_VAL_KEYED) { n->sinks[i].data.keyed->current = v; return 1; } if(v.type == CUTIHI_VAL_LINKED && n == v.data.linked.to) return 0; CHiValue old = n->sinks[i]; if(old.type == CUTIHI_VAL_LINKED) { adjacency_remove(old.data.linked.to, n); } // Check if viable n->sinks[i] = v; if(n->ng && !topological_sort(n->ng)) { n->sinks[i] = old; if(old.type == CUTIHI_VAL_LINKED) { adjacency_add(old.data.linked.to, n); } return 0; } if(v.type == CUTIHI_VAL_LINKED) { adjacency_add(v.data.linked.to, n); } CHi_MakeDirty(n->ng, n); return 1; } CUTIVIS void CHi_MakeKeyframe(CHiNodeGraph *ng, CHiPubNode *n, size_t i) { if(n->sinks[i].type != CUTIHI_VAL_KEYED) { CHiKeyframes *kfs = calloc(1, sizeof(*kfs)); kfs->type = n->sinks[i].type; kfs->count = 1; kfs->times = malloc(sizeof(*kfs->times)); *kfs->times = ng->time; kfs->values = malloc(sizeof(*kfs->values)); memcpy(kfs->values, &n->sinks[i].data, sizeof(CHiValueRaw)); memcpy(&kfs->current, &n->sinks[i], sizeof(CHiValueRaw)); kfs->node = n; n->sinks[i].type = CUTIHI_VAL_KEYED; n->sinks[i].data.keyed = kfs; ng->keyframesList.keyframes = realloc(ng->keyframesList.keyframes, sizeof(*ng->keyframesList.keyframes) * (++ng->keyframesList.count)); ng->keyframesList.keyframes[ng->keyframesList.count - 1] = kfs; } else { CHiKeyframes *kfs = n->sinks[i].data.keyed; float now = ng->time; size_t idx = bisect(&now, kfs->times, kfs->count, 
sizeof(now), float_compar); if(idx < kfs->count && kfs->times[idx] == now) { kfs->values[idx] = kfs->current.data; } else { kfs->count++; kfs->values = realloc(kfs->values, sizeof(*kfs->values) * kfs->count); kfs->times = realloc(kfs->times, sizeof(*kfs->times) * kfs->count); memmove(kfs->values + idx + 1, kfs->values + idx, sizeof(*kfs->values) * (kfs->count - idx - 1)); memmove(kfs->times + idx + 1, kfs->times + idx, sizeof(*kfs->times) * (kfs->count - idx - 1)); kfs->values[idx] = kfs->current.data; kfs->times[idx] = now; } } } CUTIVIS size_t CHi_MoveKeyframe(CHiNodeGraph *ng, CHiKeyframes *kfs, size_t idx, float to) { CHiValueRaw val = kfs->values[idx]; while(idx < kfs->count - 1 && to > kfs->times[idx + 1]) { memcpy(&kfs->values[idx], &kfs->values[idx + 1], sizeof(*kfs->values)); memcpy(&kfs->times[idx], &kfs->times[idx + 1], sizeof(*kfs->times)); idx++; } while(idx > 0 && to < kfs->times[idx - 1]) { memcpy(&kfs->values[idx], &kfs->values[idx - 1], sizeof(*kfs->values)); memcpy(&kfs->times[idx], &kfs->times[idx - 1], sizeof(*kfs->times)); idx--; } kfs->times[idx] = to; kfs->values[idx] = val; return idx; } CUTIVIS size_t CHi_MoveKeyframeBy(CHiNodeGraph *ng, CHiKeyframes *kfs, size_t idx, float dt) { return CHi_MoveKeyframe(ng, kfs, idx, kfs->times[idx] + dt); } CUTIVIS void CHi_DeleteKeyframe(CHiNodeGraph *ng, CHiKeyframes *kfs, size_t idx) { memmove(&kfs->times[idx], &kfs->times[idx + 1], (kfs->count - idx - 1) * sizeof(*kfs->times)); memmove(&kfs->values[idx], &kfs->values[idx + 1], (kfs->count - idx - 1) * sizeof(*kfs->values)); kfs->count--; } CUTIVIS size_t CHi_GetClosestKeyframe(CHiNodeGraph *ng, CHiKeyframes *kfs, float t) { if(kfs->count == 1) { return 0; } size_t idx = bisect(&t, kfs->times, kfs->count, sizeof(*kfs->times), float_compar); if(idx == 0) { return idx; } if(idx == kfs->count) { return kfs->count - 1; } if(fabs(kfs->times[idx] - t) < fabs(kfs->times[idx - 1] - t)) { return idx; } else { return idx - 1; } } CUTIVIS void 
CHi_SetExtrapolationMode(CHiNodeGraph *ng, CHiPubNode *n, size_t sinkIdx, CHiExtrapolationMode mode, float* params) { if(n->sinks[sinkIdx].type != CUTIHI_VAL_KEYED) { return; } CHiKeyframes *kfs = n->sinks[sinkIdx].data.keyed; kfs->extrapolationMode = mode; memcpy(kfs->extrapolationParameter, params, sizeof(kfs->extrapolationParameter)); } CUTIVIS void CHi_SetDuration(CHiNodeGraph *ng, float d) { ng->duration = d; } CUTIVIS int CHi_Hysteresis(CHiPubNode *root) { if(root->ng->compilationStatus != CUTIHI_COMP_READY) return 0; for(size_t s = 0; s < root->sinkCount; s++) { if(root->sinks[s].type == CUTIHI_VAL_LINKED) { CHi_Hysteresis(root->sinks[s].data.linked.to); } } //if(!root->clean) { root->Perform(root); //} return 1; } static bool error_changes(CHiPubNode *n) { for(int e = 0; e < CUTIHI_MAX_ERRORS; e++) { if(n->errors.active[e] != n->errors.activeLast[e]) { return true; } } return false; } static void save_errors(CHiPubNode *n) { for(int e = 0; e < CUTIHI_MAX_ERRORS; e++) { n->errors.activeLast[e] = n->errors.active[e]; n->errors.active[e] = false; } } static void perform_step(CHiNodeGraph *ng) { pthread_mutex_lock(&ng->mut); for(size_t nIdx = 0; nIdx < ng->count; nIdx++) { save_errors(ng->nodes[nIdx]); } for(size_t nIdx = 0; nIdx < ng->count; nIdx++) { CHiPubNode *n = ng->nodes[nIdx]; n->Perform(n); if(error_changes(n)) { if(ng->eventOnError) ng->eventOnError(ng, n); } } if(ng->eventOnFrameComplete) { ng->eventOnFrameComplete(ng); } pthread_mutex_unlock(&ng->mut); } bool timespec_less(const struct timespec l, const struct timespec r) { if(l.tv_sec == r.tv_sec) { return l.tv_nsec < r.tv_nsec; } else { return l.tv_sec < r.tv_sec; } } struct timespec timespec_sub(const struct timespec l, const struct timespec r) { struct timespec ret; ret.tv_sec = l.tv_sec - r.tv_sec; ret.tv_nsec = l.tv_nsec - r.tv_nsec; if(ret.tv_nsec < 0) { ret.tv_nsec += 1000000000L; ret.tv_sec--; } return ret; } struct timespec timespec_addf(const struct timespec l, const float r) { struct 
timespec ret; ret.tv_sec = l.tv_sec + floorf(r); ret.tv_nsec = l.tv_nsec + (r - floorf(r)) * 1000000000L; if(ret.tv_nsec > 1000000000L) { ret.tv_sec++; ret.tv_nsec -= 1000000000L; } return ret; } struct timespec timespec_add(const struct timespec l, const struct timespec r) { struct timespec ret; ret.tv_sec = l.tv_sec + r.tv_sec; ret.tv_nsec = l.tv_nsec + r.tv_nsec; if(ret.tv_nsec > 1000000000L) { ret.tv_nsec -= 1000000000L; ret.tv_sec++; } return ret; } float timespecToFloat(const struct timespec t) { return t.tv_sec + t.tv_nsec / 1000000000.f; } struct CompileCtx { CHiNodeGraph *ng; }; void *compile_thread(void *ctx_) { struct CompileCtx *ctx = ctx_; ctx->ng->time = ctx->ng->timedelta = 0; puts("START"); { ssize_t nIdx; for(nIdx = 0; nIdx < ctx->ng->count; nIdx++) { bool success; if(ctx->ng->nodes[nIdx]->Start) { success = ctx->ng->nodes[nIdx]->Start(ctx->ng->nodes[nIdx]); } else { success = ctx->ng->nodes[nIdx]->Perform(ctx->ng->nodes[nIdx]); } if(!success) { break; } } if(nIdx != ctx->ng->count) { // Starting failed; stop all previous nodes ctx->ng->eventOnError(ctx->ng, ctx->ng->nodes[nIdx]); nIdx--; for(; nIdx >= 0; nIdx--) { if(ctx->ng->nodes[nIdx]->Stop) { ctx->ng->nodes[nIdx]->Stop(ctx->ng->nodes[nIdx]); } } goto stop; } } if(CHi_GetMode() == CUTIHI_MODE_LIVE) { struct timespec start; clock_gettime(CLOCK_MONOTONIC, &start); struct timespec finish = timespec_addf(start, ctx->ng->duration); for(size_t frm = 0; ctx->ng->compilationStatus != CUTIHI_COMP_KILL_YOURSELF; frm++) { struct timespec now; clock_gettime(CLOCK_MONOTONIC, &now); if(ctx->ng->duration != -1 && timespec_less(finish, now)) { break; } struct timespec end = timespec_addf(now, 0.033333333333333333333333); CHi_Time_Set(ctx->ng, timespecToFloat(timespec_sub(now, start))); perform_step(ctx->ng); do { clock_gettime(CLOCK_MONOTONIC, &now); } while(timespec_less(now, end)); } } else { __uint128_t diff; for(uint64_t frm = 0; ctx->ng->compilationStatus != CUTIHI_COMP_KILL_YOURSELF && (ctx->ng->duration 
== -1 || frm < ctx->ng->duration * 30);) { CHi_Time_Set(ctx->ng, frm / 30.f); perform_step(ctx->ng); struct timespec last; clock_gettime(CLOCK_MONOTONIC, &last); struct timespec now; clock_gettime(CLOCK_MONOTONIC, &now); diff += timespec_sub(now, last).tv_nsec; frm++; } } for(size_t nIdx = 0; nIdx < ctx->ng->count; nIdx++) { if(ctx->ng->nodes[nIdx]->Stop) { ctx->ng->nodes[nIdx]->Stop(ctx->ng->nodes[nIdx]); } } puts("END"); if(ctx->ng->eventOnStopComplete) { ctx->ng->eventOnStopComplete(ctx->ng); } stop: ctx->ng->compilationStatus = CUTIHI_COMP_READY; free(ctx); mtr_flush(); return NULL; } CUTIVIS void CHi_BeginCompilation(CHiNodeGraph *ng) { ng->compilationStatus = CUTIHI_COMP_RUNNING; struct CompileCtx *ctx = calloc(sizeof(*ctx), 1); ctx->ng = ng; pthread_t thrd; pthread_create(&thrd, NULL, &compile_thread, ctx); } CUTIVIS void CHi_StopCompilation(CHiNodeGraph *ng) { if(ng->compilationStatus == CUTIHI_COMP_RUNNING) { ng->compilationStatus = CUTIHI_COMP_KILL_YOURSELF; } } typedef struct { CHiPubNode pubn; char *cachePath; CHiImage *cacheImg; } ImageNode; static int image_perform(CHiPubNode *node) { ImageNode *internal = (ImageNode*) node; node->sources->type = CUTIHI_VAL_SAMPLE; const char *fn = node->sinks[CUTIHI_IMAGE_IN_FILE].data.text; if(fn && (!internal->cachePath || strcmp(internal->cachePath, fn))) { if(node->sinks[CUTIHI_IMAGE_IN_FILE].type == CUTIHI_VAL_NONE) { return 1; } if(node->sinks[CUTIHI_IMAGE_IN_FILE].type != CUTIHI_VAL_TEXT) { node->errors.active[0] = true; strncpy(node->errors.code[0], "invalid type", CUTIHI_ERR_SIZE); node->errors.sink[0] = CUTIHI_IMAGE_IN_FILE; return 1; } if(internal->cacheImg) { CHi_Image_Free(internal->cacheImg); internal->cacheImg = NULL; } } if(!internal->cacheImg) { struct sail_image *simg; if(sail_load_from_file(fn, &simg) != SAIL_OK) { node->errors.active[0] = true; strncpy(node->errors.code[0], "invalid file", CUTIHI_ERR_SIZE); node->errors.sink[0] = CUTIHI_IMAGE_IN_FILE; return 1; } struct sail_image *cimg; 
sail_convert_image(simg, SAIL_PIXEL_FORMAT_BPP64_BGRA, &cimg); sail_destroy_image(simg); simg = NULL; CHiImage *img = CHi_Image_New(2, 4, (cimg->bytes_per_line + 15) & ~15, cimg->width, cimg->height, NULL); CHi_Restride(cimg->pixels, img->data16, cimg->bytes_per_line, img->stride, img->height); internal->cacheImg = img; for(size_t y = 0; y < img->height; y++) { for(size_t x = 0; x < img->stride; x += 16) { __m128i pixels = _mm_load_si128((__m128i*) ((uintptr_t) img->data16 + y * img->stride + x)); pixels = apply_gamma_epi16(pixels, _mm_set_ps(1.0f, 2.2f, 2.2f, 2.2f)); _mm_stream_si128((__m128i*) ((uintptr_t) img->data16 + y * img->stride + x), pixels); } } sail_destroy_image(cimg); } if(CHi_Node_Active(node)) { node->sources->data.sample = internal->cacheImg; } else { node->sources->data.sample = NULL; } return 1; } CUTIVIS CHiPubNode *CHi_Image() { CHiPubNode *n = calloc(1, sizeof(ImageNode)); n->type = CUTIHI_T('CIma','ge '); n->Start = n->Stop = NULL; n->Perform = image_perform; n->sinkCount = 1; n->sinks = calloc(sizeof(*n->sinks), 1); n->sourceCount = 1; n->sources = calloc(sizeof(*n->sources), 1); ((ImageNode*) n)->cachePath = strdup(""); return n; } static int embed_perform(CHiPubNode *node) { MTR_BEGIN("CHi", "embed_perform"); node->sources[0].type = CUTIHI_VAL_SAMPLE; CHiImage *main = CHi_Crawl(&node->sinks[0])->data.sample; if(node->sources->data.sample) CHi_Image_Free(node->sources->data.sample); CHiImage *dest = node->sources->data.sample = CHi_Image_New(2, 4, main->stride, main->width, main->height, NULL); memcpy(dest->data16, main->data16, main->stride * main->height); for(int sid = 0; sid < (node->sinkCount - 1) / 3; sid++) { CHiImage *sub = CHi_Crawl(&node->sinks[1 + sid * 3])->data.sample; if(!sub) continue; int sy = 0; int dy = (int16_t) CHi_Crawl(&node->sinks[2 + sid * 3])->data.vec4[1]; if(dy < 0) { sy = -dy; dy = 0; } for(; sy < sub->height && dy < dest->height; sy++, dy++) { int sx = 0; int dx = (int16_t) CHi_Crawl(&node->sinks[2 + sid * 
3])->data.vec4[0]; if(dx < 0) { sx = -dx; dx = 0; } for(; sx < sub->width && dx < dest->width; sx += 2, dx += 2) { __m128i bottom = _mm_loadu_si128((__m128i*) ((uintptr_t) dest->data16 + dy * dest->stride + dx * 8)); __m128i top = _mm_loadu_si128((__m128i*) ((uintptr_t) sub->data16 + sy * sub->stride + sx * 8)); __m128i alpha = _mm_shuffle_epi8(top, _mm_set_epi8(15, 14, 15, 14, 15, 14, 15, 14, 7, 6, 7, 6, 7, 6, 7, 6)); __m128i invAlpha = _mm_sub_epi16(_mm_set1_epi16(0xFFFF), alpha); __m128i result = _mm_add_epi16(_mm_mulhi_epu16(top, alpha), _mm_mulhi_epu16(bottom, invAlpha)); _mm_storeu_si128((__m128i*) ((uintptr_t) dest->data16 + dy * dest->stride + dx * 8), result); } } } MTR_END("CHi", "embed_perform"); return 1; } CUTIVIS CHiPubNode *CHi_Embed() { CHiPubNode *n = calloc(1, sizeof(*n)); n->type = CUTIHI_T('CEmb','ed '); n->Start = n->Stop = NULL; n->Perform = embed_perform; n->sinks = calloc(sizeof(*n->sinks), n->sinkCount = 1 + 3 * CUTIHI_EMBED_MAX_SMALLS); n->sources = calloc(sizeof(*n->sources), n->sourceCount = 1); return n; } static int constantsample_perform(CHiPubNode *node) { node->sources[0].type = CUTIHI_VAL_SAMPLE; if(node->sources->data.sample) CHi_Image_Free(node->sources->data.sample); CHiValue *sink = CHi_Crawl(&node->sinks[0]); CHiValue *sz = CHi_Crawl(&node->sinks[1]); size_t w = sz->data.vec4[0] < 1 ? 1 : sz->data.vec4[0]; size_t h = sz->data.vec4[1] < 1 ? 
1 : sz->data.vec4[1]; CHiImage *img = CHi_Image_New(2, 4, 8 * w, w, h, NULL); if(CHi_Node_Active(node)) { for(size_t i = 0; i < w * h; i++) { img->data16[i * 4 + 0] = sink->data.vec4[2] * 65535; img->data16[i * 4 + 1] = sink->data.vec4[1] * 65535; img->data16[i * 4 + 2] = sink->data.vec4[0] * 65535; img->data16[i * 4 + 3] = 65535; } } node->sources->data.sample = img; return 1; } CUTIVIS CHiPubNode *CHi_ConstantSample() { CHiPubNode *n = calloc(1, sizeof(*n)); n->type = CUTIHI_T('CCns','tCol'); n->Start = n->Stop = NULL; n->Perform = constantsample_perform; n->sinkCount = 2; n->sinks = calloc(sizeof(*n->sinks), n->sinkCount); n->sourceCount = 1; n->sources = calloc(sizeof(*n->sources), n->sourceCount); n->sinks[0].type = CUTIHI_VAL_VEC4; n->sinks[0].data.vec4[0] = 1280; n->sinks[0].data.vec4[1] = 720; return n; } static __m128i _mm_mullo_epi32(__m128i a, __m128i b) { // Plagiarized from a plagiarization of Agner Fog's code __m128i a13 = _mm_shuffle_epi32(a, 0xF5); // (-,a3,-,a1) __m128i b13 = _mm_shuffle_epi32(b, 0xF5); // (-,b3,-,b1) __m128i prod02 = _mm_mul_epu32(a, b); // (-,a2*b2,-,a0*b0) __m128i prod13 = _mm_mul_epu32(a13, b13); // (-,a3*b3,-,a1*b1) __m128i prod01 = _mm_unpacklo_epi32(prod02,prod13); // (-,-,a1*b1,a0*b0) __m128i prod23 = _mm_unpackhi_epi32(prod02,prod13); // (-,-,a3*b3,a2*b2) __m128i prod = _mm_unpacklo_epi64(prod01,prod23); // (ab3,ab2,ab1,ab0) return prod; } static int modulate_perform(CHiPubNode *node) { MTR_BEGIN("CHi", "modulate_perform"); CHiValue *imgsrc = CHi_Crawl(&node->sinks[0]); if(!imgsrc || imgsrc->type == CUTIHI_VAL_NONE) { return 1; } if(imgsrc->type != CUTIHI_VAL_SAMPLE) { node->errors.active[0] = true; strncpy(node->errors.code[0], "invalid type", CUTIHI_ERR_SIZE); node->errors.sink[0] = 0; return 1; } node->sources[0].type = CUTIHI_VAL_SAMPLE; if(node->sources->data.sample) CHi_Image_Free(node->sources->data.sample); if(!CHi_Node_Active(node)) { node->sources->data.sample = NULL; return; } CHiImage *src = 
imgsrc->data.sample; assert(src->stride % 16 == 0); CHiImage *dst = CHi_Image_New(2, 4, src->stride, src->width, src->height, NULL); node->sources->data.sample = dst; float V = CHi_Crawl(&node->sinks[1])->data.vec4[0]; float S = CHi_Crawl(&node->sinks[2])->data.vec4[0]; float H = CHi_Crawl(&node->sinks[3])->data.vec4[0] * 3.1415926535897 / 180; float sH = sinf(H); float cH = cosf(H); __m128i row1 = _mm_set_epi32( 0, 32768 * (+0.180472 * S * sH + 0.7874000 * S * cH + 0.2126 * V), 32768 * (-0.715274 * S * cH + 0.6069280 * S * sH + 0.7152 * V), 32768 * (-0.787400 * S * sH - 0.0721258 * S * cH + 0.0722 * V) ); __m128i row2 = _mm_set_epi32( 0, 32768 * (-0.212585 * S * cH - 0.1472940 * S * sH + 0.2126 * V), 32768 * (-0.095334 * S * sH + 0.2847960 * S * cH + 0.7152 * V), 32768 * (-0.072211 * S * cH + 0.2426280 * S * sH + 0.0722 * V) ); __m128i row3 = _mm_set_epi32( 0, 32768 * (-0.212652 * S * cH + 0.9278000 * S * sH + 0.2126 * V), 32768 * (-0.842814 * S * sH - 0.7151480 * S * cH + 0.7152 * V), 32768 * (-0.084987 * S * sH + 0.9278000 * S * cH + 0.0722 * V) ); for(size_t b = 0; b < dst->stride * dst->height; b += 16) { __m128i rgba2U16 = _mm_load_si128((__m128i*) ((uintptr_t) src->data16 + b)); __m128i rgba2S16 = _mm_srli_epi16(rgba2U16, 1); __m128i rgbaS16Lo = _mm_unpacklo_epi16(rgba2S16, _mm_setzero_si128()); __m128i rgbaS16Hi = _mm_unpackhi_epi16(rgba2S16, _mm_setzero_si128()); rgba2S16 = _mm_setzero_si128(); do { __m128i newR = _mm_mullo_epi32(rgbaS16Lo, row1); __m128i newG = _mm_mullo_epi32(rgbaS16Lo, row2); __m128i newB = _mm_mullo_epi32(rgbaS16Lo, row3); __m128i newA = _mm_mullo_epi32(rgbaS16Lo, _mm_set_epi32(1, 0, 0, 0)); newR = _mm_srai_epi32(newR, 16); newG = _mm_srai_epi32(newG, 16); newB = _mm_srai_epi32(newB, 16); newR = _mm_hadd_epi32(newR, _mm_setzero_si128()); newG = _mm_hadd_epi32(newG, _mm_setzero_si128()); newB = _mm_hadd_epi32(newB, _mm_setzero_si128()); newA = _mm_hadd_epi32(newA, _mm_setzero_si128()); newR = _mm_hadd_epi32(newR, _mm_setzero_si128()); 
newG = _mm_hadd_epi32(newG, _mm_setzero_si128()); newB = _mm_hadd_epi32(newB, _mm_setzero_si128()); newA = _mm_hadd_epi32(newA, _mm_setzero_si128()); newR = _mm_max_epi16(_mm_min_epi16(newR, _mm_set1_epi32(16383)), _mm_set1_epi32(0)); newG = _mm_max_epi16(_mm_min_epi16(newG, _mm_set1_epi32(16383)), _mm_set1_epi32(0)); newB = _mm_max_epi16(_mm_min_epi16(newB, _mm_set1_epi32(16383)), _mm_set1_epi32(0)); newA = _mm_max_epi16(_mm_min_epi16(newA, _mm_set1_epi32(16383)), _mm_set1_epi32(0)); newR = _mm_shuffle_epi8(newR, _mm_set_epi8(-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, 1, 0, -128, -128, -128, -128)); newG = _mm_shuffle_epi8(newG, _mm_set_epi8(-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, 1, 0, -128, -128)); newB = _mm_shuffle_epi8(newB, _mm_set_epi8(-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, 1, 0)); newA = _mm_shuffle_epi8(newA, _mm_set_epi8(-128, -128, -128, -128, -128, -128, -128, -128, 1, 0, -128, -128, -128, -128, -128, -128)); rgba2S16 = _mm_or_si128(rgba2S16, _mm_or_si128(_mm_or_si128(_mm_or_si128(newR, newG), newB), newA)); } while(0); do { __m128i newR = _mm_mullo_epi32(rgbaS16Hi, row1); __m128i newG = _mm_mullo_epi32(rgbaS16Hi, row2); __m128i newB = _mm_mullo_epi32(rgbaS16Hi, row3); __m128i newA = _mm_mullo_epi32(rgbaS16Hi, _mm_set_epi32(1, 0, 0, 0)); newR = _mm_srai_epi32(newR, 16); newG = _mm_srai_epi32(newG, 16); newB = _mm_srai_epi32(newB, 16); newR = _mm_hadd_epi32(newR, _mm_setzero_si128()); newG = _mm_hadd_epi32(newG, _mm_setzero_si128()); newB = _mm_hadd_epi32(newB, _mm_setzero_si128()); newA = _mm_hadd_epi32(newA, _mm_setzero_si128()); newR = _mm_hadd_epi32(newR, _mm_setzero_si128()); newG = _mm_hadd_epi32(newG, _mm_setzero_si128()); newB = _mm_hadd_epi32(newB, _mm_setzero_si128()); newA = _mm_hadd_epi32(newA, _mm_setzero_si128()); newR = _mm_max_epi16(_mm_min_epi16(newR, _mm_set1_epi32(16383)), _mm_set1_epi32(0)); newG = _mm_max_epi16(_mm_min_epi16(newG, 
_mm_set1_epi32(16383)), _mm_set1_epi32(0)); newB = _mm_max_epi16(_mm_min_epi16(newB, _mm_set1_epi32(16383)), _mm_set1_epi32(0)); newA = _mm_max_epi16(_mm_min_epi16(newA, _mm_set1_epi32(16383)), _mm_set1_epi32(0)); newR = _mm_shuffle_epi8(newR, _mm_set_epi8(-128, -128, 1, 0, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128)); newG = _mm_shuffle_epi8(newG, _mm_set_epi8(-128, -128, -128, -128, 1, 0, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128)); newB = _mm_shuffle_epi8(newB, _mm_set_epi8(-128, -128, -128, -128, -128, -128, 1, 0, -128, -128, -128, -128, -128, -128, -128, -128)); newA = _mm_shuffle_epi8(newA, _mm_set_epi8(1, 0, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128)); rgba2S16 = _mm_or_si128(rgba2S16, _mm_or_si128(_mm_or_si128(_mm_or_si128(newR, newG), newB), newA)); } while(0); rgba2U16 = _mm_slli_epi16(rgba2S16, 2); _mm_store_si128((__m128i*) ((uintptr_t) dst->data16 + b), rgba2U16); } MTR_END("CHi", "modulate_perform"); return 1; } CUTIVIS CHiPubNode *CHi_Modulate() { CHiPubNode *n = calloc(1, sizeof(*n)); n->type = CUTIHI_T('CMod','ulat'); n->Start = n->Stop = NULL; n->Perform = modulate_perform; n->sinkCount = 4; n->sinks = calloc(sizeof(*n->sinks), n->sinkCount); n->sourceCount = 1; n->sources = calloc(sizeof(*n->sources), n->sourceCount); n->sinks[0].type = CUTIHI_VAL_VEC4; n->sinks[0].data.vec4[0] = 1; n->sinks[1].type = CUTIHI_VAL_VEC4; n->sinks[1].data.vec4[0] = 1; return n; } static void update_keyed_values(CHiNodeGraph *ng) { for(size_t kfsIdx = 0; kfsIdx < ng->keyframesList.count; kfsIdx++) { CHiKeyframes *kfs = ng->keyframesList.keyframes[kfsIdx]; kfs->current.type = kfs->type; float now = ng->time; size_t idx = bisect(&now, kfs->times, kfs->count, sizeof(now), float_compar); if(idx == 0) { kfs->current.data = kfs->values[idx]; if(kfs->current.type == CUTIHI_VAL_VEC4 && kfs->extrapolationMode == CUTIHI_EXTRAPOLATION_CONSTANT) { kfs->current.data.vec4[0] += (now - 
kfs->times[0]) * kfs->extrapolationParameter[0]; kfs->current.data.vec4[1] += (now - kfs->times[0]) * kfs->extrapolationParameter[1]; kfs->current.data.vec4[2] += (now - kfs->times[0]) * kfs->extrapolationParameter[2]; kfs->current.data.vec4[3] += (now - kfs->times[0]) * kfs->extrapolationParameter[3]; } } else if(idx == kfs->count) { kfs->current.data = kfs->values[idx - 1]; if(kfs->current.type == CUTIHI_VAL_VEC4 && kfs->extrapolationMode == CUTIHI_EXTRAPOLATION_CONSTANT) { kfs->current.data.vec4[0] += (now - kfs->times[kfs->count - 1]) * kfs->extrapolationParameter[0]; kfs->current.data.vec4[1] += (now - kfs->times[kfs->count - 1]) * kfs->extrapolationParameter[1]; kfs->current.data.vec4[2] += (now - kfs->times[kfs->count - 1]) * kfs->extrapolationParameter[2]; kfs->current.data.vec4[3] += (now - kfs->times[kfs->count - 1]) * kfs->extrapolationParameter[3]; } } else { if(kfs->type == CUTIHI_VAL_VEC4) { float alpha = (now - kfs->times[idx - 1]) / (kfs->times[idx] - kfs->times[idx - 1]); kfs->current.data.vec4[0] = kfs->values[idx - 1].vec4[0] + (kfs->values[idx].vec4[0] - kfs->values[idx - 1].vec4[0]) * alpha; kfs->current.data.vec4[1] = kfs->values[idx - 1].vec4[1] + (kfs->values[idx].vec4[1] - kfs->values[idx - 1].vec4[1]) * alpha; kfs->current.data.vec4[2] = kfs->values[idx - 1].vec4[2] + (kfs->values[idx].vec4[2] - kfs->values[idx - 1].vec4[2]) * alpha; kfs->current.data.vec4[3] = kfs->values[idx - 1].vec4[3] + (kfs->values[idx].vec4[3] - kfs->values[idx - 1].vec4[3]) * alpha; } else { kfs->current.data = kfs->values[idx - 1]; } } } } static int time_perform(CHiPubNode *node) { node->sources->type = CUTIHI_VAL_VEC4; node->sources->data.vec4[0] = node->ng->time; return 1; } CUTIVIS void CHi_Time_Set(CHiNodeGraph *ng, float f) { ng->timedelta = f - ng->time; ng->time = f; update_keyed_values(ng); } CUTIVIS float CHi_Time_Get(CHiNodeGraph *ng) { return ng->time; } CUTIVIS float CHi_Time_GetDelta(CHiNodeGraph *ng) { return ng->timedelta; } CUTIVIS CHiPubNode 
*CHi_Time() { CHiPubNode *n = calloc(1, sizeof(*n)); n->type = CUTIHI_T('CTim','e '); n->Start = n->Stop = NULL; n->Perform = time_perform; n->sinkCount = 0; n->sinks = NULL; n->sourceCount = 1; n->sources = calloc(sizeof(*n->sources), 1); return n; } struct TextNode { CHiPubNode pubn; PangoFontMap *pfontmap; PangoContext *pcontext; PangoFontDescription * pfontdesc; PangoLayout *playout; char *cacheText; char *cacheFontName; }; static int text_perform(CHiPubNode *n) { MTR_BEGIN("CHi", "text_perform"); struct TextNode *this = (struct TextNode*) n; CHiValue *valFontName = CHi_Crawl(&n->sinks[3]); CHiValue *valDPI = CHi_Crawl(&n->sinks[2]); CHiValue *valCol = CHi_Crawl(&n->sinks[1]); CHiValue *valText = CHi_Crawl(&n->sinks[0]); if(!this->cacheFontName || strcmp(this->cacheFontName, valFontName->data.text)) { if(this->cacheFontName) free(this->cacheFontName); this->cacheFontName = strdup(valFontName->data.text); this->pfontmap = pango_ft2_font_map_new(); pango_ft2_font_map_set_resolution(PANGO_FT2_FONT_MAP(this->pfontmap), 72, 72); this->pcontext = pango_font_map_create_context(this->pfontmap); pango_context_set_language(this->pcontext, pango_language_from_string("en_US")); pango_context_set_base_dir(this->pcontext, PANGO_DIRECTION_LTR); this->pfontdesc = pango_font_description_from_string(this->cacheFontName); this->playout = pango_layout_new(this->pcontext); pango_layout_set_font_description(this->playout, this->pfontdesc); free(this->cacheText); this->cacheText = NULL; } if(!this->cacheText || strcmp(this->cacheText, valText->data.text)) { if(this->cacheText) free(this->cacheText); this->cacheText = strdup(valText->data.text); pango_layout_set_markup(this->playout, valText->data.text, -1); } pango_ft2_font_map_set_resolution(PANGO_FT2_FONT_MAP(this->pfontmap), valDPI->data.vec4[0], valDPI->data.vec4[0]); PangoRectangle extents; pango_layout_get_extents(this->playout, NULL, &extents); n->sources[0].type = CUTIHI_VAL_SAMPLE; if(n->sources->data.sample) { 
CHi_Image_Free(n->sources->data.sample); n->sources->data.sample = NULL; } if(!CHi_Node_Active(n)) { return 1; } size_t width = (PANGO_PIXELS(extents.width) + 15) & ~15; CHiImage *chiret = CHi_Image_New(2, 4, 8 * width, width, PANGO_PIXELS(extents.height), NULL); n->sources->data.sample = chiret; FT_Bitmap bmp = {}; FT_Bitmap_New(&bmp); bmp.width = chiret->width; bmp.rows = chiret->height; bmp.buffer = calloc(bmp.width, bmp.rows); bmp.pitch = chiret->width; bmp.pixel_mode = FT_PIXEL_MODE_GRAY; bmp.num_grays = 256; pango_ft2_render_layout(&bmp, this->playout, PANGO_PIXELS(extents.x) + (PANGO_PIXELS(extents.width) + 15) % 16 / 4, PANGO_PIXELS(extents.y)); __m128i ones = _mm_set1_epi64x( (((size_t) (valCol->data.vec4[2] * 255) % 256) << 0) | (((size_t) (valCol->data.vec4[1] * 255) % 256) << 16) | (((size_t) (valCol->data.vec4[0] * 255) % 256) << 32) | 0x0100000000000000 ); for(size_t p = 0; p < bmp.width * bmp.rows; p += 2) { __m128i alphad0 = _mm_mullo_epi16(ones, _mm_set_epi16(bmp.buffer[p + 1], 0xFF, 0xFF, 0xFF, bmp.buffer[p + 0], 0xFF, 0xFF, 0xFF)); _mm_stream_si128((__m128i*) &chiret->data16[p * 4], alphad0); } free(bmp.buffer); MTR_END("CHi", "text_perform"); return 1; } CUTIVIS CHiPubNode *CHi_Text() { CHiPubNode *n = calloc(1, sizeof(struct TextNode)); n->type = CUTIHI_T('CTex','t '); n->Start = n->Stop = NULL; n->Perform = text_perform; n->sinks = calloc(sizeof(*n->sinks), n->sinkCount = 4); n->sources = calloc(sizeof(*n->sources), n->sourceCount = 1); n->sinks[0].type = CUTIHI_VAL_TEXT; n->sinks[0].data.text = strdup("Title Text"); n->sinks[2].type = CUTIHI_VAL_VEC4; n->sinks[2].data.vec4[0] = 256; n->sinks[3].type = CUTIHI_VAL_TEXT; n->sinks[3].data.text = strdup("Sans-Serif"); return n; } static int mixer_perform(CHiPubNode *n) { if(n->sinkCount == 0) { return 1; } n->sources[0].type = CUTIHI_VAL_SAMPLE; MTR_BEGIN("CHi", "mixer_perform"); if(n->sources[0].data.sample) { CHi_Image_Free(n->sources[0].data.sample); n->sources[0].data.sample = NULL; } size_t 
width = 0, height = 0, stride = 0; for(int s = 0; s < n->sinkCount; s++) { CHiValue *val = CHi_Crawl(&n->sinks[s]); if(val && val->type == CUTIHI_VAL_SAMPLE) { if(width == 0 || height == 0) { width = val->data.sample->width; height = val->data.sample->height; stride = val->data.sample->stride; } else { assert(val->data.sample->width == width && val->data.sample->height == height); } } } if(width == 0 || height == 0) { return 1; } n->sources[0].data.sample = CHi_Image_New(2, 1, (stride + 15) & ~15, width, height, NULL); for(size_t b = 0; b < stride; b += 16) { __m128i sum = _mm_setzero_si128(); for(int s = 0; s < n->sinkCount; s++) { CHiValue *val = CHi_Crawl(&n->sinks[s]); if(val && val->type == CUTIHI_VAL_SAMPLE) { sum = _mm_adds_epi16(sum, _mm_load_si128((__m128i*) ((uintptr_t) val->data.sample->data16 + b))); } } _mm_stream_si128((__m128i*) ((uintptr_t) n->sources[0].data.sample->data16 + b), sum); } MTR_END("CHi", "mixer_perform"); return 1; } CUTIVIS CHiPubNode *CHi_Mixer() { CHiPubNode *n = calloc(1, sizeof(*n)); n->type = CUTIHI_T('CMix','er '); n->Start = n->Stop = NULL; n->Perform = mixer_perform; n->sinks = calloc(sizeof(*n->sinks), n->sinkCount = 2); n->sources = calloc(sizeof(*n->sources), n->sourceCount = 1); return n; } static int preview_perform(CHiPubNode *n) { return 1; } CUTIVIS CHiPubNode *CHi_Preview() { CHiPubNode *n = calloc(1, sizeof(*n)); n->type = CUTIHI_T('CPre','view'); n->Start = n->Stop = NULL; n->Perform = preview_perform; n->sinks = calloc(sizeof(*n->sinks), n->sinkCount = 1); n->sources = NULL; n->sourceCount = 0; n->sinks[0].type = CUTIHI_VAL_SAMPLE; n->sinks[0].data.sample = NULL; return n; } static int chromakey_perform(CHiPubNode *n) { CHiValue *sampleV = CHi_Crawl(&n->sinks[0]); CHiValue *colorV = CHi_Crawl(&n->sinks[1]); if(!sampleV || sampleV->type != CUTIHI_VAL_SAMPLE || !sampleV->data.sample) { return 1; } CHiImage *src = sampleV->data.sample; if(n->sources[0].data.sample) { CHi_Image_Free(n->sources[0].data.sample); } 
n->sources[0].type = CUTIHI_VAL_SAMPLE; CHiImage *dst = n->sources[0].data.sample = CHi_Image_New(2, 4, (src->width * src->bpc * src->channels + 15) & ~15, src->width, src->height, NULL); int16_t uKey = 32767 * (colorV->data.vec4[0] * -0.1146 + colorV->data.vec4[1] * -0.3854 + colorV->data.vec4[2] * +0.5000); int16_t vKey = 32767 * (colorV->data.vec4[0] * +0.5000 + colorV->data.vec4[1] * -0.4542 + colorV->data.vec4[2] * -0.0458); __m128i row2 = _mm_set_epi32(0, -3755, -12628, 16384); __m128i row3 = _mm_set_epi32(0, 16384, -14883, -1501); float threshold0 = 300; float threshold1 = 3000; for(size_t y = 0; y < src->height; y++) { for(size_t off = 0; off < dst->stride; off += 16) { __m128i rgba2U16 = _mm_load_si128((__m128i*) ((uintptr_t) src->data16 + y * src->stride + off)); __m128i rgba2S16 = _mm_srli_epi16(rgba2U16, 1); __m128i rgbaS16Lo = _mm_unpacklo_epi16(rgba2S16, _mm_setzero_si128()); __m128i rgbaS16Hi = _mm_unpackhi_epi16(rgba2S16, _mm_setzero_si128()); __m128i alphas = _mm_setzero_si128(); { __m128i uProd = _mm_mullo_epi32(row2, rgbaS16Lo); __m128i vProd = _mm_mullo_epi32(row3, rgbaS16Lo); uProd = _mm_srai_epi32(uProd, 15); uProd = _mm_hadd_epi32(uProd, _mm_setzero_si128()); uProd = _mm_hadd_epi32(uProd, _mm_setzero_si128()); vProd = _mm_srai_epi32(vProd, 15); vProd = _mm_hadd_epi32(vProd, _mm_setzero_si128()); vProd = _mm_hadd_epi32(vProd, _mm_setzero_si128()); __m128 diffU = _mm_cvtepi32_ps(_mm_sub_epi32(uProd, _mm_set1_epi32(uKey))); __m128 diffV = _mm_cvtepi32_ps(_mm_sub_epi32(vProd, _mm_set1_epi32(vKey))); __m128 distance = _mm_sqrt_ps(_mm_add_ps(_mm_mul_ps(diffU, diffU), _mm_mul_ps(diffV, diffV))); __m128 alpha = (__m128) _mm_bslli_si128((__m128i) distance, 12); alpha = _mm_sub_ps(alpha, _mm_set1_ps(threshold0)); alpha = _mm_max_ps(alpha, _mm_set1_ps(0)); alpha = _mm_mul_ps(alpha, _mm_set1_ps(65535 / (threshold1 - threshold0))); alpha = _mm_min_ps(alpha, _mm_set1_ps(65535)); __m128i z = _mm_hadd_epi16(_mm_add_epi32(_mm_and_si128(_mm_cvtps_epi32(alpha), 
_mm_set_epi32(0xFFFFFFFF, 0, 0, 0)), _mm_set_epi32(0, 65535, 65535, 65535)), _mm_setzero_si128()); alphas = _mm_or_si128(alphas, z); } { __m128i uProd = _mm_mullo_epi32(row2, rgbaS16Hi); __m128i vProd = _mm_mullo_epi32(row3, rgbaS16Hi); uProd = _mm_srai_epi32(uProd, 15); uProd = _mm_hadd_epi32(uProd, _mm_setzero_si128()); uProd = _mm_hadd_epi32(uProd, _mm_setzero_si128()); vProd = _mm_srai_epi32(vProd, 15); vProd = _mm_hadd_epi32(vProd, _mm_setzero_si128()); vProd = _mm_hadd_epi32(vProd, _mm_setzero_si128()); __m128 diffU = _mm_cvtepi32_ps(_mm_sub_epi32(uProd, _mm_set1_epi32(uKey))); __m128 diffV = _mm_cvtepi32_ps(_mm_sub_epi32(vProd, _mm_set1_epi32(vKey))); __m128 distance = _mm_sqrt_ps(_mm_add_ps(_mm_mul_ps(diffU, diffU), _mm_mul_ps(diffV, diffV))); __m128 alpha = (__m128) _mm_bslli_si128((__m128i) distance, 12); alpha = _mm_sub_ps(alpha, _mm_set1_ps(threshold0)); alpha = _mm_max_ps(alpha, _mm_set1_ps(0)); alpha = _mm_mul_ps(alpha, _mm_set1_ps(65535 / (threshold1 - threshold0))); alpha = _mm_min_ps(alpha, _mm_set1_ps(65535)); __m128i z = _mm_hadd_epi16(_mm_add_epi32(_mm_and_si128(_mm_cvtps_epi32(alpha), _mm_set_epi32(0xFFFFFFFF, 0, 0, 0)), _mm_set_epi32(0, 65535, 65535, 65535)), _mm_setzero_si128()); alphas = _mm_or_si128(alphas, _mm_bslli_si128(z, 8)); } rgba2U16 = _mm_mulhi_epu16(rgba2U16, alphas); _mm_stream_si128((__m128i*) ((uintptr_t) dst->data16 + y * src->stride + off), rgba2U16); } } return 1; } CUTIVIS CHiPubNode *CHi_ChromaKey() { CHiPubNode *n = calloc(1, sizeof(*n)); n->type = CUTIHI_T('CChr','omaK'); n->Start = n->Stop = NULL; n->Perform = chromakey_perform; n->sinks = calloc(sizeof(*n->sinks), n->sinkCount = 2); n->sources = calloc(sizeof(*n->sources), n->sourceCount = 1); n->sinks[1].type = CUTIHI_VAL_VEC4; // Default green n->sinks[1].data.vec4[0] = 0; n->sinks[1].data.vec4[1] = 1; n->sinks[1].data.vec4[2] = 0; n->sinks[1].data.vec4[3] = 1; return n; } static void save_chival(CHiNodeGraph *ng, CHiSaveWriter writer, CHiValType type, CHiValueRaw 
data, void *ud) { if(type == CUTIHI_VAL_TEXT) { size_t len = strlen(data.text); writer(ud, &(uint32_t) {len}, sizeof(uint32_t)); writer(ud, data.text, len); } else if(type == CUTIHI_VAL_VEC4) { writer(ud, data.vec4, sizeof(data.vec4)); } else if(type == CUTIHI_VAL_LINKED) { size_t index; for(index = 0; index < ng->count; index++) { if(ng->nodes[index] == data.linked.to) { break; } } assert(index < ng->count); writer(ud, &(uint64_t) {index}, sizeof(uint64_t)); writer(ud, &(uint16_t) {data.linked.idx}, sizeof(uint16_t)); } else if(type == CUTIHI_VAL_KEYED) { size_t index; for(index = 0; index < ng->keyframesList.count; index++) { if(ng->keyframesList.keyframes[index] == data.keyed) { break; } } assert(index < ng->count); writer(ud, &(uint64_t) {index}, sizeof(uint64_t)); } } static void load_chival(CHiNodeGraph *ng, CHiLoadReader reader, CHiValType type, CHiValueRaw *data, void *ud) { if(type == CUTIHI_VAL_TEXT) { uint32_t len; reader(ud, &len, sizeof(len)); data->text = malloc(len + 1); reader(ud, data->text, len); data->text[len] = 0; } else if(type == CUTIHI_VAL_VEC4) { reader(ud, data->vec4, sizeof(data->vec4)); } else if(type == CUTIHI_VAL_LINKED) { uint64_t index; reader(ud, &index, sizeof(index)); data->linked.to = ng->nodes[index]; uint16_t idx; reader(ud, &idx, sizeof(idx)); data->linked.idx = idx; } else if(type == CUTIHI_VAL_KEYED) { uint64_t index; reader(ud, &index, sizeof(index)); data->keyed = ng->keyframesList.keyframes[index]; } } CUTIVIS int CHi_NodeGraphSave(CHiNodeGraph *ng, CHiSaveWriter writer, void *ud) { writer(ud, "\x71\x74\xCE\xA0", 4); writer(ud, &(float) {ng->duration}, sizeof(float)); writer(ud, &(float) {ng->time}, sizeof(float)); writer(ud, &(uint64_t) {ng->keyframesList.count}, sizeof(uint64_t)); for(size_t i = 0; i < ng->keyframesList.count; i++) { CHiKeyframes *kfs = ng->keyframesList.keyframes[i]; writer(ud, &(uint16_t) {kfs->type}, sizeof(uint16_t)); writer(ud, &(uint64_t) {kfs->count}, sizeof(uint64_t)); writer(ud, kfs->times, 
sizeof(*kfs->times) * kfs->count); for(size_t k = 0; k < kfs->count; k++) { save_chival(ng, writer, kfs->type, kfs->values[k], ud); } writer(ud, &(uint16_t) {kfs->extrapolationMode}, sizeof(uint16_t)); writer(ud, kfs->extrapolationParameter, sizeof(kfs->extrapolationParameter)); } writer(ud, &(uint64_t) {ng->count}, sizeof(uint64_t)); for(size_t i = 0; i < ng->count; i++) { CHiPubNode *node = ng->nodes[i]; writer(ud, &(uint64_t) {node->type}, sizeof(uint64_t)); } for(size_t i = 0; i < ng->count; i++) { CHiPubNode *node = ng->nodes[i]; if(node->Save) { node->Save(node, ud, writer); } writer(ud, &(uint16_t) {node->sinkCount}, sizeof(uint16_t)); for(size_t sink = 0; sink < node->sinkCount; sink++) { writer(ud, &(uint16_t) {node->sinks[sink].type}, sizeof(uint16_t)); save_chival(ng, writer, node->sinks[sink].type, node->sinks[sink].data, ud); } } return 0; } CUTIVIS int CHi_NodeGraphLoad(CHiNodeGraph *ng, CHiLoadReader reader, void *ud) { { char magic[4]; reader(ud, magic, sizeof(magic)); if(memcmp(magic, "\x71\x74\xCE\xA0", 4)) { return 1; } } CHi_NodeGraphReset(ng); reader(ud, &ng->duration, sizeof(float)); reader(ud, &ng->time, sizeof(float)); { uint64_t count; reader(ud, &count, sizeof(count)); ng->keyframesList.count = count; } for(size_t i = 0; i < ng->keyframesList.count; i++) { CHiKeyframes *kfs = ng->keyframesList.keyframes[i] = calloc(1, sizeof(*kfs)); { uint16_t type; reader(ud, &type, sizeof(type)); kfs->type = type; } { uint64_t count; reader(ud, &count, sizeof(count)); kfs->count = count; } kfs->times = calloc(kfs->count, sizeof(*kfs->times)); reader(ud, kfs->times, kfs->count * sizeof(*kfs->times)); for(size_t k = 0; k < kfs->count; k++) { load_chival(ng, reader, kfs->type, &kfs->values[k], ud); } { uint16_t extrap; reader(ud, &extrap, sizeof(extrap)); kfs->extrapolationMode = extrap; } reader(ud, kfs->extrapolationParameter, sizeof(kfs->extrapolationParameter)); } { uint64_t count; reader(ud, &count, sizeof(count)); ng->count = count; } ng->capacity = 
ng->count < 8 ? 8 : ng->count; ng->nodes = calloc(ng->capacity, sizeof(*ng->nodes)); for(size_t i = 0; i < ng->count; i++) { uint64_t type; reader(ud, &type, sizeof(type)); CHiPubNode *n = NULL; if(type == CUTIHI_T('CPre','view')) { n = CHi_Preview(); } else if(type == CUTIHI_T('CMix','er ')) { n = CHi_Mixer(); } else if(type == CUTIHI_T('CTex','t ')) { n = CHi_Text(); } else if(type == CUTIHI_T('CTim','e ')) { n = CHi_Time(); } else if(type == CUTIHI_T('CMod','ulat')) { n = CHi_Modulate(); } else if(type == CUTIHI_T('CCns','tCol')) { n = CHi_ConstantSample(); } else if(type == CUTIHI_T('CEmb','ed ')) { n = CHi_Embed(); } else if(type == CUTIHI_T('CIma','ge ')) { n = CHi_Image(); } else if(type == CUTIHI_T('CWin','dow ')) { n = CHi_Window(); } else if(type == CUTIHI_T('CInA','udio')) { n = CHi_Microphone(); } else if(type == CUTIHI_T('CExp','Wave')) { n = CHi_ExportWav(); } else if(type == CUTIHI_T('CMov','ie ')) { n = CHi_Movie(); } else if(type == CUTIHI_T('CEnc','GVP8')) { n = CHi_EncodeVP8(); } else if(type == CUTIHI_T('CEnc','GVP9')) { n = CHi_EncodeVP9(); } else if(type == CUTIHI_T('CExp','Webm')) { n = CHi_MuxWebm(); } else if(type == CUTIHI_T('CKey','hook')) { n = CHi_Keyhook(); } else if(type == CUTIHI_T('CKey','hook')) { n = CHi_Keyhook(); } else if(type == CUTIHI_T('CEnc','Opus')) { n = CHi_EncodeOpus(); } else if(type == CUTIHI_T('CWeb','Cam ')) { n = CHi_Camera(); } else if(type == CUTIHI_T('CCmp','nScl')) { n = CHi_ComponentScale(); } else if(type == CUTIHI_T('CEnc','H264')) { n = CHi_EncodeH264(); } else if(type == CUTIHI_T('CStr','RTMP')) { n = CHi_StreamRTMP(); } else if(type == CUTIHI_T('CEnc','AACL')) { n = CHi_EncodeAAC(); } n->ng = ng; if(!n) { CHi_NodeGraphReset(ng); puts("Error: Unknown node type!"); return 1; } ng->nodes[i] = n; } for(size_t i = 0; i < ng->count; i++) { CHiPubNode *n = ng->nodes[i]; { uint16_t u16; reader(ud, &u16, sizeof(u16)); n->sinkCount = u16; } n->sinks = calloc(n->sinkCount, sizeof(*n->sinks)); for(size_t s = 0; s < 
n->sinkCount; s++) { { uint16_t u16; reader(ud, &u16, sizeof(u16)); n->sinks[s].type = u16; } load_chival(ng, reader, n->sinks[s].type, &n->sinks[s].data, ud); if(n->sinks[s].type == CUTIHI_VAL_LINKED) { adjacency_add(n->sinks[s].data.linked.to, n); } } } update_keyed_values(ng); return 0; } CUTIVIS bool CHi_Node_Active(CHiPubNode *pubn) { float now = CHi_Time_Get(pubn->ng); return pubn->lifespan.start <= now && (pubn->lifespan.end == 0 || now < pubn->lifespan.end); }