Support Twitch streaming, chroma key, errors, fixed modulation, node lifespan, fix bugs, many optimizations

This commit is contained in:
mid
2025-03-09 10:25:39 +02:00
parent 8f053bbdb1
commit 6fc29ba5f8
17 changed files with 1307 additions and 678 deletions

View File

@@ -6,6 +6,7 @@ extern "C" {
#endif
#define CUTIHI_BS_FLAG_KEY 1
#define CUTIHI_BS_SETUP_PACKET 2
typedef struct {
uint64_t timestamp;
@@ -23,4 +24,4 @@ typedef struct {
}
#endif
#endif
#endif

View File

@@ -15,6 +15,7 @@ typedef struct CHiImage {
uint16_t height;
union {
uint16_t *data16;
uint8_t *data8;
};
uint8_t owned;
} CHiImage;

View File

@@ -83,6 +83,10 @@ static inline __m128 log2f4(__m128 x)
return _mm_add_ps(p, e);
}
__attribute__((optimize("O3"))) static inline __m128 apply_gamma_ps(__m128 z, __m128 gamma) {
return exp2f4(_mm_mul_ps(log2f4(z), gamma));
}
__attribute__((optimize("O3"))) static inline __m128i apply_gamma_epi32(__m128i z, __m128 gamma) {
__m128 zf = _mm_cvtepi32_ps(z);
zf = _mm_mul_ps(zf, _mm_set1_ps(1.f / 65535));

View File

@@ -40,8 +40,8 @@ static int pacallback(const void *input_, void *output, unsigned long samples, c
/*static size_t total = 0;
for(size_t i = 0; i < pabufsize; i++) {
paBuffer[paBufferWriteIdx] = sin(total++ * 440.0 / 24000 * 3.141592653) * 0.1;
paBufferWriteIdx = (paBufferWriteIdx + 1) % pabufsize;
node->paBuffer[node->paBufferWriteIdx] = sin(total++ * 440.0 / 24000 * 3.141592653) * 0.3;
node->paBufferWriteIdx = (node->paBufferWriteIdx + 1) % pabufsize;
}*/
return paContinue;
@@ -111,8 +111,6 @@ static int microphone_perform(CHiPubNode *pubn) {
pubn->sources[0].type = CUTIHI_VAL_SAMPLE;
pubn->sources[0].data.sample = ret;
pubn->clean = 0;
MTR_END("CHi", "microphone_perform");
return 1;
@@ -130,7 +128,6 @@ CUTIVIS CHiPubNode *CHi_Microphone() {
n->Start = microphone_start;
n->Perform = microphone_perform;
n->Stop = microphone_stop;
n->clean = 0;
n->sinkCount = 1;
n->sinks = calloc(sizeof(*n->sinks), n->sinkCount);
n->sourceCount = 1;
@@ -180,7 +177,6 @@ static int exportwav_perform(CHiPubNode *pubn) {
fwrite(buf->data16, 2, buf->width, n->output);
pubn->clean = 0;
return 1;
}
CUTIVIS int CHi_ExportWav_Stop(CHiPubNode *pubn) {
@@ -204,7 +200,6 @@ CUTIVIS CHiPubNode *CHi_ExportWav() {
n->pubn.Start = CHi_ExportWav_Start;
n->pubn.Perform = exportwav_perform;
n->pubn.Stop = CHi_ExportWav_Stop;
n->pubn.clean = 0;
n->pubn.sinkCount = 2;
n->pubn.sinks = calloc(sizeof(*n->pubn.sinks), n->pubn.sinkCount);
n->pubn.sourceCount = 0;

684
hi/node.c
View File

@@ -142,10 +142,17 @@ CUTIVIS void CHi_RegisterNode(CHiNodeGraph* ng, CHiPubNode* n) {
ng->nodes[ng->count++] = n;
n->ng = ng;
if(ng->compilationStatus == CUTIHI_COMP_RUNNING) {
n->Start(n);
}
}
CUTIVIS void CHi_MakeDirty(CHiNodeGraph *ng, CHiPubNode *n) {
for(size_t i = 0; i < ng->count; i++) {
for(int adj = 0; adj < ng->adjacencyCount; adj++) {
if(ng->adjacencies[adj][0] == n) {
//n->clean = 0;
}
}
}
@@ -192,6 +199,11 @@ static int topological_sort(CHiNodeGraph *ng) {
return 1;
}
CUTIVIS int CHi_ConfigureSink(CHiPubNode *n, size_t i, CHiValue v) {
if(n->sinkCount <= i) {
n->sinks = realloc(n->sinks, sizeof(*n->sinks) * (i + 1));
memset(&n->sinks[i], 0, sizeof(*n->sinks));
}
if(n->sinks[i].type == CUTIHI_VAL_KEYED) {
n->sinks[i].data.keyed->current = v;
return 1;
@@ -205,6 +217,7 @@ CUTIVIS int CHi_ConfigureSink(CHiPubNode *n, size_t i, CHiValue v) {
adjacency_remove(old.data.linked.to, n);
}
// Check if viable
n->sinks[i] = v;
if(n->ng && !topological_sort(n->ng)) {
n->sinks[i] = old;
@@ -220,6 +233,8 @@ CUTIVIS int CHi_ConfigureSink(CHiPubNode *n, size_t i, CHiValue v) {
adjacency_add(v.data.linked.to, n);
}
CHi_MakeDirty(n->ng, n);
return 1;
}
@@ -299,9 +314,7 @@ CUTIVIS void CHi_DeleteKeyframe(CHiNodeGraph *ng, CHiKeyframes *kfs, size_t idx)
kfs->count--;
}
CUTIVIS size_t CHi_GetClosestKeyframe(CHiNodeGraph *ng, size_t kfsIdx, float t) {
CHiKeyframes *kfs = ng->keyframesList.keyframes[kfsIdx];
CUTIVIS size_t CHi_GetClosestKeyframe(CHiNodeGraph *ng, CHiKeyframes *kfs, float t) {
if(kfs->count == 1) {
return 0;
}
@@ -347,11 +360,55 @@ CUTIVIS int CHi_Hysteresis(CHiPubNode *root) {
}
}
root->Perform(root);
//if(!root->clean) {
root->Perform(root);
//}
return 1;
}
/*
 * Tells whether any error slot of n has a different active state now than
 * the state recorded in activeLast.
 *
 * Returns true on the first differing slot, false when all slots match.
 */
static bool error_changes(CHiPubNode *n) {
	int slot = 0;
	while(slot < CUTIHI_MAX_ERRORS) {
		const bool now = n->errors.active[slot];
		const bool before = n->errors.activeLast[slot];
		if(now != before) {
			return true;
		}
		slot++;
	}
	return false;
}
/*
 * Archives the current error flags of n into activeLast and then clears
 * the current flags.
 */
static void save_errors(CHiPubNode *n) {
	bool *current = n->errors.active;
	bool *previous = n->errors.activeLast;
	for(int slot = 0; slot < CUTIHI_MAX_ERRORS; slot++) {
		previous[slot] = current[slot];
		current[slot] = false;
	}
}
/*
 * Runs one processing step over every node of the graph while holding
 * ng->mut.
 *
 * First the error flags of all nodes are archived and reset, then each
 * node's Perform callback is invoked in array order. A node whose error
 * flags differ from the archived ones triggers ng->eventOnError (if set).
 * Once all nodes have run, ng->eventOnFrameComplete is invoked (if set).
 */
static void perform_step(CHiNodeGraph *ng) {
	size_t idx;

	pthread_mutex_lock(&ng->mut);

	/* Archive every node's flags before any node performs. */
	for(idx = 0; idx < ng->count; idx++) {
		save_errors(ng->nodes[idx]);
	}

	for(idx = 0; idx < ng->count; idx++) {
		CHiPubNode *node = ng->nodes[idx];

		node->Perform(node);

		if(error_changes(node) && ng->eventOnError) {
			ng->eventOnError(ng, node);
		}
	}

	if(ng->eventOnFrameComplete) {
		ng->eventOnFrameComplete(ng);
	}

	pthread_mutex_unlock(&ng->mut);
}
bool timespec_less(const struct timespec l, const struct timespec r) {
if(l.tv_sec == r.tv_sec) {
return l.tv_nsec < r.tv_nsec;
@@ -401,11 +458,35 @@ void *compile_thread(void *ctx_) {
ctx->ng->time = ctx->ng->timedelta = 0;
puts("START");
for(size_t nIdx = 0; nIdx < ctx->ng->count; nIdx++) {
if(ctx->ng->nodes[nIdx]->Start) {
ctx->ng->nodes[nIdx]->Start(ctx->ng->nodes[nIdx]);
} else {
ctx->ng->nodes[nIdx]->Perform(ctx->ng->nodes[nIdx]);
{
ssize_t nIdx;
for(nIdx = 0; nIdx < ctx->ng->count; nIdx++) {
bool success;
if(ctx->ng->nodes[nIdx]->Start) {
success = ctx->ng->nodes[nIdx]->Start(ctx->ng->nodes[nIdx]);
} else {
success = ctx->ng->nodes[nIdx]->Perform(ctx->ng->nodes[nIdx]);
}
if(!success) {
break;
}
}
if(nIdx != ctx->ng->count) {
// Starting failed; stop all previous nodes
ctx->ng->eventOnError(ctx->ng, ctx->ng->nodes[nIdx]);
nIdx--;
for(; nIdx >= 0; nIdx--) {
if(ctx->ng->nodes[nIdx]->Stop) {
ctx->ng->nodes[nIdx]->Stop(ctx->ng->nodes[nIdx]);
}
}
goto stop;
}
}
@@ -427,13 +508,7 @@ void *compile_thread(void *ctx_) {
CHi_Time_Set(ctx->ng, timespecToFloat(timespec_sub(now, start)));
for(size_t nIdx = 0; nIdx < ctx->ng->count; nIdx++) {
ctx->ng->nodes[nIdx]->Perform(ctx->ng->nodes[nIdx]);
}
if(ctx->ng->eventOnFrameComplete) {
ctx->ng->eventOnFrameComplete(ctx->ng);
}
perform_step(ctx->ng);
do {
clock_gettime(CLOCK_MONOTONIC, &now);
@@ -444,9 +519,7 @@ void *compile_thread(void *ctx_) {
for(uint64_t frm = 0; ctx->ng->compilationStatus != CUTIHI_COMP_KILL_YOURSELF && (ctx->ng->duration == -1 || frm < ctx->ng->duration * 30);) {
CHi_Time_Set(ctx->ng, frm / 30.f);
for(size_t nIdx = 0; nIdx < ctx->ng->count; nIdx++) {
ctx->ng->nodes[nIdx]->Perform(ctx->ng->nodes[nIdx]);
}
perform_step(ctx->ng);
struct timespec last;
clock_gettime(CLOCK_MONOTONIC, &last);
@@ -454,10 +527,6 @@ void *compile_thread(void *ctx_) {
clock_gettime(CLOCK_MONOTONIC, &now);
diff += timespec_sub(now, last).tv_nsec;
if(ctx->ng->eventOnFrameComplete) {
ctx->ng->eventOnFrameComplete(ctx->ng);
}
frm++;
}
}
@@ -473,6 +542,7 @@ void *compile_thread(void *ctx_) {
ctx->ng->eventOnStopComplete(ctx->ng);
}
stop:
ctx->ng->compilationStatus = CUTIHI_COMP_READY;
free(ctx);
@@ -496,57 +566,87 @@ CUTIVIS void CHi_StopCompilation(CHiNodeGraph *ng) {
}
}
/*
 * Image node with private cache state. CHi_Image() allocates this struct and
 * image_perform() casts its CHiPubNode pointer back to ImageNode, so pubn
 * has to stay the first member.
 */
typedef struct {
/* Public node header; must remain first (see above). */
CHiPubNode pubn;
/* Path string that image_perform() compares against the file-name sink to decide whether the cached image is stale. */
char *cachePath;
/* Decoded, gamma-adjusted image kept between performs; NULL when nothing is cached. */
CHiImage *cacheImg;
} ImageNode;
static int image_perform(CHiPubNode *node) {
if(node->clean) return 1;
ImageNode *internal = (ImageNode*) node;
node->sources->type = CUTIHI_VAL_SAMPLE;
if(node->sources->data.sample) CHi_Image_Free(node->sources->data.sample);
struct sail_image *simg;
SAIL_TRY(sail_load_from_file(node->sinks[0].data.text, &simg));
struct sail_image *cimg;
sail_convert_image(simg, SAIL_PIXEL_FORMAT_BPP64_BGRA, &cimg);
sail_destroy_image(simg);
CHiImage *img = CHi_Image_New(2, 4, (cimg->bytes_per_line + 15) & ~15, cimg->width, cimg->height, NULL);
CHi_Restride(cimg->pixels, img->data16, cimg->bytes_per_line, img->stride, img->height);
node->sources->data.sample = img;
for(size_t y = 0; y < img->height; y++) {
for(size_t x = 0; x < img->stride; x += 16) {
__m128i pixels = _mm_load_si128((__m128i*) ((uintptr_t) img->data16 + y * img->stride + x));
pixels = apply_gamma_epi16(pixels, _mm_set_ps(1.0f, 2.2f, 2.2f, 2.2f));
_mm_stream_si128((__m128i*) ((uintptr_t) img->data16 + y * img->stride + x), pixels);
const char *fn = node->sinks[CUTIHI_IMAGE_IN_FILE].data.text;
if(fn && (!internal->cachePath || strcmp(internal->cachePath, fn))) {
if(node->sinks[CUTIHI_IMAGE_IN_FILE].type == CUTIHI_VAL_NONE) {
return 1;
}
if(node->sinks[CUTIHI_IMAGE_IN_FILE].type != CUTIHI_VAL_TEXT) {
node->errors.active[0] = true;
strncpy(node->errors.code[0], "invalid type", CUTIHI_ERR_SIZE);
node->errors.sink[0] = CUTIHI_IMAGE_IN_FILE;
return 1;
}
if(internal->cacheImg) {
CHi_Image_Free(internal->cacheImg);
internal->cacheImg = NULL;
}
}
sail_destroy_image(cimg);
if(!internal->cacheImg) {
struct sail_image *simg;
if(sail_load_from_file(fn, &simg) != SAIL_OK) {
node->errors.active[0] = true;
strncpy(node->errors.code[0], "invalid file", CUTIHI_ERR_SIZE);
node->errors.sink[0] = CUTIHI_IMAGE_IN_FILE;
return 1;
}
struct sail_image *cimg;
sail_convert_image(simg, SAIL_PIXEL_FORMAT_BPP64_BGRA, &cimg);
sail_destroy_image(simg);
simg = NULL;
CHiImage *img = CHi_Image_New(2, 4, (cimg->bytes_per_line + 15) & ~15, cimg->width, cimg->height, NULL);
CHi_Restride(cimg->pixels, img->data16, cimg->bytes_per_line, img->stride, img->height);
internal->cacheImg = img;
for(size_t y = 0; y < img->height; y++) {
for(size_t x = 0; x < img->stride; x += 16) {
__m128i pixels = _mm_load_si128((__m128i*) ((uintptr_t) img->data16 + y * img->stride + x));
pixels = apply_gamma_epi16(pixels, _mm_set_ps(1.0f, 2.2f, 2.2f, 2.2f));
_mm_stream_si128((__m128i*) ((uintptr_t) img->data16 + y * img->stride + x), pixels);
}
}
sail_destroy_image(cimg);
}
if(CHi_Node_Active(node)) {
node->sources->data.sample = internal->cacheImg;
} else {
node->sources->data.sample = NULL;
}
node->clean = 0;
return 1;
err:
node->sources->data.sample = NULL;
return 0;
}
CUTIVIS CHiPubNode *CHi_Image() {
CHiPubNode *n = calloc(1, sizeof(*n));
CHiPubNode *n = calloc(1, sizeof(ImageNode));
n->type = CUTIHI_T('CIma','ge ');
n->Start = n->Stop = NULL;
n->Perform = image_perform;
n->clean = 0;
n->sinkCount = 1;
n->sinks = calloc(sizeof(*n->sinks), 1);
n->sourceCount = 1;
n->sources = calloc(sizeof(*n->sources), 1);
((ImageNode*) n)->cachePath = strdup("");
return n;
}
static int embed_perform(CHiPubNode *node) {
if(node->clean) return 1;
MTR_BEGIN("CHi", "embed_perform");
node->sources[0].type = CUTIHI_VAL_SAMPLE;
@@ -557,7 +657,7 @@ static int embed_perform(CHiPubNode *node) {
CHiImage *dest = node->sources->data.sample = CHi_Image_New(2, 4, main->stride, main->width, main->height, NULL);
memcpy(dest->data16, main->data16, main->stride * main->height);
for(int sid = 0; sid < CUTIHI_EMBED_MAX_SMALLS; sid++) {
for(int sid = 0; sid < (node->sinkCount - 1) / 3; sid++) {
CHiImage *sub = CHi_Crawl(&node->sinks[1 + sid * 3])->data.sample;
if(!sub) continue;
@@ -590,7 +690,6 @@ static int embed_perform(CHiPubNode *node) {
MTR_END("CHi", "embed_perform");
node->clean = 0;
return 1;
}
CUTIVIS CHiPubNode *CHi_Embed() {
@@ -598,37 +697,32 @@ CUTIVIS CHiPubNode *CHi_Embed() {
n->type = CUTIHI_T('CEmb','ed ');
n->Start = n->Stop = NULL;
n->Perform = embed_perform;
n->clean = 0;
n->sinks = calloc(sizeof(*n->sinks), n->sinkCount = 1 + 3 * CUTIHI_EMBED_MAX_SMALLS);
for(int i = 0; i < CUTIHI_EMBED_MAX_SMALLS; i++) {
n->sinks[2 + i * 3].type = CUTIHI_VAL_VEC4;
n->sinks[2 + i * 3].data.vec4[0] = 0;
n->sinks[2 + i * 3].data.vec4[1] = 0;
n->sinks[3 + i * 3].type = CUTIHI_VAL_VEC4;
n->sinks[3 + i * 3].data.vec4[0] = 1;
}
n->sources = calloc(sizeof(*n->sources), n->sourceCount = 1);
return n;
}
static int constantsample_perform(CHiPubNode *node) {
if(node->clean) return 1;
node->sources[0].type = CUTIHI_VAL_SAMPLE;
if(node->sources->data.sample) CHi_Image_Free(node->sources->data.sample);
CHiValue *sink = CHi_Crawl(&node->sinks[0]);
CHiValue *sz = CHi_Crawl(&node->sinks[1]);
CHiImage *img = CHi_Image_New(2, 4, 8 * 16, 16, 16, NULL);
for(int i = 0; i < 256; i++) {
img->data16[i * 4 + 0] = sink->data.vec4[2] * 65535;
img->data16[i * 4 + 1] = sink->data.vec4[1] * 65535;
img->data16[i * 4 + 2] = sink->data.vec4[0] * 65535;
img->data16[i * 4 + 3] = 65535;
size_t w = sz->data.vec4[0] < 1 ? 1 : sz->data.vec4[0];
size_t h = sz->data.vec4[1] < 1 ? 1 : sz->data.vec4[1];
CHiImage *img = CHi_Image_New(2, 4, 8 * w, w, h, NULL);
if(CHi_Node_Active(node)) {
for(size_t i = 0; i < w * h; i++) {
img->data16[i * 4 + 0] = sink->data.vec4[2] * 65535;
img->data16[i * 4 + 1] = sink->data.vec4[1] * 65535;
img->data16[i * 4 + 2] = sink->data.vec4[0] * 65535;
img->data16[i * 4 + 3] = 65535;
}
}
node->sources->data.sample = img;
node->clean = 0;
return 1;
}
CUTIVIS CHiPubNode *CHi_ConstantSample() {
@@ -636,27 +730,172 @@ CUTIVIS CHiPubNode *CHi_ConstantSample() {
n->type = CUTIHI_T('CCns','tCol');
n->Start = n->Stop = NULL;
n->Perform = constantsample_perform;
n->clean = 0;
n->sinkCount = 1;
n->sinks = calloc(sizeof(*n->sinks), 1);
n->sinkCount = 2;
n->sinks = calloc(sizeof(*n->sinks), n->sinkCount);
n->sourceCount = 1;
n->sources = calloc(sizeof(*n->sources), 1);
n->sources = calloc(sizeof(*n->sources), n->sourceCount);
n->sinks[0].type = CUTIHI_VAL_VEC4;
n->sinks[0].data.vec4[0] = 1280;
n->sinks[0].data.vec4[1] = 720;
return n;
}
/*
 * Lane-wise 32-bit multiply that keeps the low 32 bits of each product,
 * built from _mm_mul_epu32 (which only multiplies lanes 0 and 2) plus
 * shuffles and unpacks.
 *
 * NOTE(review): this shares its name with the SSE4.1 intrinsic; presumably
 * the file never includes the header that declares that intrinsic — confirm.
 */
static __m128i _mm_mullo_epi32(__m128i a, __m128i b) {
// Technique credited (by the original author) to Agner Fog's code.
// Lane listings below are written high-to-low; '-' marks a don't-care lane.
__m128i a13 = _mm_shuffle_epi32(a, 0xF5); // (-,a3,-,a1)
__m128i b13 = _mm_shuffle_epi32(b, 0xF5); // (-,b3,-,b1)
__m128i prod02 = _mm_mul_epu32(a, b); // (-,a2*b2,-,a0*b0)
__m128i prod13 = _mm_mul_epu32(a13, b13); // (-,a3*b3,-,a1*b1)
__m128i prod01 = _mm_unpacklo_epi32(prod02,prod13); // (-,-,a1*b1,a0*b0)
__m128i prod23 = _mm_unpackhi_epi32(prod02,prod13); // (-,-,a3*b3,a2*b2)
__m128i prod = _mm_unpacklo_epi64(prod01,prod23); // (ab3,ab2,ab1,ab0)
return prod;
}
static int modulate_perform(CHiPubNode *node) {
if(node->clean) return 1;
MTR_BEGIN("CHi", "modulate_perform");
CHiValue *imgsrc = CHi_Crawl(&node->sinks[0]);
if(!imgsrc || imgsrc->type == CUTIHI_VAL_NONE) {
return 1;
}
if(imgsrc->type != CUTIHI_VAL_SAMPLE) {
node->errors.active[0] = true;
strncpy(node->errors.code[0], "invalid type", CUTIHI_ERR_SIZE);
node->errors.sink[0] = 0;
return 1;
}
node->sources[0].type = CUTIHI_VAL_SAMPLE;
if(node->sources->data.sample) CHi_Image_Free(node->sources->data.sample);
node->sources->data.sample = CHi_Image_New(2, 4, 8 * 16, 16, 16, NULL);
if(!CHi_Node_Active(node)) {
node->sources->data.sample = NULL;
return;
}
CHiImage *src = imgsrc->data.sample;
assert(src->stride % 16 == 0);
CHiImage *dst = CHi_Image_New(2, 4, src->stride, src->width, src->height, NULL);
node->sources->data.sample = dst;
float V = CHi_Crawl(&node->sinks[1])->data.vec4[0];
float S = CHi_Crawl(&node->sinks[2])->data.vec4[0];
float H = CHi_Crawl(&node->sinks[3])->data.vec4[0] * 3.1415926535897 / 180;
float sH = sinf(H);
float cH = cosf(H);
__m128i row1 = _mm_set_epi32(
0,
32768 * (+0.180472 * S * sH + 0.7874000 * S * cH + 0.2126 * V),
32768 * (-0.715274 * S * cH + 0.6069280 * S * sH + 0.7152 * V),
32768 * (-0.787400 * S * sH - 0.0721258 * S * cH + 0.0722 * V)
);
__m128i row2 = _mm_set_epi32(
0,
32768 * (-0.212585 * S * cH - 0.1472940 * S * sH + 0.2126 * V),
32768 * (-0.095334 * S * sH + 0.2847960 * S * cH + 0.7152 * V),
32768 * (-0.072211 * S * cH + 0.2426280 * S * sH + 0.0722 * V)
);
__m128i row3 = _mm_set_epi32(
0,
32768 * (-0.212652 * S * cH + 0.9278000 * S * sH + 0.2126 * V),
32768 * (-0.842814 * S * sH - 0.7151480 * S * cH + 0.7152 * V),
32768 * (-0.084987 * S * sH + 0.9278000 * S * cH + 0.0722 * V)
);
for(size_t b = 0; b < dst->stride * dst->height; b += 16) {
__m128i rgba2U16 = _mm_load_si128((__m128i*) ((uintptr_t) src->data16 + b));
__m128i rgba2S16 = _mm_srli_epi16(rgba2U16, 1);
__m128i rgbaS16Lo = _mm_unpacklo_epi16(rgba2S16, _mm_setzero_si128());
__m128i rgbaS16Hi = _mm_unpackhi_epi16(rgba2S16, _mm_setzero_si128());
rgba2S16 = _mm_setzero_si128();
do {
__m128i newR = _mm_mullo_epi32(rgbaS16Lo, row1);
__m128i newG = _mm_mullo_epi32(rgbaS16Lo, row2);
__m128i newB = _mm_mullo_epi32(rgbaS16Lo, row3);
__m128i newA = _mm_mullo_epi32(rgbaS16Lo, _mm_set_epi32(1, 0, 0, 0));
newR = _mm_srai_epi32(newR, 16);
newG = _mm_srai_epi32(newG, 16);
newB = _mm_srai_epi32(newB, 16);
newR = _mm_hadd_epi32(newR, _mm_setzero_si128());
newG = _mm_hadd_epi32(newG, _mm_setzero_si128());
newB = _mm_hadd_epi32(newB, _mm_setzero_si128());
newA = _mm_hadd_epi32(newA, _mm_setzero_si128());
newR = _mm_hadd_epi32(newR, _mm_setzero_si128());
newG = _mm_hadd_epi32(newG, _mm_setzero_si128());
newB = _mm_hadd_epi32(newB, _mm_setzero_si128());
newA = _mm_hadd_epi32(newA, _mm_setzero_si128());
newR = _mm_max_epi16(_mm_min_epi16(newR, _mm_set1_epi32(16383)), _mm_set1_epi32(0));
newG = _mm_max_epi16(_mm_min_epi16(newG, _mm_set1_epi32(16383)), _mm_set1_epi32(0));
newB = _mm_max_epi16(_mm_min_epi16(newB, _mm_set1_epi32(16383)), _mm_set1_epi32(0));
newA = _mm_max_epi16(_mm_min_epi16(newA, _mm_set1_epi32(16383)), _mm_set1_epi32(0));
newR = _mm_shuffle_epi8(newR, _mm_set_epi8(-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, 1, 0, -128, -128, -128, -128));
newG = _mm_shuffle_epi8(newG, _mm_set_epi8(-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, 1, 0, -128, -128));
newB = _mm_shuffle_epi8(newB, _mm_set_epi8(-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, 1, 0));
newA = _mm_shuffle_epi8(newA, _mm_set_epi8(-128, -128, -128, -128, -128, -128, -128, -128, 1, 0, -128, -128, -128, -128, -128, -128));
rgba2S16 = _mm_or_si128(rgba2S16, _mm_or_si128(_mm_or_si128(_mm_or_si128(newR, newG), newB), newA));
} while(0);
do {
__m128i newR = _mm_mullo_epi32(rgbaS16Hi, row1);
__m128i newG = _mm_mullo_epi32(rgbaS16Hi, row2);
__m128i newB = _mm_mullo_epi32(rgbaS16Hi, row3);
__m128i newA = _mm_mullo_epi32(rgbaS16Hi, _mm_set_epi32(1, 0, 0, 0));
newR = _mm_srai_epi32(newR, 16);
newG = _mm_srai_epi32(newG, 16);
newB = _mm_srai_epi32(newB, 16);
newR = _mm_hadd_epi32(newR, _mm_setzero_si128());
newG = _mm_hadd_epi32(newG, _mm_setzero_si128());
newB = _mm_hadd_epi32(newB, _mm_setzero_si128());
newA = _mm_hadd_epi32(newA, _mm_setzero_si128());
newR = _mm_hadd_epi32(newR, _mm_setzero_si128());
newG = _mm_hadd_epi32(newG, _mm_setzero_si128());
newB = _mm_hadd_epi32(newB, _mm_setzero_si128());
newA = _mm_hadd_epi32(newA, _mm_setzero_si128());
newR = _mm_max_epi16(_mm_min_epi16(newR, _mm_set1_epi32(16383)), _mm_set1_epi32(0));
newG = _mm_max_epi16(_mm_min_epi16(newG, _mm_set1_epi32(16383)), _mm_set1_epi32(0));
newB = _mm_max_epi16(_mm_min_epi16(newB, _mm_set1_epi32(16383)), _mm_set1_epi32(0));
newA = _mm_max_epi16(_mm_min_epi16(newA, _mm_set1_epi32(16383)), _mm_set1_epi32(0));
newR = _mm_shuffle_epi8(newR, _mm_set_epi8(-128, -128, 1, 0, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128));
newG = _mm_shuffle_epi8(newG, _mm_set_epi8(-128, -128, -128, -128, 1, 0, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128));
newB = _mm_shuffle_epi8(newB, _mm_set_epi8(-128, -128, -128, -128, -128, -128, 1, 0, -128, -128, -128, -128, -128, -128, -128, -128));
newA = _mm_shuffle_epi8(newA, _mm_set_epi8(1, 0, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128));
rgba2S16 = _mm_or_si128(rgba2S16, _mm_or_si128(_mm_or_si128(_mm_or_si128(newR, newG), newB), newA));
} while(0);
rgba2U16 = _mm_slli_epi16(rgba2S16, 2);
_mm_store_si128((__m128i*) ((uintptr_t) dst->data16 + b), rgba2U16);
}
MTR_END("CHi", "modulate_perform");
node->clean = 0;
return 1;
}
CUTIVIS CHiPubNode *CHi_Modulate() {
@@ -664,11 +903,17 @@ CUTIVIS CHiPubNode *CHi_Modulate() {
n->type = CUTIHI_T('CMod','ulat');
n->Start = n->Stop = NULL;
n->Perform = modulate_perform;
n->clean = 0;
n->sinkCount = 4;
n->sinks = calloc(sizeof(*n->sinks), n->sinkCount);
n->sourceCount = 1;
n->sources = calloc(sizeof(*n->sources), n->sourceCount);
n->sinks[0].type = CUTIHI_VAL_VEC4;
n->sinks[0].data.vec4[0] = 1;
n->sinks[1].type = CUTIHI_VAL_VEC4;
n->sinks[1].data.vec4[0] = 1;
return n;
}
@@ -718,7 +963,6 @@ static void update_keyed_values(CHiNodeGraph *ng) {
static int time_perform(CHiPubNode *node) {
node->sources->type = CUTIHI_VAL_VEC4;
node->sources->data.vec4[0] = node->ng->time;
node->clean = 0;
return 1;
}
@@ -739,7 +983,6 @@ CUTIVIS CHiPubNode *CHi_Time() {
n->type = CUTIHI_T('CTim','e ');
n->Start = n->Stop = NULL;
n->Perform = time_perform;
n->clean = 0;
n->sinkCount = 0;
n->sinks = NULL;
n->sourceCount = 1;
@@ -747,34 +990,63 @@ CUTIVIS CHiPubNode *CHi_Time() {
return n;
}
static PangoFontMap *pfontmap;
static PangoContext *pcontext;
static PangoFontDescription * pfontdesc;
static PangoLayout *playout;
/*
 * Text node with per-node Pango state and caches. CHi_Text() allocates this
 * struct and text_perform() casts its CHiPubNode pointer to struct TextNode,
 * so pubn has to stay the first member.
 */
struct TextNode {
/* Public node header; must remain first (see above). */
CHiPubNode pubn;
/* Pango objects; text_perform() creates all four whenever the font name changes. */
PangoFontMap *pfontmap;
PangoContext *pcontext;
PangoFontDescription * pfontdesc;
PangoLayout *playout;
/* strdup'ed copy of the last text applied to the layout; NULL forces the markup to be set again. */
char *cacheText;
/* strdup'ed copy of the font name the Pango objects were built for; NULL before the first perform. */
char *cacheFontName;
};
static int text_perform(CHiPubNode *n) {
if(n->clean) return 1;
MTR_BEGIN("CHi", "text_perform");
if(!pfontmap) {
pfontmap = pango_ft2_font_map_new();
pango_ft2_font_map_set_resolution(PANGO_FT2_FONT_MAP(pfontmap), 72, 72);
pcontext = pango_font_map_create_context(pfontmap);
pango_context_set_language(pcontext, pango_language_from_string("en_US"));
pango_context_set_base_dir(pcontext, PANGO_DIRECTION_LTR);
pfontdesc = pango_font_description_from_string("Open Sans 48");
playout = pango_layout_new(pcontext);
pango_layout_set_font_description(playout, pfontdesc);
struct TextNode *this = (struct TextNode*) n;
CHiValue *valFontName = CHi_Crawl(&n->sinks[3]);
CHiValue *valDPI = CHi_Crawl(&n->sinks[2]);
CHiValue *valCol = CHi_Crawl(&n->sinks[1]);
CHiValue *valText = CHi_Crawl(&n->sinks[0]);
if(!this->cacheFontName || strcmp(this->cacheFontName, valFontName->data.text)) {
if(this->cacheFontName) free(this->cacheFontName);
this->cacheFontName = strdup(valFontName->data.text);
this->pfontmap = pango_ft2_font_map_new();
pango_ft2_font_map_set_resolution(PANGO_FT2_FONT_MAP(this->pfontmap), 72, 72);
this->pcontext = pango_font_map_create_context(this->pfontmap);
pango_context_set_language(this->pcontext, pango_language_from_string("en_US"));
pango_context_set_base_dir(this->pcontext, PANGO_DIRECTION_LTR);
this->pfontdesc = pango_font_description_from_string(this->cacheFontName);
this->playout = pango_layout_new(this->pcontext);
pango_layout_set_font_description(this->playout, this->pfontdesc);
free(this->cacheText);
this->cacheText = NULL;
}
pango_layout_set_markup(playout, CHi_Crawl(&n->sinks[0])->data.text, -1);
pango_ft2_font_map_set_resolution(PANGO_FT2_FONT_MAP(pfontmap), CHi_Crawl(&n->sinks[2])->data.vec4[0], CHi_Crawl(&n->sinks[2])->data.vec4[0]);
if(!this->cacheText || strcmp(this->cacheText, valText->data.text)) {
if(this->cacheText) free(this->cacheText);
this->cacheText = strdup(valText->data.text);
pango_layout_set_markup(this->playout, valText->data.text, -1);
}
pango_ft2_font_map_set_resolution(PANGO_FT2_FONT_MAP(this->pfontmap), valDPI->data.vec4[0], valDPI->data.vec4[0]);
PangoRectangle extents;
pango_layout_get_extents(playout, NULL, &extents);
pango_layout_get_extents(this->playout, NULL, &extents);
n->sources[0].type = CUTIHI_VAL_SAMPLE;
if(n->sources->data.sample) CHi_Image_Free(n->sources->data.sample);
if(n->sources->data.sample) {
CHi_Image_Free(n->sources->data.sample);
n->sources->data.sample = NULL;
}
if(!CHi_Node_Active(n)) {
return 1;
}
size_t width = (PANGO_PIXELS(extents.width) + 15) & ~15;
CHiImage *chiret = CHi_Image_New(2, 4, 8 * width, width, PANGO_PIXELS(extents.height), NULL);
@@ -788,18 +1060,18 @@ static int text_perform(CHiPubNode *n) {
bmp.pitch = chiret->width;
bmp.pixel_mode = FT_PIXEL_MODE_GRAY;
bmp.num_grays = 256;
pango_ft2_render_layout(&bmp, playout, PANGO_PIXELS(extents.x) + (PANGO_PIXELS(extents.width) + 15) % 16 / 4, PANGO_PIXELS(extents.y));
pango_ft2_render_layout(&bmp, this->playout, PANGO_PIXELS(extents.x) + (PANGO_PIXELS(extents.width) + 15) % 16 / 4, PANGO_PIXELS(extents.y));
__m128i ones = _mm_set1_epi64x(
(((size_t) (n->sinks[1].data.vec4[2] * 255) % 256) << 0) |
(((size_t) (n->sinks[1].data.vec4[1] * 255) % 256) << 16) |
(((size_t) (n->sinks[1].data.vec4[0] * 255) % 256) << 32) |
(((size_t) (valCol->data.vec4[2] * 255) % 256) << 0) |
(((size_t) (valCol->data.vec4[1] * 255) % 256) << 16) |
(((size_t) (valCol->data.vec4[0] * 255) % 256) << 32) |
0x0100000000000000
);
for(size_t p = 0; p < bmp.width * bmp.rows; p += 2) {
__m128i alphad0 =
_mm_mullo_epi16(ones, _mm_set_epi16(bmp.buffer[p + 1], bmp.buffer[p + 1], bmp.buffer[p + 1], bmp.buffer[p + 1], bmp.buffer[p + 0], bmp.buffer[p + 0], bmp.buffer[p + 0], bmp.buffer[p + 0]));
_mm_mullo_epi16(ones, _mm_set_epi16(bmp.buffer[p + 1], 0xFF, 0xFF, 0xFF, bmp.buffer[p + 0], 0xFF, 0xFF, 0xFF));
_mm_stream_si128((__m128i*) &chiret->data16[p * 4], alphad0);
}
@@ -807,23 +1079,33 @@ static int text_perform(CHiPubNode *n) {
MTR_END("CHi", "text_perform");
n->clean = 0;
return 1;
}
CUTIVIS CHiPubNode *CHi_Text() {
CHiPubNode *n = calloc(1, sizeof(*n));
CHiPubNode *n = calloc(1, sizeof(struct TextNode));
n->type = CUTIHI_T('CTex','t ');
n->Start = n->Stop = NULL;
n->Perform = text_perform;
n->clean = 0;
n->sinks = calloc(sizeof(*n->sinks), n->sinkCount = 3);
n->sinks[2].type = CUTIHI_VAL_VEC4;
n->sinks[2].data.vec4[0] = 72;
n->sinks = calloc(sizeof(*n->sinks), n->sinkCount = 4);
n->sources = calloc(sizeof(*n->sources), n->sourceCount = 1);
n->sinks[0].type = CUTIHI_VAL_TEXT;
n->sinks[0].data.text = strdup("Title Text");
n->sinks[2].type = CUTIHI_VAL_VEC4;
n->sinks[2].data.vec4[0] = 256;
n->sinks[3].type = CUTIHI_VAL_TEXT;
n->sinks[3].data.text = strdup("Sans-Serif");
return n;
}
static int mixer_perform(CHiPubNode *n) {
if(n->sinkCount == 0) {
return 1;
}
n->sources[0].type = CUTIHI_VAL_SAMPLE;
MTR_BEGIN("CHi", "mixer_perform");
@@ -833,26 +1115,40 @@ static int mixer_perform(CHiPubNode *n) {
n->sources[0].data.sample = NULL;
}
CHiImage *src0 = CHi_Crawl(&n->sinks[0])->data.sample;
CHiImage *src1 = CHi_Crawl(&n->sinks[1])->data.sample;
size_t width = 0, height = 0, stride = 0;
if(!src0 && !src1) {
for(int s = 0; s < n->sinkCount; s++) {
CHiValue *val = CHi_Crawl(&n->sinks[s]);
if(val && val->type == CUTIHI_VAL_SAMPLE) {
if(width == 0 || height == 0) {
width = val->data.sample->width;
height = val->data.sample->height;
stride = val->data.sample->stride;
} else {
assert(val->data.sample->width == width && val->data.sample->height == height);
}
}
}
if(width == 0 || height == 0) {
return 1;
}
assert(src0->width == src1->width && src0->height == src1->height);
n->sources[0].data.sample = CHi_Image_New(2, 1, (stride + 15) & ~15, width, height, NULL);
n->sources[0].data.sample = CHi_Image_New(2, 1, (src0->stride + 15) & ~15, src0->width, src0->height, NULL);
for(size_t b = 0; b < src0->stride; b += 16) {
__m128i a0 = src0 ? _mm_load_si128((__m128i*) ((uintptr_t) src0->data16 + b)) : _mm_setzero_si128();
__m128i a1 = src1 ? _mm_load_si128((__m128i*) ((uintptr_t) src1->data16 + b)) : _mm_setzero_si128();
_mm_stream_si128((__m128i*) ((uintptr_t) n->sources[0].data.sample->data16 + b), _mm_adds_epi16(a0, a1));
for(size_t b = 0; b < stride; b += 16) {
__m128i sum = _mm_setzero_si128();
for(int s = 0; s < n->sinkCount; s++) {
CHiValue *val = CHi_Crawl(&n->sinks[s]);
if(val && val->type == CUTIHI_VAL_SAMPLE) {
sum = _mm_adds_epi16(sum, _mm_load_si128((__m128i*) ((uintptr_t) val->data.sample->data16 + b)));
}
}
_mm_stream_si128((__m128i*) ((uintptr_t) n->sources[0].data.sample->data16 + b), sum);
}
MTR_END("CHi", "mixer_perform");
n->clean = 0;
return 1;
}
CUTIVIS CHiPubNode *CHi_Mixer() {
@@ -860,7 +1156,6 @@ CUTIVIS CHiPubNode *CHi_Mixer() {
n->type = CUTIHI_T('CMix','er ');
n->Start = n->Stop = NULL;
n->Perform = mixer_perform;
n->clean = 0;
n->sinks = calloc(sizeof(*n->sinks), n->sinkCount = 2);
n->sources = calloc(sizeof(*n->sources), n->sourceCount = 1);
return n;
@@ -874,12 +1169,134 @@ CUTIVIS CHiPubNode *CHi_Preview() {
n->type = CUTIHI_T('CPre','view');
n->Start = n->Stop = NULL;
n->Perform = preview_perform;
n->clean = 0;
n->sinks = calloc(sizeof(*n->sinks), n->sinkCount = 1);
n->sinks[0].type = CUTIHI_VAL_SAMPLE;
n->sinks[0].data.sample = NULL;
n->sources = NULL;
n->sourceCount = 0;
n->sinks[0].type = CUTIHI_VAL_SAMPLE;
n->sinks[0].data.sample = NULL;
return n;
}
static int chromakey_perform(CHiPubNode *n) {
CHiValue *sampleV = CHi_Crawl(&n->sinks[0]);
CHiValue *colorV = CHi_Crawl(&n->sinks[1]);
if(!sampleV || sampleV->type != CUTIHI_VAL_SAMPLE || !sampleV->data.sample) {
return 1;
}
CHiImage *src = sampleV->data.sample;
if(n->sources[0].data.sample) {
CHi_Image_Free(n->sources[0].data.sample);
}
n->sources[0].type = CUTIHI_VAL_SAMPLE;
CHiImage *dst = n->sources[0].data.sample = CHi_Image_New(2, 4, (src->width * src->bpc * src->channels + 15) & ~15, src->width, src->height, NULL);
int16_t uKey = 32767 * (colorV->data.vec4[0] * -0.1146 + colorV->data.vec4[1] * -0.3854 + colorV->data.vec4[2] * +0.5000);
int16_t vKey = 32767 * (colorV->data.vec4[0] * +0.5000 + colorV->data.vec4[1] * -0.4542 + colorV->data.vec4[2] * -0.0458);
__m128i row2 = _mm_set_epi32(0, -3755, -12628, 16384);
__m128i row3 = _mm_set_epi32(0, 16384, -14883, -1501);
float threshold0 = 300;
float threshold1 = 3000;
for(size_t y = 0; y < src->height; y++) {
for(size_t off = 0; off < dst->stride; off += 16) {
__m128i rgba2U16 = _mm_load_si128((__m128i*) ((uintptr_t) src->data16 + y * src->stride + off));
__m128i rgba2S16 = _mm_srli_epi16(rgba2U16, 1);
__m128i rgbaS16Lo = _mm_unpacklo_epi16(rgba2S16, _mm_setzero_si128());
__m128i rgbaS16Hi = _mm_unpackhi_epi16(rgba2S16, _mm_setzero_si128());
__m128i alphas = _mm_setzero_si128();
{
__m128i uProd = _mm_mullo_epi32(row2, rgbaS16Lo);
__m128i vProd = _mm_mullo_epi32(row3, rgbaS16Lo);
uProd = _mm_srai_epi32(uProd, 15);
uProd = _mm_hadd_epi32(uProd, _mm_setzero_si128());
uProd = _mm_hadd_epi32(uProd, _mm_setzero_si128());
vProd = _mm_srai_epi32(vProd, 15);
vProd = _mm_hadd_epi32(vProd, _mm_setzero_si128());
vProd = _mm_hadd_epi32(vProd, _mm_setzero_si128());
__m128 diffU = _mm_cvtepi32_ps(_mm_sub_epi32(uProd, _mm_set1_epi32(uKey)));
__m128 diffV = _mm_cvtepi32_ps(_mm_sub_epi32(vProd, _mm_set1_epi32(vKey)));
__m128 distance = _mm_sqrt_ps(_mm_add_ps(_mm_mul_ps(diffU, diffU), _mm_mul_ps(diffV, diffV)));
__m128 alpha = (__m128) _mm_bslli_si128((__m128i) distance, 12);
alpha = _mm_sub_ps(alpha, _mm_set1_ps(threshold0));
alpha = _mm_max_ps(alpha, _mm_set1_ps(0));
alpha = _mm_mul_ps(alpha, _mm_set1_ps(65535 / (threshold1 - threshold0)));
alpha = _mm_min_ps(alpha, _mm_set1_ps(65535));
__m128i z = _mm_hadd_epi16(_mm_add_epi32(_mm_and_si128(_mm_cvtps_epi32(alpha), _mm_set_epi32(0xFFFFFFFF, 0, 0, 0)), _mm_set_epi32(0, 65535, 65535, 65535)), _mm_setzero_si128());
alphas = _mm_or_si128(alphas, z);
}
{
__m128i uProd = _mm_mullo_epi32(row2, rgbaS16Hi);
__m128i vProd = _mm_mullo_epi32(row3, rgbaS16Hi);
uProd = _mm_srai_epi32(uProd, 15);
uProd = _mm_hadd_epi32(uProd, _mm_setzero_si128());
uProd = _mm_hadd_epi32(uProd, _mm_setzero_si128());
vProd = _mm_srai_epi32(vProd, 15);
vProd = _mm_hadd_epi32(vProd, _mm_setzero_si128());
vProd = _mm_hadd_epi32(vProd, _mm_setzero_si128());
__m128 diffU = _mm_cvtepi32_ps(_mm_sub_epi32(uProd, _mm_set1_epi32(uKey)));
__m128 diffV = _mm_cvtepi32_ps(_mm_sub_epi32(vProd, _mm_set1_epi32(vKey)));
__m128 distance = _mm_sqrt_ps(_mm_add_ps(_mm_mul_ps(diffU, diffU), _mm_mul_ps(diffV, diffV)));
__m128 alpha = (__m128) _mm_bslli_si128((__m128i) distance, 12);
alpha = _mm_sub_ps(alpha, _mm_set1_ps(threshold0));
alpha = _mm_max_ps(alpha, _mm_set1_ps(0));
alpha = _mm_mul_ps(alpha, _mm_set1_ps(65535 / (threshold1 - threshold0)));
alpha = _mm_min_ps(alpha, _mm_set1_ps(65535));
__m128i z = _mm_hadd_epi16(_mm_add_epi32(_mm_and_si128(_mm_cvtps_epi32(alpha), _mm_set_epi32(0xFFFFFFFF, 0, 0, 0)), _mm_set_epi32(0, 65535, 65535, 65535)), _mm_setzero_si128());
alphas = _mm_or_si128(alphas, _mm_bslli_si128(z, 8));
}
rgba2U16 = _mm_mulhi_epu16(rgba2U16, alphas);
_mm_stream_si128((__m128i*) ((uintptr_t) dst->data16 + y * src->stride + off), rgba2U16);
}
}
return 1;
}
/*
 * Allocates a chroma key node with two sinks and one source.
 *
 * Sink 1 (the key colour) is preset to opaque green; every other field
 * starts zeroed. The node has no Start/Stop callbacks.
 */
CUTIVIS CHiPubNode *CHi_ChromaKey() {
	CHiPubNode *node = calloc(1, sizeof(*node));

	node->type = CUTIHI_T('CChr','omaK');
	node->Stop = NULL;
	node->Start = node->Stop;
	node->Perform = chromakey_perform;

	node->sinkCount = 2;
	node->sinks = calloc(sizeof(*node->sinks), node->sinkCount);

	node->sourceCount = 1;
	node->sources = calloc(sizeof(*node->sources), node->sourceCount);

	// Default green
	node->sinks[1].type = CUTIHI_VAL_VEC4;
	node->sinks[1].data.vec4[0] = 0;
	node->sinks[1].data.vec4[1] = 1;
	node->sinks[1].data.vec4[2] = 0;
	node->sinks[1].data.vec4[3] = 1;

	return node;
}
@@ -1105,6 +1522,12 @@ CUTIVIS int CHi_NodeGraphLoad(CHiNodeGraph *ng, CHiLoadReader reader, void *ud)
n = CHi_Camera();
} else if(type == CUTIHI_T('CCmp','nScl')) {
n = CHi_ComponentScale();
} else if(type == CUTIHI_T('CEnc','H264')) {
n = CHi_EncodeH264();
} else if(type == CUTIHI_T('CStr','RTMP')) {
n = CHi_StreamRTMP();
} else if(type == CUTIHI_T('CEnc','AACL')) {
n = CHi_EncodeAAC();
}
n->ng = ng;
@@ -1148,3 +1571,8 @@ CUTIVIS int CHi_NodeGraphLoad(CHiNodeGraph *ng, CHiLoadReader reader, void *ud)
return 0;
}
/*
 * Report whether a node is inside its lifespan at the graph's current time.
 *
 * The node is active when lifespan.start <= now and either lifespan.end is
 * exactly 0 (treated here as "no end") or now < lifespan.end. The current
 * time is read from the owning graph through CHi_Time_Get.
 */
CUTIVIS bool CHi_Node_Active(CHiPubNode *pubn) {
	const float t = CHi_Time_Get(pubn->ng);

	/* Written as a negated <= so unordered (NaN) comparisons still yield false. */
	if(!(pubn->lifespan.start <= t)) {
		return false;
	}

	if(pubn->lifespan.end == 0) {
		return true;
	}

	return t < pubn->lifespan.end;
}

View File

@@ -3,18 +3,23 @@
#include<stdint.h>
#include<stddef.h>
#include<stdbool.h>
#include<pthread.h>
#include"defs.h"
#include"bs.h"
#include<arpa/inet.h>
#ifdef __cplusplus
extern "C" {
#endif
#define CUTIHI_T(a, b) ((((uint64_t) htonl(b)) << 32) | htonl(a))
/*
 * CUTIHI_T(a, b): pack two 32-bit tags (typically four-character constants,
 * e.g. CUTIHI_T('CChr','omaK')) into a single 64-bit node type identifier,
 * with b in the upper 32 bits and a in the lower 32 bits.
 *
 * On little-endian hosts each half is byte-swapped first; on big-endian
 * hosts the halves are used as they are. This is the same result as
 * applying htonl() to each half, but it is a constant expression and needs
 * no <arpa/inet.h>.
 *
 * The big-endian form casts through uint32_t so that a tag whose int value
 * is negative cannot sign-extend into the other half, and parenthesises its
 * arguments so arbitrary expressions can be passed.
 */
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#define CUTIHI_T(a, b) ((((uint64_t) __builtin_bswap32(b)) << 32) | __builtin_bswap32(a))
#else
#define CUTIHI_T(a, b) ((((uint64_t) (uint32_t) (b)) << 32) | (uint32_t) (a))
#endif
typedef size_t(*CHiSaveWriter)(void *ud, const void *data, size_t len);
typedef size_t(*CHiLoadReader)(void *ud, void *data, size_t len);
@@ -28,9 +33,11 @@ typedef enum {
CUTIHI_VAL_VEC4 = 5,
CUTIHI_VAL_WEAK_PTR = 6,
CUTIHI_VAL_VP9BS = 666,
CUTIHI_VAL_VP8BS = 667,
CUTIHI_VAL_VP8BS = 666,
CUTIHI_VAL_VP9BS = 667,
CUTIHI_VAL_OPUSBS = 668,
CUTIHI_VAL_H264BS = 669,
CUTIHI_VAL_AACBS = 670,
} CHiValType;
struct CHiImage;
@@ -57,6 +64,15 @@ typedef struct {
CHiValueRaw data;
} CHiValue;
/* Size in bytes of one error code buffer in CHiErrors. */
#define CUTIHI_ERR_SIZE 16
/* Number of error slots held by one CHiErrors. */
#define CUTIHI_MAX_ERRORS 4
/*
 * Fixed-size table of error slots; the same index across all four arrays
 * refers to one slot.
 * NOTE(review): the field meanings below are inferred from the names only;
 * confirm against the code that raises and reports errors.
 */
typedef struct {
/* Presumably the value of active[] from the previous frame/pass -- TODO confirm. */
bool activeLast[CUTIHI_MAX_ERRORS];
/* Presumably whether the slot currently holds a raised error -- TODO confirm. */
bool active[CUTIHI_MAX_ERRORS];
/* Per-slot character buffer of CUTIHI_ERR_SIZE bytes; NUL-termination is not established here. */
char code[CUTIHI_MAX_ERRORS][CUTIHI_ERR_SIZE];
/* Per-slot integer; presumably the index of the sink the error relates to -- TODO confirm. */
int sink[CUTIHI_MAX_ERRORS];
} CHiErrors;
typedef struct CHiPubNode {
uint64_t type;
@@ -67,7 +83,6 @@ typedef struct CHiPubNode {
int (*Perform)(struct CHiPubNode *node);
int (*Start)(struct CHiPubNode *node);
int (*Stop)(struct CHiPubNode *node);
char clean;
void (*Destroy)(struct CHiPubNode *node);
@@ -81,6 +96,13 @@ typedef struct CHiPubNode {
struct CHiNodeGraph *ng;
CHiErrors errors;
struct {
float start;
float end;
} lifespan;
char _dfsmark;
} CHiPubNode;
@@ -127,6 +149,7 @@ typedef struct CHiNodeGraph {
void *ud;
void(*eventOnStopComplete)(struct CHiNodeGraph*);
void(*eventOnFrameComplete)(struct CHiNodeGraph*);
void(*eventOnError)(struct CHiNodeGraph*, CHiPubNode*);
CHiCompilationStatus compilationStatus;
@@ -137,6 +160,9 @@ typedef struct CHiNodeGraph {
float time;
float timedelta;
// This is necessary for live changes of the node graph
pthread_mutex_t mut;
} CHiNodeGraph;
CUTIVIS CHiNodeGraph *CHi_NewNodeGraph();
@@ -151,7 +177,7 @@ CUTIVIS int CHi_ConfigureSink(CHiPubNode*, size_t, CHiValue);
CUTIVIS void CHi_MakeKeyframe(CHiNodeGraph *ng, CHiPubNode *n, size_t idx);
CUTIVIS size_t CHi_MoveKeyframe(CHiNodeGraph *ng, CHiKeyframes *kfs, size_t idx, float to);
CUTIVIS size_t CHi_MoveKeyframeBy(CHiNodeGraph *ng, CHiKeyframes *kfs, size_t idx, float dt);
CUTIVIS size_t CHi_GetClosestKeyframe(CHiNodeGraph *ng, size_t kfsIdx, float t);
CUTIVIS size_t CHi_GetClosestKeyframe(CHiNodeGraph *ng, CHiKeyframes *kfs, float t);
CUTIVIS void CHi_SetExtrapolationMode(CHiNodeGraph *ng, CHiPubNode *n, size_t sinkIdx, CHiExtrapolationMode mode, float* params);
CUTIVIS void CHi_DeleteKeyframe(CHiNodeGraph *ng, CHiKeyframes *kfs, size_t idx);
@@ -162,7 +188,7 @@ CUTIVIS void CHi_BeginCompilation(CHiNodeGraph *ng);
CUTIVIS void CHi_StopCompilation(CHiNodeGraph *ng);
#define CUTIHI_IMAGE_IN_FILE 0
#define CUTIHI_IMAGE_OUT_SAMPLE 1
#define CUTIHI_IMAGE_OUT_SAMPLE 0
CUTIVIS CHiPubNode *CHi_Image();
#define CUTIHI_EMBED_OUT_MAIN 0
@@ -170,6 +196,7 @@ CUTIVIS CHiPubNode *CHi_Image();
CUTIVIS CHiPubNode *CHi_Embed();
#define CUTIHI_CONSTANTSAMPLE_IN_COLOR 0
#define CUTIHI_CONSTANTSAMPLE_IN_SIZE 1
#define CUTIHI_CONSTANTSAMPLE_OUT_SAMPLE 1
CUTIVIS CHiPubNode *CHi_ConstantSample();
@@ -227,12 +254,8 @@ CUTIVIS CHiPubNode *CHi_Microphone();
CUTIVIS CHiPubNode *CHi_Text();
CUTIVIS CHiPubNode *CHi_ExportWav();
CUTIVIS int CHi_ExportWav_Start(CHiPubNode*);
CUTIVIS int CHi_ExportWav_Stop(CHiPubNode*);
CUTIVIS CHiPubNode *CHi_EncodeOpus();
CUTIVIS int CHi_EncodeOpus_Start(CHiPubNode*);
CUTIVIS int CHi_EncodeOpus_Stop(CHiPubNode*);
CUTIVIS CHiPubNode *CHi_Camera();
@@ -244,9 +267,19 @@ CUTIVIS CHiPubNode *CHi_Keyhook();
CUTIVIS CHiPubNode *CHi_Mixer();
CUTIVIS CHiPubNode *CHi_ChromaKey();
CUTIVIS CHiPubNode *CHi_EncodeH264();
CUTIVIS CHiPubNode *CHi_StreamRTMP();
CUTIVIS CHiPubNode *CHi_EncodeAAC();
CUTIVIS CHiValue *CHi_Crawl(CHiValue*);
CUTIVIS void CHi_Save(CHiNodeGraph *ng);
//CUTIVIS void CHi_Save(CHiNodeGraph *ng);
CUTIVIS bool CHi_Node_Active(CHiPubNode*);
#ifdef __cplusplus
}

View File

@@ -52,18 +52,7 @@ static int encodeopus_perform(CHiPubNode *pubn) {
return 1;
}
CUTIVIS CHiPubNode *CHi_EncodeOpus() {
struct CHiEncodeOpusNode *ret = calloc(1, sizeof(*ret));
ret->pubn.type = CUTIHI_T('CEnc','Opus');
ret->pubn.Start = CHi_EncodeOpus_Start;
ret->pubn.Perform = &encodeopus_perform;
ret->pubn.Stop = CHi_EncodeOpus_Stop;
ret->pubn.clean = 0;
ret->pubn.sinks = calloc(sizeof(*ret->pubn.sinks), ret->pubn.sinkCount = 1);
ret->pubn.sources = calloc(sizeof(*ret->pubn.sources), ret->pubn.sourceCount = 1);
return &ret->pubn;
}
CUTIVIS int CHi_EncodeOpus_Start(CHiPubNode *pubn) {
static int CHi_EncodeOpus_Start(CHiPubNode *pubn) {
struct CHiEncodeOpusNode *n = (struct CHiEncodeOpusNode*) pubn;
int error;
@@ -76,10 +65,20 @@ CUTIVIS int CHi_EncodeOpus_Start(CHiPubNode *pubn) {
return 1;
}
CUTIVIS int CHi_EncodeOpus_Stop(CHiPubNode *pubn) {
static int CHi_EncodeOpus_Stop(CHiPubNode *pubn) {
struct CHiEncodeOpusNode *n = (struct CHiEncodeOpusNode*) pubn;
opus_encoder_destroy(n->enc);
free(n->pcmbuf);
return 1;
}
/*
 * Create an Opus encoder node.
 *
 * Allocates a zeroed CHiEncodeOpusNode, wires its Start/Perform/Stop hooks
 * and gives it one sink and one source. Returns a pointer to the embedded
 * public node. Allocation results are not checked.
 */
CUTIVIS CHiPubNode *CHi_EncodeOpus() {
	struct CHiEncodeOpusNode *node = calloc(1, sizeof(*node));
	CHiPubNode *pub = &node->pubn;

	pub->type = CUTIHI_T('CEnc','Opus');

	pub->Start = CHi_EncodeOpus_Start;
	pub->Perform = &encodeopus_perform;
	pub->Stop = CHi_EncodeOpus_Stop;

	pub->sinkCount = 1;
	pub->sinks = calloc(pub->sinkCount, sizeof(*pub->sinks));

	pub->sourceCount = 1;
	pub->sources = calloc(pub->sourceCount, sizeof(*pub->sources));

	return pub;
}

View File

@@ -39,7 +39,6 @@ CUTIVIS CHiPubNode *CHi_ComponentScale() {
n->type = CUTIHI_T('CCmp','nScl');
n->Start = n->Stop = NULL;
n->Perform = scale_perform;
n->clean = 0;
n->sinkCount = 2;
n->sinks = calloc(sizeof(*n->sinks), n->sinkCount);
n->sourceCount = 1;
@@ -48,12 +47,14 @@ CUTIVIS CHiPubNode *CHi_ComponentScale() {
}
static Display *dpy;
static XkbDescPtr xKeyboardDesc;
typedef struct {
CHiPubNode pub;
XRecordContext rc;
pthread_t thrd;
atomic_int key;
char key[64];
atomic_bool on;
} CHiKeyhookNode;
@@ -68,8 +69,10 @@ static void keyhook_handler(XPointer ud, XRecordInterceptData *recdata) {
CHiKeyhookNode *n = (CHiKeyhookNode*) ud;
printf("%i\n", key);
if(!repeat && key == n->key) {
char keyname[XkbKeyNameLength + 1] = {};
memcpy(keyname, xKeyboardDesc->names->keys[key].name, XkbKeyNameLength);
if(!repeat && !strcmp(keyname, n->key)) {
if(type == KeyPress) {
n->on = 1;
} else if(type == KeyRelease) {
@@ -93,7 +96,10 @@ static void *keyhook_thread(void *ud) {
}
static int keyhook_perform(CHiPubNode *n) {
((CHiKeyhookNode*) n)->key = CHi_Crawl(&n->sinks[0])->data.vec4[0];
CHiKeyhookNode *me = (CHiKeyhookNode*) n;
strncpy(me->key, CHi_Crawl(&n->sinks[0])->data.text, 63);
me->key[63] = '\0';
n->sources[0].type = CUTIHI_VAL_VEC4;
@@ -123,17 +129,19 @@ CUTIVIS CHiPubNode *CHi_Keyhook() {
n->pub.Start = n->pub.Stop = NULL;
n->pub.Perform = keyhook_perform;
n->pub.Destroy = keyhook_destroy;
n->pub.clean = 0;
n->pub.sinkCount = 2;
n->pub.sinks = calloc(sizeof(*n->pub.sinks), n->pub.sinkCount);
n->pub.sourceCount = 1;
n->pub.sources = calloc(sizeof(*n->pub.sources), n->pub.sourceCount);
n->on = 0;
n->key = 0;
n->key[0] = '\n';
if(!dpy) {
dpy = XOpenDisplay(NULL);
xKeyboardDesc = XkbGetMap(dpy, 0, XkbUseCoreKbd);
XkbGetNames(dpy, XkbKeyNamesMask, xKeyboardDesc);
}
pthread_create(&n->thrd, NULL, keyhook_thread, n);

View File

@@ -127,7 +127,6 @@ CUTIVIS CHiPubNode *CHi_Camera() {
CHiPubNode *pubn = calloc(1, sizeof(*pubn));
pubn->type = CUTIHI_T('CWeb','Cam ');
pubn->clean = 0;
pubn->Start = pubn->Stop = NULL;
pubn->Perform = camera_perform;
pubn->sinks = calloc(sizeof(*pubn->sinks), pubn->sinkCount = 0);

View File

@@ -280,14 +280,22 @@ webm::Status CueParser::OnCuePoint(const webm::ElementMetadata &metadata, const
static int movie_perform(CHiPubNode *pub) {
CHiMovieNode *node = (CHiMovieNode*) ((uintptr_t) pub - offsetof(CHiMovieNode, pub));
pub->sources[0].type = CUTIHI_VAL_SAMPLE;
if(pub->sources[0].data.sample) {
CHi_Image_Free(pub->sources[0].data.sample);
pub->sources[0].data.sample = nullptr;
}
if(!CHi_Node_Active(pub)) {
return 1;
}
MTR_BEGIN("CHi", "movie_perform");
int64_t t;
if(pub->sinks[1].type == CUTIHI_VAL_NONE) t = CHi_Time_Get(pub->ng) * 1000;
if(pub->sinks[1].type == CUTIHI_VAL_NONE) t = (CHi_Time_Get(pub->ng) - pub->lifespan.start) * 1000;
else t = CHi_Crawl(&pub->sinks[1])->data.vec4[0] * 1000;
pub->sources[0].type = CUTIHI_VAL_SAMPLE;
char *filepath = CHi_Crawl(&pub->sinks[0])->data.text;
if(!node->filepathCache || strcmp(node->filepathCache, filepath) != 0) {
@@ -331,15 +339,13 @@ static int movie_perform(CHiPubNode *pub) {
return 1;
}
if(pub->sources[0].data.sample) {
CHi_Image_Free(pub->sources[0].data.sample);
pub->sources[0].data.sample = nullptr;
}
if(t >= 0 && t < 1000 * node->duration) {
if(t < node->timeCache || (t - node->timeCache) > 5000) {
fseek(node->vf, 0, SEEK_SET);
fseek(node->af, 0, SEEK_SET);
if(node->cuepoints.size() > 0) {
size_t i;
@@ -351,19 +357,16 @@ static int movie_perform(CHiPubNode *pub) {
if(i != 0) i--;
for(webm::Element<webm::CueTrackPositions> &p : node->cuepoints[i].cue_track_positions) {
if(p.value().track.value() == node->videoTrack) {
fseek(node->vf, node->segmentOff + p.value().cluster_position.value(), SEEK_SET);
fseek(node->af, node->segmentOff + p.value().cluster_position.value(), SEEK_SET);
break;
if(node->cuepoints[i].time.value() <= t) {
for(webm::Element<webm::CueTrackPositions> &p : node->cuepoints[i].cue_track_positions) {
if(p.value().track.value() == node->videoTrack) {
fseek(node->vf, node->segmentOff + p.value().cluster_position.value(), SEEK_SET);
fseek(node->af, node->segmentOff + p.value().cluster_position.value(), SEEK_SET);
break;
}
}
}
} else {
fseek(node->vf, 0, SEEK_SET);
fseek(node->af, 0, SEEK_SET);
}
}
@@ -380,6 +383,8 @@ static int movie_perform(CHiPubNode *pub) {
pub->sources[0].data.sample = node->fp.output;
node->fp.output = nullptr;
node->timeCache = t;
}
@@ -406,8 +411,6 @@ static int movie_perform(CHiPubNode *pub) {
pub->sources[1].type = CUTIHI_VAL_SAMPLE;
pub->sources[1].data.sample = aud;
pub->clean = 0;
MTR_END("CHi", "movie_perform");
return 1;
@@ -441,7 +444,6 @@ CUTIVIS CHiPubNode *CHi_Movie() {
n->pub.type = CUTIHI_T('CMov','ie ');
n->pub.Perform = movie_perform;
n->pub.Destroy = movie_destroy;
n->pub.clean = 0;
n->pub.sinkCount = 2;
n->pub.sinks = (CHiValue*) calloc(sizeof(*n->pub.sinks), n->pub.sinkCount);
n->pub.sinks[1].type = CUTIHI_VAL_VEC4;

View File

@@ -24,295 +24,7 @@
#include"linearity.h"
struct CHiEncodeVP9Node {
vpx_codec_ctx_t codec;
vpx_codec_enc_cfg_t cfg;
enum {
WAITING, IN_PROGRESS
} state;
uint8_t *outY, *outU, *outV;
uint16_t strideY, strideU, strideV;
vpx_codec_iface_t *iface;
CHiPubNode pub;
};
static int encodevp9_perform(CHiPubNode *pub) {
CHiEncodeVP9Node *node = (CHiEncodeVP9Node*) ((uintptr_t) pub - offsetof(CHiEncodeVP9Node, pub));
MTR_BEGIN("CHi", "encodevp9_perform");
pub->sources[0].type = CUTIHI_VAL_VP9BS;
pub->sources[0].data.bitstream = NULL;
if(node->state == CHiEncodeVP9Node::WAITING) return 1;
CHiImage *rgbIn = (CHiImage*) CHi_Crawl(&pub->sinks[0])->data.sample;
#pragma omp parallel for simd
for(size_t y = 0; y < node->cfg.g_h; y += 2) {
for(size_t x = 0; x < node->cfg.g_w; x += 16) {
__m128i rgb, partY, partU, partV, dotY, dotU, dotV;
__m128i wipY0 = _mm_setzero_si128();
__m128i wipY1 = _mm_setzero_si128();
__m128i wipU = _mm_setzero_si128();
__m128i wipV = _mm_setzero_si128();
__m128i tempU = _mm_setzero_si128();
__m128i tempV = _mm_setzero_si128();
#define DO_DAH_DOO_DOO(LoOrHi, shufY, shufUV) \
/* Process top two */\
rgb = _mm_srli_epi16(apply_gamma_epi16(line0, _mm_set1_ps(1 / 2.2f)), 8); \
/* Start matrix multiplication (BT.709 + full->studio range) */\
partY = _mm_mullo_epi16(rgb, _mm_set_epi16(0, 47, 157, 16, 0, 47, 157, 16));\
partU = _mm_mullo_epi16(rgb, _mm_set_epi16(0, -25, -85, 110, 0, -25, -85, 110));\
partV = _mm_mullo_epi16(rgb, _mm_set_epi16(0, 110, -100, -10, 0, 110, -100, -10));\
/* Finish mat-mul with dot products */\
dotY = _mm_madd_epi16(partY, _mm_set1_epi16(1));\
dotY = _mm_hadd_epi32(dotY, _mm_setzero_si128());\
dotU = _mm_madd_epi16(partU, _mm_set1_epi16(1));\
dotU = _mm_hadd_epi32(dotU, _mm_setzero_si128());\
dotV = _mm_madd_epi16(partV, _mm_set1_epi16(1));\
dotV = _mm_hadd_epi32(dotV, _mm_setzero_si128());\
/* Insert Ys */\
wipY0 = _mm_or_si128(wipY0, _mm_shuffle_epi8(dotY, shufY));\
/* Save top UV */\
tempU = dotU;\
tempV = dotV;\
\
/* Process bottom two */\
rgb = _mm_srli_epi16(apply_gamma_epi16(line1, _mm_set1_ps(1 / 2.2f)), 8); \
/* Start matrix multiplication (BT.709 + full->studio range) */\
partY = _mm_mullo_epi16(rgb, _mm_set_epi16(0, 47, 157, 16, 0, 47, 157, 16));\
partU = _mm_mullo_epi16(rgb, _mm_set_epi16(0, -25, -85, 110, 0, -25, -85, 110));\
partV = _mm_mullo_epi16(rgb, _mm_set_epi16(0, 110, -100, -10, 0, 110, -100, -10));\
/* Finish mat-mul with dot products */\
dotY = _mm_madd_epi16(partY, _mm_set1_epi16(1));\
dotY = _mm_hadd_epi32(dotY, _mm_setzero_si128());\
dotU = _mm_madd_epi16(partU, _mm_set1_epi16(1));\
dotU = _mm_hadd_epi32(dotU, _mm_setzero_si128());\
dotV = _mm_madd_epi16(partV, _mm_set1_epi16(1));\
dotV = _mm_hadd_epi32(dotV, _mm_setzero_si128());\
/* Insert Ys */\
wipY1 = _mm_or_si128(wipY1, _mm_shuffle_epi8(dotY, shufY));\
/* Save bottom UVs */\
tempU = _mm_hadd_epi32(_mm_add_epi32(tempU, dotU), _mm_setzero_si128());\
tempV = _mm_hadd_epi32(_mm_add_epi32(tempV, dotV), _mm_setzero_si128());\
\
/* Insert UVs */\
wipU = _mm_or_si128(wipU, _mm_shuffle_epi8(_mm_srli_epi32(tempU, 2), shufUV));\
wipV = _mm_or_si128(wipV, _mm_shuffle_epi8(_mm_srli_epi32(tempV, 2), shufUV));
__m128i line0 = _mm_load_si128((__m128i*) ((uintptr_t) rgbIn->data16 + (y + 0) * rgbIn->stride + (x + 0) * 8)); // Load two pixels
__m128i line1 = _mm_load_si128((__m128i*) ((uintptr_t) rgbIn->data16 + (y + 1) * rgbIn->stride + (x + 0) * 8)); // Load two pixels
DO_DAH_DOO_DOO(_mm_unpacklo_epi8,
_mm_set_epi8(-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, 5, 1),
_mm_set_epi8(-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, 1));
line0 = _mm_load_si128((__m128i*) ((uintptr_t) rgbIn->data16 + (y + 0) * rgbIn->stride + (x + 2) * 8)); // Load two pixels
line1 = _mm_load_si128((__m128i*) ((uintptr_t) rgbIn->data16 + (y + 1) * rgbIn->stride + (x + 2) * 8)); // Load two pixels
DO_DAH_DOO_DOO(_mm_unpacklo_epi8,
_mm_set_epi8(-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, 5, 1, -128, -128),
_mm_set_epi8(-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, 1, -128));
line0 = _mm_load_si128((__m128i*) ((uintptr_t) rgbIn->data16 + (y + 0) * rgbIn->stride + (x + 4) * 8)); // Load two pixels
line1 = _mm_load_si128((__m128i*) ((uintptr_t) rgbIn->data16 + (y + 1) * rgbIn->stride + (x + 4) * 8)); // Load two pixels
DO_DAH_DOO_DOO(_mm_unpacklo_epi8,
_mm_set_epi8(-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, 5, 1, -128, -128, -128, -128),
_mm_set_epi8(-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, 1, -128, -128));
line0 = _mm_load_si128((__m128i*) ((uintptr_t) rgbIn->data16 + (y + 0) * rgbIn->stride + (x + 6) * 8)); // Load two pixels
line1 = _mm_load_si128((__m128i*) ((uintptr_t) rgbIn->data16 + (y + 1) * rgbIn->stride + (x + 6) * 8)); // Load two pixels
DO_DAH_DOO_DOO(_mm_unpacklo_epi8,
_mm_set_epi8(-128, -128, -128, -128, -128, -128, -128, -128, 5, 1, -128, -128, -128, -128, -128, -128),
_mm_set_epi8(-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, 1, -128, -128, -128));
line0 = _mm_load_si128((__m128i*) ((uintptr_t) rgbIn->data16 + (y + 0) * rgbIn->stride + (x + 8) * 8)); // Load two pixels
line1 = _mm_load_si128((__m128i*) ((uintptr_t) rgbIn->data16 + (y + 1) * rgbIn->stride + (x + 8) * 8)); // Load two pixels
DO_DAH_DOO_DOO(_mm_unpacklo_epi8,
_mm_set_epi8(-128, -128, -128, -128, -128, -128, 5, 1, -128, -128, -128, -128, -128, -128, -128, -128),
_mm_set_epi8(-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, 1, -128, -128, -128, -128));
line0 = _mm_load_si128((__m128i*) ((uintptr_t) rgbIn->data16 + (y + 0) * rgbIn->stride + (x + 10) * 8)); // Load two pixels
line1 = _mm_load_si128((__m128i*) ((uintptr_t) rgbIn->data16 + (y + 1) * rgbIn->stride + (x + 10) * 8)); // Load two pixels
DO_DAH_DOO_DOO(_mm_unpacklo_epi8,
_mm_set_epi8(-128, -128, -128, -128, 5, 1, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128),
_mm_set_epi8(-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, 1, -128, -128, -128, -128, -128));
line0 = _mm_load_si128((__m128i*) ((uintptr_t) rgbIn->data16 + (y + 0) * rgbIn->stride + (x + 12) * 8)); // Load two pixels
line1 = _mm_load_si128((__m128i*) ((uintptr_t) rgbIn->data16 + (y + 1) * rgbIn->stride + (x + 12) * 8)); // Load two pixels
DO_DAH_DOO_DOO(_mm_unpacklo_epi8,
_mm_set_epi8(-128, -128, 5, 1, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128),
_mm_set_epi8(-128, -128, -128, -128, -128, -128, -128, -128, -128, 1, -128, -128, -128, -128, -128, -128));
line0 = _mm_load_si128((__m128i*) ((uintptr_t) rgbIn->data16 + (y + 0) * rgbIn->stride + (x + 14) * 8)); // Load two pixels
line1 = _mm_load_si128((__m128i*) ((uintptr_t) rgbIn->data16 + (y + 1) * rgbIn->stride + (x + 14) * 8)); // Load two pixels
DO_DAH_DOO_DOO(_mm_unpacklo_epi8,
_mm_set_epi8( 5, 1, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128),
_mm_set_epi8(-128, -128, -128, -128, -128, -128, -128, -128, 1, -128, -128, -128, -128, -128, -128, -128));
_mm_stream_si128((__m128i*) &node->outY[node->strideY * (y + 0) + x], _mm_add_epi8(_mm_set1_epi8(16), wipY0));
_mm_stream_si128((__m128i*) &node->outY[node->strideY * (y + 1) + x], _mm_add_epi8(_mm_set1_epi8(16), wipY1));
_mm_storeu_si128((__m128i*) &node->outU[node->strideU * (y / 2) + x / 2], _mm_add_epi8(wipU, _mm_set1_epi8(128)));
_mm_storeu_si128((__m128i*) &node->outV[node->strideV * (y / 2) + x / 2], _mm_add_epi8(wipV, _mm_set1_epi8(128)));
}
}
vpx_image_t vpxraw;
vpxraw.fmt = VPX_IMG_FMT_I420;
vpxraw.cs = VPX_CS_BT_709;
vpxraw.range = VPX_CR_STUDIO_RANGE;
vpxraw.bit_depth = 8;
vpxraw.w = vpxraw.d_w = node->cfg.g_w;
vpxraw.h = vpxraw.d_h = node->cfg.g_h;
vpxraw.r_w = vpxraw.r_h = 0;
vpxraw.x_chroma_shift = vpxraw.y_chroma_shift = 1;
vpxraw.img_data_owner = 0;
vpxraw.self_allocd = 0;
vpxraw.bps = 12;
vpxraw.stride[VPX_PLANE_Y] = node->strideY;
vpxraw.planes[VPX_PLANE_Y] = node->outY;
vpxraw.stride[VPX_PLANE_U] = node->strideU;
vpxraw.planes[VPX_PLANE_U] = node->outU;
vpxraw.stride[VPX_PLANE_V] = node->strideV;
vpxraw.planes[VPX_PLANE_V] = node->outV;
vpx_codec_encode(&node->codec, &vpxraw, CHi_Time_Get(pub->ng) * 1000.f, 1, 0, VPX_DL_REALTIME);
auto ret = (CHiBSFrames*) malloc(sizeof(CHiBSFrames));
ret->count = 0;
vpx_codec_iter_t iter = NULL;
const vpx_codec_cx_pkt_t *pkt;
while((pkt = vpx_codec_get_cx_data(&node->codec, &iter)) != NULL) {
if(pkt->kind == VPX_CODEC_CX_FRAME_PKT) {
ret = (CHiBSFrames*) realloc(ret, sizeof(CHiBSFrames) + sizeof(CHiBSFrame) * (ret->count + 1));
ret->data[ret->count].timestamp = pkt->data.frame.pts;
ret->data[ret->count].sz = pkt->data.frame.sz;
ret->data[ret->count].flags = pkt->data.frame.flags & VPX_FRAME_IS_KEY;
ret->data[ret->count].ptr = malloc(ret->data[ret->count].sz);
memcpy(ret->data[ret->count].ptr, pkt->data.frame.buf, ret->data[ret->count].sz);
ret->count++;
}
}
// if(pktRet) v->queueOut.enqueue(pktRet);
//memcpy(node->vpxraw.planes[VPX_PLANE_Y], VIPS_IMAGE_ADDR(y, 0, 0), node->vpxraw.stride[VPX_PLANE_Y] * node->vpxraw.d_h);
//memcpy(node->vpxraw.planes[VPX_PLANE_U], VIPS_IMAGE_ADDR(u, 0, 0), node->vpxraw.stride[VPX_PLANE_U] * (node->vpxraw.d_h >> node->vpxraw.y_chroma_shift));
//memcpy(node->vpxraw.planes[VPX_PLANE_V], VIPS_IMAGE_ADDR(v, 0, 0), node->vpxraw.stride[VPX_PLANE_V] * (node->vpxraw.d_h >> node->vpxraw.y_chroma_shift));
//const vpx_codec_cx_pkt_t *pkt;
//while(!node->queueOut.try_dequeue(pkt)) usleep(0);
pub->sources[0].data.bitstream = ret;
MTR_END("CHi", "encodevp9_perform");
return 1;
}
static void encodevp_destroy(CHiPubNode *pub) {
CHiEncodeVP9Node *node = (CHiEncodeVP9Node*) ((uintptr_t) pub - offsetof(CHiEncodeVP9Node, pub));
free(node);
}
CUTIVIS CHiPubNode *CHi_EncodeVP8() {
CHiEncodeVP9Node *n = (CHiEncodeVP9Node*) calloc(1, sizeof(*n));
new (n) CHiEncodeVP9Node();
n->pub.type = CUTIHI_T('CEnc','GVP8');
n->pub.Start = CHi_EncodeVP9_Start;
n->pub.Perform = encodevp9_perform;
n->pub.Stop = CHi_EncodeVP9_Stop;
n->pub.Destroy = encodevp_destroy;
n->pub.clean = 0;
n->pub.sinks = (CHiValue*) calloc(sizeof(*n->pub.sinks), n->pub.sinkCount = 1);
n->pub.sources = (CHiValue*) calloc(sizeof(*n->pub.sources), n->pub.sourceCount = 1);
n->state = CHiEncodeVP9Node::WAITING;
n->iface = vpx_codec_vp8_cx();
return &n->pub;
}
CUTIVIS CHiPubNode *CHi_EncodeVP9() {
CHiEncodeVP9Node *n = (CHiEncodeVP9Node*) calloc(1, sizeof(*n));
new (n) CHiEncodeVP9Node();
n->pub.type = CUTIHI_T('CEnc','GVP9');
n->pub.Start = CHi_EncodeVP9_Start;
n->pub.Perform = encodevp9_perform;
n->pub.Stop = CHi_EncodeVP9_Stop;
n->pub.Destroy = encodevp_destroy;
n->pub.clean = 0;
n->pub.sinks = (CHiValue*) calloc(sizeof(*n->pub.sinks), n->pub.sinkCount = 1);
n->pub.sources = (CHiValue*) calloc(sizeof(*n->pub.sources), n->pub.sourceCount = 1);
n->state = CHiEncodeVP9Node::WAITING;
n->iface = vpx_codec_vp9_cx();
return &n->pub;
}
CUTIVIS int CHi_EncodeVP9_Start(CHiPubNode *pubn) {
CHiEncodeVP9Node *node = (CHiEncodeVP9Node*) ((uintptr_t) pubn - offsetof(CHiEncodeVP9Node, pub));
node->state = CHiEncodeVP9Node::IN_PROGRESS;
CHiImage *firstFrame = (CHiImage*) CHi_Crawl(&pubn->sinks[0])->data.sample;
vpx_codec_enc_config_default(node->iface, &node->cfg, 0);
node->cfg.g_w = firstFrame->width;
node->cfg.g_h = firstFrame->height;
node->cfg.g_timebase.num = 1;
node->cfg.g_timebase.den = 30;
node->cfg.g_lag_in_frames = 0;
node->cfg.g_threads = 8;
node->cfg.kf_mode = VPX_KF_AUTO;
node->cfg.kf_max_dist = 300;
node->cfg.rc_end_usage = VPX_VBR;
node->cfg.rc_target_bitrate = 512;
node->cfg.rc_min_quantizer = 4;
node->cfg.rc_max_quantizer = 48;
vpx_codec_enc_init(&node->codec, node->iface, &node->cfg, 0);
vpx_codec_control(&node->codec, VP8E_SET_CPUUSED, 8);
vpx_codec_control(&node->codec, VP9E_SET_ROW_MT, 1);
vpx_codec_control(&node->codec, VP9E_SET_TILE_COLUMNS, 2);
vpx_codec_control(&node->codec, VP9E_SET_TILE_ROWS, 1);
vpx_codec_control(&node->codec, VP9E_SET_TUNE_CONTENT, VP9E_CONTENT_SCREEN);
node->strideY = (node->cfg.g_w + 64) & ~63;
node->strideU = (node->cfg.g_w / 2 + 64) & ~63;
node->strideV = (node->cfg.g_w / 2 + 64) & ~63;
node->outY = (uint8_t*) _mm_malloc(node->strideY * node->cfg.g_h, 16);
node->outU = (uint8_t*) _mm_malloc(node->strideU * node->cfg.g_h / 2, 16);
node->outV = (uint8_t*) _mm_malloc(node->strideV * node->cfg.g_h / 2, 16);
return 1;
}
CUTIVIS int CHi_EncodeVP9_Stop(CHiPubNode *pubn) {
CHiEncodeVP9Node *node = (CHiEncodeVP9Node*) ((uintptr_t) pubn - offsetof(CHiEncodeVP9Node, pub));
node->state = CHiEncodeVP9Node::WAITING;
_mm_free(node->outY);
_mm_free(node->outU);
_mm_free(node->outV);
vpx_codec_destroy(&node->codec);
return 1;
}
#include"yuv.h"
struct CHiMuxWebmNode {
CHiPubNode pub;
@@ -386,7 +98,6 @@ CUTIVIS CHiPubNode *CHi_MuxWebm() {
n->pub.Perform = muxwebm_perform;
n->pub.Destroy = muxwebm_destroy;
n->pub.Stop = CHi_MuxWebm_Stop;
n->pub.clean = 0;
n->pub.sinks = (CHiValue*) calloc(sizeof(*n->pub.sinks), n->pub.sinkCount = 3);
n->pub.sourceCount = 0;
n->pub.sources = NULL;

View File

@@ -114,8 +114,6 @@ static int window_perform(CHiPubNode *n) {
n->sources[0].type = CUTIHI_VAL_SAMPLE;
n->sources[0].data.sample = w->vcache;
n->clean = 0;
MTR_END("CHi", "window_perform");
return 1;
@@ -144,7 +142,6 @@ CUTIVIS CHiPubNode *CHi_Window() {
n->pub.Start = n->pub.Stop = NULL;
n->pub.Perform = window_perform;
n->pub.Destroy = window_destroy;
n->pub.clean = 0;
n->pub.sinkCount = 1;
n->pub.sinks = calloc(sizeof(*n->pub.sinks), 1);
n->pub.sourceCount = 1;
@@ -168,7 +165,7 @@ CUTIVIS size_t CHi_Window_GetSourceCount() {
int status = XGetWindowProperty(d, root, atom, 0L, ~0L, 0, AnyPropertyType, &actualType, &format, &numItems, &bytesAfter, (unsigned char**) &list);
XFree(list);
//XFree(list);
return status >= Success ? numItems : 0;
}
@@ -186,7 +183,7 @@ CUTIVIS const char *CHi_Window_GetSourceName(size_t idx) {
int status = XGetWindowProperty(d, root, atom, 0L, ~0L, 0, AnyPropertyType, &actualType, &format, &numItems, &bytesAfter, (unsigned char**) &list);
if(status >= Success) {
XFree(list);
//XFree(list);
status = XGetWMName(d, list[idx], &windowName);
if(status >= Success) {
@@ -209,7 +206,7 @@ CUTIVIS uintptr_t CHi_Window_GetSourceData(size_t idx) {
if(status >= Success) {
Window ret = list[idx];
XFree(list);
//XFree(list);
return ret;
}