/*
 * cuticle/hi/h264enc.c
 * 2025-03-09 10:29:35 +02:00
 *
 * 290 lines
 * 7.1 KiB
 * C
 */

#include"node.h"
#include<stdio.h>
#include<assert.h>
#include<stdlib.h>
#include<string.h>
#include<stdbool.h>
#include<mm_malloc.h>
#define MINIH264_IMPLEMENTATION
#include<minih264e.h>
#include"mode.h"
#include"img.h"
#include"yuv.h"
#include"h264enc_sys.c"
/* State shared between the pool owner and a single worker thread. */
typedef struct
{
    void *event_start;          /* set by the owner to wake the worker (job or shutdown) */
    void *event_done;           /* set by the worker once callback has returned */
    void (*callback)(void*);    /* function the worker runs on its next wake-up */
    void *job;                  /* argument passed to callback */
    void *thread;               /* handle obtained from thread_create */
    int terminated;             /* non-zero tells the worker to leave its loop */
} h264e_thread_t;
/*
 * Worker thread entry point. `arg` is the worker's h264e_thread_t.
 *
 * The worker blocks on event_start; each time it wakes it either exits
 * (when `terminated` is set) or runs callback(job) and then signals
 * event_done.
 */
static THREAD_RET THRAPI minih264_thread_func(void *arg) {
    h264e_thread_t *worker = arg;

    thread_name("h264");
    while(1) {
        event_wait(worker->event_start, INFINITE);
        if(worker->terminated) {
            return 0;
        }
        worker->callback(worker->job);
        event_set(worker->event_done);
    }
}
static void *h264e_thread_pool_init(int max_threads) {
int i;
h264e_thread_t *threads = (h264e_thread_t*) calloc(sizeof(h264e_thread_t), max_threads);
if(!threads)
return 0;
for(i = 0; i < max_threads; i++) {
h264e_thread_t *t = threads + i;
t->event_start = event_create(0, 0);
t->event_done = event_create(0, 0);
t->thread = thread_create(minih264_thread_func, t);
}
return threads;
}
static void h264e_thread_pool_close(void *pool, int max_threads) {
int i;
h264e_thread_t *threads = (h264e_thread_t *)pool;
for(i = 0; i < max_threads; i++) {
h264e_thread_t *t = threads + i;
t->terminated = 1;
event_set(t->event_start);
thread_wait(t->thread);
thread_close(t->thread);
event_destroy(t->event_start);
event_destroy(t->event_done);
}
free(pool);
}
static void h264e_thread_pool_run(void *pool, void (*callback)(void*), void *callback_job[], int njobs) {
h264e_thread_t *threads = (h264e_thread_t*)pool;
int i;
for(i = 0; i < njobs; i++) {
h264e_thread_t *t = threads + i;
t->callback = (void (*)(void *))callback;
t->job = callback_job[i];
event_set(t->event_start);
}
for(i = 0; i < njobs; i++) {
h264e_thread_t *t = threads + i;
event_wait(t->event_done, INFINITE);
}
}
/*
 * Private state of the H.264 encoder node.
 * `pub` has to remain the first member: the node callbacks receive a
 * CHiPubNode* and cast it straight to Internal*.
 */
typedef struct {
    CHiPubNode pub;
    H264E_persist_t *enc;       /* encoder state, allocated in encodeh264_start */
    H264E_scratch_t *scratch;   /* encoder working memory, allocated in encodeh264_start */
    int threadpoolsize;         /* number of workers in threadpool */
    void *threadpool;           /* pool returned by h264e_thread_pool_init */
    bool firstFrame;            /* true until the first frame has been encoded */
} Internal;
/*
 * Flags a start-up failure on the node and returns 0 so callers can write
 * `return encodeh264_fail(pub, "...")`.
 *
 * The code is copied with strncpy exactly like the pre-existing error paths.
 * NOTE(review): one existing code ("size mod16 not 0") is 16 characters long,
 * so new codes are kept to at most 16 characters; confirm CUTIHI_ERR_SIZE and
 * whether the field is expected to be NUL-terminated.
 */
static int encodeh264_fail(CHiPubNode *pub, const char *code) {
    pub->errors.active[0] = true;
    strncpy(pub->errors.code[0], code, CUTIHI_ERR_SIZE);
    pub->errors.sink[0] = 0;
    return 0;
}

/*
 * Start callback: validates the first input frame, creates the worker pool
 * and allocates and initialises the encoder.
 *
 * Returns 1 on success. Returns 0 with an error recorded on the node when the
 * input frame is missing, is not a sample, has dimensions that are not
 * multiples of 16, or when the encoder cannot be set up. On a set-up failure
 * everything acquired by this call is released again.
 */
int encodeh264_start(CHiPubNode *pub) {
    Internal *n = (Internal*) pub;

    CHiValue *firstFrameVal = CHi_Crawl(&pub->sinks[0]);
    if(!firstFrameVal || firstFrameVal->type != CUTIHI_VAL_SAMPLE) {
        return encodeh264_fail(pub, "frame not found");
    }

    CHiImage *firstFrame = firstFrameVal->data.sample;
    if(firstFrame->width % 16 != 0 || firstFrame->height % 16 != 0) {
        return encodeh264_fail(pub, "size mod16 not 0");
    }

    H264E_create_param_t params;
    memset(&params, 0, sizeof(params));
    params.enableNEON = 1;
    params.num_layers = 1;
    params.inter_layer_pred_flag = 0;
    params.gop = 30;
    params.width = firstFrame->width;
    params.height = firstFrame->height;
    params.max_long_term_reference_frames = 0;
    params.fine_rate_control_flag = 0;
    params.vbv_size_bytes = 1024 * 1024;
    params.temporal_denoise_flag = 1;
    /* The original assigned this flag twice (0, then 1); 1 is the value that took effect. */
    params.const_input_flag = 1;

    params.max_threads = n->threadpoolsize = 4;
    params.token = n->threadpool = h264e_thread_pool_init(n->threadpoolsize);
    params.run_func_in_thread = h264e_thread_pool_run;

    const char *failure = NULL;
    int sizeofPersist = 0, sizeofScratch = 0;

    if(!n->threadpool) {
        failure = "out of memory";
        goto fail;
    }

    /*
     * The encoder calls must not live inside assert(): with NDEBUG defined
     * the whole expression, including the call, is compiled out.
     */
    if(H264E_sizeof(&params, &sizeofPersist, &sizeofScratch)) {
        failure = "h264 bad params";
        goto fail;
    }

    n->enc = _mm_malloc(sizeofPersist, 64);
    n->scratch = _mm_malloc(sizeofScratch, 64);
    if(!n->enc || !n->scratch) {
        failure = "out of memory";
        goto fail;
    }

    if(H264E_init(n->enc, &params)) {
        failure = "h264 init failed";
        goto fail;
    }

    n->firstFrame = true;
    return 1;

fail:
    /* Undo this call's allocations and leave the node fields in a clean state. */
    if(n->enc) {
        _mm_free(n->enc);
    }
    if(n->scratch) {
        _mm_free(n->scratch);
    }
    n->enc = NULL;
    n->scratch = NULL;
    if(n->threadpool) {
        h264e_thread_pool_close(n->threadpool, n->threadpoolsize);
    }
    n->threadpool = NULL;
    n->threadpoolsize = 0;
    return encodeh264_fail(pub, failure);
}
/*
 * Stop callback: shuts down the worker pool and releases the encoder memory
 * allocated by encodeh264_start. Always returns 1.
 *
 * The pool is closed first so that no worker can still be running encoder
 * code when the encoder buffers are freed. All fields are reset afterwards,
 * which makes a repeated stop, or a stop without a successful start, harmless.
 *
 * NOTE(review): encodeh264_perform publishes the pointer it receives from
 * H264E_encode without copying it; minih264 documents that output as living
 * in the scratch buffer. Confirm that no consumer reads the last frame after
 * this node has been stopped.
 */
int encodeh264_stop(CHiPubNode *pub) {
    Internal *n = (Internal*) pub;

    h264e_thread_pool_close(n->threadpool, n->threadpoolsize);
    n->threadpool = NULL;
    n->threadpoolsize = 0;

    /* Previously leaked on every start/stop cycle. */
    if(n->enc) {
        _mm_free(n->enc);
        n->enc = NULL;
    }
    if(n->scratch) {
        _mm_free(n->scratch);
        n->scratch = NULL;
    }

    return 1;
}
/*
 * Reports whether the buffer holds a start code (exactly two or three zero
 * bytes followed by 0x01) whose following byte has `nalType` in its low five
 * bits.
 *
 * A start code that is the very last thing in the buffer (no header byte
 * after it) never matches.
 */
static bool contains_nal(uint8_t *data, size_t sz, int nalType) {
    int zero_run = 0;   /* length of the zero run immediately before the current byte */

    for(size_t pos = 0; pos < sz; pos++) {
        const uint8_t byte = data[pos];

        if(byte == 0) {
            zero_run++;
            continue;
        }

        const bool after_start_code = byte == 1 && (zero_run == 2 || zero_run == 3);
        if(after_start_code && pos + 1 < sz && (data[pos + 1] & 0x1F) == nalType) {
            return true;
        }

        /* Any other non-zero byte, or a NAL of a different type, ends the run. */
        zero_run = 0;
    }

    return false;
}
/*
 * Finds the next start code (exactly two or three zero bytes followed by
 * 0x01) in [data, dataEnd) that has at least one byte after it.
 *
 * On success returns a pointer to the first zero byte of the start code and
 * stores the low five bits of the byte after it in *nalType. When nothing is
 * found, returns dataEnd and leaves *nalType untouched.
 */
static uint8_t *find_nal(uint8_t *data, uint8_t *dataEnd, int *nalType) {
    int zeros = 0;

    for(; data != dataEnd; data++) {
        if(*data == 0) {
            zeros++;
        } else if((zeros == 2 || zeros == 3) && *data == 1 && data + 1 != dataEnd) {
            *nalType = data[1] & 0x1F;
            return data - zeros;
        } else {
            zeros = 0;
        }
    }

    return dataEnd;
}

/*
 * Removes, in place, every NAL unit of type `targetNalType` from the `sz`
 * bytes at `data`, closing the gaps by moving the remaining bytes down.
 * Returns the new length of the buffer contents.
 *
 * A NAL unit spans from its start code up to the next start code found by
 * find_nal, or to the end of the buffer.
 */
static size_t delete_nals(uint8_t *data, size_t sz, int targetNalType) {
    uint8_t *dataStart = data;
    uint8_t *dataEnd = data + sz;

    while(1) {
        int nalType = -1;
        uint8_t *start = find_nal(data, dataEnd, &nalType);
        if(start == dataEnd) {
            break;
        }

        if(nalType != targetNalType) {
            /* Step past this start code (at least three bytes) and keep looking. */
            data = start + 3;
            continue;
        }

        int nextType = -1;
        uint8_t *next = find_nal(start + 3, dataEnd, &nextType);
        memmove(start, next, (size_t) (dataEnd - next));
        dataEnd -= next - start;

        /*
         * Resume at `start`, not `start + 3`: the following NAL now begins
         * there and must be examined too, otherwise a second consecutive NAL
         * of the target type would survive. Each removal shrinks the buffer,
         * so the loop still terminates.
         */
        data = start;
    }

    return (size_t) (dataEnd - dataStart);
}
/*
 * Perform callback: converts the current input image to planar YCbCr,
 * encodes it as one H.264 frame and publishes the result on source 0 as a
 * single-entry CHiBSFrames list.
 *
 * SPS/PPS NALs (types 7 and 8) are kept only in the first frame, which is
 * also flagged as the setup packet; frames containing an IDR slice (type 5)
 * are flagged as key frames.
 *
 * Returns 1 on success and 0 when the input is missing, memory cannot be
 * allocated or the encoder reports an error.
 * NOTE(review): 0 is assumed to signal failure to the caller, mirroring
 * encodeh264_start; the original aborted via assert instead.
 *
 * The published `ptr` is the buffer returned by H264E_encode and is not
 * copied here.
 */
int encodeh264_perform(CHiPubNode *pub) {
    Internal *n = (Internal*) pub;

    CHiValue *frameVal = CHi_Crawl(&pub->sinks[0]);
    if(!frameVal || frameVal->type != CUTIHI_VAL_SAMPLE || !frameVal->data.sample) {
        return 0;
    }
    CHiImage *img = frameVal->data.sample;

    /* Plane strides are rounded up to a multiple of 16; chroma is half resolution. */
    size_t strideY = (img->width + 15) & ~15;
    size_t strideU = (((img->width + 1) / 2) + 15) & ~15;
    size_t strideV = (((img->width + 1) / 2) + 15) & ~15;

    uint8_t *outY = malloc(strideY * img->height + 15);
    uint8_t *outU = malloc(strideU * ((img->height + 1) / 2) + 15);
    uint8_t *outV = malloc(strideV * ((img->height + 1) / 2) + 15);
    if(!outY || !outU || !outV) {
        free(outY);
        free(outU);
        free(outV);
        return 0;
    }

    bgra64toycbcr(img->data8, img->stride, img->width, img->height, outY, outU, outV, strideY, strideU, strideV);

    H264E_run_param_t params;
    memset(&params, 0, sizeof(params));
    params.encode_speed = H264E_SPEED_FASTEST;
    params.frame_type = 0;
    params.desired_frame_bytes = 10000;
    params.qp_min = 10;
    params.qp_max = 30;
    params.desired_nalu_bytes = 0;

    H264E_io_yuv_t planes = {
        .yuv = {outY, outU, outV},
        .stride = {(int) strideY, (int) strideU, (int) strideV}
    };

    /*
     * H264E_encode reports the size through an int*, so an int is used here
     * rather than a size_t. The call is made outside assert() so it is not
     * compiled out when NDEBUG is defined.
     */
    int encodedBytes = 0;
    uint8_t *codedData = NULL;
    int err = H264E_encode(n->enc, n->scratch, &params, &planes, &codedData, &encodedBytes);

    free(outY);
    free(outU);
    free(outV);

    if(err || encodedBytes < 0 || (encodedBytes > 0 && !codedData)) {
        return 0;
    }
    size_t sizeofCodedData = (size_t) encodedBytes;

    /* We only want SPS and PPS NALs once */
    if(!n->firstFrame) {
        sizeofCodedData = delete_nals(codedData, sizeofCodedData, 7);
        sizeofCodedData = delete_nals(codedData, sizeofCodedData, 8);
    }

    CHiBSFrames *frames = malloc(sizeof(*frames) + sizeof(CHiBSFrame));
    if(!frames) {
        return 0;
    }

    frames->count = 1;
    frames->data[0].timestamp = CHi_Time_Get(pub->ng) * 1000;
    frames->data[0].sz = sizeofCodedData;
    frames->data[0].ptr = codedData;
    frames->data[0].flags = 0;
    if(contains_nal(codedData, sizeofCodedData, 5)) {
        frames->data[0].flags |= CUTIHI_BS_FLAG_KEY;
    }
    if(n->firstFrame) {
        frames->data[0].flags |= CUTIHI_BS_SETUP_PACKET;
    }

    pub->sources[0].type = CUTIHI_VAL_H264BS;
    pub->sources[0].data.bitstream = frames;

    n->firstFrame = false;
    return 1;
}
/*
 * Creates an H.264 encoder node with one sink and one source and wires up
 * its Start/Perform/Stop callbacks.
 *
 * Returns the node's public part, or NULL when memory cannot be allocated
 * (the original dereferenced a NULL pointer in that case).
 */
CUTIVIS CHiPubNode *CHi_EncodeH264() {
    Internal *ret = calloc(1, sizeof(*ret));
    if(!ret) {
        return NULL;
    }

    ret->pub.type = CUTIHI_T('CEnc', 'H264');
    ret->pub.Start = encodeh264_start;
    ret->pub.Perform = encodeh264_perform;
    ret->pub.Stop = encodeh264_stop;

    ret->pub.sinkCount = 1;
    ret->pub.sinks = calloc(1, sizeof(*ret->pub.sinks));
    ret->pub.sourceCount = 1;
    ret->pub.sources = calloc(1, sizeof(*ret->pub.sources));

    if(!ret->pub.sinks || !ret->pub.sources) {
        free(ret->pub.sinks);
        free(ret->pub.sources);
        free(ret);
        return NULL;
    }

    return &ret->pub;
}