Forgot these

This commit is contained in:
mid 2025-03-09 10:29:35 +02:00
parent 6fc29ba5f8
commit 824f3812ce
7 changed files with 1872 additions and 0 deletions

141
hi/aaclc.c Normal file
View File

@ -0,0 +1,141 @@
#include"node.h"
#include<fdk-aac/aacenc_lib.h>
#include<string.h>
#include"img.h"
#include<stdlib.h>
#include<stdio.h>
#include<assert.h>
#include<stdbool.h>
#include<math.h>
typedef struct {
CHiPubNode pub;
HANDLE_AACENCODER enc;
int16_t *pcmbuf;
size_t pcmbufSz;
uint64_t timestamp; // In samples (48000 Hz)
AACENC_InfoStruct info;
bool extradataSent;
} Internal;
static int encodeaac_perform(CHiPubNode *pub) {
Internal *n = (Internal*) pub;
CHiImage *newpcm = CHi_Crawl(&pub->sinks[0])->data.sample;
if(newpcm && newpcm->width) {
n->pcmbuf = realloc(n->pcmbuf, sizeof(*n->pcmbuf) * (n->pcmbufSz + newpcm->width * newpcm->channels));
memcpy(n->pcmbuf + n->pcmbufSz, newpcm->data8, newpcm->width * newpcm->channels * sizeof(*n->pcmbuf));
/*for(size_t z = 0; z < newpcm->width; z++) {
static size_t lol = 0;
n->pcmbuf[n->pcmbufSz + z] = sinf(lol++ * 440.0 / 48000 * 2.0 * 3.14159) * 15000;
}*/
n->pcmbufSz += newpcm->width * newpcm->channels;
}
CHiBSFrames *frames = calloc(1, sizeof(*frames) + 1 * sizeof(CHiBSFrame));
if(!n->extradataSent) {
frames->count++;
frames->data[frames->count - 1].timestamp = 0;
frames->data[frames->count - 1].flags = CUTIHI_BS_SETUP_PACKET;
frames->data[frames->count - 1].ptr = n->info.confBuf;
frames->data[frames->count - 1].sz = n->info.confSize;
n->extradataSent = true;
}
while(n->pcmbufSz) {
int inIdentifier = IN_AUDIO_DATA;
int inSize = n->pcmbufSz * sizeof(*n->pcmbuf);
int inElSize = 2;
AACENC_BufDesc inbuf = { .numBufs = 1, .bufs = &n->pcmbuf, .bufferIdentifiers = &inIdentifier, .bufSizes = &inSize, .bufElSizes = &inElSize };
int outIdentifier = OUT_BITSTREAM_DATA;
int outSize = 2048;
int outElSize = 1;
void *outBuf = malloc(outSize);
AACENC_BufDesc outbuf = { .numBufs = 1, .bufs = &outBuf, .bufferIdentifiers = &outIdentifier, .bufSizes = &outSize, .bufElSizes = &outElSize };
AACENC_InArgs inargs = { .numInSamples = n->pcmbufSz };
AACENC_OutArgs outargs = {};
int err = aacEncEncode(n->enc, &inbuf, &outbuf, &inargs, &outargs);
if(err == AACENC_OK) {
if(outargs.numOutBytes > 0) {
frames = realloc(frames, sizeof(*frames) + (++frames->count) * sizeof(CHiBSFrame));
frames->data[frames->count - 1].timestamp = n->timestamp / 48; // ms
frames->data[frames->count - 1].flags = 0;
frames->data[frames->count - 1].ptr = outBuf;
frames->data[frames->count - 1].sz = outargs.numOutBytes;
}
if(outargs.numInSamples > 0) {
memmove(n->pcmbuf, n->pcmbuf + outargs.numInSamples, sizeof(*n->pcmbuf) * (n->pcmbufSz - outargs.numInSamples));
n->pcmbufSz -= outargs.numInSamples;
n->timestamp += outargs.numInSamples;
}
if(outargs.numOutBytes == 0 || outargs.numInSamples == 0) {
break;
}
} else if(err == AACENC_ENCODE_EOF) {
break;
} else {
abort();
return 0;
}
}
n->pub.sources[0].type = CUTIHI_VAL_AACBS;
n->pub.sources[0].data.bitstream = frames;
return 1;
}
static int encodeaac_start(CHiPubNode *pub) {
Internal *n = (Internal*) pub;
assert(aacEncOpen(&n->enc, 0, 1) == AACENC_OK);
assert(aacEncoder_SetParam(n->enc, AACENC_AOT, 2) == AACENC_OK); // Low complexity profile
assert(aacEncoder_SetParam(n->enc, AACENC_SAMPLERATE, 48000) == AACENC_OK);
assert(aacEncoder_SetParam(n->enc, AACENC_CHANNELMODE, MODE_1) == AACENC_OK);
assert(aacEncoder_SetParam(n->enc, AACENC_CHANNELORDER, 1) == AACENC_OK);
assert(aacEncoder_SetParam(n->enc, AACENC_BITRATEMODE, 3) == AACENC_OK);
assert(aacEncEncode(n->enc, NULL, NULL, NULL, NULL) == AACENC_OK);
memset(&n->info, 0, sizeof(n->info));
assert(aacEncInfo(n->enc, &n->info) == AACENC_OK);
n->pcmbuf = NULL;
n->pcmbufSz = 0;
n->timestamp = 0;
n->extradataSent = false;
return 1;
}
static int encodeaac_stop(CHiPubNode *pub) {
Internal *n = (Internal*) pub;
aacEncClose(&n->enc);
return 1;
}
CUTIVIS CHiPubNode *CHi_EncodeAAC() {
Internal *ret = calloc(1, sizeof(*ret));
ret->pub.type = CUTIHI_T('CEnc','AACL');
ret->pub.Start = encodeaac_start;
ret->pub.Perform = encodeaac_perform;
ret->pub.Stop = encodeaac_stop;
ret->pub.sinks = calloc(sizeof(*ret->pub.sinks), ret->pub.sinkCount = 1);
ret->pub.sources = calloc(sizeof(*ret->pub.sources), ret->pub.sourceCount = 1);
return &ret->pub;
}

289
hi/h264enc.c Normal file
View File

@ -0,0 +1,289 @@
#include"node.h"
#include<stdio.h>
#include<assert.h>
#include<stdlib.h>
#include<string.h>
#include<stdbool.h>
#include<mm_malloc.h>
#define MINIH264_IMPLEMENTATION
#include<minih264e.h>
#include"mode.h"
#include"img.h"
#include"yuv.h"
#include"h264enc_sys.c"
typedef struct
{
void *event_start;
void *event_done;
void (*callback)(void*);
void *job;
void *thread;
int terminated;
} h264e_thread_t;
static THREAD_RET THRAPI minih264_thread_func(void *arg) {
h264e_thread_t *t = (h264e_thread_t *)arg;
thread_name("h264");
for(;;) {
event_wait(t->event_start, INFINITE);
if(t->terminated)
break;
t->callback(t->job);
event_set(t->event_done);
}
return 0;
}
static void *h264e_thread_pool_init(int max_threads) {
int i;
h264e_thread_t *threads = (h264e_thread_t*) calloc(sizeof(h264e_thread_t), max_threads);
if(!threads)
return 0;
for(i = 0; i < max_threads; i++) {
h264e_thread_t *t = threads + i;
t->event_start = event_create(0, 0);
t->event_done = event_create(0, 0);
t->thread = thread_create(minih264_thread_func, t);
}
return threads;
}
static void h264e_thread_pool_close(void *pool, int max_threads) {
int i;
h264e_thread_t *threads = (h264e_thread_t *)pool;
for(i = 0; i < max_threads; i++) {
h264e_thread_t *t = threads + i;
t->terminated = 1;
event_set(t->event_start);
thread_wait(t->thread);
thread_close(t->thread);
event_destroy(t->event_start);
event_destroy(t->event_done);
}
free(pool);
}
static void h264e_thread_pool_run(void *pool, void (*callback)(void*), void *callback_job[], int njobs) {
h264e_thread_t *threads = (h264e_thread_t*)pool;
int i;
for(i = 0; i < njobs; i++) {
h264e_thread_t *t = threads + i;
t->callback = (void (*)(void *))callback;
t->job = callback_job[i];
event_set(t->event_start);
}
for(i = 0; i < njobs; i++) {
h264e_thread_t *t = threads + i;
event_wait(t->event_done, INFINITE);
}
}
typedef struct {
CHiPubNode pub;
H264E_persist_t *enc;
H264E_scratch_t *scratch;
int threadpoolsize;
void *threadpool;
bool firstFrame;
} Internal;
int encodeh264_start(CHiPubNode *pub) {
Internal *n = (Internal*) pub;
CHiValue *firstFrameVal = CHi_Crawl(&pub->sinks[0]);
if(!firstFrameVal || firstFrameVal->type != CUTIHI_VAL_SAMPLE) {
pub->errors.active[0] = true;
strncpy(pub->errors.code[0], "frame not found", CUTIHI_ERR_SIZE);
pub->errors.sink[0] = 0;
return 0;
}
CHiImage *firstFrame = firstFrameVal->data.sample;
if(firstFrame->width % 16 != 0 || firstFrame->height % 16 != 0) {
pub->errors.active[0] = true;
strncpy(pub->errors.code[0], "size mod16 not 0", CUTIHI_ERR_SIZE);
pub->errors.sink[0] = 0;
return 0;
}
H264E_create_param_t params;
memset(&params, 0, sizeof(params));
params.enableNEON = 1;
params.num_layers = 1;
params.inter_layer_pred_flag = 0;
params.gop = 30;
params.width = firstFrame->width;
params.height = firstFrame->height;
params.max_long_term_reference_frames = 0;
params.fine_rate_control_flag = 0;
params.const_input_flag = 0;
params.vbv_size_bytes = 1024 * 1024;
params.temporal_denoise_flag = 1;
params.const_input_flag = 1;
params.max_threads = n->threadpoolsize = 4;
params.token = n->threadpool = h264e_thread_pool_init(n->threadpoolsize);
params.run_func_in_thread = h264e_thread_pool_run;
int sizeofPersist, sizeofScratch;
assert(!H264E_sizeof(&params, &sizeofPersist, &sizeofScratch));
n->enc = _mm_malloc(sizeofPersist, 64);
n->scratch = _mm_malloc(sizeofScratch, 64);
assert(!H264E_init(n->enc, &params));
n->firstFrame = true;
return 1;
}
int encodeh264_stop(CHiPubNode *pub) {
Internal *n = (Internal*) pub;
h264e_thread_pool_close(n->threadpool, n->threadpoolsize);
return 1;
}
static bool contains_nal(uint8_t *data, size_t sz, int nalType) {
uint8_t *dataEnd = data + sz;
int zeros = 0;
while(data != dataEnd) {
if(*data == 0) {
zeros++;
} else if((zeros == 2 || zeros == 3) && *data == 1 && data + 1 != dataEnd && (*(data + 1) & 0x1F) == nalType) {
return true;
} else {
zeros = 0;
}
data++;
}
return false;
}
static uint8_t *find_nal(uint8_t *data, uint8_t *dataEnd, int *nalType) {
int zeros = 0;
while(data != dataEnd) {
if(*data == 0) {
zeros++;
} else if((zeros == 2 || zeros == 3) && *data == 1 && data + 1 != dataEnd) {
*nalType = (*(data + 1)) & 0x1F;
return data - zeros;
} else {
zeros = 0;
}
data++;
}
return dataEnd;
}
static size_t delete_nals(uint8_t *data, size_t sz, int targetNalType) {
uint8_t *dataStart = data;
uint8_t *dataEnd = data + sz;
while(1) {
int nalType;
uint8_t *start = find_nal(data, dataEnd, &nalType);
if(start == dataEnd) {
break;
}
if(nalType == targetNalType) {
uint8_t *start2 = find_nal(start + 3, dataEnd, &nalType);
memmove(start, start2, dataEnd - start2);
dataEnd -= start2 - start;
}
data = start + 3;
}
return dataEnd - dataStart;
}
int encodeh264_perform(CHiPubNode *pub) {
Internal *n = (Internal*) pub;
CHiImage *img = CHi_Crawl(&pub->sinks[0])->data.sample;
size_t strideY = (img->width + 15) & ~15;
size_t strideU = (((img->width + 1) / 2) + 15) & ~15;
size_t strideV = (((img->width + 1) / 2) + 15) & ~15;
uint8_t *outY = malloc(strideY * img->height + 15);
uint8_t *outU = malloc(strideU * ((img->height + 1) / 2) + 15);
uint8_t *outV = malloc(strideV * ((img->height + 1) / 2) + 15);
bgra64toycbcr(img->data8, img->stride, img->width, img->height, outY, outU, outV, strideY, strideU, strideV);
H264E_run_param_t params;
memset(&params, 0, sizeof(params));
params.encode_speed = H264E_SPEED_FASTEST;
params.frame_type = 0;
params.desired_frame_bytes = 10000;
params.qp_min = 10;
params.qp_max = 30;
params.desired_nalu_bytes = 0;
size_t sizeofCodedData = 0;
uint8_t *codedData = NULL;
assert(!H264E_encode(n->enc, n->scratch, &params, &(H264E_io_yuv_t) {
.yuv = {outY, outU, outV},
.stride = {strideY, strideU, strideV}
}, &codedData, &sizeofCodedData));
free(outY);
free(outU);
free(outV);
/* We only want SPS and PPS NALs once */
if(!n->firstFrame) {
sizeofCodedData = delete_nals(codedData, sizeofCodedData, 7);
sizeofCodedData = delete_nals(codedData, sizeofCodedData, 8);
}
CHiBSFrames *frames = malloc(sizeof(*frames) + sizeof(CHiBSFrame));
frames->count = 1;
frames->data[0].timestamp = CHi_Time_Get(pub->ng) * 1000;
frames->data[0].sz = sizeofCodedData;
frames->data[0].ptr = codedData;
frames->data[0].flags = 0;
if(contains_nal(codedData, sizeofCodedData, 5)) {
frames->data[0].flags |= CUTIHI_BS_FLAG_KEY;
}
if(n->firstFrame) {
frames->data[0].flags |= CUTIHI_BS_SETUP_PACKET;
}
pub->sources[0].type = CUTIHI_VAL_H264BS;
pub->sources[0].data.bitstream = frames;
n->firstFrame = false;
return 1;
}
CUTIVIS CHiPubNode *CHi_EncodeH264() {
Internal *ret = calloc(1, sizeof(*ret));
ret->pub.type = CUTIHI_T('CEnc', 'H264');
ret->pub.Start = encodeh264_start;
ret->pub.Perform = encodeh264_perform;
ret->pub.Stop = encodeh264_stop;
ret->pub.sinks = calloc(sizeof(*ret->pub.sinks), ret->pub.sinkCount = 1);
ret->pub.sources = calloc(sizeof(*ret->pub.sources), ret->pub.sourceCount = 1);
return &ret->pub;
}

573
hi/h264enc_sys.c Normal file
View File

@ -0,0 +1,573 @@
#ifndef __LGE_SYSTEM_H__
#define __LGE_SYSTEM_H__
#ifdef _WIN32
#include <windows.h>
typedef DWORD THREAD_RET;
#define THRAPI __stdcall
#include <stdint.h>
#else //_WIN32
#include <stdint.h>
#include <pthread.h>
typedef void * THREAD_RET;
typedef THREAD_RET (*PTHREAD_START_ROUTINE)(void *lpThreadParameter);
typedef PTHREAD_START_ROUTINE LPTHREAD_START_ROUTINE;
typedef pthread_mutex_t CRITICAL_SECTION, *PCRITICAL_SECTION, *LPCRITICAL_SECTION;
#define THRAPI
#ifndef FALSE
#define FALSE 0
#endif
#ifndef TRUE
#define TRUE 1
#endif
typedef void * HANDLE;
#define MAXIMUM_WAIT_OBJECTS 64
#define INFINITE (uint32_t)(-1)
#define WAIT_FAILED (-1)
#define WAIT_TIMEOUT 0x102
#define WAIT_OBJECT 0
#define WAIT_OBJECT_0 0
#define WAIT_ABANDONED 128
#define WAIT_ABANDONED_0 128
#endif //_WIN32
#ifdef __cplusplus
extern "C" {
#else
#ifndef bool
#define bool int
#endif
#endif
HANDLE event_create(bool manualReset, bool initialState);
bool event_destroy(HANDLE event);
#ifndef _WIN32
#define SetEvent event_set
#define ResetEvent event_reset
#define WaitForSingleObject event_wait
#define WaitForMultipleObjects event_wait_multiple
bool event_set(HANDLE event);
bool event_reset(HANDLE event);
int event_wait(HANDLE event, uint32_t milliseconds);
int event_wait_multiple(uint32_t count, const HANDLE *events, bool waitAll, uint32_t milliseconds);
bool InitializeCriticalSection(LPCRITICAL_SECTION lpCriticalSection);
bool DeleteCriticalSection(LPCRITICAL_SECTION lpCriticalSection);
bool EnterCriticalSection(LPCRITICAL_SECTION lpCriticalSection);
bool LeaveCriticalSection(LPCRITICAL_SECTION lpCriticalSection);
#else
#define event_set SetEvent
#define event_reset ResetEvent
#define event_wait WaitForSingleObject
#define event_wait_multiple WaitForMultipleObjects
#endif
HANDLE thread_create(LPTHREAD_START_ROUTINE lpStartAddress, void *lpParameter);
bool thread_close(HANDLE thread);
void *thread_wait(HANDLE thread);
bool thread_name(const char *name);
void thread_sleep(uint32_t milliseconds);
uint64_t GetTime();
#ifdef __cplusplus
}
#endif
#endif //__LGE_SYSTEM_H__
#ifndef _WIN32
#include <stdlib.h>
#include <time.h>
#include <errno.h>
#include <unistd.h>
#if defined(__linux) || defined(__linux__)
#include <sys/prctl.h>
#endif
#define nullptr 0
typedef struct Event Event;
typedef struct Event
{
Event * volatile pMultipleCond;
pthread_mutex_t mutex;
pthread_cond_t cond;
volatile bool signaled;
bool manual_reset;
} Event;
static bool InitEvent(Event *e)
{
#if (defined(ANDROID) && !defined(__LP64__)) || defined(__APPLE__)
if (pthread_cond_init(&e->cond, NULL))
return FALSE;
#else
pthread_condattr_t attr;
if (pthread_condattr_init(&attr))
return FALSE;
if (pthread_condattr_setclock(&attr, CLOCK_MONOTONIC))
{
pthread_condattr_destroy(&attr);
return FALSE;
}
if (pthread_cond_init(&e->cond, &attr))
{
pthread_condattr_destroy(&attr);
return FALSE;
}
pthread_condattr_destroy(&attr);
#endif
if (pthread_mutex_init(&e->mutex, NULL))
{
pthread_cond_destroy(&e->cond);
return FALSE;
}
e->pMultipleCond = NULL;
return TRUE;
}
#ifdef __APPLE__
#include <mach/mach_time.h>
static inline uint64_t GetAbsTimeInNanoseconds()
{
static mach_timebase_info_data_t g_timebase_info;
if (g_timebase_info.denom == 0)
mach_timebase_info(&g_timebase_info);
return mach_absolute_time()*g_timebase_info.numer/g_timebase_info.denom;
}
#endif
static inline void GetAbsTime(struct timespec *ts, uint32_t timeout)
{
#if defined(__APPLE__)
uint64_t cur_time = GetAbsTimeInNanoseconds();
ts->tv_sec = cur_time/1000000000u + timeout/1000u;
ts->tv_nsec = (cur_time % 1000000000u) + (timeout % 1000u)*1000000u;
#else
clock_gettime(CLOCK_MONOTONIC, ts);
ts->tv_sec += timeout/1000u;
ts->tv_nsec += (timeout % 1000u)*1000000u;
#endif
if (ts->tv_nsec >= 1000000000)
{
ts->tv_nsec -= 1000000000;
ts->tv_sec++;
}
}
static inline int CondTimedWait(pthread_cond_t *cond, pthread_mutex_t *mutex, const struct timespec *abstime)
{
#if defined(ANDROID) && !defined(__LP64__)
return pthread_cond_timedwait_monotonic_np(cond, mutex, abstime);
#elif defined(__APPLE__)
struct timespec reltime;
uint64_t cur_time = GetAbsTimeInNanoseconds();
reltime.tv_sec = abstime->tv_sec - cur_time/1000000000u;
reltime.tv_nsec = abstime->tv_nsec - (cur_time % 1000000000u);
if (reltime.tv_nsec < 0)
{
reltime.tv_nsec += 1000000000;
reltime.tv_sec--;
}
if ((reltime.tv_sec < 0) || ((reltime.tv_sec == 0) && (reltime.tv_nsec == 0)))
return ETIMEDOUT;
return pthread_cond_timedwait_relative_np(cond, mutex, &reltime);
#else
return pthread_cond_timedwait(cond, mutex, abstime);
#endif
}
static bool WaitForEvent(Event *e, uint32_t timeout, bool *signaled)
{
if (pthread_mutex_lock(&e->mutex))
return FALSE;
if (timeout == INFINITE)
{
while (!e->signaled)
pthread_cond_wait(&e->cond, &e->mutex);
} else if (timeout != 0)
{
struct timespec t;
GetAbsTime(&t, timeout);
while (!e->signaled)
{
if (CondTimedWait(&e->cond, &e->mutex, &t))
break;
}
}
*signaled = e->signaled;
if (!e->manual_reset)
e->signaled = FALSE;
if (pthread_mutex_unlock(&e->mutex))
return FALSE;
return TRUE;
}
static bool WaitForMultipleEvents(Event **e, uint32_t count, uint32_t timeout, bool waitAll, int *signaled_num)
{
uint32_t i;
#define PTHR(func, num) for (i = num; i < count; i++)\
if (func(&e[i]->mutex))\
return FALSE;
PTHR(pthread_mutex_lock, 0);
int sig_num = -1;
if (timeout == 0)
{
#define CHECK_SIGNALED \
if (waitAll)\
{\
for (i = 0; i < count; i++)\
if (!e[i]->signaled)\
break;\
if (i == count)\
for (i = 0; i < count; i++)\
{\
if (sig_num < 0 && e[i]->signaled)\
sig_num = (int)i;\
if (!e[i]->manual_reset)\
e[i]->signaled = FALSE;\
}\
} else\
{\
for (i = 0; i < count; i++)\
if (e[i]->signaled)\
{\
sig_num = (int)i;\
if (!e[i]->manual_reset)\
e[i]->signaled = FALSE;\
break;\
}\
}
CHECK_SIGNALED;
} else
if (timeout == INFINITE)
{
#define SET_MULTIPLE(val) for (i = 1; i < count; i++)\
e[i]->pMultipleCond = val;
SET_MULTIPLE(e[0]);
for (;;)
{
CHECK_SIGNALED;
if (sig_num >= 0)
break;
PTHR(pthread_mutex_unlock, 1);
pthread_cond_wait(&e[0]->cond, &e[0]->mutex);
PTHR(pthread_mutex_lock, 1);
}
SET_MULTIPLE(0);
} else
{
SET_MULTIPLE(e[0]);
struct timespec t;
GetAbsTime(&t, timeout);
for (;;)
{
CHECK_SIGNALED;
if (sig_num >= 0)
break;
PTHR(pthread_mutex_unlock, 1);
int res = CondTimedWait(&e[0]->cond, &e[0]->mutex, &t);
PTHR(pthread_mutex_lock, 1);
if (res)
break;
}
SET_MULTIPLE(0);
}
PTHR(pthread_mutex_unlock, 0);
*signaled_num = sig_num;
return TRUE;
}
HANDLE event_create(bool manualReset, bool initialState)
{
Event *e = (Event *)malloc(sizeof(*e));
if (!e)
return NULL;
if (!InitEvent(e))
{
free(e);
return NULL;
}
e->manual_reset = manualReset;
e->signaled = initialState;
return (HANDLE)e;
}
bool event_destroy(HANDLE event)
{
Event *e = (Event *)event;
if (!e)
return FALSE;
if (pthread_cond_destroy(&e->cond))
return FALSE;
if (pthread_mutex_destroy(&e->mutex))
return FALSE;
free((void *)e);
return TRUE;
}
bool event_set(HANDLE event)
{
Event *e = (Event *)event;
if (pthread_mutex_lock(&e->mutex))
return FALSE;
Event *pMultipleCond = e->pMultipleCond;
e->signaled = TRUE;
if (pthread_cond_signal(&e->cond))
return FALSE;
if (pthread_mutex_unlock(&e->mutex))
return FALSE;
if (pMultipleCond && pMultipleCond != e)
{
if (pthread_mutex_lock(&pMultipleCond->mutex))
return FALSE;
if (pthread_cond_signal(&pMultipleCond->cond))
return FALSE;
if (pthread_mutex_unlock(&pMultipleCond->mutex))
return FALSE;
}
return TRUE;
}
bool event_reset(HANDLE event)
{
Event *e = (Event *)event;
if (pthread_mutex_lock(&e->mutex))
return FALSE;
e->signaled = FALSE;
if (pthread_mutex_unlock(&e->mutex))
return FALSE;
return TRUE;
}
int event_wait(HANDLE event, uint32_t milliseconds)
{
bool signaled;
if (!WaitForEvent((Event *)event, milliseconds, &signaled))
return WAIT_FAILED;
return signaled ? WAIT_OBJECT : WAIT_TIMEOUT;
}
int event_wait_multiple(uint32_t count, const HANDLE *events, bool waitAll, uint32_t milliseconds)
{
if (count == 1)
return event_wait(events[0], milliseconds);
int signaled_num = -1;
if (!WaitForMultipleEvents((Event **)events, count, milliseconds, waitAll, &signaled_num))
return WAIT_FAILED;
return (signaled_num < 0) ? WAIT_TIMEOUT : (WAIT_OBJECT_0 + signaled_num);
}
bool InitializeCriticalSection(LPCRITICAL_SECTION lpCriticalSection)
{
pthread_mutexattr_t ma;
if (pthread_mutexattr_init(&ma))
return FALSE;
if (pthread_mutexattr_settype(&ma, PTHREAD_MUTEX_RECURSIVE))
{
pthread_mutexattr_destroy(&ma);
return FALSE;
}
if (pthread_mutex_init((pthread_mutex_t *)lpCriticalSection, &ma))
{
pthread_mutexattr_destroy(&ma);
return FALSE;
}
if (pthread_mutexattr_destroy(&ma))
return FALSE;
return TRUE;
}
bool DeleteCriticalSection(LPCRITICAL_SECTION lpCriticalSection)
{
if (pthread_mutex_destroy((pthread_mutex_t *)lpCriticalSection))
return FALSE;
return TRUE;
}
bool EnterCriticalSection(LPCRITICAL_SECTION lpCriticalSection)
{
if (pthread_mutex_lock((pthread_mutex_t *)lpCriticalSection))
return FALSE;
return TRUE;
}
bool LeaveCriticalSection(LPCRITICAL_SECTION lpCriticalSection)
{
if (pthread_mutex_unlock((pthread_mutex_t *)lpCriticalSection))
return FALSE;
return TRUE;
}
HANDLE thread_create(LPTHREAD_START_ROUTINE lpStartAddress, void *lpParameter)
{
pthread_t *t = (pthread_t *)malloc(sizeof(*t));
if (!t)
return nullptr;
if (pthread_create(t, 0, lpStartAddress, lpParameter))
{
free(t);
return nullptr;
}
//if (lpThreadId)
// *lpThreadId = (uint32_t)*t;
return (HANDLE)t;
}
bool thread_close(HANDLE thread)
{
if (!thread)
return FALSE;
pthread_t *t = (pthread_t *)thread;
if (*t)
pthread_detach(*t);
free(t);
return TRUE;
}
void *thread_wait(HANDLE thread)
{
if (!thread)
return (void*)-1;
void *ret = 0;
pthread_t *t = (pthread_t *)thread;
if (!*t)
return ret;
int res = pthread_join(*t, &ret);
if (res)
return (void*)-1;
#if 0
if (timeout == 0)
{
int res = pthread_tryjoin_np(*t, &ret);
if (res)
return FALSE;
} else
if (timeout == INFINITE)
{
int res = pthread_join(*t, &ret);
if (res)
return FALSE;
} else
{
struct timespec ts;
GetAbsTime(&ts, timeout);
int res = pthread_timedjoin_np(*t, &ret, &ts);
if (res)
return FALSE;
}
#endif
*t = 0; // thread joined - no need to detach
return ret;
}
#else //_WIN32
HANDLE thread_create(LPTHREAD_START_ROUTINE lpStartAddress, void *lpParameter)
{
DWORD tid;
return CreateThread(0, 0, lpStartAddress, lpParameter, 0, &tid);
}
HANDLE event_create(bool manualReset, bool initialState)
{
return CreateEvent(0, manualReset, initialState, 0);
}
bool event_destroy(HANDLE event)
{
CloseHandle(event);
return TRUE;
}
bool thread_close(HANDLE thread)
{
CloseHandle(thread);
return TRUE;
}
void *thread_wait(HANDLE thread)
{
if (WaitForSingleObject(thread, INFINITE) == WAIT_OBJECT_0)
{
DWORD ExitCode;
GetExitCodeThread(thread, &ExitCode);
return (void *)(intptr_t)ExitCode;
}
return (void *)(intptr_t)-1;
}
#endif //_WIN32
bool thread_name(const char *name)
{
#ifdef _WIN32
#ifdef _MSC_VER
struct tagTHREADNAME_INFO
{
DWORD dwType;
LPCSTR szName;
DWORD dwThreadID;
DWORD dwFlags;
} info = { 0x1000, name, (DWORD)-1, 0 };
__try
{
RaiseException(0x406D1388, 0, sizeof(info)/sizeof(ULONG_PTR), (ULONG_PTR*)&info);
}
__except(EXCEPTION_EXECUTE_HANDLER)
{
}
#endif
return TRUE;
#elif defined(__linux) || defined(__linux__)
return (0 == prctl(PR_SET_NAME, name, 0, 0, 0));
//return (0 == pthread_setname_np(pthread_self(), name));
#else // macos, ios
return (0 == pthread_setname_np(name));
#endif
}
void thread_sleep(uint32_t milliseconds)
{
#ifdef _WIN32
Sleep(milliseconds);
#else
usleep((useconds_t)milliseconds*1000);
#endif
}
uint64_t GetTime()
{
uint64_t time;
#ifdef _WIN32
GetSystemTimeAsFileTime((FILETIME*)&time);
time = time/10 - 11644473600000000;
#elif defined(__APPLE__)
time = GetAbsTimeInNanoseconds() / 1000u;
#else
struct timespec ts;
// CLOCK_PROCESS_CPUTIME_ID CLOCK_THREAD_CPUTIME_ID
clock_gettime(CLOCK_MONOTONIC, &ts);
time = (uint64_t)ts.tv_sec * 1000000u + ts.tv_nsec / 1000u;
#endif
return time;
}

301
hi/rtmp.c Normal file
View File

@ -0,0 +1,301 @@
#include"node.h"
#include<stdio.h>
#include<assert.h>
#include<stdlib.h>
#include<string.h>
#include<stdbool.h>
#include<librtmp/rtmp.h>
#include<librtmp/log.h>
#include<arpa/inet.h>
#include<sys/select.h>
#include"img.h"
#define NALLENSZ 4
static size_t annexb_parse(const uint8_t *src, const uint8_t *srcEnd) {
int zeros = 0;
const uint8_t *src2;
for(src2 = src; src2 != srcEnd; src2++) {
if(*src2 == 0) {
zeros++;
} else if((zeros == 2 || zeros == 3) && *src2 == 1) {
src2 -= zeros;
break;
} else {
zeros = 0;
}
}
return src2 - src;
}
static uint8_t *annexb_to_extradata(const uint8_t *src, const uint8_t *srcEnd, size_t *szRet, size_t *srcSzEnd) {
const uint8_t *sps = src;
while(*sps == 0) sps++;
assert(sps[0] == 1);
sps++;
size_t szSps = annexb_parse(sps, srcEnd);
const uint8_t *pps = sps + szSps;
while(*pps == 0) pps++;
assert(pps[0] == 1);
pps++;
size_t szPps = annexb_parse(pps, srcEnd);
uint8_t *ret = malloc(*szRet = (6 + 2 + szSps + 1 + 2 + szPps));
ret[0] = 1;
ret[1] = sps[1];
ret[2] = sps[2];
ret[3] = sps[3];
ret[4] = 0xFC | (NALLENSZ - 1);
ret[5] = 0xE0 | 1;
ret[6] = szSps >> 8;
ret[7] = szSps & 0xFF;
memcpy(&ret[8], sps, szSps);
ret[8 + szSps + 0] = 1;
ret[8 + szSps + 1] = szPps >> 8;
ret[8 + szSps + 2] = szPps & 0xFF;
memcpy(&ret[8 + szSps + 3], pps, szPps);
*srcSzEnd = pps + szPps - src;
return ret;
}
static uint8_t *annexb_to_avcc(const uint8_t *src, size_t szSrc, size_t *szRet) {
size_t cap = 4096, sz = 0;
uint8_t *ret = malloc(cap);
const uint8_t *srcEnd = src + szSrc;
while(src != srcEnd) {
assert(*src == 0);
while(*src == 0) {
src++;
}
assert(*src == 1);
src++;
size_t nalSize = annexb_parse(src, srcEnd);
size_t additionSz = NALLENSZ + nalSize;
if(sz + additionSz > cap) {
ret = realloc(ret, cap = (sz + additionSz));
}
*(uint32_t*) &ret[sz] = htonl(nalSize);
memcpy(&ret[sz + NALLENSZ], src, nalSize);
sz += additionSz;
src += nalSize;
}
*szRet = sz;
return ret;
}
typedef struct {
CHiPubNode pub;
RTMP *rtmp;
RTMPPacket rtmppkt;
} Internal;
static int streamrtmp_start(CHiPubNode *pub) {
Internal *n = (Internal*) pub;
n->rtmp = RTMP_Alloc();
if(!n->rtmp) return 0;
RTMP_Init(n->rtmp);
RTMP_LogSetLevel(RTMP_LOGINFO);
RTMP_LogSetOutput(stderr);
RTMP_SetupURL(n->rtmp, CHi_Crawl(&pub->sinks[2])->data.text);
RTMP_EnableWrite(n->rtmp);
if(!RTMP_Connect(n->rtmp, NULL)) {
return 0;
}
if(!RTMP_ConnectStream(n->rtmp, 0)) {
return 0;
}
memset(&n->rtmppkt, 0, sizeof(n->rtmppkt));
RTMPPacket_Alloc(&n->rtmppkt, 4096);
return 1;
}
static int streamrtmp_stop(CHiPubNode *pub) {
Internal *n = (Internal*) pub;
RTMP_Free(n->rtmp);
n->rtmp = NULL;
return 1;
}
#define FLV_TAG_HEADER_SIZE 11
#define FLV_VIDEO_HDR_SIZE 5
#define FLV_AUDIO_HDR_SIZE 2
#define FLV_PREV_TAG_SIZE_SIZE 4
static int do_video(Internal *n) {
if(!CHi_Crawl(&n->pub.sinks[0]) || !CHi_Crawl(&n->pub.sinks[0])->data.bitstream) {
return 1;
}
CHiBSFrames *frames = CHi_Crawl(&n->pub.sinks[0])->data.bitstream;
for(size_t fi = 0; fi < frames->count; fi++) {
CHiBSFrame *f = &frames->data[fi];
size_t avccSize;
uint8_t *avcc;
if(f->flags & CUTIHI_BS_SETUP_PACKET) {
size_t annexbextradatasrcsize;
avcc = annexb_to_extradata(f->ptr, f->ptr + f->sz, &avccSize, &annexbextradatasrcsize);
} else {
avcc = annexb_to_avcc(f->ptr, f->sz, &avccSize);
}
size_t dataSize = FLV_VIDEO_HDR_SIZE + avccSize;
size_t tagSize = FLV_TAG_HEADER_SIZE + dataSize;
size_t rtmpPacketSize = tagSize + FLV_PREV_TAG_SIZE_SIZE;
uint8_t *packet = malloc(rtmpPacketSize);
// Tag
packet[0] = 9; // video
packet[1] = (dataSize >> 16) & 0xFF;
packet[2] = (dataSize >> 8) & 0xFF;
packet[3] = (dataSize >> 0) & 0xFF;
packet[4] = (f->timestamp >> 16) & 0xFF;
packet[5] = (f->timestamp >> 8) & 0xFF;
packet[6] = (f->timestamp >> 0) & 0xFF;
packet[7] = (f->timestamp >> 24) & 0xFF;
packet[8] = 0;
packet[9] = 0;
packet[10] = 0;
// Video Header
packet[11] = (f->flags & CUTIHI_BS_FLAG_KEY) ? 0x17 : 0x27;
packet[12] = (f->flags & CUTIHI_BS_SETUP_PACKET) ? 0 : 1;
packet[13] = 0;
packet[14] = 0;
packet[15] = 0;
memcpy(&packet[16], avcc, avccSize);
packet[16 + avccSize + 0] = (tagSize >> 24) & 0xFF;
packet[16 + avccSize + 1] = (tagSize >> 16) & 0xFF;
packet[16 + avccSize + 2] = (tagSize >> 8) & 0xFF;
packet[16 + avccSize + 3] = (tagSize >> 0) & 0xFF;
RTMP_Write(n->rtmp, packet, rtmpPacketSize);
free(packet);
free(avcc);
}
return 1;
}
static int do_audio(Internal *n) {
if(!CHi_Crawl(&n->pub.sinks[1]) || !CHi_Crawl(&n->pub.sinks[1])->data.bitstream) {
return 1;
}
CHiBSFrames *frames = CHi_Crawl(&n->pub.sinks[1])->data.bitstream;
for(size_t fi = 0; fi < frames->count; fi++) {
CHiBSFrame *f = &frames->data[fi];
size_t avccSize = f->sz;
uint8_t *avcc = f->ptr;
size_t dataSize = FLV_AUDIO_HDR_SIZE + avccSize;
size_t tagSize = FLV_TAG_HEADER_SIZE + dataSize;
size_t rtmpPacketSize = tagSize + FLV_PREV_TAG_SIZE_SIZE;
uint8_t *packet = malloc(rtmpPacketSize);
// Tag
packet[0] = 8; // audio
packet[1] = (dataSize >> 16) & 0xFF;
packet[2] = (dataSize >> 8) & 0xFF;
packet[3] = (dataSize >> 0) & 0xFF;
packet[4] = (f->timestamp >> 16) & 0xFF;
packet[5] = (f->timestamp >> 8) & 0xFF;
packet[6] = (f->timestamp >> 0) & 0xFF;
packet[7] = (f->timestamp >> 24) & 0xFF;
packet[8] = 0;
packet[9] = 0;
packet[10] = 0;
// Audio Header
packet[11] = (1 << 0) | (1 << 1) | (3 << 2) | (10 << 4);
packet[12] = (f->flags & CUTIHI_BS_SETUP_PACKET) ? 0 : 1;
memcpy(&packet[13], avcc, avccSize);
packet[13 + avccSize + 0] = (tagSize >> 24) & 0xFF;
packet[13 + avccSize + 1] = (tagSize >> 16) & 0xFF;
packet[13 + avccSize + 2] = (tagSize >> 8) & 0xFF;
packet[13 + avccSize + 3] = (tagSize >> 0) & 0xFF;
RTMP_Write(n->rtmp, packet, rtmpPacketSize);
free(packet);
}
return 1;
}
static int streamrtmp_perform(CHiPubNode *pub) {
Internal *n = (Internal*) pub;
if(!do_video(n)) return 0;
if(!do_audio(n)) return 0;
int fd = RTMP_Socket(n->rtmp);
fd_set sockset;
struct timeval timeout = {};
FD_ZERO(&sockset);
FD_SET(fd, &sockset);
int result = select(fd + 1, &sockset, NULL, NULL, &timeout);
if(result == 1 && FD_ISSET(fd, &sockset)) {
RTMP_ReadPacket(n->rtmp, &n->rtmppkt);
if(!RTMPPacket_IsReady(&n->rtmppkt)) {
RTMP_ClientPacket(n->rtmp, &n->rtmppkt);
}
}
return 1;
}
CUTIVIS CHiPubNode *CHi_StreamRTMP() {
Internal *ret = calloc(1, sizeof(*ret));
ret->pub.type = CUTIHI_T('CStr', 'RTMP');
ret->pub.Start = streamrtmp_start;
ret->pub.Perform = streamrtmp_perform;
ret->pub.Stop = streamrtmp_stop;
ret->pub.sinks = calloc(sizeof(*ret->pub.sinks), ret->pub.sinkCount = 3);
ret->pub.sources = calloc(sizeof(*ret->pub.sources), ret->pub.sourceCount = 0);
return &ret->pub;
}

182
hi/vpxenc.c Normal file
View File

@ -0,0 +1,182 @@
#include"node.h"
#include<vpx/vpx_encoder.h>
#include<vpx/vp8cx.h>
#include<assert.h>
#include"mode.h"
#include"img.h"
#include<math.h>
#include<smmintrin.h>
#include<string.h>
#include"minitrace.h"
#include"linearity.h"
#include"yuv.h"
typedef struct CHiEncodeVP9Node {
CHiPubNode pub;
vpx_codec_ctx_t codec;
vpx_codec_enc_cfg_t cfg;
enum {
WAITING, IN_PROGRESS
} state;
uint8_t *outY, *outU, *outV;
uint16_t strideY, strideU, strideV;
vpx_codec_iface_t *iface;
} CHiEncodeVP9Node;
static int encodevpx_perform(CHiPubNode *pub) {
CHiEncodeVP9Node *node = (void*) pub;
MTR_BEGIN("CHi", "encodevp9_perform");
pub->sources[0].type = CUTIHI_VAL_VP9BS;
pub->sources[0].data.bitstream = NULL;
if(node->state == WAITING) return 1;
CHiImage *rgbIn = (CHiImage*) CHi_Crawl(&pub->sinks[0])->data.sample;
bgra64toycbcr(rgbIn->data8, rgbIn->stride, rgbIn->width, rgbIn->height, node->outY, node->outU, node->outV, node->strideY, node->strideU, node->strideV);
vpx_image_t vpxraw;
vpxraw.fmt = VPX_IMG_FMT_I420;
vpxraw.cs = VPX_CS_BT_709;
vpxraw.range = VPX_CR_STUDIO_RANGE;
vpxraw.bit_depth = 8;
vpxraw.w = vpxraw.d_w = node->cfg.g_w;
vpxraw.h = vpxraw.d_h = node->cfg.g_h;
vpxraw.r_w = vpxraw.r_h = 0;
vpxraw.x_chroma_shift = vpxraw.y_chroma_shift = 1;
vpxraw.img_data_owner = 0;
vpxraw.self_allocd = 0;
vpxraw.bps = 12;
vpxraw.stride[VPX_PLANE_Y] = node->strideY;
vpxraw.planes[VPX_PLANE_Y] = node->outY;
vpxraw.stride[VPX_PLANE_U] = node->strideU;
vpxraw.planes[VPX_PLANE_U] = node->outU;
vpxraw.stride[VPX_PLANE_V] = node->strideV;
vpxraw.planes[VPX_PLANE_V] = node->outV;
vpx_codec_encode(&node->codec, &vpxraw, CHi_Time_Get(pub->ng) * 1000.f, 1, 0, VPX_DL_REALTIME);
CHiBSFrames *ret = malloc(sizeof(CHiBSFrames));
ret->count = 0;
vpx_codec_iter_t iter = NULL;
const vpx_codec_cx_pkt_t *pkt;
while((pkt = vpx_codec_get_cx_data(&node->codec, &iter)) != NULL) {
if(pkt->kind == VPX_CODEC_CX_FRAME_PKT) {
ret = (CHiBSFrames*) realloc(ret, sizeof(CHiBSFrames) + sizeof(CHiBSFrame) * (ret->count + 1));
ret->data[ret->count].timestamp = pkt->data.frame.pts;
ret->data[ret->count].sz = pkt->data.frame.sz;
ret->data[ret->count].flags = pkt->data.frame.flags & VPX_FRAME_IS_KEY;
ret->data[ret->count].ptr = malloc(ret->data[ret->count].sz);
memcpy(ret->data[ret->count].ptr, pkt->data.frame.buf, ret->data[ret->count].sz);
ret->count++;
}
}
pub->sources[0].data.bitstream = ret;
MTR_END("CHi", "encodevp9_perform");
return 1;
}
static void encodevpx_destroy(CHiPubNode *pub) {
CHiEncodeVP9Node *node = (void*) pub;
free(node);
}
static int encodevpx_start(CHiPubNode *pubn) {
CHiEncodeVP9Node *node = (void*) pubn;
node->state = IN_PROGRESS;
CHiImage *firstFrame = (CHiImage*) CHi_Crawl(&pubn->sinks[0])->data.sample;
vpx_codec_enc_config_default(node->iface, &node->cfg, 0);
node->cfg.g_w = firstFrame->width;
node->cfg.g_h = firstFrame->height;
node->cfg.g_timebase.num = 1;
node->cfg.g_timebase.den = 30;
node->cfg.g_lag_in_frames = 0;
node->cfg.g_threads = 8;
node->cfg.kf_mode = VPX_KF_AUTO;
node->cfg.kf_max_dist = 300;
node->cfg.rc_end_usage = VPX_VBR;
node->cfg.rc_target_bitrate = 512;
node->cfg.rc_min_quantizer = 4;
node->cfg.rc_max_quantizer = 48;
vpx_codec_enc_init(&node->codec, node->iface, &node->cfg, 0);
vpx_codec_control(&node->codec, VP8E_SET_CPUUSED, 8);
vpx_codec_control(&node->codec, VP9E_SET_ROW_MT, 1);
vpx_codec_control(&node->codec, VP9E_SET_TILE_COLUMNS, 2);
vpx_codec_control(&node->codec, VP9E_SET_TILE_ROWS, 1);
vpx_codec_control(&node->codec, VP9E_SET_TUNE_CONTENT, VP9E_CONTENT_SCREEN);
node->strideY = (node->cfg.g_w + 64) & ~63;
node->strideU = (node->cfg.g_w / 2 + 64) & ~63;
node->strideV = (node->cfg.g_w / 2 + 64) & ~63;
node->outY = (uint8_t*) _mm_malloc(node->strideY * node->cfg.g_h, 16);
node->outU = (uint8_t*) _mm_malloc(node->strideU * node->cfg.g_h / 2, 16);
node->outV = (uint8_t*) _mm_malloc(node->strideV * node->cfg.g_h / 2, 16);
return 1;
}
static int encodevpx_stop(CHiPubNode *pubn) {
CHiEncodeVP9Node *node = (void*) pubn;
node->state = WAITING;
_mm_free(node->outY);
_mm_free(node->outU);
_mm_free(node->outV);
vpx_codec_destroy(&node->codec);
return 1;
}
CUTIVIS CHiPubNode *CHi_EncodeVP8() {
CHiEncodeVP9Node *n = calloc(1, sizeof(*n));
n->pub.type = CUTIHI_T('CEnc','GVP8');
n->pub.Start = encodevpx_start;
n->pub.Perform = encodevpx_perform;
n->pub.Stop = encodevpx_stop;
n->pub.Destroy = encodevpx_destroy;
n->pub.sinks = (CHiValue*) calloc(sizeof(*n->pub.sinks), n->pub.sinkCount = 1);
n->pub.sources = (CHiValue*) calloc(sizeof(*n->pub.sources), n->pub.sourceCount = 1);
n->state = WAITING;
n->iface = vpx_codec_vp8_cx();
return &n->pub;
}
CUTIVIS CHiPubNode *CHi_EncodeVP9() {
CHiEncodeVP9Node *n = calloc(1, sizeof(*n));
n->pub.type = CUTIHI_T('CEnc','GVP9');
n->pub.Start = encodevpx_start;
n->pub.Perform = encodevpx_perform;
n->pub.Stop = encodevpx_stop;
n->pub.Destroy = encodevpx_destroy;
n->pub.sinks = (CHiValue*) calloc(sizeof(*n->pub.sinks), n->pub.sinkCount = 1);
n->pub.sources = (CHiValue*) calloc(sizeof(*n->pub.sources), n->pub.sourceCount = 1);
n->state = WAITING;
n->iface = vpx_codec_vp9_cx();
return &n->pub;
}

259
hi/x264enc.c Normal file
View File

@ -0,0 +1,259 @@
#include"node.h"
#include<stdio.h>
#include<assert.h>
#include<stdlib.h>
#include<string.h>
#include<sys/random.h>
#include<unistd.h>
#include<fcntl.h>
#include<sys/types.h>
#include<sys/stat.h>
#include<errno.h>
#include<pthread.h>
#include"mode.h"
#include"img.h"
typedef struct {
CHiPubNode pub;
char fifoIn[256];
int fdOut;
int fdIn;
pthread_t iothread;
pthread_mutex_t iomutex;
size_t outQueueLen;
uint8_t *outQueue;
size_t inQueueLen;
size_t inQueueCap;
uint8_t *inQueue;
size_t waitingForNum;
} Internal;
void *iothread_func(void *ud) {
Internal *n = ud;
while(1) {
pthread_mutex_lock(&n->iomutex);
while(1) {
uint8_t toread[4096];
ssize_t readcount = read(n->fdIn, toread, sizeof(toread));
if(readcount == -1) {
if(errno != EAGAIN && errno != EWOULDBLOCK) {
goto error;
} else {
break;
}
}
if(n->inQueueLen + readcount > n->inQueueCap) {
n->inQueue = realloc(n->inQueue, n->inQueueCap += 8192);
}
memcpy(n->inQueue + n->inQueueLen, toread, readcount);
n->inQueueLen += readcount;
if(readcount < sizeof(toread)) {
break;
}
}
//printf("inqueue %lu outqueue %lu\n\n", n->inQueueLen, n->outQueueLen);
while(n->outQueueLen > 0) {
ssize_t wrotecount = write(n->fdOut, n->outQueue, n->outQueueLen);
if(wrotecount == -1) {
if(errno != EAGAIN && errno != EWOULDBLOCK) {
goto error;
} else {
break;
}
}
memmove(n->outQueue, n->outQueue + wrotecount, n->outQueueLen - wrotecount);
n->outQueueLen -= wrotecount;
if(wrotecount < n->outQueueLen) {
break;
}
}
pthread_mutex_unlock(&n->iomutex);
}
return NULL;
error:
pthread_mutex_unlock(&n->iomutex);
return NULL;
}
int encodeh264_start(CHiPubNode *pub) {
Internal *n = (Internal*) pub;
CHiImage *firstFrame = CHi_Crawl(&pub->sinks[0])->data.sample;
#ifndef _WIN32
uint8_t randoms[4];
getrandom(randoms, sizeof(randoms), 0);
snprintf(n->fifoIn, sizeof(n->fifoIn), "/tmp/cuticlex264_%02X%02X%02X%02X.264", randoms[0], randoms[1], randoms[2], randoms[3]);
char res[256];
snprintf(res, sizeof(res), "%ix%i", firstFrame->width, firstFrame->height);
assert(mkfifo(n->fifoIn, 0666) == 0);
int fd[2];
pipe(fd);
if(fork() == 0) {
close(fd[1]);
dup2(fd[0], STDIN_FILENO);
close(fd[0]);
if(execlp("x264", "x264", "--profile", "main", "--tune", "zerolatency", "--preset", "ultrafast", "--input-csp", "bgra", "--input-depth", "16", "--input-res", res, "--aud", "--demuxer", "raw", "--fps", "30", "--qp", "50", "-o", n->fifoIn, "-", NULL) == -1) {
abort();
}
} else {
close(fd[0]);
n->fdOut = fd[1];
}
n->fdIn = open(n->fifoIn, O_RDONLY | O_NONBLOCK, 0);
fcntl(n->fdOut, F_SETFL, fcntl(n->fdOut, F_GETFL) | O_NONBLOCK);
#endif
n->outQueueLen = 0;
n->outQueue = NULL;
n->inQueueLen = 0;
n->inQueueCap = 8192;
n->inQueue = malloc(sizeof(*n->inQueue) * n->inQueueCap);
n->waitingForNum = 0;
assert(pthread_create(&n->iothread, NULL, iothread_func, n) == 0);
return 1;
}
int encodeh264_stop(CHiPubNode *pub) {
Internal *n = (Internal*) pub;
close(n->fdOut);
close(n->fdIn);
unlink(n->fifoIn);
return 1;
}
static uint8_t *find_aud(uint8_t *bs, size_t len, int *sz) {
if(len <= 3) {
return NULL;
}
for(size_t i = 3; i < len; i++) {
if(bs[i] == 9 && bs[i - 1] == 1 && bs[i - 2] == 0 && bs[i - 3] == 0) {
if(i >= 4 && bs[i - 4] == 0) {
*sz = 4;
return &bs[i - 4];
} else {
*sz = 3;
return &bs[i - 3];
}
}
}
return NULL;
}
int encodeh264_perform(CHiPubNode *pub) {
Internal *n = (Internal*) pub;
pthread_mutex_lock(&n->iomutex);
{
//if(CHi_GetMode() == CUTIHI_MODE_OFFLINE || n->waitingForNum == 0) {
CHiImage *frame = CHi_Crawl(&pub->sinks[0])->data.sample;
size_t frameSize = frame->width * 8 * frame->height;
n->outQueue = realloc(n->outQueue, n->outQueueLen + frameSize);
CHi_Restride(frame->data8, n->outQueue + n->outQueueLen, frame->stride, frame->width * 8, frame->height);
n->outQueueLen += frameSize;
// n->waitingForNum++;
//}
}
CHiBSFrames *frames = calloc(1, sizeof(*frames));
while(1) {
int aud0sz;
uint8_t *aud0 = find_aud(n->inQueue, n->inQueueLen, &aud0sz);
if(aud0 == NULL) {
break;
}
// First AUD must always be at the start because it's the delimiter
assert(aud0 == n->inQueue);
int aud1sz;
uint8_t *aud1 = find_aud(n->inQueue + aud0sz, n->inQueueLen - aud0sz, &aud1sz);
// Second AUD must exist, otherwise we don't actually know if the packet's over
if(aud1 == NULL) {
break;
}
size_t framesz = aud1 - aud0;
static int nextts = 0;
nextts++;
frames = realloc(frames, sizeof(*frames) + sizeof(CHiBSFrame) * (frames->count + 1));
//frames->data[frames->count].timestamp = CHi_Time_Get(pub->ng) * 1000;
frames->data[frames->count].timestamp = nextts * 33;
frames->data[frames->count].sz = framesz - 6;
memcpy(frames->data[frames->count].ptr = malloc(framesz - 6), aud0 + 6, framesz - 6);
frames->data[frames->count].flags = 0;
frames->count++;
memmove(n->inQueue, n->inQueue + framesz, n->inQueueLen - framesz);
n->inQueueLen -= framesz;
//if(n->waitingForNum > 0) {
// n->waitingForNum--;
//}
}
pthread_mutex_unlock(&n->iomutex);
pub->sources[0].type = CUTIHI_VAL_H264BS;
pub->sources[0].data.bitstream = frames;
return 1;
}
CUTIVIS CHiPubNode *CHi_EncodeH264() {
Internal *ret = calloc(1, sizeof(*ret));
ret->pub.type = CUTIHI_T('CEnc', 'H264');
ret->pub.Start = encodeh264_start;
ret->pub.Perform = encodeh264_perform;
ret->pub.Stop = encodeh264_stop;
ret->pub.sinks = calloc(sizeof(*ret->pub.sinks), ret->pub.sinkCount = 1);
ret->pub.sources = calloc(sizeof(*ret->pub.sources), ret->pub.sourceCount = 1);
return &ret->pub;
}

127
hi/yuv.h Normal file
View File

@ -0,0 +1,127 @@
#pragma once
#include<smmintrin.h>
#include"linearity.h"
// strides must be 16-byte aligned
static inline void bgra64toycbcr(uint8_t *bgra64, size_t bgra64stride, size_t imgW, size_t imgH, uint8_t *outY, uint8_t *outU, uint8_t *outV, size_t strideY, size_t strideU, size_t strideV) {
#pragma omp parallel for simd
for(size_t y = 0; y < imgH; y += 2) {
for(size_t x = 0; x < imgW; x += 16) {
__m128i rgb, partY, partU, partV, dotY, dotU, dotV;
__m128i wipY0 = _mm_setzero_si128();
__m128i wipY1 = _mm_setzero_si128();
__m128i wipU = _mm_setzero_si128();
__m128i wipV = _mm_setzero_si128();
__m128i tempU = _mm_setzero_si128();
__m128i tempV = _mm_setzero_si128();
#define DO_DAH_DOO_DOO(LoOrHi, shufY, shufUV) \
/* Process top two */\
rgb = _mm_srli_epi16(apply_gamma_epi16(line0, _mm_set1_ps(1 / 2.2f)), 8); \
/* Start matrix multiplication (BT.709 + full->studio range) */\
partY = _mm_mullo_epi16(rgb, _mm_set_epi16(0, 47, 157, 16, 0, 47, 157, 16));\
partU = _mm_mullo_epi16(rgb, _mm_set_epi16(0, -25, -85, 110, 0, -25, -85, 110));\
partV = _mm_mullo_epi16(rgb, _mm_set_epi16(0, 110, -100, -10, 0, 110, -100, -10));\
/* Finish mat-mul with dot products */\
dotY = _mm_madd_epi16(partY, _mm_set1_epi16(1));\
dotY = _mm_hadd_epi32(dotY, _mm_setzero_si128());\
dotU = _mm_madd_epi16(partU, _mm_set1_epi16(1));\
dotU = _mm_hadd_epi32(dotU, _mm_setzero_si128());\
dotV = _mm_madd_epi16(partV, _mm_set1_epi16(1));\
dotV = _mm_hadd_epi32(dotV, _mm_setzero_si128());\
/* Insert Ys */\
wipY0 = _mm_or_si128(wipY0, _mm_shuffle_epi8(dotY, shufY));\
/* Save top UV */\
tempU = dotU;\
tempV = dotV;\
\
/* Process bottom two */\
rgb = _mm_srli_epi16(apply_gamma_epi16(line1, _mm_set1_ps(1 / 2.2f)), 8); \
/* Start matrix multiplication (BT.709 + full->studio range) */\
partY = _mm_mullo_epi16(rgb, _mm_set_epi16(0, 47, 157, 16, 0, 47, 157, 16));\
partU = _mm_mullo_epi16(rgb, _mm_set_epi16(0, -25, -85, 110, 0, -25, -85, 110));\
partV = _mm_mullo_epi16(rgb, _mm_set_epi16(0, 110, -100, -10, 0, 110, -100, -10));\
/* Finish mat-mul with dot products */\
dotY = _mm_madd_epi16(partY, _mm_set1_epi16(1));\
dotY = _mm_hadd_epi32(dotY, _mm_setzero_si128());\
dotU = _mm_madd_epi16(partU, _mm_set1_epi16(1));\
dotU = _mm_hadd_epi32(dotU, _mm_setzero_si128());\
dotV = _mm_madd_epi16(partV, _mm_set1_epi16(1));\
dotV = _mm_hadd_epi32(dotV, _mm_setzero_si128());\
/* Insert Ys */\
wipY1 = _mm_or_si128(wipY1, _mm_shuffle_epi8(dotY, shufY));\
/* Save bottom UVs */\
tempU = _mm_hadd_epi32(_mm_add_epi32(tempU, dotU), _mm_setzero_si128());\
tempV = _mm_hadd_epi32(_mm_add_epi32(tempV, dotV), _mm_setzero_si128());\
\
/* Insert UVs */\
wipU = _mm_or_si128(wipU, _mm_shuffle_epi8(_mm_srli_epi32(tempU, 2), shufUV));\
wipV = _mm_or_si128(wipV, _mm_shuffle_epi8(_mm_srli_epi32(tempV, 2), shufUV));
__m128i line0 = _mm_load_si128((__m128i*) ((uintptr_t) bgra64 + (y + 0) * bgra64stride + (x + 0) * 8)); // Load two pixels
__m128i line1 = _mm_load_si128((__m128i*) ((uintptr_t) bgra64 + (y + 1) * bgra64stride + (x + 0) * 8)); // Load two pixels
DO_DAH_DOO_DOO(_mm_unpacklo_epi8,
_mm_set_epi8(-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, 5, 1),
_mm_set_epi8(-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, 1));
line0 = _mm_load_si128((__m128i*) ((uintptr_t) bgra64 + (y + 0) * bgra64stride + (x + 2) * 8)); // Load two pixels
line1 = _mm_load_si128((__m128i*) ((uintptr_t) bgra64 + (y + 1) * bgra64stride + (x + 2) * 8)); // Load two pixels
DO_DAH_DOO_DOO(_mm_unpacklo_epi8,
_mm_set_epi8(-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, 5, 1, -128, -128),
_mm_set_epi8(-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, 1, -128));
line0 = _mm_load_si128((__m128i*) ((uintptr_t) bgra64 + (y + 0) * bgra64stride + (x + 4) * 8)); // Load two pixels
line1 = _mm_load_si128((__m128i*) ((uintptr_t) bgra64 + (y + 1) * bgra64stride + (x + 4) * 8)); // Load two pixels
DO_DAH_DOO_DOO(_mm_unpacklo_epi8,
_mm_set_epi8(-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, 5, 1, -128, -128, -128, -128),
_mm_set_epi8(-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, 1, -128, -128));
line0 = _mm_load_si128((__m128i*) ((uintptr_t) bgra64 + (y + 0) * bgra64stride + (x + 6) * 8)); // Load two pixels
line1 = _mm_load_si128((__m128i*) ((uintptr_t) bgra64 + (y + 1) * bgra64stride + (x + 6) * 8)); // Load two pixels
DO_DAH_DOO_DOO(_mm_unpacklo_epi8,
_mm_set_epi8(-128, -128, -128, -128, -128, -128, -128, -128, 5, 1, -128, -128, -128, -128, -128, -128),
_mm_set_epi8(-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, 1, -128, -128, -128));
line0 = _mm_load_si128((__m128i*) ((uintptr_t) bgra64 + (y + 0) * bgra64stride + (x + 8) * 8)); // Load two pixels
line1 = _mm_load_si128((__m128i*) ((uintptr_t) bgra64 + (y + 1) * bgra64stride + (x + 8) * 8)); // Load two pixels
DO_DAH_DOO_DOO(_mm_unpacklo_epi8,
_mm_set_epi8(-128, -128, -128, -128, -128, -128, 5, 1, -128, -128, -128, -128, -128, -128, -128, -128),
_mm_set_epi8(-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, 1, -128, -128, -128, -128));
line0 = _mm_load_si128((__m128i*) ((uintptr_t) bgra64 + (y + 0) * bgra64stride + (x + 10) * 8)); // Load two pixels
line1 = _mm_load_si128((__m128i*) ((uintptr_t) bgra64 + (y + 1) * bgra64stride + (x + 10) * 8)); // Load two pixels
DO_DAH_DOO_DOO(_mm_unpacklo_epi8,
_mm_set_epi8(-128, -128, -128, -128, 5, 1, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128),
_mm_set_epi8(-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, 1, -128, -128, -128, -128, -128));
line0 = _mm_load_si128((__m128i*) ((uintptr_t) bgra64 + (y + 0) * bgra64stride + (x + 12) * 8)); // Load two pixels
line1 = _mm_load_si128((__m128i*) ((uintptr_t) bgra64 + (y + 1) * bgra64stride + (x + 12) * 8)); // Load two pixels
DO_DAH_DOO_DOO(_mm_unpacklo_epi8,
_mm_set_epi8(-128, -128, 5, 1, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128),
_mm_set_epi8(-128, -128, -128, -128, -128, -128, -128, -128, -128, 1, -128, -128, -128, -128, -128, -128));
line0 = _mm_load_si128((__m128i*) ((uintptr_t) bgra64 + (y + 0) * bgra64stride + (x + 14) * 8)); // Load two pixels
line1 = _mm_load_si128((__m128i*) ((uintptr_t) bgra64 + (y + 1) * bgra64stride + (x + 14) * 8)); // Load two pixels
DO_DAH_DOO_DOO(_mm_unpacklo_epi8,
_mm_set_epi8( 5, 1, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128),
_mm_set_epi8(-128, -128, -128, -128, -128, -128, -128, -128, 1, -128, -128, -128, -128, -128, -128, -128));
_mm_stream_si128((__m128i*) &outY[strideY * (y + 0) + x], _mm_add_epi8(_mm_set1_epi8(16), wipY0));
_mm_stream_si128((__m128i*) &outY[strideY * (y + 1) + x], _mm_add_epi8(_mm_set1_epi8(16), wipY1));
_mm_storeu_si128((__m128i*) &outU[strideU * (y / 2) + x / 2], _mm_add_epi8(wipU, _mm_set1_epi8(128)));
_mm_storeu_si128((__m128i*) &outV[strideV * (y / 2) + x / 2], _mm_add_epi8(wipV, _mm_set1_epi8(128)));
}
}
}