// cuticle/hi/webmdec.cpp
#include"node.h"
#include<stdlib.h>
#include<webm/webm_parser.h>
#include<webm/file_reader.h>
#include<vpx/vpx_decoder.h>
#include<vpx/vp8dx.h>
#include<assert.h>
#include<time.h>
#include"img.h"
#include<string.h>
#include<tmmintrin.h>
#include<smmintrin.h>
#include<opus.h>
#include<math.h>
#include"minitrace.h"
#include"linearity.h"
struct CHiMovieNode;
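/* First pass over a freshly opened file: gathers what movie_perform needs
   before decoding starts -- duration (OnInfo), track numbers, codec id and
   video dimensions (OnTrackEntry), the segment's byte offset
   (OnSegmentBegin), and the cue points used for seeking (OnCuePoint). */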
struct CueParser : webm::Callback {
CHiMovieNode *node;
CueParser(CHiMovieNode *node) : node(node) {}
webm::Status OnInfo(const webm::ElementMetadata& metadata, const webm::Info& info) final override;
webm::Status OnTrackEntry(const webm::ElementMetadata &metadata, const webm::TrackEntry &info) override;
webm::Status OnSegmentBegin(const webm::ElementMetadata &metadata, webm::Action *action) override;
webm::Status OnCuePoint(const webm::ElementMetadata &metadata, const webm::CuePoint &cue) override;
};
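/* Decodes the Opus audio track into a fixed ring buffer. SAMPLE_ARR is one
   second of mono samples at 48 kHz; sampleI is the decode-side write index,
   sampleReadI the read index drained by movie_perform. */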
struct AudioParser final : webm::Callback {
uint64_t audioTrack;
uint64_t currentClusterTimecode;
uint64_t until;
bool stop = true;
bool skip = false;
#define SAMPLE_ARR 48000
OpusDecoder *opus = nullptr; /* initialized so the destructor is safe even before opus_decoder_create runs */
size_t sampleI = 0;
size_t sampleReadI = 0;
int16_t sampleArray[SAMPLE_ARR];
~AudioParser() {
if(opus) {
opus_decoder_destroy(opus);
}
}
webm::Status OnClusterBegin(const webm::ElementMetadata &metadata, const webm::Cluster &cluster, webm::Action *action) final override {
currentClusterTimecode = cluster.timecode.value();
return webm::Status(webm::Status::kOkCompleted);
}
webm::Status OnBlockBegin(const webm::ElementMetadata &metadata, const webm::Block &block, webm::Action *action) final override {
if(block.track_number != audioTrack) {
skip = true;
*action = webm::Action::kSkip;
} else {
skip = false;
if(currentClusterTimecode + block.timecode >= until) {
stop = true;
} else {
stop = false;
}
}
return webm::Status(webm::Status::kOkCompleted);
}
webm::Status OnSimpleBlockBegin(const webm::ElementMetadata &metadata, const webm::SimpleBlock &block, webm::Action *action) final override {
return OnBlockBegin(metadata, block, action);
}
webm::Status OnFrame(const webm::FrameMetadata &metadata, webm::Reader *reader, uint64_t *bytes_remaining) final override {
uint8_t *data = new uint8_t[metadata.size];
/* webm::Reader::Read may deliver fewer bytes than requested, so loop until
   the whole frame is buffered. */
uint64_t totalRead = 0;
while(totalRead < metadata.size) {
uint64_t actuallyRead = 0;
webm::Status status = reader->Read(metadata.size - totalRead, data + totalRead, &actuallyRead);
totalRead += actuallyRead;
if(actuallyRead == 0 || status.code != webm::Status::kOkPartial) break;
}
if(!skip) {
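/* An Opus frame holds at most 120 ms = 5760 samples at 48 kHz mono, so a
   6400-sample scratch buffer always suffices. */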
int16_t *f = new int16_t[6400];
int numSamples = opus_decode(opus, data, metadata.size, f, 6400, 0);
if(numSamples >= 0) {
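/* Append to the ring buffer, splitting the copy in two when the write runs
   past the end of the array. */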
if(SAMPLE_ARR - sampleI >= (size_t) numSamples) {
memcpy(&sampleArray[sampleI], f, sizeof(*sampleArray) * numSamples);
sampleI = (sampleI + numSamples) % SAMPLE_ARR;
} else {
memcpy(&sampleArray[sampleI], f, sizeof(*sampleArray) * (SAMPLE_ARR - sampleI));
memcpy(sampleArray, &f[SAMPLE_ARR - sampleI], sizeof(*sampleArray) * (numSamples - SAMPLE_ARR + sampleI));
sampleI = (sampleI + numSamples) % SAMPLE_ARR;
}
}
delete[] f;
}
delete[] data;
*bytes_remaining = 0;
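/* Returning kOkPartial makes WebmParser::Feed return while staying
   resumable, which is how decoding stops once the target time is reached. */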
return webm::Status{stop ? webm::Status::kOkPartial : webm::Status::kOkCompleted};
}
};
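/* Decodes the VP8/VP9 video track. Every block is fed to the codec so the
   decoder stays in sync, but only the frame straddling the requested
   timestamp ("until") is converted to a 16-bit output image. */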
struct FrameParser final : webm::Callback {
uint64_t videoTrack, audioTrack;
uint64_t currentClusterTimecode;
uint64_t until;
bool skip = true;
vpx_image *lastImg = nullptr;
CHiImage *output = nullptr;
vpx_codec_ctx_t *codec;
vpx_codec_iter_t *iter;
uint64_t currentlyAt = 0;
webm::Status OnClusterBegin(const webm::ElementMetadata &metadata, const webm::Cluster &cluster, webm::Action *action) final override {
currentClusterTimecode = cluster.timecode.value();
return webm::Status(webm::Status::kOkCompleted);
}
webm::Status OnBlockBegin(const webm::ElementMetadata &metadata, const webm::Block &block, webm::Action *action) final override {
if(block.track_number != videoTrack) {
*action = webm::Action::kSkip;
} else {
if(currentlyAt <= until && currentClusterTimecode + block.timecode >= until) {
skip = false;
} else {
skip = true;
}
currentlyAt = currentClusterTimecode + block.timecode;
}
return webm::Status(webm::Status::kOkCompleted);
}
webm::Status OnSimpleBlockBegin(const webm::ElementMetadata &metadata, const webm::SimpleBlock &block, webm::Action *action) final override {
return OnBlockBegin(metadata, block, action);
}
webm::Status OnFrame(const webm::FrameMetadata &metadata, webm::Reader *reader, uint64_t *bytes_remaining) final override {
uint8_t *data = new uint8_t[metadata.size];
/* webm::Reader::Read may deliver fewer bytes than requested, so loop until
   the whole frame is buffered. */
uint64_t totalRead = 0;
while(totalRead < metadata.size) {
uint64_t actuallyRead = 0;
webm::Status status = reader->Read(metadata.size - totalRead, data + totalRead, &actuallyRead);
totalRead += actuallyRead;
if(actuallyRead == 0 || status.code != webm::Status::kOkPartial) break;
}
if(vpx_codec_decode(codec, data, metadata.size, NULL, 0) == VPX_CODEC_OK) {
/* libvpx expects the frame iterator to be reset to NULL before iterating
   the output of each decode call. */
*iter = NULL;
vpx_image *img = NULL;
while((img = vpx_codec_get_frame(codec, iter)) != NULL) {
if(lastImg) vpx_img_free(lastImg);
lastImg = img;
}
}
if(!skip && lastImg) {
assert(lastImg->fmt & VPX_IMG_FMT_PLANAR);
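/* CHi_Image_New(2, 4, ...) appears to be (bytes per channel, channels,
   stride, width, height, data): four 16-bit channels, with the stride
   rounded up to 16 pixels so the _mm_stream_si128 stores below stay
   16-byte aligned. */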
output = CHi_Image_New(2, 4, 8 * ((lastImg->d_w + 15) & ~15), lastImg->d_w, lastImg->d_h, NULL);
__m128i z = _mm_set1_epi32(0);
__m128i alpha = _mm_set_epi32(0xFFFF0000, 0, 0xFFFF0000, 0);
__m128i sub16 = _mm_set1_epi32(-16);
__m128i sub128 = _mm_set1_epi32(-128);
#pragma omp parallel for simd
for(size_t y = 0; y < lastImg->d_h; y++) {
for(size_t x = 0; x < lastImg->d_w; x += 4) {
__m128i ychannel = _mm_loadu_si128((__m128i*) (lastImg->planes[VPX_PLANE_Y] + y * lastImg->stride[VPX_PLANE_Y] + x));
__m128i uchannel = _mm_loadu_si128((__m128i*) (lastImg->planes[VPX_PLANE_U] + y / 2 * lastImg->stride[VPX_PLANE_U] + x / 2));
uchannel = _mm_unpacklo_epi8(uchannel, uchannel); // stretch color channels
__m128i vchannel = _mm_loadu_si128((__m128i*) (lastImg->planes[VPX_PLANE_V] + y / 2 * lastImg->stride[VPX_PLANE_V] + x / 2));
vchannel = _mm_unpacklo_epi8(vchannel, vchannel); // stretch color channels
/* Interleave with zeroes to push out 12 of 16 pixels (we're working in groups of four) */
__m128i ylo = _mm_add_epi32(sub16, _mm_unpacklo_epi16(_mm_unpacklo_epi8(ychannel, z), z));
__m128i ulo = _mm_add_epi32(sub128, _mm_unpacklo_epi16(_mm_unpacklo_epi8(uchannel, z), z));
__m128i vlo = _mm_add_epi32(sub128, _mm_unpacklo_epi16(_mm_unpacklo_epi8(vchannel, z), z));
/* Start parallel matrix multiplication (BT.709 matrix * 255/219 to turn from studio to full range) */
/*
/ 1.164 0 1.833 \
RGB = | 1.164 -0.218 -0.545 | * (Y - 16, U - 128, V - 128)
\ 1.164 2.160 0 /
*/
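/* The multipliers below are those matrix entries in roughly 8.8 fixed point
   (entry * 256), so 8-bit studio-range input expands to full 16-bit output. */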
__m128i partY = _mm_mullo_epi32(ylo, _mm_set1_epi32(297));
__m128i partVR = _mm_mullo_epi32(vlo, _mm_set1_epi32(467));
__m128i partUG = _mm_mullo_epi32(ulo, _mm_set1_epi32(-56));
__m128i partVG = _mm_mullo_epi32(vlo, _mm_set1_epi32(-139));
__m128i partUB = _mm_mullo_epi32(ulo, _mm_set1_epi32(551));
/* Finish matrix multiplication by summing up parts (finishing the dot products), clip */
__m128i r = _mm_max_epi32(z, _mm_min_epi32(_mm_set1_epi32(0xFFFF), _mm_add_epi32(partY, partVR)));
__m128i g = _mm_max_epi32(z, _mm_min_epi32(_mm_set1_epi32(0xFFFF), _mm_add_epi32(partY, _mm_add_epi32(partUG, partVG))));
__m128i b = _mm_max_epi32(z, _mm_min_epi32(_mm_set1_epi32(0xFFFF), _mm_add_epi32(partY, partUB)));
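/* apply_gamma_epi32 (linearity.h) presumably converts to linear light,
   using 2.2 as an approximation of the transfer curve. */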
r = apply_gamma_epi32(r, _mm_set1_ps(2.2f));
g = apply_gamma_epi32(g, _mm_set1_ps(2.2f));
b = apply_gamma_epi32(b, _mm_set1_ps(2.2f));
__m128i rgblo = _mm_or_si128(alpha, _mm_or_si128(_mm_or_si128(_mm_unpacklo_epi32(b, z), _mm_slli_si128(_mm_unpacklo_epi32(g, z), 2)), _mm_slli_si128(_mm_unpacklo_epi32(r, z), 4)));
_mm_stream_si128((__m128i*) ((uintptr_t) output->data16 + y * output->stride + x * 8 + 0), rgblo);
__m128i rgbhi = _mm_or_si128(alpha, _mm_or_si128(_mm_or_si128(_mm_unpackhi_epi32(b, z), _mm_slli_si128(_mm_unpackhi_epi32(g, z), 2)), _mm_slli_si128(_mm_unpackhi_epi32(r, z), 4)));
_mm_stream_si128((__m128i*) ((uintptr_t) output->data16 + y * output->stride + x * 8 + 16), rgbhi);
}
}
}
delete[] data;
*bytes_remaining = 0;
webm::Status ret{skip ? webm::Status::kOkCompleted : webm::Status::kOkPartial};
skip = true;
return ret;
}
};
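/* Node state. Video and audio each get their own FILE/FileReader/WebmParser
   so the two tracks can be parsed and stopped independently; decoders and
   parse positions persist across movie_perform calls. */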
struct CHiMovieNode {
int64_t timeCache = -1;
char *filepathCache = nullptr;
FILE *vf = nullptr;
webm::FileReader vreader;
webm::WebmParser vparser;
FrameParser fp;
std::string vcodecid;
size_t vw, vh;
FILE *af = nullptr;
webm::FileReader areader;
webm::WebmParser aparser;
AudioParser ap;
std::vector<webm::CuePoint> cuepoints;
uint64_t segmentOff, videoTrack, audioTrack;
double duration;
vpx_codec_ctx_t codec;
vpx_codec_iter_t iter;
CHiPubNode pub;
};
webm::Status CueParser::OnInfo(const webm::ElementMetadata &metadata, const webm::Info &info) {
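/* Assumes the default TimecodeScale of 1,000,000 ns (1 ms per tick), so
   Duration / 1000 is seconds. */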
node->duration = info.duration.value() / 1000;
return webm::Status(webm::Status::kOkCompleted);
}
webm::Status CueParser::OnTrackEntry(const webm::ElementMetadata &metadata, const webm::TrackEntry &info) {
if(info.track_type.value() == webm::TrackType::kVideo && info.is_enabled.value() /*&& !info.uses_lacing.value()*/) {
node->vcodecid = info.codec_id.value();
node->videoTrack = info.track_number.value();
node->vw = info.video.value().pixel_width.value();
node->vh = info.video.value().pixel_height.value();
}
if(info.track_type.value() == webm::TrackType::kAudio && info.is_enabled.value()) {
node->audioTrack = info.track_number.value();
}
return webm::Status(webm::Status::kOkCompleted);
}
webm::Status CueParser::OnSegmentBegin(const webm::ElementMetadata &metadata, webm::Action *action) {
node->segmentOff = metadata.position + metadata.header_size;
return webm::Status(webm::Status::kOkCompleted);
}
webm::Status CueParser::OnCuePoint(const webm::ElementMetadata &metadata, const webm::CuePoint &cue) {
node->cuepoints.push_back(cue);
return webm::Status(webm::Status::kOkCompleted);
}
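/* Per-frame entry point. Rough flow:
   1. If the filepath sink changed, (re)open the file, gather metadata with
      CueParser, and (re)initialize the VP8/VP9 and Opus decoders.
   2. On a backward seek or a jump of more than 5 s forward, reposition both
      file handles at the nearest preceding cue point.
   3. Feed both parsers until they pass the requested timestamp.
   4. Publish the decoded frame on source 0 and one tick of audio on source 1. */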
static int movie_perform(CHiPubNode *pub) {
CHiMovieNode *node = (CHiMovieNode*) ((uintptr_t) pub - offsetof(CHiMovieNode, pub));
MTR_BEGIN("CHi", "movie_perform");
int64_t t;
if(pub->sinks[1].type == CUTIHI_VAL_NONE) t = CHi_Time_Get(pub->ng) * 1000;
else t = CHi_Crawl(&pub->sinks[1])->data.vec4[0] * 1000;
pub->sources[0].type = CUTIHI_VAL_SAMPLE;
char *filepath = CHi_Crawl(&pub->sinks[0])->data.text;
if(!node->filepathCache || strcmp(node->filepathCache, filepath) != 0) {
/* Release resources from a previously opened file before reopening. */
if(node->vf) {
fclose(node->vf);
vpx_codec_destroy(&node->codec);
}
if(node->af) fclose(node->af);
node->vf = fopen(filepath, "rb");
node->af = fopen(filepath, "rb");
if(!node->vf || !node->af) {
if(node->vf) { fclose(node->vf); node->vf = nullptr; }
if(node->af) { fclose(node->af); node->af = nullptr; }
return 1;
}
new (&node->vreader) webm::FileReader{node->vf};
new (&node->vparser) webm::WebmParser{};
new (&node->areader) webm::FileReader{node->af};
new (&node->aparser) webm::WebmParser{};
node->cuepoints.clear();
CueParser cp{node};
node->vparser.Feed(&cp, &node->vreader);
free(node->filepathCache);
node->filepathCache = strdup(filepath);
node->timeCache = std::numeric_limits<int64_t>::max();
if(node->vcodecid == "V_VP9") {
vpx_codec_dec_init(&node->codec, vpx_codec_vp9_dx(), NULL, 0);
} else if(node->vcodecid == "V_VP8") {
vpx_codec_dec_init(&node->codec, vpx_codec_vp8_dx(), NULL, 0);
} else {
return 1;
}
new (&node->fp) FrameParser{};
node->fp.videoTrack = node->videoTrack;
node->fp.codec = &node->codec;
node->fp.iter = &node->iter;
/* Destroy the old parser first so a previous Opus decoder is not leaked. */
node->ap.~AudioParser();
new (&node->ap) AudioParser{};
int error;
node->ap.opus = opus_decoder_create(48000, 1, &error);
node->ap.audioTrack = node->audioTrack;
}
if(t == node->timeCache) {
return 1;
}
if(pub->sources[0].data.sample) {
CHi_Image_Free(pub->sources[0].data.sample);
pub->sources[0].data.sample = nullptr;
}
if(t >= 0 && t < 1000 * node->duration) {
if(t < node->timeCache || (t - node->timeCache) > 5000) {
if(node->cuepoints.size() > 0) {
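/* Linear scan for the last cue point at or before t (falling back to the
   first); both streams then seek to that cue's cluster. */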
size_t i;
for(i = 0; i < node->cuepoints.size(); i++) {
if(t < node->cuepoints[i].time.value()) {
break;
}
}
if(i != 0) i--;
for(webm::Element<webm::CueTrackPositions> &p : node->cuepoints[i].cue_track_positions) {
if(p.value().track.value() == node->videoTrack) {
fseek(node->vf, node->segmentOff + p.value().cluster_position.value(), SEEK_SET);
fseek(node->af, node->segmentOff + p.value().cluster_position.value(), SEEK_SET);
break;
}
}
} else {
fseek(node->vf, 0, SEEK_SET);
fseek(node->af, 0, SEEK_SET);
}
}
node->fp.until = t;
node->ap.until = t;
/* Always necessary, even when no fseek() happened; without it the parsers
   resume from stale internal state and invoke no callbacks. */
node->vparser.DidSeek();
node->aparser.DidSeek();
node->vparser.Feed(&node->fp, &node->vreader);
node->aparser.Feed(&node->ap, &node->areader);
pub->sources[0].data.sample = node->fp.output;
node->timeCache = t;
}
if(!pub->sources[0].data.sample) {
pub->sources[0].data.sample = CHi_Image_New(2, 4, 8 * node->vw, node->vw, node->vh, NULL);
}
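/* Emit one tick's worth of mono 48 kHz audio, copied out of the AudioParser
   ring buffer (in two pieces when the read position wraps). */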
size_t width = roundf(CHi_Time_GetDelta(pub->ng) * 48000);
CHiImage *aud = CHi_Image_New(4, 1, 4 * width, width, 1, NULL);
if(node->pub.ng->compilationStatus == CUTIHI_COMP_RUNNING) {
if(node->ap.sampleReadI + width > SAMPLE_ARR) {
memcpy(aud->data16, node->ap.sampleArray + node->ap.sampleReadI, sizeof(*node->ap.sampleArray) * (SAMPLE_ARR - node->ap.sampleReadI));
memcpy(aud->data16 + SAMPLE_ARR - node->ap.sampleReadI, node->ap.sampleArray, sizeof(*node->ap.sampleArray) * (width - SAMPLE_ARR + node->ap.sampleReadI));
} else {
memcpy(aud->data16, node->ap.sampleArray + node->ap.sampleReadI, sizeof(*node->ap.sampleArray) * width);
}
node->ap.sampleReadI = (node->ap.sampleReadI + width) % SAMPLE_ARR;
} else {
memset(aud->data16, 0, aud->stride * aud->height);
}
if(pub->sources[1].data.sample) CHi_Image_Free(pub->sources[1].data.sample);
pub->sources[1].type = CUTIHI_VAL_SAMPLE;
pub->sources[1].data.sample = aud;
pub->clean = 0;
MTR_END("CHi", "movie_perform");
return 1;
}
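/* Tear down: file handles, the vpx codec (only initialized once a video file
   was opened, hence the vf guard), and, via ~AudioParser, the Opus decoder. */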
static void movie_destroy(CHiPubNode *pub) {
CHiMovieNode *node = (CHiMovieNode*) ((uintptr_t) pub - offsetof(CHiMovieNode, pub));
if(node->filepathCache) {
free(node->filepathCache);
}
if(node->af) {
fclose(node->af);
}
if(node->vf) {
fclose(node->vf);
vpx_codec_destroy(&node->codec);
}
node->~CHiMovieNode();
free(node);
}
extern "C" {
CUTIVIS CHiPubNode *CHi_Movie() {
CHiMovieNode *n = (CHiMovieNode*) calloc(1, sizeof(*n));
new (n) CHiMovieNode();
n->pub.type = CUTIHI_T('CMov','ie ');
n->pub.Perform = movie_perform;
n->pub.Destroy = movie_destroy;
n->pub.clean = 0;
n->pub.sinkCount = 2;
n->pub.sinks = (CHiValue*) calloc(sizeof(*n->pub.sinks), n->pub.sinkCount);
n->pub.sinks[1].type = CUTIHI_VAL_VEC4;
n->pub.sinks[1].data.vec4[0] = 0;
n->pub.sourceCount = 2;
n->pub.sources = (CHiValue*) calloc(sizeof(*n->pub.sources), n->pub.sourceCount);
return &n->pub;
}
}