/* Movie node: demuxes a WebM file, decodes VP8/VP9 video (libvpx) and Opus
 * audio (libopus), and publishes the results as node-graph samples. */
#include"node.h"
|
|
|
|
#include<stdlib.h>
|
|
#include<webm/webm_parser.h>
|
|
#include<webm/file_reader.h>
|
|
|
|
#include<vpx/vpx_decoder.h>
|
|
#include<vpx/vp8dx.h>
|
|
|
|
#include<assert.h>
|
|
#include<time.h>
|
|
|
|
#include"img.h"
|
|
|
|
#include<string.h>
|
|
|
|
#include<tmmintrin.h>
|
|
#include<smmintrin.h>
|
|
|
|
#include<opus.h>
|
|
|
|
#include<math.h>
|
|
|
|
#include"minitrace.h"
|
|
|
|
#include"linearity.h"
|
|
|
|
struct CHiMovieNode;
|
|
/* One-shot metadata pass over a WebM file: collects the segment offset,
 * duration, track numbers/dimensions, and the cue (seek) points into the
 * owning CHiMovieNode.  Fed once per file change in movie_perform(). */
struct CueParser : webm::Callback {

	CHiMovieNode *node; /* back-pointer to the node being populated (borrowed) */

	CueParser(CHiMovieNode *node) : node(node) {}

	/* Records the segment duration (seconds) into node->duration. */
	webm::Status OnInfo(const webm::ElementMetadata& metadata, const webm::Info& info) final override;

	/* Records codec id / track number / dimensions for the first enabled
	 * video track, and the track number of the first enabled audio track. */
	webm::Status OnTrackEntry(const webm::ElementMetadata &metadata, const webm::TrackEntry &info) override;

	/* Records the byte offset of the segment payload; cue positions are
	 * relative to it. */
	webm::Status OnSegmentBegin(const webm::ElementMetadata &metadata, webm::Action *action) override;

	/* Appends each cue point to node->cuepoints for later seeking. */
	webm::Status OnCuePoint(const webm::ElementMetadata &metadata, const webm::CuePoint &cue) override;

};
|
|
|
|
struct AudioParser final : webm::Callback {
|
|
|
|
uint64_t audioTrack;
|
|
uint64_t currentClusterTimecode;
|
|
uint64_t untihl;
|
|
|
|
bool stop = true;
|
|
bool skip = false;
|
|
|
|
#define SAMPLE_ARR 48000
|
|
OpusDecoder *opus;
|
|
size_t sampleI = 0;
|
|
size_t sampleReadI = 0;
|
|
int16_t sampleArray[SAMPLE_ARR];
|
|
|
|
~AudioParser() {
|
|
if(opus) {
|
|
opus_decoder_destroy(opus);
|
|
}
|
|
}
|
|
|
|
webm::Status OnClusterBegin(const webm::ElementMetadata &metadata, const webm::Cluster &cluster, webm::Action *action) final override {
|
|
currentClusterTimecode = cluster.timecode.value();
|
|
return webm::Status(webm::Status::kOkCompleted);
|
|
}
|
|
webm::Status OnBlockBegin(const webm::ElementMetadata &metadata, const webm::Block &block, webm::Action *action) final override {
|
|
if(block.track_number != audioTrack) {
|
|
skip = true;
|
|
*action = webm::Action::kSkip;
|
|
} else {
|
|
skip = false;
|
|
if(currentClusterTimecode + block.timecode >= untihl) {
|
|
stop = true;
|
|
} else {
|
|
stop = false;
|
|
}
|
|
}
|
|
return webm::Status(webm::Status::kOkCompleted);
|
|
}
|
|
webm::Status OnSimpleBlockBegin(const webm::ElementMetadata &metadata, const webm::SimpleBlock &block, webm::Action *action) final override {
|
|
return OnBlockBegin(metadata, block, action);
|
|
}
|
|
webm::Status OnFrame(const webm::FrameMetadata &metadata, webm::Reader *reader, uint64_t *bytes_remaining) final override {
|
|
uint8_t *data = new uint8_t[metadata.size];
|
|
uint64_t actuallyRead;
|
|
reader->Read(metadata.size, data, &actuallyRead);
|
|
|
|
if(!skip) {
|
|
int16_t *f = new int16_t[6400];
|
|
int numSamples = opus_decode(opus, data, metadata.size, f, 6400, 0);
|
|
|
|
if(numSamples >= 0) {
|
|
if(SAMPLE_ARR - sampleI >= (size_t) numSamples) {
|
|
memcpy(&sampleArray[sampleI], f, sizeof(*sampleArray) * numSamples);
|
|
sampleI = (sampleI + numSamples) % SAMPLE_ARR;
|
|
} else {
|
|
memcpy(&sampleArray[sampleI], f, sizeof(*sampleArray) * (SAMPLE_ARR - sampleI));
|
|
memcpy(sampleArray, &f[SAMPLE_ARR - sampleI], sizeof(*sampleArray) * (numSamples - SAMPLE_ARR + sampleI));
|
|
|
|
sampleI = (sampleI + numSamples) % SAMPLE_ARR;
|
|
}
|
|
}
|
|
|
|
delete[] f;
|
|
}
|
|
|
|
delete[] data;
|
|
*bytes_remaining = 0;
|
|
|
|
return webm::Status{stop ? webm::Status::kOkPartial : webm::Status::kOkCompleted};
|
|
}
|
|
};
|
|
struct FrameParser final : webm::Callback {
|
|
uint64_t videoTrack, audioTrack;
|
|
uint64_t currentClusterTimecode;
|
|
uint64_t untihl;
|
|
|
|
bool skip = true;
|
|
|
|
vpx_image *lastImg = nullptr;
|
|
|
|
CHiImage *output = nullptr;
|
|
|
|
vpx_codec_ctx_t *codec;
|
|
vpx_codec_iter_t *iter;
|
|
|
|
uint64_t currentlyAt = 0;
|
|
|
|
webm::Status OnClusterBegin(const webm::ElementMetadata &metadata, const webm::Cluster &cluster, webm::Action *action) final override {
|
|
currentClusterTimecode = cluster.timecode.value();
|
|
return webm::Status(webm::Status::kOkCompleted);
|
|
}
|
|
webm::Status OnBlockBegin(const webm::ElementMetadata &metadata, const webm::Block &block, webm::Action *action) final override {
|
|
/*if(block.track_number == videoTrack) {
|
|
printf("%lu %lu %i\n", currentClusterTimecode + block.timecode, untihl, currentlyAt <= untihl && currentClusterTimecode + block.timecode >= untihl);
|
|
}*/
|
|
if(block.track_number != videoTrack) {
|
|
*action = webm::Action::kSkip;
|
|
} else {
|
|
if(currentlyAt <= untihl && currentClusterTimecode + block.timecode >= untihl) {
|
|
skip = false;
|
|
} else {
|
|
skip = true;
|
|
}
|
|
currentlyAt = currentClusterTimecode + block.timecode;
|
|
}
|
|
return webm::Status(webm::Status::kOkCompleted);
|
|
}
|
|
webm::Status OnSimpleBlockBegin(const webm::ElementMetadata &metadata, const webm::SimpleBlock &block, webm::Action *action) final override {
|
|
return OnBlockBegin(metadata, block, action);
|
|
}
|
|
webm::Status OnFrame(const webm::FrameMetadata &metadata, webm::Reader *reader, uint64_t *bytes_remaining) final override {
|
|
//printf("FRAME WITH SKIP %i\n", skip);
|
|
uint8_t *data = new uint8_t[metadata.size];
|
|
uint64_t actuallyRead;
|
|
reader->Read(metadata.size, data, &actuallyRead);
|
|
vpx_codec_decode(codec, data, metadata.size, NULL, 0);
|
|
vpx_image *img = NULL;
|
|
while((img = vpx_codec_get_frame(codec, iter)) != NULL) {
|
|
if(lastImg) vpx_img_free(lastImg);
|
|
lastImg = img;
|
|
}
|
|
if(!skip && lastImg) {
|
|
assert(lastImg->fmt & VPX_IMG_FMT_PLANAR);
|
|
|
|
output = CHi_Image_New(2, 4, 8 * ((lastImg->d_w + 15) & ~15), lastImg->d_w, lastImg->d_h, NULL);
|
|
|
|
__m128i z = _mm_set1_epi32(0);
|
|
__m128i alpha = _mm_set_epi32(0xFFFF0000, 0, 0xFFFF0000, 0);
|
|
__m128i sub16 = _mm_set1_epi32(-16);
|
|
__m128i sub128 = _mm_set1_epi32(-128);
|
|
#pragma omp parallel for simd
|
|
for(size_t y = 0; y < lastImg->d_h; y++) {
|
|
for(size_t x = 0; x < lastImg->d_w; x += 4) {
|
|
__m128i ychannel = _mm_loadu_si128((__m128i*) (lastImg->planes[VPX_PLANE_Y] + y * lastImg->stride[VPX_PLANE_Y] + x));
|
|
__m128i uchannel = _mm_loadu_si128((__m128i*) (lastImg->planes[VPX_PLANE_U] + y / 2 * lastImg->stride[VPX_PLANE_U] + x / 2));
|
|
uchannel = _mm_unpacklo_epi8(uchannel, uchannel); // stretch color channels
|
|
__m128i vchannel = _mm_loadu_si128((__m128i*) (lastImg->planes[VPX_PLANE_V] + y / 2 * lastImg->stride[VPX_PLANE_V] + x / 2));
|
|
vchannel = _mm_unpacklo_epi8(vchannel, vchannel); // stretch color channels
|
|
|
|
/* Interleave with zeroes to push out 12 of 16 pixels (we're working in groups of four) */
|
|
__m128i ylo = _mm_add_epi32(sub16, _mm_unpacklo_epi16(_mm_unpacklo_epi8(ychannel, z), z));
|
|
__m128i ulo = _mm_add_epi32(sub128, _mm_unpacklo_epi16(_mm_unpacklo_epi8(uchannel, z), z));
|
|
__m128i vlo = _mm_add_epi32(sub128, _mm_unpacklo_epi16(_mm_unpacklo_epi8(vchannel, z), z));
|
|
|
|
/* Start parallel matrix multiplication (BT.709 matrix * 255/219 to turn from studio to full range) */
|
|
/*
|
|
/ 1.164 0 1.833 \
|
|
RGB = | 1.164 -0.218 -0.545 | * (Y - 16, U - 128, V - 128)
|
|
\ 1.164 2.160 0 /
|
|
*/
|
|
__m128i partY = _mm_mullo_epi32(ylo, _mm_set1_epi32(297));
|
|
__m128i partVR = _mm_mullo_epi32(vlo, _mm_set1_epi32(467));
|
|
__m128i partUG = _mm_mullo_epi32(ulo, _mm_set1_epi32(-56));
|
|
__m128i partVG = _mm_mullo_epi32(vlo, _mm_set1_epi32(-139));
|
|
__m128i partUB = _mm_mullo_epi32(ulo, _mm_set1_epi32(551));
|
|
|
|
/* Finish matrix multiplication by summing up parts (finishing the dot products), clip */
|
|
__m128i r = _mm_max_epi32(z, _mm_min_epi32(_mm_set1_epi32(0xFFFF), _mm_add_epi32(partY, partVR)));
|
|
__m128i g = _mm_max_epi32(z, _mm_min_epi32(_mm_set1_epi32(0xFFFF), _mm_add_epi32(partY, _mm_add_epi32(partUG, partVG))));
|
|
__m128i b = _mm_max_epi32(z, _mm_min_epi32(_mm_set1_epi32(0xFFFF), _mm_add_epi32(partY, partUB)));
|
|
|
|
r = apply_gamma_epi32(r, _mm_set1_ps(2.2f));
|
|
g = apply_gamma_epi32(g, _mm_set1_ps(2.2f));
|
|
b = apply_gamma_epi32(b, _mm_set1_ps(2.2f));
|
|
|
|
__m128i rgblo = _mm_or_si128(alpha, _mm_or_si128(_mm_or_si128(_mm_unpacklo_epi32(b, z), _mm_slli_si128(_mm_unpacklo_epi32(g, z), 2)), _mm_slli_si128(_mm_unpacklo_epi32(r, z), 4)));
|
|
|
|
_mm_stream_si128((__m128i*) ((uintptr_t) output->data16 + y * output->stride + x * 8 + 0), rgblo);
|
|
|
|
__m128i rgbhi = _mm_or_si128(alpha, _mm_or_si128(_mm_or_si128(_mm_unpackhi_epi32(b, z), _mm_slli_si128(_mm_unpackhi_epi32(g, z), 2)), _mm_slli_si128(_mm_unpackhi_epi32(r, z), 4)));
|
|
|
|
_mm_stream_si128((__m128i*) ((uintptr_t) output->data16 + y * output->stride + x * 8 + 16), rgbhi);
|
|
}
|
|
}
|
|
}
|
|
|
|
delete[] data;
|
|
*bytes_remaining = 0;
|
|
|
|
webm::Status ret{skip ? webm::Status::kOkCompleted : webm::Status::kOkPartial};
|
|
skip = true;
|
|
return ret;
|
|
}
|
|
};
|
|
|
|
/* Full state for one movie node.  The public CHiPubNode lives at the end;
 * callbacks recover the container with offsetof(CHiMovieNode, pub). */
struct CHiMovieNode {
	int64_t timeCache = -1;  /* last time (ms) a frame was produced for; -1 = never */
	char *filepathCache;     /* strdup'd path of the currently opened file, or NULL */

	/* Video side: its own FILE handle, reader, parser, and frame decoder. */
	FILE *vf;
	webm::FileReader vreader;
	webm::WebmParser vparser;
	FrameParser fp;
	std::string vcodecid;    /* e.g. "V_VP9" / "V_VP8" (from the track entry) */
	size_t vw, vh;           /* video dimensions in pixels */

	/* Audio side: independent handle into the same file so audio and video
	 * can be fed at different file positions. */
	FILE *af;
	webm::FileReader areader;
	webm::WebmParser aparser;
	AudioParser ap;

	std::vector<webm::CuePoint> cuepoints; /* seek index gathered by CueParser */
	uint64_t segmentOff, videoTrack, audioTrack; /* segment payload offset; track numbers */

	double duration; /* segment duration in seconds (set by CueParser::OnInfo) */

	vpx_codec_ctx_t codec; /* initialized only while vf is non-NULL */
	vpx_codec_iter_t iter;

	CHiPubNode pub; /* public node interface; must stay the last member used via offsetof */
};
|
|
|
|
/* Stores the segment duration on the node, converted to seconds.
 * NOTE(review): assumes the default 1 ms timecode scale — confirm. */
webm::Status CueParser::OnInfo(const webm::ElementMetadata &metadata, const webm::Info &info) {
	const double durationTicks = info.duration.value();
	node->duration = durationTicks / 1000.0;
	return webm::Status{webm::Status::kOkCompleted};
}
|
|
|
|
/* Captures the enabled video track (codec id, number, dimensions) and the
 * enabled audio track number on the node.  Later matching tracks overwrite
 * earlier ones, same as the original chained-if form. */
webm::Status CueParser::OnTrackEntry(const webm::ElementMetadata &metadata, const webm::TrackEntry &info) {
	const bool enabled = info.is_enabled.value();

	switch(info.track_type.value()) {
	case webm::TrackType::kVideo:
		if(enabled /*&& !info.uses_lacing.value()*/) {
			node->vcodecid = info.codec_id.value();
			node->videoTrack = info.track_number.value();
			node->vw = info.video.value().pixel_width.value();
			node->vh = info.video.value().pixel_height.value();
		}
		break;
	case webm::TrackType::kAudio:
		if(enabled) {
			node->audioTrack = info.track_number.value();
		}
		break;
	default:
		break;
	}

	return webm::Status{webm::Status::kOkCompleted};
}
|
|
/* Remembers where the segment payload starts; cue cluster positions are
 * byte offsets relative to this point. */
webm::Status CueParser::OnSegmentBegin(const webm::ElementMetadata &metadata, webm::Action *action) {
	const uint64_t payloadStart = metadata.position + metadata.header_size;
	node->segmentOff = payloadStart;
	return webm::Status{webm::Status::kOkCompleted};
}
|
|
/* Collects every cue point; movie_perform() scans this list to seek. */
webm::Status CueParser::OnCuePoint(const webm::ElementMetadata &metadata, const webm::CuePoint &cue) {
	node->cuepoints.emplace_back(cue);
	return webm::Status{webm::Status::kOkCompleted};
}
|
|
|
|
/* Per-tick node callback: (re)opens the movie when the file path sink
 * changed, seeks via cue points when the requested time jumped, decodes the
 * frame at time t into source[0], and emits one tick's worth of audio
 * samples into source[1].
 *
 * Fixes over the previous version: old file handles and the vpx codec are
 * now released on file change; fopen results are checked; every return path
 * balances MTR_BEGIN/MTR_END; a stale FrameParser output pointer can no
 * longer be re-published after being freed. */
static int movie_perform(CHiPubNode *pub) {
	CHiMovieNode *node = (CHiMovieNode*) ((uintptr_t) pub - offsetof(CHiMovieNode, pub));

	MTR_BEGIN("CHi", "movie_perform");

	/* Requested time in ms: either the node graph clock or the time sink. */
	int64_t t;
	if(pub->sinks[1].type == CUTIHI_VAL_NONE) t = CHi_Time_Get(pub->ng) * 1000;
	else t = CHi_Crawl(&pub->sinks[1])->data.vec4[0] * 1000;

	pub->sources[0].type = CUTIHI_VAL_SAMPLE;

	char *filepath = CHi_Crawl(&pub->sinks[0])->data.text;

	if(!node->filepathCache || strcmp(node->filepathCache, filepath) != 0) {
		/* Release everything belonging to the previously opened file
		 * (fix: fd + codec leak on file change).  The codec is initialized
		 * iff vf is non-NULL — same invariant movie_destroy relies on. */
		if(node->vf) {
			vpx_codec_destroy(&node->codec);
			fclose(node->vf);
			node->vf = NULL;
		}
		if(node->af) {
			fclose(node->af);
			node->af = NULL;
		}

		node->vf = fopen(filepath, "rb");
		node->af = fopen(filepath, "rb");
		if(!node->vf || !node->af) {
			/* Fix: a failed open previously handed a NULL FILE* to the
			 * readers.  Keep the cache unset so the open is retried. */
			if(node->vf) { fclose(node->vf); node->vf = NULL; }
			if(node->af) { fclose(node->af); node->af = NULL; }
			MTR_END("CHi", "movie_perform");
			return 1;
		}

		new (&node->vreader) webm::FileReader{node->vf};
		new (&node->vparser) webm::WebmParser{};
		new (&node->areader) webm::FileReader{node->af};
		new (&node->aparser) webm::WebmParser{};

		node->cuepoints.clear();

		/* Metadata pass: duration, tracks, segment offset, cue points. */
		CueParser cp{node};
		node->vparser.Feed(&cp, &node->vreader);

		free(node->filepathCache);
		node->filepathCache = strdup(filepath);
		node->timeCache = std::numeric_limits<int64_t>::max();

		if(node->vcodecid == "V_VP9") {
			vpx_codec_dec_init(&node->codec, vpx_codec_vp9_dx(), NULL, 0);
		} else if(node->vcodecid == "V_VP8") {
			vpx_codec_dec_init(&node->codec, vpx_codec_vp8_dx(), NULL, 0);
		} else {
			/* Unsupported codec.  Fix: close the files and drop the cache so
			 * the node doesn't later use a file whose codec was never
			 * initialized; balance the trace span. */
			fclose(node->vf); node->vf = NULL;
			fclose(node->af); node->af = NULL;
			free(node->filepathCache);
			node->filepathCache = NULL;
			MTR_END("CHi", "movie_perform");
			return 1;
		}

		new (&node->fp) FrameParser{};
		node->fp.videoTrack = node->videoTrack;
		node->fp.codec = &node->codec;
		node->fp.iter = &node->iter;

		new (&node->ap) AudioParser{};
		int error = OPUS_OK;
		/* On failure this returns NULL; the AudioParser skips decoding then. */
		node->ap.opus = opus_decoder_create(48000, 1, &error);
		node->ap.audioTrack = node->audioTrack;
	}

	if(t == node->timeCache) {
		MTR_END("CHi", "movie_perform"); /* fix: was unbalanced */
		return 1;
	}

	if(pub->sources[0].data.sample) {
		CHi_Image_Free(pub->sources[0].data.sample);
		pub->sources[0].data.sample = nullptr;
	}

	if(t >= 0 && t < 1000 * node->duration) {

		/* Seek when jumping backwards or more than 5 s forwards. */
		if(t < node->timeCache || (t - node->timeCache) > 5000) {

			if(node->cuepoints.size() > 0) {
				/* Find the last cue point at or before t. */
				size_t i;
				for(i = 0; i < node->cuepoints.size(); i++) {
					if(t < node->cuepoints[i].time.value()) {
						break;
					}
				}
				if(i != 0) i--;

				for(webm::Element<webm::CueTrackPositions> &p : node->cuepoints[i].cue_track_positions) {
					if(p.value().track.value() == node->videoTrack) {
						fseek(node->vf, node->segmentOff + p.value().cluster_position.value(), SEEK_SET);
						fseek(node->af, node->segmentOff + p.value().cluster_position.value(), SEEK_SET);
						break;
					}
				}
			} else {
				/* No cue index: restart from the beginning. */
				fseek(node->vf, 0, SEEK_SET);
				fseek(node->af, 0, SEEK_SET);
			}
		}

		node->fp.untihl = t;
		node->ap.untihl = t;

		/* Always necessary for some reason, else stops parsing after seek (as in no callbacks called).. */
		node->vparser.DidSeek();
		node->aparser.DidSeek();

		node->vparser.Feed(&node->fp, &node->vreader);
		node->aparser.Feed(&node->ap, &node->areader);

		/* Take ownership of the converted frame.  Fix: clear fp.output so a
		 * tick that produces no new frame cannot re-publish the pointer we
		 * free above (use-after-free). */
		pub->sources[0].data.sample = node->fp.output;
		node->fp.output = nullptr;

		node->timeCache = t;
	}

	/* Out of range or no frame produced: publish a blank image. */
	if(!pub->sources[0].data.sample) {
		pub->sources[0].data.sample = CHi_Image_New(2, 4, 8 * node->vw, node->vw, node->vh, NULL);
	}

	/* Drain one tick's worth of samples from the audio ring buffer. */
	size_t width = roundf(CHi_Time_GetDelta(pub->ng) * 48000);
	CHiImage *aud = CHi_Image_New(4, 1, 4 * width, width, 1, NULL);
	if(node->pub.ng->compilationStatus == CUTIHI_COMP_RUNNING) {
		if(node->ap.sampleReadI + width > SAMPLE_ARR) {
			/* Read wraps around the end of the ring. */
			memcpy(aud->data16, node->ap.sampleArray + node->ap.sampleReadI, sizeof(*node->ap.sampleArray) * (SAMPLE_ARR - node->ap.sampleReadI));
			memcpy(aud->data16 + SAMPLE_ARR - node->ap.sampleReadI, node->ap.sampleArray, sizeof(*node->ap.sampleArray) * (width - SAMPLE_ARR + node->ap.sampleReadI));
		} else {
			memcpy(aud->data16, node->ap.sampleArray + node->ap.sampleReadI, sizeof(*node->ap.sampleArray) * width);
		}
		node->ap.sampleReadI = (node->ap.sampleReadI + width) % SAMPLE_ARR;
	} else {
		/* Not running: emit silence. */
		memset(aud->data16, 0, aud->stride * aud->height);
	}

	if(pub->sources[1].data.sample) CHi_Image_Free(pub->sources[1].data.sample);
	pub->sources[1].type = CUTIHI_VAL_SAMPLE;
	pub->sources[1].data.sample = aud;

	pub->clean = 0;

	MTR_END("CHi", "movie_perform");

	return 1;
}
|
|
|
|
/* Node destructor callback: releases file handles, the vpx codec, and the
 * C++ members, then frees the calloc'd node itself. */
static void movie_destroy(CHiPubNode *pub) {
	CHiMovieNode *node = (CHiMovieNode*) ((uintptr_t) pub - offsetof(CHiMovieNode, pub));

	/* free(NULL) is a no-op, so no guard needed (fix: redundant check). */
	free(node->filepathCache);

	if(node->af) {
		fclose(node->af);
	}

	/* The codec is initialized iff the video file was opened successfully. */
	if(node->vf) {
		fclose(node->vf);
		vpx_codec_destroy(&node->codec);
	}

	/* Runs member destructors (AudioParser releases the Opus decoder). */
	node->~CHiMovieNode();

	free(node);
}
|
|
|
|
extern "C" {
|
|
CUTIVIS CHiPubNode *CHi_Movie() {
|
|
CHiMovieNode *n = (CHiMovieNode*) calloc(1, sizeof(*n));
|
|
new (n) CHiMovieNode();
|
|
n->pub.type = CUTIHI_T('CMov','ie ');
|
|
n->pub.Perform = movie_perform;
|
|
n->pub.Destroy = movie_destroy;
|
|
n->pub.clean = 0;
|
|
n->pub.sinkCount = 2;
|
|
n->pub.sinks = (CHiValue*) calloc(sizeof(*n->pub.sinks), n->pub.sinkCount);
|
|
n->pub.sinks[1].type = CUTIHI_VAL_VEC4;
|
|
n->pub.sinks[1].data.vec4[0] = 0;
|
|
n->pub.sourceCount = 2;
|
|
n->pub.sources = (CHiValue*) calloc(sizeof(*n->pub.sources), n->pub.sourceCount);
|
|
return &n->pub;
|
|
}
|
|
}
|