#include "node.h"
/* NOTE(review): the angle-bracket include list (and all other <...> spans,
 * e.g. template arguments) was destroyed in the original text; the headers
 * below are reconstructed from usage — verify against upstream. */
#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <limits>
#include <new>
#include <string>
#include <vector>
#include "img.h"
#include <immintrin.h>
#include <opus/opus.h>
#include <vpx/vp8dx.h>
#include <vpx/vpx_decoder.h>
#include <webm/callback.h>
#include <webm/file_reader.h>
#include <webm/webm_parser.h>
#include "linearity.h"

struct CHiMovieNode;

/* First pass over the container: records stream-level metadata (duration,
 * track numbers, video geometry, codec id, segment offset) and the cue
 * points used later for coarse seeking. */
struct CueParser : webm::Callback {
	CHiMovieNode *node;
	CueParser(CHiMovieNode *node) : node(node) {}
	webm::Status OnInfo(const webm::ElementMetadata &metadata, const webm::Info &info) final override;
	webm::Status OnTrackEntry(const webm::ElementMetadata &metadata, const webm::TrackEntry &info) override;
	webm::Status OnSegmentBegin(const webm::ElementMetadata &metadata, webm::Action *action) override;
	webm::Status OnCuePoint(const webm::ElementMetadata &metadata, const webm::CuePoint &cue) override;
};

/* Decodes Opus blocks of one audio track into a fixed-size ring buffer of
 * mono 48 kHz int16 samples. Parsing is stopped (kOkPartial) once the block
 * timestamps reach `untihl`, so movie_perform() can drain incrementally. */
struct AudioParser final : webm::Callback {
	uint64_t audioTrack;             /* track number to decode */
	uint64_t currentClusterTimecode; /* timecode of the cluster being read */
	uint64_t untihl;                 /* decode until this timestamp */
	bool stop = true;                /* true once untihl has been reached */
	bool skip = false;               /* current block belongs to another track */
#define SAMPLE_ARR 48000             /* ring capacity: one second @ 48 kHz */
	OpusDecoder *opus;
	size_t sampleI = 0;              /* ring write index */
	size_t sampleReadI = 0;          /* ring read index (advanced by movie_perform) */
	int16_t sampleArray[SAMPLE_ARR];

	webm::Status OnClusterBegin(const webm::ElementMetadata &metadata, const webm::Cluster &cluster, webm::Action *action) final override {
		currentClusterTimecode = cluster.timecode.value();
		return webm::Status(webm::Status::kOkCompleted);
	}

	webm::Status OnBlockBegin(const webm::ElementMetadata &metadata, const webm::Block &block, webm::Action *action) final override {
		if(block.track_number != audioTrack) {
			skip = true;
			*action = webm::Action::kSkip;
		} else {
			skip = false;
			/* Block timecodes are relative to their cluster. */
			stop = currentClusterTimecode + block.timecode >= untihl;
		}
		return webm::Status(webm::Status::kOkCompleted);
	}

	webm::Status OnSimpleBlockBegin(const webm::ElementMetadata &metadata, const webm::SimpleBlock &block, webm::Action *action) final override {
		return OnBlockBegin(metadata, block, action);
	}

	webm::Status OnFrame(const webm::FrameMetadata &metadata, webm::Reader *reader, uint64_t *bytes_remaining) final override {
		uint8_t *data = new uint8_t[metadata.size];
		/* Reader::Read may return short reads; loop until the frame is complete. */
		uint64_t total = 0;
		for(;;) {
			uint64_t got = 0;
			reader->Read(metadata.size - total, data + total, &got);
			total += got;
			if(got == 0 || total >= metadata.size) break;
		}
		if(!skip) {
			/* 6400 samples is enough for the longest (120 ms) Opus frame at
			 * 48 kHz mono; stack buffer avoids a per-frame allocation. */
			int16_t pcm[6400];
			int numSamples = opus_decode(opus, data, (opus_int32) total, pcm, 6400, 0);
			if(numSamples > 0) {
				size_t n = (size_t) numSamples;
				size_t tail = SAMPLE_ARR - sampleI; /* room before the ring wraps */
				if(tail >= n) {
					memcpy(&sampleArray[sampleI], pcm, sizeof(*sampleArray) * n);
				} else {
					memcpy(&sampleArray[sampleI], pcm, sizeof(*sampleArray) * tail);
					memcpy(sampleArray, &pcm[tail], sizeof(*sampleArray) * (n - tail));
				}
				sampleI = (sampleI + n) % SAMPLE_ARR;
			}
		}
		delete[] data;
		*bytes_remaining = 0;
		/* kOkPartial suspends WebmParser::Feed() so we decode no further
		 * than the requested time. */
		return webm::Status{stop ? webm::Status::kOkPartial : webm::Status::kOkCompleted};
	}
};

/* Decodes VP8/VP9 video blocks; when the stream position crosses the
 * requested time `untihl`, converts the most recently decoded picture from
 * studio-range YUV420 to linear full-range BGRA16 (SSE) into `output`. */
struct FrameParser final : webm::Callback {
	uint64_t videoTrack;          /* track number to decode */
	uint64_t currentClusterTimecode;
	uint64_t untihl;              /* target timestamp */
	bool skip = true;             /* true = decode only, no conversion */
	vpx_image *lastImg = nullptr; /* most recently decoded picture (decoder-owned) */
	CHiImage *output = nullptr;   /* converted frame; ownership taken by movie_perform */
	vpx_codec_ctx_t *codec;
	vpx_codec_iter_t *iter;
	uint64_t currentlyAt = 0;     /* timestamp of the last video block seen */

	webm::Status OnClusterBegin(const webm::ElementMetadata &metadata, const webm::Cluster &cluster, webm::Action *action) final override {
		currentClusterTimecode = cluster.timecode.value();
		return webm::Status(webm::Status::kOkCompleted);
	}

	webm::Status OnBlockBegin(const webm::ElementMetadata &metadata, const webm::Block &block, webm::Action *action) final override {
		if(block.track_number != videoTrack) {
			*action = webm::Action::kSkip;
		} else {
			/* Convert exactly the first frame at/after untihl: the previous
			 * block was still before it and this one is at or past it. */
			uint64_t blockTime = currentClusterTimecode + block.timecode;
			skip = !(currentlyAt <= untihl && blockTime >= untihl);
			currentlyAt = blockTime;
		}
		return webm::Status(webm::Status::kOkCompleted);
	}

	webm::Status OnSimpleBlockBegin(const webm::ElementMetadata &metadata, const webm::SimpleBlock &block, webm::Action *action) final override {
		return OnBlockBegin(metadata, block, action);
	}

	webm::Status OnFrame(const webm::FrameMetadata &metadata, webm::Reader *reader, uint64_t *bytes_remaining) final override {
		uint8_t *data = new uint8_t[metadata.size];
		/* Reader::Read may return short reads; loop until the frame is complete. */
		uint64_t total = 0;
		for(;;) {
			uint64_t got = 0;
			reader->Read(metadata.size - total, data + total, &got);
			total += got;
			if(got == 0 || total >= metadata.size) break;
		}
		vpx_codec_decode(codec, data, (unsigned int) metadata.size, NULL, 0);
		/* BUGFIX: the frame iterator must be re-initialized to NULL after
		 * every decode call (vpx_decoder.h contract); otherwise
		 * vpx_codec_get_frame() returns NULL for every frame after the very
		 * first one and the video stays frozen. */
		*iter = NULL;
		vpx_image *img = NULL;
		while((img = vpx_codec_get_frame(codec, iter)) != NULL) {
			/* Decoder-owned images: vpx_img_free() is a no-op for them, kept
			 * for safety in case a codec hands out self-allocated images. */
			if(lastImg) vpx_img_free(lastImg);
			lastImg = img;
		}
		if(!skip && lastImg) {
			assert(lastImg->fmt & VPX_IMG_FMT_PLANAR);
			/* Free a conversion that movie_perform never claimed. */
			if(output) CHi_Image_Free(output);
			/* 16-bit BGRA, row stride padded to 16 pixels so the 4-pixel SSE
			 * loop below may safely overrun the visible width. */
			output = CHi_Image_New(2, 4, 8 * ((lastImg->d_w + 15) & ~15), lastImg->d_w, lastImg->d_h, NULL);
			__m128i z = _mm_set1_epi32(0);
			__m128i alpha = _mm_set_epi32(0xFFFF0000, 0, 0xFFFF0000, 0); /* A=0xFFFF per 8-byte pixel pair */
			__m128i sub16 = _mm_set1_epi32(-16);    /* studio-range Y offset */
			__m128i sub128 = _mm_set1_epi32(-128);  /* chroma bias */
			#pragma omp parallel for simd
			for(size_t y = 0; y < lastImg->d_h; y++) {
				for(size_t x = 0; x < lastImg->d_w; x += 4) {
					__m128i ychannel = _mm_loadu_si128((__m128i*) (lastImg->planes[VPX_PLANE_Y] + y * lastImg->stride[VPX_PLANE_Y] + x));
					/* Chroma planes are subsampled 2x2; duplicate bytes to
					 * stretch them back to luma resolution. */
					__m128i uchannel = _mm_loadu_si128((__m128i*) (lastImg->planes[VPX_PLANE_U] + y / 2 * lastImg->stride[VPX_PLANE_U] + x / 2));
					uchannel = _mm_unpacklo_epi8(uchannel, uchannel);
					__m128i vchannel = _mm_loadu_si128((__m128i*) (lastImg->planes[VPX_PLANE_V] + y / 2 * lastImg->stride[VPX_PLANE_V] + x / 2));
					vchannel = _mm_unpacklo_epi8(vchannel, vchannel);
					/* Widen the four pixels of interest from u8 to i32 and
					 * remove the studio-range offsets. */
					__m128i ylo = _mm_add_epi32(sub16, _mm_unpacklo_epi16(_mm_unpacklo_epi8(ychannel, z), z));
					__m128i ulo = _mm_add_epi32(sub128, _mm_unpacklo_epi16(_mm_unpacklo_epi8(uchannel, z), z));
					__m128i vlo = _mm_add_epi32(sub128, _mm_unpacklo_epi16(_mm_unpacklo_epi8(vchannel, z), z));
					/* BT.709 matrix scaled by 255/219 (studio -> full range),
					 * fixed-point:
					 *   / 1.164      0   1.833 \
					 *   | 1.164 -0.218  -0.545 | * (Y - 16, U - 128, V - 128)
					 *   \ 1.164  2.160      0  /                              */
					__m128i partY = _mm_mullo_epi32(ylo, _mm_set1_epi32(297));
					__m128i partVR = _mm_mullo_epi32(vlo, _mm_set1_epi32(467));
					__m128i partUG = _mm_mullo_epi32(ulo, _mm_set1_epi32(-56));
					__m128i partVG = _mm_mullo_epi32(vlo, _mm_set1_epi32(-139));
					__m128i partUB = _mm_mullo_epi32(ulo, _mm_set1_epi32(551));
					/* Finish the dot products and clip to [0, 0xFFFF]. */
					__m128i r = _mm_max_epi32(z, _mm_min_epi32(_mm_set1_epi32(0xFFFF), _mm_add_epi32(partY, partVR)));
					__m128i g = _mm_max_epi32(z, _mm_min_epi32(_mm_set1_epi32(0xFFFF), _mm_add_epi32(partY, _mm_add_epi32(partUG, partVG))));
					__m128i b = _mm_max_epi32(z, _mm_min_epi32(_mm_set1_epi32(0xFFFF), _mm_add_epi32(partY, partUB)));
					/* Linearize (gamma 2.2) — helper from linearity.h. */
					r = apply_gamma_epi32(r, _mm_set1_ps(2.2f));
					g = apply_gamma_epi32(g, _mm_set1_ps(2.2f));
					b = apply_gamma_epi32(b, _mm_set1_ps(2.2f));
					/* Interleave into 16-bit B,G,R,A and stream out two
					 * pixels per store (non-temporal; rows are 16B aligned). */
					__m128i rgblo = _mm_or_si128(alpha, _mm_or_si128(_mm_or_si128(_mm_unpacklo_epi32(b, z), _mm_slli_si128(_mm_unpacklo_epi32(g, z), 2)), _mm_slli_si128(_mm_unpacklo_epi32(r, z), 4)));
					_mm_stream_si128((__m128i*) ((uintptr_t) output->data16 + y * output->stride + x * 8 + 0), rgblo);
					__m128i rgbhi = _mm_or_si128(alpha, _mm_or_si128(_mm_or_si128(_mm_unpackhi_epi32(b, z), _mm_slli_si128(_mm_unpackhi_epi32(g, z), 2)), _mm_slli_si128(_mm_unpackhi_epi32(r, z), 4)));
					_mm_stream_si128((__m128i*) ((uintptr_t) output->data16 + y * output->stride + x * 8 + 16), rgbhi);
				}
			}
		}
		delete[] data;
		*bytes_remaining = 0;
		/* Returning kOkPartial suspends Feed() right after the converted
		 * frame; `skip = true` ensures at most one conversion per Feed(). */
		webm::Status ret{skip ? webm::Status::kOkCompleted : webm::Status::kOkPartial};
		skip = true;
		return ret;
	}
};

/* Node state, created by CHi_Movie() and reused across perform calls; the
 * parsers/readers are re-constructed in place whenever the path changes. */
struct CHiMovieNode {
	int64_t timeCache = -1;        /* timestamp of the last produced frame (ms) */
	char *filepathCache = nullptr; /* owned (strdup'd) copy of the open path */
	FILE *vf = nullptr;            /* video-side file handle */
	webm::FileReader vreader;
	webm::WebmParser vparser;
	FrameParser fp;
	std::string vcodecid;          /* e.g. "V_VP9" */
	size_t vw, vh;                 /* video dimensions in pixels */
	FILE *af = nullptr;            /* separate handle so audio seeks independently */
	webm::FileReader areader;
	webm::WebmParser aparser;
	AudioParser ap;
	std::vector<webm::CuePoint> cuepoints;
	uint64_t segmentOff, videoTrack, audioTrack;
	double duration;               /* seconds */
	vpx_codec_ctx_t codec;
	vpx_codec_iter_t iter;
	bool decodersReady = false;    /* vpx + opus decoders are initialized */
	CHiPubNode pub;
};

webm::Status CueParser::OnInfo(const webm::ElementMetadata &metadata, const webm::Info &info) {
	/* Duration appears to be in milliseconds here (default timecode scale);
	 * store seconds. TODO(review): confirm against the file's TimecodeScale. */
	node->duration = info.duration.value() / 1000;
	return webm::Status(webm::Status::kOkCompleted);
}

webm::Status CueParser::OnTrackEntry(const webm::ElementMetadata &metadata, const webm::TrackEntry &info) {
	/* Remember the first enabled video and audio track. */
	if(info.track_type.value() == webm::TrackType::kVideo && info.is_enabled.value() /*&& !info.uses_lacing.value()*/) {
		node->vcodecid = info.codec_id.value();
		node->videoTrack = info.track_number.value();
		node->vw = info.video.value().pixel_width.value();
		node->vh = info.video.value().pixel_height.value();
	}
	if(info.track_type.value() == webm::TrackType::kAudio && info.is_enabled.value()) {
		node->audioTrack = info.track_number.value();
	}
	return webm::Status(webm::Status::kOkCompleted);
}

webm::Status CueParser::OnSegmentBegin(const webm::ElementMetadata &metadata, webm::Action *action) {
	/* Cue positions are relative to the segment payload start. */
	node->segmentOff = metadata.position + metadata.header_size;
	return webm::Status(webm::Status::kOkCompleted);
}

webm::Status CueParser::OnCuePoint(const webm::ElementMetadata &metadata, const webm::CuePoint &cue) {
	node->cuepoints.push_back(cue);
	return webm::Status(webm::Status::kOkCompleted);
}

/* Perform callback: produces the video frame at the requested time in
 * sources[0] and one delta-time worth of 48 kHz audio in sources[1].
 * sinks[0] = file path, sinks[1] = optional explicit time (seconds). */
static int movie_perform(CHiPubNode *pub) {
	CHiMovieNode *node = (CHiMovieNode*) ((uintptr_t) pub - offsetof(CHiMovieNode, pub));
	int64_t t; /* requested time in milliseconds */
	if(pub->sinks[1].type == CUTIHI_VAL_NONE)
		t = CHi_Time_Get(pub->ng) * 1000;
	else
		t = CHi_Crawl(&pub->sinks[1])->data.vec4[0] * 1000;
	pub->sources[0].type = CUTIHI_VAL_SAMPLE;
	char *filepath = CHi_Crawl(&pub->sinks[0])->data.text;
	if(!node->filepathCache || strcmp(node->filepathCache, filepath) != 0) {
		/* Path changed: tear down the old decode state, then rebuild. */
		if(node->decodersReady) {
			vpx_codec_destroy(&node->codec);
			opus_decoder_destroy(node->ap.opus);
			node->decodersReady = false;
		}
		/* NOTE(review): the FileReader objects are re-placement-newed without
		 * running their destructors, so the FILE*s are closed manually here —
		 * verify libwebm's FileReader does not also own/close them. */
		if(node->vf) fclose(node->vf);
		if(node->af) fclose(node->af);
		node->vf = node->af = nullptr;
		free(node->filepathCache);
		/* Own a private copy: `filepath` belongs to the sink value. */
		node->filepathCache = strdup(filepath);
		node->vf = fopen(filepath, "rb");
		node->af = fopen(filepath, "rb"); /* second handle: audio seeks independently */
		if(!node->vf || !node->af) {
			if(node->vf) { fclose(node->vf); node->vf = nullptr; }
			if(node->af) { fclose(node->af); node->af = nullptr; }
			return 1;
		}
		new (&node->vreader) webm::FileReader{node->vf};
		new (&node->vparser) webm::WebmParser{};
		new (&node->areader) webm::FileReader{node->af};
		new (&node->aparser) webm::WebmParser{};
		node->cuepoints.clear();
		CueParser cp{node};
		node->vparser.Feed(&cp, &node->vreader);
		node->timeCache = std::numeric_limits<int64_t>::max(); /* force a seek below */
		if(node->vcodecid == "V_VP9") {
			vpx_codec_dec_init(&node->codec, vpx_codec_vp9_dx(), NULL, 0);
		} else if(node->vcodecid == "V_VP8") {
			vpx_codec_dec_init(&node->codec, vpx_codec_vp8_dx(), NULL, 0);
		} else {
			return 1; /* unsupported video codec */
		}
		new (&node->fp) FrameParser{};
		node->fp.videoTrack = node->videoTrack;
		node->fp.codec = &node->codec;
		node->fp.iter = &node->iter;
		new (&node->ap) AudioParser{};
		int error;
		node->ap.opus = opus_decoder_create(48000, 1, &error);
		node->ap.audioTrack = node->audioTrack;
		node->decodersReady = true;
	}
	if(!node->decodersReady) return 1; /* a previous open failed; decoders are dead */
	if(t == node->timeCache) return 1; /* nothing new to produce */
	if(pub->sources[0].data.sample) {
		CHi_Image_Free(pub->sources[0].data.sample);
		pub->sources[0].data.sample = nullptr;
	}
	if(t >= 0 && t < 1000 * node->duration) {
		if(t < node->timeCache || (t - node->timeCache) > 5000) {
			/* Backward jump or a large forward jump: seek via cue points
			 * when the file has them, otherwise restart from the top. */
			if(node->cuepoints.size() > 0) {
				size_t i;
				for(i = 0; i < node->cuepoints.size(); i++) {
					if(t < node->cuepoints[i].time.value()) break;
				}
				if(i != 0) i--; /* last cue at or before t */
				for(const webm::Element<webm::CueTrackPositions> &p : node->cuepoints[i].cue_track_positions) {
					if(p.value().track.value() == node->videoTrack) {
						long off = (long) (node->segmentOff + p.value().cluster_position.value());
						fseek(node->vf, off, SEEK_SET);
						fseek(node->af, off, SEEK_SET);
						break;
					}
				}
			} else {
				fseek(node->vf, 0, SEEK_SET);
				fseek(node->af, 0, SEEK_SET);
			}
		}
		node->fp.untihl = t;
		node->ap.untihl = t;
		/* Always necessary for some reason, else stops parsing after seek
		 * (as in no callbacks called).. */
		node->vparser.DidSeek();
		node->aparser.DidSeek();
		node->vparser.Feed(&node->fp, &node->vreader);
		node->aparser.Feed(&node->ap, &node->areader);
		pub->sources[0].data.sample = node->fp.output;
		/* BUGFIX: take ownership — otherwise a later call that produces no
		 * new frame would re-publish (and later double-free) the stale,
		 * already-freed pointer still held by the parser. */
		node->fp.output = nullptr;
		node->timeCache = t;
	}
	if(!pub->sources[0].data.sample) {
		/* Out of range or no frame decoded: publish a placeholder of the
		 * right size so downstream nodes keep working. */
		pub->sources[0].data.sample = CHi_Image_New(2, 4, 8 * node->vw, node->vw, node->vh, NULL);
	}
	/* Audio: hand out exactly one frame-delta worth of ring-buffer samples. */
	size_t width = roundf(CHi_Time_GetDelta(pub->ng) * 48000);
	CHiImage *aud = CHi_Image_New(4, 1, 4 * width, width, 1, NULL);
	if(node->pub.ng->compilationStatus == CUTIHI_COMP_RUNNING) {
		if(node->ap.sampleReadI + width > SAMPLE_ARR) {
			/* The read window wraps around the end of the ring. */
			size_t head = SAMPLE_ARR - node->ap.sampleReadI;
			memcpy(aud->data16, node->ap.sampleArray + node->ap.sampleReadI, sizeof(*node->ap.sampleArray) * head);
			memcpy(aud->data16 + head, node->ap.sampleArray, sizeof(*node->ap.sampleArray) * (width - head));
		} else {
			memcpy(aud->data16, node->ap.sampleArray + node->ap.sampleReadI, sizeof(*node->ap.sampleArray) * width);
		}
		node->ap.sampleReadI = (node->ap.sampleReadI + width) % SAMPLE_ARR;
	} else {
		/* Not running: emit silence. */
		memset(aud->data16, 0, aud->stride * aud->height);
	}
	if(pub->sources[1].data.sample) CHi_Image_Free(pub->sources[1].data.sample);
	pub->sources[1].type = CUTIHI_VAL_SAMPLE;
	pub->sources[1].data.sample = aud;
	pub->clean = 0;
	return 1;
}

extern "C" {

/* Factory: allocates a movie node and wires up its sinks/sources.
 * sinks[0] = file path (text), sinks[1] = time override (vec4, seconds). */
CUTIVIS CHiPubNode *CHi_Movie() {
	CHiMovieNode *n = (CHiMovieNode*) malloc(sizeof(*n));
	new (n) CHiMovieNode(); /* value-init zeroes handles and pointers */
	n->pub.type = CUTIHI_T('CMov','ie ');
	n->pub.Perform = movie_perform;
	n->pub.clean = 0;
	n->pub.sinkCount = 2;
	n->pub.sinks = (CHiValue*) calloc(sizeof(*n->pub.sinks), n->pub.sinkCount);
	n->pub.sinks[1].type = CUTIHI_VAL_VEC4;
	n->pub.sinks[1].data.vec4[0] = 0;
	n->pub.sourceCount = 2;
	n->pub.sources = (CHiValue*) calloc(sizeof(*n->pub.sources), n->pub.sourceCount);
	return &n->pub;
}

}