diff --git a/ui/frame.cpp b/ui/frame.cpp index f444d03..a49abe8 100644 --- a/ui/frame.cpp +++ b/ui/frame.cpp @@ -915,8 +915,19 @@ __attribute__((optimize("O3"))) static uint8_t *bgra64torgb24(uint8_t *orig, siz __m128i z[8] = {}; for(int zi = 0; zi < 8; zi++) { - z[zi] = _mm_loadu_si128((__m128i*) temp + zi); - z[zi] = apply_gamma_epi16(z[zi], _mm_set_ps(1, 1 / 2.2f, 1 / 2.2f, 1 / 2.2f)); + // Alpha-blend with checkered background. + __m128i bottom = (((x + zi) / 4 ^ y / 4) & 1) + ? _mm_set_epi16(0xFFFF, 0x8000, 0x8000, 0x8000, 0xFFFF, 0x8000, 0x8000, 0x8000) + : _mm_set_epi16(0xFFFF, 0x4000, 0x4000, 0x4000, 0xFFFF, 0x4000, 0x4000, 0x4000); + + __m128i top = _mm_loadu_si128((__m128i*) temp + zi); + + __m128i alpha = _mm_shuffle_epi8(top, _mm_set_epi8(15, 14, 15, 14, 15, 14, 15, 14, 7, 6, 7, 6, 7, 6, 7, 6)); + __m128i invAlpha = _mm_sub_epi16(_mm_set1_epi16(0xFFFF), alpha); + + __m128i result = _mm_add_epi16(_mm_mulhi_epu16(top, alpha), _mm_mulhi_epu16(bottom, invAlpha)); + + z[zi] = apply_gamma_epi16(result, _mm_set_ps(1, 1 / 2.2f, 1 / 2.2f, 1 / 2.2f)); } __m128i a = _mm_shuffle_epi8(z[0], _mm_set_epi8(-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, 9, 11, 13, 1, 3, 5));