^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) // SPDX-License-Identifier: LGPL-2.1+
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) * Copyright 2016 Tom aan de Wiel
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) * Copyright 2018 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) * 8x8 Fast Walsh Hadamard Transform in sequency order based on the paper:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) * A Recursive Algorithm for Sequency-Ordered Fast Walsh Transforms,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) * R.D. Brown, 1977
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12) #include <linux/string.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) #include <linux/kernel.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) #include "codec-fwht.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15)
/*
 * Status value handed back by derlc() when the compressed input ends
 * before a complete block could be read.
 */
#define OVERFLOW_BIT	BIT(14)

/*
 * Bits of the 16-bit header word that starts every run-length coded block.
 *
 * Note: bit 0 of the header must always be 0. Otherwise it cannot
 * be guaranteed that the magic 8 byte sequence (see below) can
 * never occur in the rlc output.
 */
#define PFRAME_BIT	BIT(15)	/* set by rlc() for PBLOCK blocks */
#define DUPS_MASK	0x1ffe	/* NOTE(review): no user in this part of the file; presumably a repeat count kept in bits 1-12 - confirm */

/* Values of the 'blocktype' argument of rlc(). */
#define PBLOCK		0
#define IBLOCK		1

/*
 * Run-length value (low 4 bits of a code word) that means "every
 * remaining coefficient of the block is zero".
 */
#define ALL_ZEROS	15
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30)
/*
 * Scan order used by the run-length coder: entry n is the row-major index
 * (y * 8 + x) of the n-th coefficient visited. The 8x8 block is covered one
 * anti-diagonal at a time, each one walked from the top row downwards.
 */
static const uint8_t zigzag[8 * 8] = {
	0,					/* x + y == 0 */
	1, 8,					/* x + y == 1 */
	2, 9, 16,				/* x + y == 2 */
	3, 10, 17, 24,				/* x + y == 3 */
	4, 11, 18, 25, 32,			/* x + y == 4 */
	5, 12, 19, 26, 33, 40,			/* x + y == 5 */
	6, 13, 20, 27, 34, 41, 48,		/* x + y == 6 */
	7, 14, 21, 28, 35, 42, 49, 56,		/* x + y == 7 */
	15, 22, 29, 36, 43, 50, 57,		/* x + y == 8 */
	23, 30, 37, 44, 51, 58,			/* x + y == 9 */
	31, 38, 45, 52, 59,			/* x + y == 10 */
	39, 46, 53, 60,				/* x + y == 11 */
	47, 54, 61,				/* x + y == 12 */
	55, 62,					/* x + y == 13 */
	63,					/* x + y == 14 */
};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50) * noinline_for_stack to work around
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51) * https://bugs.llvm.org/show_bug.cgi?id=38809
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53) static int noinline_for_stack
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54) rlc(const s16 *in, __be16 *output, int blocktype)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56) s16 block[8 * 8];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57) s16 *wp = block;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58) int i = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59) int x, y;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62) /* read in block from framebuffer */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63) int lastzero_run = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64) int to_encode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66) for (y = 0; y < 8; y++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67) for (x = 0; x < 8; x++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68) *wp = in[x + y * 8];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69) wp++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73) /* keep track of amount of trailing zeros */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74) for (i = 63; i >= 0 && !block[zigzag[i]]; i--)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75) lastzero_run++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77) *output++ = (blocktype == PBLOCK ? htons(PFRAME_BIT) : 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78) ret++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80) to_encode = 8 * 8 - (lastzero_run > 14 ? lastzero_run : 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82) i = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 83) while (i < to_encode) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 84) int cnt = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 85) int tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 86)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 87) /* count leading zeros */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88) while ((tmp = block[zigzag[i]]) == 0 && cnt < 14) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89) cnt++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90) i++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91) if (i == to_encode) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92) cnt--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 96) /* 4 bits for run, 12 for coefficient (quantization by 4) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97) *output++ = htons((cnt | tmp << 4));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 98) i++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 99) ret++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) if (lastzero_run > 14) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) *output = htons(ALL_ZEROS | 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) ret++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) * This function will worst-case increase rlc_in by 65*2 bytes:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) * one s16 value for the header and 8 * 8 coefficients of type s16.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113) static noinline_for_stack u16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) derlc(const __be16 **rlc_in, s16 *dwht_out, const __be16 *end_of_input)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) /* header */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117) const __be16 *input = *rlc_in;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118) u16 stat;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) int dec_count = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120) s16 block[8 * 8 + 16];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121) s16 *wp = block;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124) if (input > end_of_input)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125) return OVERFLOW_BIT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) stat = ntohs(*input++);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129) * Now de-compress, it expands one byte to up to 15 bytes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130) * (or fills the remainder of the 64 bytes with zeroes if it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131) * is the last byte to expand).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133) * So block has to be 8 * 8 + 16 bytes, the '+ 16' is to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134) * allow for overflow if the incoming data was malformed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136) while (dec_count < 8 * 8) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137) s16 in;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138) int length;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139) int coeff;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141) if (input > end_of_input)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142) return OVERFLOW_BIT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143) in = ntohs(*input++);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144) length = in & 0xf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145) coeff = in >> 4;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147) /* fill remainder with zeros */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148) if (length == 15) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149) for (i = 0; i < 64 - dec_count; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150) *wp++ = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154) for (i = 0; i < length; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155) *wp++ = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156) *wp++ = coeff;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157) dec_count += length + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 160) wp = block;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 161)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 162) for (i = 0; i < 64; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 163) int pos = zigzag[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 164) int y = pos / 8;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 165) int x = pos % 8;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 166)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167) dwht_out[x + y * 8] = *wp++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 168) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169) *rlc_in = input;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170) return stat;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172)
/*
 * Per-coefficient shift counts, row-major, used by quantize_intra() and
 * dequantize_intra(). The shifts grow towards the bottom-right corner.
 */
static const int quant_table[8 * 8] = {
	2, 2, 2, 2, 2, 2, 2, 2,		/* row 0 */
	2, 2, 2, 2, 2, 2, 2, 2,		/* row 1 */
	2, 2, 2, 2, 2, 2, 2, 3,		/* row 2 */
	2, 2, 2, 2, 2, 2, 3, 6,		/* row 3 */
	2, 2, 2, 2, 2, 3, 6, 6,		/* row 4 */
	2, 2, 2, 2, 3, 6, 6, 6,		/* row 5 */
	2, 2, 2, 3, 6, 6, 6, 6,		/* row 6 */
	2, 2, 3, 6, 6, 6, 6, 8,		/* row 7 */
};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 183)
/*
 * Per-coefficient shift counts, row-major, used by quantize_inter() and
 * dequantize_inter(). The shifts grow towards the bottom-right corner.
 */
static const int quant_table_p[8 * 8] = {
	3, 3, 3, 3, 3, 3, 3, 3,		/* row 0 */
	3, 3, 3, 3, 3, 3, 3, 3,		/* row 1 */
	3, 3, 3, 3, 3, 3, 3, 3,		/* row 2 */
	3, 3, 3, 3, 3, 3, 3, 6,		/* row 3 */
	3, 3, 3, 3, 3, 3, 6, 6,		/* row 4 */
	3, 3, 3, 3, 3, 6, 6, 9,		/* row 5 */
	3, 3, 3, 3, 6, 6, 9, 9,		/* row 6 */
	3, 3, 3, 6, 6, 9, 9, 10,	/* row 7 */
};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 194)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 195) static void quantize_intra(s16 *coeff, s16 *de_coeff, u16 qp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 196) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 197) const int *quant = quant_table;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 198) int i, j;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 199)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 200) for (j = 0; j < 8; j++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 201) for (i = 0; i < 8; i++, quant++, coeff++, de_coeff++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 202) *coeff >>= *quant;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 203) if (*coeff >= -qp && *coeff <= qp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 204) *coeff = *de_coeff = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 205) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 206) *de_coeff = *coeff << *quant;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 207) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 208) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 209) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 210)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 211) static void dequantize_intra(s16 *coeff)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 212) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 213) const int *quant = quant_table;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 214) int i, j;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 215)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 216) for (j = 0; j < 8; j++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 217) for (i = 0; i < 8; i++, quant++, coeff++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 218) *coeff <<= *quant;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 219) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 220)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 221) static void quantize_inter(s16 *coeff, s16 *de_coeff, u16 qp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 222) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 223) const int *quant = quant_table_p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 224) int i, j;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 225)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 226) for (j = 0; j < 8; j++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 227) for (i = 0; i < 8; i++, quant++, coeff++, de_coeff++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 228) *coeff >>= *quant;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 229) if (*coeff >= -qp && *coeff <= qp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 230) *coeff = *de_coeff = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 231) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 232) *de_coeff = *coeff << *quant;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 233) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 234) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 235) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 236)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 237) static void dequantize_inter(s16 *coeff)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 238) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 239) const int *quant = quant_table_p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 240) int i, j;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 241)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 242) for (j = 0; j < 8; j++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 243) for (i = 0; i < 8; i++, quant++, coeff++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 244) *coeff <<= *quant;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 245) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 246)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 247) static void noinline_for_stack fwht(const u8 *block, s16 *output_block,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 248) unsigned int stride,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 249) unsigned int input_step, bool intra)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 250) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 251) /* we'll need more than 8 bits for the transformed coefficients */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 252) s32 workspace1[8], workspace2[8];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 253) const u8 *tmp = block;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 254) s16 *out = output_block;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 255) int add = intra ? 256 : 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 256) unsigned int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 257)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 258) /* stage 1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 259) for (i = 0; i < 8; i++, tmp += stride, out += 8) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 260) switch (input_step) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 261) case 1:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 262) workspace1[0] = tmp[0] + tmp[1] - add;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 263) workspace1[1] = tmp[0] - tmp[1];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 264)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 265) workspace1[2] = tmp[2] + tmp[3] - add;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 266) workspace1[3] = tmp[2] - tmp[3];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 267)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 268) workspace1[4] = tmp[4] + tmp[5] - add;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 269) workspace1[5] = tmp[4] - tmp[5];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 270)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 271) workspace1[6] = tmp[6] + tmp[7] - add;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 272) workspace1[7] = tmp[6] - tmp[7];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 273) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 274) case 2:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 275) workspace1[0] = tmp[0] + tmp[2] - add;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 276) workspace1[1] = tmp[0] - tmp[2];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 277)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 278) workspace1[2] = tmp[4] + tmp[6] - add;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 279) workspace1[3] = tmp[4] - tmp[6];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 280)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 281) workspace1[4] = tmp[8] + tmp[10] - add;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 282) workspace1[5] = tmp[8] - tmp[10];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 283)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 284) workspace1[6] = tmp[12] + tmp[14] - add;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 285) workspace1[7] = tmp[12] - tmp[14];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 286) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 287) case 3:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 288) workspace1[0] = tmp[0] + tmp[3] - add;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 289) workspace1[1] = tmp[0] - tmp[3];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 290)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 291) workspace1[2] = tmp[6] + tmp[9] - add;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 292) workspace1[3] = tmp[6] - tmp[9];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 293)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 294) workspace1[4] = tmp[12] + tmp[15] - add;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 295) workspace1[5] = tmp[12] - tmp[15];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 296)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 297) workspace1[6] = tmp[18] + tmp[21] - add;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 298) workspace1[7] = tmp[18] - tmp[21];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 299) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 300) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 301) workspace1[0] = tmp[0] + tmp[4] - add;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 302) workspace1[1] = tmp[0] - tmp[4];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 303)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 304) workspace1[2] = tmp[8] + tmp[12] - add;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 305) workspace1[3] = tmp[8] - tmp[12];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 306)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 307) workspace1[4] = tmp[16] + tmp[20] - add;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 308) workspace1[5] = tmp[16] - tmp[20];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 309)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 310) workspace1[6] = tmp[24] + tmp[28] - add;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 311) workspace1[7] = tmp[24] - tmp[28];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 312) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 313) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 314)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 315) /* stage 2 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 316) workspace2[0] = workspace1[0] + workspace1[2];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 317) workspace2[1] = workspace1[0] - workspace1[2];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 318) workspace2[2] = workspace1[1] - workspace1[3];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 319) workspace2[3] = workspace1[1] + workspace1[3];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 320)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 321) workspace2[4] = workspace1[4] + workspace1[6];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 322) workspace2[5] = workspace1[4] - workspace1[6];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 323) workspace2[6] = workspace1[5] - workspace1[7];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 324) workspace2[7] = workspace1[5] + workspace1[7];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 325)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 326) /* stage 3 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 327) out[0] = workspace2[0] + workspace2[4];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 328) out[1] = workspace2[0] - workspace2[4];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 329) out[2] = workspace2[1] - workspace2[5];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 330) out[3] = workspace2[1] + workspace2[5];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 331) out[4] = workspace2[2] + workspace2[6];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 332) out[5] = workspace2[2] - workspace2[6];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 333) out[6] = workspace2[3] - workspace2[7];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 334) out[7] = workspace2[3] + workspace2[7];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 335) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 336)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 337) out = output_block;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 338)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 339) for (i = 0; i < 8; i++, out++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 340) /* stage 1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 341) workspace1[0] = out[0] + out[1 * 8];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 342) workspace1[1] = out[0] - out[1 * 8];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 343)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 344) workspace1[2] = out[2 * 8] + out[3 * 8];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 345) workspace1[3] = out[2 * 8] - out[3 * 8];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 346)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 347) workspace1[4] = out[4 * 8] + out[5 * 8];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 348) workspace1[5] = out[4 * 8] - out[5 * 8];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 349)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 350) workspace1[6] = out[6 * 8] + out[7 * 8];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 351) workspace1[7] = out[6 * 8] - out[7 * 8];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 352)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 353) /* stage 2 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 354) workspace2[0] = workspace1[0] + workspace1[2];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 355) workspace2[1] = workspace1[0] - workspace1[2];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 356) workspace2[2] = workspace1[1] - workspace1[3];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 357) workspace2[3] = workspace1[1] + workspace1[3];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 358)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 359) workspace2[4] = workspace1[4] + workspace1[6];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 360) workspace2[5] = workspace1[4] - workspace1[6];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 361) workspace2[6] = workspace1[5] - workspace1[7];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 362) workspace2[7] = workspace1[5] + workspace1[7];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 363) /* stage 3 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 364) out[0 * 8] = workspace2[0] + workspace2[4];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 365) out[1 * 8] = workspace2[0] - workspace2[4];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 366) out[2 * 8] = workspace2[1] - workspace2[5];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 367) out[3 * 8] = workspace2[1] + workspace2[5];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 368) out[4 * 8] = workspace2[2] + workspace2[6];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 369) out[5 * 8] = workspace2[2] - workspace2[6];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 370) out[6 * 8] = workspace2[3] - workspace2[7];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 371) out[7 * 8] = workspace2[3] + workspace2[7];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 372) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 373) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 374)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 375) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 376) * Not the nicest way of doing it, but P-blocks get twice the range of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 377) * that of the I-blocks. Therefore we need a type bigger than 8 bits.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 378) * Furthermore values can be negative... This is just a version that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 379) * works with 16 signed data
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 380) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 381) static void noinline_for_stack
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 382) fwht16(const s16 *block, s16 *output_block, int stride, int intra)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 383) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 384) /* we'll need more than 8 bits for the transformed coefficients */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 385) s32 workspace1[8], workspace2[8];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 386) const s16 *tmp = block;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 387) s16 *out = output_block;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 388) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 389)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 390) for (i = 0; i < 8; i++, tmp += stride, out += 8) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 391) /* stage 1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 392) workspace1[0] = tmp[0] + tmp[1];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 393) workspace1[1] = tmp[0] - tmp[1];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 394)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 395) workspace1[2] = tmp[2] + tmp[3];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 396) workspace1[3] = tmp[2] - tmp[3];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 397)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 398) workspace1[4] = tmp[4] + tmp[5];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 399) workspace1[5] = tmp[4] - tmp[5];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 400)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 401) workspace1[6] = tmp[6] + tmp[7];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 402) workspace1[7] = tmp[6] - tmp[7];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 403)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 404) /* stage 2 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 405) workspace2[0] = workspace1[0] + workspace1[2];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 406) workspace2[1] = workspace1[0] - workspace1[2];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 407) workspace2[2] = workspace1[1] - workspace1[3];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 408) workspace2[3] = workspace1[1] + workspace1[3];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 409)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 410) workspace2[4] = workspace1[4] + workspace1[6];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 411) workspace2[5] = workspace1[4] - workspace1[6];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 412) workspace2[6] = workspace1[5] - workspace1[7];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 413) workspace2[7] = workspace1[5] + workspace1[7];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 414)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 415) /* stage 3 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 416) out[0] = workspace2[0] + workspace2[4];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 417) out[1] = workspace2[0] - workspace2[4];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 418) out[2] = workspace2[1] - workspace2[5];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 419) out[3] = workspace2[1] + workspace2[5];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 420) out[4] = workspace2[2] + workspace2[6];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 421) out[5] = workspace2[2] - workspace2[6];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 422) out[6] = workspace2[3] - workspace2[7];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 423) out[7] = workspace2[3] + workspace2[7];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 424) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 425)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 426) out = output_block;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 427)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 428) for (i = 0; i < 8; i++, out++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 429) /* stage 1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 430) workspace1[0] = out[0] + out[1*8];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 431) workspace1[1] = out[0] - out[1*8];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 432)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 433) workspace1[2] = out[2*8] + out[3*8];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 434) workspace1[3] = out[2*8] - out[3*8];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 435)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 436) workspace1[4] = out[4*8] + out[5*8];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 437) workspace1[5] = out[4*8] - out[5*8];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 438)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 439) workspace1[6] = out[6*8] + out[7*8];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 440) workspace1[7] = out[6*8] - out[7*8];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 441)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 442) /* stage 2 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 443) workspace2[0] = workspace1[0] + workspace1[2];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 444) workspace2[1] = workspace1[0] - workspace1[2];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 445) workspace2[2] = workspace1[1] - workspace1[3];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 446) workspace2[3] = workspace1[1] + workspace1[3];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 447)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 448) workspace2[4] = workspace1[4] + workspace1[6];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 449) workspace2[5] = workspace1[4] - workspace1[6];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 450) workspace2[6] = workspace1[5] - workspace1[7];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 451) workspace2[7] = workspace1[5] + workspace1[7];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 452)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 453) /* stage 3 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 454) out[0*8] = workspace2[0] + workspace2[4];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 455) out[1*8] = workspace2[0] - workspace2[4];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 456) out[2*8] = workspace2[1] - workspace2[5];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 457) out[3*8] = workspace2[1] + workspace2[5];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 458) out[4*8] = workspace2[2] + workspace2[6];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 459) out[5*8] = workspace2[2] - workspace2[6];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 460) out[6*8] = workspace2[3] - workspace2[7];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 461) out[7*8] = workspace2[3] + workspace2[7];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 462) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 463) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 464)
/*
 * 8-point butterfly shared by the row and column passes of ifwht().
 * Reads the eight values in @in and writes the eight results to @res
 * after three add/subtract stages.
 */
static void ifwht_butterfly8(const int *in, int *res)
{
	int s1[8], s2[8];
	int k;

	/* stage 1: sum and difference of each adjacent pair */
	for (k = 0; k < 8; k += 2) {
		s1[k] = in[k] + in[k + 1];
		s1[k + 1] = in[k] - in[k + 1];
	}

	/* stage 2: same wiring applied to each half of four values */
	for (k = 0; k < 8; k += 4) {
		s2[k] = s1[k] + s1[k + 2];
		s2[k + 1] = s1[k] - s1[k + 2];
		s2[k + 2] = s1[k + 1] - s1[k + 3];
		s2[k + 3] = s1[k + 1] + s1[k + 3];
	}

	/* stage 3: combine the two halves */
	res[0] = s2[0] + s2[4];
	res[1] = s2[0] - s2[4];
	res[2] = s2[1] - s2[5];
	res[3] = s2[1] + s2[5];
	res[4] = s2[2] + s2[6];
	res[5] = s2[2] - s2[6];
	res[6] = s2[3] - s2[7];
	res[7] = s2[3] + s2[7];
}

/*
 * Inverse 8x8 transform of the 64 coefficients in @block, written to
 * @output_block.
 *
 * Every row is transformed first, then every column in place. After the
 * column pass each value is shifted right by 6 bits; when @intra is
 * non-zero 128 is added on top (presumably undoing an offset applied by
 * the forward intra transform - confirm against fwht()).
 */
static noinline_for_stack void
ifwht(const s16 *block, s16 *output_block, int intra)
{
	/*
	 * The intermediate sums do not fit the coefficient type,
	 * so the butterflies work on plain ints.
	 */
	int in[8], res[8];
	int row, col, k;

	/* horizontal pass: @block rows -> @output_block rows */
	for (row = 0; row < 8; row++) {
		const s16 *src = block + 8 * row;
		s16 *dst = output_block + 8 * row;

		for (k = 0; k < 8; k++)
			in[k] = src[k];
		ifwht_butterfly8(in, res);
		for (k = 0; k < 8; k++)
			dst[k] = res[k];
	}

	/* vertical pass: each column of @output_block, in place */
	for (col = 0; col < 8; col++) {
		s16 *column = output_block + col;

		for (k = 0; k < 8; k++)
			in[k] = column[8 * k];
		ifwht_butterfly8(in, res);
		for (k = 0; k < 8; k++) {
			/* narrow to s16 first, exactly like a store to the block */
			s16 v = res[k];

			v >>= 6;
			if (intra)
				v += 128;
			column[8 * k] = v;
		}
	}
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 575)
/*
 * Gather one 8x8 block of 8-bit samples into the 64-entry buffer @dst,
 * stored row after row.
 *
 * @input:      first sample of the block
 * @dst:        destination, 64 s16 values
 * @stride:     distance in bytes between the starts of two rows of @input
 * @input_step: distance in bytes between two samples of the same row
 */
static void fill_encoder_block(const u8 *input, s16 *dst,
			       unsigned int stride, unsigned int input_step)
{
	unsigned int row, col;

	for (row = 0; row < 8; row++) {
		const u8 *line = input + row * stride;

		for (col = 0; col < 8; col++)
			dst[8 * row + col] = line[col * input_step];
	}
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 587)
/*
 * Intra cost of a block: the sum over all 64 samples of @input of the
 * absolute difference between the sample and the block mean (the mean is
 * the integer quotient of the sample sum by 64).
 */
static int var_intra(const s16 *input)
{
	int32_t total = 0;
	int32_t mean;
	int32_t deviation = 0;
	int n;

	for (n = 0; n < 64; n++)
		total += input[n];
	mean = total / 64;

	for (n = 0; n < 64; n++) {
		int32_t diff = input[n] - mean;

		if (diff < 0)
			deviation -= diff;
		else
			deviation += diff;
	}
	return deviation;
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 603)
/*
 * Inter cost of a block: the sum of absolute differences between the 64
 * samples of @old and the 64 samples of @new.
 */
static int var_inter(const s16 *old, const s16 *new)
{
	int32_t sad = 0;
	int n;

	for (n = 0; n < 64; n++) {
		int diff = old[n] - new[n];

		if (diff < 0)
			sad -= diff;
		else
			sad += diff;
	}
	return sad;
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 613)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 614) static noinline_for_stack int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 615) decide_blocktype(const u8 *cur, const u8 *reference, s16 *deltablock,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 616) unsigned int stride, unsigned int input_step)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 617) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 618) s16 tmp[64];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 619) s16 old[64];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 620) s16 *work = tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 621) unsigned int k, l;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 622) int vari;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 623) int vard;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 624)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 625) fill_encoder_block(cur, tmp, stride, input_step);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 626) fill_encoder_block(reference, old, 8, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 627) vari = var_intra(tmp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 628)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 629) for (k = 0; k < 8; k++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 630) for (l = 0; l < 8; l++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 631) *deltablock = *work - *reference;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 632) deltablock++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 633) work++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 634) reference++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 635) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 636) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 637) deltablock -= 64;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 638) vard = var_inter(old, tmp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 639) return vari <= vard ? IBLOCK : PBLOCK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 640) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 641)
/*
 * Store the 64 values of @input as one 8x8 block of 8-bit samples at @dst,
 * saturating every value to the range 0..255.
 *
 * @stride:   distance in bytes between the starts of two rows of @dst
 * @dst_step: distance in bytes between two samples of the same row
 */
static void fill_decoder_block(u8 *dst, const s16 *input, int stride,
			       unsigned int dst_step)
{
	u8 *px = dst;
	int row, col;

	for (row = 0; row < 8; row++) {
		for (col = 0; col < 8; col++) {
			s16 v = input[8 * row + col];

			if (v > 255)
				*px = 255;
			else if (v < 0)
				*px = 0;
			else
				*px = v;
			px += dst_step;
		}
		/* jump from the end of this block row to the next one */
		px += stride - (8 * dst_step);
	}
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 659)
/*
 * Add the reference samples to the 64 decoded differences in @deltas, in
 * place, and saturate every result to 0..255.
 *
 * @ref:      first reference sample of the block
 * @stride:   distance in bytes between the starts of two rows of @ref
 * @ref_step: distance in bytes between two samples of the same row
 */
static void add_deltas(s16 *deltas, const u8 *ref, int stride,
		       unsigned int ref_step)
{
	int row, col;

	for (row = 0; row < 8; row++) {
		for (col = 0; col < 8; col++) {
			s16 v = deltas[8 * row + col] + *ref;

			ref += ref_step;
			/*
			 * Because of quantization the reconstructed value
			 * may end up slightly outside the 8-bit range.
			 */
			if (v > 255)
				v = 255;
			else if (v < 0)
				v = 0;
			deltas[8 * row + col] = v;
		}
		/* jump from the end of this block row to the next one */
		ref += stride - (8 * ref_step);
	}
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 682)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 683) static u32 encode_plane(u8 *input, u8 *refp, __be16 **rlco, __be16 *rlco_max,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 684) struct fwht_cframe *cf, u32 height, u32 width,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 685) u32 stride, unsigned int input_step,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 686) bool is_intra, bool next_is_intra)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 687) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 688) u8 *input_start = input;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 689) __be16 *rlco_start = *rlco;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 690) s16 deltablock[64];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 691) __be16 pframe_bit = htons(PFRAME_BIT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 692) u32 encoding = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 693) unsigned int last_size = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 694) unsigned int i, j;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 695)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 696) width = round_up(width, 8);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 697) height = round_up(height, 8);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 698)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 699) for (j = 0; j < height / 8; j++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 700) input = input_start + j * 8 * stride;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 701) for (i = 0; i < width / 8; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 702) /* intra code, first frame is always intra coded. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 703) int blocktype = IBLOCK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 704) unsigned int size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 705)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 706) if (!is_intra)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 707) blocktype = decide_blocktype(input, refp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 708) deltablock, stride, input_step);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 709) if (blocktype == IBLOCK) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 710) fwht(input, cf->coeffs, stride, input_step, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 711) quantize_intra(cf->coeffs, cf->de_coeffs,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 712) cf->i_frame_qp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 713) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 714) /* inter code */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 715) encoding |= FWHT_FRAME_PCODED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 716) fwht16(deltablock, cf->coeffs, 8, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 717) quantize_inter(cf->coeffs, cf->de_coeffs,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 718) cf->p_frame_qp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 719) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 720) if (!next_is_intra) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 721) ifwht(cf->de_coeffs, cf->de_fwht, blocktype);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 722)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 723) if (blocktype == PBLOCK)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 724) add_deltas(cf->de_fwht, refp, 8, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 725) fill_decoder_block(refp, cf->de_fwht, 8, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 726) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 727)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 728) input += 8 * input_step;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 729) refp += 8 * 8;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 730)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 731) size = rlc(cf->coeffs, *rlco, blocktype);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 732) if (last_size == size &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 733) !memcmp(*rlco + 1, *rlco - size + 1, 2 * size - 2)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 734) __be16 *last_rlco = *rlco - size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 735) s16 hdr = ntohs(*last_rlco);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 736)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 737) if (!((*last_rlco ^ **rlco) & pframe_bit) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 738) (hdr & DUPS_MASK) < DUPS_MASK)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 739) *last_rlco = htons(hdr + 2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 740) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 741) *rlco += size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 742) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 743) *rlco += size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 744) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 745) if (*rlco >= rlco_max) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 746) encoding |= FWHT_FRAME_UNENCODED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 747) goto exit_loop;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 748) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 749) last_size = size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 750) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 751) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 752)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 753) exit_loop:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 754) if (encoding & FWHT_FRAME_UNENCODED) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 755) u8 *out = (u8 *)rlco_start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 756) u8 *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 757)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 758) input = input_start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 759) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 760) * The compressed stream should never contain the magic
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 761) * header, so when we copy the YUV data we replace 0xff
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 762) * by 0xfe. Since YUV is limited range such values
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 763) * shouldn't appear anyway.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 764) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 765) for (j = 0; j < height; j++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 766) for (i = 0, p = input; i < width; i++, p += input_step)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 767) *out++ = (*p == 0xff) ? 0xfe : *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 768) input += stride;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 769) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 770) *rlco = (__be16 *)out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 771) encoding &= ~FWHT_FRAME_PCODED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 772) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 773) return encoding;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 774) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 775)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 776) u32 fwht_encode_frame(struct fwht_raw_frame *frm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 777) struct fwht_raw_frame *ref_frm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 778) struct fwht_cframe *cf,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 779) bool is_intra, bool next_is_intra,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 780) unsigned int width, unsigned int height,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 781) unsigned int stride, unsigned int chroma_stride)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 782) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 783) unsigned int size = height * width;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 784) __be16 *rlco = cf->rlc_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 785) __be16 *rlco_max;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 786) u32 encoding;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 787)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 788) rlco_max = rlco + size / 2 - 256;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 789) encoding = encode_plane(frm->luma, ref_frm->luma, &rlco, rlco_max, cf,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 790) height, width, stride,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 791) frm->luma_alpha_step, is_intra, next_is_intra);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 792) if (encoding & FWHT_FRAME_UNENCODED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 793) encoding |= FWHT_LUMA_UNENCODED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 794) encoding &= ~FWHT_FRAME_UNENCODED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 795)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 796) if (frm->components_num >= 3) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 797) u32 chroma_h = height / frm->height_div;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 798) u32 chroma_w = width / frm->width_div;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 799) unsigned int chroma_size = chroma_h * chroma_w;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 800)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 801) rlco_max = rlco + chroma_size / 2 - 256;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 802) encoding |= encode_plane(frm->cb, ref_frm->cb, &rlco, rlco_max,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 803) cf, chroma_h, chroma_w,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 804) chroma_stride, frm->chroma_step,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 805) is_intra, next_is_intra);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 806) if (encoding & FWHT_FRAME_UNENCODED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 807) encoding |= FWHT_CB_UNENCODED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 808) encoding &= ~FWHT_FRAME_UNENCODED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 809) rlco_max = rlco + chroma_size / 2 - 256;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 810) encoding |= encode_plane(frm->cr, ref_frm->cr, &rlco, rlco_max,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 811) cf, chroma_h, chroma_w,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 812) chroma_stride, frm->chroma_step,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 813) is_intra, next_is_intra);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 814) if (encoding & FWHT_FRAME_UNENCODED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 815) encoding |= FWHT_CR_UNENCODED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 816) encoding &= ~FWHT_FRAME_UNENCODED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 817) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 818)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 819) if (frm->components_num == 4) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 820) rlco_max = rlco + size / 2 - 256;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 821) encoding |= encode_plane(frm->alpha, ref_frm->alpha, &rlco,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 822) rlco_max, cf, height, width,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 823) stride, frm->luma_alpha_step,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 824) is_intra, next_is_intra);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 825) if (encoding & FWHT_FRAME_UNENCODED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 826) encoding |= FWHT_ALPHA_UNENCODED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 827) encoding &= ~FWHT_FRAME_UNENCODED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 828) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 829)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 830) cf->size = (rlco - cf->rlc_data) * sizeof(*rlco);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 831) return encoding;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 832) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 833)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 834) static bool decode_plane(struct fwht_cframe *cf, const __be16 **rlco,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 835) u32 height, u32 width, const u8 *ref, u32 ref_stride,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 836) unsigned int ref_step, u8 *dst,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 837) unsigned int dst_stride, unsigned int dst_step,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 838) bool uncompressed, const __be16 *end_of_rlco_buf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 839) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 840) unsigned int copies = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 841) s16 copy[8 * 8];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 842) u16 stat;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 843) unsigned int i, j;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 844) bool is_intra = !ref;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 845)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 846) width = round_up(width, 8);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 847) height = round_up(height, 8);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 848)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 849) if (uncompressed) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 850) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 851)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 852) if (end_of_rlco_buf + 1 < *rlco + width * height / 2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 853) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 854) for (i = 0; i < height; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 855) memcpy(dst, *rlco, width);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 856) dst += dst_stride;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 857) *rlco += width / 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 858) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 859) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 860) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 861)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 862) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 863) * When decoding each macroblock the rlco pointer will be increased
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 864) * by 65 * 2 bytes worst-case.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 865) * To avoid overflow the buffer has to be 65/64th of the actual raw
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 866) * image size, just in case someone feeds it malicious data.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 867) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 868) for (j = 0; j < height / 8; j++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 869) for (i = 0; i < width / 8; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 870) const u8 *refp = ref + j * 8 * ref_stride +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 871) i * 8 * ref_step;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 872) u8 *dstp = dst + j * 8 * dst_stride + i * 8 * dst_step;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 873)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 874) if (copies) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 875) memcpy(cf->de_fwht, copy, sizeof(copy));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 876) if ((stat & PFRAME_BIT) && !is_intra)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 877) add_deltas(cf->de_fwht, refp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 878) ref_stride, ref_step);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 879) fill_decoder_block(dstp, cf->de_fwht,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 880) dst_stride, dst_step);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 881) copies--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 882) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 883) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 884)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 885) stat = derlc(rlco, cf->coeffs, end_of_rlco_buf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 886) if (stat & OVERFLOW_BIT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 887) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 888) if ((stat & PFRAME_BIT) && !is_intra)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 889) dequantize_inter(cf->coeffs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 890) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 891) dequantize_intra(cf->coeffs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 892)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 893) ifwht(cf->coeffs, cf->de_fwht,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 894) ((stat & PFRAME_BIT) && !is_intra) ? 0 : 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 895)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 896) copies = (stat & DUPS_MASK) >> 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 897) if (copies)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 898) memcpy(copy, cf->de_fwht, sizeof(copy));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 899) if ((stat & PFRAME_BIT) && !is_intra)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 900) add_deltas(cf->de_fwht, refp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 901) ref_stride, ref_step);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 902) fill_decoder_block(dstp, cf->de_fwht, dst_stride,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 903) dst_step);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 904) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 905) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 906) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 907) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 908)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 909) bool fwht_decode_frame(struct fwht_cframe *cf, u32 hdr_flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 910) unsigned int components_num, unsigned int width,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 911) unsigned int height, const struct fwht_raw_frame *ref,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 912) unsigned int ref_stride, unsigned int ref_chroma_stride,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 913) struct fwht_raw_frame *dst, unsigned int dst_stride,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 914) unsigned int dst_chroma_stride)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 915) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 916) const __be16 *rlco = cf->rlc_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 917) const __be16 *end_of_rlco_buf = cf->rlc_data +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 918) (cf->size / sizeof(*rlco)) - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 919)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 920) if (!decode_plane(cf, &rlco, height, width, ref->luma, ref_stride,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 921) ref->luma_alpha_step, dst->luma, dst_stride,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 922) dst->luma_alpha_step,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 923) hdr_flags & FWHT_FL_LUMA_IS_UNCOMPRESSED,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 924) end_of_rlco_buf))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 925) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 926)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 927) if (components_num >= 3) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 928) u32 h = height;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 929) u32 w = width;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 930)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 931) if (!(hdr_flags & FWHT_FL_CHROMA_FULL_HEIGHT))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 932) h /= 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 933) if (!(hdr_flags & FWHT_FL_CHROMA_FULL_WIDTH))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 934) w /= 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 935)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 936) if (!decode_plane(cf, &rlco, h, w, ref->cb, ref_chroma_stride,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 937) ref->chroma_step, dst->cb, dst_chroma_stride,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 938) dst->chroma_step,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 939) hdr_flags & FWHT_FL_CB_IS_UNCOMPRESSED,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 940) end_of_rlco_buf))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 941) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 942) if (!decode_plane(cf, &rlco, h, w, ref->cr, ref_chroma_stride,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 943) ref->chroma_step, dst->cr, dst_chroma_stride,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 944) dst->chroma_step,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 945) hdr_flags & FWHT_FL_CR_IS_UNCOMPRESSED,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 946) end_of_rlco_buf))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 947) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 948) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 949)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 950) if (components_num == 4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 951) if (!decode_plane(cf, &rlco, height, width, ref->alpha, ref_stride,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 952) ref->luma_alpha_step, dst->alpha, dst_stride,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 953) dst->luma_alpha_step,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 954) hdr_flags & FWHT_FL_ALPHA_IS_UNCOMPRESSED,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 955) end_of_rlco_buf))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 956) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 957) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 958) }