Orange Pi5 kernel

Deprecated Linux kernel 5.10.110 for OrangePi 5/5B/5+ boards

3 Commits   0 Branches   0 Tags
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   1) // SPDX-License-Identifier: LGPL-2.1+
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   2) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   3)  * Copyright 2016 Tom aan de Wiel
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   4)  * Copyright 2018 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   5)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   6)  * 8x8 Fast Walsh Hadamard Transform in sequency order based on the paper:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   7)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   8)  * A Recursive Algorithm for Sequency-Ordered Fast Walsh Transforms,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   9)  * R.D. Brown, 1977
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  10)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  11) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  12) #include <linux/string.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  13) #include <linux/kernel.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  14) #include "codec-fwht.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  15) 
/* Returned by derlc() when the compressed input ends before a block is complete. */
#define OVERFLOW_BIT BIT(14)

/*
 * Note: bit 0 of the header must always be 0. Otherwise it cannot
 * be guaranteed that the magic 8 byte sequence (see below) can
 * never occur in the rlc output.
 */
/* Header flag that rlc() sets for blocks of type PBLOCK. */
#define PFRAME_BIT BIT(15)
/*
 * NOTE(review): not referenced in the visible part of this file;
 * presumably masks a duplicate-block count stored in the header - confirm.
 */
#define DUPS_MASK 0x1ffe

/* Block types accepted by rlc(): PBLOCK makes it set PFRAME_BIT in the header. */
#define PBLOCK 0
#define IBLOCK 1

/* Run-length value that rlc() emits to mark "all remaining coefficients are zero". */
#define ALL_ZEROS 15
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  30) 
/*
 * Scan order used by the run-length coder for an 8x8 block: entry i
 * is the row-major position (x + y * 8) of the i-th coefficient that
 * is visited. Each source line below holds one anti-diagonal of the
 * block (all positions with the same x + y).
 */
static const uint8_t zigzag[8 * 8] = {
	 0,
	 1,  8,
	 2,  9, 16,
	 3, 10, 17, 24,
	 4, 11, 18, 25, 32,
	 5, 12, 19, 26, 33, 40,
	 6, 13, 20, 27, 34, 41, 48,
	 7, 14, 21, 28, 35, 42, 49, 56,
	15, 22, 29, 36, 43, 50, 57,
	23, 30, 37, 44, 51, 58,
	31, 38, 45, 52, 59,
	39, 46, 53, 60,
	47, 54, 61,
	55, 62,
	63,
};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  48) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  49) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  50)  * noinline_for_stack to work around
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  51)  * https://bugs.llvm.org/show_bug.cgi?id=38809
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  52)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  53) static int noinline_for_stack
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  54) rlc(const s16 *in, __be16 *output, int blocktype)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  55) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  56) 	s16 block[8 * 8];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  57) 	s16 *wp = block;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  58) 	int i = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  59) 	int x, y;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  60) 	int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  61) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  62) 	/* read in block from framebuffer */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  63) 	int lastzero_run = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  64) 	int to_encode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  65) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  66) 	for (y = 0; y < 8; y++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  67) 		for (x = 0; x < 8; x++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  68) 			*wp = in[x + y * 8];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  69) 			wp++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  70) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  71) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  72) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  73) 	/* keep track of amount of trailing zeros */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  74) 	for (i = 63; i >= 0 && !block[zigzag[i]]; i--)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  75) 		lastzero_run++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  76) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  77) 	*output++ = (blocktype == PBLOCK ? htons(PFRAME_BIT) : 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  78) 	ret++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  79) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  80) 	to_encode = 8 * 8 - (lastzero_run > 14 ? lastzero_run : 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  81) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  82) 	i = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  83) 	while (i < to_encode) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  84) 		int cnt = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  85) 		int tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  86) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  87) 		/* count leading zeros */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  88) 		while ((tmp = block[zigzag[i]]) == 0 && cnt < 14) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  89) 			cnt++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  90) 			i++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  91) 			if (i == to_encode) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  92) 				cnt--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  93) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  94) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  95) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  96) 		/* 4 bits for run, 12 for coefficient (quantization by 4) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  97) 		*output++ = htons((cnt | tmp << 4));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  98) 		i++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  99) 		ret++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) 	if (lastzero_run > 14) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) 		*output = htons(ALL_ZEROS | 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) 		ret++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110)  * This function will worst-case increase rlc_in by 65*2 bytes:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111)  * one s16 value for the header and 8 * 8 coefficients of type s16.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113) static noinline_for_stack u16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) derlc(const __be16 **rlc_in, s16 *dwht_out, const __be16 *end_of_input)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) 	/* header */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117) 	const __be16 *input = *rlc_in;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118) 	u16 stat;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) 	int dec_count = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120) 	s16 block[8 * 8 + 16];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121) 	s16 *wp = block;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122) 	int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124) 	if (input > end_of_input)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125) 		return OVERFLOW_BIT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) 	stat = ntohs(*input++);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129) 	 * Now de-compress, it expands one byte to up to 15 bytes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130) 	 * (or fills the remainder of the 64 bytes with zeroes if it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131) 	 * is the last byte to expand).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133) 	 * So block has to be 8 * 8 + 16 bytes, the '+ 16' is to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134) 	 * allow for overflow if the incoming data was malformed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136) 	while (dec_count < 8 * 8) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137) 		s16 in;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138) 		int length;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139) 		int coeff;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141) 		if (input > end_of_input)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142) 			return OVERFLOW_BIT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143) 		in = ntohs(*input++);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144) 		length = in & 0xf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145) 		coeff = in >> 4;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147) 		/* fill remainder with zeros */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148) 		if (length == 15) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149) 			for (i = 0; i < 64 - dec_count; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150) 				*wp++ = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154) 		for (i = 0; i < length; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155) 			*wp++ = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156) 		*wp++ = coeff;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157) 		dec_count += length + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 160) 	wp = block;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 161) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 162) 	for (i = 0; i < 64; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 163) 		int pos = zigzag[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 164) 		int y = pos / 8;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 165) 		int x = pos % 8;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 166) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167) 		dwht_out[x + y * 8] = *wp++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 168) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169) 	*rlc_in = input;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170) 	return stat;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172) 
/*
 * Per-coefficient shift counts applied by quantize_intra() (right
 * shift) and dequantize_intra() (left shift); one entry per position
 * of the 8x8 block, in row-major order.
 */
static const int quant_table[8 * 8] = {
	2, 2, 2, 2, 2, 2, 2, 2,
	2, 2, 2, 2, 2, 2, 2, 2,
	2, 2, 2, 2, 2, 2, 2, 3,
	2, 2, 2, 2, 2, 2, 3, 6,
	2, 2, 2, 2, 2, 3, 6, 6,
	2, 2, 2, 2, 3, 6, 6, 6,
	2, 2, 2, 3, 6, 6, 6, 6,
	2, 2, 3, 6, 6, 6, 6, 8,
};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 183) 
/*
 * Per-coefficient shift counts applied by quantize_inter() (right
 * shift) and dequantize_inter() (left shift); one entry per position
 * of the 8x8 block, in row-major order.
 */
static const int quant_table_p[8 * 8] = {
	3, 3, 3, 3, 3, 3, 3,  3,
	3, 3, 3, 3, 3, 3, 3,  3,
	3, 3, 3, 3, 3, 3, 3,  3,
	3, 3, 3, 3, 3, 3, 3,  6,
	3, 3, 3, 3, 3, 3, 6,  6,
	3, 3, 3, 3, 3, 6, 6,  9,
	3, 3, 3, 3, 6, 6, 9,  9,
	3, 3, 3, 6, 6, 9, 9, 10,
};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 194) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 195) static void quantize_intra(s16 *coeff, s16 *de_coeff, u16 qp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 196) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 197) 	const int *quant = quant_table;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 198) 	int i, j;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 199) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 200) 	for (j = 0; j < 8; j++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 201) 		for (i = 0; i < 8; i++, quant++, coeff++, de_coeff++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 202) 			*coeff >>= *quant;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 203) 			if (*coeff >= -qp && *coeff <= qp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 204) 				*coeff = *de_coeff = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 205) 			else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 206) 				*de_coeff = *coeff << *quant;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 207) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 208) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 209) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 210) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 211) static void dequantize_intra(s16 *coeff)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 212) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 213) 	const int *quant = quant_table;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 214) 	int i, j;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 215) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 216) 	for (j = 0; j < 8; j++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 217) 		for (i = 0; i < 8; i++, quant++, coeff++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 218) 			*coeff <<= *quant;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 219) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 220) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 221) static void quantize_inter(s16 *coeff, s16 *de_coeff, u16 qp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 222) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 223) 	const int *quant = quant_table_p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 224) 	int i, j;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 225) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 226) 	for (j = 0; j < 8; j++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 227) 		for (i = 0; i < 8; i++, quant++, coeff++, de_coeff++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 228) 			*coeff >>= *quant;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 229) 			if (*coeff >= -qp && *coeff <= qp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 230) 				*coeff = *de_coeff = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 231) 			else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 232) 				*de_coeff = *coeff << *quant;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 233) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 234) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 235) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 236) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 237) static void dequantize_inter(s16 *coeff)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 238) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 239) 	const int *quant = quant_table_p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 240) 	int i, j;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 241) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 242) 	for (j = 0; j < 8; j++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 243) 		for (i = 0; i < 8; i++, quant++, coeff++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 244) 			*coeff <<= *quant;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 245) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 246) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 247) static void noinline_for_stack fwht(const u8 *block, s16 *output_block,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 248) 				    unsigned int stride,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 249) 				    unsigned int input_step, bool intra)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 250) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 251) 	/* we'll need more than 8 bits for the transformed coefficients */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 252) 	s32 workspace1[8], workspace2[8];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 253) 	const u8 *tmp = block;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 254) 	s16 *out = output_block;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 255) 	int add = intra ? 256 : 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 256) 	unsigned int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 257) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 258) 	/* stage 1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 259) 	for (i = 0; i < 8; i++, tmp += stride, out += 8) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 260) 		switch (input_step) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 261) 		case 1:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 262) 			workspace1[0]  = tmp[0] + tmp[1] - add;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 263) 			workspace1[1]  = tmp[0] - tmp[1];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 264) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 265) 			workspace1[2]  = tmp[2] + tmp[3] - add;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 266) 			workspace1[3]  = tmp[2] - tmp[3];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 267) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 268) 			workspace1[4]  = tmp[4] + tmp[5] - add;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 269) 			workspace1[5]  = tmp[4] - tmp[5];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 270) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 271) 			workspace1[6]  = tmp[6] + tmp[7] - add;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 272) 			workspace1[7]  = tmp[6] - tmp[7];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 273) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 274) 		case 2:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 275) 			workspace1[0]  = tmp[0] + tmp[2] - add;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 276) 			workspace1[1]  = tmp[0] - tmp[2];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 277) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 278) 			workspace1[2]  = tmp[4] + tmp[6] - add;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 279) 			workspace1[3]  = tmp[4] - tmp[6];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 280) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 281) 			workspace1[4]  = tmp[8] + tmp[10] - add;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 282) 			workspace1[5]  = tmp[8] - tmp[10];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 283) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 284) 			workspace1[6]  = tmp[12] + tmp[14] - add;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 285) 			workspace1[7]  = tmp[12] - tmp[14];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 286) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 287) 		case 3:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 288) 			workspace1[0]  = tmp[0] + tmp[3] - add;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 289) 			workspace1[1]  = tmp[0] - tmp[3];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 290) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 291) 			workspace1[2]  = tmp[6] + tmp[9] - add;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 292) 			workspace1[3]  = tmp[6] - tmp[9];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 293) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 294) 			workspace1[4]  = tmp[12] + tmp[15] - add;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 295) 			workspace1[5]  = tmp[12] - tmp[15];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 296) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 297) 			workspace1[6]  = tmp[18] + tmp[21] - add;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 298) 			workspace1[7]  = tmp[18] - tmp[21];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 299) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 300) 		default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 301) 			workspace1[0]  = tmp[0] + tmp[4] - add;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 302) 			workspace1[1]  = tmp[0] - tmp[4];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 303) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 304) 			workspace1[2]  = tmp[8] + tmp[12] - add;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 305) 			workspace1[3]  = tmp[8] - tmp[12];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 306) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 307) 			workspace1[4]  = tmp[16] + tmp[20] - add;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 308) 			workspace1[5]  = tmp[16] - tmp[20];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 309) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 310) 			workspace1[6]  = tmp[24] + tmp[28] - add;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 311) 			workspace1[7]  = tmp[24] - tmp[28];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 312) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 313) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 314) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 315) 		/* stage 2 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 316) 		workspace2[0] = workspace1[0] + workspace1[2];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 317) 		workspace2[1] = workspace1[0] - workspace1[2];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 318) 		workspace2[2] = workspace1[1] - workspace1[3];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 319) 		workspace2[3] = workspace1[1] + workspace1[3];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 320) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 321) 		workspace2[4] = workspace1[4] + workspace1[6];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 322) 		workspace2[5] = workspace1[4] - workspace1[6];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 323) 		workspace2[6] = workspace1[5] - workspace1[7];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 324) 		workspace2[7] = workspace1[5] + workspace1[7];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 325) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 326) 		/* stage 3 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 327) 		out[0] = workspace2[0] + workspace2[4];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 328) 		out[1] = workspace2[0] - workspace2[4];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 329) 		out[2] = workspace2[1] - workspace2[5];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 330) 		out[3] = workspace2[1] + workspace2[5];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 331) 		out[4] = workspace2[2] + workspace2[6];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 332) 		out[5] = workspace2[2] - workspace2[6];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 333) 		out[6] = workspace2[3] - workspace2[7];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 334) 		out[7] = workspace2[3] + workspace2[7];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 335) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 336) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 337) 	out = output_block;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 338) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 339) 	for (i = 0; i < 8; i++, out++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 340) 		/* stage 1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 341) 		workspace1[0]  = out[0] + out[1 * 8];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 342) 		workspace1[1]  = out[0] - out[1 * 8];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 343) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 344) 		workspace1[2]  = out[2 * 8] + out[3 * 8];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 345) 		workspace1[3]  = out[2 * 8] - out[3 * 8];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 346) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 347) 		workspace1[4]  = out[4 * 8] + out[5 * 8];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 348) 		workspace1[5]  = out[4 * 8] - out[5 * 8];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 349) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 350) 		workspace1[6]  = out[6 * 8] + out[7 * 8];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 351) 		workspace1[7]  = out[6 * 8] - out[7 * 8];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 352) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 353) 		/* stage 2 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 354) 		workspace2[0] = workspace1[0] + workspace1[2];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 355) 		workspace2[1] = workspace1[0] - workspace1[2];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 356) 		workspace2[2] = workspace1[1] - workspace1[3];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 357) 		workspace2[3] = workspace1[1] + workspace1[3];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 358) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 359) 		workspace2[4] = workspace1[4] + workspace1[6];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 360) 		workspace2[5] = workspace1[4] - workspace1[6];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 361) 		workspace2[6] = workspace1[5] - workspace1[7];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 362) 		workspace2[7] = workspace1[5] + workspace1[7];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 363) 		/* stage 3 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 364) 		out[0 * 8] = workspace2[0] + workspace2[4];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 365) 		out[1 * 8] = workspace2[0] - workspace2[4];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 366) 		out[2 * 8] = workspace2[1] - workspace2[5];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 367) 		out[3 * 8] = workspace2[1] + workspace2[5];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 368) 		out[4 * 8] = workspace2[2] + workspace2[6];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 369) 		out[5 * 8] = workspace2[2] - workspace2[6];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 370) 		out[6 * 8] = workspace2[3] - workspace2[7];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 371) 		out[7 * 8] = workspace2[3] + workspace2[7];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 372) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 373) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 374) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 375) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 376)  * Not the nicest way of doing it, but P-blocks get twice the range of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 377)  * that of the I-blocks. Therefore we need a type bigger than 8 bits.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 378)  * Furthermore values can be negative... This is just a version that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 379)  * works with 16 signed data
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 380)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 381) static void noinline_for_stack
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 382) fwht16(const s16 *block, s16 *output_block, int stride, int intra)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 383) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 384) 	/* we'll need more than 8 bits for the transformed coefficients */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 385) 	s32 workspace1[8], workspace2[8];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 386) 	const s16 *tmp = block;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 387) 	s16 *out = output_block;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 388) 	int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 389) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 390) 	for (i = 0; i < 8; i++, tmp += stride, out += 8) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 391) 		/* stage 1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 392) 		workspace1[0]  = tmp[0] + tmp[1];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 393) 		workspace1[1]  = tmp[0] - tmp[1];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 394) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 395) 		workspace1[2]  = tmp[2] + tmp[3];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 396) 		workspace1[3]  = tmp[2] - tmp[3];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 397) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 398) 		workspace1[4]  = tmp[4] + tmp[5];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 399) 		workspace1[5]  = tmp[4] - tmp[5];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 400) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 401) 		workspace1[6]  = tmp[6] + tmp[7];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 402) 		workspace1[7]  = tmp[6] - tmp[7];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 403) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 404) 		/* stage 2 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 405) 		workspace2[0] = workspace1[0] + workspace1[2];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 406) 		workspace2[1] = workspace1[0] - workspace1[2];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 407) 		workspace2[2] = workspace1[1] - workspace1[3];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 408) 		workspace2[3] = workspace1[1] + workspace1[3];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 409) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 410) 		workspace2[4] = workspace1[4] + workspace1[6];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 411) 		workspace2[5] = workspace1[4] - workspace1[6];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 412) 		workspace2[6] = workspace1[5] - workspace1[7];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 413) 		workspace2[7] = workspace1[5] + workspace1[7];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 414) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 415) 		/* stage 3 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 416) 		out[0] = workspace2[0] + workspace2[4];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 417) 		out[1] = workspace2[0] - workspace2[4];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 418) 		out[2] = workspace2[1] - workspace2[5];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 419) 		out[3] = workspace2[1] + workspace2[5];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 420) 		out[4] = workspace2[2] + workspace2[6];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 421) 		out[5] = workspace2[2] - workspace2[6];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 422) 		out[6] = workspace2[3] - workspace2[7];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 423) 		out[7] = workspace2[3] + workspace2[7];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 424) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 425) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 426) 	out = output_block;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 427) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 428) 	for (i = 0; i < 8; i++, out++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 429) 		/* stage 1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 430) 		workspace1[0]  = out[0] + out[1*8];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 431) 		workspace1[1]  = out[0] - out[1*8];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 432) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 433) 		workspace1[2]  = out[2*8] + out[3*8];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 434) 		workspace1[3]  = out[2*8] - out[3*8];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 435) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 436) 		workspace1[4]  = out[4*8] + out[5*8];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 437) 		workspace1[5]  = out[4*8] - out[5*8];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 438) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 439) 		workspace1[6]  = out[6*8] + out[7*8];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 440) 		workspace1[7]  = out[6*8] - out[7*8];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 441) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 442) 		/* stage 2 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 443) 		workspace2[0] = workspace1[0] + workspace1[2];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 444) 		workspace2[1] = workspace1[0] - workspace1[2];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 445) 		workspace2[2] = workspace1[1] - workspace1[3];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 446) 		workspace2[3] = workspace1[1] + workspace1[3];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 447) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 448) 		workspace2[4] = workspace1[4] + workspace1[6];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 449) 		workspace2[5] = workspace1[4] - workspace1[6];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 450) 		workspace2[6] = workspace1[5] - workspace1[7];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 451) 		workspace2[7] = workspace1[5] + workspace1[7];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 452) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 453) 		/* stage 3 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 454) 		out[0*8] = workspace2[0] + workspace2[4];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 455) 		out[1*8] = workspace2[0] - workspace2[4];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 456) 		out[2*8] = workspace2[1] - workspace2[5];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 457) 		out[3*8] = workspace2[1] + workspace2[5];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 458) 		out[4*8] = workspace2[2] + workspace2[6];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 459) 		out[5*8] = workspace2[2] - workspace2[6];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 460) 		out[6*8] = workspace2[3] - workspace2[7];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 461) 		out[7*8] = workspace2[3] + workspace2[7];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 462) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 463) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 464) 
/*
 * One 8-point inverse butterfly. Reads eight s16 samples spaced in_step
 * elements apart and stores the eight untruncated int results in res[].
 * All inputs are read before anything is written, so the caller may write
 * the results back over the input samples afterwards.
 */
static void ifwht_butterfly8(const s16 *in, int in_step, int res[8])
{
	int a[8], b[8];
	int k;

	/* stage 1: sum and difference of each adjacent pair */
	for (k = 0; k < 8; k += 2) {
		const int first = in[k * in_step];
		const int second = in[(k + 1) * in_step];

		a[k] = first + second;
		a[k + 1] = first - second;
	}

	/* stage 2: combine the pairs inside each half (+, -, -, +) */
	for (k = 0; k < 8; k += 4) {
		b[k + 0] = a[k + 0] + a[k + 2];
		b[k + 1] = a[k + 0] - a[k + 2];
		b[k + 2] = a[k + 1] - a[k + 3];
		b[k + 3] = a[k + 1] + a[k + 3];
	}

	/* stage 3: combine the two halves (+, -, -, +, +, -, -, +) */
	res[0] = b[0] + b[4];
	res[1] = b[0] - b[4];
	res[2] = b[1] - b[5];
	res[3] = b[1] + b[5];
	res[4] = b[2] + b[6];
	res[5] = b[2] - b[6];
	res[6] = b[3] - b[7];
	res[7] = b[3] + b[7];
}

/*
 * Inverse 8x8 transform of @block into @output_block.
 *
 * The rows of @block are transformed first and stored in @output_block,
 * then every column of @output_block is transformed in place. Each final
 * sample is shifted right by 6; when @intra is non-zero 128 is added on
 * top of that, otherwise the shifted value is stored as is.
 *
 * Intermediate sums are computed as int because they need more bits than
 * the s16 samples they are built from.
 */
static noinline_for_stack void
ifwht(const s16 *block, s16 *output_block, int intra)
{
	const int bias = intra ? 128 : 0;
	int res[8];
	int line, k;

	/* horizontal pass: one row of block per iteration */
	for (line = 0; line < 8; line++) {
		s16 *row = output_block + 8 * line;

		ifwht_butterfly8(block + 8 * line, 1, res);
		for (k = 0; k < 8; k++)
			row[k] = res[k];
	}

	/* vertical pass: one column of output_block per iteration */
	for (line = 0; line < 8; line++) {
		s16 *col = output_block + line;

		ifwht_butterfly8(col, 8, res);
		for (k = 0; k < 8; k++) {
			/* narrow to s16 first, exactly like a store to the block */
			s16 v = res[k];

			v >>= 6;
			v += bias;
			col[8 * k] = v;
		}
	}
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 575) 
/*
 * Gather an 8x8 block of u8 samples into 64 consecutive s16 values.
 *
 * @input:      first sample of the block
 * @dst:        receives the 64 widened samples, row by row
 * @stride:     distance in bytes between the starts of two source rows
 * @input_step: distance in bytes between two samples of the same row
 */
static void fill_encoder_block(const u8 *input, s16 *dst,
			       unsigned int stride, unsigned int input_step)
{
	/* what is left of a row's stride once its 8 samples were walked */
	const unsigned int row_skip = stride - 8 * input_step;
	s16 *const block_end = dst + 8 * 8;

	while (dst != block_end) {
		s16 *const row_end = dst + 8;

		while (dst != row_end) {
			*dst++ = *input;
			input += input_step;
		}
		input += row_skip;
	}
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 587) 
/*
 * Sum of absolute deviations of the 64 samples in @input from their mean.
 * The mean is the integer quotient of the sample sum by 64.
 */
static int var_intra(const s16 *input)
{
	int32_t total = 0;
	int32_t mean;
	int32_t deviation = 0;
	int n;

	for (n = 0; n < 8 * 8; n++)
		total += input[n];
	mean = total / 64;

	for (n = 0; n < 8 * 8; n++) {
		const int32_t diff = input[n] - mean;

		if (diff < 0)
			deviation += -diff;
		else
			deviation += diff;
	}
	return deviation;
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 603) 
/*
 * Sum of absolute differences between the 64 samples of @old and the
 * 64 samples of @new.
 */
static int var_inter(const s16 *old, const s16 *new)
{
	int32_t sad = 0;
	int n;

	for (n = 0; n < 8 * 8; n++) {
		const int diff = old[n] - new[n];

		if (diff < 0)
			sad += -diff;
		else
			sad += diff;
	}
	return sad;
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 613) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 614) static noinline_for_stack int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 615) decide_blocktype(const u8 *cur, const u8 *reference, s16 *deltablock,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 616) 		 unsigned int stride, unsigned int input_step)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 617) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 618) 	s16 tmp[64];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 619) 	s16 old[64];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 620) 	s16 *work = tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 621) 	unsigned int k, l;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 622) 	int vari;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 623) 	int vard;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 624) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 625) 	fill_encoder_block(cur, tmp, stride, input_step);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 626) 	fill_encoder_block(reference, old, 8, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 627) 	vari = var_intra(tmp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 628) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 629) 	for (k = 0; k < 8; k++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 630) 		for (l = 0; l < 8; l++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 631) 			*deltablock = *work - *reference;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 632) 			deltablock++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 633) 			work++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 634) 			reference++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 635) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 636) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 637) 	deltablock -= 64;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 638) 	vard = var_inter(old, tmp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 639) 	return vari <= vard ? IBLOCK : PBLOCK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 640) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 641) 
/*
 * Scatter 64 consecutive s16 samples into an 8x8 block of bytes,
 * saturating every sample to the 0..255 range.
 *
 * @dst:      first byte of the destination block
 * @input:    the 64 samples, row by row
 * @stride:   distance in bytes between the starts of two destination rows
 * @dst_step: distance in bytes between two samples of the same row
 */
static void fill_decoder_block(u8 *dst, const s16 *input, int stride,
			       unsigned int dst_step)
{
	/* what is left of a row's stride once its 8 samples were walked */
	const unsigned int row_skip = stride - (8 * dst_step);
	const s16 *const block_end = input + 8 * 8;

	while (input != block_end) {
		const s16 *const row_end = input + 8;

		for (; input != row_end; input++) {
			const s16 v = *input;

			*dst = v < 0 ? 0 : (v > 255 ? 255 : v);
			dst += dst_step;
		}
		dst += row_skip;
	}
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 659) 
/*
 * Add an 8x8 block of reference bytes to the 64 samples in @deltas,
 * in place, saturating every result to the 0..255 range.
 *
 * @deltas:   64 consecutive samples, updated in place
 * @ref:      first byte of the reference block
 * @stride:   distance in bytes between the starts of two reference rows
 * @ref_step: distance in bytes between two samples of the same row
 */
static void add_deltas(s16 *deltas, const u8 *ref, int stride,
		       unsigned int ref_step)
{
	/* what is left of a row's stride once its 8 samples were walked */
	const unsigned int row_skip = stride - (8 * ref_step);
	s16 *const block_end = deltas + 8 * 8;

	while (deltas != block_end) {
		s16 *const row_end = deltas + 8;

		for (; deltas != row_end; deltas++) {
			/* the sum is narrowed to s16 before it is clamped */
			s16 v = *deltas + *ref;

			ref += ref_step;
			/*
			 * Due to quantizing, it is possible that the
			 * decoded coefficients are slightly out of range.
			 */
			if (v < 0)
				v = 0;
			else if (v > 255)
				v = 255;
			*deltas = v;
		}
		ref += row_skip;
	}
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 682) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 683) static u32 encode_plane(u8 *input, u8 *refp, __be16 **rlco, __be16 *rlco_max,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 684) 			struct fwht_cframe *cf, u32 height, u32 width,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 685) 			u32 stride, unsigned int input_step,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 686) 			bool is_intra, bool next_is_intra)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 687) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 688) 	u8 *input_start = input;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 689) 	__be16 *rlco_start = *rlco;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 690) 	s16 deltablock[64];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 691) 	__be16 pframe_bit = htons(PFRAME_BIT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 692) 	u32 encoding = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 693) 	unsigned int last_size = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 694) 	unsigned int i, j;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 695) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 696) 	width = round_up(width, 8);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 697) 	height = round_up(height, 8);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 698) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 699) 	for (j = 0; j < height / 8; j++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 700) 		input = input_start + j * 8 * stride;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 701) 		for (i = 0; i < width / 8; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 702) 			/* intra code, first frame is always intra coded. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 703) 			int blocktype = IBLOCK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 704) 			unsigned int size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 705) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 706) 			if (!is_intra)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 707) 				blocktype = decide_blocktype(input, refp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 708) 					deltablock, stride, input_step);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 709) 			if (blocktype == IBLOCK) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 710) 				fwht(input, cf->coeffs, stride, input_step, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 711) 				quantize_intra(cf->coeffs, cf->de_coeffs,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 712) 					       cf->i_frame_qp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 713) 			} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 714) 				/* inter code */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 715) 				encoding |= FWHT_FRAME_PCODED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 716) 				fwht16(deltablock, cf->coeffs, 8, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 717) 				quantize_inter(cf->coeffs, cf->de_coeffs,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 718) 					       cf->p_frame_qp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 719) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 720) 			if (!next_is_intra) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 721) 				ifwht(cf->de_coeffs, cf->de_fwht, blocktype);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 722) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 723) 				if (blocktype == PBLOCK)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 724) 					add_deltas(cf->de_fwht, refp, 8, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 725) 				fill_decoder_block(refp, cf->de_fwht, 8, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 726) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 727) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 728) 			input += 8 * input_step;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 729) 			refp += 8 * 8;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 730) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 731) 			size = rlc(cf->coeffs, *rlco, blocktype);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 732) 			if (last_size == size &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 733) 			    !memcmp(*rlco + 1, *rlco - size + 1, 2 * size - 2)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 734) 				__be16 *last_rlco = *rlco - size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 735) 				s16 hdr = ntohs(*last_rlco);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 736) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 737) 				if (!((*last_rlco ^ **rlco) & pframe_bit) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 738) 				    (hdr & DUPS_MASK) < DUPS_MASK)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 739) 					*last_rlco = htons(hdr + 2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 740) 				else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 741) 					*rlco += size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 742) 			} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 743) 				*rlco += size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 744) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 745) 			if (*rlco >= rlco_max) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 746) 				encoding |= FWHT_FRAME_UNENCODED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 747) 				goto exit_loop;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 748) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 749) 			last_size = size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 750) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 751) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 752) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 753) exit_loop:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 754) 	if (encoding & FWHT_FRAME_UNENCODED) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 755) 		u8 *out = (u8 *)rlco_start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 756) 		u8 *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 757) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 758) 		input = input_start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 759) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 760) 		 * The compressed stream should never contain the magic
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 761) 		 * header, so when we copy the YUV data we replace 0xff
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 762) 		 * by 0xfe. Since YUV is limited range such values
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 763) 		 * shouldn't appear anyway.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 764) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 765) 		for (j = 0; j < height; j++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 766) 			for (i = 0, p = input; i < width; i++, p += input_step)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 767) 				*out++ = (*p == 0xff) ? 0xfe : *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 768) 			input += stride;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 769) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 770) 		*rlco = (__be16 *)out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 771) 		encoding &= ~FWHT_FRAME_PCODED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 772) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 773) 	return encoding;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 774) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 775) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 776) u32 fwht_encode_frame(struct fwht_raw_frame *frm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 777) 		      struct fwht_raw_frame *ref_frm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 778) 		      struct fwht_cframe *cf,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 779) 		      bool is_intra, bool next_is_intra,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 780) 		      unsigned int width, unsigned int height,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 781) 		      unsigned int stride, unsigned int chroma_stride)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 782) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 783) 	unsigned int size = height * width;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 784) 	__be16 *rlco = cf->rlc_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 785) 	__be16 *rlco_max;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 786) 	u32 encoding;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 787) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 788) 	rlco_max = rlco + size / 2 - 256;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 789) 	encoding = encode_plane(frm->luma, ref_frm->luma, &rlco, rlco_max, cf,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 790) 				height, width, stride,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 791) 				frm->luma_alpha_step, is_intra, next_is_intra);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 792) 	if (encoding & FWHT_FRAME_UNENCODED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 793) 		encoding |= FWHT_LUMA_UNENCODED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 794) 	encoding &= ~FWHT_FRAME_UNENCODED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 795) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 796) 	if (frm->components_num >= 3) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 797) 		u32 chroma_h = height / frm->height_div;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 798) 		u32 chroma_w = width / frm->width_div;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 799) 		unsigned int chroma_size = chroma_h * chroma_w;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 800) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 801) 		rlco_max = rlco + chroma_size / 2 - 256;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 802) 		encoding |= encode_plane(frm->cb, ref_frm->cb, &rlco, rlco_max,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 803) 					 cf, chroma_h, chroma_w,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 804) 					 chroma_stride, frm->chroma_step,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 805) 					 is_intra, next_is_intra);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 806) 		if (encoding & FWHT_FRAME_UNENCODED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 807) 			encoding |= FWHT_CB_UNENCODED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 808) 		encoding &= ~FWHT_FRAME_UNENCODED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 809) 		rlco_max = rlco + chroma_size / 2 - 256;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 810) 		encoding |= encode_plane(frm->cr, ref_frm->cr, &rlco, rlco_max,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 811) 					 cf, chroma_h, chroma_w,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 812) 					 chroma_stride, frm->chroma_step,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 813) 					 is_intra, next_is_intra);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 814) 		if (encoding & FWHT_FRAME_UNENCODED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 815) 			encoding |= FWHT_CR_UNENCODED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 816) 		encoding &= ~FWHT_FRAME_UNENCODED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 817) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 818) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 819) 	if (frm->components_num == 4) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 820) 		rlco_max = rlco + size / 2 - 256;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 821) 		encoding |= encode_plane(frm->alpha, ref_frm->alpha, &rlco,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 822) 					 rlco_max, cf, height, width,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 823) 					 stride, frm->luma_alpha_step,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 824) 					 is_intra, next_is_intra);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 825) 		if (encoding & FWHT_FRAME_UNENCODED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 826) 			encoding |= FWHT_ALPHA_UNENCODED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 827) 		encoding &= ~FWHT_FRAME_UNENCODED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 828) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 829) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 830) 	cf->size = (rlco - cf->rlc_data) * sizeof(*rlco);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 831) 	return encoding;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 832) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 833) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 834) static bool decode_plane(struct fwht_cframe *cf, const __be16 **rlco,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 835) 			 u32 height, u32 width, const u8 *ref, u32 ref_stride,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 836) 			 unsigned int ref_step, u8 *dst,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 837) 			 unsigned int dst_stride, unsigned int dst_step,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 838) 			 bool uncompressed, const __be16 *end_of_rlco_buf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 839) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 840) 	unsigned int copies = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 841) 	s16 copy[8 * 8];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 842) 	u16 stat;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 843) 	unsigned int i, j;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 844) 	bool is_intra = !ref;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 845) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 846) 	width = round_up(width, 8);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 847) 	height = round_up(height, 8);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 848) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 849) 	if (uncompressed) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 850) 		int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 851) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 852) 		if (end_of_rlco_buf + 1 < *rlco + width * height / 2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 853) 			return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 854) 		for (i = 0; i < height; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 855) 			memcpy(dst, *rlco, width);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 856) 			dst += dst_stride;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 857) 			*rlco += width / 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 858) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 859) 		return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 860) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 861) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 862) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 863) 	 * When decoding each macroblock the rlco pointer will be increased
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 864) 	 * by 65 * 2 bytes worst-case.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 865) 	 * To avoid overflow the buffer has to be 65/64th of the actual raw
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 866) 	 * image size, just in case someone feeds it malicious data.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 867) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 868) 	for (j = 0; j < height / 8; j++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 869) 		for (i = 0; i < width / 8; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 870) 			const u8 *refp = ref + j * 8 * ref_stride +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 871) 				i * 8 * ref_step;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 872) 			u8 *dstp = dst + j * 8 * dst_stride + i * 8 * dst_step;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 873) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 874) 			if (copies) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 875) 				memcpy(cf->de_fwht, copy, sizeof(copy));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 876) 				if ((stat & PFRAME_BIT) && !is_intra)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 877) 					add_deltas(cf->de_fwht, refp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 878) 						   ref_stride, ref_step);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 879) 				fill_decoder_block(dstp, cf->de_fwht,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 880) 						   dst_stride, dst_step);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 881) 				copies--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 882) 				continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 883) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 884) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 885) 			stat = derlc(rlco, cf->coeffs, end_of_rlco_buf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 886) 			if (stat & OVERFLOW_BIT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 887) 				return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 888) 			if ((stat & PFRAME_BIT) && !is_intra)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 889) 				dequantize_inter(cf->coeffs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 890) 			else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 891) 				dequantize_intra(cf->coeffs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 892) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 893) 			ifwht(cf->coeffs, cf->de_fwht,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 894) 			      ((stat & PFRAME_BIT) && !is_intra) ? 0 : 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 895) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 896) 			copies = (stat & DUPS_MASK) >> 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 897) 			if (copies)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 898) 				memcpy(copy, cf->de_fwht, sizeof(copy));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 899) 			if ((stat & PFRAME_BIT) && !is_intra)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 900) 				add_deltas(cf->de_fwht, refp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 901) 					   ref_stride, ref_step);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 902) 			fill_decoder_block(dstp, cf->de_fwht, dst_stride,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 903) 					   dst_step);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 904) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 905) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 906) 	return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 907) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 908) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 909) bool fwht_decode_frame(struct fwht_cframe *cf, u32 hdr_flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 910) 		       unsigned int components_num, unsigned int width,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 911) 		       unsigned int height, const struct fwht_raw_frame *ref,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 912) 		       unsigned int ref_stride, unsigned int ref_chroma_stride,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 913) 		       struct fwht_raw_frame *dst, unsigned int dst_stride,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 914) 		       unsigned int dst_chroma_stride)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 915) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 916) 	const __be16 *rlco = cf->rlc_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 917) 	const __be16 *end_of_rlco_buf = cf->rlc_data +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 918) 			(cf->size / sizeof(*rlco)) - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 919) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 920) 	if (!decode_plane(cf, &rlco, height, width, ref->luma, ref_stride,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 921) 			  ref->luma_alpha_step, dst->luma, dst_stride,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 922) 			  dst->luma_alpha_step,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 923) 			  hdr_flags & FWHT_FL_LUMA_IS_UNCOMPRESSED,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 924) 			  end_of_rlco_buf))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 925) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 926) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 927) 	if (components_num >= 3) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 928) 		u32 h = height;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 929) 		u32 w = width;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 930) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 931) 		if (!(hdr_flags & FWHT_FL_CHROMA_FULL_HEIGHT))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 932) 			h /= 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 933) 		if (!(hdr_flags & FWHT_FL_CHROMA_FULL_WIDTH))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 934) 			w /= 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 935) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 936) 		if (!decode_plane(cf, &rlco, h, w, ref->cb, ref_chroma_stride,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 937) 				  ref->chroma_step, dst->cb, dst_chroma_stride,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 938) 				  dst->chroma_step,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 939) 				  hdr_flags & FWHT_FL_CB_IS_UNCOMPRESSED,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 940) 				  end_of_rlco_buf))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 941) 			return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 942) 		if (!decode_plane(cf, &rlco, h, w, ref->cr, ref_chroma_stride,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 943) 				  ref->chroma_step, dst->cr, dst_chroma_stride,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 944) 				  dst->chroma_step,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 945) 				  hdr_flags & FWHT_FL_CR_IS_UNCOMPRESSED,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 946) 				  end_of_rlco_buf))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 947) 			return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 948) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 949) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 950) 	if (components_num == 4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 951) 		if (!decode_plane(cf, &rlco, height, width, ref->alpha, ref_stride,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 952) 				  ref->luma_alpha_step, dst->alpha, dst_stride,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 953) 				  dst->luma_alpha_step,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 954) 				  hdr_flags & FWHT_FL_ALPHA_IS_UNCOMPRESSED,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 955) 				  end_of_rlco_buf))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 956) 			return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 957) 	return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 958) }