^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) /* SPDX-License-Identifier: GPL-2.0 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) #ifndef _ASM_HASH_H
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) #define _ASM_HASH_H
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) * Fortunately, most people who want to run Linux on Microblaze enable
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) * both multiplier and barrel shifter, but omitting them is technically
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) * a supported configuration.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) * With just a barrel shifter, we can implement an efficient constant
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11) * multiply using shifts and adds. GCC can find a 9-step solution, but
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12) * this 6-step solution was found by Yevgen Voronenko's implementation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) * of the Hcub algorithm at http://spiral.ece.cmu.edu/mcm/gen.html.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15) * That software is really not designed for a single multiplier this large,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) * but if you run it enough times with different seeds, it'll find several
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) * 6-shift, 6-add sequences for computing x * 0x61C88647. They are all
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18) * c = (x << 19) + x;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19) * a = (x << 9) + c;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20) * b = (x << 23) + a;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21) * return (a<<11) + (b<<6) + (c<<3) - b;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22) * with variations on the order of the final add.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24) * Without even a shifter, it's hopless; any hash function will suck.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27) #if CONFIG_XILINX_MICROBLAZE0_USE_HW_MUL == 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29) #define HAVE_ARCH__HASH_32 1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31) /* Multiply by GOLDEN_RATIO_32 = 0x61C88647 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32) static inline u32 __attribute_const__ __hash_32(u32 a)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34) #if CONFIG_XILINX_MICROBLAZE0_USE_BARREL
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35) unsigned int b, c;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37) /* Phase 1: Compute three intermediate values */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38) b = a << 23;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39) c = (a << 19) + a;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40) a = (a << 9) + c;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41) b += a;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) /* Phase 2: Compute (a << 11) + (b << 6) + (c << 3) - b */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44) a <<= 5;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45) a += b; /* (a << 5) + b */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46) a <<= 3;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47) a += c; /* (a << 8) + (b << 3) + c */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48) a <<= 3;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49) return a - b; /* (a << 11) + (b << 6) + (c << 3) - b */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52) * "This is really going to hurt."
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54) * Without a barrel shifter, left shifts are implemented as
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55) * repeated additions, and the best we can do is an optimal
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56) * addition-subtraction chain. This one is not known to be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57) * optimal, but at 37 steps, it's decent for a 31-bit multiplier.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59) * Question: given its size (37*4 = 148 bytes per instance),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60) * and slowness, is this worth having inline?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62) unsigned int b, c, d;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64) b = a << 4; /* 4 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65) c = b << 1; /* 1 5 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66) b += a; /* 1 6 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67) c += b; /* 1 7 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68) c <<= 3; /* 3 10 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69) c -= a; /* 1 11 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70) d = c << 7; /* 7 18 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71) d += b; /* 1 19 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72) d <<= 8; /* 8 27 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73) d += a; /* 1 28 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74) d <<= 1; /* 1 29 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75) d += b; /* 1 30 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76) d <<= 6; /* 6 36 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77) return d + c; /* 1 37 total instructions*/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81) #endif /* !CONFIG_XILINX_MICROBLAZE0_USE_HW_MUL */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82) #endif /* _ASM_HASH_H */