// SPDX-License-Identifier: GPL-2.0-only
// Copyright (C) 2019-2020 Arm Ltd.

#include <linux/compiler.h>
#include <linux/kasan-checks.h>
#include <linux/kernel.h>

#include <net/checksum.h>

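/*
 * Add @data into a running 64-bit ones'-complement sum: the 128-bit
 * intermediate catches the carry out of bit 63, which is then wrapped
 * back around into bit 0.
 */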
/* Looks dumb, but generates nice-ish code */
static u64 accumulate(u64 sum, u64 data)
{
	__uint128_t tmp = (__uint128_t)sum + data;
	return tmp + (tmp >> 64);
}

/*
 * We over-read the buffer and this makes KASAN unhappy. Instead, disable
 * instrumentation and call kasan explicitly.
 */
unsigned int __no_sanitize_address do_csum(const unsigned char *buff, int len)
{
	unsigned int offset, shift, sum;
	const u64 *ptr;
	u64 data, sum64 = 0;

	if (unlikely(len <= 0))
		return 0;

	offset = (unsigned long)buff & 7;
	/*
	 * This is to all intents and purposes safe, since rounding down cannot
	 * result in a different page or cache line being accessed, and @buff
	 * should absolutely not be pointing to anything read-sensitive. We do,
	 * however, have to be careful not to piss off KASAN, which means using
	 * unchecked reads to accommodate the head and tail, for which we'll
	 * compensate with an explicit check up-front.
	 */
	kasan_check_read(buff, len);
	ptr = (u64 *)(buff - offset);
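	/*
	 * Rebase @len to count the bytes beyond the first aligned dword; it
	 * may go negative here if the whole buffer fits within that dword.
	 */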
	len = len + offset - 8;

	/*
	 * Head: zero out any excess leading bytes. Shifting back by the same
	 * amount should be at least as fast as any other way of handling the
	 * odd/even alignment, and means we can ignore it until the very end.
	 */
	shift = offset * 8;
	data = *ptr++;
#ifdef __LITTLE_ENDIAN
	data = (data >> shift) << shift;
#else
	data = (data << shift) >> shift;
#endif
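	/*
	 * e.g. for offset == 2 on a little-endian kernel, shift == 16 and the
	 * two bytes preceding @buff are cleared out of the first dword.
	 */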

	/*
	 * Body: straightforward aligned loads from here on (the paired loads
	 * underlying the quadword type still only need dword alignment). The
	 * main loop strictly excludes the tail, so the second loop will always
	 * run at least once.
	 */
	while (unlikely(len > 64)) {
		__uint128_t tmp1, tmp2, tmp3, tmp4;

		tmp1 = *(__uint128_t *)ptr;
		tmp2 = *(__uint128_t *)(ptr + 2);
		tmp3 = *(__uint128_t *)(ptr + 4);
		tmp4 = *(__uint128_t *)(ptr + 6);

		len -= 64;
		ptr += 8;

		/* This is the "don't dump the carry flag into a GPR" idiom */
		tmp1 += (tmp1 >> 64) | (tmp1 << 64);
		tmp2 += (tmp2 >> 64) | (tmp2 << 64);
		tmp3 += (tmp3 >> 64) | (tmp3 << 64);
		tmp4 += (tmp4 >> 64) | (tmp4 << 64);
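		/*
		 * Each tmpN now holds the ones'-complement sum of its two
		 * dwords in its upper 64 bits; combine them pairwise.
		 */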
		tmp1 = ((tmp1 >> 64) << 64) | (tmp2 >> 64);
		tmp1 += (tmp1 >> 64) | (tmp1 << 64);
		tmp3 = ((tmp3 >> 64) << 64) | (tmp4 >> 64);
		tmp3 += (tmp3 >> 64) | (tmp3 << 64);
		tmp1 = ((tmp1 >> 64) << 64) | (tmp3 >> 64);
		tmp1 += (tmp1 >> 64) | (tmp1 << 64);
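		/*
		 * The upper half of tmp1 is now the sum of all 64 bytes;
		 * fold the running sum64 in the same way.
		 */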
		tmp1 = ((tmp1 >> 64) << 64) | sum64;
		tmp1 += (tmp1 >> 64) | (tmp1 << 64);
		sum64 = tmp1 >> 64;
	}
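	/*
	 * 16 bytes per iteration: fold in the dword left over from the head
	 * (or the previous iteration), accumulate the first dword of the new
	 * pair, and keep the second back in @data for the tail to trim.
	 */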
	while (len > 8) {
		__uint128_t tmp;

		sum64 = accumulate(sum64, data);
		tmp = *(__uint128_t *)ptr;

		len -= 16;
		ptr += 2;

#ifdef __LITTLE_ENDIAN
		data = tmp >> 64;
		sum64 = accumulate(sum64, tmp);
#else
		data = tmp;
		sum64 = accumulate(sum64, tmp >> 64);
#endif
	}
	if (len > 0) {
		sum64 = accumulate(sum64, data);
		data = *ptr;
		len -= 8;
	}
	/*
	 * Tail: zero any over-read bytes similarly to the head, again
	 * preserving odd/even alignment.
	 */
	shift = len * -8;
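	/* len is in (-8, 0] here, so the shift clears 0..56 bits of over-read */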
#ifdef __LITTLE_ENDIAN
	data = (data << shift) >> shift;
#else
	data = (data >> shift) << shift;
#endif
	sum64 = accumulate(sum64, data);

	/* Finally, folding */
	sum64 += (sum64 >> 32) | (sum64 << 32);
	sum = sum64 >> 32;
	sum += (sum >> 16) | (sum << 16);
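	/*
	 * An odd @buff swaps the byte lanes of every 16-bit word relative to
	 * an even start, so byte-swap the folded result to compensate.
	 */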
	if (offset & 1)
		return (u16)swab32(sum);

	return sum >> 16;
}

__sum16 csum_ipv6_magic(const struct in6_addr *saddr,
			const struct in6_addr *daddr,
			__u32 len, __u8 proto, __wsum csum)
{
	__uint128_t src, dst;
	u64 sum = (__force u64)csum;

	src = *(const __uint128_t *)saddr->s6_addr;
	dst = *(const __uint128_t *)daddr->s6_addr;

	sum += (__force u32)htonl(len);
#ifdef __LITTLE_ENDIAN
	sum += (u32)proto << 24;
#else
	sum += proto;
#endif
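	/*
	 * Fold each 128-bit address into its upper 64 bits (the same
	 * end-around-carry trick as the main loop), then accumulate those
	 * upper halves.
	 */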
	src += (src >> 64) | (src << 64);
	dst += (dst >> 64) | (dst << 64);

	sum = accumulate(sum, src >> 64);
	sum = accumulate(sum, dst >> 64);

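	/* Fold to 32 bits; csum_fold() does the final 16-bit fold and inversion */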
	sum += ((sum >> 32) | (sum << 32));
	return csum_fold((__force __wsum)(sum >> 32));
}
EXPORT_SYMBOL(csum_ipv6_magic);