^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) /* SPDX-License-Identifier: GPL-2.0-only */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) * Copyright (C) 2013 ARM Ltd.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) * Copyright (C) 2013 Linaro.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) * This code is based on glibc cortex strings work originally authored by
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) * Linaro. The original code can be found @
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) * files/head:/src/aarch64/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) #include <linux/linkage.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) #include <asm/assembler.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) * determine the length of a fixed-size string
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19) * Parameters:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20) * x0 - const string pointer
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21) * x1 - maximal string length
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22) * Returns:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23) * x0 - the length of the string, capped at the maximal length (x1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) /* Arguments and results. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27) srcin .req x0 /* in: start of the string */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28) len .req x0 /* out: result; shares x0 with srcin */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29) limit .req x1 /* in: maximal string length */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31) /* Locals and temporaries. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32) src .req x2 /* 16-byte-aligned read cursor, post-incremented by each ldp */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33) data1 .req x3 /* first Dword of the current 16-byte chunk */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34) data2 .req x4 /* second Dword of the current 16-byte chunk */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35) data2a .req x5 /* data2 with the bytes before the string forced to 0xff */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36) has_nul1 .req x6 /* NUL syndrome of data1 (non-zero iff a byte is zero) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37) has_nul2 .req x7 /* NUL syndrome of data2 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38) tmp1 .req x8 /* scratch; holds the misalignment offset on entry */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39) tmp2 .req x9 /* scratch */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40) tmp3 .req x10 /* scratch */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41) tmp4 .req x11 /* scratch */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42) zeroones .req x12 /* holds REP8_01 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) pos .req x13 /* leading-zero bit count locating the first NUL */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44) limit_wd .req x14 /* number of 16-byte chunks allowed by limit, minus one */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46) #define REP8_01 0x0101010101010101 /* 0x01 repeated in each of the 8 bytes */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47) #define REP8_7f 0x7f7f7f7f7f7f7f7f /* 0x7f repeated in each of the 8 bytes */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48) #define REP8_80 0x8080808080808080 /* 0x80 repeated in each byte; not referenced in the code visible here */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50) SYM_FUNC_START_WEAK_PI(strnlen) /* x0 = MIN (length of string at srcin, limit) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51) cbz limit, .Lhit_limit /* limit == 0: nothing to scan, return limit */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52) mov zeroones, #REP8_01
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53) bic src, srcin, #15 /* src = srcin rounded down to a 16-byte boundary */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54) ands tmp1, srcin, #15 /* tmp1 = offset of srcin within that Qword */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55) b.ne .Lmisaligned
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56) /* Calculate the number of full and partial words -1. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57) sub limit_wd, limit, #1 /* Limit != 0, so no underflow. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58) lsr limit_wd, limit_wd, #4 /* Convert to Qwords. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61) * NUL detection works on the principle that (X - 1) & (~X) & 0x80
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62) * (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63) * can be done in parallel across the entire word.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66) * The inner loop deals with two Dwords at a time. This has a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67) * slightly higher start-up cost, but we should win quite quickly,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68) * especially on cores with a high number of issue slots per
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69) * cycle, as we get much better parallelism out of the operations.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71) .Lloop:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72) ldp data1, data2, [src], #16 /* load one Qword, then src += 16 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73) .Lrealigned: /* .Lmisaligned enters here with data1/data2 already masked */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74) sub tmp1, data1, zeroones /* X - 1 in every byte of data1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75) orr tmp2, data1, #REP8_7f /* X | 0x7f in every byte of data1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76) sub tmp3, data2, zeroones /* same two steps for data2 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77) orr tmp4, data2, #REP8_7f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78) bic has_nul1, tmp1, tmp2 /* (X - 1) & ~(X | 0x7f) for data1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79) bic has_nul2, tmp3, tmp4 /* (X - 1) & ~(X | 0x7f) for data2 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80) subs limit_wd, limit_wd, #1 /* N flag becomes set once the Qword budget is used up */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81) orr tmp1, has_nul1, has_nul2 /* non-zero if either Dword holds a NUL */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82) ccmp tmp1, #0, #0, pl /* Budget left: compare tmp1 with 0; otherwise NZCV = 0000 (ne) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 83) b.eq .Lloop /* keep going while budget is left and no NUL was seen */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 84)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 85) cbz tmp1, .Lhit_limit /* No null in final Qword. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 86)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 87) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88) * We know there's a null in the final Qword. The easiest thing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89) * to do now is work out the length of the string and return
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90) * MIN (len, limit).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92) sub len, src, srcin /* distance from srcin to the end of this Qword; overwrites srcin (x0) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93) cbz has_nul1, .Lnul_in_data2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94) CPU_BE( mov data2, data1 ) /* prepare data to re-calculate the syndrome */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 96) sub len, len, #8 /* NUL is in data1: step back over data2 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97) mov has_nul2, has_nul1 /* the common tail below works on has_nul2 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 98) .Lnul_in_data2:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 99) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100) * For big-endian, carry propagation (if the final byte in the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) * string is 0x01) means we cannot use has_nul directly. The
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) * easiest way to get the correct byte is to byte-swap the data
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) * and calculate the syndrome a second time.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105) CPU_BE( rev data2, data2 )
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) CPU_BE( sub tmp1, data2, zeroones )
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) CPU_BE( orr tmp2, data2, #REP8_7f )
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108) CPU_BE( bic has_nul2, tmp1, tmp2 )
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) sub len, len, #8 /* len = offset from srcin of the Dword holding the NUL */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) rev has_nul2, has_nul2 /* byte-reverse so clz finds the earliest NUL in the string */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) clz pos, has_nul2 /* pos = number of bits ahead of the first NUL marker */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113) add len, len, pos, lsr #3 /* Bits to bytes. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) cmp len, limit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) csel len, len, limit, ls /* Return the lower value. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) ret
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118) .Lmisaligned:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120) * Deal with a partial first word.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121) * We're doing two things in parallel here;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122) * 1) Calculate the number of words (but avoiding overflow if
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) * limit is near ULONG_MAX) - to do this we need to work out
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124) * limit + tmp1 - 1 as a 65-bit value before shifting it;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125) * 2) Load and mask the initial data words - we force the bytes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) * before the ones we are interested in to 0xff - this ensures
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127) * early bytes will not hit any zero detection.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129) ldp data1, data2, [src], #16 /* aligned Qword that contains srcin; src += 16 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131) sub limit_wd, limit, #1 /* limit - 1 (limit != 0 was checked on entry) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) and tmp3, limit_wd, #15 /* low 4 bits of limit - 1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133) lsr limit_wd, limit_wd, #4 /* (limit - 1) / 16 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135) add tmp3, tmp3, tmp1 /* low bits plus the misalignment offset */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136) add limit_wd, limit_wd, tmp3, lsr #4 /* add the carry out of the low 4 bits */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138) neg tmp4, tmp1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139) lsl tmp4, tmp4, #3 /* Bytes beyond alignment -> bits. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141) mov tmp2, #~0 /* all ones; shifted below into the 0xff mask for the early bytes */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142) /* Big-endian. Early bytes are at MSB. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143) CPU_BE( lsl tmp2, tmp2, tmp4 ) /* Shift (tmp4 & 63). */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144) /* Little-endian. Early bytes are at LSB. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145) CPU_LE( lsr tmp2, tmp2, tmp4 ) /* Shift (tmp4 & 63). */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147) cmp tmp1, #8 /* offset <= 8: mask applies to data1; offset > 8: string starts in data2 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149) orr data1, data1, tmp2 /* candidate for offset <= 8 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150) orr data2a, data2, tmp2 /* candidate for offset > 8 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152) csinv data1, data1, xzr, le /* offset > 8: all of data1 precedes the string, force to all ones */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153) csel data2, data2, data2a, le /* offset > 8: use the masked copy of data2 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154) b .Lrealigned
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156) .Lhit_limit:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157) mov len, limit /* no NUL found within the limit (or limit == 0): return limit */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158) ret
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159) SYM_FUNC_END_PI(strnlen)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 160) EXPORT_SYMBOL_NOKASAN(strnlen)