^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) /* SPDX-License-Identifier: GPL-2.0-only */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) * Copyright (C) 2013 ARM Ltd.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) * Copyright (C) 2013 Linaro.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) * This code is based on glibc cortex strings work originally authored by Linaro.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) * The original code can be found @
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) * files/head:/src/aarch64/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) #include <linux/linkage.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) #include <asm/assembler.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) * calculate the length of a string
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19) * Parameters:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20) * x0 - const string pointer
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21) * Returns:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22) * x0 - length of the string in bytes, not counting the terminating NUL
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25) /* Arguments and results. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) srcin .req x0 /* Input: pointer to the string. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27) len .req x0 /* Output: string length. Same register as srcin, so writing len destroys srcin. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29) /* Locals and temporaries. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30) src .req x1 /* 16-byte-aligned read cursor; post-incremented by 16 on every ldp. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31) data1 .req x2 /* First 64-bit word of the current 16-byte block. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32) data2 .req x3 /* Second 64-bit word of the current 16-byte block. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33) data2a .req x4 /* Masked copy of data2; only written on the misaligned entry path. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34) has_nul1 .req x5 /* NUL syndrome for data1: non-zero iff data1 contains a zero byte. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35) has_nul2 .req x6 /* NUL syndrome for data2; also the syndrome used by the final position calculation. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36) tmp1 .req x7 /* Scratch: alignment offset / shift count, then (data1 - 0x01..). */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37) tmp2 .req x8 /* Scratch: alignment mask, then (data1 | 0x7f..). */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38) tmp3 .req x9 /* Scratch: (data2 - 0x01..). */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39) tmp4 .req x10 /* Scratch: (data2 | 0x7f..). */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40) zeroones .req x11 /* Holds REP8_01 for the whole function. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41) pos .req x12 /* clz result: bit offset of the first NUL marker in the syndrome. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) #define REP8_01 0x0101010101010101 /* 0x01 replicated into each of the 8 bytes. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44) #define REP8_7f 0x7f7f7f7f7f7f7f7f /* 0x7f replicated into each of the 8 bytes. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45) #define REP8_80 0x8080808080808080 /* 0x80 replicated into each byte; not referenced by the code visible in this file. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47) SYM_FUNC_START_WEAK_PI(strlen) /* strlen entry: x0 = string pointer in, x0 = length out. The start/end macros are defined outside this file. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48) mov zeroones, #REP8_01 /* Per-byte 0x01 constant subtracted in the NUL test below. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49) bic src, srcin, #15 /* src = srcin rounded down to a 16-byte boundary. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50) ands tmp1, srcin, #15 /* tmp1 = offset of srcin inside that 16-byte block; Z is set when it is 0. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51) b.ne .Lmisaligned /* Non-zero offset: the bytes before srcin must be masked first. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53) * NUL detection works on the principle that (X - 1) & (~X) & 0x80
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54) * (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55) * can be done in parallel across the entire word.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58) * The inner loop deals with two Dwords at a time. This has a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59) * slightly higher start-up cost, but we should win quite quickly,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60) * especially on cores with a high number of issue slots per
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61) * cycle, as we get much better parallelism out of the operations.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63) .Lloop: /* Main loop: examines 16 bytes per iteration. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64) ldp data1, data2, [src], #16 /* Load two 64-bit words, then advance src by 16. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65) .Lrealigned: /* Entry from .Lmisaligned, with data1/data2 already loaded and masked. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66) sub tmp1, data1, zeroones /* tmp1 = data1 - 0x01 in every byte. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67) orr tmp2, data1, #REP8_7f /* tmp2 = data1 | 0x7f in every byte. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68) sub tmp3, data2, zeroones /* Same two steps for data2, interleaved with the data1 ones. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69) orr tmp4, data2, #REP8_7f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70) bic has_nul1, tmp1, tmp2 /* has_nul1 = tmp1 & ~tmp2: non-zero iff data1 holds a zero byte. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71) bics has_nul2, tmp3, tmp4 /* Same for data2; additionally sets Z when has_nul2 == 0. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72) ccmp has_nul1, #0, #0, eq /* If has_nul2 == 0, compare has_nul1 with 0; otherwise force NZCV = 0000 (reads as ne). */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73) b.eq .Lloop /* Taken only when both syndromes are zero, i.e. no NUL in these 16 bytes. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75) sub len, src, srcin /* src is already past the block, so len = bytes from srcin to the end of the block. len is x0, so srcin is gone after this. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76) cbz has_nul1, .Lnul_in_data2 /* First dword is clean, so the NUL is in data2. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77) CPU_BE( mov data2, data1 ) /* Prepare data to re-calculate the syndrome: the shared tail below works on data2. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78) sub len, len, #8 /* NUL is in data1: step back over the 8 bytes of data2. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79) mov has_nul2, has_nul1 /* The shared tail below works on has_nul2. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80) .Lnul_in_data2:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82) * For big-endian, carry propagation (if the final byte in the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 83) * string is 0x01) means we cannot use has_nul directly. The
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 84) * easiest way to get the correct byte is to byte-swap the data
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 85) * and calculate the syndrome a second time.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 86) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 87) CPU_BE( rev data2, data2 ) /* Byte-swap the dword that contains the NUL. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88) CPU_BE( sub tmp1, data2, zeroones ) /* Recompute the syndrome on the swapped data ... */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89) CPU_BE( orr tmp2, data2, #REP8_7f )
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90) CPU_BE( bic has_nul2, tmp1, tmp2 ) /* ... and replace has_nul2 with it. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92) sub len, len, #8 /* len = offset from srcin to the start of the dword that contains the NUL. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93) rev has_nul2, has_nul2 /* Byte-reverse so that clz counts from the originally least significant byte. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94) clz pos, has_nul2 /* pos = number of zero bits ahead of the first set syndrome bit. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95) add len, len, pos, lsr #3 /* Bits to bytes: add the NUL's byte index within the dword. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 96) ret /* Result is in x0 (len). */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 98) .Lmisaligned: /* srcin is not 16-byte aligned; tmp1 = srcin & 15, in the range 1..15. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 99) cmp tmp1, #8 /* Flags are consumed by the csinv/csel below; no instruction in between writes them. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100) neg tmp1, tmp1 /* Negate the offset; only the low 6 bits matter once it is used as a shift count. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) ldp data1, data2, [src], #16 /* Load the aligned 16-byte block that contains srcin; advance src by 16. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) lsl tmp1, tmp1, #3 /* Bytes beyond alignment -> bits. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) mov tmp2, #~0 /* All-ones; shifted below into a mask covering the bytes that precede srcin. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104) /* Big-endian. Early bytes are at MSB. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105) CPU_BE( lsl tmp2, tmp2, tmp1 ) /* Shift (tmp1 & 63). */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) /* Little-endian. Early bytes are at LSB. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) CPU_LE( lsr tmp2, tmp2, tmp1 ) /* Shift (tmp1 & 63). */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) orr data1, data1, tmp2 /* Candidate data1 with the pre-string bytes forced to 0xff (kept when offset <= 8). */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) orr data2a, data2, tmp2 /* Candidate data2 with the pre-string bytes forced to 0xff (used when offset > 8). */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) csinv data1, data1, xzr, le /* offset <= 8: keep the masked data1; otherwise data1 = ~0, since the whole dword precedes srcin. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) csel data2, data2, data2a, le /* offset <= 8: data2 is used unmodified; otherwise use the masked data2a. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113) b .Lrealigned /* Join the main loop's NUL test with the masked data. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) SYM_FUNC_END_PI(strlen)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) EXPORT_SYMBOL_NOKASAN(strlen)