^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) /* SPDX-License-Identifier: GPL-2.0-only */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) * Copyright (C) 2013 ARM Ltd.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) * Copyright (C) 2013 Linaro.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) * This code is based on glibc cortex strings work originally authored by
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) * Linaro, which can be found @
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) * files/head:/src/aarch64/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) #include <linux/linkage.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) #include <asm/assembler.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) * Compare two strings, examining at most x2 bytes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19) * Parameters:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20) * x0 - const string 1 pointer
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21) * x1 - const string 2 pointer
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22) * x2 - the maximal length to be compared
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23) * Returns:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24) * x0 - an integer less than, equal to, or greater than zero if s1 is found,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25) * respectively, to be less than, to match, or be greater than s2.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28) #define REP8_01 0x0101010101010101 /* 0x01 in every byte; loaded into zeroones for the NUL test. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29) #define REP8_7f 0x7f7f7f7f7f7f7f7f /* 0x7f in every byte; OR-ed with the data in the NUL test. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30) #define REP8_80 0x8080808080808080 /* 0x80 in every byte; not referenced by the strncmp code below. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32) /* Parameters and result. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33) src1 .req x0 /* Pointer into string 1; advanced as bytes are consumed. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34) src2 .req x1 /* Pointer into string 2; advanced as bytes are consumed. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35) limit .req x2 /* Maximum number of bytes to compare. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36) result .req x0 /* Return value; shares x0 with src1. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38) /* Internal variables. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39) data1 .req x3 /* Data loaded from src1 (a dword, or a single byte via data1w). */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40) data1w .req w3 /* 32-bit view of data1, used by the byte loads. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41) data2 .req x4 /* Data loaded from src2 (a dword, or a single byte via data2w). */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42) data2w .req w4 /* 32-bit view of data2, used by the byte loads. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) has_nul .req x5 /* Non-zero if data1 contains a NUL byte. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44) diff .req x6 /* data1 XOR data2; non-zero if the dwords differ. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45) syndrome .req x7 /* diff OR has_nul; locates the first difference or NUL. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46) tmp1 .req x8 /* Scratch. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47) tmp2 .req x9 /* Scratch. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48) tmp3 .req x10 /* Scratch. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49) zeroones .req x11 /* Holds REP8_01 for the whole function. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50) pos .req x12 /* Byte count, load offset or bit position, depending on the path. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51) limit_wd .req x13 /* Remaining limit expressed in dwords. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52) mask .req x14 /* Mask covering the bytes beyond the limit in the last dword. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53) endloop .req x15 /* Non-zero when a dword loop must stop (difference or limit reached). */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55) SYM_FUNC_START_WEAK_PI(strncmp) /* Compare at most limit (x2) bytes of the strings at src1 (x0) and src2 (x1); result in x0. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56) cbz limit, .Lret0 /* limit == 0: nothing to compare, return 0. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57) eor tmp1, src1, src2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58) mov zeroones, #REP8_01
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59) tst tmp1, #7 /* Do the two addresses differ in their low 3 bits? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60) b.ne .Lmisaligned8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61) ands tmp1, src1, #7 /* tmp1 = offset of src1 within its 8-byte unit. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62) b.ne .Lmutual_align
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63) /* Calculate the number of full and partial words -1. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65) * When limit is a multiple of 8, the check of the last dword
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66) * would be wrong if 1 were not subtracted first.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68) sub limit_wd, limit, #1 /* limit != 0, so no underflow. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69) lsr limit_wd, limit_wd, #3 /* Convert to Dwords. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72) * NUL detection works on the principle that (X - 1) & (~X) & 0x80
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73) * (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74) * can be done in parallel across the entire word.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76) .Lloop_aligned:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77) ldr data1, [src1], #8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78) ldr data2, [src2], #8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79) .Lstart_realigned:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80) subs limit_wd, limit_wd, #1 /* Goes negative once the last dword within the limit has been loaded. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81) sub tmp1, data1, zeroones
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82) orr tmp2, data1, #REP8_7f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 83) eor diff, data1, data2 /* Non-zero if differences found. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 84) csinv endloop, diff, xzr, pl /* Last Dword or differences.*/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 85) bics has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 86) ccmp endloop, #0, #0, eq /* EQ only if no NUL, no difference and not the last dword. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 87) b.eq .Lloop_aligned
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89) /* Not reached the limit, must have found the end or a diff. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90) tbz limit_wd, #63, .Lnot_limit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92) /* Limit % 8 == 0 => all bytes significant. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93) ands limit, limit, #7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94) b.eq .Lnot_limit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 96) lsl limit, limit, #3 /* Bytes -> bits. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97) mov mask, #~0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 98) CPU_BE( lsr mask, mask, limit )
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 99) CPU_LE( lsl mask, mask, limit )
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100) bic data1, data1, mask /* Clear the bytes that lie beyond the limit. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) bic data2, data2, mask
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) /* Make sure that the NUL byte is marked in the syndrome. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104) orr has_nul, has_nul, mask
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) .Lnot_limit:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) orr syndrome, diff, has_nul
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108) b .Lcal_cmpresult
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) .Lmutual_align:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) * Sources are mutually aligned, but are not currently at an
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113) * alignment boundary. Round down the addresses and then mask off
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) * the bytes that precede the start point.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) * We also need to adjust the limit calculations, but without
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) * overflowing if the limit is near ULONG_MAX.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118) bic src1, src1, #7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) bic src2, src2, #7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120) ldr data1, [src1], #8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121) neg tmp3, tmp1, lsl #3 /* 64 - bits(bytes beyond align). */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122) ldr data2, [src2], #8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) mov tmp2, #~0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124) sub limit_wd, limit, #1 /* limit != 0, so no underflow. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125) /* Big-endian. Early bytes are at MSB. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) CPU_BE( lsl tmp2, tmp2, tmp3 ) /* Shift by (tmp3 & 63). */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127) /* Little-endian. Early bytes are at LSB. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128) CPU_LE( lsr tmp2, tmp2, tmp3 ) /* Shift by (tmp3 & 63). */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130) and tmp3, limit_wd, #7 /* tmp3 = (limit - 1) % 8. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131) lsr limit_wd, limit_wd, #3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) /* Adjust the limit. Only low 3 bits used, so overflow irrelevant.*/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133) add limit, limit, tmp1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134) add tmp3, tmp3, tmp1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135) orr data1, data1, tmp2 /* Set the bytes before the start point to 0xff in both dwords ... */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136) orr data2, data2, tmp2 /* ... so they compare equal and are never seen as NUL. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137) add limit_wd, limit_wd, tmp3, lsr #3 /* One more dword if the start offset pushes the end into the next one. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138) b .Lstart_realigned
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140) /* The offsets of src1 and src2 within an 8-byte unit differ. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141) .Lmisaligned8:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142) cmp limit, #8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143) b.lo .Ltiny8proc /*limit < 8... */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145) * Get the align offset length to compare per byte first.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146) * After this process, one string's address will be aligned.*/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147) and tmp1, src1, #7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148) neg tmp1, tmp1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149) add tmp1, tmp1, #8 /* tmp1 = 8 - (src1 & 7). */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150) and tmp2, src2, #7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151) neg tmp2, tmp2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152) add tmp2, tmp2, #8 /* tmp2 = 8 - (src2 & 7). */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153) subs tmp3, tmp1, tmp2 /* tmp3 = tmp1 - tmp2; reused after the byte loop. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154) csel pos, tmp1, tmp2, hi /*Choose the maximum. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156) * Here, limit is not less than 8, so directly run .Ltinycmp
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157) * without checking the limit.*/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158) sub limit, limit, pos /* Account for the pos bytes compared bytewise in .Ltinycmp. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159) .Ltinycmp:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 160) ldrb data1w, [src1], #1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 161) ldrb data2w, [src2], #1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 162) subs pos, pos, #1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 163) ccmp data1w, #1, #0, ne /* If pos != 0: C set iff data1w is not NUL; else NZCV = 0b0000. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 164) ccmp data1w, data2w, #0, cs /* If C set: compare the two bytes; else NZCV = 0b0000. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 165) b.eq .Ltinycmp
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 166) cbnz pos, 1f /*find the null or unequal...*/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167) cmp data1w, #1 /* pos == 0: re-check the last byte for NUL ... */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 168) ccmp data1w, data2w, #0, cs /* ... and for equality. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169) b.eq .Lstart_align /*the last bytes are equal....*/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170) 1:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171) sub result, data1, data2 /* Difference of the last pair of bytes loaded. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172) ret
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 173)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 174) .Lstart_align:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 175) lsr limit_wd, limit, #3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 176) cbz limit_wd, .Lremain8 /* Fewer than 8 bytes left: finish bytewise. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 177) /*process more leading bytes to make str1 aligned...*/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 178) ands xzr, src1, #7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 179) b.eq .Lrecal_offset
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 180) add src1, src1, tmp3 /* NOTE(review): tmp3 = tmp1 - tmp2 from .Lmisaligned8; it looks negative on this path (src1 still unaligned), so this steps the pointers back - the original comment said positive, confirm. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 181) add src2, src2, tmp3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 182) ldr data1, [src1], #8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 183) ldr data2, [src2], #8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 184)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 185) sub limit, limit, tmp3 /* Adjust limit for the pointer move above. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 186) lsr limit_wd, limit, #3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 187) subs limit_wd, limit_wd, #1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 188)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 189) sub tmp1, data1, zeroones
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 190) orr tmp2, data1, #REP8_7f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 191) eor diff, data1, data2 /* Non-zero if differences found. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 192) csinv endloop, diff, xzr, ne/*if limit_wd is 0,will finish the cmp*/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 193) bics has_nul, tmp1, tmp2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 194) ccmp endloop, #0, #0, eq /*has_null is ZERO: no null byte*/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 195) b.ne .Lunequal_proc
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 196) /*How far is the current str2 from the alignment boundary...*/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 197) and tmp3, tmp3, #7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 198) .Lrecal_offset:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 199) neg pos, tmp3 /* pos = negative byte offset used by the first pair of loads below. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 200) .Lloopcmp_proc:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 201) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 202) * Divide the eight bytes into two parts. First, step src2 back
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 203) * to an alignment boundary, load eight bytes from that SRC2 alignment
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 204) * boundary, then compare with the corresponding bytes from SRC1.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 205) * If all 8 bytes are equal, then start the second part's comparison.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 206) * Otherwise finish the comparison.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 207) * This special handling is meant to guarantee that all accesses stay
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 208) * within the thread/task space, to avoid out-of-range accesses.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 209) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 210) ldr data1, [src1,pos]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 211) ldr data2, [src2,pos]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 212) sub tmp1, data1, zeroones
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 213) orr tmp2, data1, #REP8_7f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 214) bics has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 215) eor diff, data1, data2 /* Non-zero if differences found. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 216) csinv endloop, diff, xzr, eq /* endloop = diff if no NUL was found, else all ones. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 217) cbnz endloop, .Lunequal_proc
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 218)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 219) /*The second part process*/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 220) ldr data1, [src1], #8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 221) ldr data2, [src2], #8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 222) subs limit_wd, limit_wd, #1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 223) sub tmp1, data1, zeroones
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 224) orr tmp2, data1, #REP8_7f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 225) eor diff, data1, data2 /* Non-zero if differences found. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 226) csinv endloop, diff, xzr, ne/*if limit_wd is 0,will finish the cmp*/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 227) bics has_nul, tmp1, tmp2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 228) ccmp endloop, #0, #0, eq /*has_null is ZERO: no null byte*/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 229) b.eq .Lloopcmp_proc
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 230)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 231) .Lunequal_proc:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 232) orr syndrome, diff, has_nul
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 233) cbz syndrome, .Lremain8 /* No difference and no NUL: the dword count ran out, finish the tail bytewise. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 234) .Lcal_cmpresult:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 235) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 236) * Reverse the byte order to big-endian, so that CLZ can find the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 237) * most significant non-zero bit of the syndrome.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 238) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 239) CPU_LE( rev syndrome, syndrome )
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 240) CPU_LE( rev data1, data1 )
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 241) CPU_LE( rev data2, data2 )
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 242) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 243) * For big-endian we cannot use the trick with the syndrome value
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 244) * as carry-propagation can corrupt the upper bits if the trailing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 245) * bytes in the string contain 0x01.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 246) * However, if there is no NUL byte in the dword, we can generate
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 247) * the result directly. We can't just subtract the bytes as the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 248) * MSB might be significant.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 249) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 250) CPU_BE( cbnz has_nul, 1f )
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 251) CPU_BE( cmp data1, data2 )
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 252) CPU_BE( cset result, ne )
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 253) CPU_BE( cneg result, result, lo )
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 254) CPU_BE( ret )
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 255) CPU_BE( 1: )
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 256) /* Re-compute the NUL-byte detection, using a byte-reversed value.*/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 257) CPU_BE( rev tmp3, data1 )
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 258) CPU_BE( sub tmp1, tmp3, zeroones )
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 259) CPU_BE( orr tmp2, tmp3, #REP8_7f )
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 260) CPU_BE( bic has_nul, tmp1, tmp2 )
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 261) CPU_BE( rev has_nul, has_nul )
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 262) CPU_BE( orr syndrome, diff, has_nul )
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 263) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 264) * The MS-non-zero bit of the syndrome marks either the first bit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 265) * that is different, or the top bit of the first zero byte.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 266) * Shifting left now will bring the critical information into the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 267) * top bits.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 268) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 269) clz pos, syndrome
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 270) lsl data1, data1, pos
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 271) lsl data2, data2, pos
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 272) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 273) * But we need to zero-extend (char is unsigned) the value and then
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 274) * perform a signed 32-bit subtraction.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 275) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 276) lsr data1, data1, #56
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 277) sub result, data1, data2, lsr #56
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 278) ret
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 279)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 280) .Lremain8:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 281) /* Limit % 8 == 0 => all bytes significant. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 282) ands limit, limit, #7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 283) b.eq .Lret0 /* No tail bytes left to compare: the strings match within the limit. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 284) .Ltiny8proc:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 285) ldrb data1w, [src1], #1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 286) ldrb data2w, [src2], #1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 287) subs limit, limit, #1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 288)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 289) ccmp data1w, #1, #0, ne /* If limit != 0: C set iff data1w is not NUL; else NZCV = 0b0000. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 290) ccmp data1w, data2w, #0, cs /* If C set: compare the two bytes; else NZCV = 0b0000. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 291) b.eq .Ltiny8proc
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 292) sub result, data1, data2 /* Difference of the last pair of bytes loaded. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 293) ret
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 294)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 295) .Lret0:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 296) mov result, #0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 297) ret
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 298) SYM_FUNC_END_PI(strncmp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 299) EXPORT_SYMBOL_NOKASAN(strncmp)