/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
 */

/* This is optimized primarily for the ARC700.
   The loops could be sped up by one cycle per word (respectively one cycle
   per byte) by forcing double-word alignment of source 1, unrolling by a
   factor of two, and speculatively loading the second word / byte of
   source 1; however, that would increase the overhead for loop setup /
   finish, and strcmp might often terminate early.  */
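/* Overview: if either pointer is not word-aligned, the byte-at-a-time loop
   at .Lcharloop is used.  Otherwise the strings are compared a word at a
   time, using the usual bit trick to detect a NUL byte inside a word.  An
   illustrative C equivalent of the test done per word by the sub/bic/and
   sequence below (a sketch for explanation only, not part of the build):

	static inline unsigned int word_has_zero(unsigned int w)
	{
		return (w - 0x01010101U) & ~w & 0x80808080U;
	}

   A non-zero result means w contains a NUL byte, with possible false
   positives on 0x01 bytes that sit above a zero byte (see .Lfound0).  */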

#include <linux/linkage.h>

ENTRY_CFI(strcmp)
	or	r2,r0,r1
	bmsk_s	r2,r2,1
	brne	r2,0,.Lcharloop
	mov_s	r12,0x01010101
	ror	r5,r12
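	/* r12 = 0x01010101; rotating it right by one bit gives
	   r5 = 0x80808080, the per-byte MSB mask used by the NUL test in
	   .Lwordloop.  */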
.Lwordloop:
	ld.ab	r2,[r0,4]
	ld.ab	r3,[r1,4]
	nop_s
	sub	r4,r2,r12
	bic	r4,r4,r2
	and	r4,r4,r5
	brne	r4,0,.Lfound0
	breq	r2,r3,.Lwordloop
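	/* Fall-through: the words differ but contain no NUL.  On little-endian
	   the first differing character is the least significant differing
	   byte, so the #ifdef block below isolates it: r2 ^ r3 gives the
	   difference, sub_s/bic_s keep only its lowest set bit (in C terms,
	   roughly diff & ~(diff - 1)), and the r5-based step widens that bit
	   into a mask for the least significant differing byte before both
	   words are masked.  On big-endian the plain unsigned word compare
	   already orders by the first differing character, so no masking is
	   needed.  */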
#ifdef __LITTLE_ENDIAN__
	xor	r0,r2,r3	; mask for difference
	sub_s	r1,r0,1
	bic_s	r0,r0,r1	; mask for least significant difference bit
	sub	r1,r5,r0
	xor	r0,r5,r1	; mask for least significant difference byte
	and_s	r2,r2,r0
	and_s	r3,r3,r0
#endif /* LITTLE ENDIAN */
	cmp_s	r2,r3
	mov_s	r0,1
	j_s.d	[blink]
	bset.lo	r0,r0,31
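	/* Word-path return convention: r0 is +1 when the first differing
	   character of source 1 is the greater one, and has bit 31 set
	   (negative) otherwise; strcmp only needs the sign, not the exact
	   byte difference.  */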

	.balign	4
#ifdef __LITTLE_ENDIAN__
.Lfound0:
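	/* A word containing a NUL byte was found.  Fold the NUL indicator
	   bits of r4 into the difference mask so the same lowest-byte
	   isolation as above applies; equal strings then leave r0 = 0 from
	   sub.f, while mov.hi/bset.lo turn a mismatch into +1 or a negative
	   value.  */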
	xor	r0,r2,r3	; mask for difference
	or	r0,r0,r4	; or in zero indicator
	sub_s	r1,r0,1
	bic_s	r0,r0,r1	; mask for least significant difference bit
	sub	r1,r5,r0
	xor	r0,r5,r1	; mask for least significant difference byte
	and_s	r2,r2,r0
	and_s	r3,r3,r0
	sub.f	r0,r2,r3
	mov.hi	r0,1
	j_s.d	[blink]
	bset.lo	r0,r0,31
#else /* BIG ENDIAN */
	/* The zero-detection above can mis-detect 0x01 bytes as zeroes
	   because of carry propagation from a less significant zero byte.
	   We can compensate for this by checking that bit 0 is zero.
	   This compensation is not necessary in the step where we
	   get a low estimate for r2, because in any affected bytes
	   we already have 0x00 or 0x01, which will remain unchanged
	   when bit 7 is cleared.  */
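	/* For example, a 0x01 character immediately followed by the
	   terminating NUL: the borrow out of the NUL byte also sets the r4
	   indicator for the 0x01 byte.  Such a byte has bit 0 set in r2,
	   which is what the lsr_s/bic_s pair below uses to drop it from the
	   adjusted zero mask.  */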
	.balign	4
.Lfound0:
	lsr	r0,r4,8
	lsr_s	r1,r2
	bic_s	r2,r2,r0	; get low estimate for r2 and get ...
	bic_s	r0,r0,r1	; <this is the adjusted mask for zeros>
	or_s	r3,r3,r0	; ... high estimate r3 so that r2 > r3 will ...
	cmp_s	r3,r2		; ... be independent of trailing garbage
	or_s	r2,r2,r0	; likewise for r3 > r2
	bic_s	r3,r3,r0
	rlc	r0,0		; r0 := r2 > r3 ? 1 : 0
	cmp_s	r2,r3
	j_s.d	[blink]
	bset.lo	r0,r0,31
#endif /* ENDIAN */

	.balign	4
.Lcharloop:
	ldb.ab	r2,[r0,1]
	ldb.ab	r3,[r1,1]
	nop_s
	breq	r2,0,.Lcmpend
	breq	r2,r3,.Lcharloop
.Lcmpend:
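	/* r2/r3 hold zero-extended bytes, so their difference already has
	   the sign strcmp must return; it is computed in the delay slot.  */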
	j_s.d	[blink]
	sub	r0,r2,r3
END_CFI(strcmp)