^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) /* SPDX-License-Identifier: GPL-2.0 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) * strlen() for PPC32
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) * Copyright (C) 2018 Christophe Leroy CS Systemes d'Information.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) * Inspired by the glibc implementation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) #include <asm/ppc_asm.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) #include <asm/export.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11) #include <asm/cache.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) .text
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) * Algorithm:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18) * 1) Given a word 'x', we can test to see if it contains any 0 bytes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19) * by subtracting 0x01010101, and seeing if any of the high bits of each
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20) * byte changed from 0 to 1. This works because the least significant
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21) * 0 byte must have had no incoming carry (otherwise it's not the least
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22) * significant), so it is 0x00 - 0x01 == 0xff. For all other
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23) * byte values, either they have the high bit set initially, or when
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24) * 1 is subtracted you get a value in the range 0x00-0x7f, none of which
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25) * have their high bit set. The expression here is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) * ((x - 0x01010101) & ~x & 0x80808080), which gives 0x00000000 when
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27) * there were no 0x00 bytes in the word. You get 0x80 in bytes that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28) * match, but possibly false 0x80 matches in the next more significant
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29) * byte to a true match due to carries. For little-endian this is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30) * of no consequence since the least significant match is the one
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31) * we're interested in, but big-endian needs method 2 to find which
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32) * byte matches.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33) * 2) Given a word 'x', we can test to see _which_ byte was zero by
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34) * calculating ~(((x & ~0x80808080) - 0x80808080 - 1) | x | ~0x80808080).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35) * This produces 0x80 in each byte that was zero, and 0x00 in all
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36) * the other bytes. The '| ~0x80808080' clears the low 7 bits in each
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37) * byte, and the '| x' part ensures that bytes with the high bit set
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38) * produce 0x00. The addition ('- 0x80808080 - 1' is '+ 0x7f7f7f7f') will carry into the high bit of each byte
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39) * iff that byte had one of its low 7 bits set. We can then just see
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40) * which was the most significant bit set and divide by 8 to find how
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41) * many to add to the index.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42) * This is from the book 'The PowerPC Compiler Writer's Guide',
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) * by Steve Hoxey, Faraydon Karim, Bill Hay and Hank Warren.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46) _GLOBAL(strlen) /* in: r3 = string address; out: r3 = offset of first 0 byte; uses r0, r6-r10, CR0, XER[CA] */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47) andi. r0, r3, 3 /* r0 = address & 3 (byte offset in word); sets CR0 for the bne- below */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48) lis r7, 0x0101 /* r7 = 0x01010000 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49) addi r10, r3, -4 /* r10 = address - 4: lwzu adds 4 before each load */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50) addic r7, r7, 0x0101 /* r7 = 0x01010101 (lomagic) & clear XER[CA] */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51) rotlwi r6, r7, 31 /* r6 = 0x80808080 (himagic) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52) bne- 3f /* address not word aligned: mask the first word at 3: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53) .balign IFETCH_ALIGN_BYTES /* align the head of the word loop */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54) 1: lwzu r9, 4(r10) /* r10 += 4; r9 = x = word at r10 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55) 2: subf r8, r7, r9 /* r8 = x - 0x01010101 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56) and. r8, r8, r6 /* r8 = (x - 0x01010101) & 0x80808080 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57) beq+ 1b /* no high bit set after the subtract: no 0 byte, next word */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58) andc. r8, r8, r9 /* r8 &= ~x: drop bytes whose high bit was already set in x */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59) beq+ 1b /* nothing left: no 0 byte, next word */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60) andc r8, r9, r6 /* method 2: r8 = x & ~0x80808080 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61) orc r9, r9, r6 /* r9 = x | ~0x80808080 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62) subfe r8, r6, r8 /* r8 = r8 + ~r6 + CA = r8 - 0x80808080 - 1 (CA is still 0 from addic) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63) nor r8, r8, r9 /* r8 = ~(r8 | r9): 0x80 in each byte of x that was 0 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64) cntlzw r8, r8 /* r8 = number of leading zero bits = bit position of the most significant 0 byte */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65) subf r3, r3, r10 /* r3 = r10 - string address = offset of the current word */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66) srwi r8, r8, 3 /* r8 /= 8: byte index of the 0 byte inside the word */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67) add r3, r3, r8 /* r3 = word offset + byte index = result */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68) blr /* return with the result in r3 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70) /* Misaligned string: make sure bytes before string are seen not 0 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71) 3: xor r10, r10, r0 /* clear the low 2 bits of r10: r10 = (address & ~3) - 4 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72) orc r8, r8, r8 /* r8 = r8 | ~r8 = 0xffffffff */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73) lwzu r9, 4(r10) /* r10 += 4; r9 = aligned word holding the first string byte */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74) slwi r0, r0, 3 /* r0 = byte offset * 8 = number of bits to mask */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75) srw r8, r8, r0 /* r8 = 0xffffffff >> r0: 0 bits over the most significant r0/8 bytes */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76) orc r9, r9, r8 /* r9 |= ~r8: force those leading bytes to 0xff so they never match */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77) b 2b /* enter the loop at the test, skipping its load */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78) EXPORT_SYMBOL(strlen)