^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) /* SPDX-License-Identifier: GPL-2.0 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) * strlen.S (c) 1995 David Mosberger (davidm@cs.arizona.edu)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) * Finds length of a 0-terminated string. Optimized for the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) * Alpha architecture:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) * - memory accessed as aligned quadwords only
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) * - uses cmpbge to compare 8 bytes in parallel
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) * - does binary search to find 0 byte in last
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11) * quadword (HAKMEM needed 12 instructions to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12) * do this instead of the 9 instructions that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) * binary search needs).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15) #include <asm/export.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) .set noreorder # ask the assembler to keep the instruction order as written
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) .set noat # no implicit assembler use of the $at temporary register
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19) .align 3 # 8-byte (2^3) alignment for the entry point
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21) .globl strlen
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22) .ent strlen
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23) # In: $16 = start of a 0-terminated string. Out: $0 = its length in bytes. Scratch: $1, $2, $3.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24) strlen:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25) ldq_u $1, 0($16) # load first quadword ($16 may be misaligned)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) lda $2, -1($31) # $2 <- -1 (all ones), $31 reads as zero
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27) insqh $2, $16, $2 # $2 <- 0xff in every byte that precedes the string start, 0 elsewhere
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28) andnot $16, 7, $0 # $0 <- address of the aligned quadword containing the start
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29) or $2, $1, $1 # force the bytes before the start to be non-zero so they never match
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30) cmpbge $31, $1, $2 # $2 <- bitmask: bit i == 1 <==> i-th byte == 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31) bne $2, found # terminator is already inside the first quadword
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32) # No terminator in the first quadword: scan the following aligned quadwords.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33) loop: ldq $1, 8($0) # load the next aligned quadword
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34) addq $0, 8, $0 # addr += 8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35) nop # helps dual issue last two insns
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36) cmpbge $31, $1, $2 # $2 <- zero-byte bitmask of this quadword (same encoding as above)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37) beq $2, loop # no zero byte here, keep scanning
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38) # Here $0 = address of the quadword holding the terminator, $2 = its zero-byte bitmask.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39) found: blbs $2, done # make aligned case fast (bit 0 set: terminator is byte 0, $0 already points at it)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40) negq $2, $3 # $3 <- -$2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41) and $2, $3, $2 # $2 & -$2 keeps only the lowest set bit, i.e. the first zero byte
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42) # Binary search on the isolated bit: add its index (0..7) to $0 in steps of 4, 2, 1.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) and $2, 0x0f, $1 # non-zero <==> bit index is in 0..3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44) addq $0, 4, $3 # candidate address: $0 + 4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45) cmoveq $1, $3, $0 # bit index is in 4..7: take the candidate
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47) and $2, 0x33, $1 # non-zero <==> bit index is one of 0, 1, 4, 5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48) addq $0, 2, $3 # candidate address: $0 + 2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49) cmoveq $1, $3, $0 # bit index is one of 2, 3, 6, 7: take the candidate
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51) and $2, 0x55, $1 # non-zero <==> bit index is even
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52) addq $0, 1, $3 # candidate address: $0 + 1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53) cmoveq $1, $3, $0 # bit index is odd: take the candidate
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55) done: subq $0, $16, $0 # length = address of the terminator - start of the string
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56) ret $31, ($26) # return to the address held in $26
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58) .end strlen
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59) EXPORT_SYMBOL(strlen)