^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) /* SPDX-License-Identifier: GPL-2.0-or-later */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) * OpenRISC memset.S
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) * Hand-optimized assembler version of memset for OpenRISC.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) * Algorithm inspired by several other arch-specific memset routines
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) * in the kernel tree
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) * Copyright (C) 2015 Olof Kindgren <olof.kindgren@gmail.com>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12) .global memset
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) .type memset, @function
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) memset:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15) /* memset: store the low byte of c into the n bytes starting at address s.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) * In: r3 = s (start address, never modified), r4 = c (only bits 7..0 are used), r5 = n (byte count, compared as unsigned)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) * Out: r11 = s, copied from r3 in the delay slot of the final l.jr r9
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18) * Temps: r13 = fill value, r15 = scratch, r17 = bytes remaining, r19 = write pointer
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19) * The code relies on branch delay slots: the instruction that follows each l.bf / l.jr runs whether or not the branch is taken
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22) /* Exit if n == 0 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23) l.sfeqi r5, 0 // flag = (n == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24) l.bf 4f // delay slot is the l.andi below, it only writes temp r13
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) /* Truncate c to char */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27) l.andi r13, r4, 0xff // r13 = c & 0xff
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29) /* Skip word extension if c is 0, r13 is then already the all-zero word */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30) l.sfeqi r13, 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31) l.bf 1f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32) /* Delay slot: flag = (n <= 7), i.e. fewer than two whole words (8 bytes). Tested by the l.bf 3f after label 1 on both paths */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33) l.sfleui r5, 7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35) /* Extend char c to 32-bit word cccc in r13 (one character per byte, most significant byte first) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36) l.slli r15, r13, 16 // r13 = 000c, r15 = 0c00
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37) l.or r13, r13, r15 // r13 = 0c0c, r15 = 0c00
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38) l.slli r15, r13, 8 // r13 = 0c0c, r15 = c0c0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39) l.or r13, r13, r15 // r13 = cccc, r15 = c0c0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41) 1: l.addi r19, r3, 0 // r19 = s (write pointer)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42) /* Fewer than two words to set: do all of them in the byte loop */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) l.bf 3f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44) l.or r17, r5, r0 // Delay slot: r17 = n (bytes remaining)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46) /* Mask out two LSBs to check alignment */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47) l.andi r15, r3, 0x3 // r15 = s & 3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49) /* lsb == 00, s is word aligned, jump to word loop */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50) l.sfeqi r15, 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51) l.bf 2f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52) l.addi r19, r3, 0 // Delay slot: r19 = s
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54) /* lsb == 01,10 or 11: store 3, 2 or 1 leading bytes so that r19 ends up word aligned */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55) l.sb 0(r3), r13 // s[0] = c
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56) l.addi r17, r17, -1 // Decrease n
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58) l.sfeqi r15, 3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59) l.bf 2f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60) l.addi r19, r3, 1 // Delay slot: r19 = s + 1 (word aligned when lsb == 11)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62) /* lsb == 01 or 10 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63) l.sb 1(r3), r13 // s[1] = c
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64) l.addi r17, r17, -1 // Decrease n
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66) l.sfeqi r15, 2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67) l.bf 2f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68) l.addi r19, r3, 2 // Delay slot: r19 = s + 2 (word aligned when lsb == 10)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70) /* lsb == 01 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71) l.sb 2(r3), r13 // s[2] = c
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72) l.addi r17, r17, -1 // Decrease n
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73) l.addi r19, r3, 3 // r19 = s + 3 (word aligned when lsb == 01)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75) /* Word fill loop. At least 5 bytes remain on entry: n was >= 8 and at most 3 were used for alignment */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76) 2: l.sw 0(r19), r13 // *r19 = cccc
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77) l.addi r17, r17, -4 // Decrease n by one word
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78) l.sfgeui r17, 4 // flag = (n >= 4), another whole word fits
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79) l.bf 2b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80) l.addi r19, r19, 4 // Delay slot: advance r19 by one word (also runs on loop exit)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82) /* When n > 0, set the remaining 1 to 3 bytes, otherwise jump to exit */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 83) l.sfeqi r17, 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 84) l.bf 4f // delay slot is the l.addi at label 3, it only decrements temp r17
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 85)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 86) /* Byte fill loop, always entered with r17 > 0 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 87) 3: l.addi r17, r17, -1 // Decrease n
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88) l.sb 0(r19), r13 // *r19 = c (l.sb stores only the low byte of r13)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89) l.sfnei r17, 0 // flag = (n != 0), more bytes to set
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90) l.bf 3b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91) l.addi r19, r19, 1 // Delay slot: advance r19 by one byte
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93) 4: l.jr r9 // jump to the address held in r9
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94) l.ori r11, r3, 0 // Delay slot: r11 = s