^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) /* SPDX-License-Identifier: GPL-2.0 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) /* NG4memset.S: Niagara-4 optimized memset/bzero.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) * Copyright (C) 2012 David S. Miller (davem@davemloft.net)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) #include <asm/asi.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8)
/* Declare %g2 and %g3 as scratch registers for the assembler; both are
 * used as store-offset registers by the block-store loops in NG4bzero.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) .register %g2, #scratch
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) .register %g3, #scratch
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12) .text
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) .align 32
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) .globl NG4memset
/* NG4memset: entry stub that prepares the fill pattern and then joins the
 * shared store code at the numeric label "1:" inside NG4bzero.
 *
 * In:   %o0 = destination address (used as the store address by the
 *             shared code)
 *       %o1 = fill value; only the low 8 bits are used
 *       %o2 = byte count
 * Out (to the shared code):
 *       %o1 = byte count
 *       %o4 = fill byte replicated into all 8 bytes of the register
 * Clobbers: %g1, %o2, integer condition codes.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15) NG4memset:
/* %o4 = fill byte; the condition codes record whether it is zero. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) andcc %o1, 0xff, %o4
/* Zero fill byte: %o4 is already the full 64-bit pattern, skip replication. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) be,pt %icc, 1f
/* Delay slot (runs on both paths): move the count into %o1, which frees
 * %o2 for use as a temporary below.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18) mov %o2, %o1
/* Replicate the byte: 8 -> 16 bits. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19) sllx %o4, 8, %g1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20) or %g1, %o4, %o2
/* 16 -> 32 bits. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21) sllx %o2, 16, %g1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22) or %g1, %o2, %o2
/* 32 -> 64 bits; the final OR sits in the delay slot of the branch. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23) sllx %o2, 32, %g1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24) ba,pt %icc, 1f
/* Delay slot: %o4 = complete 64-bit fill pattern. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25) or %g1, %o2, %o4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) .size NG4memset,.-NG4memset
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28) .align 32
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29) .globl NG4bzero
/* NG4bzero: zero a byte range; also hosts the store code shared with
 * NG4memset, which enters at the first "1:" label with %o4 preloaded.
 *
 * In:   %o0 = destination address
 *       %o1 = byte count
 * Out:  %o0 = original destination address (kept in %o3 meanwhile)
 * Clobbers: %g1, %g2, %g3, %o1, %o3, %o4, %o5, integer condition codes.
 *
 * Register roles in the shared code:
 *       %o0 = running store address
 *       %o1 = bytes still to be stored after the current phase
 *       %o4 = 64-bit fill pattern
 *       %g1 = byte counter of the loop currently running
 *
 * Phases: byte stores up to 8-byte alignment, 8-byte stores up to 64-byte
 * alignment, 64-byte block-store loop, 8-byte stores, one optional 4-byte
 * store, then trailing byte stores.
 *
 * NOTE(review): every length test uses the 32-bit condition codes (%icc),
 * and the size compares are signed. Counts that do not fit in 31 bits look
 * like they would be misjudged - confirm what callers can pass.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30) NG4bzero:
/* bzero entry: the fill pattern is zero. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31) clr %o4
/* Shared entry point. Counts of 16 or less go straight to the byte loop. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32) 1: cmp %o1, 16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33) ble %icc, .Ltiny
/* Delay slot (runs on both paths): save the destination for the return value. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34) mov %o0, %o3
/* %g1 = (-dst) & 7 = bytes needed to reach 8-byte alignment. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35) sub %g0, %o0, %g1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36) and %g1, 0x7, %g1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37) brz,pt %g1, .Laligned8
/* Delay slot: take the alignment bytes off the count (a no-op when %g1 is 0). */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38) sub %o1, %g1, %o1
/* Byte stores until the address is 8-byte aligned. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39) 1: stb %o4, [%o0 + 0x00]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40) subcc %g1, 1, %g1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41) bne,pt %icc, 1b
/* Delay slot: advance the store address. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42) add %o0, 1, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) .Laligned8:
/* With 120 bytes or fewer left, skip the 64-byte block path entirely. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44) cmp %o1, 64 + (64 - 8)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45) ble .Lmedium
/* Delay slot: %g1 = -dst. Harmless when the branch is taken, because
 * .Lmedium recomputes %g1 before using it.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46) sub %g0, %o0, %g1
/* %g1 = bytes needed to reach 64-byte alignment. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47) andcc %g1, (64 - 1), %g1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48) brz,pn %g1, .Laligned64
/* Delay slot: take the alignment bytes off the count. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49) sub %o1, %g1, %o1
/* 8-byte stores until the address is 64-byte aligned. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50) 1: stx %o4, [%o0 + 0x00]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51) subcc %g1, 8, %g1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52) bne,pt %icc, 1b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53) add %o0, 0x8, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54) .Laligned64:
/* %g1 = bytes for the 64-byte loop (count rounded down to a multiple of
 * 64); %o1 keeps the remainder below 64. The count exceeded 120 before at
 * most 56 alignment bytes were removed, so %g1 is at least 64 here.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55) andn %o1, 64 - 1, %g1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56) sub %o1, %g1, %o1
/* A non-zero pattern uses the loop that stores all eight doublewords. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57) brnz,pn %o4, .Lnon_bzero_loop
/* Delay slot (runs on both paths): %g2 = offset 0x20, used by both loops. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58) mov 0x20, %g2
/* Zero loop: two stores per 64 bytes, at offsets 0x00 and 0x20.
 * NOTE(review): presumably relies on ASI_BLK_INIT_QUAD_LDD_P zeroing the
 * rest of the line as a side effect - confirm against the CPU manual.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59) 1: stxa %o4, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60) subcc %g1, 0x40, %g1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61) stxa %o4, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62) bne,pt %icc, 1b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63) add %o0, 0x40, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64) .Lpostloop:
/* Both block loops end here. Fewer than 8 bytes left: finish with bytes. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65) cmp %o1, 8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66) bl,pn %icc, .Ltiny
/* Delay slot (runs on both paths): memory barrier after the block stores. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67) membar #StoreStore|#StoreLoad
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68) .Lmedium:
/* %g1 = bytes for 8-byte stores; %o1 keeps the remainder below 8. Both
 * ways in arrive with at least 8 bytes left as judged by the compares
 * above, so %g1 is non-zero when the loop starts.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69) andn %o1, 0x7, %g1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70) sub %o1, %g1, %o1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71) 1: stx %o4, [%o0 + 0x00]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72) subcc %g1, 0x8, %g1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73) bne,pt %icc, 1b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74) add %o0, 0x08, %o0
/* If bit 2 of the remainder is set, emit one 4-byte store. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75) andcc %o1, 0x4, %g1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76) be,pt %icc, .Ltiny
/* Delay slot: %g1 is 0 or 4; remove it from the remainder. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77) sub %o1, %g1, %o1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78) stw %o4, [%o0 + 0x00]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79) add %o0, 0x4, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80) .Ltiny:
/* Trailing byte loop; nothing to do when the count is zero. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81) cmp %o1, 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82) be,pn %icc, .Lexit
/* This subcc is both the delay slot of the branch above and the loop head.
 * When the exit is taken it still runs, but %o1 is not read again.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 83) 1: subcc %o1, 1, %o1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 84) stb %o4, [%o0 + 0x00]
/* Tests the flags set by the subcc at the loop head. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 85) bne,pt %icc, 1b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 86) add %o0, 1, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 87) .Lexit:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88) retl
/* Delay slot: return the original destination address. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89) mov %o3, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90) .Lnon_bzero_loop:
/* Non-zero pattern: write all eight doublewords of each 64-byte block.
 * Offsets: %g0 = 0x00, %g3 = 0x08, %g2 = 0x20 (set in the delay slot of
 * the branch that came here), %o5 = 0x28.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91) mov 0x08, %g3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92) mov 0x28, %o5
/* First four stores cover block offsets 0x00, 0x20, 0x08, 0x28. The subcc
 * sets the flags read by the bne at the bottom; nothing in between
 * modifies the condition codes.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93) 1: stxa %o4, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94) subcc %g1, 0x40, %g1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95) stxa %o4, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 96) stxa %o4, [%o0 + %g3] ASI_BLK_INIT_QUAD_LDD_P
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97) stxa %o4, [%o0 + %o5] ASI_BLK_INIT_QUAD_LDD_P
/* Shift the base by 0x10 so the same four offset registers now cover
 * block offsets 0x10, 0x30, 0x18, 0x38.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 98) add %o0, 0x10, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 99) stxa %o4, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100) stxa %o4, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) stxa %o4, [%o0 + %g3] ASI_BLK_INIT_QUAD_LDD_P
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) stxa %o4, [%o0 + %o5] ASI_BLK_INIT_QUAD_LDD_P
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) bne,pt %icc, 1b
/* Delay slot: 0x10 + 0x30 = 0x40, i.e. advance to the next 64-byte block. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104) add %o0, 0x30, %o0
/* Rejoin the common tail. The ",a" annuls the delay slot of this
 * unconditional branch, so the nop below is not executed.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105) ba,a,pt %icc, .Lpostloop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) nop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) .size NG4bzero,.-NG4bzero