^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) /* SPDX-License-Identifier: GPL-2.0 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) * Copyright (C) 1999-2002 Hewlett-Packard Co
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) * Stephane Eranian <eranian@hpl.hp.com>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) * David Mosberger-Tang <davidm@hpl.hp.com>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) * Copyright (C) 2002 Ken Chen <kenneth.w.chen@intel.com>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) * 1/06/01 davidm Tuned for Itanium.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) * 2/12/02 kchen Tuned for both Itanium and McKinley
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) * 3/08/02 davidm Some more tweaking
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) #include <asm/asmmacro.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) #include <asm/page.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15) #include <asm/export.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16)
// Per-CPU tuning constants for clear_page.
//   L3_LINE_SIZE   - stride (in bytes) of the look-ahead store pointer; the
//                    main loop runs PAGE_SIZE/L3_LINE_SIZE times.
//   PREFETCH_LINES - number of look-ahead stores issued before the main loop
//                    starts, i.e. how many lines the look-ahead pointer leads.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) #ifdef CONFIG_ITANIUM
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18) # define L3_LINE_SIZE 64 // Itanium L3 line size
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19) # define PREFETCH_LINES 9 // look-ahead distance in lines (magic number, presumably tuned empirically)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21) # define L3_LINE_SIZE 128 // McKinley L3 line size
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22) # define PREFETCH_LINES 12 // look-ahead distance in lines (magic number, presumably tuned empirically)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24)
// Register aliases used by clear_page (in0 is the page address argument).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25) #define saved_lc r2 // holds the caller's ar.lc; copied back before returning
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) #define dst_fetch r3 // look-ahead store pointer; starts at in0, steps by L3_LINE_SIZE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27) #define dst1 r8 // store pointer, starts at in0 + 16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28) #define dst2 r9 // store pointer, starts at in0 + 32
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29) #define dst3 r10 // store pointer, starts at in0 + 48
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30) #define dst4 r11 // store pointer, starts at in0 + 64 (only stored through when CONFIG_ITANIUM is not set)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32) #define dst_last r31 // bound for dst_fetch: look-ahead stores are issued only while dst_fetch < dst_last
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33)
// clear_page: fill one page with the contents of f0.
//
// In:    in0 = address of the first byte of the page
//        (NOTE(review): presumably page-aligned -- confirm against callers)
// Out:   nothing
// Uses:  r2 (saved_lc), r3 (dst_fetch), r8-r11 (dst1-dst4), r16, r31
//        (dst_last), p8, ar.lc (saved in saved_lc on entry, restored on exit)
//
// All stores are "stf.spill.nta [ptr] = f0, incr": f0 is written at ptr and
// ptr is then post-incremented by incr. f0 is the architected +0.0 register
// on IA-64, and the store pointers are spaced 16 bytes apart, so each store
// covers one 16-byte slot. ".nta" is the non-temporal locality hint.
//
// Structure:
//   1. Warm-up loop (.fetch): PREFETCH_LINES stores through dst_fetch, one at
//      offset 0 of each of the first PREFETCH_LINES lines. This leaves
//      dst_fetch PREFETCH_LINES lines ahead of dst1..dst4 (presumably so each
//      line is already being brought in when the main loop reaches it).
//   2. Main loop (1:): PAGE_SIZE/L3_LINE_SIZE iterations. Each iteration
//      writes the 16-byte slots at offsets 16.. of one line through
//      dst1..dst4 and, while dst_fetch is still inside the page, writes slot 0
//      of the line PREFETCH_LINES ahead through dst_fetch.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34) GLOBAL_ENTRY(clear_page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35) .prologue
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36) .regstk 1,0,0,0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37) mov r16 = PAGE_SIZE/L3_LINE_SIZE-1 // main loop count, -1=repeat/until
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38) .save ar.lc, saved_lc
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39) mov saved_lc = ar.lc
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41) .body
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42) mov ar.lc = (PREFETCH_LINES - 1) // warm-up loop runs PREFETCH_LINES times
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) mov dst_fetch = in0 // look-ahead pointer starts at the page base
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44) adds dst1 = 16, in0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45) adds dst2 = 32, in0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46) ;;
// Warm-up: write slot 0 of each of the first PREFETCH_LINES lines.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47) .fetch: stf.spill.nta [dst_fetch] = f0, L3_LINE_SIZE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48) adds dst3 = 48, in0 // executing this multiple times is harmless
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49) br.cloop.sptk.few .fetch
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50) ;;
// Here dst_fetch = in0 + PREFETCH_LINES*L3_LINE_SIZE, so dst_last = in0 + PAGE_SIZE.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51) addl dst_last = (PAGE_SIZE - PREFETCH_LINES*L3_LINE_SIZE), dst_fetch
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52) mov ar.lc = r16 // one L3 line per iteration
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53) adds dst4 = 64, in0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54) ;;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55) #ifdef CONFIG_ITANIUM
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56) // Optimized for Itanium
// 64-byte line: slots 16 and 32 here, slot 48 in the shared tail below.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57) 1: stf.spill.nta [dst1] = f0, 64
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58) stf.spill.nta [dst2] = f0, 64
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59) cmp.lt p8,p0=dst_fetch, dst_last // p8 = look-ahead pointer still inside the page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60) ;;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62) // Optimized for McKinley
// 128-byte line: slots 16, 32, 48, 64 in the first group, then slots 80 and 96
// (dst1/dst2 after their +64 post-increment); slot 112 is written by the dst3
// store in the shared tail below. dst4 steps by a full 128 bytes because it is
// used only once per iteration.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63) 1: stf.spill.nta [dst1] = f0, 64
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64) stf.spill.nta [dst2] = f0, 64
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65) stf.spill.nta [dst3] = f0, 64
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66) stf.spill.nta [dst4] = f0, 128
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67) cmp.lt p8,p0=dst_fetch, dst_last // p8 = look-ahead pointer still inside the page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68) ;;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69) stf.spill.nta [dst1] = f0, 64
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70) stf.spill.nta [dst2] = f0, 64
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71) #endif
// Shared tail of the main loop (both CPU variants): last dst3 slot of the
// current line, then the predicated look-ahead store.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72) stf.spill.nta [dst3] = f0, 64
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73) (p8) stf.spill.nta [dst_fetch] = f0, L3_LINE_SIZE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74) br.cloop.sptk.few 1b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75) ;;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76) mov ar.lc = saved_lc // restore lc
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77) br.ret.sptk.many rp
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78) END(clear_page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79) EXPORT_SYMBOL(clear_page)