// SPDX-License-Identifier: GPL-2.0
/*
 * Shadow Call Stack support.
 *
 * Copyright (C) 2019 Google LLC
 */

#include <linux/cpuhotplug.h>
#include <linux/kasan.h>
#include <linux/mm.h>
#include <linux/scs.h>
#include <linux/vmalloc.h>
#include <linux/vmstat.h>

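/*
 * Add or subtract a shadow stack's size (in KB) from the NR_KERNEL_SCS_KB
 * counter of the node backing it; account is +1 when allocating and -1
 * when freeing.
 */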
static void __scs_account(void *s, int account)
{
	struct page *scs_page = vmalloc_to_page(s);

	mod_node_page_state(page_pgdat(scs_page), NR_KERNEL_SCS_KB,
			    account * (SCS_SIZE / SZ_1K));
}

/* Matches NR_CACHED_STACKS for VMAP_STACK */
#define NR_CACHED_SCS 2
static DEFINE_PER_CPU(void *, scs_cache[NR_CACHED_SCS]);

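/*
 * Try to grab a stack from the per-CPU cache first; cached stacks were
 * poisoned when they were freed, so unpoison and zero them before reuse.
 * Fall back to allocating a fresh SCS_SIZE region from the vmalloc area.
 */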
static void *__scs_alloc(int node)
{
	int i;
	void *s;

	for (i = 0; i < NR_CACHED_SCS; i++) {
		s = this_cpu_xchg(scs_cache[i], NULL);
		if (s) {
			kasan_unpoison_vmalloc(s, SCS_SIZE);
			memset(s, 0, SCS_SIZE);
			return s;
		}
	}

	return __vmalloc_node_range(SCS_SIZE, 1, VMALLOC_START, VMALLOC_END,
				    GFP_SCS, PAGE_KERNEL, 0, node,
				    __builtin_return_address(0));
}

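/*
 * Allocate a shadow stack and write SCS_END_MAGIC into its last slot,
 * which lets task_scs_end_corrupted() detect overflows when the task
 * is released.
 */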
void *scs_alloc(int node)
{
	void *s;

	s = __scs_alloc(node);
	if (!s)
		return NULL;

	*__scs_magic(s) = SCS_END_MAGIC;

	/*
	 * Poison the allocation to catch unintentional accesses to
	 * the shadow stack when KASAN is enabled.
	 */
	kasan_poison_vmalloc(s, SCS_SIZE);
	__scs_account(s, 1);
	return s;
}

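/*
 * Return a shadow stack to the per-CPU cache if there is a free slot,
 * otherwise free it back to the vmalloc area.
 */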
void scs_free(void *s)
{
	int i;

	__scs_account(s, -1);

	/*
	 * We cannot sleep as this can be called in interrupt context,
	 * so use this_cpu_cmpxchg to update the cache, and vfree_atomic
	 * to free the stack.
	 */

	for (i = 0; i < NR_CACHED_SCS; i++)
		if (this_cpu_cmpxchg(scs_cache[i], 0, s) == NULL)
			return;

	kasan_unpoison_vmalloc(s, SCS_SIZE);
	vfree_atomic(s);
}

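/*
 * CPU hotplug teardown callback: drop any stacks still sitting in the
 * cache of a CPU that has gone offline.
 */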
static int scs_cleanup(unsigned int cpu)
{
	int i;
	void **cache = per_cpu_ptr(scs_cache, cpu);

	for (i = 0; i < NR_CACHED_SCS; i++) {
		vfree(cache[i]);
		cache[i] = NULL;
	}

	return 0;
}

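/* Register scs_cleanup() to run whenever a CPU is taken down. */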
void __init scs_init(void)
{
	cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "scs:scs_cache", NULL,
			  scs_cleanup);
}

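/*
 * Allocate a shadow stack for a new task and point both the stack base
 * and the current shadow stack pointer at it.
 */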
int scs_prepare(struct task_struct *tsk, int node)
{
	void *s = scs_alloc(node);

	if (!s)
		return -ENOMEM;

	task_scs(tsk) = task_scs_sp(tsk) = s;
	return 0;
}

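/*
 * With CONFIG_DEBUG_STACK_USAGE, measure how much of the shadow stack a
 * dying task used by scanning from the base to the first unused (zero)
 * entry, and log a message whenever a new system-wide maximum is seen.
 * The cmpxchg loop updates the maximum locklessly: retry with the value
 * another CPU stored until our store wins or theirs is already larger.
 */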
static void scs_check_usage(struct task_struct *tsk)
{
	static unsigned long highest;

	unsigned long *p, prev, curr = highest, used = 0;

	if (!IS_ENABLED(CONFIG_DEBUG_STACK_USAGE))
		return;

	for (p = task_scs(tsk); p < __scs_magic(tsk); ++p) {
		if (!READ_ONCE_NOCHECK(*p))
			break;
		used += sizeof(*p);
	}

	while (used > curr) {
		prev = cmpxchg_relaxed(&highest, curr, used);

		if (prev == curr) {
			pr_info("%s (%d): highest shadow stack usage: %lu bytes\n",
				tsk->comm, task_pid_nr(tsk), used);
			break;
		}

		curr = prev;
	}
}

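/*
 * Called when a task is released: warn if the end magic has been
 * overwritten, record the task's stack usage, and free the stack.
 */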
void scs_release(struct task_struct *tsk)
{
	void *s = task_scs(tsk);

	if (!s)
		return;

	WARN(task_scs_end_corrupted(tsk),
	     "corrupted shadow stack detected when freeing task\n");
	scs_check_usage(tsk);
	scs_free(s);
}