^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) // SPDX-License-Identifier: GPL-2.0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) // Copyright(c) 2018 Intel Corporation. All rights reserved.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) #include <linux/mm.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) #include <linux/init.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) #include <linux/mmzone.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) #include <linux/random.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) #include <linux/moduleparam.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) #include "internal.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) #include "shuffle.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11)
/* Static branch that shuffle_store() turns on when shuffling is requested. */
DEFINE_STATIC_KEY_FALSE(page_alloc_shuffle_key);

/* Backing storage for the "shuffle" parameter registered further down. */
static bool shuffle_param;

/*
 * Getter for the "shuffle" parameter: writes "Y\n" or "N\n" into @buffer
 * depending on shuffle_param and returns the character count reported by
 * sprintf(). @kp is not consulted.
 */
static int shuffle_show(char *buffer, const struct kernel_param *kp)
{
	const char state = shuffle_param ? 'Y' : 'N';

	return sprintf(buffer, "%c\n", state);
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20) static __meminit int shuffle_store(const char *val,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21) const struct kernel_param *kp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23) int rc = param_set_bool(val, kp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25) if (rc < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) return rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27) if (shuffle_param)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28) static_branch_enable(&page_alloc_shuffle_key);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31) module_param_call(shuffle, shuffle_store, shuffle_show, &shuffle_param, 0400);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34) * For two pages to be swapped in the shuffle, they must be free (on a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35) * 'free_area' lru), have the same order, and have the same migratetype.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37) static struct page * __meminit shuffle_valid_page(struct zone *zone,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38) unsigned long pfn, int order)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40) struct page *page = pfn_to_online_page(pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) * Given we're dealing with randomly selected pfns in a zone we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44) * need to ask questions like...
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47) /* ... is the page managed by the buddy? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48) if (!page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51) /* ... is the page assigned to the same zone? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52) if (page_zone(page) != zone)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55) /* ...is the page free and currently on a free_area list? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56) if (!PageBuddy(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60) * ...is the page on the same list as the page we will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61) * shuffle it with?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63) if (buddy_order(page) != order)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66) return page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70) * Fisher-Yates shuffle the freelist which prescribes iterating through an
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71) * array, pfns in this case, and randomly swapping each entry with another in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72) * the span, end_pfn - start_pfn.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74) * To keep the implementation simple it does not attempt to correct for sources
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75) * of bias in the distribution, like modulo bias or pseudo-random number
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76) * generator bias. I.e. the expectation is that this shuffling raises the bar
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77) * for attacks that exploit the predictability of page allocations, but need not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78) * be a perfect shuffle.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80) #define SHUFFLE_RETRY 10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81) void __meminit __shuffle_zone(struct zone *z)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 83) unsigned long i, flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 84) unsigned long start_pfn = z->zone_start_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 85) unsigned long end_pfn = zone_end_pfn(z);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 86) const int order = SHUFFLE_ORDER;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 87) const int order_pages = 1 << order;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89) spin_lock_irqsave(&z->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90) start_pfn = ALIGN(start_pfn, order_pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91) for (i = start_pfn; i < end_pfn; i += order_pages) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92) unsigned long j;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93) int migratetype, retry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94) struct page *page_i, *page_j;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 96) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97) * We expect page_i, in the sub-range of a zone being added
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 98) * (@start_pfn to @end_pfn), to more likely be valid compared to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 99) * page_j randomly selected in the span @zone_start_pfn to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100) * @spanned_pages.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) page_i = shuffle_valid_page(z, i, order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) if (!page_i)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) for (retry = 0; retry < SHUFFLE_RETRY; retry++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108) * Pick a random order aligned page in the zone span as
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) * a swap target. If the selected pfn is a hole, retry
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) * up to SHUFFLE_RETRY attempts find a random valid pfn
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) * in the zone.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113) j = z->zone_start_pfn +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) ALIGN_DOWN(get_random_long() % z->spanned_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) order_pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) page_j = shuffle_valid_page(z, j, order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117) if (page_j && page_j != page_i)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120) if (retry >= SHUFFLE_RETRY) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121) pr_debug("%s: failed to swap %#lx\n", __func__, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) * Each migratetype corresponds to its own list, make sure the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127) * types match otherwise we're moving pages to lists where they
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128) * do not belong.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130) migratetype = get_pageblock_migratetype(page_i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131) if (get_pageblock_migratetype(page_j) != migratetype) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) pr_debug("%s: migratetype mismatch %#lx\n", __func__, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136) list_swap(&page_i->lru, &page_j->lru);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138) pr_debug("%s: swap: %#lx -> %#lx\n", __func__, i, j);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140) /* take it easy on the zone lock */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141) if ((i % (100 * order_pages)) == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142) spin_unlock_irqrestore(&z->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143) cond_resched();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144) spin_lock_irqsave(&z->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147) spin_unlock_irqrestore(&z->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151) * shuffle_free_memory - reduce the predictability of the page allocator
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152) * @pgdat: node page data
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154) void __meminit __shuffle_free_memory(pg_data_t *pgdat)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156) struct zone *z;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158) for (z = pgdat->node_zones; z < pgdat->node_zones + MAX_NR_ZONES; z++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159) shuffle_zone(z);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 160) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 161)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 162) bool shuffle_pick_tail(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 163) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 164) static u64 rand;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 165) static u8 rand_bits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 166) bool ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 168) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169) * The lack of locking is deliberate. If 2 threads race to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170) * update the rand state it just adds to the entropy.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172) if (rand_bits == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 173) rand_bits = 64;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 174) rand = get_random_u64();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 175) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 176)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 177) ret = rand & 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 178)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 179) rand_bits--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 180) rand >>= 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 181)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 182) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 183) }