^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) // SPDX-License-Identifier: GPL-2.0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) * linux/mm/compaction.c
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) * Memory compaction for the reduction of external fragmentation. Note that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) * this heavily depends upon page migration to do all the real heavy
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) * lifting
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) * Copyright IBM Corp. 2007-2010 Mel Gorman <mel@csn.ul.ie>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11) #include <linux/cpu.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12) #include <linux/swap.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) #include <linux/migrate.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) #include <linux/compaction.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15) #include <linux/mm_inline.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) #include <linux/sched/signal.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) #include <linux/backing-dev.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18) #include <linux/sysctl.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19) #include <linux/sysfs.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20) #include <linux/page-isolation.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21) #include <linux/kasan.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22) #include <linux/kthread.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23) #include <linux/freezer.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24) #include <linux/page_owner.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25) #include <linux/psi.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) #include "internal.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28) #ifdef CONFIG_COMPACTION
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29) static inline void count_compact_event(enum vm_event_item item)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31) count_vm_event(item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34) static inline void count_compact_events(enum vm_event_item item, long delta)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36) count_vm_events(item, delta);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39) #define count_compact_event(item) do { } while (0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40) #define count_compact_events(item, delta) do { } while (0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) #if defined CONFIG_COMPACTION || defined CONFIG_CMA
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45) #define CREATE_TRACE_POINTS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46) #include <trace/events/compaction.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48) #define block_start_pfn(pfn, order) round_down(pfn, 1UL << (order))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49) #define block_end_pfn(pfn, order) ALIGN((pfn) + 1, 1UL << (order))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50) #define pageblock_start_pfn(pfn) block_start_pfn(pfn, pageblock_order)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51) #define pageblock_end_pfn(pfn) block_end_pfn(pfn, pageblock_order)
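
/*
 * Illustrative sketch (hypothetical helper, not used anywhere): the macros
 * above round an arbitrary PFN to the boundaries of its pageblock. With a
 * hypothetical pageblock_order of 9 (512 pages), pfn 1000 maps to the
 * half-open range [512, 1024).
 */
static inline void pageblock_bounds_example(unsigned long pfn,
					    unsigned long *start,
					    unsigned long *end)
{
	*start = pageblock_start_pfn(pfn);	/* round down to the block start */
	*end = pageblock_end_pfn(pfn);		/* one past the block's last PFN */
}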
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54) * Fragmentation score check interval for proactive compaction purposes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56) static const unsigned int HPAGE_FRAG_CHECK_INTERVAL_MSEC = 500;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59) * Page order with respect to which proactive compaction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60) * calculates external fragmentation, which is used as
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61) * the "fragmentation score" of a node/zone.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63) #if defined CONFIG_TRANSPARENT_HUGEPAGE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64) #define COMPACTION_HPAGE_ORDER HPAGE_PMD_ORDER
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65) #elif defined CONFIG_HUGETLBFS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66) #define COMPACTION_HPAGE_ORDER HUGETLB_PAGE_ORDER
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68) #define COMPACTION_HPAGE_ORDER (PMD_SHIFT - PAGE_SHIFT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69) #endif
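
/*
 * Worked example (hypothetical configuration, not from the source): with 4K
 * base pages and a 2MB PMD, the fallback above evaluates to
 * PMD_SHIFT - PAGE_SHIFT = 21 - 12 = 9, so fragmentation is scored against
 * order-9 (2MB) allocations, matching what the THP and hugetlbfs cases pick.
 */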
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71) static unsigned long release_freepages(struct list_head *freelist)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73) struct page *page, *next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74) unsigned long high_pfn = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76) list_for_each_entry_safe(page, next, freelist, lru) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77) unsigned long pfn = page_to_pfn(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78) list_del(&page->lru);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79) __free_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80) if (pfn > high_pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81) high_pfn = pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 83)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 84) return high_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 85) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 86)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 87) static void split_map_pages(struct list_head *list)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89) unsigned int i, order, nr_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90) struct page *page, *next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91) LIST_HEAD(tmp_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93) list_for_each_entry_safe(page, next, list, lru) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94) list_del(&page->lru);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 96) order = page_private(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97) nr_pages = 1 << order;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 98)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 99) post_alloc_hook(page, order, __GFP_MOVABLE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100) if (order)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) split_page(page, order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) for (i = 0; i < nr_pages; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104) list_add(&page->lru, &tmp_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105) page++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) list_splice(&tmp_list, list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) }
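
/*
 * Illustrative sketch (hypothetical helper, not used anywhere):
 * isolate_freepages_block() records each free page's buddy order in
 * page_private(), and split_map_pages() above consumes it. An order-3 entry
 * on the incoming list therefore comes back as eight order-0 pages. This
 * counts how many base pages a not-yet-split freelist will expand to.
 */
static inline unsigned long split_map_pages_count_example(struct list_head *list)
{
	struct page *page;
	unsigned long nr = 0;

	list_for_each_entry(page, list, lru)
		nr += 1UL << page_private(page);	/* order stored at isolation time */

	return nr;
}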
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) #ifdef CONFIG_COMPACTION
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) int PageMovable(struct page *page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) struct address_space *mapping;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118) VM_BUG_ON_PAGE(!PageLocked(page), page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) if (!__PageMovable(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122) mapping = page_mapping(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) if (mapping && mapping->a_ops && mapping->a_ops->isolate_page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128) EXPORT_SYMBOL(PageMovable);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130) void __SetPageMovable(struct page *page, struct address_space *mapping)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) VM_BUG_ON_PAGE(!PageLocked(page), page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133) VM_BUG_ON_PAGE((unsigned long)mapping & PAGE_MAPPING_MOVABLE, page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134) page->mapping = (void *)((unsigned long)mapping | PAGE_MAPPING_MOVABLE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136) EXPORT_SYMBOL(__SetPageMovable);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138) void __ClearPageMovable(struct page *page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140) VM_BUG_ON_PAGE(!PageLocked(page), page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141) VM_BUG_ON_PAGE(!PageMovable(page), page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143) * Clear the registered address_space value while keeping the PAGE_MAPPING_MOVABLE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144) * flag, so the VM can tell that the driver released the page after isolation.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145) * With the flag still set, migration won't try to put the page back.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147) page->mapping = (void *)((unsigned long)page->mapping &
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148) PAGE_MAPPING_MOVABLE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150) EXPORT_SYMBOL(__ClearPageMovable);
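
/*
 * Illustrative sketch (hypothetical helper, not part of the file above):
 * movable non-LRU pages encode their state in the low bits of page->mapping,
 * so the driver's address_space pointer and the PAGE_MAPPING_MOVABLE flag
 * share a single word. This mirrors the encoding used by __SetPageMovable()
 * and __ClearPageMovable() above.
 */
static inline struct address_space *movable_mapping_example(struct page *page)
{
	unsigned long raw = (unsigned long)page->mapping;

	if (!(raw & PAGE_MAPPING_MOVABLE))
		return NULL;	/* not marked movable */

	/*
	 * Mask off the flag to recover the registered address_space; it may
	 * be NULL after __ClearPageMovable().
	 */
	return (struct address_space *)(raw & ~PAGE_MAPPING_MOVABLE);
}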
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152) /* Do not skip compaction more than 64 times */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153) #define COMPACT_MAX_DEFER_SHIFT 6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156) * Compaction is deferred when compaction fails to result in a successful
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157) * page allocation. The next 1 << compact_defer_shift compactions are then
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158) * skipped, up to a limit of 1 << COMPACT_MAX_DEFER_SHIFT.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 160) void defer_compaction(struct zone *zone, int order)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 161) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 162) zone->compact_considered = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 163) zone->compact_defer_shift++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 164)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 165) if (order < zone->compact_order_failed)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 166) zone->compact_order_failed = order;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 168) if (zone->compact_defer_shift > COMPACT_MAX_DEFER_SHIFT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169) zone->compact_defer_shift = COMPACT_MAX_DEFER_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171) trace_mm_compaction_defer_compaction(zone, order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 173)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 174) /* Returns true if compaction should be skipped this time */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 175) bool compaction_deferred(struct zone *zone, int order)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 176) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 177) unsigned long defer_limit = 1UL << zone->compact_defer_shift;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 178)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 179) if (order < zone->compact_order_failed)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 180) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 181)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 182) /* Avoid possible overflow */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 183) if (++zone->compact_considered >= defer_limit) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 184) zone->compact_considered = defer_limit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 185) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 186) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 187)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 188) trace_mm_compaction_deferred(zone, order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 189)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 190) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 191) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 192)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 193) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 194) * Update defer tracking counters after successful compaction of given order,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 195) * which means an allocation either succeeded (alloc_success == true) or is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 196) * expected to succeed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 197) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 198) void compaction_defer_reset(struct zone *zone, int order,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 199) bool alloc_success)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 200) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 201) if (alloc_success) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 202) zone->compact_considered = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 203) zone->compact_defer_shift = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 204) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 205) if (order >= zone->compact_order_failed)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 206) zone->compact_order_failed = order + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 207)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 208) trace_mm_compaction_defer_reset(zone, order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 209) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 210)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 211) /* Returns true if restarting compaction after many failures */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 212) bool compaction_restarting(struct zone *zone, int order)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 213) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 214) if (order < zone->compact_order_failed)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 215) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 216)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 217) return zone->compact_defer_shift == COMPACT_MAX_DEFER_SHIFT &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 218) zone->compact_considered >= 1UL << zone->compact_defer_shift;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 219) }
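
/*
 * Illustrative sketch (hypothetical helper, not called anywhere): the
 * deferral above is an exponential back-off. Each call to defer_compaction()
 * doubles the number of attempts that compaction_deferred() will skip:
 * 2, 4, 8, ..., capped at 1 << COMPACT_MAX_DEFER_SHIFT == 64. Any success
 * resets both counters via compaction_defer_reset().
 */
static inline unsigned long compact_defer_limit_example(struct zone *zone)
{
	unsigned int shift = min_t(unsigned int, zone->compact_defer_shift,
				   COMPACT_MAX_DEFER_SHIFT);

	return 1UL << shift;	/* attempts skipped before the next retry */
}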
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 220)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 221) /* Returns true if the pageblock should be scanned for pages to isolate. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 222) static inline bool isolation_suitable(struct compact_control *cc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 223) struct page *page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 224) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 225) if (cc->ignore_skip_hint)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 226) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 227)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 228) return !get_pageblock_skip(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 229) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 230)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 231) static void reset_cached_positions(struct zone *zone)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 232) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 233) zone->compact_cached_migrate_pfn[0] = zone->zone_start_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 234) zone->compact_cached_migrate_pfn[1] = zone->zone_start_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 235) zone->compact_cached_free_pfn =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 236) pageblock_start_pfn(zone_end_pfn(zone) - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 237) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 238)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 239) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 240) * Compound pages of >= pageblock_order should consistently be skipped until
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 241) * released. It is always pointless to compact pages of such order (if they are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 242) * migratable), and the pageblocks they occupy cannot contain any free pages.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 243) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 244) static bool pageblock_skip_persistent(struct page *page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 245) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 246) if (!PageCompound(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 247) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 248)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 249) page = compound_head(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 250)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 251) if (compound_order(page) >= pageblock_order)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 252) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 253)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 254) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 255) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 256)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 257) static bool
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 258) __reset_isolation_pfn(struct zone *zone, unsigned long pfn, bool check_source,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 259) bool check_target)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 260) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 261) struct page *page = pfn_to_online_page(pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 262) struct page *block_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 263) struct page *end_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 264) unsigned long block_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 265)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 266) if (!page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 267) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 268) if (zone != page_zone(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 269) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 270) if (pageblock_skip_persistent(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 271) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 272)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 273) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 274) * If skip is already cleared do no further checking once the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 275) * restart points have been set.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 276) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 277) if (check_source && check_target && !get_pageblock_skip(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 278) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 279)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 280) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 281) * If clearing skip for the target scanner, do not select a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 282) * non-movable pageblock as the starting point.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 283) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 284) if (!check_source && check_target &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 285) get_pageblock_migratetype(page) != MIGRATE_MOVABLE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 286) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 287)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 288) /* Ensure the start of the pageblock or zone is online and valid */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 289) block_pfn = pageblock_start_pfn(pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 290) block_pfn = max(block_pfn, zone->zone_start_pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 291) block_page = pfn_to_online_page(block_pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 292) if (block_page) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 293) page = block_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 294) pfn = block_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 295) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 296)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 297) /* Ensure the end of the pageblock or zone is online and valid */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 298) block_pfn = pageblock_end_pfn(pfn) - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 299) block_pfn = min(block_pfn, zone_end_pfn(zone) - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 300) end_page = pfn_to_online_page(block_pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 301) if (!end_page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 302) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 303)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 304) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 305) * Only clear the hint if a sample indicates there is either a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 306) * free page or an LRU page in the block. One or other condition
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 307) * is necessary for the block to be a migration source/target.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 308) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 309) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 310) if (pfn_valid_within(pfn)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 311) if (check_source && PageLRU(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 312) clear_pageblock_skip(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 313) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 314) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 315)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 316) if (check_target && PageBuddy(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 317) clear_pageblock_skip(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 318) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 319) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 320) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 321)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 322) page += (1 << PAGE_ALLOC_COSTLY_ORDER);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 323) pfn += (1 << PAGE_ALLOC_COSTLY_ORDER);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 324) } while (page <= end_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 325)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 326) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 327) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 328)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 329) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 330) * This function is called to clear all cached information on pageblocks that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 331) * should be skipped for page isolation when the migrate and free page scanners
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 332) * meet.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 333) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 334) static void __reset_isolation_suitable(struct zone *zone)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 335) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 336) unsigned long migrate_pfn = zone->zone_start_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 337) unsigned long free_pfn = zone_end_pfn(zone) - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 338) unsigned long reset_migrate = free_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 339) unsigned long reset_free = migrate_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 340) bool source_set = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 341) bool free_set = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 342)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 343) if (!zone->compact_blockskip_flush)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 344) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 345)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 346) zone->compact_blockskip_flush = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 347)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 348) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 349) * Walk the zone and update pageblock skip information. The source scanner
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 350) * looks for PageLRU while the target scanner looks for PageBuddy. Once a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 351) * restart point has been found for one of them, both PageBuddy and PageLRU
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 352) * are checked, as the pageblock may be suitable as both source and target.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 353) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 354) for (; migrate_pfn < free_pfn; migrate_pfn += pageblock_nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 355) free_pfn -= pageblock_nr_pages) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 356) cond_resched();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 357)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 358) /* Update the migrate PFN */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 359) if (__reset_isolation_pfn(zone, migrate_pfn, true, source_set) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 360) migrate_pfn < reset_migrate) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 361) source_set = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 362) reset_migrate = migrate_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 363) zone->compact_init_migrate_pfn = reset_migrate;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 364) zone->compact_cached_migrate_pfn[0] = reset_migrate;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 365) zone->compact_cached_migrate_pfn[1] = reset_migrate;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 366) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 367)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 368) /* Update the free PFN */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 369) if (__reset_isolation_pfn(zone, free_pfn, free_set, true) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 370) free_pfn > reset_free) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 371) free_set = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 372) reset_free = free_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 373) zone->compact_init_free_pfn = reset_free;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 374) zone->compact_cached_free_pfn = reset_free;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 375) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 376) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 377)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 378) /* Leave no distance if no suitable block was reset */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 379) if (reset_migrate >= reset_free) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 380) zone->compact_cached_migrate_pfn[0] = migrate_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 381) zone->compact_cached_migrate_pfn[1] = migrate_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 382) zone->compact_cached_free_pfn = free_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 383) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 384) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 385)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 386) void reset_isolation_suitable(pg_data_t *pgdat)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 387) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 388) int zoneid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 389)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 390) for (zoneid = 0; zoneid < MAX_NR_ZONES; zoneid++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 391) struct zone *zone = &pgdat->node_zones[zoneid];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 392) if (!populated_zone(zone))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 393) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 394)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 395) /* Only flush if a full compaction finished recently */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 396) if (zone->compact_blockskip_flush)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 397) __reset_isolation_suitable(zone);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 398) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 399) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 400)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 401) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 402) * Sets the pageblock skip bit if it was clear. Note that this is a hint, as
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 403) * no locks are required for readers or writers. Returns true if it was already set.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 404) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 405) static bool test_and_set_skip(struct compact_control *cc, struct page *page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 406) unsigned long pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 407) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 408) bool skip;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 409)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 410) /* Do not update if the skip hint is being ignored */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 411) if (cc->ignore_skip_hint)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 412) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 413)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 414) if (!IS_ALIGNED(pfn, pageblock_nr_pages))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 415) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 416)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 417) skip = get_pageblock_skip(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 418) if (!skip && !cc->no_set_skip_hint)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 419) set_pageblock_skip(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 420)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 421) return skip;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 422) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 423)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 424) static void update_cached_migrate(struct compact_control *cc, unsigned long pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 425) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 426) struct zone *zone = cc->zone;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 427)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 428) pfn = pageblock_end_pfn(pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 429)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 430) /* Set for isolation rather than compaction */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 431) if (cc->no_set_skip_hint)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 432) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 433)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 434) if (pfn > zone->compact_cached_migrate_pfn[0])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 435) zone->compact_cached_migrate_pfn[0] = pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 436) if (cc->mode != MIGRATE_ASYNC &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 437) pfn > zone->compact_cached_migrate_pfn[1])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 438) zone->compact_cached_migrate_pfn[1] = pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 439) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 440)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 441) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 442) * If no pages were isolated then mark this pageblock to be skipped in the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 443) * future. The information is later cleared by __reset_isolation_suitable().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 444) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 445) static void update_pageblock_skip(struct compact_control *cc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 446) struct page *page, unsigned long pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 447) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 448) struct zone *zone = cc->zone;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 449)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 450) if (cc->no_set_skip_hint)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 451) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 452)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 453) if (!page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 454) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 455)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 456) set_pageblock_skip(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 457)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 458) /* Update where async and sync compaction should restart */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 459) if (pfn < zone->compact_cached_free_pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 460) zone->compact_cached_free_pfn = pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 461) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 462) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 463) static inline bool isolation_suitable(struct compact_control *cc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 464) struct page *page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 465) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 466) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 467) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 468)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 469) static inline bool pageblock_skip_persistent(struct page *page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 470) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 471) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 472) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 473)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 474) static inline void update_pageblock_skip(struct compact_control *cc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 475) struct page *page, unsigned long pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 476) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 477) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 478)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 479) static void update_cached_migrate(struct compact_control *cc, unsigned long pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 480) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 481) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 482)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 483) static bool test_and_set_skip(struct compact_control *cc, struct page *page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 484) unsigned long pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 485) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 486) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 487) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 488) #endif /* CONFIG_COMPACTION */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 489)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 490) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 491) * Compaction requires the taking of some coarse locks that are potentially
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 492) * very heavily contended. For async compaction, trylock and record if the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 493) * lock is contended. The lock will still be acquired but compaction will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 494) * abort when the current block is finished regardless of success rate.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 495) * Sync compaction acquires the lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 496) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 497) * Always returns true which makes it easier to track lock state in callers.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 498) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 499) static bool compact_lock_irqsave(spinlock_t *lock, unsigned long *flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 500) struct compact_control *cc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 501) __acquires(lock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 502) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 503) /* Track if the lock is contended in async mode */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 504) if (cc->mode == MIGRATE_ASYNC && !cc->contended) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 505) if (spin_trylock_irqsave(lock, *flags))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 506) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 507)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 508) cc->contended = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 509) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 510)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 511) spin_lock_irqsave(lock, *flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 512) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 513) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 514)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 515) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 516) * Compaction requires the taking of some coarse locks that are potentially
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 517) * very heavily contended. The lock should be periodically unlocked to avoid
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 518) * having disabled IRQs for a long time, even when there is nobody waiting on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 519) * the lock. It might also be that allowing the IRQs will result in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 520) * need_resched() becoming true. If scheduling is needed, async compaction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 521) * aborts. Sync compaction schedules.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 522) * Either compaction type will also abort if a fatal signal is pending.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 523) * In either case if the lock was locked, it is dropped and not regained.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 524) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 525) * Returns true if compaction should abort due to a pending fatal signal, or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 526) * for async compaction due to need_resched().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 527) * Returns false when compaction can continue (sync compaction might have
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 528) * scheduled).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 529) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 530) static bool compact_unlock_should_abort(spinlock_t *lock,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 531) unsigned long flags, bool *locked, struct compact_control *cc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 532) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 533) if (*locked) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 534) spin_unlock_irqrestore(lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 535) *locked = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 536) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 537)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 538) if (fatal_signal_pending(current)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 539) cc->contended = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 540) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 541) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 542)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 543) cond_resched();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 544)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 545) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 546) }
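
/*
 * Usage sketch (hypothetical scan loop, not part of the file above): the two
 * helpers above are meant to be paired. A scanner takes the lock lazily with
 * compact_lock_irqsave() and periodically drops it via
 * compact_unlock_should_abort(), bailing out of the pageblock if that
 * reports a pending fatal signal or async contention.
 */
static inline bool compact_scan_loop_example(struct compact_control *cc,
					     unsigned long start_pfn,
					     unsigned long end_pfn)
{
	unsigned long pfn, flags = 0;
	bool locked = false;

	for (pfn = start_pfn; pfn < end_pfn; pfn++) {
		/* Give IRQs a chance every SWAP_CLUSTER_MAX pages */
		if (!(pfn % SWAP_CLUSTER_MAX) &&
		    compact_unlock_should_abort(&cc->zone->lock, flags,
						&locked, cc))
			return false;

		if (!locked)
			locked = compact_lock_irqsave(&cc->zone->lock,
						      &flags, cc);

		/* ... examine the page at pfn under zone->lock ... */
	}

	if (locked)
		spin_unlock_irqrestore(&cc->zone->lock, flags);

	return true;
}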
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 547)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 548) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 549) * Isolate free pages onto a private freelist. If @strict is true, will abort
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 550) * returning 0 on any invalid PFNs or non-free pages inside of the pageblock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 551) * (even though it may still end up isolating some pages).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 552) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 553) static unsigned long isolate_freepages_block(struct compact_control *cc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 554) unsigned long *start_pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 555) unsigned long end_pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 556) struct list_head *freelist,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 557) unsigned int stride,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 558) bool strict)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 559) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 560) int nr_scanned = 0, total_isolated = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 561) struct page *cursor;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 562) unsigned long flags = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 563) bool locked = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 564) unsigned long blockpfn = *start_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 565) unsigned int order;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 566)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 567) /* Strict mode is for isolation, speed is secondary */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 568) if (strict)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 569) stride = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 570)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 571) cursor = pfn_to_page(blockpfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 572)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 573) /* Isolate free pages. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 574) for (; blockpfn < end_pfn; blockpfn += stride, cursor += stride) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 575) int isolated;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 576) struct page *page = cursor;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 577)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 578) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 579) * Periodically drop the lock (if held) regardless of its
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 580) * contention, to give IRQs a chance to run. Abort if a fatal signal
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 581) * is pending or async compaction detects need_resched().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 582) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 583) if (!(blockpfn % SWAP_CLUSTER_MAX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 584) && compact_unlock_should_abort(&cc->zone->lock, flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 585) &locked, cc))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 586) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 587)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 588) nr_scanned++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 589) if (!pfn_valid_within(blockpfn))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 590) goto isolate_fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 591)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 592) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 593) * For compound pages such as THP and hugetlbfs, we can save
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 594) * potentially a lot of iterations if we skip them at once.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 595) * The check is racy, but we can consider only valid values
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 596) * and the only danger is skipping too much.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 597) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 598) if (PageCompound(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 599) const unsigned int order = compound_order(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 600)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 601) if (likely(order < MAX_ORDER)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 602) blockpfn += (1UL << order) - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 603) cursor += (1UL << order) - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 604) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 605) goto isolate_fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 606) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 607)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 608) if (!PageBuddy(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 609) goto isolate_fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 610)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 611) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 612) * If we already hold the lock, we can skip some rechecking.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 613) * Note that if we hold the lock now, checked_pageblock was
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 614) * already set in some previous iteration (or strict is true),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 615) * so it is correct to skip the suitable migration target
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 616) * recheck as well.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 617) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 618) if (!locked) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 619) locked = compact_lock_irqsave(&cc->zone->lock,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 620) &flags, cc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 621)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 622) /* Recheck this is a buddy page under lock */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 623) if (!PageBuddy(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 624) goto isolate_fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 625) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 626)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 627) /* Found a free page, will break it into order-0 pages */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 628) order = buddy_order(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 629) isolated = __isolate_free_page(page, order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 630) if (!isolated)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 631) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 632) set_page_private(page, order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 633)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 634) total_isolated += isolated;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 635) cc->nr_freepages += isolated;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 636) list_add_tail(&page->lru, freelist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 637)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 638) if (!strict && cc->nr_migratepages <= cc->nr_freepages) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 639) blockpfn += isolated;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 640) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 641) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 642) /* Advance to the end of the split page */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 643) blockpfn += isolated - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 644) cursor += isolated - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 645) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 646)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 647) isolate_fail:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 648) if (strict)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 649) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 650) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 651) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 652)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 653) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 654)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 655) if (locked)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 656) spin_unlock_irqrestore(&cc->zone->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 657)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 658) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 659) * There is a tiny chance that we have read bogus compound_order(),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 660) * so be careful to not go outside of the pageblock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 661) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 662) if (unlikely(blockpfn > end_pfn))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 663) blockpfn = end_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 664)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 665) trace_mm_compaction_isolate_freepages(*start_pfn, blockpfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 666) nr_scanned, total_isolated);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 667)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 668) /* Record how far we have got within the block */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 669) *start_pfn = blockpfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 670)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 671) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 672) * If strict isolation is requested by CMA then check that all the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 673) * pages requested were isolated. If there were any failures, 0 is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 674) * returned and CMA will fail.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 675) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 676) if (strict && blockpfn < end_pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 677) total_isolated = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 678)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 679) cc->total_free_scanned += nr_scanned;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 680) if (total_isolated)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 681) count_compact_events(COMPACTISOLATED, total_isolated);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 682) return total_isolated;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 683) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 684)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 685) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 686) * isolate_freepages_range() - isolate free pages.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 687) * @cc: Compaction control structure.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 688) * @start_pfn: The first PFN to start isolating.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 689) * @end_pfn: The one-past-last PFN.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 690) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 691) * Non-free pages, invalid PFNs, or zone boundaries within the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 692) * [start_pfn, end_pfn) range are considered errors and cause the function to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 693) * undo its actions and return zero.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 694) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 695) * Otherwise, the function returns the one-past-the-last PFN of the isolated
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 696) * page (which may be greater than end_pfn if the end fell in the middle of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 697) * a free page).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 698) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 699) unsigned long
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 700) isolate_freepages_range(struct compact_control *cc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 701) unsigned long start_pfn, unsigned long end_pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 702) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 703) unsigned long isolated, pfn, block_start_pfn, block_end_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 704) LIST_HEAD(freelist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 705)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 706) pfn = start_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 707) block_start_pfn = pageblock_start_pfn(pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 708) if (block_start_pfn < cc->zone->zone_start_pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 709) block_start_pfn = cc->zone->zone_start_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 710) block_end_pfn = pageblock_end_pfn(pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 711)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 712) for (; pfn < end_pfn; pfn += isolated,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 713) block_start_pfn = block_end_pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 714) block_end_pfn += pageblock_nr_pages) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 715) /* Protect pfn from being changed by isolate_freepages_block */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 716) unsigned long isolate_start_pfn = pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 717)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 718) block_end_pfn = min(block_end_pfn, end_pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 719)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 720) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 721) * pfn could pass block_end_pfn if an isolated free page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 722) * is larger than a pageblock. In that case, adjust the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 723) * scanning range to the correct pageblock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 724) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 725) if (pfn >= block_end_pfn) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 726) block_start_pfn = pageblock_start_pfn(pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 727) block_end_pfn = pageblock_end_pfn(pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 728) block_end_pfn = min(block_end_pfn, end_pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 729) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 730)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 731) if (!pageblock_pfn_to_page(block_start_pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 732) block_end_pfn, cc->zone))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 733) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 734)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 735) isolated = isolate_freepages_block(cc, &isolate_start_pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 736) block_end_pfn, &freelist, 0, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 737)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 738) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 739) * In strict mode, isolate_freepages_block() returns 0 if
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 740) * there are any holes in the block (ie. invalid PFNs or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 741) * non-free pages).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 742) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 743) if (!isolated)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 744) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 745)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 746) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 747) * If we managed to isolate pages, it is always (1 << n) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 748) * pageblock_nr_pages for some non-negative n. (Max order
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 749) * page may span two pageblocks).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 750) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 751) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 752)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 753) /* __isolate_free_page() does not map the pages */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 754) split_map_pages(&freelist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 755)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 756) if (pfn < end_pfn) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 757) /* Loop terminated early, cleanup. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 758) release_freepages(&freelist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 759) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 760) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 761)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 762) /* We don't use freelists for anything. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 763) return pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 764) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 765)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 766) #ifdef CONFIG_COMPACTION
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 767) unsigned long isolate_and_split_free_page(struct page *page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 768) struct list_head *list)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 769) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 770) unsigned long isolated;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 771) unsigned int order;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 772)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 773) if (!PageBuddy(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 774) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 775)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 776) order = buddy_order(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 777) isolated = __isolate_free_page(page, order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 778) if (!isolated)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 779) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 780)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 781) set_page_private(page, order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 782) list_add(&page->lru, list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 783)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 784) split_map_pages(list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 785)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 786) return isolated;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 787) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 788) EXPORT_SYMBOL_GPL(isolate_and_split_free_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 789) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 790)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 791) /* Similar to reclaim, but different enough that they don't share logic */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 792) static bool too_many_isolated(pg_data_t *pgdat)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 793) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 794) unsigned long active, inactive, isolated;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 795)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 796) inactive = node_page_state(pgdat, NR_INACTIVE_FILE) +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 797) node_page_state(pgdat, NR_INACTIVE_ANON);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 798) active = node_page_state(pgdat, NR_ACTIVE_FILE) +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 799) node_page_state(pgdat, NR_ACTIVE_ANON);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 800) isolated = node_page_state(pgdat, NR_ISOLATED_FILE) +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 801) node_page_state(pgdat, NR_ISOLATED_ANON);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 802)
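/*
 * Throttle once more pages are isolated than half of the remaining
 * active + inactive pages, so that parallel reclaim and compaction cannot
 * pull the bulk of the LRU off the lists at the same time.
 */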
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 803) return isolated > (inactive + active) / 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 804) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 805)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 806) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 807) * isolate_migratepages_block() - isolate all migrate-able pages within
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 808) * a single pageblock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 809) * @cc: Compaction control structure.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 810) * @low_pfn: The first PFN to isolate
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 811) * @end_pfn: The one-past-the-last PFN to isolate, within the same pageblock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 812) * @isolate_mode: Isolation mode to be used.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 813) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 814) * Isolate all pages that can be migrated from the range specified by
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 815) * [low_pfn, end_pfn). The range is expected to be within the same pageblock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 816) * Returns zero if there is a fatal signal pending, otherwise the PFN of the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 817) * first page that was not scanned (which may be less than, equal to or greater
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 818) * than end_pfn).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 819) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 820) * The pages are isolated on cc->migratepages list (not required to be empty),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 821) * and cc->nr_migratepages is updated accordingly. The cc->migrate_pfn field
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 822) * is neither read nor updated.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 823) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 824) static unsigned long
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 825) isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 826) unsigned long end_pfn, isolate_mode_t isolate_mode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 827) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 828) pg_data_t *pgdat = cc->zone->zone_pgdat;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 829) unsigned long nr_scanned = 0, nr_isolated = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 830) struct lruvec *lruvec;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 831) unsigned long flags = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 832) bool locked = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 833) struct page *page = NULL, *valid_page = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 834) unsigned long start_pfn = low_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 835) bool skip_on_failure = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 836) unsigned long next_skip_pfn = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 837) bool skip_updated = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 838)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 839) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 840) * Ensure that there are not too many pages isolated from the LRU
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 841) * list by either parallel reclaimers or compaction. If there are,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 842) * delay for some time until fewer pages are isolated.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 843) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 844) while (unlikely(too_many_isolated(pgdat))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 845) /* stop isolation if there are still pages not migrated */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 846) if (cc->nr_migratepages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 847) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 848)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 849) /* async migration should just abort */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 850) if (cc->mode == MIGRATE_ASYNC)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 851) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 852)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 853) congestion_wait(BLK_RW_ASYNC, HZ/10);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 854)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 855) if (fatal_signal_pending(current))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 856) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 857) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 858)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 859) cond_resched();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 860)
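/*
 * For async direct compaction, give up on an order-aligned block as soon
 * as isolation fails inside it: the cc->order buddy page cannot be
 * assembled from that block anyway (see the isolate_fail handling below).
 */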
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 861) if (cc->direct_compaction && (cc->mode == MIGRATE_ASYNC)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 862) skip_on_failure = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 863) next_skip_pfn = block_end_pfn(low_pfn, cc->order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 864) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 865)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 866) /* Time to isolate some pages for migration */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 867) for (; low_pfn < end_pfn; low_pfn++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 868)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 869) if (skip_on_failure && low_pfn >= next_skip_pfn) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 870) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 871) * We have isolated all migration candidates in the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 872) * previous order-aligned block, and did not skip it due
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 873) * to failure. We should migrate the pages now and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 874) * hopefully succeed compaction.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 875) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 876) if (nr_isolated)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 877) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 878)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 879) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 880) * We failed to isolate in the previous order-aligned
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 881) * block. Set the new boundary to the end of the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 882) * current block. Note we can't simply increase
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 883) * next_skip_pfn by 1 << order, as low_pfn might have
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 884) * been incremented by a higher number due to skipping
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 885) * a compound or a high-order buddy page in the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 886) * previous loop iteration.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 887) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 888) next_skip_pfn = block_end_pfn(low_pfn, cc->order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 889) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 890)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 891) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 892) * Periodically drop the lock (if held) regardless of its
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 893) * contention, to give chance to IRQs. Abort completely if
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 894) * a fatal signal is pending.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 895) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 896) if (!(low_pfn % SWAP_CLUSTER_MAX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 897) && compact_unlock_should_abort(&pgdat->lru_lock,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 898) flags, &locked, cc)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 899) low_pfn = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 900) goto fatal_pending;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 901) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 902)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 903) if (!pfn_valid_within(low_pfn))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 904) goto isolate_fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 905) nr_scanned++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 906)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 907) page = pfn_to_page(low_pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 908)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 909) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 910) * Check if the pageblock has already been marked skipped.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 911) * Only the aligned PFN is checked as the caller isolates
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 912) * COMPACT_CLUSTER_MAX at a time so the second call must
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 913) * not falsely conclude that the block should be skipped.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 914) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 915) if (!valid_page && IS_ALIGNED(low_pfn, pageblock_nr_pages)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 916) if (!cc->ignore_skip_hint && get_pageblock_skip(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 917) low_pfn = end_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 918) goto isolate_abort;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 919) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 920) valid_page = page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 921) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 922)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 923) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 924) * Skip if free. We read page order here without zone lock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 925) * which is generally unsafe, but the race window is small and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 926) * the worst thing that can happen is that we skip some
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 927) * potential isolation targets.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 928) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 929) if (PageBuddy(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 930) unsigned long freepage_order = buddy_order_unsafe(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 931)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 932) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 933) * Without lock, we cannot be sure that what we got is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 934) * a valid page order. Consider only values in the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 935) * valid order range to prevent low_pfn overflow.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 936) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 937) if (freepage_order > 0 && freepage_order < MAX_ORDER)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 938) low_pfn += (1UL << freepage_order) - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 939) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 940) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 941)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 942) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 943) * Regardless of being on LRU, compound pages such as THP and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 944) * hugetlbfs are not to be compacted unless we are attempting
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 945) * an allocation much larger than the huge page size (eg CMA).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 946) * We can potentially save a lot of iterations if we skip them
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 947) * at once. The check is racy, but we can consider only valid
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 948) * values and the only danger is skipping too much.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 949) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 950) if (PageCompound(page) && !cc->alloc_contig) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 951) const unsigned int order = compound_order(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 952)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 953) if (likely(order < MAX_ORDER))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 954) low_pfn += (1UL << order) - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 955) goto isolate_fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 956) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 957)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 958) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 959) * Check may be lockless but that's ok as we recheck later.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 960) * It's possible to migrate LRU and non-LRU movable pages.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 961) * Skip any other type of page.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 962) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 963) if (!PageLRU(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 964) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 965) * __PageMovable can return false positive so we need
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 966) * to verify it under page_lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 967) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 968) if (unlikely(__PageMovable(page)) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 969) !PageIsolated(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 970) if (locked) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 971) spin_unlock_irqrestore(&pgdat->lru_lock,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 972) flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 973) locked = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 974) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 975)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 976) if (!isolate_movable_page(page, isolate_mode))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 977) goto isolate_success;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 978) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 979)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 980) goto isolate_fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 981) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 982)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 983) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 984) * Migration will fail if an anonymous page is pinned in memory,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 985) * so avoid taking lru_lock and isolating it unnecessarily in an
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 986) * admittedly racy check.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 987) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 988) if (!page_mapping(page) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 989) page_count(page) > page_mapcount(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 990) goto isolate_fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 991)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 992) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 993) * Only allow migration of anonymous pages in GFP_NOFS context,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 994) * because those do not depend on fs locks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 995) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 996) if (!(cc->gfp_mask & __GFP_FS) && page_mapping(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 997) goto isolate_fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 998)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 999) /* If we already hold the lock, we can skip some rechecking */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000) if (!locked) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) locked = compact_lock_irqsave(&pgdat->lru_lock,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002) &flags, cc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004) /* Try to get exclusive access under lock */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005) if (!skip_updated) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006) skip_updated = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007) if (test_and_set_skip(cc, page, low_pfn))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008) goto isolate_abort;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011) /* Recheck PageLRU and PageCompound under lock */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012) if (!PageLRU(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013) goto isolate_fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016) * The page became compound since the non-locked check,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017) * and it's on LRU. It can only be a THP so the order
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018) * is safe to read and it's 0 for tail pages.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020) if (unlikely(PageCompound(page) && !cc->alloc_contig)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) low_pfn += compound_nr(page) - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022) goto isolate_fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026) lruvec = mem_cgroup_page_lruvec(page, pgdat);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028) /* Try isolate the page */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029) if (__isolate_lru_page(page, isolate_mode) != 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030) goto isolate_fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) /* The whole page is taken off the LRU; skip the tail pages. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033) if (PageCompound(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034) low_pfn += compound_nr(page) - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036) /* Successfully isolated */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037) del_page_from_lru_list(page, lruvec, page_lru(page));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038) mod_node_page_state(page_pgdat(page),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039) NR_ISOLATED_ANON + page_is_file_lru(page),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040) thp_nr_pages(page));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042) isolate_success:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) list_add(&page->lru, &cc->migratepages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044) cc->nr_migratepages += compound_nr(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045) nr_isolated += compound_nr(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) * Avoid isolating too much unless this block is being
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049) * rescanned (e.g. dirty/writeback pages, parallel allocation)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050) * or a lock is contended. For contention, isolate quickly to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051) * potentially remove one source of contention.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053) if (cc->nr_migratepages >= COMPACT_CLUSTER_MAX &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) !cc->rescan && !cc->contended) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055) ++low_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060) isolate_fail:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061) if (!skip_on_failure)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065) * We have isolated some pages, but then failed. Release them
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066) * instead of migrating, as we cannot form the cc->order buddy
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067) * page anyway.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069) if (nr_isolated) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070) if (locked) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071) spin_unlock_irqrestore(&pgdat->lru_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072) locked = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074) putback_movable_pages(&cc->migratepages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075) cc->nr_migratepages = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076) nr_isolated = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079) if (low_pfn < next_skip_pfn) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080) low_pfn = next_skip_pfn - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082) * The check near the loop beginning would have updated
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083) * next_skip_pfn too, but this is a bit simpler.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085) next_skip_pfn += 1UL << cc->order;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090) * The PageBuddy() check could have potentially brought us outside
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091) * the range to be scanned.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093) if (unlikely(low_pfn > end_pfn))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094) low_pfn = end_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096) isolate_abort:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097) if (locked)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098) spin_unlock_irqrestore(&pgdat->lru_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101) * Update the cached scanner pfn once the pageblock has been scanned.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102) * Pages will either be migrated, in which case there is no point in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103) * scanning in the near future, or migration failed, in which case the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104) * failure reason may persist. The block is marked for skipping if
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105) * there were no pages isolated in the block or if the block is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106) * rescanned twice in a row.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108) if (low_pfn == end_pfn && (!nr_isolated || cc->rescan)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109) if (valid_page && !skip_updated)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110) set_pageblock_skip(valid_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111) update_cached_migrate(cc, low_pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1113)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114) trace_mm_compaction_isolate_migratepages(start_pfn, low_pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115) nr_scanned, nr_isolated);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117) fatal_pending:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118) cc->total_migrate_scanned += nr_scanned;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119) if (nr_isolated)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120) count_compact_events(COMPACTISOLATED, nr_isolated);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122) return low_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126) * isolate_migratepages_range() - isolate migrate-able pages in a PFN range
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127) * @cc: Compaction control structure.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128) * @start_pfn: The first PFN to start isolating.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129) * @end_pfn: The one-past-last PFN.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131) * Returns zero if isolation fails fatally due to e.g. pending signal.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132) * Otherwise, the function returns the one-past-the-last PFN of the isolated pages
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133) * (which may be greater than end_pfn if end_pfn fell in the middle of a THP).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135) unsigned long
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136) isolate_migratepages_range(struct compact_control *cc, unsigned long start_pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137) unsigned long end_pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139) unsigned long pfn, block_start_pfn, block_end_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141) /* Scan block by block. First and last block may be incomplete */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142) pfn = start_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143) block_start_pfn = pageblock_start_pfn(pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144) if (block_start_pfn < cc->zone->zone_start_pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145) block_start_pfn = cc->zone->zone_start_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146) block_end_pfn = pageblock_end_pfn(pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148) for (; pfn < end_pfn; pfn = block_end_pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149) block_start_pfn = block_end_pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150) block_end_pfn += pageblock_nr_pages) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152) block_end_pfn = min(block_end_pfn, end_pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154) if (!pageblock_pfn_to_page(block_start_pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155) block_end_pfn, cc->zone))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158) pfn = isolate_migratepages_block(cc, pfn, block_end_pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159) ISOLATE_UNEVICTABLE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161) if (!pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164) if (cc->nr_migratepages >= COMPACT_CLUSTER_MAX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168) return pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171) #endif /* CONFIG_COMPACTION || CONFIG_CMA */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172) #ifdef CONFIG_COMPACTION
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173)
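/*
 * Decide whether a pageblock may be used as a source of migration
 * candidates. Async direct compaction restricts itself to blocks compatible
 * with the allocation's migratetype to keep the amount of work bounded.
 */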
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174) static bool suitable_migration_source(struct compact_control *cc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175) struct page *page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177) int block_mt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179) if (pageblock_skip_persistent(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182) if ((cc->mode != MIGRATE_ASYNC) || !cc->direct_compaction)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185) block_mt = get_pageblock_migratetype(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187) if (cc->migratetype == MIGRATE_MOVABLE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188) return is_migrate_movable(block_mt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1189) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190) return block_mt == cc->migratetype;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193) /* Returns true if the page is within a block suitable for migration to */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194) static bool suitable_migration_target(struct compact_control *cc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195) struct page *page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197) /* If the page is a large free page, then disallow migration */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198) if (PageBuddy(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200) * We are checking the buddy order without zone->lock taken. But
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201) * the only small danger is that we skip a potentially suitable
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202) * pageblock, so it's not worth checking the order against the valid range.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204) if (buddy_order_unsafe(page) >= pageblock_order)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208) if (cc->ignore_block_suitable)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1209) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211) /* If the block is MIGRATE_MOVABLE or MIGRATE_CMA, allow migration */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212) if (is_migrate_movable(get_pageblock_migratetype(page)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215) /* Otherwise skip the block */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218)
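/*
 * Limit on how many free-list entries the fast target search examines per
 * order. With COMPACT_CLUSTER_MAX at its usual value of 32 this yields 33,
 * 17, 9, 5, 3, 2 and finally 1 as cc->fast_search_fail grows, so repeated
 * failures make the search progressively cheaper without dropping to zero.
 */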
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219) static inline unsigned int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220) freelist_scan_limit(struct compact_control *cc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222) unsigned short shift = BITS_PER_LONG - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224) return (COMPACT_CLUSTER_MAX >> min(shift, cc->fast_search_fail)) + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228) * Test whether the free scanner has reached the same or lower pageblock than
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229) * the migration scanner, and compaction should thus terminate.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231) static inline bool compact_scanners_met(struct compact_control *cc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233) return (cc->free_pfn >> pageblock_order)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234) <= (cc->migrate_pfn >> pageblock_order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1237) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1238) * Used when scanning for a suitable migration target which scans freelists
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1239) * in reverse. Reorders the list so that the unscanned pages are scanned
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1240) * first on the next iteration of the free scanner.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1241) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1242) static void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1243) move_freelist_head(struct list_head *freelist, struct page *freepage)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1244) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1245) LIST_HEAD(sublist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1246)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1247) if (!list_is_last(freelist, &freepage->lru)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1248) list_cut_before(&sublist, freelist, &freepage->lru);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1249) if (!list_empty(&sublist))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1250) list_splice_tail(&sublist, freelist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1251) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1252) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1253)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1254) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1255) * Similar to move_freelist_head except used by the migration scanner
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1256) * when scanning forward. It's possible for these list operations to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1257) * move against each other if they search the free list exactly in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1258) * lockstep.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1259) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1260) static void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1261) move_freelist_tail(struct list_head *freelist, struct page *freepage)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1262) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1263) LIST_HEAD(sublist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1264)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1265) if (!list_is_first(freelist, &freepage->lru)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1266) list_cut_position(&sublist, freelist, &freepage->lru);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1267) if (!list_empty(&sublist))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1268) list_splice_tail(&sublist, freelist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1269) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1270) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1271)
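/*
 * Once the fast search has isolated a page, try to fill the rest of its
 * pageblock: scan the portion of the block before the found pfn and, if
 * still short of cc->nr_migratepages free pages, the portion after the
 * pages that were just isolated.
 */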
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1272) static void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1273) fast_isolate_around(struct compact_control *cc, unsigned long pfn, unsigned long nr_isolated)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1274) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1275) unsigned long start_pfn, end_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1276) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1277)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1278) /* Do not search around if there are enough pages already */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1279) if (cc->nr_freepages >= cc->nr_migratepages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1280) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1281)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1282) /* Minimise scanning during async compaction */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1283) if (cc->direct_compaction && cc->mode == MIGRATE_ASYNC)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1284) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1285)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1286) /* Pageblock boundaries */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1287) start_pfn = max(pageblock_start_pfn(pfn), cc->zone->zone_start_pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1288) end_pfn = min(pageblock_end_pfn(pfn), zone_end_pfn(cc->zone));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1289)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1290) page = pageblock_pfn_to_page(start_pfn, end_pfn, cc->zone);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1291) if (!page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1292) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1293)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1294) /* Scan before */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1295) if (start_pfn != pfn) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1296) isolate_freepages_block(cc, &start_pfn, pfn, &cc->freepages, 1, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1297) if (cc->nr_freepages >= cc->nr_migratepages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1298) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1299) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1300)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1301) /* Scan after */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1302) start_pfn = pfn + nr_isolated;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1303) if (start_pfn < end_pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1304) isolate_freepages_block(cc, &start_pfn, end_pfn, &cc->freepages, 1, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1305)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1306) /* Skip this pageblock in the future as it's full or nearly full */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1307) if (cc->nr_freepages < cc->nr_migratepages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1308) set_pageblock_skip(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1309) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1310)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1311) /* Search orders in round-robin fashion */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1312) static int next_search_order(struct compact_control *cc, int order)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1313) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1314) order--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1315) if (order < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1316) order = cc->order - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1317)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1318) /* Search wrapped around? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1319) if (order == cc->search_order) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1320) cc->search_order--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1321) if (cc->search_order < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1322) cc->search_order = cc->order - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1323) return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1324) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1325)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1326) return order;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1327) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1328)
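/*
 * Fast path for the free scanner: rather than walking pageblock by
 * pageblock, probe the MIGRATE_MOVABLE free lists (round-robin over orders
 * starting from cc->search_order) for a free page in the upper part of the
 * remaining scan space, then hand the result to fast_isolate_around().
 */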
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1329) static unsigned long
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1330) fast_isolate_freepages(struct compact_control *cc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1331) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1332) unsigned int limit = max(1U, freelist_scan_limit(cc) >> 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1333) unsigned int nr_scanned = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1334) unsigned long low_pfn, min_pfn, highest = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1335) unsigned long nr_isolated = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1336) unsigned long distance;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1337) struct page *page = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1338) bool scan_start = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1339) int order;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1340)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1341) /* Full compaction passes in a negative order */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1342) if (cc->order <= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1343) return cc->free_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1344)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1345) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1346) * If starting the scan, use a deeper search and use the highest
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1347) * PFN found if a suitable one is not found.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1348) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1349) if (cc->free_pfn >= cc->zone->compact_init_free_pfn) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1350) limit = pageblock_nr_pages >> 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1351) scan_start = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1352) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1353)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1354) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1355) * Preferred point is in the top quarter of the scan space but take
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1356) * a pfn from the top half if the search is problematic.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1357) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1358) distance = (cc->free_pfn - cc->migrate_pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1359) low_pfn = pageblock_start_pfn(cc->free_pfn - (distance >> 2));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1360) min_pfn = pageblock_start_pfn(cc->free_pfn - (distance >> 1));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1361)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1362) if (WARN_ON_ONCE(min_pfn > low_pfn))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1363) low_pfn = min_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1364)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1365) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1366) * Search starts from the last successful isolation order or the next
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1367) * order to search after a previous failure.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1368) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1369) cc->search_order = min_t(unsigned int, cc->order - 1, cc->search_order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1370)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1371) for (order = cc->search_order;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1372) !page && order >= 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1373) order = next_search_order(cc, order)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1374) struct free_area *area = &cc->zone->free_area[order];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1375) struct list_head *freelist;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1376) struct page *freepage;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1377) unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1378) unsigned int order_scanned = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1379) unsigned long high_pfn = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1380)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1381) if (!area->nr_free)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1382) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1383)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1384) spin_lock_irqsave(&cc->zone->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1385) freelist = &area->free_list[MIGRATE_MOVABLE];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1386) list_for_each_entry_reverse(freepage, freelist, lru) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1387) unsigned long pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1388)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1389) order_scanned++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1390) nr_scanned++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1391) pfn = page_to_pfn(freepage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1392)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1393) if (pfn >= highest)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1394) highest = max(pageblock_start_pfn(pfn),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1395) cc->zone->zone_start_pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1396)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1397) if (pfn >= low_pfn) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1398) cc->fast_search_fail = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1399) cc->search_order = order;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1400) page = freepage;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1401) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1402) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1403)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1404) if (pfn >= min_pfn && pfn > high_pfn) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1405) high_pfn = pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1406)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1407) /* Shorten the scan if a candidate is found */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1408) limit >>= 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1409) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1410)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1411) if (order_scanned >= limit)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1412) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1413) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1414)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1415) /* Use a minimum pfn if a preferred one was not found */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1416) if (!page && high_pfn) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1417) page = pfn_to_page(high_pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1418)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1419) /* Update freepage for the list reorder below */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1420) freepage = page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1421) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1422)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1423) /* Reorder so that a future search skips recent pages */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1424) move_freelist_head(freelist, freepage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1425)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1426) /* Isolate the page if available */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1427) if (page) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1428) if (__isolate_free_page(page, order)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1429) set_page_private(page, order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1430) nr_isolated = 1 << order;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1431) cc->nr_freepages += nr_isolated;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1432) list_add_tail(&page->lru, &cc->freepages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1433) count_compact_events(COMPACTISOLATED, nr_isolated);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1434) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1435) /* If isolation fails, abort the search */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1436) order = cc->search_order + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1437) page = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1438) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1439) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1440)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1441) spin_unlock_irqrestore(&cc->zone->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1442)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1443) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1444) * Smaller scan on next order so the total scan is related
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1445) * to freelist_scan_limit.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1446) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1447) if (order_scanned >= limit)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1448) limit = max(1U, limit >> 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1449) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1450)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1451) if (!page) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1452) cc->fast_search_fail++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1453) if (scan_start) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1454) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1455) * Use the highest PFN found above min. If one was
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1456) * not found, be pessimistic for direct compaction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1457) * and use the min mark.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1458) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1459) if (highest) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1460) page = pfn_to_page(highest);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1461) cc->free_pfn = highest;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1462) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1463) if (cc->direct_compaction && pfn_valid(min_pfn)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1464) page = pageblock_pfn_to_page(min_pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1465) min(pageblock_end_pfn(min_pfn),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1466) zone_end_pfn(cc->zone)),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1467) cc->zone);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1468) cc->free_pfn = min_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1469) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1470) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1471) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1472) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1473)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1474) if (highest && highest >= cc->zone->compact_cached_free_pfn) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1475) highest -= pageblock_nr_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1476) cc->zone->compact_cached_free_pfn = highest;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1477) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1478)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1479) cc->total_free_scanned += nr_scanned;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1480) if (!page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1481) return cc->free_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1482)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1483) low_pfn = page_to_pfn(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1484) fast_isolate_around(cc, low_pfn, nr_isolated);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1485) return low_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1486) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1487)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1488) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1489) * Based on information in the current compact_control, find blocks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1490) * suitable for isolating free pages from and then isolate them.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1491) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1492) static void isolate_freepages(struct compact_control *cc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1493) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1494) struct zone *zone = cc->zone;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1495) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1496) unsigned long block_start_pfn; /* start of current pageblock */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1497) unsigned long isolate_start_pfn; /* exact pfn we start at */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1498) unsigned long block_end_pfn; /* end of current pageblock */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1499) unsigned long low_pfn; /* lowest pfn scanner is able to scan */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1500) struct list_head *freelist = &cc->freepages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1501) unsigned int stride;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1502)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1503) /* Try a small search of the free lists for a candidate */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1504) isolate_start_pfn = fast_isolate_freepages(cc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1505) if (cc->nr_freepages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1506) goto splitmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1507)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1508) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1509) * Initialise the free scanner. The starting point is where we last
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1510) * successfully isolated from, the zone-cached value, or the end of the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1511) * zone when isolating for the first time. For looping we also need
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1512) * this pfn aligned down to the pageblock boundary, because we do
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1513) * block_start_pfn -= pageblock_nr_pages in the for loop.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1514) * For the ending point, take care when isolating in the last pageblock of a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1515) * zone which ends in the middle of a pageblock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1516) * The low boundary is the end of the pageblock the migration scanner
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1517) * is using.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1518) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1519) isolate_start_pfn = cc->free_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1520) block_start_pfn = pageblock_start_pfn(isolate_start_pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1521) block_end_pfn = min(block_start_pfn + pageblock_nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1522) zone_end_pfn(zone));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1523) low_pfn = pageblock_end_pfn(cc->migrate_pfn);
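/*
 * The stride controls how densely isolate_freepages_block() steps through
 * a block: async compaction starts sparse (COMPACT_CLUSTER_MAX), and the
 * loop below drops it to 1 once a block yields pages and doubles it again
 * after empty scans.
 */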
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1524) stride = cc->mode == MIGRATE_ASYNC ? COMPACT_CLUSTER_MAX : 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1525)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1526) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1527) * Isolate free pages until enough are available to migrate the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1528) * pages on cc->migratepages. We stop searching if the migrate
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1529) * and free page scanners meet or enough free pages are isolated.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1530) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1531) for (; block_start_pfn >= low_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1532) block_end_pfn = block_start_pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1533) block_start_pfn -= pageblock_nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1534) isolate_start_pfn = block_start_pfn) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1535) unsigned long nr_isolated;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1536)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1537) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1538) * This can iterate a massively long zone without finding any
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1539) * suitable migration targets, so periodically check resched.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1540) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1541) if (!(block_start_pfn % (SWAP_CLUSTER_MAX * pageblock_nr_pages)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1542) cond_resched();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1543)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1544) page = pageblock_pfn_to_page(block_start_pfn, block_end_pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1545) zone);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1546) if (!page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1547) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1548)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1549) /* Check the block is suitable for migration */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1550) if (!suitable_migration_target(cc, page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1551) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1552)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1553) /* If isolation recently failed, do not retry */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1554) if (!isolation_suitable(cc, page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1555) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1556)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1557) /* Found a block suitable for isolating free pages from. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1558) nr_isolated = isolate_freepages_block(cc, &isolate_start_pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1559) block_end_pfn, freelist, stride, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1560)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1561) /* Update the skip hint if the full pageblock was scanned */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1562) if (isolate_start_pfn == block_end_pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1563) update_pageblock_skip(cc, page, block_start_pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1564)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1565) /* Are enough freepages isolated? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1566) if (cc->nr_freepages >= cc->nr_migratepages) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1567) if (isolate_start_pfn >= block_end_pfn) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1568) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1569) * Restart at previous pageblock if more
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1570) * freepages can be isolated next time.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1571) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1572) isolate_start_pfn =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1573) block_start_pfn - pageblock_nr_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1574) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1575) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1576) } else if (isolate_start_pfn < block_end_pfn) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1577) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1578) * If isolation failed early, do not continue
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1579) * needlessly.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1580) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1581) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1582) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1583)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1584) /* Adjust stride: reset to 1 after isolating, otherwise back off exponentially up to COMPACT_CLUSTER_MAX */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1585) if (nr_isolated) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1586) stride = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1587) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1588) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1589) stride = min_t(unsigned int, COMPACT_CLUSTER_MAX, stride << 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1590) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1591)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1592) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1593) * Record where the free scanner will restart next time. Either we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1594) * broke from the loop and set isolate_start_pfn based on the last
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1595) * call to isolate_freepages_block(), or we met the migration scanner
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1596) * and the loop terminated due to isolate_start_pfn < low_pfn.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1597) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1598) cc->free_pfn = isolate_start_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1599)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1600) splitmap:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1601) /* __isolate_free_page() does not map the pages */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1602) split_map_pages(freelist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1603) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1604)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1605) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1606) * This is a migrate-callback that "allocates" freepages by taking pages
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1607) * from the isolated freelists in the block we are migrating to.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1608) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1609) static struct page *compaction_alloc(struct page *migratepage,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1610) unsigned long data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1611) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1612) struct compact_control *cc = (struct compact_control *)data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1613) struct page *freepage;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1614)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1615) if (list_empty(&cc->freepages)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1616) isolate_freepages(cc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1617)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1618) if (list_empty(&cc->freepages))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1619) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1620) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1621)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1622) freepage = list_entry(cc->freepages.next, struct page, lru);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1623) list_del(&freepage->lru);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1624) cc->nr_freepages--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1625)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1626) return freepage;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1627) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1628)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1629) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1630) * This is a migrate-callback that "frees" freepages back to the isolated
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1631) * freelist. All pages on the freelist are from the same zone, so there is no
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1632) * special handling needed for NUMA.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1633) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1634) static void compaction_free(struct page *page, unsigned long data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1635) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1636) struct compact_control *cc = (struct compact_control *)data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1637)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1638) list_add(&page->lru, &cc->freepages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1639) cc->nr_freepages++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1640) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1641)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1642) /* possible outcome of isolate_migratepages */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1643) typedef enum {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1644) ISOLATE_ABORT, /* Abort compaction now */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1645) ISOLATE_NONE, /* No pages isolated, continue scanning */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1646) ISOLATE_SUCCESS, /* Pages isolated, migrate */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1647) } isolate_migrate_t;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1648)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1649) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1650) * Allow userspace to control policy on scanning the unevictable LRU for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1651) * compactable pages.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1652) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1653) #ifdef CONFIG_PREEMPT_RT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1654) int sysctl_compact_unevictable_allowed __read_mostly = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1655) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1656) int sysctl_compact_unevictable_allowed __read_mostly = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1657) #endif
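/*
 * Illustrative usage via the standard sysctl interface: the policy can be
 * changed at runtime, e.g.
 *
 *	echo 0 > /proc/sys/vm/compact_unevictable_allowed
 *
 * stops compaction from scanning the unevictable LRU. 0 is the default on
 * PREEMPT_RT, where migrating mlocked pages is avoided because of the extra
 * latencies it can introduce.
 */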
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1658)
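/*
 * A sketch of the fast_start_pfn caching, inferred from the code below:
 * fast_start_pfn starts at 0 ("nothing cached"), is set to ULONG_MAX once
 * consumed by reinit_migrate_pfn(), and otherwise tracks the lowest PFN
 * handed out by previous successful fast searches. When a later fast
 * search fails, reinit_migrate_pfn() rewinds cc->migrate_pfn to that
 * lowest cached PFN so the linear scan restarts from there.
 */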
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1659) static inline void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1660) update_fast_start_pfn(struct compact_control *cc, unsigned long pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1661) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1662) if (cc->fast_start_pfn == ULONG_MAX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1663) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1664)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1665) if (!cc->fast_start_pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1666) cc->fast_start_pfn = pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1667)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1668) cc->fast_start_pfn = min(cc->fast_start_pfn, pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1669) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1670)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1671) static inline unsigned long
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1672) reinit_migrate_pfn(struct compact_control *cc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1673) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1674) if (!cc->fast_start_pfn || cc->fast_start_pfn == ULONG_MAX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1675) return cc->migrate_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1676)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1677) cc->migrate_pfn = cc->fast_start_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1678) cc->fast_start_pfn = ULONG_MAX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1679)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1680) return cc->migrate_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1681) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1682)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1683) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1684) * Briefly search the free lists for a migration source that already has
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1685) * some free pages to reduce the number of pages that need migration
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1686) * before a pageblock is free.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1687) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1688) static unsigned long fast_find_migrateblock(struct compact_control *cc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1689) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1690) unsigned int limit = freelist_scan_limit(cc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1691) unsigned int nr_scanned = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1692) unsigned long distance;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1693) unsigned long pfn = cc->migrate_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1694) unsigned long high_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1695) int order;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1696) bool found_block = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1697)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1698) /* Skip hints are relied on to avoid repeats on the fast search */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1699) if (cc->ignore_skip_hint)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1700) return pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1701)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1702) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1703) * If the migrate_pfn is not at the start of a zone or the start
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1704) * of a pageblock then assume this is a continuation of a previous
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1705) * scan restarted due to COMPACT_CLUSTER_MAX.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1706) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1707) if (pfn != cc->zone->zone_start_pfn && pfn != pageblock_start_pfn(pfn))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1708) return pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1709)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1710) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1711) * For smaller orders, just linearly scan as the number of pages
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1712) * to migrate should be relatively small and does not necessarily
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1713) * justify freeing up a large block for a small allocation.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1714) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1715) if (cc->order <= PAGE_ALLOC_COSTLY_ORDER)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1716) return pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1717)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1718) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1719) * Only allow kcompactd and direct requests for movable pages to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1720) * quickly clear out a MOVABLE pageblock for allocation. This
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1721) * reduces the risk that a large movable pageblock is freed for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1722) * an unmovable/reclaimable small allocation.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1723) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1724) if (cc->direct_compaction && cc->migratetype != MIGRATE_MOVABLE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1725) return pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1726)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1727) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1728) * When starting the migration scanner, pick any pageblock within the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1729) * first half of the search space. Otherwise try and pick a pageblock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1730) * within the first eighth to reduce the chances that a migration
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1731) * target later becomes a source.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1732) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1733) distance = (cc->free_pfn - cc->migrate_pfn) >> 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1734) if (cc->migrate_pfn != cc->zone->zone_start_pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1735) distance >>= 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1736) high_pfn = pageblock_start_pfn(cc->migrate_pfn + distance);
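/*
 * Illustrative example with hypothetical PFNs: if the free scanner is
 * 0x80000 pages ahead of the migration scanner, distance starts at 0x40000,
 * limiting the fast search to pageblocks in the first half of the unscanned
 * space; on a continued scan (migrate_pfn not at the zone start) the extra
 * shift narrows that to the first eighth.
 */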
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1737)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1738) for (order = cc->order - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1739) order >= PAGE_ALLOC_COSTLY_ORDER && !found_block && nr_scanned < limit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1740) order--) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1741) struct free_area *area = &cc->zone->free_area[order];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1742) struct list_head *freelist;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1743) unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1744) struct page *freepage;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1745)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1746) if (!area->nr_free)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1747) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1748)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1749) spin_lock_irqsave(&cc->zone->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1750) freelist = &area->free_list[MIGRATE_MOVABLE];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1751) list_for_each_entry(freepage, freelist, lru) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1752) unsigned long free_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1753)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1754) if (nr_scanned++ >= limit) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1755) move_freelist_tail(freelist, freepage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1756) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1757) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1758)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1759) free_pfn = page_to_pfn(freepage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1760) if (free_pfn < high_pfn) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1761) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1762) * Avoid if skipped recently. Ideally it would be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1763) * moved to the tail, but even safe list iteration
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1764) * only tolerates deletion of the current entry,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1765) * not reordering.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1766) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1767) if (get_pageblock_skip(freepage))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1768) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1769)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1770) /* Reorder so that a future search skips recent pages */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1771) move_freelist_tail(freelist, freepage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1772)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1773) update_fast_start_pfn(cc, free_pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1774) pfn = pageblock_start_pfn(free_pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1775) cc->fast_search_fail = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1776) found_block = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1777) set_pageblock_skip(freepage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1778) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1779) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1780) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1781) spin_unlock_irqrestore(&cc->zone->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1782) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1783)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1784) cc->total_migrate_scanned += nr_scanned;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1785)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1786) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1787) * If fast scanning failed then use a cached entry for a page block
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1788) * that had free pages as the basis for starting a linear scan.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1789) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1790) if (!found_block) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1791) cc->fast_search_fail++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1792) pfn = reinit_migrate_pfn(cc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1793) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1794) return pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1795) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1796)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1797) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1798) * Isolate all pages that can be migrated from the first suitable block,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1799) * starting at the block pointed to by the migrate scanner pfn within
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1800) * compact_control.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1801) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1802) static isolate_migrate_t isolate_migratepages(struct compact_control *cc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1803) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1804) unsigned long block_start_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1805) unsigned long block_end_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1806) unsigned long low_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1807) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1808) const isolate_mode_t isolate_mode =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1809) (sysctl_compact_unevictable_allowed ? ISOLATE_UNEVICTABLE : 0) |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1810) (cc->mode != MIGRATE_SYNC ? ISOLATE_ASYNC_MIGRATE : 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1811) bool fast_find_block;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1812)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1813) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1814) * Start at where we last stopped, or beginning of the zone as
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1815) * initialized by compact_zone(). The first failure will use
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1816) * the lowest PFN as the starting point for linear scanning.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1817) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1818) low_pfn = fast_find_migrateblock(cc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1819) block_start_pfn = pageblock_start_pfn(low_pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1820) if (block_start_pfn < cc->zone->zone_start_pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1821) block_start_pfn = cc->zone->zone_start_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1822)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1823) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1824) * fast_find_migrateblock() marks the chosen pageblock as skipped, so
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1825) * to avoid the isolation_suitable() check below rejecting it, record
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1826) * whether the fast search was successful.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1827) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1828) fast_find_block = low_pfn != cc->migrate_pfn && !cc->fast_search_fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1829)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1830) /* Only scan within a pageblock boundary */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1831) block_end_pfn = pageblock_end_pfn(low_pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1832)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1833) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1834) * Iterate over whole pageblocks until we find the first suitable.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1835) * Do not cross the free scanner.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1836) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1837) for (; block_end_pfn <= cc->free_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1838) fast_find_block = false,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1839) low_pfn = block_end_pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1840) block_start_pfn = block_end_pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1841) block_end_pfn += pageblock_nr_pages) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1842)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1843) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1844) * This can potentially iterate a massively long zone with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1845) * many pageblocks unsuitable, so periodically check if we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1846) * need to schedule.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1847) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1848) if (!(low_pfn % (SWAP_CLUSTER_MAX * pageblock_nr_pages)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1849) cond_resched();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1850)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1851) page = pageblock_pfn_to_page(block_start_pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1852) block_end_pfn, cc->zone);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1853) if (!page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1854) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1855)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1856) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1857) * If isolation recently failed, do not retry. Only check the skip
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1858) * hint once per pageblock: COMPACT_CLUSTER_MAX can cause a block to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1859) * be revisited, and a revisit assumes the hint was already checked
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1860) * before the block was marked "skip" so that other compaction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1861) * instances do not scan the same block.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1862) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1863) if (IS_ALIGNED(low_pfn, pageblock_nr_pages) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1864) !fast_find_block && !isolation_suitable(cc, page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1865) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1866)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1867) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1868) * For async compaction, also only scan in MOVABLE blocks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1869) * without huge pages. Async compaction is optimistic to see
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1870) * if the minimum amount of work satisfies the allocation.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1871) * The cached PFN is updated as it's possible that all
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1872) * remaining blocks between source and target are unsuitable
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1873) * and the compaction scanners fail to meet.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1874) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1875) if (!suitable_migration_source(cc, page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1876) update_cached_migrate(cc, block_end_pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1877) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1878) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1879)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1880) /* Perform the isolation */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1881) low_pfn = isolate_migratepages_block(cc, low_pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1882) block_end_pfn, isolate_mode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1883)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1884) if (!low_pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1885) return ISOLATE_ABORT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1886)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1887) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1888) * Either we isolated something and will proceed with migration,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1889) * or we failed and compact_zone() should decide whether to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1890) * continue or not.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1891) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1892) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1893) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1894)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1895) /* Record where migration scanner will be restarted. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1896) cc->migrate_pfn = low_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1897)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1898) return cc->nr_migratepages ? ISOLATE_SUCCESS : ISOLATE_NONE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1899) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1900)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1901) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1902) * order == -1 is expected when compacting via
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1903) * /proc/sys/vm/compact_memory
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1904) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1905) static inline bool is_via_compact_memory(int order)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1906) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1907) return order == -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1908) }
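/*
 * Illustrative trigger via the documented proc interface:
 *
 *	echo 1 > /proc/sys/vm/compact_memory
 *
 * compacts every zone with order == -1, which makes the suitability and
 * termination checks below return COMPACT_CONTINUE so whole zones are
 * scanned.
 */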
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1909)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1910) static bool kswapd_is_running(pg_data_t *pgdat)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1911) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1912) return pgdat->kswapd && (pgdat->kswapd->state == TASK_RUNNING);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1913) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1914)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1915) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1916) * A zone's fragmentation score is the external fragmentation with respect to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1917) * COMPACTION_HPAGE_ORDER. It returns a value in the range [0, 100].
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1918) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1919) static unsigned int fragmentation_score_zone(struct zone *zone)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1920) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1921) return extfrag_for_order(zone, COMPACTION_HPAGE_ORDER);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1922) }
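/*
 * Rough interpretation of the unweighted score (assuming the usual
 * extfrag_for_order() semantics): 0 means every free page already sits in
 * a block of at least COMPACTION_HPAGE_ORDER, while 100 means none of the
 * free memory is usable for an allocation of that order, i.e. the zone is
 * maximally fragmented with respect to huge pages.
 */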
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1923)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1924) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1925) * A weighted zone's fragmentation score is the external fragmentation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1926) * with respect to COMPACTION_HPAGE_ORDER, scaled by the zone's size. It
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1927) * returns a value in the range [0, 100].
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1928) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1929) * The scaling factor ensures that proactive compaction focuses on larger
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1930) * zones like ZONE_NORMAL, rather than smaller, specialized zones like
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1931) * ZONE_DMA32. For smaller zones, the score value remains close to zero,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1932) * and thus never exceeds the high threshold for proactive compaction.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1933) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1934) static unsigned int fragmentation_score_zone_weighted(struct zone *zone)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1935) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1936) unsigned long score;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1937)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1938) score = zone->present_pages * fragmentation_score_zone(zone);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1939) return div64_ul(score, zone->zone_pgdat->node_present_pages + 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1940) }
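/*
 * Worked example (hypothetical numbers): a zone holding a quarter of the
 * node's present pages with a raw fragmentation score of 40 contributes
 * roughly 40 * 1/4 = 10 to the node score, whereas a tiny ZONE_DMA32 with
 * the same raw score contributes close to nothing. The "+ 1" in the
 * divisor only guards against a zero node_present_pages.
 */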
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1941)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1942) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1943) * The per-node proactive (background) compaction process is started by its
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1944) * corresponding kcompactd thread when the node's fragmentation score
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1945) * exceeds the high threshold. The compaction process remains active till
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1946) * the node's score falls below the low threshold, or one of the back-off
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1947) * conditions is met.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1948) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1949) static unsigned int fragmentation_score_node(pg_data_t *pgdat)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1950) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1951) unsigned int score = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1952) int zoneid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1953)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1954) for (zoneid = 0; zoneid < MAX_NR_ZONES; zoneid++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1955) struct zone *zone;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1956)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1957) zone = &pgdat->node_zones[zoneid];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1958) score += fragmentation_score_zone_weighted(zone);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1959) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1960)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1961) return score;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1962) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1963)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1964) static unsigned int fragmentation_score_wmark(pg_data_t *pgdat, bool low)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1965) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1966) unsigned int wmark_low;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1967)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1968) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1969) * Cap the low watermark to avoid excessive compaction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1970) * activity in case a user sets the proactiveness tunable
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1971) * close to 100 (maximum).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1972) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1973) wmark_low = max(100U - sysctl_compaction_proactiveness, 5U);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1974) return low ? wmark_low : min(wmark_low + 10, 100U);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1975) }
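/*
 * Worked example: with the default proactiveness of 20 the thresholds are
 * wmark_low = max(100 - 20, 5) = 80 and wmark_high = 90, so proactive
 * compaction kicks in once the node score exceeds 90 and stops once it
 * falls to 80 or below. At proactiveness = 100 the cap keeps wmark_low at
 * 5 (wmark_high = 15) instead of letting it drop to 0.
 */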
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1976)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1977) static bool should_proactive_compact_node(pg_data_t *pgdat)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1978) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1979) int wmark_high;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1980)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1981) if (!sysctl_compaction_proactiveness || kswapd_is_running(pgdat))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1982) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1983)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1984) wmark_high = fragmentation_score_wmark(pgdat, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1985) return fragmentation_score_node(pgdat) > wmark_high;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1986) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1987)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1988) static enum compact_result __compact_finished(struct compact_control *cc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1989) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1990) unsigned int order;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1991) const int migratetype = cc->migratetype;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1992) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1993)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1994) /* Compaction run completes if the migrate and free scanner meet */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1995) if (compact_scanners_met(cc)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1996) /* Let the next compaction start anew. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1997) reset_cached_positions(cc->zone);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1998)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1999) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2000) * Mark that the PG_migrate_skip information should be cleared
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2001) * by kswapd when it goes to sleep. kcompactd does not set the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2002) * flag itself as the decision to clear the flag should be based
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2003) * directly on an allocation request.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2004) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2005) if (cc->direct_compaction)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2006) cc->zone->compact_blockskip_flush = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2007)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2008) if (cc->whole_zone)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2009) return COMPACT_COMPLETE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2010) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2011) return COMPACT_PARTIAL_SKIPPED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2012) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2013)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2014) if (cc->proactive_compaction) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2015) int score, wmark_low;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2016) pg_data_t *pgdat;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2017)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2018) pgdat = cc->zone->zone_pgdat;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2019) if (kswapd_is_running(pgdat))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2020) return COMPACT_PARTIAL_SKIPPED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2021)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2022) score = fragmentation_score_zone(cc->zone);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2023) wmark_low = fragmentation_score_wmark(pgdat, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2024)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2025) if (score > wmark_low)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2026) ret = COMPACT_CONTINUE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2027) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2028) ret = COMPACT_SUCCESS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2029)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2030) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2031) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2032)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2033) if (is_via_compact_memory(cc->order))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2034) return COMPACT_CONTINUE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2035)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2036) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2037) * Always finish scanning a pageblock to reduce the possibility of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2038) * fallbacks in the future. This is particularly important when
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2039) * the migration source is unmovable/reclaimable, but it's not worth
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2040) * special casing.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2041) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2042) if (!IS_ALIGNED(cc->migrate_pfn, pageblock_nr_pages))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2043) return COMPACT_CONTINUE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2044)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2045) /* Direct compactor: Is a suitable page free? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2046) ret = COMPACT_NO_SUITABLE_PAGE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2047) for (order = cc->order; order < MAX_ORDER; order++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2048) struct free_area *area = &cc->zone->free_area[order];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2049) bool can_steal;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2050)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2051) /* Job done if page is free of the right migratetype */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2052) if (!free_area_empty(area, migratetype))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2053) return COMPACT_SUCCESS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2054)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2055) #ifdef CONFIG_CMA
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2056) /* MIGRATE_MOVABLE can fallback on MIGRATE_CMA */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2057) if (migratetype == MIGRATE_MOVABLE &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2058) !free_area_empty(area, MIGRATE_CMA))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2059) return COMPACT_SUCCESS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2060) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2061) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2062) * Job done if allocation would steal freepages from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2063) * other migratetype buddy lists.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2064) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2065) if (find_suitable_fallback(area, order, migratetype,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2066) true, &can_steal) != -1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2067)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2068) /* movable pages are OK in any pageblock */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2069) if (migratetype == MIGRATE_MOVABLE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2070) return COMPACT_SUCCESS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2071)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2072) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2073) * We are stealing for a non-movable allocation. Make
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2074) * sure we finish compacting the current pageblock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2075) * first so it is as free as possible and we won't
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2076) * have to steal another one soon. This only applies
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2077) * to sync compaction, as async compaction operates
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2078) * on pageblocks of the same migratetype.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2079) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2080) if (cc->mode == MIGRATE_ASYNC ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2081) IS_ALIGNED(cc->migrate_pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2082) pageblock_nr_pages)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2083) return COMPACT_SUCCESS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2084) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2085)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2086) ret = COMPACT_CONTINUE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2087) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2088) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2089) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2090)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2091) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2092) if (cc->contended || fatal_signal_pending(current))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2093) ret = COMPACT_CONTENDED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2094)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2095) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2096) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2097)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2098) static enum compact_result compact_finished(struct compact_control *cc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2099) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2100) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2101)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2102) ret = __compact_finished(cc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2103) trace_mm_compaction_finished(cc->zone, cc->order, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2104) if (ret == COMPACT_NO_SUITABLE_PAGE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2105) ret = COMPACT_CONTINUE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2106)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2107) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2108) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2109)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2110) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2111) * compaction_suitable: Is this suitable to run compaction on this zone now?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2112) * Returns
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2113) * COMPACT_SKIPPED - If there are too few free pages for compaction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2114) * COMPACT_SUCCESS - If the allocation would succeed without compaction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2115) * COMPACT_CONTINUE - If compaction should run now
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2116) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2117) static enum compact_result __compaction_suitable(struct zone *zone, int order,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2118) unsigned int alloc_flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2119) int highest_zoneidx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2120) unsigned long wmark_target)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2121) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2122) unsigned long watermark;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2123)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2124) if (is_via_compact_memory(order))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2125) return COMPACT_CONTINUE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2126)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2127) watermark = wmark_pages(zone, alloc_flags & ALLOC_WMARK_MASK);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2128) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2129) * If watermarks for high-order allocation are already met, there
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2130) * should be no need for compaction at all.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2131) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2132) if (zone_watermark_ok(zone, order, watermark, highest_zoneidx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2133) alloc_flags))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2134) return COMPACT_SUCCESS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2135)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2136) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2137) * Watermarks for order-0 must be met for compaction to be able to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2138) * isolate free pages for migration targets. This means that the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2139) * watermark and alloc_flags have to match, or be more pessimistic than
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2140) * the check in __isolate_free_page(). We don't use the direct
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2141) * compactor's alloc_flags, as they are not relevant for freepage
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2142) * isolation. We however do use the direct compactor's highest_zoneidx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2143) * to skip over zones where lowmem reserves would prevent allocation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2144) * even if compaction succeeds.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2145) * For costly orders, we require low watermark instead of min for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2146) * compaction to proceed, to increase its chances.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2147) * ALLOC_CMA is used, as pages in CMA pageblocks are considered
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2148) * suitable migration targets
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2149) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2150) watermark = (order > PAGE_ALLOC_COSTLY_ORDER) ?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2151) low_wmark_pages(zone) : min_wmark_pages(zone);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2152) watermark += compact_gap(order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2153) if (!__zone_watermark_ok(zone, 0, watermark, highest_zoneidx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2154) ALLOC_CMA, wmark_target))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2155) return COMPACT_SKIPPED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2156)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2157) return COMPACT_CONTINUE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2158) }
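/*
 * Worked example (assuming compact_gap(order) == 2UL << order, as defined
 * in <linux/compaction.h>): for a costly order-9 request the order-0 check
 * above becomes "low_wmark + 1024 free pages", i.e. roughly twice the
 * requested allocation size on top of the low watermark, leaving headroom
 * for the free pages that compaction itself isolates as migration targets.
 */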
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2159)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2160) enum compact_result compaction_suitable(struct zone *zone, int order,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2161) unsigned int alloc_flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2162) int highest_zoneidx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2163) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2164) enum compact_result ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2165) int fragindex;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2166)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2167) ret = __compaction_suitable(zone, order, alloc_flags, highest_zoneidx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2168) zone_page_state(zone, NR_FREE_PAGES));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2169) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2170) * fragmentation index determines if allocation failures are due to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2171) * low memory or external fragmentation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2172) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2173) * index of -1000 would imply allocations might succeed depending on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2174) * watermarks, but we already failed the high-order watermark check
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2175) * index towards 0 implies failure is due to lack of memory
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2176) * index towards 1000 implies failure is due to fragmentation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2177) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2178) * Only compact if a failure would be due to fragmentation. Also
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2179) * ignore fragindex for non-costly orders where the alternative to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2180) * a successful reclaim/compaction is OOM. Fragindex and the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2181) * vm.extfrag_threshold sysctl are meant as a heuristic to prevent
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2182) * excessive compaction for costly orders, but it should not be at the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2183) * expense of system stability.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2184) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2185) if (ret == COMPACT_CONTINUE && (order > PAGE_ALLOC_COSTLY_ORDER)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2186) fragindex = fragmentation_index(zone, order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2187) if (fragindex >= 0 && fragindex <= sysctl_extfrag_threshold)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2188) ret = COMPACT_NOT_SUITABLE_ZONE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2189) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2190)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2191) trace_mm_compaction_suitable(zone, order, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2192) if (ret == COMPACT_NOT_SUITABLE_ZONE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2193) ret = COMPACT_SKIPPED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2194)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2195) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2196) }
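/*
 * Illustrative reading of the fragindex check above (assuming the default
 * vm.extfrag_threshold of 500): a costly-order failure with fragindex 300
 * looks more like a shortage of memory than fragmentation, so compaction
 * is skipped; a fragindex above 500, tending towards 1000, points at
 * external fragmentation and lets compaction continue.
 */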
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2197)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2198) bool compaction_zonelist_suitable(struct alloc_context *ac, int order,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2199) int alloc_flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2200) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2201) struct zone *zone;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2202) struct zoneref *z;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2203)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2204) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2205) * Make sure at least one zone would pass __compaction_suitable if we continue
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2206) * retrying the reclaim.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2207) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2208) for_each_zone_zonelist_nodemask(zone, z, ac->zonelist,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2209) ac->highest_zoneidx, ac->nodemask) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2210) unsigned long available;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2211) enum compact_result compact_result;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2212)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2213) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2214) * Do not consider all the reclaimable memory because we do not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2215) * want to thrash just for a single high-order allocation which
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2216) * is not even guaranteed to appear even if __compaction_suitable
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2217) * is happy about the watermark check.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2218) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2219) available = zone_reclaimable_pages(zone) / order;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2220) available += zone_page_state_snapshot(zone, NR_FREE_PAGES);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2221) compact_result = __compaction_suitable(zone, order, alloc_flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2222) ac->highest_zoneidx, available);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2223) if (compact_result != COMPACT_SKIPPED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2224) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2225) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2226)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2227) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2228) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2229)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2230) static enum compact_result
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2231) compact_zone(struct compact_control *cc, struct capture_control *capc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2232) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2233) enum compact_result ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2234) unsigned long start_pfn = cc->zone->zone_start_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2235) unsigned long end_pfn = zone_end_pfn(cc->zone);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2236) unsigned long last_migrated_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2237) const bool sync = cc->mode != MIGRATE_ASYNC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2238) bool update_cached;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2239)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2240) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2241) * These counters track activities during zone compaction. Initialize
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2242) * them before compacting a new zone.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2243) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2244) cc->total_migrate_scanned = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2245) cc->total_free_scanned = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2246) cc->nr_migratepages = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2247) cc->nr_freepages = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2248) INIT_LIST_HEAD(&cc->freepages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2249) INIT_LIST_HEAD(&cc->migratepages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2250)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2251) cc->migratetype = gfp_migratetype(cc->gfp_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2252) ret = compaction_suitable(cc->zone, cc->order, cc->alloc_flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2253) cc->highest_zoneidx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2254) /* Compaction is likely to fail */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2255) if (ret == COMPACT_SUCCESS || ret == COMPACT_SKIPPED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2256) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2257)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2258) /* huh, compaction_suitable is returning something unexpected */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2259) VM_BUG_ON(ret != COMPACT_CONTINUE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2260)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2261) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2262) * Clear pageblock skip if there were failures recently and compaction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2263) * is about to be retried after being deferred.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2264) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2265) if (compaction_restarting(cc->zone, cc->order))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2266) __reset_isolation_suitable(cc->zone);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2267)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2268) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2269) * Set up to move all movable pages to the end of the zone. Use cached
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2270) * information on where the scanners should start (unless we explicitly
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2271) * want to compact the whole zone), but check that it is initialised
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2272) * by ensuring the values are within zone boundaries.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2273) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2274) cc->fast_start_pfn = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2275) if (cc->whole_zone) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2276) cc->migrate_pfn = start_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2277) cc->free_pfn = pageblock_start_pfn(end_pfn - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2278) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2279) cc->migrate_pfn = cc->zone->compact_cached_migrate_pfn[sync];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2280) cc->free_pfn = cc->zone->compact_cached_free_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2281) if (cc->free_pfn < start_pfn || cc->free_pfn >= end_pfn) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2282) cc->free_pfn = pageblock_start_pfn(end_pfn - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2283) cc->zone->compact_cached_free_pfn = cc->free_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2284) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2285) if (cc->migrate_pfn < start_pfn || cc->migrate_pfn >= end_pfn) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2286) cc->migrate_pfn = start_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2287) cc->zone->compact_cached_migrate_pfn[0] = cc->migrate_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2288) cc->zone->compact_cached_migrate_pfn[1] = cc->migrate_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2289) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2290)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2291) if (cc->migrate_pfn <= cc->zone->compact_init_migrate_pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2292) cc->whole_zone = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2293) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2294)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2295) last_migrated_pfn = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2296)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2297) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2298) * Migrate has separate cached PFNs for ASYNC and SYNC* migration on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2299) * the basis that some migrations will fail in ASYNC mode. However,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2300) * if the cached PFNs match and pageblocks are skipped due to having
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2301) * no isolation candidates, then the sync state does not matter.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2302) * Until a pageblock with isolation candidates is found, keep the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2303) * cached PFNs in sync to avoid revisiting the same blocks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2304) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2305) update_cached = !sync &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2306) cc->zone->compact_cached_migrate_pfn[0] == cc->zone->compact_cached_migrate_pfn[1];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2307)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2308) trace_mm_compaction_begin(start_pfn, cc->migrate_pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2309) cc->free_pfn, end_pfn, sync);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2310)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2311) /* lru_add_drain_all() could be expensive, as it involves other CPUs */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2312) lru_add_drain();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2313)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2314) while ((ret = compact_finished(cc)) == COMPACT_CONTINUE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2315) int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2316) unsigned long start_pfn = cc->migrate_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2317)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2318) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2319) * Avoid multiple rescans which can happen if a page cannot be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2320) * isolated (dirty/writeback in async mode) or if the migrated
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2321) * pages are being allocated before the pageblock is cleared.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2322) * The first rescan will capture the entire pageblock for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2323) * migration. If it fails, it'll be marked skip and scanning
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2324) * will proceed as normal.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2325) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2326) cc->rescan = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2327) if (pageblock_start_pfn(last_migrated_pfn) ==
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2328) pageblock_start_pfn(start_pfn)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2329) cc->rescan = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2330) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2331)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2332) switch (isolate_migratepages(cc)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2333) case ISOLATE_ABORT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2334) ret = COMPACT_CONTENDED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2335) putback_movable_pages(&cc->migratepages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2336) cc->nr_migratepages = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2337) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2338) case ISOLATE_NONE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2339) if (update_cached) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2340) cc->zone->compact_cached_migrate_pfn[1] =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2341) cc->zone->compact_cached_migrate_pfn[0];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2342) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2343)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2344) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2345) * We haven't isolated and migrated anything, but
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2346) 			 * there might still be unflushed migrations from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2347) 			 * the previous cc->order aligned block.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2348) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2349) goto check_drain;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2350) case ISOLATE_SUCCESS:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2351) update_cached = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2352) last_migrated_pfn = start_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2354) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2355)
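		/*
		 * Note: compaction_alloc()/compaction_free() act as the
		 * target-page callbacks here - they hand out free pages
		 * collected on cc->freepages (refilling the list via the free
		 * scanner when it runs dry) and take back any pages that
		 * migration does not end up using.
		 */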
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2356) err = migrate_pages(&cc->migratepages, compaction_alloc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2357) compaction_free, (unsigned long)cc, cc->mode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2358) MR_COMPACTION);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2359)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2360) trace_mm_compaction_migratepages(cc->nr_migratepages, err,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2361) &cc->migratepages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2362)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2363) /* All pages were either migrated or will be released */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2364) cc->nr_migratepages = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2365) if (err) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2366) putback_movable_pages(&cc->migratepages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2367) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2368) * migrate_pages() may return -ENOMEM when scanners meet
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2369) * and we want compact_finished() to detect it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2370) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2371) if (err == -ENOMEM && !compact_scanners_met(cc)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2372) ret = COMPACT_CONTENDED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2373) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2374) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2375) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2376) * We failed to migrate at least one page in the current
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2377) * order-aligned block, so skip the rest of it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2378) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2379) if (cc->direct_compaction &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2380) (cc->mode == MIGRATE_ASYNC)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2381) cc->migrate_pfn = block_end_pfn(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2382) cc->migrate_pfn - 1, cc->order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2383) /* Draining pcplists is useless in this case */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2384) last_migrated_pfn = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2385) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2386) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2387)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2388) check_drain:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2389) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2390) * Has the migration scanner moved away from the previous
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2391) * cc->order aligned block where we migrated from? If yes,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2392) * flush the pages that were freed, so that they can merge and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2393) * compact_finished() can detect immediately if allocation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2394) * would succeed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2395) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2396) if (cc->order > 0 && last_migrated_pfn) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2397) unsigned long current_block_start =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2398) block_start_pfn(cc->migrate_pfn, cc->order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2399)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2400) if (last_migrated_pfn < current_block_start) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2401) lru_add_drain_cpu_zone(cc->zone);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2402) /* No more flushing until we migrate again */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2403) last_migrated_pfn = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2404) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2405) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2406)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2407) /* Stop if a page has been captured */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2408) if (capc && capc->page) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2409) ret = COMPACT_SUCCESS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2410) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2411) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2412) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2413)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2414) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2415) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2416) * Release free pages and update where the free scanner should restart,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2417) * so we don't leave any returned pages behind in the next attempt.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2418) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2419) if (cc->nr_freepages > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2420) unsigned long free_pfn = release_freepages(&cc->freepages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2421)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2422) cc->nr_freepages = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2423) VM_BUG_ON(free_pfn == 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2424) /* The cached pfn is always the first in a pageblock */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2425) free_pfn = pageblock_start_pfn(free_pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2426) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2427) 		 * Only go back, not forward. The cached pfn might already
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2428) 		 * have been reset to the zone end in compact_finished().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2429) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2430) if (free_pfn > cc->zone->compact_cached_free_pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2431) cc->zone->compact_cached_free_pfn = free_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2432) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2433)
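	/*
	 * These running totals are what show up as compact_migrate_scanned
	 * and compact_free_scanned in /proc/vmstat.
	 */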
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2434) count_compact_events(COMPACTMIGRATE_SCANNED, cc->total_migrate_scanned);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2435) count_compact_events(COMPACTFREE_SCANNED, cc->total_free_scanned);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2436)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2437) trace_mm_compaction_end(start_pfn, cc->migrate_pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2438) cc->free_pfn, end_pfn, sync, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2439)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2440) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2441) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2442)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2443) static enum compact_result compact_zone_order(struct zone *zone, int order,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2444) gfp_t gfp_mask, enum compact_priority prio,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2445) unsigned int alloc_flags, int highest_zoneidx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2446) struct page **capture)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2447) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2448) enum compact_result ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2449) struct compact_control cc = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2450) .order = order,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2451) .search_order = order,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2452) .gfp_mask = gfp_mask,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2453) .zone = zone,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2454) .mode = (prio == COMPACT_PRIO_ASYNC) ?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2455) MIGRATE_ASYNC : MIGRATE_SYNC_LIGHT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2456) .alloc_flags = alloc_flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2457) .highest_zoneidx = highest_zoneidx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2458) .direct_compaction = true,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2459) .whole_zone = (prio == MIN_COMPACT_PRIORITY),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2460) .ignore_skip_hint = (prio == MIN_COMPACT_PRIORITY),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2461) .ignore_block_suitable = (prio == MIN_COMPACT_PRIORITY)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2462) };
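	/*
	 * The three MIN_COMPACT_PRIORITY settings above make the most
	 * aggressive attempt: scan the whole zone and ignore the pageblock
	 * skip hints and suitability checks, trading latency for the best
	 * chance of assembling a page of the requested order.
	 */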
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2463) struct capture_control capc = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2464) .cc = &cc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2465) .page = NULL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2466) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2467)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2468) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2469) * Make sure the structs are really initialized before we expose the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2470) * capture control, in case we are interrupted and the interrupt handler
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2471) * frees a page.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2472) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2473) barrier();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2474) WRITE_ONCE(current->capture_control, &capc);
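	/*
	 * From here on the page freeing path may place a freshly freed page
	 * of a suitable order directly into capc.page instead of returning it
	 * to the buddy lists; the capc->page check in compact_zone()'s main
	 * loop then ends compaction early with COMPACT_SUCCESS.
	 */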
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2475)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2476) ret = compact_zone(&cc, &capc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2477)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2478) VM_BUG_ON(!list_empty(&cc.freepages));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2479) VM_BUG_ON(!list_empty(&cc.migratepages));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2480)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2481) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2482) * Make sure we hide capture control first before we read the captured
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2483) * page pointer, otherwise an interrupt could free and capture a page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2484) * and we would leak it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2485) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2486) WRITE_ONCE(current->capture_control, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2487) *capture = READ_ONCE(capc.page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2488)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2489) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2490) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2491)
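/*
 * Exposed as /proc/sys/vm/extfrag_threshold. For costly orders,
 * compaction_suitable() skips compaction when the zone's fragmentation
 * index (0-1000) is at or below this threshold, i.e. when an allocation
 * failure looks more like a shortage of free memory than external
 * fragmentation.
 */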
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2492) int sysctl_extfrag_threshold = 500;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2493)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2494) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2495) * try_to_compact_pages - Direct compact to satisfy a high-order allocation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2496) * @gfp_mask: The GFP mask of the current allocation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2497) * @order: The order of the current allocation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2498) * @alloc_flags: The allocation flags of the current allocation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2499) * @ac: The context of current allocation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2500) * @prio: Determines how hard direct compaction should try to succeed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2501)  * @capture: The free page created by compaction, if any, will be stored here
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2502) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2503) * This is the main entry point for direct page compaction.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2504) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2505) enum compact_result try_to_compact_pages(gfp_t gfp_mask, unsigned int order,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2506) unsigned int alloc_flags, const struct alloc_context *ac,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2507) enum compact_priority prio, struct page **capture)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2508) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2509) int may_perform_io = gfp_mask & __GFP_IO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2510) struct zoneref *z;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2511) struct zone *zone;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2512) enum compact_result rc = COMPACT_SKIPPED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2513)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2514) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2515) 	 * Check if the GFP flags allow compaction - GFP_NOIO is a really
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2516) 	 * tricky context because the migration might require IO.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2517) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2518) if (!may_perform_io)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2519) return COMPACT_SKIPPED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2520)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2521) trace_mm_compaction_try_to_compact_pages(order, gfp_mask, prio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2522)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2523) /* Compact each zone in the list */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2524) for_each_zone_zonelist_nodemask(zone, z, ac->zonelist,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2525) ac->highest_zoneidx, ac->nodemask) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2526) enum compact_result status;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2527)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2528) if (prio > MIN_COMPACT_PRIORITY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2529) && compaction_deferred(zone, order)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2530) rc = max_t(enum compact_result, COMPACT_DEFERRED, rc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2531) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2532) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2533)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2534) status = compact_zone_order(zone, order, gfp_mask, prio,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2535) alloc_flags, ac->highest_zoneidx, capture);
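		/*
		 * max() below relies on enum compact_result values increasing
		 * towards the more conclusive outcomes, with COMPACT_SUCCESS
		 * the largest, so the best result over all zones is kept.
		 */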
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2536) rc = max(status, rc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2537)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2538) /* The allocation should succeed, stop compacting */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2539) if (status == COMPACT_SUCCESS) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2540) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2541) * We think the allocation will succeed in this zone,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2542) * but it is not certain, hence the false. The caller
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2543) * will repeat this with true if allocation indeed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2544) * succeeds in this zone.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2545) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2546) compaction_defer_reset(zone, order, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2547)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2548) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2549) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2550)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2551) if (prio != COMPACT_PRIO_ASYNC && (status == COMPACT_COMPLETE ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2552) status == COMPACT_PARTIAL_SKIPPED))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2553) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2554) * We think that allocation won't succeed in this zone
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2555) * so we defer compaction there. If it ends up
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2556) * succeeding after all, it will be reset.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2557) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2558) defer_compaction(zone, order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2559)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2560) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2561) * We might have stopped compacting due to need_resched() in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2562) 		 * async compaction, or due to a fatal signal being detected.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2563) 		 * In that case do not try further zones.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2564) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2565) if ((prio == COMPACT_PRIO_ASYNC && need_resched())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2566) || fatal_signal_pending(current))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2567) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2568) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2569)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2570) return rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2571) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2572)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2573) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2574)  * Compact all zones within a node until each zone's fragmentation score
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2575)  * falls within the proactive compaction thresholds (as determined by the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2576)  * proactiveness tunable).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2577)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2578)  * It is possible that the function returns before reaching the score
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2579)  * targets due to various back-off conditions, such as contention on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2580)  * per-node or per-zone locks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2581) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2582) static void proactive_compact_node(pg_data_t *pgdat)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2583) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2584) int zoneid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2585) struct zone *zone;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2586) struct compact_control cc = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2587) .order = -1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2588) .mode = MIGRATE_SYNC_LIGHT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2589) .ignore_skip_hint = true,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2590) .whole_zone = true,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2591) .gfp_mask = GFP_KERNEL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2592) .proactive_compaction = true,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2593) };
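	/*
	 * order == -1 requests a full compaction of each zone; with
	 * proactive_compaction set, compact_finished() terminates the run
	 * based on the zone's fragmentation score rather than on the
	 * allocatability of any particular order.
	 */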
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2594)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2595) for (zoneid = 0; zoneid < MAX_NR_ZONES; zoneid++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2596) zone = &pgdat->node_zones[zoneid];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2597) if (!populated_zone(zone))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2598) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2599)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2600) cc.zone = zone;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2601)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2602) compact_zone(&cc, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2603)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2604) VM_BUG_ON(!list_empty(&cc.freepages));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2605) VM_BUG_ON(!list_empty(&cc.migratepages));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2606) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2607) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2608)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2609) /* Compact all zones within a node */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2610) static void compact_node(int nid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2611) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2612) pg_data_t *pgdat = NODE_DATA(nid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2613) int zoneid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2614) struct zone *zone;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2615) struct compact_control cc = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2616) .order = -1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2617) .mode = MIGRATE_SYNC,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2618) .ignore_skip_hint = true,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2619) .whole_zone = true,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2620) .gfp_mask = GFP_KERNEL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2621) };
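	/*
	 * order == -1 together with MIGRATE_SYNC requests a full, fully
	 * synchronous compaction of every populated zone, as triggered
	 * explicitly via the compact_memory sysctl or the per-node sysfs
	 * file below.
	 */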
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2623)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2624) 	for (zoneid = 0; zoneid < MAX_NR_ZONES; zoneid++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2626) zone = &pgdat->node_zones[zoneid];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2627) if (!populated_zone(zone))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2628) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2629)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2630) cc.zone = zone;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2631)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2632) compact_zone(&cc, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2633)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2634) VM_BUG_ON(!list_empty(&cc.freepages));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2635) VM_BUG_ON(!list_empty(&cc.migratepages));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2636) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2637) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2638)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2639) /* Compact all nodes in the system */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2640) static void compact_nodes(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2641) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2642) int nid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2643)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2644) /* Flush pending updates to the LRU lists */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2645) lru_add_drain_all();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2646)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2647) for_each_online_node(nid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2648) compact_node(nid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2649) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2650)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2651) /* The written value is actually unused; all memory is compacted */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2652) int sysctl_compact_memory;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2653)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2654) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2655) * Tunable for proactive compaction. It determines how
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2656) * aggressively the kernel should compact memory in the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2657) * background. It takes values in the range [0, 100].
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2658) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2659) unsigned int __read_mostly sysctl_compaction_proactiveness = 20;
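/*
 * A value of 0 disables proactive compaction: kcompactd then sleeps with no
 * timeout and only runs when explicitly woken (see the timeout selection in
 * kcompactd()).
 */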
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2660)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2661) int compaction_proactiveness_sysctl_handler(struct ctl_table *table, int write,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2662) void *buffer, size_t *length, loff_t *ppos)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2663) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2664) int rc, nid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2665)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2666) rc = proc_dointvec_minmax(table, write, buffer, length, ppos);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2667) if (rc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2668) return rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2669)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2670) if (write && sysctl_compaction_proactiveness) {
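		/*
		 * Kick each node's kcompactd so the new proactiveness value
		 * takes effect right away instead of waiting for the next
		 * periodic HPAGE_FRAG_CHECK_INTERVAL_MSEC wakeup.
		 */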
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2671) for_each_online_node(nid) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2672) pg_data_t *pgdat = NODE_DATA(nid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2673)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2674) if (pgdat->proactive_compact_trigger)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2675) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2676)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2677) pgdat->proactive_compact_trigger = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2678) wake_up_interruptible(&pgdat->kcompactd_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2679) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2680) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2681)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2682) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2683) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2684)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2685) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2686) * This is the entry point for compacting all nodes via
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2687) * /proc/sys/vm/compact_memory
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2688) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2689) int sysctl_compaction_handler(struct ctl_table *table, int write,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2690) void *buffer, size_t *length, loff_t *ppos)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2691) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2692) if (write)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2693) compact_nodes();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2694)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2695) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2696) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2697)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2698) #if defined(CONFIG_SYSFS) && defined(CONFIG_NUMA)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2699) static ssize_t sysfs_compact_node(struct device *dev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2700) struct device_attribute *attr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2701) const char *buf, size_t count)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2702) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2703) int nid = dev->id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2704)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2705) if (nid >= 0 && nid < nr_node_ids && node_online(nid)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2706) /* Flush pending updates to the LRU lists */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2707) lru_add_drain_all();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2708)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2709) compact_node(nid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2710) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2711)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2712) return count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2713) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2714) static DEVICE_ATTR(compact, 0200, NULL, sysfs_compact_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2715)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2716) int compaction_register_node(struct node *node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2717) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2718) return device_create_file(&node->dev, &dev_attr_compact);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2719) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2720)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2721) void compaction_unregister_node(struct node *node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2722) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2723) return device_remove_file(&node->dev, &dev_attr_compact);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2724) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2725) #endif /* CONFIG_SYSFS && CONFIG_NUMA */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2726)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2727) static inline bool kcompactd_work_requested(pg_data_t *pgdat)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2728) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2729) return pgdat->kcompactd_max_order > 0 || kthread_should_stop() ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2730) pgdat->proactive_compact_trigger;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2731) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2732)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2733) static bool kcompactd_node_suitable(pg_data_t *pgdat)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2734) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2735) int zoneid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2736) struct zone *zone;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2737) enum zone_type highest_zoneidx = pgdat->kcompactd_highest_zoneidx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2738)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2739) for (zoneid = 0; zoneid <= highest_zoneidx; zoneid++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2740) zone = &pgdat->node_zones[zoneid];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2741)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2742) if (!populated_zone(zone))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2743) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2744)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2745) if (compaction_suitable(zone, pgdat->kcompactd_max_order, 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2746) highest_zoneidx) == COMPACT_CONTINUE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2747) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2748) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2749)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2750) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2751) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2752)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2753) static void kcompactd_do_work(pg_data_t *pgdat)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2754) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2755) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2756) 	 * With no special task, compact all zones so that a page of the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2757) 	 * requested order is allocatable.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2758) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2759) int zoneid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2760) struct zone *zone;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2761) struct compact_control cc = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2762) .order = pgdat->kcompactd_max_order,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2763) .search_order = pgdat->kcompactd_max_order,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2764) .highest_zoneidx = pgdat->kcompactd_highest_zoneidx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2765) .mode = MIGRATE_SYNC_LIGHT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2766) .ignore_skip_hint = false,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2767) .gfp_mask = GFP_KERNEL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2768) };
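	/*
	 * Unlike the manual and proactive paths above, kcompactd honours the
	 * pageblock skip hints (ignore_skip_hint is false) and uses
	 * MIGRATE_SYNC_LIGHT, keeping the background work bounded.
	 */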
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2769) trace_mm_compaction_kcompactd_wake(pgdat->node_id, cc.order,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2770) cc.highest_zoneidx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2771) count_compact_event(KCOMPACTD_WAKE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2772)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2773) for (zoneid = 0; zoneid <= cc.highest_zoneidx; zoneid++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2774) int status;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2775)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2776) zone = &pgdat->node_zones[zoneid];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2777) if (!populated_zone(zone))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2778) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2779)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2780) if (compaction_deferred(zone, cc.order))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2781) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2782)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2783) if (compaction_suitable(zone, cc.order, 0, zoneid) !=
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2784) COMPACT_CONTINUE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2785) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2786)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2787) if (kthread_should_stop())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2788) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2789)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2790) cc.zone = zone;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2791) status = compact_zone(&cc, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2792)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2793) if (status == COMPACT_SUCCESS) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2794) compaction_defer_reset(zone, cc.order, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2795) } else if (status == COMPACT_PARTIAL_SKIPPED || status == COMPACT_COMPLETE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2796) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2797) * Buddy pages may become stranded on pcps that could
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2798) * otherwise coalesce on the zone's free area for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2799) * order >= cc.order. This is ratelimited by the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2800) * upcoming deferral.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2801) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2802) drain_all_pages(zone);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2803)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2804) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2805) * We use sync migration mode here, so we defer like
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2806) * sync direct compaction does.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2807) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2808) defer_compaction(zone, cc.order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2809) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2810)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2811) count_compact_events(KCOMPACTD_MIGRATE_SCANNED,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2812) cc.total_migrate_scanned);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2813) count_compact_events(KCOMPACTD_FREE_SCANNED,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2814) cc.total_free_scanned);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2815)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2816) VM_BUG_ON(!list_empty(&cc.freepages));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2817) VM_BUG_ON(!list_empty(&cc.migratepages));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2818) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2819)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2820) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2821) * Regardless of success, we are done until woken up next. But remember
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2822) * the requested order/highest_zoneidx in case it was higher/tighter
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2823) * than our current ones
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2824) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2825) if (pgdat->kcompactd_max_order <= cc.order)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2826) pgdat->kcompactd_max_order = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2827) if (pgdat->kcompactd_highest_zoneidx >= cc.highest_zoneidx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2828) pgdat->kcompactd_highest_zoneidx = pgdat->nr_zones - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2829) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2830)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2831) void wakeup_kcompactd(pg_data_t *pgdat, int order, int highest_zoneidx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2832) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2833) if (!order)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2834) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2835)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2836) if (pgdat->kcompactd_max_order < order)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2837) pgdat->kcompactd_max_order = order;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2838)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2839) if (pgdat->kcompactd_highest_zoneidx > highest_zoneidx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2840) pgdat->kcompactd_highest_zoneidx = highest_zoneidx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2841)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2842) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2843) 	 * Pairs with the implicit barrier in wait_event_freezable_timeout()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2844) 	 * such that wakeups are not missed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2845) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2846) if (!wq_has_sleeper(&pgdat->kcompactd_wait))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2847) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2848)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2849) if (!kcompactd_node_suitable(pgdat))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2850) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2851)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2852) trace_mm_compaction_wakeup_kcompactd(pgdat->node_id, order,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2853) highest_zoneidx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2854) wake_up_interruptible(&pgdat->kcompactd_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2855) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2856)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2857) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2858) * The background compaction daemon, started as a kernel thread
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2859) * from the init process.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2860) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2861) static int kcompactd(void *p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2862) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2863) pg_data_t *pgdat = (pg_data_t*)p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2864) struct task_struct *tsk = current;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2865) unsigned int proactive_defer = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2866)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2867) const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2868)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2869) if (!cpumask_empty(cpumask))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2870) set_cpus_allowed_ptr(tsk, cpumask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2871)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2872) set_freezable();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2873)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2874) pgdat->kcompactd_max_order = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2875) pgdat->kcompactd_highest_zoneidx = pgdat->nr_zones - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2876)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2877) while (!kthread_should_stop()) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2878) unsigned long pflags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2879) long timeout;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2880)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2881) timeout = sysctl_compaction_proactiveness ?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2882) msecs_to_jiffies(HPAGE_FRAG_CHECK_INTERVAL_MSEC) :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2883) MAX_SCHEDULE_TIMEOUT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2884) trace_mm_compaction_kcompactd_sleep(pgdat->node_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2885) if (wait_event_freezable_timeout(pgdat->kcompactd_wait,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2886) kcompactd_work_requested(pgdat), timeout) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2887) !pgdat->proactive_compact_trigger) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2888)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2889) psi_memstall_enter(&pflags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2890) kcompactd_do_work(pgdat);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2891) psi_memstall_leave(&pflags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2892) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2893) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2894)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2895) /* kcompactd wait timeout */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2896) if (should_proactive_compact_node(pgdat)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2897) unsigned int prev_score, score;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2898)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2899) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2900) 			 * On wakeup of proactive compaction by a sysctl
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2901) 			 * write, ignore the accumulated defer score.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2902) 			 * If proactive compaction then fails to make
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2903) 			 * progress for the new value, it will be deferred
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2904) 			 * again for 2^COMPACT_MAX_DEFER_SHIFT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2905) 			 * iterations.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2906) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2907) if (proactive_defer &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2908) !pgdat->proactive_compact_trigger) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2909) proactive_defer--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2910) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2911) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2912)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2913) prev_score = fragmentation_score_node(pgdat);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2914) proactive_compact_node(pgdat);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2915) score = fragmentation_score_node(pgdat);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2916) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2917) 			 * Defer proactive compaction if the fragmentation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2918) 			 * score did not go down, i.e. no progress was made.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2919) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2920) proactive_defer = score < prev_score ?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2921) 0 : 1 << COMPACT_MAX_DEFER_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2922) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2923) if (pgdat->proactive_compact_trigger)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2924) pgdat->proactive_compact_trigger = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2925) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2926)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2927) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2928) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2929)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2930) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2931) * This kcompactd start function will be called by init and node-hot-add.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2932)  * On node-hot-add, kcompactd will be moved to the proper cpus if cpus are hot-added.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2933) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2934) int kcompactd_run(int nid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2935) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2936) pg_data_t *pgdat = NODE_DATA(nid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2937) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2938)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2939) if (pgdat->kcompactd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2940) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2941)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2942) pgdat->kcompactd = kthread_run(kcompactd, pgdat, "kcompactd%d", nid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2943) if (IS_ERR(pgdat->kcompactd)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2944) pr_err("Failed to start kcompactd on node %d\n", nid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2945) ret = PTR_ERR(pgdat->kcompactd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2946) pgdat->kcompactd = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2947) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2948) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2949) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2950)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2951) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2952) * Called by memory hotplug when all memory in a node is offlined. Caller must
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2953) * hold mem_hotplug_begin/end().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2954) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2955) void kcompactd_stop(int nid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2956) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2957) struct task_struct *kcompactd = NODE_DATA(nid)->kcompactd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2958)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2959) if (kcompactd) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2960) kthread_stop(kcompactd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2961) NODE_DATA(nid)->kcompactd = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2962) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2963) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2964)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2965) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2966)  * It's optimal to keep kcompactd threads on the same CPUs as their memory,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2967)  * but it is not required for correctness. So if the last cpu in a node goes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2968)  * offline, kcompactd is allowed to run anywhere; as soon as the first cpu of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2969)  * that node comes back online, restore its cpu binding.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2970) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2971) static int kcompactd_cpu_online(unsigned int cpu)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2972) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2973) int nid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2974)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2975) for_each_node_state(nid, N_MEMORY) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2976) pg_data_t *pgdat = NODE_DATA(nid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2977) const struct cpumask *mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2978)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2979) mask = cpumask_of_node(pgdat->node_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2980)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2981) if (cpumask_any_and(cpu_online_mask, mask) < nr_cpu_ids)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2982) /* One of our CPUs online: restore mask */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2983) set_cpus_allowed_ptr(pgdat->kcompactd, mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2984) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2985) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2986) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2987)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2988) static int __init kcompactd_init(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2989) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2990) int nid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2991) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2992)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2993) ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2994) "mm/compaction:online",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2995) kcompactd_cpu_online, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2996) if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2997) pr_err("kcompactd: failed to register hotplug callbacks.\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2998) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2999) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3000)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3001) for_each_node_state(nid, N_MEMORY)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3002) kcompactd_run(nid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3003) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3004) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3005) subsys_initcall(kcompactd_init)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3006)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3007) #endif /* CONFIG_COMPACTION */