Orange Pi5 kernel

Deprecated Linux kernel 5.10.110 for OrangePi 5/5B/5+ boards

^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    1) // SPDX-License-Identifier: GPL-2.0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    2) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    3)  * linux/mm/compaction.c
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    4)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    5)  * Memory compaction for the reduction of external fragmentation. Note that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    6)  * this heavily depends upon page migration to do all the real heavy
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    7)  * lifting
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    8)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    9)  * Copyright IBM Corp. 2007-2010 Mel Gorman <mel@csn.ul.ie>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   10)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   11) #include <linux/cpu.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   12) #include <linux/swap.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   13) #include <linux/migrate.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   14) #include <linux/compaction.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   15) #include <linux/mm_inline.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   16) #include <linux/sched/signal.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   17) #include <linux/backing-dev.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   18) #include <linux/sysctl.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   19) #include <linux/sysfs.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   20) #include <linux/page-isolation.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   21) #include <linux/kasan.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   22) #include <linux/kthread.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   23) #include <linux/freezer.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   24) #include <linux/page_owner.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   25) #include <linux/psi.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   26) #include "internal.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   27) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   28) #ifdef CONFIG_COMPACTION
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   29) static inline void count_compact_event(enum vm_event_item item)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   30) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   31) 	count_vm_event(item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   32) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   33) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   34) static inline void count_compact_events(enum vm_event_item item, long delta)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   35) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   36) 	count_vm_events(item, delta);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   37) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   38) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   39) #define count_compact_event(item) do { } while (0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   40) #define count_compact_events(item, delta) do { } while (0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   41) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   42) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   43) #if defined CONFIG_COMPACTION || defined CONFIG_CMA
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   44) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   45) #define CREATE_TRACE_POINTS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   46) #include <trace/events/compaction.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   47) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   48) #define block_start_pfn(pfn, order)	round_down(pfn, 1UL << (order))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   49) #define block_end_pfn(pfn, order)	ALIGN((pfn) + 1, 1UL << (order))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   50) #define pageblock_start_pfn(pfn)	block_start_pfn(pfn, pageblock_order)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   51) #define pageblock_end_pfn(pfn)		block_end_pfn(pfn, pageblock_order)
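/*
 * Editor's note (not part of the original source): a quick illustration of
 * the helpers above, assuming pageblock_order == 9 (512 pages, the usual
 * value with 4K pages and THP):
 *
 *	pageblock_start_pfn(0x12345) == round_down(0x12345, 512) == 0x12200
 *	pageblock_end_pfn(0x12345)   == ALIGN(0x12345 + 1, 512)  == 0x12400
 *
 * i.e. the two macros bracket the pageblock containing the given pfn.
 */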
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   52) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   53) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   54)  * Fragmentation score check interval for proactive compaction purposes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   55)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   56) static const unsigned int HPAGE_FRAG_CHECK_INTERVAL_MSEC = 500;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   57) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   58) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   59)  * Page order with-respect-to which proactive compaction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   60)  * calculates external fragmentation, which is used as
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   61)  * the "fragmentation score" of a node/zone.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   62)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   63) #if defined CONFIG_TRANSPARENT_HUGEPAGE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   64) #define COMPACTION_HPAGE_ORDER	HPAGE_PMD_ORDER
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   65) #elif defined CONFIG_HUGETLBFS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   66) #define COMPACTION_HPAGE_ORDER	HUGETLB_PAGE_ORDER
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   67) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   68) #define COMPACTION_HPAGE_ORDER	(PMD_SHIFT - PAGE_SHIFT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   69) #endif
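/*
 * Editor's note (not part of the original source): on arm64 with 4K pages
 * and CONFIG_TRANSPARENT_HUGEPAGE enabled (a typical Orange Pi 5 config),
 * COMPACTION_HPAGE_ORDER resolves to HPAGE_PMD_ORDER == 9, so the
 * fragmentation score below is computed with respect to 2MiB allocations.
 */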
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   70) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   71) static unsigned long release_freepages(struct list_head *freelist)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   72) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   73) 	struct page *page, *next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   74) 	unsigned long high_pfn = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   75) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   76) 	list_for_each_entry_safe(page, next, freelist, lru) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   77) 		unsigned long pfn = page_to_pfn(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   78) 		list_del(&page->lru);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   79) 		__free_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   80) 		if (pfn > high_pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   81) 			high_pfn = pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   82) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   83) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   84) 	return high_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   85) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   86) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   87) static void split_map_pages(struct list_head *list)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   88) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   89) 	unsigned int i, order, nr_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   90) 	struct page *page, *next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   91) 	LIST_HEAD(tmp_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   92) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   93) 	list_for_each_entry_safe(page, next, list, lru) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   94) 		list_del(&page->lru);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   95) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   96) 		order = page_private(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   97) 		nr_pages = 1 << order;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   98) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   99) 		post_alloc_hook(page, order, __GFP_MOVABLE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  100) 		if (order)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  101) 			split_page(page, order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  102) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  103) 		for (i = 0; i < nr_pages; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  104) 			list_add(&page->lru, &tmp_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  105) 			page++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  106) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  107) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  108) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  109) 	list_splice(&tmp_list, list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  110) }
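/*
 * Editor's note (not part of the original source): split_map_pages() expects
 * every page on the list to carry its buddy order in page_private (as stored
 * by isolate_freepages_block() further down), runs the post-allocation hook
 * and splits each high-order page, so callers get back a list of order-0
 * pages ready for use as migration targets.
 */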
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  111) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  112) #ifdef CONFIG_COMPACTION
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  113) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  114) int PageMovable(struct page *page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  115) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  116) 	struct address_space *mapping;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  117) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  118) 	VM_BUG_ON_PAGE(!PageLocked(page), page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  119) 	if (!__PageMovable(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  120) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  121) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  122) 	mapping = page_mapping(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  123) 	if (mapping && mapping->a_ops && mapping->a_ops->isolate_page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  124) 		return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  125) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  126) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  127) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  128) EXPORT_SYMBOL(PageMovable);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  129) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  130) void __SetPageMovable(struct page *page, struct address_space *mapping)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  131) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  132) 	VM_BUG_ON_PAGE(!PageLocked(page), page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  133) 	VM_BUG_ON_PAGE((unsigned long)mapping & PAGE_MAPPING_MOVABLE, page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  134) 	page->mapping = (void *)((unsigned long)mapping | PAGE_MAPPING_MOVABLE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  135) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  136) EXPORT_SYMBOL(__SetPageMovable);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  137) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  138) void __ClearPageMovable(struct page *page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  139) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  140) 	VM_BUG_ON_PAGE(!PageLocked(page), page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  141) 	VM_BUG_ON_PAGE(!PageMovable(page), page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  142) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  143) 	 * Clear the registered address_space value while keeping the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  144) 	 * PAGE_MAPPING_MOVABLE flag, so the VM can recognise a page released
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  145) 	 * by the driver after isolation and migration won't try to put it back.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  146) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  147) 	page->mapping = (void *)((unsigned long)page->mapping &
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  148) 				PAGE_MAPPING_MOVABLE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  149) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  150) EXPORT_SYMBOL(__ClearPageMovable);
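/*
 * Editor's note (not part of the original source): together, the three
 * helpers above implement the non-LRU movable page protocol. A driver locks
 * the page, calls __SetPageMovable() with an address_space whose a_ops
 * provide isolate_page/migratepage/putback_page, and calls
 * __ClearPageMovable() once the page no longer needs to be migratable.
 */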
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  151) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  152) /* Do not skip compaction more than 64 times */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  153) #define COMPACT_MAX_DEFER_SHIFT 6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  154) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  155) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  156)  * Compaction is deferred when compaction fails to result in a page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  157)  * allocation success. The next 1 << compact_defer_shift compaction attempts
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  158)  * are then skipped, up to a limit of 1 << COMPACT_MAX_DEFER_SHIFT.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  159)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  160) void defer_compaction(struct zone *zone, int order)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  161) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  162) 	zone->compact_considered = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  163) 	zone->compact_defer_shift++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  164) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  165) 	if (order < zone->compact_order_failed)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  166) 		zone->compact_order_failed = order;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  167) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  168) 	if (zone->compact_defer_shift > COMPACT_MAX_DEFER_SHIFT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  169) 		zone->compact_defer_shift = COMPACT_MAX_DEFER_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  170) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  171) 	trace_mm_compaction_defer_compaction(zone, order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  172) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  173) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  174) /* Returns true if compaction should be skipped this time */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  175) bool compaction_deferred(struct zone *zone, int order)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  176) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  177) 	unsigned long defer_limit = 1UL << zone->compact_defer_shift;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  178) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  179) 	if (order < zone->compact_order_failed)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  180) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  181) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  182) 	/* Avoid possible overflow */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  183) 	if (++zone->compact_considered >= defer_limit) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  184) 		zone->compact_considered = defer_limit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  185) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  186) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  187) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  188) 	trace_mm_compaction_deferred(zone, order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  189) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  190) 	return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  191) }
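/*
 * Editor's note (not part of the original source): a worked example of the
 * backoff above. After three consecutive defer_compaction() calls,
 * compact_defer_shift == 3, so compaction_deferred() returns true for the
 * next seven attempts before letting one through; at COMPACT_MAX_DEFER_SHIFT
 * the interval saturates at 1 << 6 == 64 considered attempts.
 */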
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  192) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  193) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  194)  * Update defer tracking counters after successful compaction of given order,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  195)  * which means an allocation either succeeded (alloc_success == true) or is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  196)  * expected to succeed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  197)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  198) void compaction_defer_reset(struct zone *zone, int order,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  199) 		bool alloc_success)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  200) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  201) 	if (alloc_success) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  202) 		zone->compact_considered = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  203) 		zone->compact_defer_shift = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  204) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  205) 	if (order >= zone->compact_order_failed)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  206) 		zone->compact_order_failed = order + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  207) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  208) 	trace_mm_compaction_defer_reset(zone, order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  209) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  210) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  211) /* Returns true if restarting compaction after many failures */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  212) bool compaction_restarting(struct zone *zone, int order)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  213) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  214) 	if (order < zone->compact_order_failed)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  215) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  216) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  217) 	return zone->compact_defer_shift == COMPACT_MAX_DEFER_SHIFT &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  218) 		zone->compact_considered >= 1UL << zone->compact_defer_shift;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  219) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  220) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  221) /* Returns true if the pageblock should be scanned for pages to isolate. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  222) static inline bool isolation_suitable(struct compact_control *cc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  223) 					struct page *page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  224) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  225) 	if (cc->ignore_skip_hint)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  226) 		return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  227) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  228) 	return !get_pageblock_skip(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  229) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  230) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  231) static void reset_cached_positions(struct zone *zone)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  232) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  233) 	zone->compact_cached_migrate_pfn[0] = zone->zone_start_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  234) 	zone->compact_cached_migrate_pfn[1] = zone->zone_start_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  235) 	zone->compact_cached_free_pfn =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  236) 				pageblock_start_pfn(zone_end_pfn(zone) - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  237) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  238) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  239) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  240)  * Compound pages of >= pageblock_order should consistently be skipped until
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  241)  * released. It is always pointless to compact pages of such order (if they are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  242)  * migratable), and the pageblocks they occupy cannot contain any free pages.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  243)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  244) static bool pageblock_skip_persistent(struct page *page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  245) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  246) 	if (!PageCompound(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  247) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  248) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  249) 	page = compound_head(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  250) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  251) 	if (compound_order(page) >= pageblock_order)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  252) 		return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  253) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  254) 	return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  255) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  256) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  257) static bool
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  258) __reset_isolation_pfn(struct zone *zone, unsigned long pfn, bool check_source,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  259) 							bool check_target)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  260) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  261) 	struct page *page = pfn_to_online_page(pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  262) 	struct page *block_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  263) 	struct page *end_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  264) 	unsigned long block_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  265) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  266) 	if (!page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  267) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  268) 	if (zone != page_zone(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  269) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  270) 	if (pageblock_skip_persistent(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  271) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  272) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  273) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  274) 	 * If skip is already cleared do no further checking once the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  275) 	 * restart points have been set.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  276) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  277) 	if (check_source && check_target && !get_pageblock_skip(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  278) 		return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  279) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  280) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  281) 	 * If clearing skip for the target scanner, do not select a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  282) 	 * non-movable pageblock as the starting point.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  283) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  284) 	if (!check_source && check_target &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  285) 	    get_pageblock_migratetype(page) != MIGRATE_MOVABLE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  286) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  287) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  288) 	/* Ensure the start of the pageblock or zone is online and valid */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  289) 	block_pfn = pageblock_start_pfn(pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  290) 	block_pfn = max(block_pfn, zone->zone_start_pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  291) 	block_page = pfn_to_online_page(block_pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  292) 	if (block_page) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  293) 		page = block_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  294) 		pfn = block_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  295) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  296) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  297) 	/* Ensure the end of the pageblock or zone is online and valid */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  298) 	block_pfn = pageblock_end_pfn(pfn) - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  299) 	block_pfn = min(block_pfn, zone_end_pfn(zone) - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  300) 	end_page = pfn_to_online_page(block_pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  301) 	if (!end_page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  302) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  303) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  304) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  305) 	 * Only clear the hint if a sample indicates there is either a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  306) 	 * free page or an LRU page in the block. One or other condition
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  307) 	 * is necessary for the block to be a migration source/target.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  308) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  309) 	do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  310) 		if (pfn_valid_within(pfn)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  311) 			if (check_source && PageLRU(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  312) 				clear_pageblock_skip(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  313) 				return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  314) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  315) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  316) 			if (check_target && PageBuddy(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  317) 				clear_pageblock_skip(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  318) 				return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  319) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  320) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  321) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  322) 		page += (1 << PAGE_ALLOC_COSTLY_ORDER);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  323) 		pfn += (1 << PAGE_ALLOC_COSTLY_ORDER);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  324) 	} while (page <= end_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  325) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  326) 	return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  327) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  328) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  329) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  330)  * This function is called to clear all cached information on pageblocks that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  331)  * should be skipped for page isolation when the migrate and free page scanner
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  332)  * meet.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  333)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  334) static void __reset_isolation_suitable(struct zone *zone)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  335) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  336) 	unsigned long migrate_pfn = zone->zone_start_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  337) 	unsigned long free_pfn = zone_end_pfn(zone) - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  338) 	unsigned long reset_migrate = free_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  339) 	unsigned long reset_free = migrate_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  340) 	bool source_set = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  341) 	bool free_set = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  342) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  343) 	if (!zone->compact_blockskip_flush)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  344) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  345) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  346) 	zone->compact_blockskip_flush = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  347) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  348) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  349) 	 * Walk the zone and update pageblock skip information. Source looks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  350) 	 * for PageLRU while target looks for PageBuddy. When the scanner
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  351) 	 * is found, both PageBuddy and PageLRU are checked as the pageblock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  352) 	 * is suitable as both source and target.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  353) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  354) 	for (; migrate_pfn < free_pfn; migrate_pfn += pageblock_nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  355) 					free_pfn -= pageblock_nr_pages) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  356) 		cond_resched();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  357) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  358) 		/* Update the migrate PFN */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  359) 		if (__reset_isolation_pfn(zone, migrate_pfn, true, source_set) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  360) 		    migrate_pfn < reset_migrate) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  361) 			source_set = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  362) 			reset_migrate = migrate_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  363) 			zone->compact_init_migrate_pfn = reset_migrate;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  364) 			zone->compact_cached_migrate_pfn[0] = reset_migrate;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  365) 			zone->compact_cached_migrate_pfn[1] = reset_migrate;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  366) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  367) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  368) 		/* Update the free PFN */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  369) 		if (__reset_isolation_pfn(zone, free_pfn, free_set, true) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  370) 		    free_pfn > reset_free) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  371) 			free_set = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  372) 			reset_free = free_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  373) 			zone->compact_init_free_pfn = reset_free;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  374) 			zone->compact_cached_free_pfn = reset_free;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  375) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  376) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  377) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  378) 	/* Leave no distance if no suitable block was reset */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  379) 	if (reset_migrate >= reset_free) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  380) 		zone->compact_cached_migrate_pfn[0] = migrate_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  381) 		zone->compact_cached_migrate_pfn[1] = migrate_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  382) 		zone->compact_cached_free_pfn = free_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  383) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  384) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  385) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  386) void reset_isolation_suitable(pg_data_t *pgdat)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  387) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  388) 	int zoneid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  389) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  390) 	for (zoneid = 0; zoneid < MAX_NR_ZONES; zoneid++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  391) 		struct zone *zone = &pgdat->node_zones[zoneid];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  392) 		if (!populated_zone(zone))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  393) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  394) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  395) 		/* Only flush if a full compaction finished recently */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  396) 		if (zone->compact_blockskip_flush)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  397) 			__reset_isolation_suitable(zone);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  398) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  399) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  400) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  401) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  402)  * Sets the pageblock skip bit if it was clear. Note that this is a hint as
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  403)  * locks are not required for read/writers. Returns true if it was already set.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  404)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  405) static bool test_and_set_skip(struct compact_control *cc, struct page *page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  406) 							unsigned long pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  407) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  408) 	bool skip;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  409) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  410) 	/* Do not update if the skip hint is being ignored */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  411) 	if (cc->ignore_skip_hint)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  412) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  413) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  414) 	if (!IS_ALIGNED(pfn, pageblock_nr_pages))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  415) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  416) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  417) 	skip = get_pageblock_skip(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  418) 	if (!skip && !cc->no_set_skip_hint)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  419) 		set_pageblock_skip(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  420) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  421) 	return skip;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  422) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  423) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  424) static void update_cached_migrate(struct compact_control *cc, unsigned long pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  425) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  426) 	struct zone *zone = cc->zone;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  427) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  428) 	pfn = pageblock_end_pfn(pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  429) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  430) 	/* Set for isolation rather than compaction */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  431) 	if (cc->no_set_skip_hint)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  432) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  433) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  434) 	if (pfn > zone->compact_cached_migrate_pfn[0])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  435) 		zone->compact_cached_migrate_pfn[0] = pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  436) 	if (cc->mode != MIGRATE_ASYNC &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  437) 	    pfn > zone->compact_cached_migrate_pfn[1])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  438) 		zone->compact_cached_migrate_pfn[1] = pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  439) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  440) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  441) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  442)  * If no pages were isolated then mark this pageblock to be skipped in the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  443)  * future. The information is later cleared by __reset_isolation_suitable().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  444)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  445) static void update_pageblock_skip(struct compact_control *cc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  446) 			struct page *page, unsigned long pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  447) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  448) 	struct zone *zone = cc->zone;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  449) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  450) 	if (cc->no_set_skip_hint)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  451) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  452) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  453) 	if (!page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  454) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  455) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  456) 	set_pageblock_skip(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  457) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  458) 	/* Update where async and sync compaction should restart */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  459) 	if (pfn < zone->compact_cached_free_pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  460) 		zone->compact_cached_free_pfn = pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  461) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  462) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  463) static inline bool isolation_suitable(struct compact_control *cc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  464) 					struct page *page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  465) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  466) 	return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  467) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  468) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  469) static inline bool pageblock_skip_persistent(struct page *page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  470) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  471) 	return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  472) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  473) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  474) static inline void update_pageblock_skip(struct compact_control *cc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  475) 			struct page *page, unsigned long pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  476) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  477) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  478) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  479) static void update_cached_migrate(struct compact_control *cc, unsigned long pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  480) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  481) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  482) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  483) static bool test_and_set_skip(struct compact_control *cc, struct page *page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  484) 							unsigned long pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  485) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  486) 	return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  487) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  488) #endif /* CONFIG_COMPACTION */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  489) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  490) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  491)  * Compaction requires the taking of some coarse locks that are potentially
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  492)  * very heavily contended. For async compaction, trylock and record if the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  493)  * lock is contended. The lock will still be acquired but compaction will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  494)  * abort when the current block is finished regardless of success rate.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  495)  * Sync compaction acquires the lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  496)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  497)  * Always returns true which makes it easier to track lock state in callers.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  498)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  499) static bool compact_lock_irqsave(spinlock_t *lock, unsigned long *flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  500) 						struct compact_control *cc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  501) 	__acquires(lock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  502) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  503) 	/* Track if the lock is contended in async mode */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  504) 	if (cc->mode == MIGRATE_ASYNC && !cc->contended) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  505) 		if (spin_trylock_irqsave(lock, *flags))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  506) 			return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  507) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  508) 		cc->contended = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  509) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  510) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  511) 	spin_lock_irqsave(lock, *flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  512) 	return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  513) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  514) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  515) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  516)  * Compaction requires the taking of some coarse locks that are potentially
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  517)  * very heavily contended. The lock should be periodically unlocked to avoid
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  518)  * having disabled IRQs for a long time, even when there is nobody waiting on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  519)  * the lock. It might also be that allowing the IRQs will result in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  520)  * need_resched() becoming true. If scheduling is needed, async compaction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  521)  * aborts. Sync compaction schedules.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  522)  * Either compaction type will also abort if a fatal signal is pending.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  523)  * In either case if the lock was locked, it is dropped and not regained.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  524)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  525)  * Returns true if compaction should abort due to a pending fatal signal, or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  526)  *		for async compaction due to need_resched().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  527)  * Returns false when compaction can continue (sync compaction might have
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  528)  *		scheduled).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  529)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  530) static bool compact_unlock_should_abort(spinlock_t *lock,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  531) 		unsigned long flags, bool *locked, struct compact_control *cc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  532) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  533) 	if (*locked) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  534) 		spin_unlock_irqrestore(lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  535) 		*locked = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  536) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  537) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  538) 	if (fatal_signal_pending(current)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  539) 		cc->contended = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  540) 		return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  541) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  542) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  543) 	cond_resched();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  544) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  545) 	return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  546) }
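/*
 * Editor's note (not part of the original source): the usual calling pattern,
 * as in isolate_freepages_block() below, is to check once every
 * SWAP_CLUSTER_MAX pfns scanned:
 *
 *	if (!(blockpfn % SWAP_CLUSTER_MAX)
 *	    && compact_unlock_should_abort(&cc->zone->lock, flags, &locked, cc))
 *		break;
 *
 * which bounds how long IRQs stay disabled without releasing and retaking
 * the lock on every iteration.
 */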
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  547) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  548) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  549)  * Isolate free pages onto a private freelist. If @strict is true, will abort
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  550)  * returning 0 on any invalid PFNs or non-free pages inside of the pageblock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  551)  * (even though it may still end up isolating some pages).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  552)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  553) static unsigned long isolate_freepages_block(struct compact_control *cc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  554) 				unsigned long *start_pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  555) 				unsigned long end_pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  556) 				struct list_head *freelist,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  557) 				unsigned int stride,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  558) 				bool strict)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  559) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  560) 	int nr_scanned = 0, total_isolated = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  561) 	struct page *cursor;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  562) 	unsigned long flags = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  563) 	bool locked = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  564) 	unsigned long blockpfn = *start_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  565) 	unsigned int order;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  566) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  567) 	/* Strict mode is for isolation, speed is secondary */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  568) 	if (strict)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  569) 		stride = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  570) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  571) 	cursor = pfn_to_page(blockpfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  572) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  573) 	/* Isolate free pages. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  574) 	for (; blockpfn < end_pfn; blockpfn += stride, cursor += stride) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  575) 		int isolated;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  576) 		struct page *page = cursor;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  577) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  578) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  579) 		 * Periodically drop the lock (if held) regardless of its
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  580) 		 * contention, to give chance to IRQs. Abort if fatal signal
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  581) 		 * pending or async compaction detects need_resched()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  582) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  583) 		if (!(blockpfn % SWAP_CLUSTER_MAX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  584) 		    && compact_unlock_should_abort(&cc->zone->lock, flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  585) 								&locked, cc))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  586) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  587) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  588) 		nr_scanned++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  589) 		if (!pfn_valid_within(blockpfn))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  590) 			goto isolate_fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  591) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  592) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  593) 		 * For compound pages such as THP and hugetlbfs, we can save
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  594) 		 * potentially a lot of iterations if we skip them at once.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  595) 		 * The check is racy, but we can consider only valid values
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  596) 		 * and the only danger is skipping too much.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  597) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  598) 		if (PageCompound(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  599) 			const unsigned int order = compound_order(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  600) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  601) 			if (likely(order < MAX_ORDER)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  602) 				blockpfn += (1UL << order) - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  603) 				cursor += (1UL << order) - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  604) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  605) 			goto isolate_fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  606) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  607) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  608) 		if (!PageBuddy(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  609) 			goto isolate_fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  610) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  611) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  612) 		 * If we already hold the lock, we can skip some rechecking.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  613) 		 * Note that if we hold the lock now, checked_pageblock was
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  614) 		 * already set in some previous iteration (or strict is true),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  615) 		 * so it is correct to skip the suitable migration target
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  616) 		 * recheck as well.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  617) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  618) 		if (!locked) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  619) 			locked = compact_lock_irqsave(&cc->zone->lock,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  620) 								&flags, cc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  621) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  622) 			/* Recheck this is a buddy page under lock */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  623) 			if (!PageBuddy(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  624) 				goto isolate_fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  625) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  626) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  627) 		/* Found a free page, will break it into order-0 pages */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  628) 		order = buddy_order(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  629) 		isolated = __isolate_free_page(page, order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  630) 		if (!isolated)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  631) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  632) 		set_page_private(page, order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  633) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  634) 		total_isolated += isolated;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  635) 		cc->nr_freepages += isolated;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  636) 		list_add_tail(&page->lru, freelist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  637) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  638) 		if (!strict && cc->nr_migratepages <= cc->nr_freepages) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  639) 			blockpfn += isolated;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  640) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  641) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  642) 		/* Advance to the end of split page */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  643) 		blockpfn += isolated - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  644) 		cursor += isolated - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  645) 		continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  646) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  647) isolate_fail:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  648) 		if (strict)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  649) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  650) 		else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  651) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  652) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  653) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  654) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  655) 	if (locked)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  656) 		spin_unlock_irqrestore(&cc->zone->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  657) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  658) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  659) 	 * There is a tiny chance that we have read bogus compound_order(),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  660) 	 * so be careful to not go outside of the pageblock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  661) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  662) 	if (unlikely(blockpfn > end_pfn))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  663) 		blockpfn = end_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  664) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  665) 	trace_mm_compaction_isolate_freepages(*start_pfn, blockpfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  666) 					nr_scanned, total_isolated);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  667) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  668) 	/* Record how far we have got within the block */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  669) 	*start_pfn = blockpfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  670) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  671) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  672) 	 * If strict isolation is requested by CMA then check that all the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  673) 	 * pages requested were isolated. If there were any failures, 0 is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  674) 	 * returned and CMA will fail.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  675) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  676) 	if (strict && blockpfn < end_pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  677) 		total_isolated = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  678) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  679) 	cc->total_free_scanned += nr_scanned;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  680) 	if (total_isolated)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  681) 		count_compact_events(COMPACTISOLATED, total_isolated);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  682) 	return total_isolated;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  683) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  684) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  685) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  686)  * isolate_freepages_range() - isolate free pages.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  687)  * @cc:        Compaction control structure.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  688)  * @start_pfn: The first PFN to start isolating.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  689)  * @end_pfn:   The one-past-last PFN.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  690)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  691)  * Non-free pages, invalid PFNs, or zone boundaries within the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  692)  * [start_pfn, end_pfn) range are considered errors and cause the function to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  693)  * undo its actions and return zero.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  694)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  695)  * Otherwise, function returns one-past-the-last PFN of isolated page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  696)  * (which may be greater than end_pfn if end fell in the middle of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  697)  * a free page).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  698)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  699) unsigned long
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  700) isolate_freepages_range(struct compact_control *cc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  701) 			unsigned long start_pfn, unsigned long end_pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  702) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  703) 	unsigned long isolated, pfn, block_start_pfn, block_end_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  704) 	LIST_HEAD(freelist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  705) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  706) 	pfn = start_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  707) 	block_start_pfn = pageblock_start_pfn(pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  708) 	if (block_start_pfn < cc->zone->zone_start_pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  709) 		block_start_pfn = cc->zone->zone_start_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  710) 	block_end_pfn = pageblock_end_pfn(pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  711) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  712) 	for (; pfn < end_pfn; pfn += isolated,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  713) 				block_start_pfn = block_end_pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  714) 				block_end_pfn += pageblock_nr_pages) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  715) 		/* Protect pfn from changing by isolate_freepages_block */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  716) 		unsigned long isolate_start_pfn = pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  717) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  718) 		block_end_pfn = min(block_end_pfn, end_pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  719) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  720) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  721) 		 * pfn could pass block_end_pfn if an isolated free page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  722) 		 * is larger than pageblock order. In this case, we adjust the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  723) 		 * scanning range to the proper one.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  724) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  725) 		if (pfn >= block_end_pfn) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  726) 			block_start_pfn = pageblock_start_pfn(pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  727) 			block_end_pfn = pageblock_end_pfn(pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  728) 			block_end_pfn = min(block_end_pfn, end_pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  729) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  730) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  731) 		if (!pageblock_pfn_to_page(block_start_pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  732) 					block_end_pfn, cc->zone))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  733) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  734) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  735) 		isolated = isolate_freepages_block(cc, &isolate_start_pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  736) 					block_end_pfn, &freelist, 0, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  737) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  738) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  739) 		 * In strict mode, isolate_freepages_block() returns 0 if
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  740) 		 * there are any holes in the block (ie. invalid PFNs or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  741) 		 * non-free pages).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  742) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  743) 		if (!isolated)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  744) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  745) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  746) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  747) 		 * If we managed to isolate pages, it is always (1 << n) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  748) 		 * pageblock_nr_pages for some non-negative n.  (Max order
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  749) 		 * page may span two pageblocks).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  750) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  751) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  752) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  753) 	/* __isolate_free_page() does not map the pages */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  754) 	split_map_pages(&freelist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  755) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  756) 	if (pfn < end_pfn) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  757) 		/* Loop terminated early, cleanup. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  758) 		release_freepages(&freelist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  759) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  760) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  761) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  762) 	/* We don't use freelists for anything. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  763) 	return pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  764) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  765) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  766) #ifdef CONFIG_COMPACTION
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  767) unsigned long isolate_and_split_free_page(struct page *page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  768) 						struct list_head *list)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  769) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  770) 	unsigned long isolated;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  771) 	unsigned int order;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  772) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  773) 	if (!PageBuddy(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  774) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  775) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  776) 	order = buddy_order(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  777) 	isolated = __isolate_free_page(page, order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  778) 	if (!isolated)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  779) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  780) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  781) 	set_page_private(page, order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  782) 	list_add(&page->lru, list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  783) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  784) 	split_map_pages(list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  785) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  786) 	return isolated;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  787) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  788) EXPORT_SYMBOL_GPL(isolate_and_split_free_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  789) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  790) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  791) /* Similar to reclaim, but different enough that they don't share logic */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  792) static bool too_many_isolated(pg_data_t *pgdat)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  793) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  794) 	unsigned long active, inactive, isolated;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  795) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  796) 	inactive = node_page_state(pgdat, NR_INACTIVE_FILE) +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  797) 			node_page_state(pgdat, NR_INACTIVE_ANON);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  798) 	active = node_page_state(pgdat, NR_ACTIVE_FILE) +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  799) 			node_page_state(pgdat, NR_ACTIVE_ANON);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  800) 	isolated = node_page_state(pgdat, NR_ISOLATED_FILE) +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  801) 			node_page_state(pgdat, NR_ISOLATED_ANON);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  802) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  803) 	return isolated > (inactive + active) / 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  804) }
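
/*
 * Illustrative example (hypothetical numbers): on a node with 600 inactive,
 * 400 active and 501 isolated LRU pages, 501 > (600 + 400) / 2, so the
 * caller is considered to have too many pages isolated and will wait (or
 * bail out, for async compaction) until migration catches up.
 */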
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  805) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  806) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  807)  * isolate_migratepages_block() - isolate all migrate-able pages within
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  808)  *				  a single pageblock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  809)  * @cc:		Compaction control structure.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  810)  * @low_pfn:	The first PFN to isolate
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  811)  * @end_pfn:	The one-past-the-last PFN to isolate, within same pageblock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  812)  * @isolate_mode: Isolation mode to be used.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  813)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  814)  * Isolate all pages that can be migrated from the range specified by
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  815)  * [low_pfn, end_pfn). The range is expected to be within same pageblock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  816)  * Returns zero if there is a fatal signal pending, otherwise PFN of the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  817)  * first page that was not scanned (which may be less than, equal to or greater
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  818)  * than end_pfn).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  819)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  820)  * The pages are isolated on cc->migratepages list (not required to be empty),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  821)  * and cc->nr_migratepages is updated accordingly. The cc->migrate_pfn field
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  822)  * is neither read nor updated.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  823)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  824) static unsigned long
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  825) isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  826) 			unsigned long end_pfn, isolate_mode_t isolate_mode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  827) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  828) 	pg_data_t *pgdat = cc->zone->zone_pgdat;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  829) 	unsigned long nr_scanned = 0, nr_isolated = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  830) 	struct lruvec *lruvec;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  831) 	unsigned long flags = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  832) 	bool locked = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  833) 	struct page *page = NULL, *valid_page = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  834) 	unsigned long start_pfn = low_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  835) 	bool skip_on_failure = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  836) 	unsigned long next_skip_pfn = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  837) 	bool skip_updated = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  838) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  839) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  840) 	 * Ensure that there are not too many pages isolated from the LRU
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  841) 	 * list by either parallel reclaimers or compaction. If there are,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  842) 	 * delay for some time until fewer pages are isolated
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  843) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  844) 	while (unlikely(too_many_isolated(pgdat))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  845) 		/* stop isolation if there are still pages not migrated */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  846) 		if (cc->nr_migratepages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  847) 			return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  848) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  849) 		/* async migration should just abort */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  850) 		if (cc->mode == MIGRATE_ASYNC)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  851) 			return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  852) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  853) 		congestion_wait(BLK_RW_ASYNC, HZ/10);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  854) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  855) 		if (fatal_signal_pending(current))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  856) 			return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  857) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  858) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  859) 	cond_resched();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  860) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  861) 	if (cc->direct_compaction && (cc->mode == MIGRATE_ASYNC)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  862) 		skip_on_failure = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  863) 		next_skip_pfn = block_end_pfn(low_pfn, cc->order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  864) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  865) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  866) 	/* Time to isolate some pages for migration */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  867) 	for (; low_pfn < end_pfn; low_pfn++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  868) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  869) 		if (skip_on_failure && low_pfn >= next_skip_pfn) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  870) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  871) 			 * We have isolated all migration candidates in the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  872) 			 * previous order-aligned block, and did not skip it due
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  873) 			 * to failure. We should migrate the pages now and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  874) 			 * hopefully succeed compaction.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  875) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  876) 			if (nr_isolated)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  877) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  878) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  879) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  880) 			 * We failed to isolate in the previous order-aligned
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  881) 			 * block. Set the new boundary to the end of the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  882) 			 * current block. Note we can't simply increase
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  883) 			 * next_skip_pfn by 1 << order, as low_pfn might have
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  884) 			 * been incremented by a higher number due to skipping
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  885) 			 * a compound or a high-order buddy page in the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  886) 			 * previous loop iteration.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  887) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  888) 			next_skip_pfn = block_end_pfn(low_pfn, cc->order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  889) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  890) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  891) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  892) 		 * Periodically drop the lock (if held) regardless of its
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  893) 		 * contention, to give a chance to IRQs. Abort completely if
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  894) 		 * a fatal signal is pending.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  895) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  896) 		if (!(low_pfn % SWAP_CLUSTER_MAX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  897) 		    && compact_unlock_should_abort(&pgdat->lru_lock,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  898) 					    flags, &locked, cc)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  899) 			low_pfn = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  900) 			goto fatal_pending;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  901) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  902) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  903) 		if (!pfn_valid_within(low_pfn))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  904) 			goto isolate_fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  905) 		nr_scanned++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  906) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  907) 		page = pfn_to_page(low_pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  908) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  909) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  910) 		 * Check if the pageblock has already been marked skipped.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  911) 		 * Only the aligned PFN is checked as the caller isolates
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  912) 		 * COMPACT_CLUSTER_MAX at a time so the second call must
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  913) 		 * not falsely conclude that the block should be skipped.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  914) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  915) 		if (!valid_page && IS_ALIGNED(low_pfn, pageblock_nr_pages)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  916) 			if (!cc->ignore_skip_hint && get_pageblock_skip(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  917) 				low_pfn = end_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  918) 				goto isolate_abort;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  919) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  920) 			valid_page = page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  921) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  922) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  923) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  924) 		 * Skip if free. We read page order here without zone lock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  925) 		 * which is generally unsafe, but the race window is small and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  926) 		 * the worst thing that can happen is that we skip some
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  927) 		 * potential isolation targets.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  928) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  929) 		if (PageBuddy(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  930) 			unsigned long freepage_order = buddy_order_unsafe(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  931) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  932) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  933) 			 * Without lock, we cannot be sure that what we got is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  934) 			 * a valid page order. Consider only values in the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  935) 			 * valid order range to prevent low_pfn overflow.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  936) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  937) 			if (freepage_order > 0 && freepage_order < MAX_ORDER)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  938) 				low_pfn += (1UL << freepage_order) - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  939) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  940) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  941) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  942) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  943) 		 * Regardless of being on LRU, compound pages such as THP and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  944) 		 * hugetlbfs are not to be compacted unless we are attempting
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  945) 		 * an allocation much larger than the huge page size (eg CMA).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  946) 		 * We can potentially save a lot of iterations if we skip them
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  947) 		 * at once. The check is racy, but we can consider only valid
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  948) 		 * values and the only danger is skipping too much.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  949) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  950) 		if (PageCompound(page) && !cc->alloc_contig) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  951) 			const unsigned int order = compound_order(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  952) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  953) 			if (likely(order < MAX_ORDER))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  954) 				low_pfn += (1UL << order) - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  955) 			goto isolate_fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  956) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  957) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  958) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  959) 		 * Check may be lockless but that's ok as we recheck later.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  960) 		 * It's possible to migrate LRU and non-LRU movable pages.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  961) 		 * Skip any other type of page.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  962) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  963) 		if (!PageLRU(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  964) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  965) 			 * __PageMovable can return false positive so we need
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  966) 			 * to verify it under page_lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  967) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  968) 			if (unlikely(__PageMovable(page)) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  969) 					!PageIsolated(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  970) 				if (locked) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  971) 					spin_unlock_irqrestore(&pgdat->lru_lock,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  972) 									flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  973) 					locked = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  974) 				}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  975) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  976) 				if (!isolate_movable_page(page, isolate_mode))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  977) 					goto isolate_success;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  978) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  979) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  980) 			goto isolate_fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  981) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  982) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  983) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  984) 		 * Migration will fail if an anonymous page is pinned in memory,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  985) 		 * so avoid taking lru_lock and isolating it unnecessarily in an
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  986) 		 * admittedly racy check.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  987) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  988) 		if (!page_mapping(page) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  989) 		    page_count(page) > page_mapcount(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  990) 			goto isolate_fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  991) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  992) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  993) 		 * Only allow to migrate anonymous pages in GFP_NOFS context
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  994) 		 * because those do not depend on fs locks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  995) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  996) 		if (!(cc->gfp_mask & __GFP_FS) && page_mapping(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  997) 			goto isolate_fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  998) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  999) 		/* If we already hold the lock, we can skip some rechecking */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000) 		if (!locked) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) 			locked = compact_lock_irqsave(&pgdat->lru_lock,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002) 								&flags, cc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004) 			/* Try to get exclusive access under lock */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005) 			if (!skip_updated) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006) 				skip_updated = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007) 				if (test_and_set_skip(cc, page, low_pfn))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008) 					goto isolate_abort;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011) 			/* Recheck PageLRU and PageCompound under lock */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012) 			if (!PageLRU(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013) 				goto isolate_fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016) 			 * The page became compound since the non-locked check,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017) 			 * and it's on the LRU. It can only be a THP so the order
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018) 			 * is safe to read and it's 0 for tail pages.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020) 			if (unlikely(PageCompound(page) && !cc->alloc_contig)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) 				low_pfn += compound_nr(page) - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022) 				goto isolate_fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026) 		lruvec = mem_cgroup_page_lruvec(page, pgdat);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028) 		/* Try isolate the page */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029) 		if (__isolate_lru_page(page, isolate_mode) != 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030) 			goto isolate_fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) 		/* The whole page is taken off the LRU; skip the tail pages. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033) 		if (PageCompound(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034) 			low_pfn += compound_nr(page) - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036) 		/* Successfully isolated */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037) 		del_page_from_lru_list(page, lruvec, page_lru(page));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038) 		mod_node_page_state(page_pgdat(page),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039) 				NR_ISOLATED_ANON + page_is_file_lru(page),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040) 				thp_nr_pages(page));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042) isolate_success:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) 		list_add(&page->lru, &cc->migratepages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044) 		cc->nr_migratepages += compound_nr(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045) 		nr_isolated += compound_nr(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) 		 * Avoid isolating too much unless this block is being
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049) 		 * rescanned (e.g. dirty/writeback pages, parallel allocation)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050) 		 * or a lock is contended. For contention, isolate quickly to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051) 		 * potentially remove one source of contention.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053) 		if (cc->nr_migratepages >= COMPACT_CLUSTER_MAX &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) 		    !cc->rescan && !cc->contended) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055) 			++low_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059) 		continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060) isolate_fail:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061) 		if (!skip_on_failure)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065) 		 * We have isolated some pages, but then failed. Release them
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066) 		 * instead of migrating, as we cannot form the cc->order buddy
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067) 		 * page anyway.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069) 		if (nr_isolated) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070) 			if (locked) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071) 				spin_unlock_irqrestore(&pgdat->lru_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072) 				locked = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074) 			putback_movable_pages(&cc->migratepages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075) 			cc->nr_migratepages = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076) 			nr_isolated = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079) 		if (low_pfn < next_skip_pfn) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080) 			low_pfn = next_skip_pfn - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082) 			 * The check near the loop beginning would have updated
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083) 			 * next_skip_pfn too, but this is a bit simpler.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085) 			next_skip_pfn += 1UL << cc->order;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090) 	 * The PageBuddy() check could have potentially brought us outside
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091) 	 * the range to be scanned.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093) 	if (unlikely(low_pfn > end_pfn))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094) 		low_pfn = end_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096) isolate_abort:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097) 	if (locked)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098) 		spin_unlock_irqrestore(&pgdat->lru_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101) 	 * Update the cached scanner pfn once the pageblock has been scanned.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102) 	 * Pages will either be migrated in which case there is no point
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103) 	 * scanning in the near future or migration failed in which case the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104) 	 * failure reason may persist. The block is marked for skipping if
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105) 	 * there were no pages isolated in the block or if the block is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106) 	 * rescanned twice in a row.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108) 	if (low_pfn == end_pfn && (!nr_isolated || cc->rescan)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109) 		if (valid_page && !skip_updated)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110) 			set_pageblock_skip(valid_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111) 		update_cached_migrate(cc, low_pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1113) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114) 	trace_mm_compaction_isolate_migratepages(start_pfn, low_pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115) 						nr_scanned, nr_isolated);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117) fatal_pending:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118) 	cc->total_migrate_scanned += nr_scanned;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119) 	if (nr_isolated)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120) 		count_compact_events(COMPACTISOLATED, nr_isolated);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122) 	return low_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123) }
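
/*
 * Descriptive summary of the fast-path checks above: free buddy pages are
 * skipped by their (unsafely read) order, compound pages are skipped whole
 * unless cc->alloc_contig is set, non-LRU pages are only taken via the
 * __PageMovable() path, apparently pinned anonymous pages and (in !__GFP_FS
 * contexts) pages with a mapping are rejected early, and only then is the
 * lru_lock taken and PageLRU/PageCompound re-checked before isolation.
 */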
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126)  * isolate_migratepages_range() - isolate migrate-able pages in a PFN range
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127)  * @cc:        Compaction control structure.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128)  * @start_pfn: The first PFN to start isolating.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129)  * @end_pfn:   The one-past-last PFN.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131)  * Returns zero if isolation fails fatally due to e.g. pending signal.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132)  * Otherwise, function returns one-past-the-last PFN of isolated page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133)  * (which may be greater than end_pfn if end fell in the middle of a THP page).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135) unsigned long
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136) isolate_migratepages_range(struct compact_control *cc, unsigned long start_pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137) 							unsigned long end_pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139) 	unsigned long pfn, block_start_pfn, block_end_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141) 	/* Scan block by block. First and last block may be incomplete */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142) 	pfn = start_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143) 	block_start_pfn = pageblock_start_pfn(pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144) 	if (block_start_pfn < cc->zone->zone_start_pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145) 		block_start_pfn = cc->zone->zone_start_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146) 	block_end_pfn = pageblock_end_pfn(pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148) 	for (; pfn < end_pfn; pfn = block_end_pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149) 				block_start_pfn = block_end_pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150) 				block_end_pfn += pageblock_nr_pages) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152) 		block_end_pfn = min(block_end_pfn, end_pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154) 		if (!pageblock_pfn_to_page(block_start_pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155) 					block_end_pfn, cc->zone))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158) 		pfn = isolate_migratepages_block(cc, pfn, block_end_pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159) 							ISOLATE_UNEVICTABLE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161) 		if (!pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164) 		if (cc->nr_migratepages >= COMPACT_CLUSTER_MAX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168) 	return pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169) }
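
/*
 * Illustrative caller sketch (hedged; loosely modelled on how a CMA-style
 * contiguous allocation path might drive this scanner, not copied from any
 * real caller in this tree):
 *
 *	unsigned long pfn = start;
 *
 *	while (pfn < end) {
 *		pfn = isolate_migratepages_range(cc, pfn, end);
 *		if (!pfn)
 *			return -EINTR;
 *		migrate the pages now queued on cc->migratepages,
 *		then put back whatever could not be moved;
 *	}
 */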
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171) #endif /* CONFIG_COMPACTION || CONFIG_CMA */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172) #ifdef CONFIG_COMPACTION
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174) static bool suitable_migration_source(struct compact_control *cc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175) 							struct page *page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177) 	int block_mt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179) 	if (pageblock_skip_persistent(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182) 	if ((cc->mode != MIGRATE_ASYNC) || !cc->direct_compaction)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183) 		return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185) 	block_mt = get_pageblock_migratetype(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187) 	if (cc->migratetype == MIGRATE_MOVABLE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188) 		return is_migrate_movable(block_mt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1189) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190) 		return block_mt == cc->migratetype;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193) /* Returns true if the page is within a block suitable for migration to */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194) static bool suitable_migration_target(struct compact_control *cc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195) 							struct page *page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197) 	/* If the page is a large free page, then disallow migration */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198) 	if (PageBuddy(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200) 		 * We are checking page_order without zone->lock taken. But
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201) 		 * the only small danger is that we skip a potentially suitable
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202) 		 * pageblock, so it's not worth checking the order for a valid range.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204) 		if (buddy_order_unsafe(page) >= pageblock_order)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205) 			return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208) 	if (cc->ignore_block_suitable)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1209) 		return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211) 	/* If the block is MIGRATE_MOVABLE or MIGRATE_CMA, allow migration */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212) 	if (is_migrate_movable(get_pageblock_migratetype(page)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213) 		return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215) 	/* Otherwise skip the block */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216) 	return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219) static inline unsigned int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220) freelist_scan_limit(struct compact_control *cc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222) 	unsigned short shift = BITS_PER_LONG - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224) 	return (COMPACT_CLUSTER_MAX >> min(shift, cc->fast_search_fail)) + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225) }
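
/*
 * Illustrative values (assuming COMPACT_CLUSTER_MAX is 32, i.e.
 * SWAP_CLUSTER_MAX): with fast_search_fail == 0 the limit is
 * (32 >> 0) + 1 = 33 freelist entries per order, after 3 failures it drops
 * to (32 >> 3) + 1 = 5, and for 6 or more failures it bottoms out at
 * (32 >> 6) + 1 = 1.
 */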
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228)  * Test whether the free scanner has reached the same or lower pageblock than
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229)  * the migration scanner, and compaction should thus terminate.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231) static inline bool compact_scanners_met(struct compact_control *cc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233) 	return (cc->free_pfn >> pageblock_order)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234) 		<= (cc->migrate_pfn >> pageblock_order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235) }
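
/*
 * Worked example (assuming pageblock_order == 9, i.e. 2MB pageblocks with
 * 4K pages): free_pfn == 0x8100 and migrate_pfn == 0x8000 both map to
 * pageblock 0x40, so 0x40 <= 0x40 and the scanners are considered to have
 * met; compaction of this zone should terminate.
 */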
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1237) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1238)  * Used when scanning for a suitable migration target which scans freelists
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1239)  * in reverse. Reorders the list so that the unscanned pages are scanned
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1240)  * first on the next iteration of the free scanner.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1241)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1242) static void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1243) move_freelist_head(struct list_head *freelist, struct page *freepage)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1244) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1245) 	LIST_HEAD(sublist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1246) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1247) 	if (!list_is_last(freelist, &freepage->lru)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1248) 		list_cut_before(&sublist, freelist, &freepage->lru);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1249) 		if (!list_empty(&sublist))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1250) 			list_splice_tail(&sublist, freelist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1251) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1252) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1253) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1254) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1255)  * Similar to move_freelist_head except used by the migration scanner
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1256)  * when scanning forward. It's possible for these list operations to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1257)  * move against each other if they search the free list exactly in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1258)  * lockstep.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1259)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1260) static void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1261) move_freelist_tail(struct list_head *freelist, struct page *freepage)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1262) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1263) 	LIST_HEAD(sublist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1264) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1265) 	if (!list_is_first(freelist, &freepage->lru)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1266) 		list_cut_position(&sublist, freelist, &freepage->lru);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1267) 		if (!list_empty(&sublist))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1268) 			list_splice_tail(&sublist, freelist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1269) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1270) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1271) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1272) static void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1273) fast_isolate_around(struct compact_control *cc, unsigned long pfn, unsigned long nr_isolated)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1274) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1275) 	unsigned long start_pfn, end_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1276) 	struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1277) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1278) 	/* Do not search around if there are enough pages already */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1279) 	if (cc->nr_freepages >= cc->nr_migratepages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1280) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1281) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1282) 	/* Minimise scanning during async compaction */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1283) 	if (cc->direct_compaction && cc->mode == MIGRATE_ASYNC)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1284) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1285) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1286) 	/* Pageblock boundaries */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1287) 	start_pfn = max(pageblock_start_pfn(pfn), cc->zone->zone_start_pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1288) 	end_pfn = min(pageblock_end_pfn(pfn), zone_end_pfn(cc->zone));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1289) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1290) 	page = pageblock_pfn_to_page(start_pfn, end_pfn, cc->zone);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1291) 	if (!page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1292) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1293) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1294) 	/* Scan before */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1295) 	if (start_pfn != pfn) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1296) 		isolate_freepages_block(cc, &start_pfn, pfn, &cc->freepages, 1, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1297) 		if (cc->nr_freepages >= cc->nr_migratepages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1298) 			return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1299) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1300) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1301) 	/* Scan after */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1302) 	start_pfn = pfn + nr_isolated;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1303) 	if (start_pfn < end_pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1304) 		isolate_freepages_block(cc, &start_pfn, end_pfn, &cc->freepages, 1, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1305) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1306) 	/* Skip this pageblock in the future as it's full or nearly full */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1307) 	if (cc->nr_freepages < cc->nr_migratepages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1308) 		set_pageblock_skip(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1309) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1310) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1311) /* Search orders in round-robin fashion */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1312) static int next_search_order(struct compact_control *cc, int order)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1313) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1314) 	order--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1315) 	if (order < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1316) 		order = cc->order - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1317) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1318) 	/* Search wrapped around? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1319) 	if (order == cc->search_order) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1320) 		cc->search_order--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1321) 		if (cc->search_order < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1322) 			cc->search_order = cc->order - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1323) 		return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1324) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1325) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1326) 	return order;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1327) }
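
/*
 * Illustrative walk (hypothetical values): with cc->order == 4 and
 * cc->search_order == 2, the caller's loop visits orders 2, 1, 0, then
 * wraps to 3; when the wrap reaches the starting order again this helper
 * returns -1 (and nudges search_order down) so the loop terminates.
 */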
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1328) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1329) static unsigned long
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1330) fast_isolate_freepages(struct compact_control *cc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1331) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1332) 	unsigned int limit = min(1U, freelist_scan_limit(cc) >> 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1333) 	unsigned int nr_scanned = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1334) 	unsigned long low_pfn, min_pfn, highest = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1335) 	unsigned long nr_isolated = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1336) 	unsigned long distance;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1337) 	struct page *page = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1338) 	bool scan_start = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1339) 	int order;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1340) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1341) 	/* Full compaction passes in a negative order */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1342) 	if (cc->order <= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1343) 		return cc->free_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1344) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1345) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1346) 	 * If starting the scan, use a deeper search and use the highest
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1347) 	 * PFN found if a suitable one is not found.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1348) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1349) 	if (cc->free_pfn >= cc->zone->compact_init_free_pfn) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1350) 		limit = pageblock_nr_pages >> 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1351) 		scan_start = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1352) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1353) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1354) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1355) 	 * Preferred point is in the top quarter of the scan space but take
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1356) 	 * a pfn from the top half if the search is problematic.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1357) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1358) 	distance = (cc->free_pfn - cc->migrate_pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1359) 	low_pfn = pageblock_start_pfn(cc->free_pfn - (distance >> 2));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1360) 	min_pfn = pageblock_start_pfn(cc->free_pfn - (distance >> 1));
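	/*
	 * Worked example (hypothetical PFNs): with migrate_pfn == 0x1000 and
	 * free_pfn == 0x9000, distance == 0x8000, so low_pfn starts at the
	 * pageblock containing 0x7000 (top quarter of the scan space) and
	 * min_pfn at the one containing 0x5000 (top half).
	 */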
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1361) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1362) 	if (WARN_ON_ONCE(min_pfn > low_pfn))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1363) 		low_pfn = min_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1364) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1365) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1366) 	 * Search starts from the last successful isolation order or the next
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1367) 	 * order to search after a previous failure
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1368) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1369) 	cc->search_order = min_t(unsigned int, cc->order - 1, cc->search_order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1370) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1371) 	for (order = cc->search_order;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1372) 	     !page && order >= 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1373) 	     order = next_search_order(cc, order)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1374) 		struct free_area *area = &cc->zone->free_area[order];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1375) 		struct list_head *freelist;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1376) 		struct page *freepage;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1377) 		unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1378) 		unsigned int order_scanned = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1379) 		unsigned long high_pfn = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1380) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1381) 		if (!area->nr_free)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1382) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1383) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1384) 		spin_lock_irqsave(&cc->zone->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1385) 		freelist = &area->free_list[MIGRATE_MOVABLE];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1386) 		list_for_each_entry_reverse(freepage, freelist, lru) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1387) 			unsigned long pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1388) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1389) 			order_scanned++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1390) 			nr_scanned++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1391) 			pfn = page_to_pfn(freepage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1392) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1393) 			if (pfn >= highest)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1394) 				highest = max(pageblock_start_pfn(pfn),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1395) 					      cc->zone->zone_start_pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1396) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1397) 			if (pfn >= low_pfn) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1398) 				cc->fast_search_fail = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1399) 				cc->search_order = order;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1400) 				page = freepage;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1401) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1402) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1403) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1404) 			if (pfn >= min_pfn && pfn > high_pfn) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1405) 				high_pfn = pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1406) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1407) 				/* Shorten the scan if a candidate is found */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1408) 				limit >>= 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1409) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1410) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1411) 			if (order_scanned >= limit)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1412) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1413) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1414) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1415) 		/* Use a minimum pfn if a preferred one was not found */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1416) 		if (!page && high_pfn) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1417) 			page = pfn_to_page(high_pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1418) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1419) 			/* Update freepage for the list reorder below */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1420) 			freepage = page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1421) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1422) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1423) 		/* Reorder so that a future search skips recent pages */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1424) 		move_freelist_head(freelist, freepage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1425) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1426) 		/* Isolate the page if available */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1427) 		if (page) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1428) 			if (__isolate_free_page(page, order)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1429) 				set_page_private(page, order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1430) 				nr_isolated = 1 << order;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1431) 				cc->nr_freepages += nr_isolated;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1432) 				list_add_tail(&page->lru, &cc->freepages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1433) 				count_compact_events(COMPACTISOLATED, nr_isolated);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1434) 			} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1435) 				/* If isolation fails, abort the search */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1436) 				order = cc->search_order + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1437) 				page = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1438) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1439) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1440) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1441) 		spin_unlock_irqrestore(&cc->zone->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1442) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1443) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1444) 		 * Smaller scan on next order so the total scan is related
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1445) 		 * to freelist_scan_limit.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1446) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1447) 		if (order_scanned >= limit)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1448) 			limit = min(1U, limit >> 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1449) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1450) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1451) 	if (!page) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1452) 		cc->fast_search_fail++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1453) 		if (scan_start) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1454) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1455) 			 * Use the highest PFN found above min. If one was
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1456) 			 * not found, be pessimistic for direct compaction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1457) 			 * and use the min mark.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1458) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1459) 			if (highest) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1460) 				page = pfn_to_page(highest);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1461) 				cc->free_pfn = highest;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1462) 			} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1463) 				if (cc->direct_compaction && pfn_valid(min_pfn)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1464) 					page = pageblock_pfn_to_page(min_pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1465) 						min(pageblock_end_pfn(min_pfn),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1466) 						    zone_end_pfn(cc->zone)),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1467) 						cc->zone);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1468) 					cc->free_pfn = min_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1469) 				}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1470) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1471) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1472) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1473) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1474) 	if (highest && highest >= cc->zone->compact_cached_free_pfn) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1475) 		highest -= pageblock_nr_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1476) 		cc->zone->compact_cached_free_pfn = highest;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1477) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1478) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1479) 	cc->total_free_scanned += nr_scanned;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1480) 	if (!page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1481) 		return cc->free_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1482) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1483) 	low_pfn = page_to_pfn(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1484) 	fast_isolate_around(cc, low_pfn, nr_isolated);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1485) 	return low_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1486) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1487) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1488) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1489)  * Based on information in the current compact_control, find blocks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1490)  * suitable for isolating free pages from and then isolate them.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1491)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1492) static void isolate_freepages(struct compact_control *cc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1493) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1494) 	struct zone *zone = cc->zone;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1495) 	struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1496) 	unsigned long block_start_pfn;	/* start of current pageblock */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1497) 	unsigned long isolate_start_pfn; /* exact pfn we start at */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1498) 	unsigned long block_end_pfn;	/* end of current pageblock */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1499) 	unsigned long low_pfn;	     /* lowest pfn scanner is able to scan */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1500) 	struct list_head *freelist = &cc->freepages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1501) 	unsigned int stride;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1502) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1503) 	/* Try a small search of the free lists for a candidate */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1504) 	isolate_start_pfn = fast_isolate_freepages(cc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1505) 	if (cc->nr_freepages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1506) 		goto splitmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1507) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1508) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1509) 	 * Initialise the free scanner. The starting point is where we last
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1510) 	 * successfully isolated from, zone-cached value, or the end of the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1511) 	 * zone when isolating for the first time. For looping we also need
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1512) 	 * this pfn aligned down to the pageblock boundary, because we do
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1513) 	 * block_start_pfn -= pageblock_nr_pages in the for loop.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1514) 	 * For ending point, take care when isolating in last pageblock of a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1515) 	 * zone which ends in the middle of a pageblock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1516) 	 * The low boundary is the end of the pageblock the migration scanner
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1517) 	 * is using.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1518) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1519) 	isolate_start_pfn = cc->free_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1520) 	block_start_pfn = pageblock_start_pfn(isolate_start_pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1521) 	block_end_pfn = min(block_start_pfn + pageblock_nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1522) 						zone_end_pfn(zone));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1523) 	low_pfn = pageblock_end_pfn(cc->migrate_pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1524) 	stride = cc->mode == MIGRATE_ASYNC ? COMPACT_CLUSTER_MAX : 1;
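	/*
	 * Illustration of the alignment above, assuming 4 KiB pages and
	 * pageblock_order == 9 (512-page, 2 MiB pageblocks): for a cached
	 * free_pfn of 0x12345, block_start_pfn rounds down to 0x12200 and
	 * block_end_pfn to 0x12400 (capped at zone_end_pfn), so the loop
	 * below always walks whole pageblocks even when the cached pfn
	 * itself is unaligned.
	 */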
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1525) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1526) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1527) 	 * Isolate free pages until enough are available to migrate the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1528) 	 * pages on cc->migratepages. We stop searching if the migrate
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1529) 	 * and free page scanners meet or enough free pages are isolated.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1530) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1531) 	for (; block_start_pfn >= low_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1532) 				block_end_pfn = block_start_pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1533) 				block_start_pfn -= pageblock_nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1534) 				isolate_start_pfn = block_start_pfn) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1535) 		unsigned long nr_isolated;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1536) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1537) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1538) 		 * This can iterate a massively long zone without finding any
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1539) 		 * suitable migration targets, so periodically check whether to reschedule.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1540) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1541) 		if (!(block_start_pfn % (SWAP_CLUSTER_MAX * pageblock_nr_pages)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1542) 			cond_resched();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1543) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1544) 		page = pageblock_pfn_to_page(block_start_pfn, block_end_pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1545) 									zone);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1546) 		if (!page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1547) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1548) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1549) 		/* Check the block is suitable for migration */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1550) 		if (!suitable_migration_target(cc, page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1551) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1552) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1553) 		/* If isolation recently failed, do not retry */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1554) 		if (!isolation_suitable(cc, page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1555) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1556) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1557) 		/* Found a block suitable for isolating free pages from. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1558) 		nr_isolated = isolate_freepages_block(cc, &isolate_start_pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1559) 					block_end_pfn, freelist, stride, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1560) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1561) 		/* Update the skip hint if the full pageblock was scanned */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1562) 		if (isolate_start_pfn == block_end_pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1563) 			update_pageblock_skip(cc, page, block_start_pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1564) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1565) 		/* Are enough freepages isolated? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1566) 		if (cc->nr_freepages >= cc->nr_migratepages) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1567) 			if (isolate_start_pfn >= block_end_pfn) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1568) 				/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1569) 				 * Restart at previous pageblock if more
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1570) 				 * freepages can be isolated next time.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1571) 				 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1572) 				isolate_start_pfn =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1573) 					block_start_pfn - pageblock_nr_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1574) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1575) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1576) 		} else if (isolate_start_pfn < block_end_pfn) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1577) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1578) 			 * If isolation failed early, do not continue
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1579) 			 * needlessly.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1580) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1581) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1582) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1583) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1584) 		/* Adjust stride depending on isolation */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1585) 		if (nr_isolated) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1586) 			stride = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1587) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1588) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1589) 		stride = min_t(unsigned int, COMPACT_CLUSTER_MAX, stride << 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1590) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1591) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1592) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1593) 	 * Record where the free scanner will restart next time. Either we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1594) 	 * broke from the loop and set isolate_start_pfn based on the last
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1595) 	 * call to isolate_freepages_block(), or we met the migration scanner
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1596) 	 * and the loop terminated due to isolate_start_pfn < low_pfn
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1597) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1598) 	cc->free_pfn = isolate_start_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1599) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1600) splitmap:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1601) 	/* __isolate_free_page() does not map the pages */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1602) 	split_map_pages(freelist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1603) }
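
/*
 * A worked example of the stride adaptation in isolate_freepages(),
 * assuming COMPACT_CLUSTER_MAX follows SWAP_CLUSTER_MAX (32): a sync
 * scan starts with stride 1 and doubles through 2, 4, 8, 16 to 32 while
 * pageblocks yield nothing, dropping back to 1 as soon as a block
 * produces isolated pages; an async scan starts coarse at 32 and
 * likewise falls back to 1 on the first productive block.
 */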
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1604) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1605) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1606)  * This is a migrate-callback that "allocates" freepages by taking pages
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1607)  * from the isolated freelists in the block we are migrating to.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1608)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1609) static struct page *compaction_alloc(struct page *migratepage,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1610) 					unsigned long data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1611) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1612) 	struct compact_control *cc = (struct compact_control *)data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1613) 	struct page *freepage;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1614) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1615) 	if (list_empty(&cc->freepages)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1616) 		isolate_freepages(cc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1617) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1618) 		if (list_empty(&cc->freepages))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1619) 			return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1620) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1621) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1622) 	freepage = list_entry(cc->freepages.next, struct page, lru);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1623) 	list_del(&freepage->lru);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1624) 	cc->nr_freepages--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1625) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1626) 	return freepage;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1627) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1628) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1629) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1630)  * This is a migrate-callback that "frees" freepages back to the isolated
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1631)  * freelist.  All pages on the freelist are from the same zone, so there is no
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1632)  * special handling needed for NUMA.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1633)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1634) static void compaction_free(struct page *page, unsigned long data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1635) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1636) 	struct compact_control *cc = (struct compact_control *)data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1637) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1638) 	list_add(&page->lru, &cc->freepages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1639) 	cc->nr_freepages++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1640) }
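
/*
 * Sketch of how this callback pair is consumed (the call itself sits in
 * compact_zone() further down): compaction_alloc() and compaction_free()
 * are handed to migrate_pages() along with the compact_control pointer
 * cast to unsigned long, so every page being migrated draws its target
 * from cc->freepages and returns it there if the migration fails.
 */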
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1641) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1642) /* possible outcome of isolate_migratepages */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1643) typedef enum {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1644) 	ISOLATE_ABORT,		/* Abort compaction now */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1645) 	ISOLATE_NONE,		/* No pages isolated, continue scanning */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1646) 	ISOLATE_SUCCESS,	/* Pages isolated, migrate */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1647) } isolate_migrate_t;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1648) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1649) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1650)  * Allow userspace to control policy on scanning the unevictable LRU for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1651)  * compactable pages.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1652)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1653) #ifdef CONFIG_PREEMPT_RT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1654) int sysctl_compact_unevictable_allowed __read_mostly = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1655) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1656) int sysctl_compact_unevictable_allowed __read_mostly = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1657) #endif
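
/*
 * Example of flipping this policy at run time via the matching sysctl:
 *
 *	echo 0 > /proc/sys/vm/compact_unevictable_allowed
 *
 * disables scanning of the unevictable LRU for compaction, mirroring the
 * CONFIG_PREEMPT_RT default above.
 */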
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1658) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1659) static inline void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1660) update_fast_start_pfn(struct compact_control *cc, unsigned long pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1661) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1662) 	if (cc->fast_start_pfn == ULONG_MAX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1663) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1664) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1665) 	if (!cc->fast_start_pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1666) 		cc->fast_start_pfn = pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1667) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1668) 	cc->fast_start_pfn = min(cc->fast_start_pfn, pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1669) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1670) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1671) static inline unsigned long
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1672) reinit_migrate_pfn(struct compact_control *cc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1673) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1674) 	if (!cc->fast_start_pfn || cc->fast_start_pfn == ULONG_MAX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1675) 		return cc->migrate_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1676) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1677) 	cc->migrate_pfn = cc->fast_start_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1678) 	cc->fast_start_pfn = ULONG_MAX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1679) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1680) 	return cc->migrate_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1681) }
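
/*
 * Note on the two helpers above: update_fast_start_pfn() remembers the
 * lowest PFN that fast_find_migrateblock() has picked so far, with
 * ULONG_MAX serving as an "already consumed" marker, and
 * reinit_migrate_pfn() replays that cached PFN as the restart point for
 * the linear migration scan once the fast search stops finding blocks.
 */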
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1682) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1683) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1684)  * Briefly search the free lists for a migration source that already has
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1685)  * some free pages to reduce the number of pages that need migration
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1686)  * before a pageblock is free.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1687)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1688) static unsigned long fast_find_migrateblock(struct compact_control *cc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1689) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1690) 	unsigned int limit = freelist_scan_limit(cc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1691) 	unsigned int nr_scanned = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1692) 	unsigned long distance;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1693) 	unsigned long pfn = cc->migrate_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1694) 	unsigned long high_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1695) 	int order;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1696) 	bool found_block = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1697) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1698) 	/* Skip hints are relied on to avoid repeats on the fast search */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1699) 	if (cc->ignore_skip_hint)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1700) 		return pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1701) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1702) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1703) 	 * If the migrate_pfn is not at the start of a zone or the start
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1704) 	 * of a pageblock then assume this is a continuation of a previous
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1705) 	 * scan restarted due to COMPACT_CLUSTER_MAX.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1706) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1707) 	if (pfn != cc->zone->zone_start_pfn && pfn != pageblock_start_pfn(pfn))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1708) 		return pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1709) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1710) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1711) 	 * For smaller orders, just linearly scan as the number of pages
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1712) 	 * to migrate should be relatively small and does not necessarily
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1713) 	 * justify freeing up a large block for a small allocation.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1714) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1715) 	if (cc->order <= PAGE_ALLOC_COSTLY_ORDER)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1716) 		return pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1717) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1718) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1719) 	 * Only allow kcompactd and direct requests for movable pages to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1720) 	 * quickly clear out a MOVABLE pageblock for allocation. This
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1721) 	 * reduces the risk that a large movable pageblock is freed for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1722) 	 * an unmovable/reclaimable small allocation.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1723) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1724) 	if (cc->direct_compaction && cc->migratetype != MIGRATE_MOVABLE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1725) 		return pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1726) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1727) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1728) 	 * When starting the migration scanner, pick any pageblock within the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1729) 	 * first half of the search space. Otherwise try and pick a pageblock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1730) 	 * within the first eighth to reduce the chances that a migration
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1731) 	 * target later becomes a source.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1732) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1733) 	distance = (cc->free_pfn - cc->migrate_pfn) >> 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1734) 	if (cc->migrate_pfn != cc->zone->zone_start_pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1735) 		distance >>= 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1736) 	high_pfn = pageblock_start_pfn(cc->migrate_pfn + distance);
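	/*
	 * Worked example, assuming 4 KiB pages: with 1,048,576 PFNs (4 GiB)
	 * between the two scanners, distance is 524,288 while the migration
	 * scanner is still at the zone start (search the first half) and
	 * 131,072 on later passes (the first eighth); high_pfn caps the
	 * fast search at that pageblock-aligned boundary.
	 */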
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1737) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1738) 	for (order = cc->order - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1739) 	     order >= PAGE_ALLOC_COSTLY_ORDER && !found_block && nr_scanned < limit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1740) 	     order--) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1741) 		struct free_area *area = &cc->zone->free_area[order];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1742) 		struct list_head *freelist;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1743) 		unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1744) 		struct page *freepage;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1745) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1746) 		if (!area->nr_free)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1747) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1748) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1749) 		spin_lock_irqsave(&cc->zone->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1750) 		freelist = &area->free_list[MIGRATE_MOVABLE];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1751) 		list_for_each_entry(freepage, freelist, lru) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1752) 			unsigned long free_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1753) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1754) 			if (nr_scanned++ >= limit) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1755) 				move_freelist_tail(freelist, freepage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1756) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1757) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1758) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1759) 			free_pfn = page_to_pfn(freepage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1760) 			if (free_pfn < high_pfn) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1761) 				/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1762) 				 * Avoid if skipped recently. Ideally it would
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1763) 				 * move to the tail but even safe iteration of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1764) 				 * the list assumes an entry is deleted, not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1765) 				 * reordered.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1766) 				 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1767) 				if (get_pageblock_skip(freepage))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1768) 					continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1769) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1770) 				/* Reorder so that a future search skips recent pages */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1771) 				move_freelist_tail(freelist, freepage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1772) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1773) 				update_fast_start_pfn(cc, free_pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1774) 				pfn = pageblock_start_pfn(free_pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1775) 				cc->fast_search_fail = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1776) 				found_block = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1777) 				set_pageblock_skip(freepage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1778) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1779) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1780) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1781) 		spin_unlock_irqrestore(&cc->zone->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1782) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1783) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1784) 	cc->total_migrate_scanned += nr_scanned;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1785) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1786) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1787) 	 * If fast scanning failed then use a cached entry for a page block
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1788) 	 * that had free pages as the basis for starting a linear scan.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1789) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1790) 	if (!found_block) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1791) 		cc->fast_search_fail++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1792) 		pfn = reinit_migrate_pfn(cc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1793) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1794) 	return pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1795) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1796) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1797) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1798)  * Isolate all pages that can be migrated from the first suitable block,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1799)  * starting at the block pointed to by the migrate scanner pfn within
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1800)  * compact_control.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1801)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1802) static isolate_migrate_t isolate_migratepages(struct compact_control *cc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1803) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1804) 	unsigned long block_start_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1805) 	unsigned long block_end_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1806) 	unsigned long low_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1807) 	struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1808) 	const isolate_mode_t isolate_mode =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1809) 		(sysctl_compact_unevictable_allowed ? ISOLATE_UNEVICTABLE : 0) |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1810) 		(cc->mode != MIGRATE_SYNC ? ISOLATE_ASYNC_MIGRATE : 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1811) 	bool fast_find_block;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1812) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1813) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1814) 	 * Start at where we last stopped, or beginning of the zone as
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1815) 	 * initialized by compact_zone(). The first failure will use
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1816) 	 * the lowest PFN as the starting point for linear scanning.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1817) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1818) 	low_pfn = fast_find_migrateblock(cc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1819) 	block_start_pfn = pageblock_start_pfn(low_pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1820) 	if (block_start_pfn < cc->zone->zone_start_pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1821) 		block_start_pfn = cc->zone->zone_start_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1822) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1823) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1824) 	 * fast_find_migrateblock() marks a pageblock as skipped, so to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1825) 	 * avoid the isolation_suitable() check below, check whether the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1826) 	 * fast search was successful.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1827) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1828) 	fast_find_block = low_pfn != cc->migrate_pfn && !cc->fast_search_fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1829) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1830) 	/* Only scan within a pageblock boundary */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1831) 	block_end_pfn = pageblock_end_pfn(low_pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1832) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1833) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1834) 	 * Iterate over whole pageblocks until we find the first suitable.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1835) 	 * Do not cross the free scanner.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1836) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1837) 	for (; block_end_pfn <= cc->free_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1838) 			fast_find_block = false,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1839) 			low_pfn = block_end_pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1840) 			block_start_pfn = block_end_pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1841) 			block_end_pfn += pageblock_nr_pages) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1842) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1843) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1844) 		 * This can potentially iterate a massively long zone with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1845) 		 * many pageblocks unsuitable, so periodically check if we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1846) 		 * need to schedule.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1847) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1848) 		if (!(low_pfn % (SWAP_CLUSTER_MAX * pageblock_nr_pages)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1849) 			cond_resched();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1850) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1851) 		page = pageblock_pfn_to_page(block_start_pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1852) 						block_end_pfn, cc->zone);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1853) 		if (!page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1854) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1855) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1856) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1857) 		 * If isolation recently failed, do not retry. Only check the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1858) 		 * pageblock once. COMPACT_CLUSTER_MAX causes a pageblock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1859) 		 * to be visited multiple times. Assume skip was checked
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1860) 		 * before making it "skip" so other compaction instances do
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1861) 		 * not scan the same block.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1862) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1863) 		if (IS_ALIGNED(low_pfn, pageblock_nr_pages) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1864) 		    !fast_find_block && !isolation_suitable(cc, page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1865) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1866) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1867) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1868) 		 * For async compaction, also only scan in MOVABLE blocks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1869) 		 * without huge pages. Async compaction is optimistic to see
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1870) 		 * if the minimum amount of work satisfies the allocation.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1871) 		 * The cached PFN is updated as it's possible that all
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1872) 		 * remaining blocks between source and target are unsuitable
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1873) 		 * and the compaction scanners fail to meet.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1874) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1875) 		if (!suitable_migration_source(cc, page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1876) 			update_cached_migrate(cc, block_end_pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1877) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1878) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1879) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1880) 		/* Perform the isolation */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1881) 		low_pfn = isolate_migratepages_block(cc, low_pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1882) 						block_end_pfn, isolate_mode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1883) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1884) 		if (!low_pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1885) 			return ISOLATE_ABORT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1886) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1887) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1888) 		 * Either we isolated something and can proceed with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1889) 		 * migration, or we failed and compact_zone() should decide
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1890) 		 * whether to continue or not.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1891) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1892) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1893) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1894) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1895) 	/* Record where migration scanner will be restarted. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1896) 	cc->migrate_pfn = low_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1897) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1898) 	return cc->nr_migratepages ? ISOLATE_SUCCESS : ISOLATE_NONE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1899) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1900) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1901) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1902)  * order == -1 is expected when compacting via
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1903)  * /proc/sys/vm/compact_memory
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1904)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1905) static inline bool is_via_compact_memory(int order)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1906) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1907) 	return order == -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1908) }
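
/*
 * For reference, the order == -1 case corresponds to a manual trigger
 * such as:
 *
 *	echo 1 > /proc/sys/vm/compact_memory
 *
 * which requests compaction of all zones with no particular allocation
 * order in mind.
 */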
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1909) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1910) static bool kswapd_is_running(pg_data_t *pgdat)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1911) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1912) 	return pgdat->kswapd && (pgdat->kswapd->state == TASK_RUNNING);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1913) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1914) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1915) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1916)  * A zone's fragmentation score is the external fragmentation wrt the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1917)  * COMPACTION_HPAGE_ORDER. It returns a value in the range [0, 100].
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1918)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1919) static unsigned int fragmentation_score_zone(struct zone *zone)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1920) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1921) 	return extfrag_for_order(zone, COMPACTION_HPAGE_ORDER);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1922) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1923) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1924) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1925)  * A weighted zone's fragmentation score is the external fragmentation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1926)  * wrt the COMPACTION_HPAGE_ORDER, scaled by the zone's size. It
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1927)  * returns a value in the range [0, 100].
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1928)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1929)  * The scaling factor ensures that proactive compaction focuses on larger
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1930)  * zones like ZONE_NORMAL, rather than smaller, specialized zones like
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1931)  * ZONE_DMA32. For smaller zones, the score value remains close to zero,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1932)  * and thus never exceeds the high threshold for proactive compaction.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1933)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1934) static unsigned int fragmentation_score_zone_weighted(struct zone *zone)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1935) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1936) 	unsigned long score;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1937) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1938) 	score = zone->present_pages * fragmentation_score_zone(zone);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1939) 	return div64_ul(score, zone->zone_pgdat->node_present_pages + 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1940) }
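
/*
 * Worked example of the weighting, assuming 4 KiB pages: on a 4 GiB
 * node, a 3 GiB ZONE_NORMAL with an extfrag score of 60 contributes
 * roughly 60 * 3/4 = 45 to the node score, while a 512 MiB ZONE_DMA32
 * with the same score adds only about 7, so small zones alone cannot
 * push a node over the proactive compaction threshold.
 */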
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1941) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1942) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1943)  * The per-node proactive (background) compaction process is started by its
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1944)  * corresponding kcompactd thread when the node's fragmentation score
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1945)  * exceeds the high threshold. The compaction process remains active till
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1946)  * the node's score falls below the low threshold, or one of the back-off
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1947)  * conditions is met.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1948)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1949) static unsigned int fragmentation_score_node(pg_data_t *pgdat)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1950) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1951) 	unsigned int score = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1952) 	int zoneid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1953) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1954) 	for (zoneid = 0; zoneid < MAX_NR_ZONES; zoneid++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1955) 		struct zone *zone;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1956) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1957) 		zone = &pgdat->node_zones[zoneid];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1958) 		score += fragmentation_score_zone_weighted(zone);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1959) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1960) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1961) 	return score;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1962) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1963) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1964) static unsigned int fragmentation_score_wmark(pg_data_t *pgdat, bool low)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1965) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1966) 	unsigned int wmark_low;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1967) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1968) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1969) 	 * Cap the low watermark to avoid excessive compaction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1970) 	 * activity in case a user sets the proactiveness tunable
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1971) 	 * close to 100 (maximum).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1972) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1973) 	wmark_low = max(100U - sysctl_compaction_proactiveness, 5U);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1974) 	return low ? wmark_low : min(wmark_low + 10, 100U);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1975) }
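
/*
 * Example thresholds, assuming the default vm.compaction_proactiveness
 * of 20: wmark_low = max(100 - 20, 5) = 80 and wmark_high = 90, so
 * proactive compaction starts once the node score exceeds 90 and stops
 * when it falls back to 80 or below. With proactiveness set to 100 the
 * pair is clamped to 5 and 15.
 */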
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1976) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1977) static bool should_proactive_compact_node(pg_data_t *pgdat)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1978) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1979) 	int wmark_high;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1980) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1981) 	if (!sysctl_compaction_proactiveness || kswapd_is_running(pgdat))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1982) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1983) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1984) 	wmark_high = fragmentation_score_wmark(pgdat, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1985) 	return fragmentation_score_node(pgdat) > wmark_high;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1986) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1987) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1988) static enum compact_result __compact_finished(struct compact_control *cc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1989) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1990) 	unsigned int order;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1991) 	const int migratetype = cc->migratetype;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1992) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1993) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1994) 	/* Compaction run completes if the migrate and free scanner meet */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1995) 	if (compact_scanners_met(cc)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1996) 		/* Let the next compaction start anew. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1997) 		reset_cached_positions(cc->zone);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1998) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1999) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2000) 		 * Mark that the PG_migrate_skip information should be cleared
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2001) 		 * by kswapd when it goes to sleep. kcompactd does not set the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2002) 		 * flag itself, as the decision to clear it should be based
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2003) 		 * directly on an allocation request.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2004) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2005) 		if (cc->direct_compaction)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2006) 			cc->zone->compact_blockskip_flush = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2007) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2008) 		if (cc->whole_zone)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2009) 			return COMPACT_COMPLETE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2010) 		else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2011) 			return COMPACT_PARTIAL_SKIPPED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2012) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2013) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2014) 	if (cc->proactive_compaction) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2015) 		int score, wmark_low;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2016) 		pg_data_t *pgdat;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2017) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2018) 		pgdat = cc->zone->zone_pgdat;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2019) 		if (kswapd_is_running(pgdat))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2020) 			return COMPACT_PARTIAL_SKIPPED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2021) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2022) 		score = fragmentation_score_zone(cc->zone);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2023) 		wmark_low = fragmentation_score_wmark(pgdat, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2024) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2025) 		if (score > wmark_low)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2026) 			ret = COMPACT_CONTINUE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2027) 		else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2028) 			ret = COMPACT_SUCCESS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2029) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2030) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2031) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2032) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2033) 	if (is_via_compact_memory(cc->order))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2034) 		return COMPACT_CONTINUE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2035) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2036) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2037) 	 * Always finish scanning a pageblock to reduce the possibility of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2038) 	 * fallbacks in the future. This is particularly important when
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2039) 	 * migration source is unmovable/reclaimable but it's not worth
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2040) 	 * special casing.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2041) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2042) 	if (!IS_ALIGNED(cc->migrate_pfn, pageblock_nr_pages))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2043) 		return COMPACT_CONTINUE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2044) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2045) 	/* Direct compactor: Is a suitable page free? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2046) 	ret = COMPACT_NO_SUITABLE_PAGE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2047) 	for (order = cc->order; order < MAX_ORDER; order++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2048) 		struct free_area *area = &cc->zone->free_area[order];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2049) 		bool can_steal;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2050) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2051) 		/* Job done if page is free of the right migratetype */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2052) 		if (!free_area_empty(area, migratetype))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2053) 			return COMPACT_SUCCESS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2054) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2055) #ifdef CONFIG_CMA
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2056) 		/* MIGRATE_MOVABLE can fallback on MIGRATE_CMA */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2057) 		if (migratetype == MIGRATE_MOVABLE &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2058) 			!free_area_empty(area, MIGRATE_CMA))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2059) 			return COMPACT_SUCCESS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2060) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2061) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2062) 		 * Job done if allocation would steal freepages from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2063) 		 * other migratetype buddy lists.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2064) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2065) 		if (find_suitable_fallback(area, order, migratetype,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2066) 						true, &can_steal) != -1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2067) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2068) 			/* movable pages are OK in any pageblock */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2069) 			if (migratetype == MIGRATE_MOVABLE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2070) 				return COMPACT_SUCCESS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2071) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2072) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2073) 			 * We are stealing for a non-movable allocation. Make
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2074) 			 * sure we finish compacting the current pageblock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2075) 			 * first so it is as free as possible and we won't
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2076) 			 * have to steal another one soon. This only applies
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2077) 			 * to sync compaction, as async compaction operates
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2078) 			 * on pageblocks of the same migratetype.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2079) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2080) 			if (cc->mode == MIGRATE_ASYNC ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2081) 					IS_ALIGNED(cc->migrate_pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2082) 							pageblock_nr_pages)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2083) 				return COMPACT_SUCCESS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2084) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2085) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2086) 			ret = COMPACT_CONTINUE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2087) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2088) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2089) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2090) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2091) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2092) 	if (cc->contended || fatal_signal_pending(current))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2093) 		ret = COMPACT_CONTENDED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2094) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2095) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2096) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2097) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2098) static enum compact_result compact_finished(struct compact_control *cc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2099) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2100) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2101) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2102) 	ret = __compact_finished(cc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2103) 	trace_mm_compaction_finished(cc->zone, cc->order, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2104) 	if (ret == COMPACT_NO_SUITABLE_PAGE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2105) 		ret = COMPACT_CONTINUE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2106) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2107) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2108) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2109) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2110) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2111)  * compaction_suitable: Is this suitable to run compaction on this zone now?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2112)  * Returns
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2113)  *   COMPACT_SKIPPED  - If there are too few free pages for compaction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2114)  *   COMPACT_SUCCESS  - If the allocation would succeed without compaction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2115)  *   COMPACT_CONTINUE - If compaction should run now
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2116)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2117) static enum compact_result __compaction_suitable(struct zone *zone, int order,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2118) 					unsigned int alloc_flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2119) 					int highest_zoneidx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2120) 					unsigned long wmark_target)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2121) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2122) 	unsigned long watermark;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2123) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2124) 	if (is_via_compact_memory(order))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2125) 		return COMPACT_CONTINUE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2126) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2127) 	watermark = wmark_pages(zone, alloc_flags & ALLOC_WMARK_MASK);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2128) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2129) 	 * If watermarks for high-order allocation are already met, there
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2130) 	 * should be no need for compaction at all.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2131) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2132) 	if (zone_watermark_ok(zone, order, watermark, highest_zoneidx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2133) 								alloc_flags))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2134) 		return COMPACT_SUCCESS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2135) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2136) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2137) 	 * Watermarks for order-0 must be met for compaction to be able to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2138) 	 * isolate free pages for migration targets. This means that the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2139) 	 * watermark and alloc_flags have to match, or be more pessimistic than
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2140) 	 * the check in __isolate_free_page(). We don't use the direct
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2141) 	 * compactor's alloc_flags, as they are not relevant for freepage
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2142) 	 * isolation. We however do use the direct compactor's highest_zoneidx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2143) 	 * to skip over zones where lowmem reserves would prevent allocation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2144) 	 * even if compaction succeeds.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2145) 	 * For costly orders, we require low watermark instead of min for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2146) 	 * compaction to proceed to increase its chances.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2147) 	 * ALLOC_CMA is used, as pages in CMA pageblocks are considered
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2148) 	 * suitable migration targets
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2149) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2150) 	watermark = (order > PAGE_ALLOC_COSTLY_ORDER) ?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2151) 				low_wmark_pages(zone) : min_wmark_pages(zone);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2152) 	watermark += compact_gap(order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2153) 	if (!__zone_watermark_ok(zone, 0, watermark, highest_zoneidx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2154) 						ALLOC_CMA, wmark_target))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2155) 		return COMPACT_SKIPPED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2156) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2157) 	return COMPACT_CONTINUE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2158) }
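
/*
 * Numeric sketch of the order-0 gap check above, assuming compact_gap()
 * expands to 2UL << order: an order-9 (costly) request needs
 * low_wmark_pages(zone) + 1024 free base pages, counting CMA, before
 * compaction is attempted, whereas a non-costly order-3 request only
 * needs min_wmark_pages(zone) + 16.
 */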
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2159) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2160) enum compact_result compaction_suitable(struct zone *zone, int order,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2161) 					unsigned int alloc_flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2162) 					int highest_zoneidx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2163) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2164) 	enum compact_result ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2165) 	int fragindex;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2166) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2167) 	ret = __compaction_suitable(zone, order, alloc_flags, highest_zoneidx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2168) 				    zone_page_state(zone, NR_FREE_PAGES));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2169) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2170) 	 * fragmentation index determines if allocation failures are due to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2171) 	 * low memory or external fragmentation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2172) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2173) 	 * index of -1000 would imply allocations might succeed depending on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2174) 	 * watermarks, but we already failed the high-order watermark check
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2175) 	 * index towards 0 implies failure is due to lack of memory
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2176) 	 * index towards 1000 implies failure is due to fragmentation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2177) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2178) 	 * Only compact if a failure would be due to fragmentation. Also
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2179) 	 * ignore fragindex for non-costly orders where the alternative to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2180) 	 * a successful reclaim/compaction is OOM. Fragindex and the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2181) 	 * vm.extfrag_threshold sysctl are meant as a heuristic to prevent
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2182) 	 * excessive compaction for costly orders, but it should not be at the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2183) 	 * expense of system stability.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2184) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2185) 	if (ret == COMPACT_CONTINUE && (order > PAGE_ALLOC_COSTLY_ORDER)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2186) 		fragindex = fragmentation_index(zone, order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2187) 		if (fragindex >= 0 && fragindex <= sysctl_extfrag_threshold)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2188) 			ret = COMPACT_NOT_SUITABLE_ZONE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2189) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2190) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2191) 	trace_mm_compaction_suitable(zone, order, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2192) 	if (ret == COMPACT_NOT_SUITABLE_ZONE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2193) 		ret = COMPACT_SKIPPED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2194) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2195) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2196) }
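
/*
 * Example of the fragindex gate, assuming the default
 * vm.extfrag_threshold of 500: a costly (order > PAGE_ALLOC_COSTLY_ORDER)
 * request seeing a fragmentation index of 400 is reported as
 * COMPACT_SKIPPED, since failure looks like a plain memory shortage and
 * reclaim is preferable, while an index of 600 lets compaction continue
 * because the shortage is due to fragmentation.
 */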
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2197) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2198) bool compaction_zonelist_suitable(struct alloc_context *ac, int order,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2199) 		int alloc_flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2200) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2201) 	struct zone *zone;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2202) 	struct zoneref *z;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2203) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2204) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2205) 	 * Make sure at least one zone would pass __compaction_suitable if we continue
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2206) 	 * retrying the reclaim.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2207) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2208) 	for_each_zone_zonelist_nodemask(zone, z, ac->zonelist,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2209) 				ac->highest_zoneidx, ac->nodemask) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2210) 		unsigned long available;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2211) 		enum compact_result compact_result;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2212) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2213) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2214) 		 * Do not consider all the reclaimable memory because we do not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2215) 		 * want to thrash just for a single high-order allocation, which
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2216) 		 * is not even guaranteed to appear even if __compaction_suitable
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2217) 		 * is happy with the watermark check.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2218) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2219) 		available = zone_reclaimable_pages(zone) / order;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2220) 		available += zone_page_state_snapshot(zone, NR_FREE_PAGES);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2221) 		compact_result = __compaction_suitable(zone, order, alloc_flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2222) 				ac->highest_zoneidx, available);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2223) 		if (compact_result != COMPACT_SKIPPED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2224) 			return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2225) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2226) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2227) 	return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2228) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2229) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2230) static enum compact_result
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2231) compact_zone(struct compact_control *cc, struct capture_control *capc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2232) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2233) 	enum compact_result ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2234) 	unsigned long start_pfn = cc->zone->zone_start_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2235) 	unsigned long end_pfn = zone_end_pfn(cc->zone);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2236) 	unsigned long last_migrated_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2237) 	const bool sync = cc->mode != MIGRATE_ASYNC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2238) 	bool update_cached;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2239) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2240) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2241) 	 * These counters track activities during zone compaction.  Initialize
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2242) 	 * them before compacting a new zone.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2243) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2244) 	cc->total_migrate_scanned = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2245) 	cc->total_free_scanned = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2246) 	cc->nr_migratepages = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2247) 	cc->nr_freepages = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2248) 	INIT_LIST_HEAD(&cc->freepages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2249) 	INIT_LIST_HEAD(&cc->migratepages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2250) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2251) 	cc->migratetype = gfp_migratetype(cc->gfp_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2252) 	ret = compaction_suitable(cc->zone, cc->order, cc->alloc_flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2253) 							cc->highest_zoneidx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2254) 	/* Compaction is likely to fail */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2255) 	if (ret == COMPACT_SUCCESS || ret == COMPACT_SKIPPED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2256) 		return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2257) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2258) 	/* huh, compaction_suitable is returning something unexpected */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2259) 	VM_BUG_ON(ret != COMPACT_CONTINUE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2260) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2261) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2262) 	 * Clear pageblock skip if there were failures recently and compaction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2263) 	 * is about to be retried after being deferred.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2264) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2265) 	if (compaction_restarting(cc->zone, cc->order))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2266) 		__reset_isolation_suitable(cc->zone);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2267) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2268) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2269) 	 * Set up to move all movable pages to the end of the zone. Use cached
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2270) 	 * information on where the scanners should start (unless we explicitly
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2271) 	 * want to compact the whole zone), but check that it is initialised
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2272) 	 * by ensuring the values are within zone boundaries.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2273) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2274) 	cc->fast_start_pfn = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2275) 	if (cc->whole_zone) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2276) 		cc->migrate_pfn = start_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2277) 		cc->free_pfn = pageblock_start_pfn(end_pfn - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2278) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2279) 		cc->migrate_pfn = cc->zone->compact_cached_migrate_pfn[sync];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2280) 		cc->free_pfn = cc->zone->compact_cached_free_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2281) 		if (cc->free_pfn < start_pfn || cc->free_pfn >= end_pfn) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2282) 			cc->free_pfn = pageblock_start_pfn(end_pfn - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2283) 			cc->zone->compact_cached_free_pfn = cc->free_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2284) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2285) 		if (cc->migrate_pfn < start_pfn || cc->migrate_pfn >= end_pfn) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2286) 			cc->migrate_pfn = start_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2287) 			cc->zone->compact_cached_migrate_pfn[0] = cc->migrate_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2288) 			cc->zone->compact_cached_migrate_pfn[1] = cc->migrate_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2289) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2290) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2291) 		if (cc->migrate_pfn <= cc->zone->compact_init_migrate_pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2292) 			cc->whole_zone = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2293) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2294) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2295) 	last_migrated_pfn = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2296) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2297) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2298) 	 * Migrate has separate cached PFNs for ASYNC and SYNC* migration on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2299) 	 * the basis that some migrations will fail in ASYNC mode. However,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2300) 	 * if the cached PFNs match and pageblocks are skipped due to having
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2301) 	 * no isolation candidates, then the sync state does not matter.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2302) 	 * Until a pageblock with isolation candidates is found, keep the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2303) 	 * cached PFNs in sync to avoid revisiting the same blocks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2304) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2305) 	update_cached = !sync &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2306) 		cc->zone->compact_cached_migrate_pfn[0] == cc->zone->compact_cached_migrate_pfn[1];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2307) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2308) 	trace_mm_compaction_begin(start_pfn, cc->migrate_pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2309) 				cc->free_pfn, end_pfn, sync);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2310) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2311) 	/* lru_add_drain_all() could be expensive since it involves other CPUs */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2312) 	lru_add_drain();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2313) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2314) 	while ((ret = compact_finished(cc)) == COMPACT_CONTINUE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2315) 		int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2316) 		unsigned long start_pfn = cc->migrate_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2317) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2318) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2319) 		 * Avoid multiple rescans which can happen if a page cannot be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2320) 		 * isolated (dirty/writeback in async mode) or if the migrated
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2321) 		 * pages are being allocated before the pageblock is cleared.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2322) 		 * The first rescan will capture the entire pageblock for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2323) 		 * migration. If it fails, it'll be marked skip and scanning
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2324) 		 * will proceed as normal.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2325) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2326) 		cc->rescan = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2327) 		if (pageblock_start_pfn(last_migrated_pfn) ==
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2328) 		    pageblock_start_pfn(start_pfn)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2329) 			cc->rescan = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2330) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2331) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2332) 		switch (isolate_migratepages(cc)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2333) 		case ISOLATE_ABORT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2334) 			ret = COMPACT_CONTENDED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2335) 			putback_movable_pages(&cc->migratepages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2336) 			cc->nr_migratepages = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2337) 			goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2338) 		case ISOLATE_NONE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2339) 			if (update_cached) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2340) 				cc->zone->compact_cached_migrate_pfn[1] =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2341) 					cc->zone->compact_cached_migrate_pfn[0];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2342) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2343) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2344) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2345) 			 * We haven't isolated and migrated anything, but
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2346) 			 * there might still be unflushed migrations from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2347) 			 * previous cc->order aligned block.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2348) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2349) 			goto check_drain;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2350) 		case ISOLATE_SUCCESS:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2351) 			update_cached = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2352) 			last_migrated_pfn = start_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2354) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2355) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2356) 		err = migrate_pages(&cc->migratepages, compaction_alloc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2357) 				compaction_free, (unsigned long)cc, cc->mode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2358) 				MR_COMPACTION);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2359) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2360) 		trace_mm_compaction_migratepages(cc->nr_migratepages, err,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2361) 							&cc->migratepages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2362) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2363) 		/* All pages were either migrated or will be released */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2364) 		cc->nr_migratepages = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2365) 		if (err) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2366) 			putback_movable_pages(&cc->migratepages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2367) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2368) 			 * migrate_pages() may return -ENOMEM when scanners meet
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2369) 			 * and we want compact_finished() to detect it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2370) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2371) 			if (err == -ENOMEM && !compact_scanners_met(cc)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2372) 				ret = COMPACT_CONTENDED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2373) 				goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2374) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2375) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2376) 			 * We failed to migrate at least one page in the current
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2377) 			 * order-aligned block, so skip the rest of it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2378) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2379) 			if (cc->direct_compaction &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2380) 						(cc->mode == MIGRATE_ASYNC)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2381) 				cc->migrate_pfn = block_end_pfn(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2382) 						cc->migrate_pfn - 1, cc->order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2383) 				/* Draining pcplists is useless in this case */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2384) 				last_migrated_pfn = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2385) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2386) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2387) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2388) check_drain:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2389) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2390) 		 * Has the migration scanner moved away from the previous
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2391) 		 * cc->order aligned block where we migrated from? If yes,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2392) 		 * flush the pages that were freed, so that they can merge and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2393) 		 * compact_finished() can detect immediately if allocation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2394) 		 * would succeed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2395) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2396) 		if (cc->order > 0 && last_migrated_pfn) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2397) 			unsigned long current_block_start =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2398) 				block_start_pfn(cc->migrate_pfn, cc->order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2399) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2400) 			if (last_migrated_pfn < current_block_start) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2401) 				lru_add_drain_cpu_zone(cc->zone);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2402) 				/* No more flushing until we migrate again */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2403) 				last_migrated_pfn = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2404) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2405) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2406) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2407) 		/* Stop if a page has been captured */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2408) 		if (capc && capc->page) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2409) 			ret = COMPACT_SUCCESS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2410) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2411) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2412) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2413) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2414) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2415) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2416) 	 * Release free pages and update where the free scanner should restart,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2417) 	 * so we don't leave any returned pages behind in the next attempt.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2418) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2419) 	if (cc->nr_freepages > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2420) 		unsigned long free_pfn = release_freepages(&cc->freepages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2421) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2422) 		cc->nr_freepages = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2423) 		VM_BUG_ON(free_pfn == 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2424) 		/* The cached pfn is always the first in a pageblock */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2425) 		free_pfn = pageblock_start_pfn(free_pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2426) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2427) 		 * Only go back, not forward. The cached pfn might have been
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2428) 		 * already reset to zone end in compact_finished()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2429) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2430) 		if (free_pfn > cc->zone->compact_cached_free_pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2431) 			cc->zone->compact_cached_free_pfn = free_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2432) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2433) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2434) 	count_compact_events(COMPACTMIGRATE_SCANNED, cc->total_migrate_scanned);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2435) 	count_compact_events(COMPACTFREE_SCANNED, cc->total_free_scanned);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2436) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2437) 	trace_mm_compaction_end(start_pfn, cc->migrate_pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2438) 				cc->free_pfn, end_pfn, sync, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2439) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2440) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2441) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2442) 
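/*
 * Direct compaction of a single zone.  While compact_zone() runs, capc is
 * published through current->capture_control so that the page allocator's
 * freeing path (compaction_capture() in mm/page_alloc.c) can hand a freshly
 * freed page of a suitable order straight back to this context instead of
 * returning it to the free lists; hence the barrier()/WRITE_ONCE() pairing
 * around the call below.
 */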
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2443) static enum compact_result compact_zone_order(struct zone *zone, int order,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2444) 		gfp_t gfp_mask, enum compact_priority prio,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2445) 		unsigned int alloc_flags, int highest_zoneidx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2446) 		struct page **capture)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2447) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2448) 	enum compact_result ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2449) 	struct compact_control cc = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2450) 		.order = order,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2451) 		.search_order = order,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2452) 		.gfp_mask = gfp_mask,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2453) 		.zone = zone,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2454) 		.mode = (prio == COMPACT_PRIO_ASYNC) ?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2455) 					MIGRATE_ASYNC :	MIGRATE_SYNC_LIGHT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2456) 		.alloc_flags = alloc_flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2457) 		.highest_zoneidx = highest_zoneidx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2458) 		.direct_compaction = true,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2459) 		.whole_zone = (prio == MIN_COMPACT_PRIORITY),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2460) 		.ignore_skip_hint = (prio == MIN_COMPACT_PRIORITY),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2461) 		.ignore_block_suitable = (prio == MIN_COMPACT_PRIORITY)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2462) 	};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2463) 	struct capture_control capc = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2464) 		.cc = &cc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2465) 		.page = NULL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2466) 	};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2467) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2468) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2469) 	 * Make sure the structs are really initialized before we expose the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2470) 	 * capture control, in case we are interrupted and the interrupt handler
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2471) 	 * frees a page.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2472) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2473) 	barrier();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2474) 	WRITE_ONCE(current->capture_control, &capc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2475) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2476) 	ret = compact_zone(&cc, &capc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2477) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2478) 	VM_BUG_ON(!list_empty(&cc.freepages));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2479) 	VM_BUG_ON(!list_empty(&cc.migratepages));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2480) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2481) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2482) 	 * Make sure we hide capture control first before we read the captured
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2483) 	 * page pointer, otherwise an interrupt could free and capture a page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2484) 	 * and we would leak it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2485) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2486) 	WRITE_ONCE(current->capture_control, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2487) 	*capture = READ_ONCE(capc.page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2488) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2489) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2490) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2491) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2492) int sysctl_extfrag_threshold = 500;
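/*
 * Exposed as /proc/sys/vm/extfrag_threshold.  compaction_suitable() compares
 * a zone's fragmentation index against this value: an index at or below the
 * threshold means the allocation failure is due to a lack of memory rather
 * than fragmentation, so compaction is not attempted.  Example:
 *   echo 600 > /proc/sys/vm/extfrag_threshold
 * makes the kernel somewhat more reluctant to compact.
 */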
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2493) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2494) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2495)  * try_to_compact_pages - Direct compact to satisfy a high-order allocation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2496)  * @gfp_mask: The GFP mask of the current allocation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2497)  * @order: The order of the current allocation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2498)  * @alloc_flags: The allocation flags of the current allocation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2499)  * @ac: The context of current allocation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2500)  * @prio: Determines how hard direct compaction should try to succeed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2501)  * @capture: Where a pointer to the free page created by compaction is stored
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2502)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2503)  * This is the main entry point for direct page compaction.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2504)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2505) enum compact_result try_to_compact_pages(gfp_t gfp_mask, unsigned int order,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2506) 		unsigned int alloc_flags, const struct alloc_context *ac,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2507) 		enum compact_priority prio, struct page **capture)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2508) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2509) 	int may_perform_io = gfp_mask & __GFP_IO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2510) 	struct zoneref *z;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2511) 	struct zone *zone;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2512) 	enum compact_result rc = COMPACT_SKIPPED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2513) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2514) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2515) 	 * Check if the GFP flags allow compaction - GFP_NOIO is a really
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2516) 	 * tricky context because the migration might require IO.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2517) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2518) 	if (!may_perform_io)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2519) 		return COMPACT_SKIPPED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2520) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2521) 	trace_mm_compaction_try_to_compact_pages(order, gfp_mask, prio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2522) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2523) 	/* Compact each zone in the list */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2524) 	for_each_zone_zonelist_nodemask(zone, z, ac->zonelist,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2525) 					ac->highest_zoneidx, ac->nodemask) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2526) 		enum compact_result status;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2527) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2528) 		if (prio > MIN_COMPACT_PRIORITY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2529) 					&& compaction_deferred(zone, order)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2530) 			rc = max_t(enum compact_result, COMPACT_DEFERRED, rc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2531) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2532) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2533) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2534) 		status = compact_zone_order(zone, order, gfp_mask, prio,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2535) 				alloc_flags, ac->highest_zoneidx, capture);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2536) 		rc = max(status, rc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2537) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2538) 		/* The allocation should succeed, stop compacting */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2539) 		if (status == COMPACT_SUCCESS) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2540) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2541) 			 * We think the allocation will succeed in this zone,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2542) 			 * but it is not certain, hence the false. The caller
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2543) 			 * will repeat this with true if allocation indeed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2544) 			 * succeeds in this zone.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2545) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2546) 			compaction_defer_reset(zone, order, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2547) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2548) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2549) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2550) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2551) 		if (prio != COMPACT_PRIO_ASYNC && (status == COMPACT_COMPLETE ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2552) 					status == COMPACT_PARTIAL_SKIPPED))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2553) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2554) 			 * We think that allocation won't succeed in this zone
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2555) 			 * so we defer compaction there. If it ends up
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2556) 			 * succeeding after all, it will be reset.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2557) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2558) 			defer_compaction(zone, order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2559) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2560) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2561) 		 * We might have stopped compacting due to need_resched() in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2562) 		 * async compaction, or due to a fatal signal being detected. In
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2563) 		 * that case do not try further zones.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2564) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2565) 		if ((prio == COMPACT_PRIO_ASYNC && need_resched())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2566) 					|| fatal_signal_pending(current))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2567) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2568) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2569) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2570) 	return rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2571) }
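/*
 * Note: try_to_compact_pages() is reached from the page allocator's slow
 * path (__alloc_pages_direct_compact() in mm/page_alloc.c) once a high-order
 * request cannot be satisfied from the free lists and reclaim alone.
 */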
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2572) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2573) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2574)  * Compact all zones within a node until each zone's fragmentation score
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2575)  * falls within the proactive compaction thresholds (as determined by the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2576)  * proactiveness tunable).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2577)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2578)  * It is possible that the function returns before reaching the score
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2579)  * targets due to various back-off conditions, such as contention on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2580)  * per-node or per-zone locks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2581)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2582) static void proactive_compact_node(pg_data_t *pgdat)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2583) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2584) 	int zoneid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2585) 	struct zone *zone;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2586) 	struct compact_control cc = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2587) 		.order = -1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2588) 		.mode = MIGRATE_SYNC_LIGHT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2589) 		.ignore_skip_hint = true,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2590) 		.whole_zone = true,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2591) 		.gfp_mask = GFP_KERNEL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2592) 		.proactive_compaction = true,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2593) 	};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2594) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2595) 	for (zoneid = 0; zoneid < MAX_NR_ZONES; zoneid++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2596) 		zone = &pgdat->node_zones[zoneid];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2597) 		if (!populated_zone(zone))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2598) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2599) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2600) 		cc.zone = zone;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2601) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2602) 		compact_zone(&cc, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2603) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2604) 		VM_BUG_ON(!list_empty(&cc.freepages));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2605) 		VM_BUG_ON(!list_empty(&cc.migratepages));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2606) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2607) }
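/*
 * The fragmentation score driving proactive compaction (see
 * should_proactive_compact_node() and the kcompactd loop below) is, roughly,
 * each zone's external fragmentation at the huge-page order weighted by the
 * zone's share of the node's memory; the low/high watermarks around it are
 * derived from sysctl_compaction_proactiveness.
 */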
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2608) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2609) /* Compact all zones within a node */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2610) static void compact_node(int nid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2611) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2612) 	pg_data_t *pgdat = NODE_DATA(nid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2613) 	int zoneid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2614) 	struct zone *zone;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2615) 	struct compact_control cc = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2616) 		.order = -1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2617) 		.mode = MIGRATE_SYNC,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2618) 		.ignore_skip_hint = true,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2619) 		.whole_zone = true,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2620) 		.gfp_mask = GFP_KERNEL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2621) 	};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2622) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2624) 	for (zoneid = 0; zoneid < MAX_NR_ZONES; zoneid++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2625) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2626) 		zone = &pgdat->node_zones[zoneid];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2627) 		if (!populated_zone(zone))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2628) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2629) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2630) 		cc.zone = zone;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2631) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2632) 		compact_zone(&cc, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2633) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2634) 		VM_BUG_ON(!list_empty(&cc.freepages));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2635) 		VM_BUG_ON(!list_empty(&cc.migratepages));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2636) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2637) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2638) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2639) /* Compact all nodes in the system */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2640) static void compact_nodes(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2641) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2642) 	int nid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2643) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2644) 	/* Flush pending updates to the LRU lists */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2645) 	lru_add_drain_all();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2646) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2647) 	for_each_online_node(nid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2648) 		compact_node(nid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2649) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2650) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2651) /* The written value is actually unused; all memory is compacted */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2652) int sysctl_compact_memory;
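/*
 * Example: "echo 1 > /proc/sys/vm/compact_memory" (any value works, only the
 * write matters) invokes sysctl_compaction_handler() below, which compacts
 * every zone of every online node.
 */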
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2653) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2654) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2655)  * Tunable for proactive compaction. It determines how
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2656)  * aggressively the kernel should compact memory in the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2657)  * background. It takes values in the range [0, 100].
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2658)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2659) unsigned int __read_mostly sysctl_compaction_proactiveness = 20;
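/*
 * Exposed as /proc/sys/vm/compaction_proactiveness.  Writing a non-zero
 * value, e.g. "echo 30 > /proc/sys/vm/compaction_proactiveness", also wakes
 * kcompactd on every node (see the handler below) so the new setting is
 * acted upon immediately rather than at the next periodic wakeup.
 */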
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2660) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2661) int compaction_proactiveness_sysctl_handler(struct ctl_table *table, int write,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2662) 		void *buffer, size_t *length, loff_t *ppos)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2663) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2664) 	int rc, nid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2665) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2666) 	rc = proc_dointvec_minmax(table, write, buffer, length, ppos);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2667) 	if (rc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2668) 		return rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2669) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2670) 	if (write && sysctl_compaction_proactiveness) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2671) 		for_each_online_node(nid) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2672) 			pg_data_t *pgdat = NODE_DATA(nid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2673) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2674) 			if (pgdat->proactive_compact_trigger)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2675) 				continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2676) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2677) 			pgdat->proactive_compact_trigger = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2678) 			wake_up_interruptible(&pgdat->kcompactd_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2679) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2680) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2681) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2682) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2683) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2684) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2685) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2686)  * This is the entry point for compacting all nodes via
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2687)  * /proc/sys/vm/compact_memory
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2688)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2689) int sysctl_compaction_handler(struct ctl_table *table, int write,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2690) 			void *buffer, size_t *length, loff_t *ppos)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2691) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2692) 	if (write)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2693) 		compact_nodes();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2694) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2695) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2696) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2697) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2698) #if defined(CONFIG_SYSFS) && defined(CONFIG_NUMA)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2699) static ssize_t sysfs_compact_node(struct device *dev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2700) 			struct device_attribute *attr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2701) 			const char *buf, size_t count)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2702) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2703) 	int nid = dev->id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2704) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2705) 	if (nid >= 0 && nid < nr_node_ids && node_online(nid)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2706) 		/* Flush pending updates to the LRU lists */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2707) 		lru_add_drain_all();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2708) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2709) 		compact_node(nid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2710) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2711) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2712) 	return count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2713) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2714) static DEVICE_ATTR(compact, 0200, NULL, sysfs_compact_node);
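/*
 * On NUMA systems this provides a per-node trigger, e.g.
 *   echo 1 > /sys/devices/system/node/node0/compact
 * which compacts all zones of that node only.
 */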
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2715) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2716) int compaction_register_node(struct node *node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2717) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2718) 	return device_create_file(&node->dev, &dev_attr_compact);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2719) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2720) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2721) void compaction_unregister_node(struct node *node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2722) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2723) 	return device_remove_file(&node->dev, &dev_attr_compact);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2724) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2725) #endif /* CONFIG_SYSFS && CONFIG_NUMA */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2726) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2727) static inline bool kcompactd_work_requested(pg_data_t *pgdat)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2728) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2729) 	return pgdat->kcompactd_max_order > 0 || kthread_should_stop() ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2730) 		pgdat->proactive_compact_trigger;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2731) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2732) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2733) static bool kcompactd_node_suitable(pg_data_t *pgdat)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2734) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2735) 	int zoneid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2736) 	struct zone *zone;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2737) 	enum zone_type highest_zoneidx = pgdat->kcompactd_highest_zoneidx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2738) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2739) 	for (zoneid = 0; zoneid <= highest_zoneidx; zoneid++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2740) 		zone = &pgdat->node_zones[zoneid];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2741) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2742) 		if (!populated_zone(zone))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2743) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2744) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2745) 		if (compaction_suitable(zone, pgdat->kcompactd_max_order, 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2746) 					highest_zoneidx) == COMPACT_CONTINUE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2747) 			return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2748) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2749) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2750) 	return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2751) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2752) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2753) static void kcompactd_do_work(pg_data_t *pgdat)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2754) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2755) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2756) 	 * With no special task, compact all zones so that a page of requested
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2757) 	 * order is allocatable.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2758) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2759) 	int zoneid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2760) 	struct zone *zone;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2761) 	struct compact_control cc = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2762) 		.order = pgdat->kcompactd_max_order,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2763) 		.search_order = pgdat->kcompactd_max_order,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2764) 		.highest_zoneidx = pgdat->kcompactd_highest_zoneidx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2765) 		.mode = MIGRATE_SYNC_LIGHT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2766) 		.ignore_skip_hint = false,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2767) 		.gfp_mask = GFP_KERNEL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2768) 	};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2769) 	trace_mm_compaction_kcompactd_wake(pgdat->node_id, cc.order,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2770) 							cc.highest_zoneidx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2771) 	count_compact_event(KCOMPACTD_WAKE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2772) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2773) 	for (zoneid = 0; zoneid <= cc.highest_zoneidx; zoneid++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2774) 		int status;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2775) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2776) 		zone = &pgdat->node_zones[zoneid];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2777) 		if (!populated_zone(zone))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2778) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2779) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2780) 		if (compaction_deferred(zone, cc.order))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2781) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2782) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2783) 		if (compaction_suitable(zone, cc.order, 0, zoneid) !=
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2784) 							COMPACT_CONTINUE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2785) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2786) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2787) 		if (kthread_should_stop())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2788) 			return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2789) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2790) 		cc.zone = zone;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2791) 		status = compact_zone(&cc, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2792) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2793) 		if (status == COMPACT_SUCCESS) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2794) 			compaction_defer_reset(zone, cc.order, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2795) 		} else if (status == COMPACT_PARTIAL_SKIPPED || status == COMPACT_COMPLETE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2796) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2797) 			 * Buddy pages may become stranded on pcps that could
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2798) 			 * otherwise coalesce on the zone's free area for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2799) 			 * order >= cc.order.  This is ratelimited by the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2800) 			 * upcoming deferral.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2801) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2802) 			drain_all_pages(zone);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2803) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2804) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2805) 			 * We use sync migration mode here, so we defer like
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2806) 			 * sync direct compaction does.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2807) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2808) 			defer_compaction(zone, cc.order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2809) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2810) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2811) 		count_compact_events(KCOMPACTD_MIGRATE_SCANNED,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2812) 				     cc.total_migrate_scanned);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2813) 		count_compact_events(KCOMPACTD_FREE_SCANNED,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2814) 				     cc.total_free_scanned);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2815) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2816) 		VM_BUG_ON(!list_empty(&cc.freepages));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2817) 		VM_BUG_ON(!list_empty(&cc.migratepages));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2818) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2819) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2820) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2821) 	 * Regardless of success, we are done until woken up next. But remember
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2822) 	 * the requested order/highest_zoneidx in case it was higher/tighter
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2823) 	 * than our current ones.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2824) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2825) 	if (pgdat->kcompactd_max_order <= cc.order)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2826) 		pgdat->kcompactd_max_order = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2827) 	if (pgdat->kcompactd_highest_zoneidx >= cc.highest_zoneidx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2828) 		pgdat->kcompactd_highest_zoneidx = pgdat->nr_zones - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2829) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2830) 
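/*
 * wakeup_kcompactd() is called with a non-zero order from the reclaim side
 * (e.g. kswapd in mm/vmscan.c) so that background compaction can make a page
 * of that order available without forcing allocators into direct compaction.
 */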
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2831) void wakeup_kcompactd(pg_data_t *pgdat, int order, int highest_zoneidx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2832) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2833) 	if (!order)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2834) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2835) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2836) 	if (pgdat->kcompactd_max_order < order)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2837) 		pgdat->kcompactd_max_order = order;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2838) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2839) 	if (pgdat->kcompactd_highest_zoneidx > highest_zoneidx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2840) 		pgdat->kcompactd_highest_zoneidx = highest_zoneidx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2841) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2842) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2843) 	 * Pairs with implicit barrier in wait_event_freezable()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2844) 	 * such that wakeups are not missed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2845) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2846) 	if (!wq_has_sleeper(&pgdat->kcompactd_wait))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2847) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2848) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2849) 	if (!kcompactd_node_suitable(pgdat))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2850) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2851) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2852) 	trace_mm_compaction_wakeup_kcompactd(pgdat->node_id, order,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2853) 							highest_zoneidx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2854) 	wake_up_interruptible(&pgdat->kcompactd_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2855) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2856) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2857) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2858)  * The background compaction daemon, started as a kernel thread
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2859)  * from the init process.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2860)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2861) static int kcompactd(void *p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2862) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2863) 	pg_data_t *pgdat = (pg_data_t *)p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2864) 	struct task_struct *tsk = current;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2865) 	unsigned int proactive_defer = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2866) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2867) 	const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2868) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2869) 	if (!cpumask_empty(cpumask))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2870) 		set_cpus_allowed_ptr(tsk, cpumask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2871) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2872) 	set_freezable();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2873) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2874) 	pgdat->kcompactd_max_order = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2875) 	pgdat->kcompactd_highest_zoneidx = pgdat->nr_zones - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2876) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2877) 	while (!kthread_should_stop()) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2878) 		unsigned long pflags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2879) 		long timeout;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2880) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2881) 		timeout = sysctl_compaction_proactiveness ?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2882) 			msecs_to_jiffies(HPAGE_FRAG_CHECK_INTERVAL_MSEC) :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2883) 			MAX_SCHEDULE_TIMEOUT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2884) 		trace_mm_compaction_kcompactd_sleep(pgdat->node_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2885) 		if (wait_event_freezable_timeout(pgdat->kcompactd_wait,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2886) 			kcompactd_work_requested(pgdat), timeout) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2887) 			!pgdat->proactive_compact_trigger) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2888) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2889) 			psi_memstall_enter(&pflags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2890) 			kcompactd_do_work(pgdat);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2891) 			psi_memstall_leave(&pflags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2892) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2893) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2894) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2895) 		/* kcompactd wait timeout */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2896) 		if (should_proactive_compact_node(pgdat)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2897) 			unsigned int prev_score, score;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2898) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2899) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2900) 			 * On wakeup of proactive compaction by sysctl
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2901) 			 * write, ignore the accumulated defer score.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2902) 			 * Anyway, if the proactive compaction didn't
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2903) 			 * make any progress for the new value, it will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2904) 			 * be further deferred by 2^COMPACT_MAX_DEFER_SHIFT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2905) 			 * times.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2906) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2907) 			if (proactive_defer &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2908) 				!pgdat->proactive_compact_trigger) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2909) 				proactive_defer--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2910) 				continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2911) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2912) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2913) 			prev_score = fragmentation_score_node(pgdat);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2914) 			proactive_compact_node(pgdat);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2915) 			score = fragmentation_score_node(pgdat);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2916) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2917) 			 * Defer proactive compaction if the fragmentation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2918) 			 * score did not go down i.e. no progress made.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2919) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2920) 			proactive_defer = score < prev_score ?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2921) 					0 : 1 << COMPACT_MAX_DEFER_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2922) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2923) 		if (pgdat->proactive_compact_trigger)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2924) 			pgdat->proactive_compact_trigger = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2925) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2926) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2927) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2928) }
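/*
 * Note on the loop above: with sysctl_compaction_proactiveness non-zero,
 * kcompactd also wakes periodically to re-evaluate the node's fragmentation
 * score; with proactiveness set to 0 it sleeps until explicitly woken.
 * kcompactd_do_work() itself only runs for wakeups coming from
 * wakeup_kcompactd(), not for timeouts or proactive-trigger wakeups.
 */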
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2929) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2930) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2931)  * This kcompactd start function will be called by init and node-hot-add.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2932)  * On node-hot-add, kcompactd will be moved to the proper CPUs if CPUs are hot-added.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2933)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2934) int kcompactd_run(int nid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2935) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2936) 	pg_data_t *pgdat = NODE_DATA(nid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2937) 	int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2938) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2939) 	if (pgdat->kcompactd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2940) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2941) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2942) 	pgdat->kcompactd = kthread_run(kcompactd, pgdat, "kcompactd%d", nid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2943) 	if (IS_ERR(pgdat->kcompactd)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2944) 		pr_err("Failed to start kcompactd on node %d\n", nid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2945) 		ret = PTR_ERR(pgdat->kcompactd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2946) 		pgdat->kcompactd = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2947) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2948) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2949) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2950) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2951) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2952)  * Called by memory hotplug when all memory in a node is offlined. Caller must
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2953)  * hold mem_hotplug_begin/end().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2954)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2955) void kcompactd_stop(int nid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2956) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2957) 	struct task_struct *kcompactd = NODE_DATA(nid)->kcompactd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2958) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2959) 	if (kcompactd) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2960) 		kthread_stop(kcompactd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2961) 		NODE_DATA(nid)->kcompactd = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2962) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2963) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2964) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2965) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2966)  * It's optimal to keep kcompactd threads on the same CPUs as their
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2967)  * node's memory, but not required for correctness. So if the last CPU in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2968)  * a node goes away, kcompactd may run anywhere; when the first CPU of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2969)  * that node comes back, restore its CPU binding.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2970)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2971) static int kcompactd_cpu_online(unsigned int cpu)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2972) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2973) 	int nid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2974) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2975) 	for_each_node_state(nid, N_MEMORY) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2976) 		pg_data_t *pgdat = NODE_DATA(nid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2977) 		const struct cpumask *mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2978) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2979) 		mask = cpumask_of_node(pgdat->node_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2980) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2981) 		if (cpumask_any_and(cpu_online_mask, mask) < nr_cpu_ids)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2982) 			/* One of our CPUs online: restore mask */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2983) 			set_cpus_allowed_ptr(pgdat->kcompactd, mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2984) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2985) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2986) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2987) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2988) static int __init kcompactd_init(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2989) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2990) 	int nid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2991) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2992) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2993) 	ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2994) 					"mm/compaction:online",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2995) 					kcompactd_cpu_online, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2996) 	if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2997) 		pr_err("kcompactd: failed to register hotplug callbacks.\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2998) 		return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2999) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3000) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3001) 	for_each_node_state(nid, N_MEMORY)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3002) 		kcompactd_run(nid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3003) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3004) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3005) subsys_initcall(kcompactd_init)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3006) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3007) #endif /* CONFIG_COMPACTION */