// SPDX-License-Identifier: GPL-2.0
#include <linux/mm.h>
#include <linux/mmzone.h>
#include <linux/page_reporting.h>
#include <linux/gfp.h>
#include <linux/export.h>
#include <linux/delay.h>
#include <linux/scatterlist.h>

#include "page_reporting.h"
#include "internal.h"

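/* Delay (in jiffies) before a requested reporting pass actually runs */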
#define PAGE_REPORTING_DELAY	(2 * HZ)
static struct page_reporting_dev_info __rcu *pr_dev_info __read_mostly;

enum {
	PAGE_REPORTING_IDLE = 0,
	PAGE_REPORTING_REQUESTED,
	PAGE_REPORTING_ACTIVE
};
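
/*
 * State transitions, as implemented below:
 *
 *   IDLE      -> REQUESTED  __page_reporting_request() schedules the worker
 *   REQUESTED -> ACTIVE     page_reporting_process() begins a pass
 *   ACTIVE    -> REQUESTED  budget exhausted in page_reporting_cycle(), or
 *                           a new request arrived mid-pass
 *   ACTIVE    -> IDLE       pass completed with no new request
 */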

/* request page reporting */
static void
__page_reporting_request(struct page_reporting_dev_info *prdev)
{
	unsigned int state;

	/* Check to see if we are in the desired state */
	state = atomic_read(&prdev->state);
	if (state == PAGE_REPORTING_REQUESTED)
		return;

	/*
	 * If reporting is already active there is nothing we need to do.
	 * Test against 0 as that represents PAGE_REPORTING_IDLE.
	 */
	state = atomic_xchg(&prdev->state, PAGE_REPORTING_REQUESTED);
	if (state != PAGE_REPORTING_IDLE)
		return;

	/*
	 * Delay the start of work to allow a sizable queue to build. For
	 * now we are limiting this to running no more than once every
	 * couple of seconds.
	 */
	schedule_delayed_work(&prdev->work, PAGE_REPORTING_DELAY);
}

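/*
 * Entry point from the page allocator's freeing path. In the upstream tree
 * this is reached via the page_reporting_notify_free() wrapper in
 * page_reporting.h, which tests the page_reporting_enabled static key
 * before calling here (wrapper name per upstream; an assumption here).
 */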
/* notify prdev of free page reporting request */
void __page_reporting_notify(void)
{
	struct page_reporting_dev_info *prdev;

	/*
	 * We use RCU to protect the pr_dev_info pointer. In almost all
	 * cases this should be present; however, in the unlikely case of
	 * a shutdown it will be NULL, and we should exit.
	 */
	rcu_read_lock();
	prdev = rcu_dereference(pr_dev_info);
	if (likely(prdev))
		__page_reporting_request(prdev);

	rcu_read_unlock();
}

static void
page_reporting_drain(struct page_reporting_dev_info *prdev,
		     struct scatterlist *sgl, unsigned int nents, bool reported)
{
	struct scatterlist *sg = sgl;

	/*
	 * Drain the now reported pages back into their respective
	 * free lists/areas. We assume at least one page is populated.
	 */
	do {
		struct page *page = sg_page(sg);
		int mt = get_pageblock_migratetype(page);
		unsigned int order = get_order(sg->length);

		__putback_isolated_page(page, order, mt);

		/* If the pages were not reported due to an error, skip flagging */
		if (!reported)
			continue;

		/*
		 * If the page was not commingled with another page we can
		 * consider the result to be "reported" since the page
		 * hasn't been modified, otherwise we will need to
		 * report on the new larger page when we make our way
		 * up to that higher order.
		 */
		if (PageBuddy(page) && buddy_order(page) == order)
			__SetPageReported(page);
	} while ((sg = sg_next(sg)));

	/* reinitialize scatterlist now that it is empty */
	sg_init_table(sgl, nents);
}

/*
 * The page reporting cycle consists of four stages: fill, report, drain,
 * and idle. We will cycle through the first three stages until we cannot
 * obtain a full scatterlist of pages, at which point we will switch to
 * idle.
 */
static int
page_reporting_cycle(struct page_reporting_dev_info *prdev, struct zone *zone,
		     unsigned int order, unsigned int mt,
		     struct scatterlist *sgl, unsigned int *offset)
{
	struct free_area *area = &zone->free_area[order];
	struct list_head *list = &area->free_list[mt];
	unsigned int page_len = PAGE_SIZE << order;
	struct page *page, *next;
	long budget;
	int err = 0;

	/*
	 * Perform an early check: if the free area is empty there is
	 * nothing to process, so we can skip this free_list.
	 */
	if (list_empty(list))
		return err;

	spin_lock_irq(&zone->lock);

	/*
	 * Limit how many calls we will be making to the page reporting
	 * device for this list. By doing this we avoid processing any
	 * given list for too long.
	 *
	 * The current value allows us enough calls to process over a
	 * sixteenth of the current list plus one additional call to handle
	 * any pages that may have already been present from the previous
	 * list processed. This should result in us reporting all pages on
	 * an idle system in about 30 seconds.
	 *
	 * The division here should be cheap since PAGE_REPORTING_CAPACITY
	 * should always be a power of 2.
	 */
	budget = DIV_ROUND_UP(area->nr_free, PAGE_REPORTING_CAPACITY * 16);
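	/*
	 * Worked example, assuming the usual PAGE_REPORTING_CAPACITY of 32:
	 * with nr_free = 4096 blocks on this list, the budget comes to
	 * DIV_ROUND_UP(4096, 32 * 16) = 8 calls to prdev->report() before
	 * we bail out and request another pass.
	 */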

	/* loop through free list adding unreported pages to sg list */
	list_for_each_entry_safe(page, next, list, lru) {
		/* We are going to skip over the reported pages. */
		if (PageReported(page))
			continue;

		/*
		 * If we fully consumed our budget then update our
		 * state to indicate that we are requesting additional
		 * processing and exit this list.
		 */
		if (budget < 0) {
			atomic_set(&prdev->state, PAGE_REPORTING_REQUESTED);
			next = page;
			break;
		}

		/* Attempt to pull page from list and place in scatterlist */
		if (*offset) {
			if (!__isolate_free_page(page, order)) {
				next = page;
				break;
			}

			/* Add page to scatter list */
			--(*offset);
			sg_set_page(&sgl[*offset], page, page_len, 0);

			continue;
		}

		/*
		 * Make the first non-reported page in the free list
		 * the new head of the free list before we release the
		 * zone lock.
		 */
		if (!list_is_first(&page->lru, list))
			list_rotate_to_front(&page->lru, list);

		/* release lock before waiting on report processing */
		spin_unlock_irq(&zone->lock);

		/* begin processing pages in local list */
		err = prdev->report(prdev, sgl, PAGE_REPORTING_CAPACITY);

		/* reset offset since the full list was reported */
		*offset = PAGE_REPORTING_CAPACITY;

		/* update budget to reflect call to report function */
		budget--;

		/* reacquire zone lock and resume processing */
		spin_lock_irq(&zone->lock);

		/* flush reported pages from the sg list */
		page_reporting_drain(prdev, sgl, PAGE_REPORTING_CAPACITY, !err);

		/*
		 * Reset next to the first entry; the old next isn't valid
		 * since we dropped the lock to report the pages.
		 */
		next = list_first_entry(list, struct page, lru);

		/* exit on error */
		if (err)
			break;
	}

	/* Rotate any leftover pages to the head of the freelist */
	if (&next->lru != list && !list_is_first(&next->lru, list))
		list_rotate_to_front(&next->lru, list);

	spin_unlock_irq(&zone->lock);

	return err;
}

static int
page_reporting_process_zone(struct page_reporting_dev_info *prdev,
			    struct scatterlist *sgl, struct zone *zone)
{
	unsigned int order, mt, leftover, offset = PAGE_REPORTING_CAPACITY;
	unsigned long watermark;
	int err = 0;

	/* Generate minimum watermark to be able to guarantee progress */
	watermark = low_wmark_pages(zone) +
		    (PAGE_REPORTING_CAPACITY << PAGE_REPORTING_MIN_ORDER);
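	/*
	 * Illustrative numbers, assuming the usual PAGE_REPORTING_CAPACITY
	 * of 32 and PAGE_REPORTING_MIN_ORDER of pageblock_order (9 on
	 * x86-64 with 4 KiB pages): the headroom above the low watermark
	 * is 32 << 9 = 16384 pages, i.e. 64 MiB, one full scatterlist
	 * worth of minimum-order blocks.
	 */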

	/*
	 * Cancel the request if there is insufficient free memory to
	 * guarantee forward progress while reporting.
	 */
	if (!zone_watermark_ok(zone, 0, watermark, 0, ALLOC_CMA))
		return err;

	/* Process each free list starting from lowest order/mt */
	for (order = PAGE_REPORTING_MIN_ORDER; order < MAX_ORDER; order++) {
		for (mt = 0; mt < MIGRATE_TYPES; mt++) {
			/* We do not pull pages from the isolate free list */
			if (is_migrate_isolate(mt))
				continue;

			err = page_reporting_cycle(prdev, zone, order, mt,
						   sgl, &offset);
			if (err)
				return err;
		}
	}

	/* report the leftover pages before going idle */
	leftover = PAGE_REPORTING_CAPACITY - offset;
	if (leftover) {
		sgl = &sgl[offset];
		err = prdev->report(prdev, sgl, leftover);

		/* flush any remaining pages out from the last report */
		spin_lock_irq(&zone->lock);
		page_reporting_drain(prdev, sgl, leftover, !err);
		spin_unlock_irq(&zone->lock);
	}

	return err;
}

static void page_reporting_process(struct work_struct *work)
{
	struct delayed_work *d_work = to_delayed_work(work);
	struct page_reporting_dev_info *prdev =
		container_of(d_work, struct page_reporting_dev_info, work);
	int err = 0, state = PAGE_REPORTING_ACTIVE;
	struct scatterlist *sgl;
	struct zone *zone;

	/*
	 * Change the state to "Active" so that we can track whether anyone
	 * requests page reporting after we complete our pass. If the state
	 * is not altered by the end of the pass we will switch to idle and
	 * quit scheduling reporting runs.
	 */
	atomic_set(&prdev->state, state);

	/* allocate scatterlist to store pages being reported on */
	sgl = kmalloc_array(PAGE_REPORTING_CAPACITY, sizeof(*sgl), GFP_KERNEL);
	if (!sgl)
		goto err_out;

	sg_init_table(sgl, PAGE_REPORTING_CAPACITY);

	for_each_zone(zone) {
		err = page_reporting_process_zone(prdev, sgl, zone);
		if (err)
			break;
	}

	kfree(sgl);
err_out:
	/*
	 * If the state has reverted to requested then there may be
	 * additional pages to be processed. We will defer for 2s to allow
	 * more pages to accumulate.
	 */
	state = atomic_cmpxchg(&prdev->state, state, PAGE_REPORTING_IDLE);
	if (state == PAGE_REPORTING_REQUESTED)
		schedule_delayed_work(&prdev->work, PAGE_REPORTING_DELAY);
}

static DEFINE_MUTEX(page_reporting_mutex);
DEFINE_STATIC_KEY_FALSE(page_reporting_enabled);

int page_reporting_register(struct page_reporting_dev_info *prdev)
{
	int err = 0;

	mutex_lock(&page_reporting_mutex);

	/* nothing to do if already in use */
	if (rcu_access_pointer(pr_dev_info)) {
		err = -EBUSY;
		goto err_out;
	}

	/* initialize state and work structures */
	atomic_set(&prdev->state, PAGE_REPORTING_IDLE);
	INIT_DELAYED_WORK(&prdev->work, &page_reporting_process);

	/* Begin initial flush of zones */
	__page_reporting_request(prdev);

	/* Assign device to allow notifications */
	rcu_assign_pointer(pr_dev_info, prdev);

	/* enable page reporting notification */
	if (!static_key_enabled(&page_reporting_enabled)) {
		static_branch_enable(&page_reporting_enabled);
		pr_info("Free page reporting enabled\n");
	}
err_out:
	mutex_unlock(&page_reporting_mutex);

	return err;
}
EXPORT_SYMBOL_GPL(page_reporting_register);
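
/*
 * Usage sketch (illustrative only, not part of this file): a backend such
 * as a balloon driver supplies the report() callback and registers itself.
 * The names my_report, my_prdev and hand_sgl_to_hypervisor below are
 * hypothetical.
 *
 *	static int my_report(struct page_reporting_dev_info *prdev,
 *			     struct scatterlist *sgl, unsigned int nents)
 *	{
 *		return hand_sgl_to_hypervisor(sgl, nents);
 *	}
 *
 *	static struct page_reporting_dev_info my_prdev = {
 *		.report = my_report,
 *	};
 *
 *	err = page_reporting_register(&my_prdev);
 *	...
 *	page_reporting_unregister(&my_prdev);
 *
 * A zero return from report() lets page_reporting_drain() flag the drained
 * pages as reported; any error aborts the pass.
 */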

void page_reporting_unregister(struct page_reporting_dev_info *prdev)
{
	mutex_lock(&page_reporting_mutex);

	if (rcu_access_pointer(pr_dev_info) == prdev) {
		/* Disable page reporting notification */
		RCU_INIT_POINTER(pr_dev_info, NULL);
		synchronize_rcu();

		/* Flush any existing work, and lock it out */
		cancel_delayed_work_sync(&prdev->work);
	}

	mutex_unlock(&page_reporting_mutex);
}
EXPORT_SYMBOL_GPL(page_reporting_unregister);