// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Virtio-mem device driver.
 *
 * Copyright Red Hat, Inc. 2020
 *
 * Author(s): David Hildenbrand <david@redhat.com>
 */

#include <linux/virtio.h>
#include <linux/virtio_mem.h>
#include <linux/workqueue.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/memory_hotplug.h>
#include <linux/memory.h>
#include <linux/hrtimer.h>
#include <linux/crash_dump.h>
#include <linux/mutex.h>
#include <linux/bitmap.h>
#include <linux/lockdep.h>

#include <acpi/acpi_numa.h>

static bool unplug_online = true;
module_param(unplug_online, bool, 0644);
MODULE_PARM_DESC(unplug_online, "Try to unplug online memory");

enum virtio_mem_mb_state {
	/* Unplugged, not added to Linux. Can be reused later. */
	VIRTIO_MEM_MB_STATE_UNUSED = 0,
	/* (Partially) plugged, not added to Linux. Error on add_memory(). */
	VIRTIO_MEM_MB_STATE_PLUGGED,
	/* Fully plugged, fully added to Linux, offline. */
	VIRTIO_MEM_MB_STATE_OFFLINE,
	/* Partially plugged, fully added to Linux, offline. */
	VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL,
	/* Fully plugged, fully added to Linux, online. */
	VIRTIO_MEM_MB_STATE_ONLINE,
	/* Partially plugged, fully added to Linux, online. */
	VIRTIO_MEM_MB_STATE_ONLINE_PARTIAL,
	VIRTIO_MEM_MB_STATE_COUNT
};
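
/*
 * Rough lifecycle (a sketch inferred from the state comments above):
 * UNUSED -> PLUGGED -> OFFLINE / OFFLINE_PARTIAL (once added to Linux) ->
 * ONLINE / ONLINE_PARTIAL (once onlined), and back again while unplugging.
 * The memory notifier further below moves blocks between the offline and
 * online states.
 */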

struct virtio_mem {
	struct virtio_device *vdev;

	/* We might first have to unplug all memory when starting up. */
	bool unplug_all_required;

	/* Workqueue that processes the plug/unplug requests. */
	struct work_struct wq;
	atomic_t config_changed;

	/* Virtqueue for guest->host requests. */
	struct virtqueue *vq;

	/* Wait for a host response to a guest request. */
	wait_queue_head_t host_resp;

	/* Space for one guest request and the host response. */
	struct virtio_mem_req req;
	struct virtio_mem_resp resp;

	/* The current size of the device. */
	uint64_t plugged_size;
	/* The requested size of the device. */
	uint64_t requested_size;

	/* The device block size (for communicating with the device). */
	uint64_t device_block_size;
	/* The translated node id. NUMA_NO_NODE in case not specified. */
	int nid;
	/* Physical start address of the memory region. */
	uint64_t addr;
	/* Maximum region size in bytes. */
	uint64_t region_size;

	/* The subblock size. */
	uint64_t subblock_size;
	/* The number of subblocks per memory block. */
	uint32_t nb_sb_per_mb;

	/* Id of the first memory block of this device. */
	unsigned long first_mb_id;
	/* Id of the last memory block of this device. */
	unsigned long last_mb_id;
	/* Id of the last usable memory block of this device. */
	unsigned long last_usable_mb_id;
	/* Id of the next memory block to prepare when needed. */
	unsigned long next_mb_id;

	/* The parent resource for all memory added via this device. */
	struct resource *parent_resource;
	/*
	 * Copy of "System RAM (virtio_mem)" to be used for
	 * add_memory_driver_managed().
	 */
	const char *resource_name;

	/* Summary of all memory block states. */
	unsigned long nb_mb_state[VIRTIO_MEM_MB_STATE_COUNT];
#define VIRTIO_MEM_NB_OFFLINE_THRESHOLD		10

	/*
	 * One byte state per memory block.
	 *
	 * Allocated via vmalloc(). Resized (alloc+copy+free) when preparing
	 * a new memory block would cross a page boundary.
	 *
	 * With 128 MiB memory blocks, one 4 KiB page (4096 one-byte states)
	 * covers 512 GiB of memory.
	 */
	uint8_t *mb_state;

	/*
	 * $nb_sb_per_mb bits per memory block. Handled similarly to mb_state.
	 *
	 * With 4 MiB subblocks (32 bits per 128 MiB memory block), one
	 * 4 KiB page manages 128 GiB of memory.
	 */
	unsigned long *sb_bitmap;

	/*
	 * Mutex that protects nb_mb_state, mb_state, and sb_bitmap.
	 *
	 * While this lock is held, the pointers can't change, ONLINE and
	 * OFFLINE blocks can't change their state, and no subblocks will get
	 * plugged/unplugged.
	 */
	struct mutex hotplug_mutex;
	bool hotplug_active;

	/* An error occurred that we cannot handle - stop processing requests. */
	bool broken;

	/* The driver is being removed. */
	spinlock_t removal_lock;
	bool removing;

	/* Timer for retrying to plug/unplug memory. */
	struct hrtimer retry_timer;
	unsigned int retry_timer_ms;
#define VIRTIO_MEM_RETRY_TIMER_MIN_MS		50000
#define VIRTIO_MEM_RETRY_TIMER_MAX_MS		300000

	/* Memory notifier (online/offline events). */
	struct notifier_block memory_notifier;

	/* Next device in the list of virtio-mem devices. */
	struct list_head next;
};

/*
 * We have to share a single online_page callback among all virtio-mem
 * devices. We use RCU to iterate the list in the callback.
 */
static DEFINE_MUTEX(virtio_mem_mutex);
static LIST_HEAD(virtio_mem_devices);

static void virtio_mem_online_page_cb(struct page *page, unsigned int order);

/*
 * Register a virtio-mem device so it will be considered for the online_page
 * callback.
 */
static int register_virtio_mem_device(struct virtio_mem *vm)
{
	int rc = 0;

	/* First device registers the callback. */
	mutex_lock(&virtio_mem_mutex);
	if (list_empty(&virtio_mem_devices))
		rc = set_online_page_callback(&virtio_mem_online_page_cb);
	if (!rc)
		list_add_rcu(&vm->next, &virtio_mem_devices);
	mutex_unlock(&virtio_mem_mutex);

	return rc;
}

/*
 * Unregister a virtio-mem device so it will no longer be considered for the
 * online_page callback.
 */
static void unregister_virtio_mem_device(struct virtio_mem *vm)
{
	/* Last device unregisters the callback. */
	mutex_lock(&virtio_mem_mutex);
	list_del_rcu(&vm->next);
	if (list_empty(&virtio_mem_devices))
		restore_online_page_callback(&virtio_mem_online_page_cb);
	mutex_unlock(&virtio_mem_mutex);

	synchronize_rcu();
}

/*
 * Calculate the memory block id of a given address.
 */
static unsigned long virtio_mem_phys_to_mb_id(unsigned long addr)
{
	return addr / memory_block_size_bytes();
}

/*
 * Calculate the physical start address of a given memory block id.
 */
static unsigned long virtio_mem_mb_id_to_phys(unsigned long mb_id)
{
	return mb_id * memory_block_size_bytes();
}

/*
 * Calculate the subblock id of a given address.
 */
static unsigned long virtio_mem_phys_to_sb_id(struct virtio_mem *vm,
					      unsigned long addr)
{
	const unsigned long mb_id = virtio_mem_phys_to_mb_id(addr);
	const unsigned long mb_addr = virtio_mem_mb_id_to_phys(mb_id);

	return (addr - mb_addr) / vm->subblock_size;
}
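
/*
 * Worked example (assuming 128 MiB memory blocks and a 4 MiB subblock
 * size, not mandated by the code above): for addr = 132 MiB,
 * mb_id = 132 MiB / 128 MiB = 1, mb_addr = 128 MiB, and the subblock
 * id is (132 MiB - 128 MiB) / 4 MiB = 1.
 */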

/*
 * Set the state of a memory block, taking care of the state counter.
 */
static void virtio_mem_mb_set_state(struct virtio_mem *vm, unsigned long mb_id,
				    enum virtio_mem_mb_state state)
{
	const unsigned long idx = mb_id - vm->first_mb_id;
	enum virtio_mem_mb_state old_state;

	old_state = vm->mb_state[idx];
	vm->mb_state[idx] = state;

	BUG_ON(vm->nb_mb_state[old_state] == 0);
	vm->nb_mb_state[old_state]--;
	vm->nb_mb_state[state]++;
}

/*
 * Get the state of a memory block.
 */
static enum virtio_mem_mb_state virtio_mem_mb_get_state(struct virtio_mem *vm,
							unsigned long mb_id)
{
	const unsigned long idx = mb_id - vm->first_mb_id;

	return vm->mb_state[idx];
}

/*
 * Prepare the state array for the next memory block.
 */
static int virtio_mem_mb_state_prepare_next_mb(struct virtio_mem *vm)
{
	unsigned long old_bytes = vm->next_mb_id - vm->first_mb_id + 1;
	unsigned long new_bytes = vm->next_mb_id - vm->first_mb_id + 2;
	int old_pages = PFN_UP(old_bytes);
	int new_pages = PFN_UP(new_bytes);
	uint8_t *new_mb_state;

	if (vm->mb_state && old_pages == new_pages)
		return 0;

	new_mb_state = vzalloc(new_pages * PAGE_SIZE);
	if (!new_mb_state)
		return -ENOMEM;

	mutex_lock(&vm->hotplug_mutex);
	if (vm->mb_state)
		memcpy(new_mb_state, vm->mb_state, old_pages * PAGE_SIZE);
	vfree(vm->mb_state);
	vm->mb_state = new_mb_state;
	mutex_unlock(&vm->hotplug_mutex);

	return 0;
}

#define virtio_mem_for_each_mb_state(_vm, _mb_id, _state) \
	for (_mb_id = _vm->first_mb_id; \
	     _mb_id < _vm->next_mb_id && _vm->nb_mb_state[_state]; \
	     _mb_id++) \
		if (virtio_mem_mb_get_state(_vm, _mb_id) == _state)

#define virtio_mem_for_each_mb_state_rev(_vm, _mb_id, _state) \
	for (_mb_id = _vm->next_mb_id - 1; \
	     _mb_id >= _vm->first_mb_id && _vm->nb_mb_state[_state]; \
	     _mb_id--) \
		if (virtio_mem_mb_get_state(_vm, _mb_id) == _state)
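
/*
 * Minimal usage sketch (hypothetical caller, not part of this driver):
 * visit every fully plugged, offline memory block of a device:
 *
 *	unsigned long mb_id;
 *
 *	virtio_mem_for_each_mb_state(vm, mb_id, VIRTIO_MEM_MB_STATE_OFFLINE)
 *		pr_info("offline block: %lu\n", mb_id);
 *
 * The nb_mb_state[_state] counter in the loop condition lets the walk
 * terminate early once no more blocks in the requested state remain.
 */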

/*
 * Mark all selected subblocks plugged.
 *
 * Will not modify the state of the memory block.
 */
static void virtio_mem_mb_set_sb_plugged(struct virtio_mem *vm,
					 unsigned long mb_id, int sb_id,
					 int count)
{
	const int bit = (mb_id - vm->first_mb_id) * vm->nb_sb_per_mb + sb_id;

	__bitmap_set(vm->sb_bitmap, bit, count);
}

/*
 * Mark all selected subblocks unplugged.
 *
 * Will not modify the state of the memory block.
 */
static void virtio_mem_mb_set_sb_unplugged(struct virtio_mem *vm,
					   unsigned long mb_id, int sb_id,
					   int count)
{
	const int bit = (mb_id - vm->first_mb_id) * vm->nb_sb_per_mb + sb_id;

	__bitmap_clear(vm->sb_bitmap, bit, count);
}

/*
 * Test if all selected subblocks are plugged.
 */
static bool virtio_mem_mb_test_sb_plugged(struct virtio_mem *vm,
					  unsigned long mb_id, int sb_id,
					  int count)
{
	const int bit = (mb_id - vm->first_mb_id) * vm->nb_sb_per_mb + sb_id;

	if (count == 1)
		return test_bit(bit, vm->sb_bitmap);

	/* TODO: Helper similar to bitmap_set() */
	return find_next_zero_bit(vm->sb_bitmap, bit + count, bit) >=
	       bit + count;
}

/*
 * Test if all selected subblocks are unplugged.
 */
static bool virtio_mem_mb_test_sb_unplugged(struct virtio_mem *vm,
					    unsigned long mb_id, int sb_id,
					    int count)
{
	const int bit = (mb_id - vm->first_mb_id) * vm->nb_sb_per_mb + sb_id;

	/* TODO: Helper similar to bitmap_set() */
	return find_next_bit(vm->sb_bitmap, bit + count, bit) >= bit + count;
}

/*
 * Find the first unplugged subblock. Returns vm->nb_sb_per_mb in case there is
 * none.
 */
static int virtio_mem_mb_first_unplugged_sb(struct virtio_mem *vm,
					    unsigned long mb_id)
{
	const int bit = (mb_id - vm->first_mb_id) * vm->nb_sb_per_mb;

	return find_next_zero_bit(vm->sb_bitmap, bit + vm->nb_sb_per_mb, bit) -
	       bit;
}
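
/*
 * Example (a sketch, assuming nb_sb_per_mb = 32): if subblocks 0 and 1
 * are plugged but 2 is not, virtio_mem_mb_first_unplugged_sb() returns 2;
 * if all 32 subblocks are plugged, find_next_zero_bit() returns bit + 32,
 * so the result is vm->nb_sb_per_mb as documented above.
 */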

/*
 * Prepare the subblock bitmap for the next memory block.
 */
static int virtio_mem_sb_bitmap_prepare_next_mb(struct virtio_mem *vm)
{
	const unsigned long old_nb_mb = vm->next_mb_id - vm->first_mb_id;
	const unsigned long old_nb_bits = old_nb_mb * vm->nb_sb_per_mb;
	const unsigned long new_nb_bits = (old_nb_mb + 1) * vm->nb_sb_per_mb;
	int old_pages = PFN_UP(BITS_TO_LONGS(old_nb_bits) * sizeof(long));
	int new_pages = PFN_UP(BITS_TO_LONGS(new_nb_bits) * sizeof(long));
	unsigned long *new_sb_bitmap, *old_sb_bitmap;

	if (vm->sb_bitmap && old_pages == new_pages)
		return 0;

	new_sb_bitmap = vzalloc(new_pages * PAGE_SIZE);
	if (!new_sb_bitmap)
		return -ENOMEM;

	mutex_lock(&vm->hotplug_mutex);
	/* Only copy when an old bitmap exists (not on the very first call). */
	if (vm->sb_bitmap)
		memcpy(new_sb_bitmap, vm->sb_bitmap, old_pages * PAGE_SIZE);

	old_sb_bitmap = vm->sb_bitmap;
	vm->sb_bitmap = new_sb_bitmap;
	mutex_unlock(&vm->hotplug_mutex);

	vfree(old_sb_bitmap);
	return 0;
}

/*
 * Try to add a memory block to Linux. This will usually only fail
 * if out of memory.
 *
 * Must not be called with the vm->hotplug_mutex held (possible deadlock with
 * onlining code).
 *
 * Will not modify the state of the memory block.
 */
static int virtio_mem_mb_add(struct virtio_mem *vm, unsigned long mb_id)
{
	const uint64_t addr = virtio_mem_mb_id_to_phys(mb_id);
	int nid = vm->nid;

	if (nid == NUMA_NO_NODE)
		nid = memory_add_physaddr_to_nid(addr);

	/*
	 * When force-unloading the driver and we still have memory added to
	 * Linux, the resource name has to stay.
	 */
	if (!vm->resource_name) {
		vm->resource_name = kstrdup_const("System RAM (virtio_mem)",
						  GFP_KERNEL);
		if (!vm->resource_name)
			return -ENOMEM;
	}

	dev_dbg(&vm->vdev->dev, "adding memory block: %lu\n", mb_id);
	return add_memory_driver_managed(nid, addr, memory_block_size_bytes(),
					 vm->resource_name,
					 MEMHP_MERGE_RESOURCE);
}

/*
 * Try to remove a memory block from Linux. Will only fail if the memory block
 * is not offline.
 *
 * Must not be called with the vm->hotplug_mutex held (possible deadlock with
 * onlining code).
 *
 * Will not modify the state of the memory block.
 */
static int virtio_mem_mb_remove(struct virtio_mem *vm, unsigned long mb_id)
{
	const uint64_t addr = virtio_mem_mb_id_to_phys(mb_id);
	int nid = vm->nid;

	if (nid == NUMA_NO_NODE)
		nid = memory_add_physaddr_to_nid(addr);

	dev_dbg(&vm->vdev->dev, "removing memory block: %lu\n", mb_id);
	return remove_memory(nid, addr, memory_block_size_bytes());
}

/*
 * Try to offline and remove a memory block from Linux.
 *
 * Must not be called with the vm->hotplug_mutex held (possible deadlock with
 * onlining code).
 *
 * Will not modify the state of the memory block.
 */
static int virtio_mem_mb_offline_and_remove(struct virtio_mem *vm,
					    unsigned long mb_id)
{
	const uint64_t addr = virtio_mem_mb_id_to_phys(mb_id);
	int nid = vm->nid;

	if (nid == NUMA_NO_NODE)
		nid = memory_add_physaddr_to_nid(addr);

	dev_dbg(&vm->vdev->dev, "offlining and removing memory block: %lu\n",
		mb_id);
	return offline_and_remove_memory(nid, addr, memory_block_size_bytes());
}

/*
 * Trigger the workqueue so the device can perform its magic.
 */
static void virtio_mem_retry(struct virtio_mem *vm)
{
	unsigned long flags;

	spin_lock_irqsave(&vm->removal_lock, flags);
	if (!vm->removing)
		queue_work(system_freezable_wq, &vm->wq);
	spin_unlock_irqrestore(&vm->removal_lock, flags);
}

static int virtio_mem_translate_node_id(struct virtio_mem *vm, uint16_t node_id)
{
	int node = NUMA_NO_NODE;

#if defined(CONFIG_ACPI_NUMA)
	if (virtio_has_feature(vm->vdev, VIRTIO_MEM_F_ACPI_PXM))
		node = pxm_to_node(node_id);
#endif
	return node;
}

/*
 * Test if a virtio-mem device overlaps with the given range. Can be called
 * locklessly from (notifier) callbacks.
 */
static bool virtio_mem_overlaps_range(struct virtio_mem *vm,
				      unsigned long start, unsigned long size)
{
	unsigned long dev_start = virtio_mem_mb_id_to_phys(vm->first_mb_id);
	unsigned long dev_end = virtio_mem_mb_id_to_phys(vm->last_mb_id) +
				memory_block_size_bytes();

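	/*
	 * Classic overlap test for the half-open intervals
	 * [start, start + size) and [dev_start, dev_end).
	 */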
	return start < dev_end && dev_start < start + size;
}

/*
 * Test if a virtio-mem device owns a memory block. Can be called
 * locklessly from (notifier) callbacks.
 */
static bool virtio_mem_owned_mb(struct virtio_mem *vm, unsigned long mb_id)
{
	return mb_id >= vm->first_mb_id && mb_id <= vm->last_mb_id;
}

static int virtio_mem_notify_going_online(struct virtio_mem *vm,
					  unsigned long mb_id)
{
	switch (virtio_mem_mb_get_state(vm, mb_id)) {
	case VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL:
	case VIRTIO_MEM_MB_STATE_OFFLINE:
		return NOTIFY_OK;
	default:
		break;
	}
	dev_warn_ratelimited(&vm->vdev->dev,
			     "memory block onlining denied\n");
	return NOTIFY_BAD;
}

static void virtio_mem_notify_offline(struct virtio_mem *vm,
				      unsigned long mb_id)
{
	switch (virtio_mem_mb_get_state(vm, mb_id)) {
	case VIRTIO_MEM_MB_STATE_ONLINE_PARTIAL:
		virtio_mem_mb_set_state(vm, mb_id,
					VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL);
		break;
	case VIRTIO_MEM_MB_STATE_ONLINE:
		virtio_mem_mb_set_state(vm, mb_id,
					VIRTIO_MEM_MB_STATE_OFFLINE);
		break;
	default:
		BUG();
		break;
	}

	/*
	 * Trigger the workqueue, maybe we can now unplug memory. Also,
	 * when we offline and remove a memory block, this will re-trigger
	 * us immediately - which is often nice because the removal of
	 * the memory block (e.g., memmap) might have freed up memory
	 * on other memory blocks we manage.
	 */
	virtio_mem_retry(vm);
}

static void virtio_mem_notify_online(struct virtio_mem *vm, unsigned long mb_id)
{
	unsigned long nb_offline;

	switch (virtio_mem_mb_get_state(vm, mb_id)) {
	case VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL:
		virtio_mem_mb_set_state(vm, mb_id,
					VIRTIO_MEM_MB_STATE_ONLINE_PARTIAL);
		break;
	case VIRTIO_MEM_MB_STATE_OFFLINE:
		virtio_mem_mb_set_state(vm, mb_id, VIRTIO_MEM_MB_STATE_ONLINE);
		break;
	default:
		BUG();
		break;
	}
	nb_offline = vm->nb_mb_state[VIRTIO_MEM_MB_STATE_OFFLINE] +
		     vm->nb_mb_state[VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL];

	/* see if we can add new blocks now that we onlined one block */
	if (nb_offline == VIRTIO_MEM_NB_OFFLINE_THRESHOLD - 1)
		virtio_mem_retry(vm);
}

static void virtio_mem_notify_going_offline(struct virtio_mem *vm,
					    unsigned long mb_id)
{
	const unsigned long nr_pages = PFN_DOWN(vm->subblock_size);
	struct page *page;
	unsigned long pfn;
	int sb_id, i;

	for (sb_id = 0; sb_id < vm->nb_sb_per_mb; sb_id++) {
		if (virtio_mem_mb_test_sb_plugged(vm, mb_id, sb_id, 1))
			continue;
		/*
		 * Drop our reference to the pages so the memory can get
		 * offlined and add the unplugged pages to the managed
		 * page counters (so offlining code can correctly subtract
		 * them again).
		 */
		pfn = PFN_DOWN(virtio_mem_mb_id_to_phys(mb_id) +
			       sb_id * vm->subblock_size);
		adjust_managed_page_count(pfn_to_page(pfn), nr_pages);
		for (i = 0; i < nr_pages; i++) {
			page = pfn_to_page(pfn + i);
			if (WARN_ON(!page_ref_dec_and_test(page)))
				dump_page(page, "unplugged page referenced");
		}
	}
}

static void virtio_mem_notify_cancel_offline(struct virtio_mem *vm,
					     unsigned long mb_id)
{
	const unsigned long nr_pages = PFN_DOWN(vm->subblock_size);
	unsigned long pfn;
	int sb_id, i;

	for (sb_id = 0; sb_id < vm->nb_sb_per_mb; sb_id++) {
		if (virtio_mem_mb_test_sb_plugged(vm, mb_id, sb_id, 1))
			continue;
		/*
		 * Get the reference we dropped when going offline and
		 * subtract the unplugged pages from the managed page
		 * counters.
		 */
		pfn = PFN_DOWN(virtio_mem_mb_id_to_phys(mb_id) +
			       sb_id * vm->subblock_size);
		adjust_managed_page_count(pfn_to_page(pfn), -nr_pages);
		for (i = 0; i < nr_pages; i++)
			page_ref_inc(pfn_to_page(pfn + i));
	}
}

/*
 * This callback will either be called synchronously from add_memory() or
 * asynchronously (e.g., triggered via user space). We have to be careful
 * with locking when calling add_memory().
 */
static int virtio_mem_memory_notifier_cb(struct notifier_block *nb,
					 unsigned long action, void *arg)
{
	struct virtio_mem *vm = container_of(nb, struct virtio_mem,
					     memory_notifier);
	struct memory_notify *mhp = arg;
	const unsigned long start = PFN_PHYS(mhp->start_pfn);
	const unsigned long size = PFN_PHYS(mhp->nr_pages);
	const unsigned long mb_id = virtio_mem_phys_to_mb_id(start);
	int rc = NOTIFY_OK;

	if (!virtio_mem_overlaps_range(vm, start, size))
		return NOTIFY_DONE;

	/*
	 * Memory is onlined/offlined in memory block granularity. We cannot
	 * cross virtio-mem device boundaries and memory block boundaries. Bail
	 * out if this ever changes.
	 */
	if (WARN_ON_ONCE(size != memory_block_size_bytes() ||
			 !IS_ALIGNED(start, memory_block_size_bytes())))
		return NOTIFY_BAD;

	/*
	 * Avoid circular locking lockdep warnings. We lock the mutex
	 * e.g., in MEM_GOING_ONLINE and unlock it in MEM_ONLINE. The
	 * blocking_notifier_call_chain() has its own lock, which gets unlocked
	 * between both notifier calls and will bail out. False positive.
	 */
	lockdep_off();

	switch (action) {
	case MEM_GOING_OFFLINE:
		mutex_lock(&vm->hotplug_mutex);
		if (vm->removing) {
			rc = notifier_from_errno(-EBUSY);
			mutex_unlock(&vm->hotplug_mutex);
			break;
		}
		vm->hotplug_active = true;
		virtio_mem_notify_going_offline(vm, mb_id);
		break;
	case MEM_GOING_ONLINE:
		mutex_lock(&vm->hotplug_mutex);
		if (vm->removing) {
			rc = notifier_from_errno(-EBUSY);
			mutex_unlock(&vm->hotplug_mutex);
			break;
		}
		vm->hotplug_active = true;
		rc = virtio_mem_notify_going_online(vm, mb_id);
		break;
	case MEM_OFFLINE:
		virtio_mem_notify_offline(vm, mb_id);
		vm->hotplug_active = false;
		mutex_unlock(&vm->hotplug_mutex);
		break;
	case MEM_ONLINE:
		virtio_mem_notify_online(vm, mb_id);
		vm->hotplug_active = false;
		mutex_unlock(&vm->hotplug_mutex);
		break;
	case MEM_CANCEL_OFFLINE:
		if (!vm->hotplug_active)
			break;
		virtio_mem_notify_cancel_offline(vm, mb_id);
		vm->hotplug_active = false;
		mutex_unlock(&vm->hotplug_mutex);
		break;
	case MEM_CANCEL_ONLINE:
		if (!vm->hotplug_active)
			break;
		vm->hotplug_active = false;
		mutex_unlock(&vm->hotplug_mutex);
		break;
	default:
		break;
	}

	lockdep_on();

	return rc;
}

/*
 * Set a range of pages PG_offline. Remember pages that were never onlined
 * (via generic_online_page()) using PageDirty().
 */
static void virtio_mem_set_fake_offline(unsigned long pfn,
					unsigned int nr_pages, bool onlined)
{
	for (; nr_pages--; pfn++) {
		struct page *page = pfn_to_page(pfn);

		__SetPageOffline(page);
		if (!onlined) {
			SetPageDirty(page);
			/* FIXME: remove after cleanups */
			ClearPageReserved(page);
		}
	}
}

/*
 * Clear PG_offline from a range of pages. If the pages were never onlined
 * (via generic_online_page()), clear PageDirty().
 */
static void virtio_mem_clear_fake_offline(unsigned long pfn,
					  unsigned int nr_pages, bool onlined)
{
	for (; nr_pages--; pfn++) {
		struct page *page = pfn_to_page(pfn);

		__ClearPageOffline(page);
		if (!onlined)
			ClearPageDirty(page);
	}
}

/*
 * Release a range of fake-offline pages to the buddy, effectively
 * fake-onlining them.
 */
static void virtio_mem_fake_online(unsigned long pfn, unsigned int nr_pages)
{
	const int order = MAX_ORDER - 1;
	int i;

	/*
	 * We are always called with subblock granularity, which is at least
	 * aligned to MAX_ORDER - 1.
	 */
	for (i = 0; i < nr_pages; i += 1 << order) {
		struct page *page = pfn_to_page(pfn + i);

		/*
		 * If the page is PageDirty(), it was kept fake-offline when
		 * onlining the memory block. Otherwise, it was allocated
		 * using alloc_contig_range(). All pages in a subblock are
		 * alike.
		 */
		if (PageDirty(page)) {
			virtio_mem_clear_fake_offline(pfn + i, 1 << order,
						      false);
			generic_online_page(page, order);
		} else {
			virtio_mem_clear_fake_offline(pfn + i, 1 << order,
						      true);
			free_contig_range(pfn + i, 1 << order);
			adjust_managed_page_count(page, 1 << order);
		}
	}
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 796)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 797) static void virtio_mem_online_page_cb(struct page *page, unsigned int order)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 798) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 799) const unsigned long addr = page_to_phys(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 800) const unsigned long mb_id = virtio_mem_phys_to_mb_id(addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 801) struct virtio_mem *vm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 802) int sb_id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 803)
	/*
	 * We exploit here that subblocks have at least MAX_ORDER - 1
	 * size/alignment and that this callback is called with such a
	 * size/alignment. So we cannot cross subblocks and therefore
	 * also not memory blocks.
	 */
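	/* Find the virtio-mem device that owns this memory block, if any. */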
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 810) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 811) list_for_each_entry_rcu(vm, &virtio_mem_devices, next) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 812) if (!virtio_mem_owned_mb(vm, mb_id))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 813) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 814)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 815) sb_id = virtio_mem_phys_to_sb_id(vm, addr);
		/*
		 * If plugged, online the pages; otherwise, set them fake
		 * offline (PageOffline).
		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 820) if (virtio_mem_mb_test_sb_plugged(vm, mb_id, sb_id, 1))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 821) generic_online_page(page, order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 822) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 823) virtio_mem_set_fake_offline(PFN_DOWN(addr), 1 << order,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 824) false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 825) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 826) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 827) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 828) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 829)
	/* Not virtio-mem memory, but e.g., a DIMM. Online it. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 831) generic_online_page(page, order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 832) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 833)
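/*
 * Send a request to the device and block until the device has responded.
 * Returns the response type, or the error from virtqueue_add_sgs() if the
 * request could not be queued.
 */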
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 834) static uint64_t virtio_mem_send_request(struct virtio_mem *vm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 835) const struct virtio_mem_req *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 836) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 837) struct scatterlist *sgs[2], sg_req, sg_resp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 838) unsigned int len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 839) int rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 840)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 841) /* don't use the request residing on the stack (vaddr) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 842) vm->req = *req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 843)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 844) /* out: buffer for request */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 845) sg_init_one(&sg_req, &vm->req, sizeof(vm->req));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 846) sgs[0] = &sg_req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 847)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 848) /* in: buffer for response */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 849) sg_init_one(&sg_resp, &vm->resp, sizeof(vm->resp));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 850) sgs[1] = &sg_resp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 851)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 852) rc = virtqueue_add_sgs(vm->vq, sgs, 1, 1, vm, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 853) if (rc < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 854) return rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 855)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 856) virtqueue_kick(vm->vq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 857)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 858) /* wait for a response */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 859) wait_event(vm->host_resp, virtqueue_get_buf(vm->vq, &len));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 860)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 861) return virtio16_to_cpu(vm->vdev, vm->resp.type);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 862) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 863)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 864) static int virtio_mem_send_plug_request(struct virtio_mem *vm, uint64_t addr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 865) uint64_t size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 866) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 867) const uint64_t nb_vm_blocks = size / vm->device_block_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 868) const struct virtio_mem_req req = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 869) .type = cpu_to_virtio16(vm->vdev, VIRTIO_MEM_REQ_PLUG),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 870) .u.plug.addr = cpu_to_virtio64(vm->vdev, addr),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 871) .u.plug.nb_blocks = cpu_to_virtio16(vm->vdev, nb_vm_blocks),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 872) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 873)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 874) if (atomic_read(&vm->config_changed))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 875) return -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 876)
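	/*
	 * Translate the device's response into an errno; unknown responses
	 * map to -ENOMEM, making the driver retry later.
	 */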
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 877) switch (virtio_mem_send_request(vm, &req)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 878) case VIRTIO_MEM_RESP_ACK:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 879) vm->plugged_size += size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 880) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 881) case VIRTIO_MEM_RESP_NACK:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 882) return -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 883) case VIRTIO_MEM_RESP_BUSY:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 884) return -ETXTBSY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 885) case VIRTIO_MEM_RESP_ERROR:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 886) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 887) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 888) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 889) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 890) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 891)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 892) static int virtio_mem_send_unplug_request(struct virtio_mem *vm, uint64_t addr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 893) uint64_t size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 894) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 895) const uint64_t nb_vm_blocks = size / vm->device_block_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 896) const struct virtio_mem_req req = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 897) .type = cpu_to_virtio16(vm->vdev, VIRTIO_MEM_REQ_UNPLUG),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 898) .u.unplug.addr = cpu_to_virtio64(vm->vdev, addr),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 899) .u.unplug.nb_blocks = cpu_to_virtio16(vm->vdev, nb_vm_blocks),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 900) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 901)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 902) if (atomic_read(&vm->config_changed))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 903) return -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 904)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 905) switch (virtio_mem_send_request(vm, &req)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 906) case VIRTIO_MEM_RESP_ACK:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 907) vm->plugged_size -= size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 908) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 909) case VIRTIO_MEM_RESP_BUSY:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 910) return -ETXTBSY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 911) case VIRTIO_MEM_RESP_ERROR:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 912) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 913) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 914) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 915) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 916) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 917)
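/*
 * Request the device to unplug all memory, e.g., to get rid of leftovers
 * when starting up, and reset our accounting accordingly.
 */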
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 918) static int virtio_mem_send_unplug_all_request(struct virtio_mem *vm)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 919) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 920) const struct virtio_mem_req req = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 921) .type = cpu_to_virtio16(vm->vdev, VIRTIO_MEM_REQ_UNPLUG_ALL),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 922) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 923)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 924) switch (virtio_mem_send_request(vm, &req)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 925) case VIRTIO_MEM_RESP_ACK:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 926) vm->unplug_all_required = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 927) vm->plugged_size = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 928) /* usable region might have shrunk */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 929) atomic_set(&vm->config_changed, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 930) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 931) case VIRTIO_MEM_RESP_BUSY:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 932) return -ETXTBSY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 933) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 934) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 935) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 936) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 937)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 938) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 939) * Plug selected subblocks. Updates the plugged state, but not the state
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 940) * of the memory block.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 941) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 942) static int virtio_mem_mb_plug_sb(struct virtio_mem *vm, unsigned long mb_id,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 943) int sb_id, int count)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 944) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 945) const uint64_t addr = virtio_mem_mb_id_to_phys(mb_id) +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 946) sb_id * vm->subblock_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 947) const uint64_t size = count * vm->subblock_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 948) int rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 949)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 950) dev_dbg(&vm->vdev->dev, "plugging memory block: %lu : %i - %i\n", mb_id,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 951) sb_id, sb_id + count - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 952)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 953) rc = virtio_mem_send_plug_request(vm, addr, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 954) if (!rc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 955) virtio_mem_mb_set_sb_plugged(vm, mb_id, sb_id, count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 956) return rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 957) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 958)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 959) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 960) * Unplug selected subblocks. Updates the plugged state, but not the state
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 961) * of the memory block.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 962) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 963) static int virtio_mem_mb_unplug_sb(struct virtio_mem *vm, unsigned long mb_id,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 964) int sb_id, int count)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 965) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 966) const uint64_t addr = virtio_mem_mb_id_to_phys(mb_id) +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 967) sb_id * vm->subblock_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 968) const uint64_t size = count * vm->subblock_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 969) int rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 970)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 971) dev_dbg(&vm->vdev->dev, "unplugging memory block: %lu : %i - %i\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 972) mb_id, sb_id, sb_id + count - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 973)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 974) rc = virtio_mem_send_unplug_request(vm, addr, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 975) if (!rc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 976) virtio_mem_mb_set_sb_unplugged(vm, mb_id, sb_id, count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 977) return rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 978) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 979)
/*
 * Unplug the desired number of plugged subblocks of an offline or not-added
 * memory block. Will fail if any subblock cannot get unplugged (instead of
 * skipping it).
 *
 * Will not modify the state of the memory block.
 *
 * Note: can fail after some subblocks were unplugged.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 989) static int virtio_mem_mb_unplug_any_sb(struct virtio_mem *vm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 990) unsigned long mb_id, uint64_t *nb_sb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 991) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 992) int sb_id, count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 993) int rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 994)
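	/*
	 * Walk the subblocks from the highest id downwards, batching runs of
	 * consecutive plugged subblocks into single unplug requests.
	 */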
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 995) sb_id = vm->nb_sb_per_mb - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 996) while (*nb_sb) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 997) /* Find the next candidate subblock */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 998) while (sb_id >= 0 &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 999) virtio_mem_mb_test_sb_unplugged(vm, mb_id, sb_id, 1))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000) sb_id--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) if (sb_id < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003) /* Try to unplug multiple subblocks at a time */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004) count = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005) while (count < *nb_sb && sb_id > 0 &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006) virtio_mem_mb_test_sb_plugged(vm, mb_id, sb_id - 1, 1)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007) count++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008) sb_id--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011) rc = virtio_mem_mb_unplug_sb(vm, mb_id, sb_id, count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012) if (rc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013) return rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014) *nb_sb -= count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015) sb_id--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022) * Unplug all plugged subblocks of an offline or not-added memory block.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) * Will not modify the state of the memory block.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026) * Note: can fail after some subblocks were unplugged.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028) static int virtio_mem_mb_unplug(struct virtio_mem *vm, unsigned long mb_id)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030) uint64_t nb_sb = vm->nb_sb_per_mb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) return virtio_mem_mb_unplug_any_sb(vm, mb_id, &nb_sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036) * Prepare tracking data for the next memory block.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038) static int virtio_mem_prepare_next_mb(struct virtio_mem *vm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039) unsigned long *mb_id)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041) int rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) if (vm->next_mb_id > vm->last_usable_mb_id)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044) return -ENOSPC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046) /* Resize the state array if required. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047) rc = virtio_mem_mb_state_prepare_next_mb(vm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) if (rc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049) return rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051) /* Resize the subblock bitmap if required. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052) rc = virtio_mem_sb_bitmap_prepare_next_mb(vm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053) if (rc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) return rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056) vm->nb_mb_state[VIRTIO_MEM_MB_STATE_UNUSED]++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057) *mb_id = vm->next_mb_id++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060)
/*
 * Don't add too many blocks that are not onlined yet, to avoid running out
 * of memory (OOM).
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064) static bool virtio_mem_too_many_mb_offline(struct virtio_mem *vm)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066) unsigned long nb_offline;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068) nb_offline = vm->nb_mb_state[VIRTIO_MEM_MB_STATE_OFFLINE] +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069) vm->nb_mb_state[VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070) return nb_offline >= VIRTIO_MEM_NB_OFFLINE_THRESHOLD;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074) * Try to plug the desired number of subblocks and add the memory block
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075) * to Linux.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077) * Will modify the state of the memory block.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079) static int virtio_mem_mb_plug_and_add(struct virtio_mem *vm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080) unsigned long mb_id,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081) uint64_t *nb_sb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083) const int count = min_t(int, *nb_sb, vm->nb_sb_per_mb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084) int rc, rc2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086) if (WARN_ON_ONCE(!count))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088)
	/*
	 * Plug the requested number of subblocks before adding it to Linux,
	 * so that onlining will directly online all plugged subblocks.
	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093) rc = virtio_mem_mb_plug_sb(vm, mb_id, 0, count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094) if (rc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095) return rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098) * Mark the block properly offline before adding it to Linux,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099) * so the memory notifiers will find the block in the right state.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101) if (count == vm->nb_sb_per_mb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102) virtio_mem_mb_set_state(vm, mb_id,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103) VIRTIO_MEM_MB_STATE_OFFLINE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105) virtio_mem_mb_set_state(vm, mb_id,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106) VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107)
	/* Add the memory block to Linux - if that fails, try to unplug. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109) rc = virtio_mem_mb_add(vm, mb_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110) if (rc) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111) enum virtio_mem_mb_state new_state = VIRTIO_MEM_MB_STATE_UNUSED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1113) dev_err(&vm->vdev->dev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114) "adding memory block %lu failed with %d\n", mb_id, rc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115) rc2 = virtio_mem_mb_unplug_sb(vm, mb_id, 0, count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118) * TODO: Linux MM does not properly clean up yet in all cases
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119) * where adding of memory failed - especially on -ENOMEM.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121) if (rc2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122) new_state = VIRTIO_MEM_MB_STATE_PLUGGED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123) virtio_mem_mb_set_state(vm, mb_id, new_state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124) return rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127) *nb_sb -= count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132) * Try to plug the desired number of subblocks of a memory block that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133) * is already added to Linux.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135) * Will modify the state of the memory block.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137) * Note: Can fail after some subblocks were successfully plugged.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139) static int virtio_mem_mb_plug_any_sb(struct virtio_mem *vm, unsigned long mb_id,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140) uint64_t *nb_sb, bool online)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142) unsigned long pfn, nr_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143) int sb_id, count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144) int rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146) if (WARN_ON_ONCE(!*nb_sb))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148)
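	/*
	 * Plug the first unplugged subblock and all consecutive unplugged
	 * subblocks following it, repeating until the request is satisfied
	 * or the memory block is fully plugged.
	 */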
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149) while (*nb_sb) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150) sb_id = virtio_mem_mb_first_unplugged_sb(vm, mb_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151) if (sb_id >= vm->nb_sb_per_mb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153) count = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154) while (count < *nb_sb &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155) sb_id + count < vm->nb_sb_per_mb &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156) !virtio_mem_mb_test_sb_plugged(vm, mb_id, sb_id + count,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157) 1))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158) count++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160) rc = virtio_mem_mb_plug_sb(vm, mb_id, sb_id, count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161) if (rc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162) return rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163) *nb_sb -= count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164) if (!online)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167) /* fake-online the pages if the memory block is online */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168) pfn = PFN_DOWN(virtio_mem_mb_id_to_phys(mb_id) +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169) sb_id * vm->subblock_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170) nr_pages = PFN_DOWN(count * vm->subblock_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171) virtio_mem_fake_online(pfn, nr_pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174) if (virtio_mem_mb_test_sb_plugged(vm, mb_id, 0, vm->nb_sb_per_mb)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175) if (online)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176) virtio_mem_mb_set_state(vm, mb_id,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177) VIRTIO_MEM_MB_STATE_ONLINE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179) virtio_mem_mb_set_state(vm, mb_id,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180) VIRTIO_MEM_MB_STATE_OFFLINE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187) * Try to plug the requested amount of memory.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1189) static int virtio_mem_plug_request(struct virtio_mem *vm, uint64_t diff)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191) uint64_t nb_sb = diff / vm->subblock_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192) unsigned long mb_id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193) int rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195) if (!nb_sb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198) /* Don't race with onlining/offlining */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199) mutex_lock(&vm->hotplug_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201) /* Try to plug subblocks of partially plugged online blocks. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202) virtio_mem_for_each_mb_state(vm, mb_id,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203) VIRTIO_MEM_MB_STATE_ONLINE_PARTIAL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204) rc = virtio_mem_mb_plug_any_sb(vm, mb_id, &nb_sb, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205) if (rc || !nb_sb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206) goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207) cond_resched();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1209)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210) /* Try to plug subblocks of partially plugged offline blocks. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211) virtio_mem_for_each_mb_state(vm, mb_id,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212) VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213) rc = virtio_mem_mb_plug_any_sb(vm, mb_id, &nb_sb, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214) if (rc || !nb_sb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215) goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216) cond_resched();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220) * We won't be working on online/offline memory blocks from this point,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221) * so we can't race with memory onlining/offlining. Drop the mutex.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223) mutex_unlock(&vm->hotplug_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225) /* Try to plug and add unused blocks */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226) virtio_mem_for_each_mb_state(vm, mb_id, VIRTIO_MEM_MB_STATE_UNUSED) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227) if (virtio_mem_too_many_mb_offline(vm))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228) return -ENOSPC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230) rc = virtio_mem_mb_plug_and_add(vm, mb_id, &nb_sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231) if (rc || !nb_sb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232) return rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233) cond_resched();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236) /* Try to prepare, plug and add new blocks */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1237) while (nb_sb) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1238) if (virtio_mem_too_many_mb_offline(vm))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1239) return -ENOSPC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1240)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1241) rc = virtio_mem_prepare_next_mb(vm, &mb_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1242) if (rc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1243) return rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1244) rc = virtio_mem_mb_plug_and_add(vm, mb_id, &nb_sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1245) if (rc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1246) return rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1247) cond_resched();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1248) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1249)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1250) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1251) out_unlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1252) mutex_unlock(&vm->hotplug_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1253) return rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1254) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1255)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1256) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1257) * Unplug the desired number of plugged subblocks of an offline memory block.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1258) * Will fail if any subblock cannot get unplugged (instead of skipping it).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1259) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1260) * Will modify the state of the memory block. Might temporarily drop the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1261) * hotplug_mutex.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1262) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1263) * Note: Can fail after some subblocks were successfully unplugged.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1264) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1265) static int virtio_mem_mb_unplug_any_sb_offline(struct virtio_mem *vm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1266) unsigned long mb_id,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1267) uint64_t *nb_sb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1268) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1269) int rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1270)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1271) rc = virtio_mem_mb_unplug_any_sb(vm, mb_id, nb_sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1272)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1273) /* some subblocks might have been unplugged even on failure */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1274) if (!virtio_mem_mb_test_sb_plugged(vm, mb_id, 0, vm->nb_sb_per_mb))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1275) virtio_mem_mb_set_state(vm, mb_id,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1276) VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1277) if (rc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1278) return rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1279)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1280) if (virtio_mem_mb_test_sb_unplugged(vm, mb_id, 0, vm->nb_sb_per_mb)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1281) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1282) * Remove the block from Linux - this should never fail.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1283) * Hinder the block from getting onlined by marking it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1284) * unplugged. Temporarily drop the mutex, so
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1285) * any pending GOING_ONLINE requests can be serviced/rejected.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1286) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1287) virtio_mem_mb_set_state(vm, mb_id,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1288) VIRTIO_MEM_MB_STATE_UNUSED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1289)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1290) mutex_unlock(&vm->hotplug_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1291) rc = virtio_mem_mb_remove(vm, mb_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1292) BUG_ON(rc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1293) mutex_lock(&vm->hotplug_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1294) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1295) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1296) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1297)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1298) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1299) * Unplug the given plugged subblocks of an online memory block.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1300) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1301) * Will modify the state of the memory block.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1302) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1303) static int virtio_mem_mb_unplug_sb_online(struct virtio_mem *vm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1304) unsigned long mb_id, int sb_id,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1305) int count)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1306) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1307) const unsigned long nr_pages = PFN_DOWN(vm->subblock_size) * count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1308) unsigned long start_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1309) int rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1310) struct acr_info dummy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1311)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1312) start_pfn = PFN_DOWN(virtio_mem_mb_id_to_phys(mb_id) +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1313) sb_id * vm->subblock_size);
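	/*
	 * Allocate the range, migrating any movable pages away, so the pages
	 * are guaranteed to be unused before unplugging them.
	 */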
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1314) rc = alloc_contig_range(start_pfn, start_pfn + nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1315) MIGRATE_MOVABLE, GFP_KERNEL, &dummy);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1316) if (rc == -ENOMEM)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1317) /* whoops, out of memory */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1318) return rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1319) if (rc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1320) return -EBUSY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1321)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1322) /* Mark it as fake-offline before unplugging it */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1323) virtio_mem_set_fake_offline(start_pfn, nr_pages, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1324) adjust_managed_page_count(pfn_to_page(start_pfn), -nr_pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1325)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1326) /* Try to unplug the allocated memory */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1327) rc = virtio_mem_mb_unplug_sb(vm, mb_id, sb_id, count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1328) if (rc) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1329) /* Return the memory to the buddy. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1330) virtio_mem_fake_online(start_pfn, nr_pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1331) return rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1332) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1333)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1334) virtio_mem_mb_set_state(vm, mb_id,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1335) VIRTIO_MEM_MB_STATE_ONLINE_PARTIAL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1336) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1337) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1338)
/*
 * Unplug the desired number of plugged subblocks of an online memory block.
 * Will skip subblocks that are busy.
 *
 * Will modify the state of the memory block. Might temporarily drop the
 * hotplug_mutex.
 *
 * Note: Can fail after some subblocks were successfully unplugged. Can
 * return 0 even if subblocks were busy and could not get unplugged.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1349) static int virtio_mem_mb_unplug_any_sb_online(struct virtio_mem *vm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1350) unsigned long mb_id,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1351) uint64_t *nb_sb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1352) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1353) int rc, sb_id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1354)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1355) /* If possible, try to unplug the complete block in one shot. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1356) if (*nb_sb >= vm->nb_sb_per_mb &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1357) virtio_mem_mb_test_sb_plugged(vm, mb_id, 0, vm->nb_sb_per_mb)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1358) rc = virtio_mem_mb_unplug_sb_online(vm, mb_id, 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1359) vm->nb_sb_per_mb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1360) if (!rc) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1361) *nb_sb -= vm->nb_sb_per_mb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1362) goto unplugged;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1363) } else if (rc != -EBUSY)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1364) return rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1365) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1366)
	/* Fall back to single subblocks. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1368) for (sb_id = vm->nb_sb_per_mb - 1; sb_id >= 0 && *nb_sb; sb_id--) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1369) /* Find the next candidate subblock */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1370) while (sb_id >= 0 &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1371) !virtio_mem_mb_test_sb_plugged(vm, mb_id, sb_id, 1))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1372) sb_id--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1373) if (sb_id < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1374) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1375)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1376) rc = virtio_mem_mb_unplug_sb_online(vm, mb_id, sb_id, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1377) if (rc == -EBUSY)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1378) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1379) else if (rc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1380) return rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1381) *nb_sb -= 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1382) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1383)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1384) unplugged:
	/*
	 * Once all subblocks of a memory block were unplugged, offline and
	 * remove it. This will usually not fail, as no memory is in use
	 * anymore - however, some other notifiers might NACK the request.
	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1390) if (virtio_mem_mb_test_sb_unplugged(vm, mb_id, 0, vm->nb_sb_per_mb)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1391) mutex_unlock(&vm->hotplug_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1392) rc = virtio_mem_mb_offline_and_remove(vm, mb_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1393) mutex_lock(&vm->hotplug_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1394) if (!rc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1395) virtio_mem_mb_set_state(vm, mb_id,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1396) VIRTIO_MEM_MB_STATE_UNUSED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1397) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1398)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1399) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1400) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1401)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1402) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1403) * Try to unplug the requested amount of memory.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1404) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1405) static int virtio_mem_unplug_request(struct virtio_mem *vm, uint64_t diff)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1406) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1407) uint64_t nb_sb = diff / vm->subblock_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1408) unsigned long mb_id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1409) int rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1410)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1411) if (!nb_sb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1412) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1413)
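	/*
	 * Prefer offline blocks: unplugging their subblocks does not require
	 * migrating away memory that is still in use.
	 */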
	/*
	 * We'll drop the mutex a couple of times when it is safe to do so.
	 * This might result in some blocks switching state (online/offline),
	 * and we could miss them in this run - we will retry later.
	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1419) mutex_lock(&vm->hotplug_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1420)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1421) /* Try to unplug subblocks of partially plugged offline blocks. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1422) virtio_mem_for_each_mb_state_rev(vm, mb_id,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1423) VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1424) rc = virtio_mem_mb_unplug_any_sb_offline(vm, mb_id,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1425) &nb_sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1426) if (rc || !nb_sb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1427) goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1428) cond_resched();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1429) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1430)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1431) /* Try to unplug subblocks of plugged offline blocks. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1432) virtio_mem_for_each_mb_state_rev(vm, mb_id,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1433) VIRTIO_MEM_MB_STATE_OFFLINE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1434) rc = virtio_mem_mb_unplug_any_sb_offline(vm, mb_id,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1435) &nb_sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1436) if (rc || !nb_sb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1437) goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1438) cond_resched();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1439) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1440)
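	/* Unplugging online memory can be disabled via "unplug_online". */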
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1441) if (!unplug_online) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1442) mutex_unlock(&vm->hotplug_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1443) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1444) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1445)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1446) /* Try to unplug subblocks of partially plugged online blocks. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1447) virtio_mem_for_each_mb_state_rev(vm, mb_id,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1448) VIRTIO_MEM_MB_STATE_ONLINE_PARTIAL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1449) rc = virtio_mem_mb_unplug_any_sb_online(vm, mb_id,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1450) &nb_sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1451) if (rc || !nb_sb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1452) goto out_unlock;
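		/*
		 * Drop the mutex briefly, so pending memory onlining/offlining
		 * can make progress.
		 */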
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1453) mutex_unlock(&vm->hotplug_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1454) cond_resched();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1455) mutex_lock(&vm->hotplug_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1456) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1457)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1458) /* Try to unplug subblocks of plugged online blocks. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1459) virtio_mem_for_each_mb_state_rev(vm, mb_id,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1460) VIRTIO_MEM_MB_STATE_ONLINE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1461) rc = virtio_mem_mb_unplug_any_sb_online(vm, mb_id,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1462) &nb_sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1463) if (rc || !nb_sb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1464) goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1465) mutex_unlock(&vm->hotplug_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1466) cond_resched();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1467) mutex_lock(&vm->hotplug_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1468) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1469)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1470) mutex_unlock(&vm->hotplug_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1471) return nb_sb ? -EBUSY : 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1472) out_unlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1473) mutex_unlock(&vm->hotplug_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1474) return rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1475) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1476)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1477) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1478) * Try to unplug all blocks that couldn't be unplugged before, for example,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1479) * because the hypervisor was busy.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1480) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1481) static int virtio_mem_unplug_pending_mb(struct virtio_mem *vm)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1482) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1483) unsigned long mb_id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1484) int rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1485)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1486) virtio_mem_for_each_mb_state(vm, mb_id, VIRTIO_MEM_MB_STATE_PLUGGED) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1487) rc = virtio_mem_mb_unplug(vm, mb_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1488) if (rc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1489) return rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1490) virtio_mem_mb_set_state(vm, mb_id, VIRTIO_MEM_MB_STATE_UNUSED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1491) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1492)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1493) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1494) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1495)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1496) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1497) * Update all parts of the config that could have changed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1498) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1499) static void virtio_mem_refresh_config(struct virtio_mem *vm)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1500) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1501) const uint64_t phys_limit = 1UL << MAX_PHYSMEM_BITS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1502) uint64_t new_plugged_size, usable_region_size, end_addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1503)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1504) /* the plugged_size is just a reflection of what _we_ did previously */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1505) virtio_cread_le(vm->vdev, struct virtio_mem_config, plugged_size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1506) &new_plugged_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1507) if (WARN_ON_ONCE(new_plugged_size != vm->plugged_size))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1508) vm->plugged_size = new_plugged_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1509)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1510) /* calculate the last usable memory block id */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1511) virtio_cread_le(vm->vdev, struct virtio_mem_config,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1512) usable_region_size, &usable_region_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1513) end_addr = vm->addr + usable_region_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1514) end_addr = min(end_addr, phys_limit);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1515) vm->last_usable_mb_id = virtio_mem_phys_to_mb_id(end_addr) - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1516)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1517) /* see if there is a request to change the size */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1518) virtio_cread_le(vm->vdev, struct virtio_mem_config, requested_size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1519) &vm->requested_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1520)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1521) dev_info(&vm->vdev->dev, "plugged size: 0x%llx", vm->plugged_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1522) dev_info(&vm->vdev->dev, "requested size: 0x%llx", vm->requested_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1523) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1524)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1525) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1526) * Workqueue function for handling plug/unplug requests and config updates.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1527) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1528) static void virtio_mem_run_wq(struct work_struct *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1529) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1530) struct virtio_mem *vm = container_of(work, struct virtio_mem, wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1531) uint64_t diff;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1532) int rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1533)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1534) hrtimer_cancel(&vm->retry_timer);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1535)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1536) if (vm->broken)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1537) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1538)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1539) retry:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1540) rc = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1541)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1542) /* Make sure we start with a clean state if there are leftovers. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1543) if (unlikely(vm->unplug_all_required))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1544) rc = virtio_mem_send_unplug_all_request(vm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1545)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1546) if (atomic_read(&vm->config_changed)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1547) atomic_set(&vm->config_changed, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1548) virtio_mem_refresh_config(vm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1549) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1550)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1551) /* Unplug any leftovers from previous runs */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1552) if (!rc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1553) rc = virtio_mem_unplug_pending_mb(vm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1554)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1555) if (!rc && vm->requested_size != vm->plugged_size) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1556) if (vm->requested_size > vm->plugged_size) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1557) diff = vm->requested_size - vm->plugged_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1558) rc = virtio_mem_plug_request(vm, diff);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1559) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1560) diff = vm->plugged_size - vm->requested_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1561) rc = virtio_mem_unplug_request(vm, diff);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1562) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1563) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1564)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1565) switch (rc) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1566) case 0:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1567) vm->retry_timer_ms = VIRTIO_MEM_RETRY_TIMER_MIN_MS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1568) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1569) case -ENOSPC:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1570) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1571) * We cannot add any more memory (alignment, physical limit)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1572) * or we have too many offline memory blocks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1573) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1574) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1575) case -ETXTBSY:
		/*
		 * The hypervisor cannot process our request right now
		 * (e.g., out of memory, migrating).
		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1580) case -EBUSY:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1581) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1582) * We cannot free up any memory to unplug it (all plugged memory
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1583) * is busy).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1584) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1585) case -ENOMEM:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1586) /* Out of memory, try again later. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1587) hrtimer_start(&vm->retry_timer, ms_to_ktime(vm->retry_timer_ms),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1588) HRTIMER_MODE_REL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1589) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1590) case -EAGAIN:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1591) /* Retry immediately (e.g., the config changed). */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1592) goto retry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1593) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1594) /* Unknown error, mark as broken */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1595) dev_err(&vm->vdev->dev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1596) "unknown error, marking device broken: %d\n", rc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1597) vm->broken = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1598) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1599) }

static enum hrtimer_restart virtio_mem_timer_expired(struct hrtimer *timer)
{
	struct virtio_mem *vm = container_of(timer, struct virtio_mem,
					     retry_timer);

	virtio_mem_retry(vm);
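	/*
	 * Back off exponentially: each expiry doubles the retry interval
	 * (MIN, 2 * MIN, 4 * MIN, ...) until it is capped at
	 * VIRTIO_MEM_RETRY_TIMER_MAX_MS.
	 */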
	vm->retry_timer_ms = min_t(unsigned int, vm->retry_timer_ms * 2,
				   VIRTIO_MEM_RETRY_TIMER_MAX_MS);
	return HRTIMER_NORESTART;
}

static void virtio_mem_handle_response(struct virtqueue *vq)
{
	struct virtio_mem *vm = vq->vdev->priv;

	wake_up(&vm->host_resp);
}

static int virtio_mem_init_vq(struct virtio_mem *vm)
{
	struct virtqueue *vq;

	vq = virtio_find_single_vq(vm->vdev, virtio_mem_handle_response,
				   "guest-request");
	if (IS_ERR(vq))
		return PTR_ERR(vq);
	vm->vq = vq;

	return 0;
}

static int virtio_mem_init(struct virtio_mem *vm)
{
	const uint64_t phys_limit = 1UL << MAX_PHYSMEM_BITS;
	uint16_t node_id;

	if (!vm->vdev->config->get) {
		dev_err(&vm->vdev->dev, "config access disabled\n");
		return -EINVAL;
	}

	/*
	 * We don't want to (un)plug or reuse any memory when in kdump. The
	 * memory is still accessible (but not mapped).
	 */
	if (is_kdump_kernel()) {
		dev_warn(&vm->vdev->dev, "disabled in kdump kernel\n");
		return -EBUSY;
	}

	/* Fetch all properties that can't change. */
	virtio_cread_le(vm->vdev, struct virtio_mem_config, plugged_size,
			&vm->plugged_size);
	virtio_cread_le(vm->vdev, struct virtio_mem_config, block_size,
			&vm->device_block_size);
	virtio_cread_le(vm->vdev, struct virtio_mem_config, node_id,
			&node_id);
	vm->nid = virtio_mem_translate_node_id(vm, node_id);
	virtio_cread_le(vm->vdev, struct virtio_mem_config, addr, &vm->addr);
	virtio_cread_le(vm->vdev, struct virtio_mem_config, region_size,
			&vm->region_size);

	/*
	 * We always hotplug memory in memory block granularity. This way,
	 * we have to wait for exactly one memory block to come online.
	 */
	if (vm->device_block_size > memory_block_size_bytes()) {
		dev_err(&vm->vdev->dev,
			"The block size is not supported (too big).\n");
		return -EINVAL;
	}
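
	/*
	 * Example (assuming typical x86-64 values): memory blocks are
	 * usually 128 MiB there, so a 2 MiB device block size is fine,
	 * while a 256 MiB device block size would be rejected above.
	 */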

	/* bad device setup - warn only */
	if (!IS_ALIGNED(vm->addr, memory_block_size_bytes()))
		dev_warn(&vm->vdev->dev,
			 "The alignment of the physical start address can make some memory unusable.\n");
	if (!IS_ALIGNED(vm->addr + vm->region_size, memory_block_size_bytes()))
		dev_warn(&vm->vdev->dev,
			 "The alignment of the physical end address can make some memory unusable.\n");
	if (vm->addr + vm->region_size > phys_limit)
		dev_warn(&vm->vdev->dev,
			 "Some memory is not addressable. This can make some memory unusable.\n");
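
	/*
	 * Example: assuming MAX_PHYSMEM_BITS = 46 (x86-64 with 4-level page
	 * tables), memory beyond 64 TiB would not be addressable.
	 */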

	/*
	 * Calculate the subblock size:
	 * - At least 2^(MAX_ORDER - 1) pages and at least 2^pageblock_order
	 *   pages (whichever is larger).
	 * - At least the device block size.
	 * In the worst case, a single subblock spans a whole memory block.
	 */
	vm->subblock_size = PAGE_SIZE * 1ul << max_t(uint32_t, MAX_ORDER - 1,
						     pageblock_order);
	vm->subblock_size = max_t(uint64_t, vm->device_block_size,
				  vm->subblock_size);
	vm->nb_sb_per_mb = memory_block_size_bytes() / vm->subblock_size;
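
	/*
	 * Hypothetical x86-64 example: with 4 KiB pages, MAX_ORDER = 11 and
	 * pageblock_order = 9, the calculation above yields
	 * 4 KiB << 10 = 4 MiB; a 2 MiB device block size does not increase
	 * that further, and with 128 MiB memory blocks, nb_sb_per_mb = 32.
	 */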

	/* Round up to the next full memory block. */
	vm->first_mb_id = virtio_mem_phys_to_mb_id(vm->addr - 1 +
						   memory_block_size_bytes());
	vm->next_mb_id = vm->first_mb_id;
	vm->last_mb_id = virtio_mem_phys_to_mb_id(vm->addr +
						  vm->region_size) - 1;
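
	/*
	 * Made-up example with 128 MiB memory blocks: addr = 0x144000000
	 * and region_size = 0x10000000 (256 MiB) yield
	 * first_mb_id = last_mb_id = 41, i.e., only the single aligned
	 * 128 MiB block at 0x148000000 is usable.
	 */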

	dev_info(&vm->vdev->dev, "start address: 0x%llx", vm->addr);
	dev_info(&vm->vdev->dev, "region size: 0x%llx", vm->region_size);
	dev_info(&vm->vdev->dev, "device block size: 0x%llx",
		 (unsigned long long)vm->device_block_size);
	dev_info(&vm->vdev->dev, "memory block size: 0x%lx",
		 memory_block_size_bytes());
	dev_info(&vm->vdev->dev, "subblock size: 0x%llx",
		 (unsigned long long)vm->subblock_size);
	if (vm->nid != NUMA_NO_NODE)
		dev_info(&vm->vdev->dev, "nid: %d", vm->nid);

	return 0;
}

static int virtio_mem_create_resource(struct virtio_mem *vm)
{
	/*
	 * When force-unloading the driver and removing the device, we
	 * could have a garbage pointer. Duplicate the string.
	 */
	const char *name = kstrdup(dev_name(&vm->vdev->dev), GFP_KERNEL);

	if (!name)
		return -ENOMEM;

	vm->parent_resource = __request_mem_region(vm->addr, vm->region_size,
						   name, IORESOURCE_SYSTEM_RAM);
	if (!vm->parent_resource) {
		kfree(name);
		dev_warn(&vm->vdev->dev, "could not reserve device region\n");
		dev_info(&vm->vdev->dev,
			 "reloading the driver is not supported\n");
		return -EBUSY;
	}

	/* The memory is not actually busy - make add_memory() work. */
	vm->parent_resource->flags &= ~IORESOURCE_BUSY;
	return 0;
}

static void virtio_mem_delete_resource(struct virtio_mem *vm)
{
	const char *name;

	if (!vm->parent_resource)
		return;

	name = vm->parent_resource->name;
	release_resource(vm->parent_resource);
	kfree(vm->parent_resource);
	kfree(name);
	vm->parent_resource = NULL;
}

static int virtio_mem_probe(struct virtio_device *vdev)
{
	struct virtio_mem *vm;
	int rc;

	BUILD_BUG_ON(sizeof(struct virtio_mem_req) != 24);
	BUILD_BUG_ON(sizeof(struct virtio_mem_resp) != 10);
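	/*
	 * The sizes above are fixed by the virtio-mem device specification's
	 * wire format; these checks catch layout/padding mistakes at compile
	 * time.
	 */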

	vdev->priv = vm = kzalloc(sizeof(*vm), GFP_KERNEL);
	if (!vm)
		return -ENOMEM;

	init_waitqueue_head(&vm->host_resp);
	vm->vdev = vdev;
	INIT_WORK(&vm->wq, virtio_mem_run_wq);
	mutex_init(&vm->hotplug_mutex);
	INIT_LIST_HEAD(&vm->next);
	spin_lock_init(&vm->removal_lock);
	hrtimer_init(&vm->retry_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	vm->retry_timer.function = virtio_mem_timer_expired;
	vm->retry_timer_ms = VIRTIO_MEM_RETRY_TIMER_MIN_MS;

	/* register the virtqueue */
	rc = virtio_mem_init_vq(vm);
	if (rc)
		goto out_free_vm;

	/* initialize the device by querying the config */
	rc = virtio_mem_init(vm);
	if (rc)
		goto out_del_vq;

	/* create the parent resource for all memory */
	rc = virtio_mem_create_resource(vm);
	if (rc)
		goto out_del_vq;

	/*
	 * If we still have memory plugged, we have to unplug all memory first.
	 * Registering our parent resource makes sure that this memory isn't
	 * actually in use (e.g., trying to reload the driver).
	 */
	if (vm->plugged_size) {
		vm->unplug_all_required = true;
		dev_info(&vm->vdev->dev, "unplugging all memory is required\n");
	}

	/* register callbacks */
	vm->memory_notifier.notifier_call = virtio_mem_memory_notifier_cb;
	rc = register_memory_notifier(&vm->memory_notifier);
	if (rc)
		goto out_del_resource;
	rc = register_virtio_mem_device(vm);
	if (rc)
		goto out_unreg_mem;

	virtio_device_ready(vdev);

	/* trigger a config update to start processing the requested_size */
	atomic_set(&vm->config_changed, 1);
	queue_work(system_freezable_wq, &vm->wq);

	return 0;
out_unreg_mem:
	unregister_memory_notifier(&vm->memory_notifier);
out_del_resource:
	virtio_mem_delete_resource(vm);
out_del_vq:
	vdev->config->del_vqs(vdev);
out_free_vm:
	kfree(vm);
	vdev->priv = NULL;

	return rc;
}

static void virtio_mem_remove(struct virtio_device *vdev)
{
	struct virtio_mem *vm = vdev->priv;
	unsigned long mb_id;
	int rc;

	/*
	 * Make sure the workqueue won't be triggered anymore and no memory
	 * blocks can be onlined/offlined until we're finished here.
	 */
	mutex_lock(&vm->hotplug_mutex);
	spin_lock_irq(&vm->removal_lock);
	vm->removing = true;
	spin_unlock_irq(&vm->removal_lock);
	mutex_unlock(&vm->hotplug_mutex);
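
	/*
	 * virtio_mem_retry() checks vm->removing under the removal_lock
	 * before queuing work, so from this point on no new work gets
	 * queued.
	 */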

	/* wait until the workqueue stopped */
	cancel_work_sync(&vm->wq);
	hrtimer_cancel(&vm->retry_timer);

	/*
	 * Once we unregister our callbacks below, user space could online
	 * partially plugged offline blocks without us being able to handle
	 * them properly. Make sure to remove them first.
	 */
	virtio_mem_for_each_mb_state(vm, mb_id,
				     VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL) {
		rc = virtio_mem_mb_remove(vm, mb_id);
		BUG_ON(rc);
		virtio_mem_mb_set_state(vm, mb_id, VIRTIO_MEM_MB_STATE_UNUSED);
	}
	/*
	 * Once we unregister our callbacks, user space can no longer
	 * offline partially plugged online memory blocks. No need to worry
	 * about them.
	 */

	/* unregister callbacks */
	unregister_virtio_mem_device(vm);
	unregister_memory_notifier(&vm->memory_notifier);

	/*
	 * There is no way we could reliably remove all memory we have added to
	 * the system. And there is no way to stop the driver/device from going
	 * away. Warn at least.
	 */
	if (vm->nb_mb_state[VIRTIO_MEM_MB_STATE_OFFLINE] ||
	    vm->nb_mb_state[VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL] ||
	    vm->nb_mb_state[VIRTIO_MEM_MB_STATE_ONLINE] ||
	    vm->nb_mb_state[VIRTIO_MEM_MB_STATE_ONLINE_PARTIAL]) {
		dev_warn(&vdev->dev, "device still has system memory added\n");
	} else {
		virtio_mem_delete_resource(vm);
		kfree_const(vm->resource_name);
	}

	/* remove all tracking data - no locking needed */
	vfree(vm->mb_state);
	vfree(vm->sb_bitmap);

	/* reset the device and cleanup the queues */
	vdev->config->reset(vdev);
	vdev->config->del_vqs(vdev);

	kfree(vm);
	vdev->priv = NULL;
}

static void virtio_mem_config_changed(struct virtio_device *vdev)
{
	struct virtio_mem *vm = vdev->priv;

	atomic_set(&vm->config_changed, 1);
	virtio_mem_retry(vm);
}

#ifdef CONFIG_PM_SLEEP
static int virtio_mem_freeze(struct virtio_device *vdev)
{
	/*
	 * When restarting the VM, all memory is usually unplugged. Don't
	 * allow suspending/hibernating.
	 */
	dev_err(&vdev->dev, "save/restore not supported.\n");
	return -EPERM;
}

static int virtio_mem_restore(struct virtio_device *vdev)
{
	return -EPERM;
}
#endif

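/*
 * Without VIRTIO_MEM_F_ACPI_PXM, the device-provided node id is ignored
 * and memory is added without a NUMA node preference (NUMA_NO_NODE).
 */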
static unsigned int virtio_mem_features[] = {
#if defined(CONFIG_NUMA) && defined(CONFIG_ACPI_NUMA)
	VIRTIO_MEM_F_ACPI_PXM,
#endif
};

static const struct virtio_device_id virtio_mem_id_table[] = {
	{ VIRTIO_ID_MEM, VIRTIO_DEV_ANY_ID },
	{ 0 },
};

static struct virtio_driver virtio_mem_driver = {
	.feature_table = virtio_mem_features,
	.feature_table_size = ARRAY_SIZE(virtio_mem_features),
	.driver.name = KBUILD_MODNAME,
	.driver.owner = THIS_MODULE,
	.id_table = virtio_mem_id_table,
	.probe = virtio_mem_probe,
	.remove = virtio_mem_remove,
	.config_changed = virtio_mem_config_changed,
#ifdef CONFIG_PM_SLEEP
	.freeze = virtio_mem_freeze,
	.restore = virtio_mem_restore,
#endif
};

module_virtio_driver(virtio_mem_driver);
MODULE_DEVICE_TABLE(virtio, virtio_mem_id_table);
MODULE_AUTHOR("David Hildenbrand <david@redhat.com>");
MODULE_DESCRIPTION("Virtio-mem driver");
MODULE_LICENSE("GPL");