// SPDX-License-Identifier: GPL-2.0
/*
 * dax: direct host memory access
 * Copyright (C) 2020 Red Hat, Inc.
 */

#include "fuse_i.h"

#include <linux/delay.h>
#include <linux/dax.h>
#include <linux/uio.h>
#include <linux/pfn_t.h>
#include <linux/iomap.h>
#include <linux/interval_tree.h>

/*
 * Default memory range size. A power of 2 so it agrees with common FUSE_INIT
 * map_alignment values 4KB and 64KB.
 */
#define FUSE_DAX_SHIFT	21
#define FUSE_DAX_SZ	(1 << FUSE_DAX_SHIFT)
#define FUSE_DAX_PAGES	(FUSE_DAX_SZ / PAGE_SIZE)

/* Number of ranges reclaimer will try to free in one invocation */
#define FUSE_DAX_RECLAIM_CHUNK		(10)

/*
 * DAX memory reclaim threshold as a percentage of total ranges. When the
 * number of free ranges drops below this threshold, reclaim can trigger.
 * Default is 20%.
 */
#define FUSE_DAX_RECLAIM_THRESHOLD	(20)
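
/*
 * Illustrative arithmetic: a 1 GiB DAX window split into 2 MiB
 * (FUSE_DAX_SZ) ranges gives nr_ranges = 512, so reclaim is kicked once
 * fewer than 512 * 20 / 100 = 102 ranges remain free (see
 * __kick_dmap_free_worker() below).
 */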

/** Translation information for file offsets to DAX window offsets */
struct fuse_dax_mapping {
	/* Pointer to inode where this memory range is mapped */
	struct inode *inode;

	/* Will connect in fcd->free_ranges to keep track of free memory */
	struct list_head list;

	/* For interval tree in file/inode */
	struct interval_tree_node itn;

	/* Will connect in fcd->busy_ranges to keep track of busy memory */
	struct list_head busy_list;

	/** Position in DAX window */
	u64 window_offset;

	/** Length of mapping, in bytes */
	loff_t length;

	/* Is this mapping read-only or read-write */
	bool writable;

	/* reference count when the mapping is used by dax iomap. */
	refcount_t refcnt;
};

/* Per-inode dax map */
struct fuse_inode_dax {
	/* Semaphore to protect modifications to the dmap tree */
	struct rw_semaphore sem;

	/* Sorted rb tree of struct fuse_dax_mapping elements */
	struct rb_root_cached tree;
	unsigned long nr;
};

struct fuse_conn_dax {
	/* DAX device */
	struct dax_device *dev;

	/* Lock protecting accesses to members of this structure */
	spinlock_t lock;

	/* List of memory ranges which are busy */
	unsigned long nr_busy_ranges;
	struct list_head busy_ranges;

	/* Worker to free up memory ranges */
	struct delayed_work free_work;

	/* Wait queue for a dax range to become free */
	wait_queue_head_t range_waitq;

	/* DAX Window Free Ranges */
	long nr_free_ranges;
	struct list_head free_ranges;

	unsigned long nr_ranges;
};

static inline struct fuse_dax_mapping *
node_to_dmap(struct interval_tree_node *node)
{
	if (!node)
		return NULL;

	return container_of(node, struct fuse_dax_mapping, itn);
}

static struct fuse_dax_mapping *
alloc_dax_mapping_reclaim(struct fuse_conn_dax *fcd, struct inode *inode);

static void
__kick_dmap_free_worker(struct fuse_conn_dax *fcd, unsigned long delay_ms)
{
	unsigned long free_threshold;

	/* If the number of free ranges is below the threshold, start reclaim */
	free_threshold = max_t(unsigned long, fcd->nr_ranges * FUSE_DAX_RECLAIM_THRESHOLD / 100,
			       1);
	if (fcd->nr_free_ranges < free_threshold)
		queue_delayed_work(system_long_wq, &fcd->free_work,
				   msecs_to_jiffies(delay_ms));
}

static void kick_dmap_free_worker(struct fuse_conn_dax *fcd,
				  unsigned long delay_ms)
{
	spin_lock(&fcd->lock);
	__kick_dmap_free_worker(fcd, delay_ms);
	spin_unlock(&fcd->lock);
}

static struct fuse_dax_mapping *alloc_dax_mapping(struct fuse_conn_dax *fcd)
{
	struct fuse_dax_mapping *dmap;

	spin_lock(&fcd->lock);
	dmap = list_first_entry_or_null(&fcd->free_ranges,
					struct fuse_dax_mapping, list);
	if (dmap) {
		list_del_init(&dmap->list);
		WARN_ON(fcd->nr_free_ranges <= 0);
		fcd->nr_free_ranges--;
	}
	spin_unlock(&fcd->lock);

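	/*
	 * Kick the reclaim worker whether or not the allocation succeeded;
	 * it only queues work if the free count has dropped below the
	 * reclaim threshold, and a 0 ms delay queues it immediately so the
	 * pool is replenished before it runs dry.
	 */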
	kick_dmap_free_worker(fcd, 0);
	return dmap;
}

/* This assumes fcd->lock is held */
static void __dmap_remove_busy_list(struct fuse_conn_dax *fcd,
				    struct fuse_dax_mapping *dmap)
{
	list_del_init(&dmap->busy_list);
	WARN_ON(fcd->nr_busy_ranges == 0);
	fcd->nr_busy_ranges--;
}

static void dmap_remove_busy_list(struct fuse_conn_dax *fcd,
				  struct fuse_dax_mapping *dmap)
{
	spin_lock(&fcd->lock);
	__dmap_remove_busy_list(fcd, dmap);
	spin_unlock(&fcd->lock);
}

/* This assumes fcd->lock is held */
static void __dmap_add_to_free_pool(struct fuse_conn_dax *fcd,
				    struct fuse_dax_mapping *dmap)
{
	list_add_tail(&dmap->list, &fcd->free_ranges);
	fcd->nr_free_ranges++;
	wake_up(&fcd->range_waitq);
}

static void dmap_add_to_free_pool(struct fuse_conn_dax *fcd,
				  struct fuse_dax_mapping *dmap)
{
	/* Return fuse_dax_mapping to free list */
	spin_lock(&fcd->lock);
	__dmap_add_to_free_pool(fcd, dmap);
	spin_unlock(&fcd->lock);
}

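/*
 * Ask the fuse daemon to map FUSE_DAX_SZ bytes of this file, starting at
 * file offset start_idx << FUSE_DAX_SHIFT, into the DAX window at
 * dmap->window_offset. On success, and unless this is an upgrade of an
 * existing read-only mapping, the dmap is inserted into the inode's
 * interval tree and onto the connection's busy list.
 */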
static int fuse_setup_one_mapping(struct inode *inode, unsigned long start_idx,
				  struct fuse_dax_mapping *dmap, bool writable,
				  bool upgrade)
{
	struct fuse_mount *fm = get_fuse_mount(inode);
	struct fuse_conn_dax *fcd = fm->fc->dax;
	struct fuse_inode *fi = get_fuse_inode(inode);
	struct fuse_setupmapping_in inarg;
	loff_t offset = start_idx << FUSE_DAX_SHIFT;
	FUSE_ARGS(args);
	ssize_t err;

	WARN_ON(fcd->nr_free_ranges < 0);

	/* Ask fuse daemon to setup mapping */
	memset(&inarg, 0, sizeof(inarg));
	inarg.foffset = offset;
	inarg.fh = -1;
	inarg.moffset = dmap->window_offset;
	inarg.len = FUSE_DAX_SZ;
	inarg.flags |= FUSE_SETUPMAPPING_FLAG_READ;
	if (writable)
		inarg.flags |= FUSE_SETUPMAPPING_FLAG_WRITE;
	args.opcode = FUSE_SETUPMAPPING;
	args.nodeid = fi->nodeid;
	args.in_numargs = 1;
	args.in_args[0].size = sizeof(inarg);
	args.in_args[0].value = &inarg;
	err = fuse_simple_request(fm, &args);
	if (err < 0)
		return err;
	dmap->writable = writable;
	if (!upgrade) {
		/*
		 * We don't take a reference on the inode. The inode is valid
		 * right now, and when the inode goes away, cleanup logic
		 * should first clean up the dmap entries.
		 */
		dmap->inode = inode;
		dmap->itn.start = dmap->itn.last = start_idx;
		/* Protected by fi->dax->sem */
		interval_tree_insert(&dmap->itn, &fi->dax->tree);
		fi->dax->nr++;
		spin_lock(&fcd->lock);
		list_add_tail(&dmap->busy_list, &fcd->busy_ranges);
		fcd->nr_busy_ranges++;
		spin_unlock(&fcd->lock);
	}
	return 0;
}

static int fuse_send_removemapping(struct inode *inode,
				   struct fuse_removemapping_in *inargp,
				   struct fuse_removemapping_one *remove_one)
{
	struct fuse_inode *fi = get_fuse_inode(inode);
	struct fuse_mount *fm = get_fuse_mount(inode);
	FUSE_ARGS(args);

	args.opcode = FUSE_REMOVEMAPPING;
	args.nodeid = fi->nodeid;
	args.in_numargs = 2;
	args.in_args[0].size = sizeof(*inargp);
	args.in_args[0].value = inargp;
	args.in_args[1].size = inargp->count * sizeof(*remove_one);
	args.in_args[1].value = remove_one;
	return fuse_simple_request(fm, &args);
}

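/*
 * Send FUSE_REMOVEMAPPING for every dmap on @to_remove, batching at most
 * FUSE_REMOVEMAPPING_MAX_ENTRY entries per request so the payload stays
 * bounded no matter how many ranges are being torn down.
 */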
static int dmap_removemapping_list(struct inode *inode, unsigned int num,
				   struct list_head *to_remove)
{
	struct fuse_removemapping_one *remove_one, *ptr;
	struct fuse_removemapping_in inarg;
	struct fuse_dax_mapping *dmap;
	int ret = 0, i = 0, nr_alloc;

	nr_alloc = min_t(unsigned int, num, FUSE_REMOVEMAPPING_MAX_ENTRY);
	remove_one = kmalloc_array(nr_alloc, sizeof(*remove_one), GFP_NOFS);
	if (!remove_one)
		return -ENOMEM;

	ptr = remove_one;
	list_for_each_entry(dmap, to_remove, list) {
		ptr->moffset = dmap->window_offset;
		ptr->len = dmap->length;
		ptr++;
		i++;
		num--;
		if (i >= nr_alloc || num == 0) {
			memset(&inarg, 0, sizeof(inarg));
			inarg.count = i;
			ret = fuse_send_removemapping(inode, &inarg,
						      remove_one);
			if (ret)
				goto out;
			ptr = remove_one;
			i = 0;
		}
	}
out:
	kfree(remove_one);
	return ret;
}

/*
 * Cleanup dmap entry and add back to free list. This should be called with
 * fcd->lock held.
 */
static void dmap_reinit_add_to_free_pool(struct fuse_conn_dax *fcd,
					 struct fuse_dax_mapping *dmap)
{
	pr_debug("fuse: freeing memory range start_idx=0x%lx end_idx=0x%lx window_offset=0x%llx length=0x%llx\n",
		 dmap->itn.start, dmap->itn.last, dmap->window_offset,
		 dmap->length);
	__dmap_remove_busy_list(fcd, dmap);
	dmap->inode = NULL;
	dmap->itn.start = dmap->itn.last = 0;
	__dmap_add_to_free_pool(fcd, dmap);
}

/*
 * Free inode dmap entries whose range falls inside [start, end].
 * Does not take any locks. At this point in time it should only be
 * called from the evict_inode() path, where we know all dmap entries
 * can be reclaimed.
 */
static void inode_reclaim_dmap_range(struct fuse_conn_dax *fcd,
				     struct inode *inode,
				     loff_t start, loff_t end)
{
	struct fuse_inode *fi = get_fuse_inode(inode);
	struct fuse_dax_mapping *dmap, *n;
	int err, num = 0;
	LIST_HEAD(to_remove);
	unsigned long start_idx = start >> FUSE_DAX_SHIFT;
	unsigned long end_idx = end >> FUSE_DAX_SHIFT;
	struct interval_tree_node *node;

	while (1) {
		node = interval_tree_iter_first(&fi->dax->tree, start_idx,
						end_idx);
		if (!node)
			break;
		dmap = node_to_dmap(node);
		/* inode is going away. There should not be any users of dmap */
		WARN_ON(refcount_read(&dmap->refcnt) > 1);
		interval_tree_remove(&dmap->itn, &fi->dax->tree);
		num++;
		list_add(&dmap->list, &to_remove);
	}

	/* Nothing to remove */
	if (list_empty(&to_remove))
		return;

	WARN_ON(fi->dax->nr < num);
	fi->dax->nr -= num;
	err = dmap_removemapping_list(inode, num, &to_remove);
	if (err && err != -ENOTCONN) {
		pr_warn("Failed to removemappings. start=0x%llx end=0x%llx\n",
			start, end);
	}
	spin_lock(&fcd->lock);
	list_for_each_entry_safe(dmap, n, &to_remove, list) {
		list_del_init(&dmap->list);
		dmap_reinit_add_to_free_pool(fcd, dmap);
	}
	spin_unlock(&fcd->lock);
}

static int dmap_removemapping_one(struct inode *inode,
				  struct fuse_dax_mapping *dmap)
{
	struct fuse_removemapping_one forget_one;
	struct fuse_removemapping_in inarg;

	memset(&inarg, 0, sizeof(inarg));
	inarg.count = 1;
	memset(&forget_one, 0, sizeof(forget_one));
	forget_one.moffset = dmap->window_offset;
	forget_one.len = dmap->length;

	return fuse_send_removemapping(inode, &inarg, &forget_one);
}

/*
 * This is called from evict_inode(), and by that time the inode is going
 * away. So this function does not take any locks (like fi->dax->sem) for
 * traversing the fuse inode interval tree. If that lock were taken, the
 * lock validator would complain about a deadlock with the fs_reclaim lock.
 */
void fuse_dax_inode_cleanup(struct inode *inode)
{
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_inode *fi = get_fuse_inode(inode);

	/*
	 * fuse_evict_inode() has already called truncate_inode_pages_final()
	 * before we arrive here. So we should not have to worry about any
	 * pages/exception entries still associated with the inode.
	 */
	inode_reclaim_dmap_range(fc->dax, inode, 0, -1);
	WARN_ON(fi->dax->nr);
}

static void fuse_fill_iomap_hole(struct iomap *iomap, loff_t length)
{
	iomap->addr = IOMAP_NULL_ADDR;
	iomap->length = length;
	iomap->type = IOMAP_HOLE;
}

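/*
 * Translate a file range into a DAX-window range for iomap. Illustrative
 * arithmetic with the default 2 MiB ranges (FUSE_DAX_SHIFT == 21): for
 * pos = 5 MiB, the covering dmap has itn.start == 2 (range base 4 MiB),
 * so offset = 1 MiB and iomap->addr = dmap->window_offset + 1 MiB.
 */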
static void fuse_fill_iomap(struct inode *inode, loff_t pos, loff_t length,
			    struct iomap *iomap, struct fuse_dax_mapping *dmap,
			    unsigned int flags)
{
	loff_t offset, len;
	loff_t i_size = i_size_read(inode);

	offset = pos - (dmap->itn.start << FUSE_DAX_SHIFT);
	len = min(length, dmap->length - offset);

	/* If length is beyond end of file, truncate further */
	if (pos + len > i_size)
		len = i_size - pos;

	if (len > 0) {
		iomap->addr = dmap->window_offset + offset;
		iomap->length = len;
		if (flags & IOMAP_FAULT)
			iomap->length = ALIGN(len, PAGE_SIZE);
		iomap->type = IOMAP_MAPPED;
		/*
		 * Increase refcnt so that reclaim code knows this dmap is in
		 * use. This assumes fi->dax->sem is held either shared or
		 * exclusive.
		 */
		refcount_inc(&dmap->refcnt);

		/* iomap->private should be NULL */
		WARN_ON_ONCE(iomap->private);
		iomap->private = dmap;
	} else {
		/* Mapping beyond end of file is a hole */
		fuse_fill_iomap_hole(iomap, length);
	}
}

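/*
 * No mapping exists for this index yet: grab a free range (fault path)
 * or reclaim one inline (non-fault path), ask the daemon to map it, and
 * publish it in the inode's interval tree. The tree is re-checked under
 * the exclusive lock because another thread may have raced us and set up
 * the mapping while we were allocating.
 */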
static int fuse_setup_new_dax_mapping(struct inode *inode, loff_t pos,
				      loff_t length, unsigned int flags,
				      struct iomap *iomap)
{
	struct fuse_inode *fi = get_fuse_inode(inode);
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_conn_dax *fcd = fc->dax;
	struct fuse_dax_mapping *dmap, *alloc_dmap = NULL;
	int ret;
	bool writable = flags & IOMAP_WRITE;
	unsigned long start_idx = pos >> FUSE_DAX_SHIFT;
	struct interval_tree_node *node;

	/*
	 * Can't do inline reclaim in the fault path. We call
	 * dax_layout_busy_page() before we free a range. And
	 * fuse_wait_dax_page() drops the fi->i_mmap_sem lock and requires it.
	 * In the fault path we enter with fi->i_mmap_sem held and can't drop
	 * it. Also in the fault path we hold fi->i_mmap_sem shared and not
	 * exclusive, so that creates further issues with fuse_wait_dax_page().
	 * Hence return -EAGAIN and fuse_dax_fault() will wait for a memory
	 * range to become free and retry.
	 */
	if (flags & IOMAP_FAULT) {
		alloc_dmap = alloc_dax_mapping(fcd);
		if (!alloc_dmap)
			return -EAGAIN;
	} else {
		alloc_dmap = alloc_dax_mapping_reclaim(fcd, inode);
		if (IS_ERR(alloc_dmap))
			return PTR_ERR(alloc_dmap);
	}

	/* If we are here, we should have memory allocated */
	if (WARN_ON(!alloc_dmap))
		return -EIO;

	/*
	 * Take the write lock so that only one caller can try to set up the
	 * mapping while the others wait.
	 */
	down_write(&fi->dax->sem);
	/*
	 * We dropped the lock. Check again whether somebody else set up
	 * the mapping already.
	 */
	node = interval_tree_iter_first(&fi->dax->tree, start_idx, start_idx);
	if (node) {
		dmap = node_to_dmap(node);
		fuse_fill_iomap(inode, pos, length, iomap, dmap, flags);
		dmap_add_to_free_pool(fcd, alloc_dmap);
		up_write(&fi->dax->sem);
		return 0;
	}

	/* Setup one mapping */
	ret = fuse_setup_one_mapping(inode, pos >> FUSE_DAX_SHIFT, alloc_dmap,
				     writable, false);
	if (ret < 0) {
		dmap_add_to_free_pool(fcd, alloc_dmap);
		up_write(&fi->dax->sem);
		return ret;
	}
	fuse_fill_iomap(inode, pos, length, iomap, alloc_dmap, flags);
	up_write(&fi->dax->sem);
	return 0;
}

static int fuse_upgrade_dax_mapping(struct inode *inode, loff_t pos,
				    loff_t length, unsigned int flags,
				    struct iomap *iomap)
{
	struct fuse_inode *fi = get_fuse_inode(inode);
	struct fuse_dax_mapping *dmap;
	int ret;
	unsigned long idx = pos >> FUSE_DAX_SHIFT;
	struct interval_tree_node *node;

	/*
	 * Take the exclusive lock so that only one caller can try to set up
	 * the mapping and others wait.
	 */
	down_write(&fi->dax->sem);
	node = interval_tree_iter_first(&fi->dax->tree, idx, idx);

	/*
	 * We are holding either the inode lock or i_mmap_sem, and that should
	 * ensure that the dmap can't be truncated. We are holding a reference
	 * on the dmap, and that should make sure it can't be reclaimed. So
	 * the dmap should still be there in the tree despite the fact that we
	 * dropped and re-acquired the fi->dax->sem lock.
	 */
	ret = -EIO;
	if (WARN_ON(!node))
		goto out_err;

	dmap = node_to_dmap(node);

	/*
	 * We took an extra reference on the dmap to make sure it's not
	 * reclaimed. Now we hold fi->dax->sem and that reference is not
	 * needed anymore. Drop it.
	 */
	if (refcount_dec_and_test(&dmap->refcnt)) {
		/*
		 * refcount should not hit 0. This object only goes
		 * away when the fuse connection goes away.
		 */
		WARN_ON_ONCE(1);
	}

	/*
	 * Maybe another thread already upgraded the mapping while we were
	 * not holding the lock.
	 */
	if (dmap->writable) {
		ret = 0;
		goto out_fill_iomap;
	}

	ret = fuse_setup_one_mapping(inode, pos >> FUSE_DAX_SHIFT, dmap, true,
				     true);
	if (ret < 0)
		goto out_err;
out_fill_iomap:
	fuse_fill_iomap(inode, pos, length, iomap, dmap, flags);
out_err:
	up_write(&fi->dax->sem);
	return ret;
}

/*
 * This is just for DAX and the mapping is ephemeral, do not use it for other
 * purposes since there is no block device with a permanent mapping.
 */
static int fuse_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
			    unsigned int flags, struct iomap *iomap,
			    struct iomap *srcmap)
{
	struct fuse_inode *fi = get_fuse_inode(inode);
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_dax_mapping *dmap;
	bool writable = flags & IOMAP_WRITE;
	unsigned long start_idx = pos >> FUSE_DAX_SHIFT;
	struct interval_tree_node *node;

	/* We don't support FIEMAP */
	if (WARN_ON(flags & IOMAP_REPORT))
		return -EIO;

	iomap->offset = pos;
	iomap->flags = 0;
	iomap->bdev = NULL;
	iomap->dax_dev = fc->dax->dev;

	/*
	 * Both the read/write and mmap paths can race here. So we need
	 * something to make sure that if we are setting up a mapping, the
	 * other path waits.
	 *
	 * For now, use a semaphore for this. It probably needs to be
	 * optimized later.
	 */
	down_read(&fi->dax->sem);
	node = interval_tree_iter_first(&fi->dax->tree, start_idx, start_idx);
	if (node) {
		dmap = node_to_dmap(node);
		if (writable && !dmap->writable) {
			/*
			 * Upgrade the read-only mapping to read-write. This
			 * requires the exclusive fi->dax->sem lock as we
			 * don't want two threads to be trying to do this
			 * simultaneously for the same dmap. So drop the
			 * shared lock and acquire the exclusive lock.
			 *
			 * Before dropping fi->dax->sem, take a reference on
			 * the dmap so that it's not freed by range reclaim.
			 */
			refcount_inc(&dmap->refcnt);
			up_read(&fi->dax->sem);
			pr_debug("%s: Upgrading mapping at offset 0x%llx length 0x%llx\n",
				 __func__, pos, length);
			return fuse_upgrade_dax_mapping(inode, pos, length,
							flags, iomap);
		} else {
			fuse_fill_iomap(inode, pos, length, iomap, dmap, flags);
			up_read(&fi->dax->sem);
			return 0;
		}
	} else {
		up_read(&fi->dax->sem);
		pr_debug("%s: no mapping at offset 0x%llx length 0x%llx\n",
			 __func__, pos, length);
		if (pos >= i_size_read(inode))
			goto iomap_hole;

		return fuse_setup_new_dax_mapping(inode, pos, length, flags,
						  iomap);
	}

	/*
	 * If a read beyond the end of the file happens, fs code seems to
	 * return it as a hole.
	 */
iomap_hole:
	fuse_fill_iomap_hole(iomap, length);
	pr_debug("%s returning hole mapping. pos=0x%llx length_asked=0x%llx length_returned=0x%llx\n",
		 __func__, pos, length, iomap->length);
	return 0;
}

static int fuse_iomap_end(struct inode *inode, loff_t pos, loff_t length,
			  ssize_t written, unsigned int flags,
			  struct iomap *iomap)
{
	struct fuse_dax_mapping *dmap = iomap->private;

	if (dmap) {
		if (refcount_dec_and_test(&dmap->refcnt)) {
			/*
			 * refcount should not hit 0. This object only goes
			 * away when the fuse connection goes away.
			 */
			WARN_ON_ONCE(1);
		}
	}

	/*
	 * DAX writes beyond end-of-file aren't handled using iomap, so the
	 * file size is unchanged and there is nothing to do here.
	 */
	return 0;
}

static const struct iomap_ops fuse_iomap_ops = {
	.iomap_begin = fuse_iomap_begin,
	.iomap_end = fuse_iomap_end,
};
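
/*
 * These ops are handed to the generic DAX iomap machinery: dax_iomap_rw()
 * for read/write and dax_iomap_fault() for page faults (see
 * fuse_dax_read_iter(), fuse_dax_write_iter() and __fuse_dax_fault()
 * below). iomap_begin pins a dmap via its refcount; iomap_end drops it.
 */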

static void fuse_wait_dax_page(struct inode *inode)
{
	struct fuse_inode *fi = get_fuse_inode(inode);

	up_write(&fi->i_mmap_sem);
	schedule();
	down_write(&fi->i_mmap_sem);
}

/*
 * Should be called with fi->i_mmap_sem held exclusively. Waits until the
 * busy page's refcount drops back to 1, i.e. no more get_user_pages/DMA
 * references remain; fuse_wait_dax_page() drops and re-takes i_mmap_sem
 * around each sleep.
 */
static int __fuse_dax_break_layouts(struct inode *inode, bool *retry,
				    loff_t start, loff_t end)
{
	struct page *page;

	page = dax_layout_busy_page_range(inode->i_mapping, start, end);
	if (!page)
		return 0;

	*retry = true;
	return ___wait_var_event(&page->_refcount,
			atomic_read(&page->_refcount) == 1, TASK_INTERRUPTIBLE,
			0, 0, fuse_wait_dax_page(inode));
}

/* dmap_end == 0 leads to unmapping of the whole file */
int fuse_dax_break_layouts(struct inode *inode, u64 dmap_start,
			   u64 dmap_end)
{
	bool retry;
	int ret;

	do {
		retry = false;
		ret = __fuse_dax_break_layouts(inode, &retry, dmap_start,
					       dmap_end);
	} while (ret == 0 && retry);

	return ret;
}

ssize_t fuse_dax_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	ssize_t ret;

	if (iocb->ki_flags & IOCB_NOWAIT) {
		if (!inode_trylock_shared(inode))
			return -EAGAIN;
	} else {
		inode_lock_shared(inode);
	}

	ret = dax_iomap_rw(iocb, to, &fuse_iomap_ops);
	inode_unlock_shared(inode);

	/* TODO file_accessed(iocb->f_filp) */
	return ret;
}

static bool file_extending_write(struct kiocb *iocb, struct iov_iter *from)
{
	struct inode *inode = file_inode(iocb->ki_filp);

	return (iov_iter_rw(from) == WRITE &&
		((iocb->ki_pos) >= i_size_read(inode) ||
		 (iocb->ki_pos + iov_iter_count(from) > i_size_read(inode))));
}

static ssize_t fuse_dax_direct_write(struct kiocb *iocb, struct iov_iter *from)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	struct fuse_io_priv io = FUSE_IO_PRIV_SYNC(iocb);
	ssize_t ret;

	ret = fuse_direct_io(&io, from, &iocb->ki_pos, FUSE_DIO_WRITE);
	if (ret < 0)
		return ret;

	fuse_invalidate_attr(inode);
	fuse_write_update_size(inode, iocb->ki_pos);
	return ret;
}

ssize_t fuse_dax_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	ssize_t ret;

	if (iocb->ki_flags & IOCB_NOWAIT) {
		if (!inode_trylock(inode))
			return -EAGAIN;
	} else {
		inode_lock(inode);
	}

	ret = generic_write_checks(iocb, from);
	if (ret <= 0)
		goto out;

	ret = file_remove_privs(iocb->ki_filp);
	if (ret)
		goto out;
	/* TODO file_update_time() but we don't want metadata I/O */

	/*
	 * Do not use dax for file-extending writes, as the write and the
	 * on-disk i_size increase are not atomic otherwise.
	 */
	if (file_extending_write(iocb, from))
		ret = fuse_dax_direct_write(iocb, from);
	else
		ret = dax_iomap_rw(iocb, from, &fuse_iomap_ops);

out:
	inode_unlock(inode);

	if (ret > 0)
		ret = generic_write_sync(iocb, ret);
	return ret;
}

static int fuse_dax_writepages(struct address_space *mapping,
			       struct writeback_control *wbc)
{
	struct inode *inode = mapping->host;
	struct fuse_conn *fc = get_fuse_conn(inode);

	return dax_writeback_mapping_range(mapping, fc->dax->dev, wbc);
}

static vm_fault_t __fuse_dax_fault(struct vm_fault *vmf,
				   enum page_entry_size pe_size, bool write)
{
	vm_fault_t ret;
	struct inode *inode = file_inode(vmf->vma->vm_file);
	struct super_block *sb = inode->i_sb;
	pfn_t pfn;
	int error = 0;
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_conn_dax *fcd = fc->dax;
	bool retry = false;

	if (write)
		sb_start_pagefault(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 805) retry:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 806) if (retry && !(fcd->nr_free_ranges > 0))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 807) wait_event(fcd->range_waitq, (fcd->nr_free_ranges > 0));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 808)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 809) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 810) * We need to serialize against not only truncate but also against
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 811) * fuse dax memory range reclaim. While a range is being reclaimed,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 812) * we do not want any read/write/mmap to make progress and try
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 813) * to populate page cache or access memory we are trying to free.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 814) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 815) down_read(&get_fuse_inode(inode)->i_mmap_sem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 816) ret = dax_iomap_fault(vmf, pe_size, &pfn, &error, &fuse_iomap_ops);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 817) if ((ret & VM_FAULT_ERROR) && error == -EAGAIN) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 818) error = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 819) retry = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 820) up_read(&get_fuse_inode(inode)->i_mmap_sem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 821) goto retry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 822) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 823)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 824) if (ret & VM_FAULT_NEEDDSYNC)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 825) ret = dax_finish_sync_fault(vmf, pe_size, pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 826) up_read(&get_fuse_inode(inode)->i_mmap_sem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 827)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 828) if (write)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 829) sb_end_pagefault(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 830)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 831) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 832) }
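
/*
 * Worked example of the retry protocol in __fuse_dax_fault() (two tasks,
 * no free ranges left), derived from the code above:
 *
 *	fault path				reclaim worker
 *	----------				--------------
 *	down_read(i_mmap_sem)
 *	dax_iomap_fault() -> VM_FAULT_ERROR,
 *	    error == -EAGAIN
 *	up_read(i_mmap_sem), retry = true
 *						down_write(i_mmap_sem)
 *						free a range, wake range_waitq
 *	wait_event(range_waitq) returns
 *	down_read(i_mmap_sem), fault retried
 *
 * Dropping i_mmap_sem before sleeping is what lets the worker make
 * progress; see the deadlock note in alloc_dax_mapping_reclaim().
 */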
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 833)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 834) static vm_fault_t fuse_dax_fault(struct vm_fault *vmf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 835) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 836) return __fuse_dax_fault(vmf, PE_SIZE_PTE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 837) vmf->flags & FAULT_FLAG_WRITE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 838) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 839)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 840) static vm_fault_t fuse_dax_huge_fault(struct vm_fault *vmf,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 841) enum page_entry_size pe_size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 842) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 843) return __fuse_dax_fault(vmf, pe_size, vmf->flags & FAULT_FLAG_WRITE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 844) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 845)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 846) static vm_fault_t fuse_dax_page_mkwrite(struct vm_fault *vmf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 847) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 848) return __fuse_dax_fault(vmf, PE_SIZE_PTE, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 849) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 850)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 851) static vm_fault_t fuse_dax_pfn_mkwrite(struct vm_fault *vmf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 852) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 853) return __fuse_dax_fault(vmf, PE_SIZE_PTE, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 854) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 855)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 856) static const struct vm_operations_struct fuse_dax_vm_ops = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 857) .fault = fuse_dax_fault,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 858) .huge_fault = fuse_dax_huge_fault,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 859) .page_mkwrite = fuse_dax_page_mkwrite,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 860) .pfn_mkwrite = fuse_dax_pfn_mkwrite,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 861) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 862)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 863) int fuse_dax_mmap(struct file *file, struct vm_area_struct *vma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 864) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 865) file_accessed(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 866) vma->vm_ops = &fuse_dax_vm_ops;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 867) vma->vm_flags |= VM_MIXEDMAP | VM_HUGEPAGE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 868) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 869) }
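
/*
 * A hypothetical sketch of wiring the DAX entry points into a
 * file_operations instance; the structure itself is illustrative, only
 * fuse_dax_mmap() and fuse_dax_write_iter() are defined in this file:
 *
 *	static const struct file_operations example_dax_fops = {
 *		.mmap		= fuse_dax_mmap,
 *		.write_iter	= fuse_dax_write_iter,
 *	};
 */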
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 870)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 871) static int dmap_writeback_invalidate(struct inode *inode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 872) struct fuse_dax_mapping *dmap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 873) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 874) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 875) loff_t start_pos = dmap->itn.start << FUSE_DAX_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 876) loff_t end_pos = (start_pos + FUSE_DAX_SZ - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 877)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 878) ret = filemap_fdatawrite_range(inode->i_mapping, start_pos, end_pos);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 879) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 880) pr_debug("fuse: filemap_fdatawrite_range() failed. err=%d start_pos=0x%llx, end_pos=0x%llx\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 881) ret, start_pos, end_pos);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 882) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 883) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 884)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 885) ret = invalidate_inode_pages2_range(inode->i_mapping,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 886) start_pos >> PAGE_SHIFT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 887) end_pos >> PAGE_SHIFT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 888) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 889) pr_debug("fuse: invalidate_inode_pages2_range() failed err=%d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 890) ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 891)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 892) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 893) }
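
/*
 * Worked example of the range math in dmap_writeback_invalidate(), with
 * FUSE_DAX_SHIFT == 21 (2 MiB ranges) and 4 KiB pages, for
 * dmap->itn.start == 3:
 *
 *	start_pos = 3 << 21             = 0x600000
 *	end_pos   = start_pos + 2M - 1  = 0x7fffff
 *	invalidated page indexes        = 0x600 .. 0x7ff
 *
 * i.e. exactly the FUSE_DAX_PAGES (512) page cache pages backed by this
 * one memory range.
 */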
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 894)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 895) static int reclaim_one_dmap_locked(struct inode *inode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 896) struct fuse_dax_mapping *dmap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 897) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 898) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 899) struct fuse_inode *fi = get_fuse_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 900)
	/*
	 * igrab() was done to make sure the inode won't go away under us,
	 * and this further avoids a race with evict().
	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 905) ret = dmap_writeback_invalidate(inode, dmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 906) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 907) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 908)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 909) /* Remove dax mapping from inode interval tree now */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 910) interval_tree_remove(&dmap->itn, &fi->dax->tree);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 911) fi->dax->nr--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 912)
	/*
	 * It is possible that umount/shutdown has killed the fuse connection
	 * and the worker thread is trying to reclaim memory in parallel.
	 * Don't warn in that case.
	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 917) ret = dmap_removemapping_one(inode, dmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 918) if (ret && ret != -ENOTCONN) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 919) pr_warn("Failed to remove mapping. offset=0x%llx len=0x%llx ret=%d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 920) dmap->window_offset, dmap->length, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 921) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 922) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 923) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 924)
/*
 * Find the first mapped dmap for an inode that is not in use and return it.
 * The caller must hold fi->dax->sem, either shared or exclusive.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 928) static struct fuse_dax_mapping *inode_lookup_first_dmap(struct inode *inode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 929) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 930) struct fuse_inode *fi = get_fuse_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 931) struct fuse_dax_mapping *dmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 932) struct interval_tree_node *node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 933)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 934) for (node = interval_tree_iter_first(&fi->dax->tree, 0, -1); node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 935) node = interval_tree_iter_next(node, 0, -1)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 936) dmap = node_to_dmap(node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 937) /* still in use. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 938) if (refcount_read(&dmap->refcnt) > 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 939) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 940)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 941) return dmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 942) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 943)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 944) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 945) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 946)
/*
 * Find the first mapping in the tree, reclaim it, and return it. Do not
 * add it back to the free pool.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 951) static struct fuse_dax_mapping *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 952) inode_inline_reclaim_one_dmap(struct fuse_conn_dax *fcd, struct inode *inode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 953) bool *retry)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 954) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 955) struct fuse_inode *fi = get_fuse_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 956) struct fuse_dax_mapping *dmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 957) u64 dmap_start, dmap_end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 958) unsigned long start_idx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 959) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 960) struct interval_tree_node *node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 961)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 962) down_write(&fi->i_mmap_sem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 963)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 964) /* Lookup a dmap and corresponding file offset to reclaim. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 965) down_read(&fi->dax->sem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 966) dmap = inode_lookup_first_dmap(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 967) if (dmap) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 968) start_idx = dmap->itn.start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 969) dmap_start = start_idx << FUSE_DAX_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 970) dmap_end = dmap_start + FUSE_DAX_SZ - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 971) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 972) up_read(&fi->dax->sem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 973)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 974) if (!dmap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 975) goto out_mmap_sem;
	/*
	 * Make sure there are no outstanding references to the inode's pages
	 * taken via get_user_pages().
	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 980) ret = fuse_dax_break_layouts(inode, dmap_start, dmap_end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 981) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 982) pr_debug("fuse: fuse_dax_break_layouts() failed. err=%d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 983) ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 984) dmap = ERR_PTR(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 985) goto out_mmap_sem;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 986) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 987)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 988) down_write(&fi->dax->sem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 989) node = interval_tree_iter_first(&fi->dax->tree, start_idx, start_idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 990) /* Range already got reclaimed by somebody else */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 991) if (!node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 992) if (retry)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 993) *retry = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 994) goto out_write_dmap_sem;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 995) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 996)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 997) dmap = node_to_dmap(node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 998) /* still in use. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 999) if (refcount_read(&dmap->refcnt) > 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000) dmap = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) if (retry)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002) *retry = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003) goto out_write_dmap_sem;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006) ret = reclaim_one_dmap_locked(inode, dmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007) if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008) dmap = ERR_PTR(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009) goto out_write_dmap_sem;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012) /* Clean up dmap. Do not add back to free list */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013) dmap_remove_busy_list(fcd, dmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014) dmap->inode = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015) dmap->itn.start = dmap->itn.last = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017) pr_debug("fuse: %s: inline reclaimed memory range. inode=%p, window_offset=0x%llx, length=0x%llx\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018) __func__, inode, dmap->window_offset, dmap->length);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020) out_write_dmap_sem:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) up_write(&fi->dax->sem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022) out_mmap_sem:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023) up_write(&fi->i_mmap_sem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) return dmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025) }
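
/*
 * Lock ordering used by inode_inline_reclaim_one_dmap() above, in the
 * order taken:
 *
 *	1. fi->i_mmap_sem (write)    blocks new dax faults on this inode
 *	2. fi->dax->sem (read)       lookup of a candidate dmap
 *	3. fuse_dax_break_layouts()  waits out get_user_pages() references
 *	4. fi->dax->sem (write)      re-lookup and actual removal
 *
 * The candidate found under the read lock in step 2 can be reclaimed by
 * somebody else before step 4, which is why the re-lookup may set *retry
 * instead of returning a dmap.
 */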
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027) static struct fuse_dax_mapping *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028) alloc_dax_mapping_reclaim(struct fuse_conn_dax *fcd, struct inode *inode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030) struct fuse_dax_mapping *dmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031) struct fuse_inode *fi = get_fuse_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033) while (1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034) bool retry = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036) dmap = alloc_dax_mapping(fcd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037) if (dmap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038) return dmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040) dmap = inode_inline_reclaim_one_dmap(fcd, inode, &retry);
		/*
		 * Either we got a mapping or an error; return in both
		 * cases.
		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045) if (dmap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046) return dmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047)
		/*
		 * If we could not reclaim a mapping because it had a
		 * reference or hit some other temporary failure, try again.
		 * We want to give up on inline reclaim only if there is no
		 * range assigned to this inode. Otherwise a deadlock is
		 * possible: if we sleep with fi->i_mmap_sem held, the worker
		 * that frees memory cannot make progress because it needs
		 * fi->i_mmap_sem itself. So sleep only if fi->dax->nr == 0.
		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057) if (retry)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058) continue;
		/*
		 * There are no mappings which can be reclaimed. Wait for one.
		 * We are not holding fi->dax->sem, so it is possible that a
		 * range gets added now. But as we are not holding
		 * fi->i_mmap_sem, the worker should still be able to free up
		 * a range and wake us up.
		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066) if (!fi->dax->nr && !(fcd->nr_free_ranges > 0)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067) if (wait_event_killable_exclusive(fcd->range_waitq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068) (fcd->nr_free_ranges > 0))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069) return ERR_PTR(-EINTR);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075) static int lookup_and_reclaim_dmap_locked(struct fuse_conn_dax *fcd,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076) struct inode *inode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077) unsigned long start_idx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080) struct fuse_inode *fi = get_fuse_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081) struct fuse_dax_mapping *dmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082) struct interval_tree_node *node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083)
	/* Find the fuse dax mapping at this file offset in the inode. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085) node = interval_tree_iter_first(&fi->dax->tree, start_idx, start_idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087) /* Range already got cleaned up by somebody else */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088) if (!node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090) dmap = node_to_dmap(node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092) /* still in use. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093) if (refcount_read(&dmap->refcnt) > 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096) ret = reclaim_one_dmap_locked(inode, dmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099)
	/* Clean up the dmap entry and add it back to the free list */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101) spin_lock(&fcd->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102) dmap_reinit_add_to_free_pool(fcd, dmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103) spin_unlock(&fcd->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108) * Free a range of memory.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109) * Locking:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110) * 1. Take fi->i_mmap_sem to block dax faults.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111) * 2. Take fi->dax->sem to protect interval tree and also to make sure
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112) * read/write can not reuse a dmap which we might be freeing.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1113) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114) static int lookup_and_reclaim_dmap(struct fuse_conn_dax *fcd,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115) struct inode *inode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116) unsigned long start_idx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117) unsigned long end_idx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120) struct fuse_inode *fi = get_fuse_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121) loff_t dmap_start = start_idx << FUSE_DAX_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122) loff_t dmap_end = (dmap_start + FUSE_DAX_SZ) - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124) down_write(&fi->i_mmap_sem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125) ret = fuse_dax_break_layouts(inode, dmap_start, dmap_end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126) if (ret) {
		pr_debug("fuse: fuse_dax_break_layouts() failed. err=%d\n",
			 ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129) goto out_mmap_sem;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132) down_write(&fi->dax->sem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133) ret = lookup_and_reclaim_dmap_locked(fcd, inode, start_idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134) up_write(&fi->dax->sem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135) out_mmap_sem:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136) up_write(&fi->i_mmap_sem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140) static int try_to_free_dmap_chunks(struct fuse_conn_dax *fcd,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141) unsigned long nr_to_free)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143) struct fuse_dax_mapping *dmap, *pos, *temp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144) int ret, nr_freed = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145) unsigned long start_idx = 0, end_idx = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146) struct inode *inode = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147)
	/* Pick the first busy range and free it for now */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149) while (1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150) if (nr_freed >= nr_to_free)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153) dmap = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154) spin_lock(&fcd->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156) if (!fcd->nr_busy_ranges) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157) spin_unlock(&fcd->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161) list_for_each_entry_safe(pos, temp, &fcd->busy_ranges,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162) busy_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163) /* skip this range if it's in use. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164) if (refcount_read(&pos->refcnt) > 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167) inode = igrab(pos->inode);
			/*
			 * This inode is going away; that will free up all
			 * its ranges anyway. Continue to the next range.
			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173) if (!inode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174) continue;
			/*
			 * Take this element off the list and add it to the
			 * tail. If it can't be freed, this helps select a
			 * new element in the next iteration of the loop.
			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180) dmap = pos;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181) list_move_tail(&dmap->busy_list, &fcd->busy_ranges);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182) start_idx = end_idx = dmap->itn.start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185) spin_unlock(&fcd->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186) if (!dmap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1189) ret = lookup_and_reclaim_dmap(fcd, inode, start_idx, end_idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190) iput(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193) nr_freed++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198) static void fuse_dax_free_mem_worker(struct work_struct *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201) struct fuse_conn_dax *fcd = container_of(work, struct fuse_conn_dax,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202) free_work.work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203) ret = try_to_free_dmap_chunks(fcd, FUSE_DAX_RECLAIM_CHUNK);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205) pr_debug("fuse: try_to_free_dmap_chunks() failed with err=%d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206) ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208)
	/* If the number of free ranges is still below the threshold, requeue */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210) kick_dmap_free_worker(fcd, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211) }
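
/*
 * Assuming kick_dmap_free_worker() (defined earlier in this file) requeues
 * while free ranges sit below FUSE_DAX_RECLAIM_THRESHOLD percent of the
 * total, as its name and the comment above suggest: on a device with 2048
 * ranges the worker stays scheduled until roughly 2048 * 20 / 100 = 409
 * ranges are free again, reclaiming at most FUSE_DAX_RECLAIM_CHUNK (10)
 * ranges per invocation.
 */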
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213) static void fuse_free_dax_mem_ranges(struct list_head *mem_list)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215) struct fuse_dax_mapping *range, *temp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216)
	/* Free all allocated elements */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218) list_for_each_entry_safe(range, temp, mem_list, list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219) list_del(&range->list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220) if (!list_empty(&range->busy_list))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221) list_del(&range->busy_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222) kfree(range);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226) void fuse_dax_conn_free(struct fuse_conn *fc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228) if (fc->dax) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229) fuse_free_dax_mem_ranges(&fc->dax->free_ranges);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230) kfree(fc->dax);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234) static int fuse_dax_mem_range_init(struct fuse_conn_dax *fcd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236) long nr_pages, nr_ranges;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1237) void *kaddr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1238) pfn_t pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1239) struct fuse_dax_mapping *range;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1240) int ret, id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1241) size_t dax_size = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1242) unsigned long i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1243)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1244) init_waitqueue_head(&fcd->range_waitq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1245) INIT_LIST_HEAD(&fcd->free_ranges);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1246) INIT_LIST_HEAD(&fcd->busy_ranges);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1247) INIT_DELAYED_WORK(&fcd->free_work, fuse_dax_free_mem_worker);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1248)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1249) id = dax_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1250) nr_pages = dax_direct_access(fcd->dev, 0, PHYS_PFN(dax_size), &kaddr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1251) &pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1252) dax_read_unlock(id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1253) if (nr_pages < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1254) pr_debug("dax_direct_access() returned %ld\n", nr_pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1255) return nr_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1256) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1257)
	nr_ranges = nr_pages / FUSE_DAX_PAGES;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1259) pr_debug("%s: dax mapped %ld pages. nr_ranges=%ld\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1260) __func__, nr_pages, nr_ranges);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1261)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1262) for (i = 0; i < nr_ranges; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1263) range = kzalloc(sizeof(struct fuse_dax_mapping), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1264) ret = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1265) if (!range)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1266) goto out_err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1267)
		/*
		 * TODO: This offset only works if the virtio-fs driver does
		 * not hide any memory at the beginning of the DAX window.
		 * This needs better handling.
		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1272) range->window_offset = i * FUSE_DAX_SZ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1273) range->length = FUSE_DAX_SZ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1274) INIT_LIST_HEAD(&range->busy_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1275) refcount_set(&range->refcnt, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1276) list_add_tail(&range->list, &fcd->free_ranges);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1277) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1278)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1279) fcd->nr_free_ranges = nr_ranges;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1280) fcd->nr_ranges = nr_ranges;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1281) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1282) out_err:
	/* Free all allocated elements */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1284) fuse_free_dax_mem_ranges(&fcd->free_ranges);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1285) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1286) }
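
/*
 * Worked sizing example for fuse_dax_mem_range_init(), assuming a 4 GiB
 * DAX window and 4 KiB pages: dax_direct_access() reports
 * nr_pages = 1048576, each range covers FUSE_DAX_PAGES = 512 pages, so
 * nr_ranges = 1048576 / 512 = 2048 free ranges of 2 MiB each, with
 * window_offset running 0x0, 0x200000, 0x400000, ...
 */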
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1287)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1288) int fuse_dax_conn_alloc(struct fuse_conn *fc, struct dax_device *dax_dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1289) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1290) struct fuse_conn_dax *fcd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1291) int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1292)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1293) if (!dax_dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1294) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1295)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1296) fcd = kzalloc(sizeof(*fcd), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1297) if (!fcd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1298) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1299)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1300) spin_lock_init(&fcd->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1301) fcd->dev = dax_dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1302) err = fuse_dax_mem_range_init(fcd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1303) if (err) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1304) kfree(fcd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1305) return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1306) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1307)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1308) fc->dax = fcd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1309) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1310) }
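
/*
 * A minimal sketch of a plausible mount-time/teardown pairing for the
 * connection-level DAX state; the surrounding context and error label are
 * assumptions, only the three helpers are defined in this file:
 *
 *	err = fuse_dax_conn_alloc(fc, dax_dev);
 *	if (err)
 *		goto err_out;
 *	...
 *	fuse_dax_cancel_work(fc);	(on teardown, before...)
 *	fuse_dax_conn_free(fc);		(...freeing the ranges)
 *
 * Passing a NULL dax_dev is valid and simply leaves DAX disabled.
 */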
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1311)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1312) bool fuse_dax_inode_alloc(struct super_block *sb, struct fuse_inode *fi)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1313) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1314) struct fuse_conn *fc = get_fuse_conn_super(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1315)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1316) fi->dax = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1317) if (fc->dax) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1318) fi->dax = kzalloc(sizeof(*fi->dax), GFP_KERNEL_ACCOUNT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1319) if (!fi->dax)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1320) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1321)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1322) init_rwsem(&fi->dax->sem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1323) fi->dax->tree = RB_ROOT_CACHED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1324) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1325)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1326) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1327) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1328)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1329) static const struct address_space_operations fuse_dax_file_aops = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1330) .writepages = fuse_dax_writepages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1331) .direct_IO = noop_direct_IO,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1332) .set_page_dirty = noop_set_page_dirty,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1333) .invalidatepage = noop_invalidatepage,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1334) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1335)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1336) void fuse_dax_inode_init(struct inode *inode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1337) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1338) struct fuse_conn *fc = get_fuse_conn(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1339)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1340) if (!fc->dax)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1341) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1342)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1343) inode->i_flags |= S_DAX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1344) inode->i_data.a_ops = &fuse_dax_file_aops;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1345) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1346)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1347) bool fuse_dax_check_alignment(struct fuse_conn *fc, unsigned int map_alignment)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1348) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1349) if (fc->dax && (map_alignment > FUSE_DAX_SHIFT)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1350) pr_warn("FUSE: map_alignment %u incompatible with dax mem range size %u\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1351) map_alignment, FUSE_DAX_SZ);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1352) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1353) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1354) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1355) }
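
/*
 * Worked example for fuse_dax_check_alignment(): map_alignment comes from
 * FUSE_INIT and is compared against FUSE_DAX_SHIFT, so it appears to be a
 * power-of-two shift count. With FUSE_DAX_SHIFT == 21 (2 MiB ranges),
 * map_alignment == 12 (4 KiB) or 16 (64 KiB) passes, while 22 (4 MiB) is
 * rejected, since a 2 MiB window slot could then start at an offset the
 * server cannot map.
 */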
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1356)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1357) void fuse_dax_cancel_work(struct fuse_conn *fc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1358) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1359) struct fuse_conn_dax *fcd = fc->dax;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1360)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1361) if (fcd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1362) cancel_delayed_work_sync(&fcd->free_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1364) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1365) EXPORT_SYMBOL_GPL(fuse_dax_cancel_work);