Orange Pi5 kernel

Deprecated Linux kernel 5.10.110 for OrangePi 5/5B/5+ boards

3 Commits   0 Branches   0 Tags
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    1) // SPDX-License-Identifier: GPL-2.0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    2) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    3)  * dax: direct host memory access
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    4)  * Copyright (C) 2020 Red Hat, Inc.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    5)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    6) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    7) #include "fuse_i.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    8) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    9) #include <linux/delay.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   10) #include <linux/dax.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   11) #include <linux/uio.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   12) #include <linux/pfn_t.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   13) #include <linux/iomap.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   14) #include <linux/interval_tree.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   15) 
/*
 * Default memory range size (1 << 21 bytes).  A power of 2 so it agrees with
 * common FUSE_INIT map_alignment values 4KB and 64KB.
 */
#define FUSE_DAX_SHIFT	21
#define FUSE_DAX_SZ	(1 << FUSE_DAX_SHIFT)
#define FUSE_DAX_PAGES	(FUSE_DAX_SZ / PAGE_SIZE)

/* Number of ranges the reclaimer will try to free in one invocation */
#define FUSE_DAX_RECLAIM_CHUNK		(10)

/*
 * DAX memory reclaim threshold, as a percentage of the total number of
 * ranges. When the number of free ranges drops below this threshold, reclaim
 * can be triggered. Default is 20%.
 */
#define FUSE_DAX_RECLAIM_THRESHOLD	(20)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   33) 
/** Translation information for file offsets to DAX window offsets */
struct fuse_dax_mapping {
	/* Pointer to inode where this memory range is mapped */
	struct inode *inode;

	/*
	 * Links the range into fcd->free_ranges while it is free. Also used
	 * to chain ranges on a temporary removal list during inode cleanup.
	 */
	struct list_head list;

	/*
	 * Node in the per-inode interval tree. start and last hold the file
	 * range index (file offset >> FUSE_DAX_SHIFT).
	 */
	struct interval_tree_node itn;

	/* Links the range into fcd->busy_ranges while it is in use */
	struct list_head busy_list;

	/** Position in DAX window */
	u64 window_offset;

	/** Length of mapping, in bytes */
	loff_t length;

	/* Is this mapping read-only or read-write */
	bool writable;

	/* Reference count, raised while the mapping is used by dax iomap */
	refcount_t refcnt;
};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   60) 
/* Per-inode dax map */
struct fuse_inode_dax {
	/* Semaphore to protect modifications to the dmap tree */
	struct rw_semaphore sem;

	/*
	 * Interval tree of struct fuse_dax_mapping elements, linked through
	 * their itn member and keyed by file range index.
	 */
	struct rb_root_cached tree;

	/* Number of mappings currently inserted in the tree */
	unsigned long nr;
};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   70) 
/* Per-connection DAX state: the DAX device and its pool of memory ranges */
struct fuse_conn_dax {
	/* DAX device */
	struct dax_device *dev;

	/* Lock protecting accesses to members of this structure */
	spinlock_t lock;

	/* Number of busy memory ranges, and the list holding them */
	unsigned long nr_busy_ranges;
	struct list_head busy_ranges;

	/* Worker to free up memory ranges */
	struct delayed_work free_work;

	/* Wait queue for a dax range to become free */
	wait_queue_head_t range_waitq;

	/* Number of free DAX window ranges, and the list holding them */
	long nr_free_ranges;
	struct list_head free_ranges;

	/* Total range count; basis for the reclaim threshold percentage */
	unsigned long nr_ranges;
};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   94) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   95) static inline struct fuse_dax_mapping *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   96) node_to_dmap(struct interval_tree_node *node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   97) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   98) 	if (!node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   99) 		return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  100) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  101) 	return container_of(node, struct fuse_dax_mapping, itn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  102) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  103) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  104) static struct fuse_dax_mapping *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  105) alloc_dax_mapping_reclaim(struct fuse_conn_dax *fcd, struct inode *inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  106) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  107) static void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  108) __kick_dmap_free_worker(struct fuse_conn_dax *fcd, unsigned long delay_ms)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  109) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  110) 	unsigned long free_threshold;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  111) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  112) 	/* If number of free ranges are below threshold, start reclaim */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  113) 	free_threshold = max_t(unsigned long, fcd->nr_ranges * FUSE_DAX_RECLAIM_THRESHOLD / 100,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  114) 			     1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  115) 	if (fcd->nr_free_ranges < free_threshold)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  116) 		queue_delayed_work(system_long_wq, &fcd->free_work,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  117) 				   msecs_to_jiffies(delay_ms));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  118) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  119) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  120) static void kick_dmap_free_worker(struct fuse_conn_dax *fcd,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  121) 				  unsigned long delay_ms)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  122) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  123) 	spin_lock(&fcd->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  124) 	__kick_dmap_free_worker(fcd, delay_ms);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  125) 	spin_unlock(&fcd->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  126) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  127) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  128) static struct fuse_dax_mapping *alloc_dax_mapping(struct fuse_conn_dax *fcd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  129) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  130) 	struct fuse_dax_mapping *dmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  131) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  132) 	spin_lock(&fcd->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  133) 	dmap = list_first_entry_or_null(&fcd->free_ranges,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  134) 					struct fuse_dax_mapping, list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  135) 	if (dmap) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  136) 		list_del_init(&dmap->list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  137) 		WARN_ON(fcd->nr_free_ranges <= 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  138) 		fcd->nr_free_ranges--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  139) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  140) 	spin_unlock(&fcd->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  141) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  142) 	kick_dmap_free_worker(fcd, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  143) 	return dmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  144) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  145) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  146) /* This assumes fcd->lock is held */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  147) static void __dmap_remove_busy_list(struct fuse_conn_dax *fcd,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  148) 				    struct fuse_dax_mapping *dmap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  149) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  150) 	list_del_init(&dmap->busy_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  151) 	WARN_ON(fcd->nr_busy_ranges == 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  152) 	fcd->nr_busy_ranges--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  153) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  154) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  155) static void dmap_remove_busy_list(struct fuse_conn_dax *fcd,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  156) 				  struct fuse_dax_mapping *dmap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  157) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  158) 	spin_lock(&fcd->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  159) 	__dmap_remove_busy_list(fcd, dmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  160) 	spin_unlock(&fcd->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  161) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  162) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  163) /* This assumes fcd->lock is held */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  164) static void __dmap_add_to_free_pool(struct fuse_conn_dax *fcd,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  165) 				struct fuse_dax_mapping *dmap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  166) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  167) 	list_add_tail(&dmap->list, &fcd->free_ranges);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  168) 	fcd->nr_free_ranges++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  169) 	wake_up(&fcd->range_waitq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  170) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  171) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  172) static void dmap_add_to_free_pool(struct fuse_conn_dax *fcd,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  173) 				struct fuse_dax_mapping *dmap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  174) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  175) 	/* Return fuse_dax_mapping to free list */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  176) 	spin_lock(&fcd->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  177) 	__dmap_add_to_free_pool(fcd, dmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  178) 	spin_unlock(&fcd->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  179) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  180) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  181) static int fuse_setup_one_mapping(struct inode *inode, unsigned long start_idx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  182) 				  struct fuse_dax_mapping *dmap, bool writable,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  183) 				  bool upgrade)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  184) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  185) 	struct fuse_mount *fm = get_fuse_mount(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  186) 	struct fuse_conn_dax *fcd = fm->fc->dax;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  187) 	struct fuse_inode *fi = get_fuse_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  188) 	struct fuse_setupmapping_in inarg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  189) 	loff_t offset = start_idx << FUSE_DAX_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  190) 	FUSE_ARGS(args);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  191) 	ssize_t err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  192) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  193) 	WARN_ON(fcd->nr_free_ranges < 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  194) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  195) 	/* Ask fuse daemon to setup mapping */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  196) 	memset(&inarg, 0, sizeof(inarg));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  197) 	inarg.foffset = offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  198) 	inarg.fh = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  199) 	inarg.moffset = dmap->window_offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  200) 	inarg.len = FUSE_DAX_SZ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  201) 	inarg.flags |= FUSE_SETUPMAPPING_FLAG_READ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  202) 	if (writable)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  203) 		inarg.flags |= FUSE_SETUPMAPPING_FLAG_WRITE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  204) 	args.opcode = FUSE_SETUPMAPPING;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  205) 	args.nodeid = fi->nodeid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  206) 	args.in_numargs = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  207) 	args.in_args[0].size = sizeof(inarg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  208) 	args.in_args[0].value = &inarg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  209) 	err = fuse_simple_request(fm, &args);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  210) 	if (err < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  211) 		return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  212) 	dmap->writable = writable;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  213) 	if (!upgrade) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  214) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  215) 		 * We don't take a refernce on inode. inode is valid right now
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  216) 		 * and when inode is going away, cleanup logic should first
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  217) 		 * cleanup dmap entries.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  218) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  219) 		dmap->inode = inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  220) 		dmap->itn.start = dmap->itn.last = start_idx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  221) 		/* Protected by fi->dax->sem */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  222) 		interval_tree_insert(&dmap->itn, &fi->dax->tree);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  223) 		fi->dax->nr++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  224) 		spin_lock(&fcd->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  225) 		list_add_tail(&dmap->busy_list, &fcd->busy_ranges);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  226) 		fcd->nr_busy_ranges++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  227) 		spin_unlock(&fcd->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  228) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  229) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  230) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  231) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  232) static int fuse_send_removemapping(struct inode *inode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  233) 				   struct fuse_removemapping_in *inargp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  234) 				   struct fuse_removemapping_one *remove_one)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  235) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  236) 	struct fuse_inode *fi = get_fuse_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  237) 	struct fuse_mount *fm = get_fuse_mount(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  238) 	FUSE_ARGS(args);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  239) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  240) 	args.opcode = FUSE_REMOVEMAPPING;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  241) 	args.nodeid = fi->nodeid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  242) 	args.in_numargs = 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  243) 	args.in_args[0].size = sizeof(*inargp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  244) 	args.in_args[0].value = inargp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  245) 	args.in_args[1].size = inargp->count * sizeof(*remove_one);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  246) 	args.in_args[1].value = remove_one;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  247) 	return fuse_simple_request(fm, &args);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  248) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  249) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  250) static int dmap_removemapping_list(struct inode *inode, unsigned int num,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  251) 				   struct list_head *to_remove)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  252) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  253) 	struct fuse_removemapping_one *remove_one, *ptr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  254) 	struct fuse_removemapping_in inarg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  255) 	struct fuse_dax_mapping *dmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  256) 	int ret, i = 0, nr_alloc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  257) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  258) 	nr_alloc = min_t(unsigned int, num, FUSE_REMOVEMAPPING_MAX_ENTRY);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  259) 	remove_one = kmalloc_array(nr_alloc, sizeof(*remove_one), GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  260) 	if (!remove_one)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  261) 		return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  262) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  263) 	ptr = remove_one;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  264) 	list_for_each_entry(dmap, to_remove, list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  265) 		ptr->moffset = dmap->window_offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  266) 		ptr->len = dmap->length;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  267) 		ptr++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  268) 		i++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  269) 		num--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  270) 		if (i >= nr_alloc || num == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  271) 			memset(&inarg, 0, sizeof(inarg));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  272) 			inarg.count = i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  273) 			ret = fuse_send_removemapping(inode, &inarg,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  274) 						      remove_one);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  275) 			if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  276) 				goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  277) 			ptr = remove_one;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  278) 			i = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  279) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  280) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  281) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  282) 	kfree(remove_one);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  283) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  284) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  285) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  286) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  287)  * Cleanup dmap entry and add back to free list. This should be called with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  288)  * fcd->lock held.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  289)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  290) static void dmap_reinit_add_to_free_pool(struct fuse_conn_dax *fcd,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  291) 					    struct fuse_dax_mapping *dmap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  292) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  293) 	pr_debug("fuse: freeing memory range start_idx=0x%lx end_idx=0x%lx window_offset=0x%llx length=0x%llx\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  294) 		 dmap->itn.start, dmap->itn.last, dmap->window_offset,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  295) 		 dmap->length);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  296) 	__dmap_remove_busy_list(fcd, dmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  297) 	dmap->inode = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  298) 	dmap->itn.start = dmap->itn.last = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  299) 	__dmap_add_to_free_pool(fcd, dmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  300) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  301) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  302) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  303)  * Free inode dmap entries whose range falls inside [start, end].
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  304)  * Does not take any locks. At this point of time it should only be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  305)  * called from evict_inode() path where we know all dmap entries can be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  306)  * reclaimed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  307)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  308) static void inode_reclaim_dmap_range(struct fuse_conn_dax *fcd,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  309) 				     struct inode *inode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  310) 				     loff_t start, loff_t end)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  311) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  312) 	struct fuse_inode *fi = get_fuse_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  313) 	struct fuse_dax_mapping *dmap, *n;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  314) 	int err, num = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  315) 	LIST_HEAD(to_remove);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  316) 	unsigned long start_idx = start >> FUSE_DAX_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  317) 	unsigned long end_idx = end >> FUSE_DAX_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  318) 	struct interval_tree_node *node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  319) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  320) 	while (1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  321) 		node = interval_tree_iter_first(&fi->dax->tree, start_idx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  322) 						end_idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  323) 		if (!node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  324) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  325) 		dmap = node_to_dmap(node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  326) 		/* inode is going away. There should not be any users of dmap */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  327) 		WARN_ON(refcount_read(&dmap->refcnt) > 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  328) 		interval_tree_remove(&dmap->itn, &fi->dax->tree);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  329) 		num++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  330) 		list_add(&dmap->list, &to_remove);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  331) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  332) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  333) 	/* Nothing to remove */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  334) 	if (list_empty(&to_remove))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  335) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  336) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  337) 	WARN_ON(fi->dax->nr < num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  338) 	fi->dax->nr -= num;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  339) 	err = dmap_removemapping_list(inode, num, &to_remove);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  340) 	if (err && err != -ENOTCONN) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  341) 		pr_warn("Failed to removemappings. start=0x%llx end=0x%llx\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  342) 			start, end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  343) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  344) 	spin_lock(&fcd->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  345) 	list_for_each_entry_safe(dmap, n, &to_remove, list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  346) 		list_del_init(&dmap->list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  347) 		dmap_reinit_add_to_free_pool(fcd, dmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  348) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  349) 	spin_unlock(&fcd->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  350) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  351) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  352) static int dmap_removemapping_one(struct inode *inode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  353) 				  struct fuse_dax_mapping *dmap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  354) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  355) 	struct fuse_removemapping_one forget_one;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  356) 	struct fuse_removemapping_in inarg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  357) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  358) 	memset(&inarg, 0, sizeof(inarg));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  359) 	inarg.count = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  360) 	memset(&forget_one, 0, sizeof(forget_one));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  361) 	forget_one.moffset = dmap->window_offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  362) 	forget_one.len = dmap->length;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  363) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  364) 	return fuse_send_removemapping(inode, &inarg, &forget_one);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  365) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  366) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  367) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  368)  * It is called from evict_inode() and by that time inode is going away. So
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  369)  * this function does not take any locks like fi->dax->sem for traversing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  370)  * that fuse inode interval tree. If that lock is taken then lock validator
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  371)  * complains of deadlock situation w.r.t fs_reclaim lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  372)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  373) void fuse_dax_inode_cleanup(struct inode *inode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  374) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  375) 	struct fuse_conn *fc = get_fuse_conn(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  376) 	struct fuse_inode *fi = get_fuse_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  377) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  378) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  379) 	 * fuse_evict_inode() has already called truncate_inode_pages_final()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  380) 	 * before we arrive here. So we should not have to worry about any
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  381) 	 * pages/exception entries still associated with inode.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  382) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  383) 	inode_reclaim_dmap_range(fc->dax, inode, 0, -1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  384) 	WARN_ON(fi->dax->nr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  385) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  386) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  387) static void fuse_fill_iomap_hole(struct iomap *iomap, loff_t length)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  388) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  389) 	iomap->addr = IOMAP_NULL_ADDR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  390) 	iomap->length = length;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  391) 	iomap->type = IOMAP_HOLE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  392) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  393) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  394) static void fuse_fill_iomap(struct inode *inode, loff_t pos, loff_t length,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  395) 			    struct iomap *iomap, struct fuse_dax_mapping *dmap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  396) 			    unsigned int flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  397) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  398) 	loff_t offset, len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  399) 	loff_t i_size = i_size_read(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  400) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  401) 	offset = pos - (dmap->itn.start << FUSE_DAX_SHIFT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  402) 	len = min(length, dmap->length - offset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  403) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  404) 	/* If length is beyond end of file, truncate further */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  405) 	if (pos + len > i_size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  406) 		len = i_size - pos;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  407) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  408) 	if (len > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  409) 		iomap->addr = dmap->window_offset + offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  410) 		iomap->length = len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  411) 		if (flags & IOMAP_FAULT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  412) 			iomap->length = ALIGN(len, PAGE_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  413) 		iomap->type = IOMAP_MAPPED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  414) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  415) 		 * increace refcnt so that reclaim code knows this dmap is in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  416) 		 * use. This assumes fi->dax->sem mutex is held either
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  417) 		 * shared/exclusive.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  418) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  419) 		refcount_inc(&dmap->refcnt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  420) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  421) 		/* iomap->private should be NULL */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  422) 		WARN_ON_ONCE(iomap->private);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  423) 		iomap->private = dmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  424) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  425) 		/* Mapping beyond end of file is hole */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  426) 		fuse_fill_iomap_hole(iomap, length);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  427) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  428) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  429) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  430) static int fuse_setup_new_dax_mapping(struct inode *inode, loff_t pos,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  431) 				      loff_t length, unsigned int flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  432) 				      struct iomap *iomap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  433) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  434) 	struct fuse_inode *fi = get_fuse_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  435) 	struct fuse_conn *fc = get_fuse_conn(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  436) 	struct fuse_conn_dax *fcd = fc->dax;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  437) 	struct fuse_dax_mapping *dmap, *alloc_dmap = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  438) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  439) 	bool writable = flags & IOMAP_WRITE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  440) 	unsigned long start_idx = pos >> FUSE_DAX_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  441) 	struct interval_tree_node *node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  442) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  443) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  444) 	 * Can't do inline reclaim in fault path. We call
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  445) 	 * dax_layout_busy_page() before we free a range. And
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  446) 	 * fuse_wait_dax_page() drops fi->i_mmap_sem lock and requires it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  447) 	 * In fault path we enter with fi->i_mmap_sem held and can't drop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  448) 	 * it. Also in fault path we hold fi->i_mmap_sem shared and not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  449) 	 * exclusive, so that creates further issues with fuse_wait_dax_page().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  450) 	 * Hence return -EAGAIN and fuse_dax_fault() will wait for a memory
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  451) 	 * range to become free and retry.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  452) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  453) 	if (flags & IOMAP_FAULT) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  454) 		alloc_dmap = alloc_dax_mapping(fcd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  455) 		if (!alloc_dmap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  456) 			return -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  457) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  458) 		alloc_dmap = alloc_dax_mapping_reclaim(fcd, inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  459) 		if (IS_ERR(alloc_dmap))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  460) 			return PTR_ERR(alloc_dmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  461) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  462) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  463) 	/* If we are here, we should have memory allocated */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  464) 	if (WARN_ON(!alloc_dmap))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  465) 		return -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  466) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  467) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  468) 	 * Take write lock so that only one caller can try to setup mapping
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  469) 	 * and other waits.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  470) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  471) 	down_write(&fi->dax->sem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  472) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  473) 	 * We dropped lock. Check again if somebody else setup
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  474) 	 * mapping already.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  475) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  476) 	node = interval_tree_iter_first(&fi->dax->tree, start_idx, start_idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  477) 	if (node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  478) 		dmap = node_to_dmap(node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  479) 		fuse_fill_iomap(inode, pos, length, iomap, dmap, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  480) 		dmap_add_to_free_pool(fcd, alloc_dmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  481) 		up_write(&fi->dax->sem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  482) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  483) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  484) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  485) 	/* Setup one mapping */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  486) 	ret = fuse_setup_one_mapping(inode, pos >> FUSE_DAX_SHIFT, alloc_dmap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  487) 				     writable, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  488) 	if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  489) 		dmap_add_to_free_pool(fcd, alloc_dmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  490) 		up_write(&fi->dax->sem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  491) 		return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  492) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  493) 	fuse_fill_iomap(inode, pos, length, iomap, alloc_dmap, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  494) 	up_write(&fi->dax->sem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  495) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  496) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  497) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  498) static int fuse_upgrade_dax_mapping(struct inode *inode, loff_t pos,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  499) 				    loff_t length, unsigned int flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  500) 				    struct iomap *iomap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  501) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  502) 	struct fuse_inode *fi = get_fuse_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  503) 	struct fuse_dax_mapping *dmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  504) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  505) 	unsigned long idx = pos >> FUSE_DAX_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  506) 	struct interval_tree_node *node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  507) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  508) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  509) 	 * Take exclusive lock so that only one caller can try to setup
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  510) 	 * mapping and others wait.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  511) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  512) 	down_write(&fi->dax->sem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  513) 	node = interval_tree_iter_first(&fi->dax->tree, idx, idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  514) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  515) 	/* We are holding either inode lock or i_mmap_sem, and that should
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  516) 	 * ensure that dmap can't be truncated. We are holding a reference
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  517) 	 * on dmap and that should make sure it can't be reclaimed. So dmap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  518) 	 * should still be there in tree despite the fact we dropped and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  519) 	 * re-acquired the fi->dax->sem lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  520) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  521) 	ret = -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  522) 	if (WARN_ON(!node))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  523) 		goto out_err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  524) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  525) 	dmap = node_to_dmap(node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  526) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  527) 	/* We took an extra reference on dmap to make sure its not reclaimd.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  528) 	 * Now we hold fi->dax->sem lock and that reference is not needed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  529) 	 * anymore. Drop it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  530) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  531) 	if (refcount_dec_and_test(&dmap->refcnt)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  532) 		/* refcount should not hit 0. This object only goes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  533) 		 * away when fuse connection goes away
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  534) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  535) 		WARN_ON_ONCE(1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  536) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  537) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  538) 	/* Maybe another thread already upgraded mapping while we were not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  539) 	 * holding lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  540) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  541) 	if (dmap->writable) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  542) 		ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  543) 		goto out_fill_iomap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  544) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  545) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  546) 	ret = fuse_setup_one_mapping(inode, pos >> FUSE_DAX_SHIFT, dmap, true,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  547) 				     true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  548) 	if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  549) 		goto out_err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  550) out_fill_iomap:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  551) 	fuse_fill_iomap(inode, pos, length, iomap, dmap, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  552) out_err:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  553) 	up_write(&fi->dax->sem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  554) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  555) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  556) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  557) /* This is just for DAX and the mapping is ephemeral, do not use it for other
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  558)  * purposes since there is no block device with a permanent mapping.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  559)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  560) static int fuse_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  561) 			    unsigned int flags, struct iomap *iomap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  562) 			    struct iomap *srcmap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  563) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  564) 	struct fuse_inode *fi = get_fuse_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  565) 	struct fuse_conn *fc = get_fuse_conn(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  566) 	struct fuse_dax_mapping *dmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  567) 	bool writable = flags & IOMAP_WRITE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  568) 	unsigned long start_idx = pos >> FUSE_DAX_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  569) 	struct interval_tree_node *node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  570) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  571) 	/* We don't support FIEMAP */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  572) 	if (WARN_ON(flags & IOMAP_REPORT))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  573) 		return -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  574) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  575) 	iomap->offset = pos;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  576) 	iomap->flags = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  577) 	iomap->bdev = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  578) 	iomap->dax_dev = fc->dax->dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  579) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  580) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  581) 	 * Both read/write and mmap path can race here. So we need something
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  582) 	 * to make sure if we are setting up mapping, then other path waits
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  583) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  584) 	 * For now, use a semaphore for this. It probably needs to be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  585) 	 * optimized later.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  586) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  587) 	down_read(&fi->dax->sem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  588) 	node = interval_tree_iter_first(&fi->dax->tree, start_idx, start_idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  589) 	if (node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  590) 		dmap = node_to_dmap(node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  591) 		if (writable && !dmap->writable) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  592) 			/* Upgrade read-only mapping to read-write. This will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  593) 			 * require exclusive fi->dax->sem lock as we don't want
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  594) 			 * two threads to be trying to this simultaneously
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  595) 			 * for same dmap. So drop shared lock and acquire
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  596) 			 * exclusive lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  597) 			 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  598) 			 * Before dropping fi->dax->sem lock, take reference
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  599) 			 * on dmap so that its not freed by range reclaim.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  600) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  601) 			refcount_inc(&dmap->refcnt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  602) 			up_read(&fi->dax->sem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  603) 			pr_debug("%s: Upgrading mapping at offset 0x%llx length 0x%llx\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  604) 				 __func__, pos, length);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  605) 			return fuse_upgrade_dax_mapping(inode, pos, length,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  606) 							flags, iomap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  607) 		} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  608) 			fuse_fill_iomap(inode, pos, length, iomap, dmap, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  609) 			up_read(&fi->dax->sem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  610) 			return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  611) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  612) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  613) 		up_read(&fi->dax->sem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  614) 		pr_debug("%s: no mapping at offset 0x%llx length 0x%llx\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  615) 				__func__, pos, length);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  616) 		if (pos >= i_size_read(inode))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  617) 			goto iomap_hole;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  618) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  619) 		return fuse_setup_new_dax_mapping(inode, pos, length, flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  620) 						  iomap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  621) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  622) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  623) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  624) 	 * If read beyond end of file happnes, fs code seems to return
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  625) 	 * it as hole
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  626) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  627) iomap_hole:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  628) 	fuse_fill_iomap_hole(iomap, length);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  629) 	pr_debug("%s returning hole mapping. pos=0x%llx length_asked=0x%llx length_returned=0x%llx\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  630) 		 __func__, pos, length, iomap->length);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  631) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  632) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  633) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  634) static int fuse_iomap_end(struct inode *inode, loff_t pos, loff_t length,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  635) 			  ssize_t written, unsigned int flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  636) 			  struct iomap *iomap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  637) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  638) 	struct fuse_dax_mapping *dmap = iomap->private;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  639) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  640) 	if (dmap) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  641) 		if (refcount_dec_and_test(&dmap->refcnt)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  642) 			/* refcount should not hit 0. This object only goes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  643) 			 * away when fuse connection goes away
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  644) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  645) 			WARN_ON_ONCE(1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  646) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  647) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  648) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  649) 	/* DAX writes beyond end-of-file aren't handled using iomap, so the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  650) 	 * file size is unchanged and there is nothing to do here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  651) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  652) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  653) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  654) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  655) static const struct iomap_ops fuse_iomap_ops = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  656) 	.iomap_begin = fuse_iomap_begin,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  657) 	.iomap_end = fuse_iomap_end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  658) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  659) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  660) static void fuse_wait_dax_page(struct inode *inode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  661) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  662) 	struct fuse_inode *fi = get_fuse_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  663) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  664) 	up_write(&fi->i_mmap_sem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  665) 	schedule();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  666) 	down_write(&fi->i_mmap_sem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  667) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  668) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  669) /* Should be called with fi->i_mmap_sem lock held exclusively */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  670) static int __fuse_dax_break_layouts(struct inode *inode, bool *retry,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  671) 				    loff_t start, loff_t end)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  672) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  673) 	struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  674) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  675) 	page = dax_layout_busy_page_range(inode->i_mapping, start, end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  676) 	if (!page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  677) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  678) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  679) 	*retry = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  680) 	return ___wait_var_event(&page->_refcount,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  681) 			atomic_read(&page->_refcount) == 1, TASK_INTERRUPTIBLE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  682) 			0, 0, fuse_wait_dax_page(inode));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  683) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  684) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  685) /* dmap_end == 0 leads to unmapping of whole file */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  686) int fuse_dax_break_layouts(struct inode *inode, u64 dmap_start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  687) 				  u64 dmap_end)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  688) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  689) 	bool	retry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  690) 	int	ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  691) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  692) 	do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  693) 		retry = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  694) 		ret = __fuse_dax_break_layouts(inode, &retry, dmap_start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  695) 					       dmap_end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  696) 	} while (ret == 0 && retry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  697) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  698) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  699) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  700) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  701) ssize_t fuse_dax_read_iter(struct kiocb *iocb, struct iov_iter *to)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  702) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  703) 	struct inode *inode = file_inode(iocb->ki_filp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  704) 	ssize_t ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  705) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  706) 	if (iocb->ki_flags & IOCB_NOWAIT) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  707) 		if (!inode_trylock_shared(inode))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  708) 			return -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  709) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  710) 		inode_lock_shared(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  711) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  712) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  713) 	ret = dax_iomap_rw(iocb, to, &fuse_iomap_ops);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  714) 	inode_unlock_shared(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  715) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  716) 	/* TODO file_accessed(iocb->f_filp) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  717) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  718) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  719) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  720) static bool file_extending_write(struct kiocb *iocb, struct iov_iter *from)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  721) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  722) 	struct inode *inode = file_inode(iocb->ki_filp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  723) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  724) 	return (iov_iter_rw(from) == WRITE &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  725) 		((iocb->ki_pos) >= i_size_read(inode) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  726) 		  (iocb->ki_pos + iov_iter_count(from) > i_size_read(inode))));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  727) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  728) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  729) static ssize_t fuse_dax_direct_write(struct kiocb *iocb, struct iov_iter *from)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  730) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  731) 	struct inode *inode = file_inode(iocb->ki_filp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  732) 	struct fuse_io_priv io = FUSE_IO_PRIV_SYNC(iocb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  733) 	ssize_t ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  734) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  735) 	ret = fuse_direct_io(&io, from, &iocb->ki_pos, FUSE_DIO_WRITE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  736) 	if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  737) 		return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  738) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  739) 	fuse_invalidate_attr(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  740) 	fuse_write_update_size(inode, iocb->ki_pos);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  741) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  742) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  743) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  744) ssize_t fuse_dax_write_iter(struct kiocb *iocb, struct iov_iter *from)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  745) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  746) 	struct inode *inode = file_inode(iocb->ki_filp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  747) 	ssize_t ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  748) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  749) 	if (iocb->ki_flags & IOCB_NOWAIT) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  750) 		if (!inode_trylock(inode))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  751) 			return -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  752) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  753) 		inode_lock(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  754) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  755) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  756) 	ret = generic_write_checks(iocb, from);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  757) 	if (ret <= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  758) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  759) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  760) 	ret = file_remove_privs(iocb->ki_filp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  761) 	if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  762) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  763) 	/* TODO file_update_time() but we don't want metadata I/O */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  764) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  765) 	/* Do not use dax for file extending writes as write and on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  766) 	 * disk i_size increase are not atomic otherwise.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  767) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  768) 	if (file_extending_write(iocb, from))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  769) 		ret = fuse_dax_direct_write(iocb, from);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  770) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  771) 		ret = dax_iomap_rw(iocb, from, &fuse_iomap_ops);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  772) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  773) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  774) 	inode_unlock(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  775) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  776) 	if (ret > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  777) 		ret = generic_write_sync(iocb, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  778) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  779) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  780) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  781) static int fuse_dax_writepages(struct address_space *mapping,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  782) 			       struct writeback_control *wbc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  783) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  784) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  785) 	struct inode *inode = mapping->host;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  786) 	struct fuse_conn *fc = get_fuse_conn(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  787) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  788) 	return dax_writeback_mapping_range(mapping, fc->dax->dev, wbc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  789) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  790) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  791) static vm_fault_t __fuse_dax_fault(struct vm_fault *vmf,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  792) 				   enum page_entry_size pe_size, bool write)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  793) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  794) 	vm_fault_t ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  795) 	struct inode *inode = file_inode(vmf->vma->vm_file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  796) 	struct super_block *sb = inode->i_sb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  797) 	pfn_t pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  798) 	int error = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  799) 	struct fuse_conn *fc = get_fuse_conn(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  800) 	struct fuse_conn_dax *fcd = fc->dax;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  801) 	bool retry = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  802) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  803) 	if (write)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  804) 		sb_start_pagefault(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  805) retry:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  806) 	if (retry && !(fcd->nr_free_ranges > 0))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  807) 		wait_event(fcd->range_waitq, (fcd->nr_free_ranges > 0));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  808) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  809) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  810) 	 * We need to serialize against not only truncate but also against
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  811) 	 * fuse dax memory range reclaim. While a range is being reclaimed,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  812) 	 * we do not want any read/write/mmap to make progress and try
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  813) 	 * to populate page cache or access memory we are trying to free.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  814) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  815) 	down_read(&get_fuse_inode(inode)->i_mmap_sem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  816) 	ret = dax_iomap_fault(vmf, pe_size, &pfn, &error, &fuse_iomap_ops);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  817) 	if ((ret & VM_FAULT_ERROR) && error == -EAGAIN) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  818) 		error = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  819) 		retry = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  820) 		up_read(&get_fuse_inode(inode)->i_mmap_sem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  821) 		goto retry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  822) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  823) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  824) 	if (ret & VM_FAULT_NEEDDSYNC)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  825) 		ret = dax_finish_sync_fault(vmf, pe_size, pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  826) 	up_read(&get_fuse_inode(inode)->i_mmap_sem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  827) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  828) 	if (write)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  829) 		sb_end_pagefault(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  830) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  831) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  832) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  833) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  834) static vm_fault_t fuse_dax_fault(struct vm_fault *vmf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  835) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  836) 	return __fuse_dax_fault(vmf, PE_SIZE_PTE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  837) 				vmf->flags & FAULT_FLAG_WRITE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  838) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  839) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  840) static vm_fault_t fuse_dax_huge_fault(struct vm_fault *vmf,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  841) 			       enum page_entry_size pe_size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  842) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  843) 	return __fuse_dax_fault(vmf, pe_size, vmf->flags & FAULT_FLAG_WRITE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  844) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  845) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  846) static vm_fault_t fuse_dax_page_mkwrite(struct vm_fault *vmf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  847) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  848) 	return __fuse_dax_fault(vmf, PE_SIZE_PTE, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  849) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  850) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  851) static vm_fault_t fuse_dax_pfn_mkwrite(struct vm_fault *vmf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  852) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  853) 	return __fuse_dax_fault(vmf, PE_SIZE_PTE, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  854) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  855) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  856) static const struct vm_operations_struct fuse_dax_vm_ops = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  857) 	.fault		= fuse_dax_fault,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  858) 	.huge_fault	= fuse_dax_huge_fault,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  859) 	.page_mkwrite	= fuse_dax_page_mkwrite,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  860) 	.pfn_mkwrite	= fuse_dax_pfn_mkwrite,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  861) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  862) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  863) int fuse_dax_mmap(struct file *file, struct vm_area_struct *vma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  864) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  865) 	file_accessed(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  866) 	vma->vm_ops = &fuse_dax_vm_ops;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  867) 	vma->vm_flags |= VM_MIXEDMAP | VM_HUGEPAGE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  868) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  869) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  870) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  871) static int dmap_writeback_invalidate(struct inode *inode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  872) 				     struct fuse_dax_mapping *dmap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  873) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  874) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  875) 	loff_t start_pos = dmap->itn.start << FUSE_DAX_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  876) 	loff_t end_pos = (start_pos + FUSE_DAX_SZ - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  877) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  878) 	ret = filemap_fdatawrite_range(inode->i_mapping, start_pos, end_pos);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  879) 	if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  880) 		pr_debug("fuse: filemap_fdatawrite_range() failed. err=%d start_pos=0x%llx, end_pos=0x%llx\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  881) 			 ret, start_pos, end_pos);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  882) 		return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  883) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  884) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  885) 	ret = invalidate_inode_pages2_range(inode->i_mapping,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  886) 					    start_pos >> PAGE_SHIFT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  887) 					    end_pos >> PAGE_SHIFT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  888) 	if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  889) 		pr_debug("fuse: invalidate_inode_pages2_range() failed err=%d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  890) 			 ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  891) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  892) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  893) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  894) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  895) static int reclaim_one_dmap_locked(struct inode *inode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  896) 				   struct fuse_dax_mapping *dmap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  897) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  898) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  899) 	struct fuse_inode *fi = get_fuse_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  900) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  901) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  902) 	 * igrab() was done to make sure inode won't go under us, and this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  903) 	 * further avoids the race with evict().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  904) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  905) 	ret = dmap_writeback_invalidate(inode, dmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  906) 	if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  907) 		return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  908) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  909) 	/* Remove dax mapping from inode interval tree now */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  910) 	interval_tree_remove(&dmap->itn, &fi->dax->tree);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  911) 	fi->dax->nr--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  912) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  913) 	/* It is possible that umount/shutdown has killed the fuse connection
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  914) 	 * and worker thread is trying to reclaim memory in parallel.  Don't
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  915) 	 * warn in that case.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  916) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  917) 	ret = dmap_removemapping_one(inode, dmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  918) 	if (ret && ret != -ENOTCONN) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  919) 		pr_warn("Failed to remove mapping. offset=0x%llx len=0x%llx ret=%d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  920) 			dmap->window_offset, dmap->length, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  921) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  922) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  923) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  924) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  925) /* Find first mapped dmap for an inode and return file offset. Caller needs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  926)  * to hold fi->dax->sem lock either shared or exclusive.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  927)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  928) static struct fuse_dax_mapping *inode_lookup_first_dmap(struct inode *inode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  929) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  930) 	struct fuse_inode *fi = get_fuse_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  931) 	struct fuse_dax_mapping *dmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  932) 	struct interval_tree_node *node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  933) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  934) 	for (node = interval_tree_iter_first(&fi->dax->tree, 0, -1); node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  935) 	     node = interval_tree_iter_next(node, 0, -1)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  936) 		dmap = node_to_dmap(node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  937) 		/* still in use. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  938) 		if (refcount_read(&dmap->refcnt) > 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  939) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  940) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  941) 		return dmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  942) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  943) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  944) 	return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  945) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  946) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  947) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  948)  * Find first mapping in the tree and free it and return it. Do not add
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  949)  * it back to free pool.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  950)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  951) static struct fuse_dax_mapping *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  952) inode_inline_reclaim_one_dmap(struct fuse_conn_dax *fcd, struct inode *inode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  953) 			      bool *retry)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  954) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  955) 	struct fuse_inode *fi = get_fuse_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  956) 	struct fuse_dax_mapping *dmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  957) 	u64 dmap_start, dmap_end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  958) 	unsigned long start_idx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  959) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  960) 	struct interval_tree_node *node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  961) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  962) 	down_write(&fi->i_mmap_sem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  963) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  964) 	/* Lookup a dmap and corresponding file offset to reclaim. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  965) 	down_read(&fi->dax->sem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  966) 	dmap = inode_lookup_first_dmap(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  967) 	if (dmap) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  968) 		start_idx = dmap->itn.start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  969) 		dmap_start = start_idx << FUSE_DAX_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  970) 		dmap_end = dmap_start + FUSE_DAX_SZ - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  971) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  972) 	up_read(&fi->dax->sem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  973) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  974) 	if (!dmap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  975) 		goto out_mmap_sem;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  976) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  977) 	 * Make sure there are no references to inode pages using
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  978) 	 * get_user_pages()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  979) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  980) 	ret = fuse_dax_break_layouts(inode, dmap_start, dmap_end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  981) 	if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  982) 		pr_debug("fuse: fuse_dax_break_layouts() failed. err=%d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  983) 			 ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  984) 		dmap = ERR_PTR(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  985) 		goto out_mmap_sem;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  986) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  987) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  988) 	down_write(&fi->dax->sem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  989) 	node = interval_tree_iter_first(&fi->dax->tree, start_idx, start_idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  990) 	/* Range already got reclaimed by somebody else */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  991) 	if (!node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  992) 		if (retry)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  993) 			*retry = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  994) 		goto out_write_dmap_sem;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  995) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  996) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  997) 	dmap = node_to_dmap(node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  998) 	/* still in use. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  999) 	if (refcount_read(&dmap->refcnt) > 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000) 		dmap = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) 		if (retry)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002) 			*retry = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003) 		goto out_write_dmap_sem;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006) 	ret = reclaim_one_dmap_locked(inode, dmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007) 	if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008) 		dmap = ERR_PTR(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009) 		goto out_write_dmap_sem;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012) 	/* Clean up dmap. Do not add back to free list */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013) 	dmap_remove_busy_list(fcd, dmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014) 	dmap->inode = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015) 	dmap->itn.start = dmap->itn.last = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017) 	pr_debug("fuse: %s: inline reclaimed memory range. inode=%p, window_offset=0x%llx, length=0x%llx\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018) 		 __func__, inode, dmap->window_offset, dmap->length);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020) out_write_dmap_sem:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) 	up_write(&fi->dax->sem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022) out_mmap_sem:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023) 	up_write(&fi->i_mmap_sem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) 	return dmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027) static struct fuse_dax_mapping *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028) alloc_dax_mapping_reclaim(struct fuse_conn_dax *fcd, struct inode *inode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030) 	struct fuse_dax_mapping *dmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031) 	struct fuse_inode *fi = get_fuse_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033) 	while (1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034) 		bool retry = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036) 		dmap = alloc_dax_mapping(fcd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037) 		if (dmap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038) 			return dmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040) 		dmap = inode_inline_reclaim_one_dmap(fcd, inode, &retry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042) 		 * Either we got a mapping or it is an error, return in both
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) 		 * the cases.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045) 		if (dmap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046) 			return dmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) 		/* If we could not reclaim a mapping because it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049) 		 * had a reference or some other temporary failure,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050) 		 * Try again. We want to give up inline reclaim only
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051) 		 * if there is no range assigned to this node. Otherwise
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052) 		 * if a deadlock is possible if we sleep with fi->i_mmap_sem
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053) 		 * held and worker to free memory can't make progress due
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) 		 * to unavailability of fi->i_mmap_sem lock. So sleep
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055) 		 * only if fi->dax->nr=0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057) 		if (retry)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060) 		 * There are no mappings which can be reclaimed. Wait for one.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061) 		 * We are not holding fi->dax->sem. So it is possible
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062) 		 * that range gets added now. But as we are not holding
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063) 		 * fi->i_mmap_sem, worker should still be able to free up
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064) 		 * a range and wake us up.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066) 		if (!fi->dax->nr && !(fcd->nr_free_ranges > 0)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067) 			if (wait_event_killable_exclusive(fcd->range_waitq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068) 					(fcd->nr_free_ranges > 0))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069) 				return ERR_PTR(-EINTR);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075) static int lookup_and_reclaim_dmap_locked(struct fuse_conn_dax *fcd,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076) 					  struct inode *inode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077) 					  unsigned long start_idx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080) 	struct fuse_inode *fi = get_fuse_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081) 	struct fuse_dax_mapping *dmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082) 	struct interval_tree_node *node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084) 	/* Find fuse dax mapping at file offset inode. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085) 	node = interval_tree_iter_first(&fi->dax->tree, start_idx, start_idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087) 	/* Range already got cleaned up by somebody else */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088) 	if (!node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090) 	dmap = node_to_dmap(node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092) 	/* still in use. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093) 	if (refcount_read(&dmap->refcnt) > 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096) 	ret = reclaim_one_dmap_locked(inode, dmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097) 	if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098) 		return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100) 	/* Cleanup dmap entry and add back to free list */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101) 	spin_lock(&fcd->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102) 	dmap_reinit_add_to_free_pool(fcd, dmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103) 	spin_unlock(&fcd->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108)  * Free a range of memory.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109)  * Locking:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110)  * 1. Take fi->i_mmap_sem to block dax faults.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111)  * 2. Take fi->dax->sem to protect interval tree and also to make sure
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112)  *    read/write can not reuse a dmap which we might be freeing.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1113)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114) static int lookup_and_reclaim_dmap(struct fuse_conn_dax *fcd,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115) 				   struct inode *inode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116) 				   unsigned long start_idx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117) 				   unsigned long end_idx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120) 	struct fuse_inode *fi = get_fuse_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121) 	loff_t dmap_start = start_idx << FUSE_DAX_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122) 	loff_t dmap_end = (dmap_start + FUSE_DAX_SZ) - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124) 	down_write(&fi->i_mmap_sem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125) 	ret = fuse_dax_break_layouts(inode, dmap_start, dmap_end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126) 	if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127) 		pr_debug("virtio_fs: fuse_dax_break_layouts() failed. err=%d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128) 			 ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129) 		goto out_mmap_sem;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132) 	down_write(&fi->dax->sem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133) 	ret = lookup_and_reclaim_dmap_locked(fcd, inode, start_idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134) 	up_write(&fi->dax->sem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135) out_mmap_sem:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136) 	up_write(&fi->i_mmap_sem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140) static int try_to_free_dmap_chunks(struct fuse_conn_dax *fcd,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141) 				   unsigned long nr_to_free)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143) 	struct fuse_dax_mapping *dmap, *pos, *temp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144) 	int ret, nr_freed = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145) 	unsigned long start_idx = 0, end_idx = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146) 	struct inode *inode = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148) 	/* Pick first busy range and free it for now*/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149) 	while (1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150) 		if (nr_freed >= nr_to_free)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153) 		dmap = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154) 		spin_lock(&fcd->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156) 		if (!fcd->nr_busy_ranges) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157) 			spin_unlock(&fcd->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158) 			return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161) 		list_for_each_entry_safe(pos, temp, &fcd->busy_ranges,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162) 						busy_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163) 			/* skip this range if it's in use. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164) 			if (refcount_read(&pos->refcnt) > 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165) 				continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167) 			inode = igrab(pos->inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169) 			 * This inode is going away. That will free
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170) 			 * up all the ranges anyway, continue to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171) 			 * next range.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173) 			if (!inode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174) 				continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176) 			 * Take this element off list and add it tail. If
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177) 			 * this element can't be freed, it will help with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178) 			 * selecting new element in next iteration of loop.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180) 			dmap = pos;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181) 			list_move_tail(&dmap->busy_list, &fcd->busy_ranges);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182) 			start_idx = end_idx = dmap->itn.start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185) 		spin_unlock(&fcd->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186) 		if (!dmap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187) 			return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1189) 		ret = lookup_and_reclaim_dmap(fcd, inode, start_idx, end_idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190) 		iput(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191) 		if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192) 			return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193) 		nr_freed++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198) static void fuse_dax_free_mem_worker(struct work_struct *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201) 	struct fuse_conn_dax *fcd = container_of(work, struct fuse_conn_dax,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202) 						 free_work.work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203) 	ret = try_to_free_dmap_chunks(fcd, FUSE_DAX_RECLAIM_CHUNK);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204) 	if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205) 		pr_debug("fuse: try_to_free_dmap_chunks() failed with err=%d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206) 			 ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1209) 	/* If number of free ranges are still below threhold, requeue */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210) 	kick_dmap_free_worker(fcd, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213) static void fuse_free_dax_mem_ranges(struct list_head *mem_list)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215) 	struct fuse_dax_mapping *range, *temp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217) 	/* Free All allocated elements */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218) 	list_for_each_entry_safe(range, temp, mem_list, list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219) 		list_del(&range->list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220) 		if (!list_empty(&range->busy_list))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221) 			list_del(&range->busy_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222) 		kfree(range);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226) void fuse_dax_conn_free(struct fuse_conn *fc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228) 	if (fc->dax) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229) 		fuse_free_dax_mem_ranges(&fc->dax->free_ranges);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230) 		kfree(fc->dax);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234) static int fuse_dax_mem_range_init(struct fuse_conn_dax *fcd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236) 	long nr_pages, nr_ranges;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1237) 	void *kaddr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1238) 	pfn_t pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1239) 	struct fuse_dax_mapping *range;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1240) 	int ret, id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1241) 	size_t dax_size = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1242) 	unsigned long i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1243) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1244) 	init_waitqueue_head(&fcd->range_waitq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1245) 	INIT_LIST_HEAD(&fcd->free_ranges);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1246) 	INIT_LIST_HEAD(&fcd->busy_ranges);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1247) 	INIT_DELAYED_WORK(&fcd->free_work, fuse_dax_free_mem_worker);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1248) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1249) 	id = dax_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1250) 	nr_pages = dax_direct_access(fcd->dev, 0, PHYS_PFN(dax_size), &kaddr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1251) 				     &pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1252) 	dax_read_unlock(id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1253) 	if (nr_pages < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1254) 		pr_debug("dax_direct_access() returned %ld\n", nr_pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1255) 		return nr_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1256) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1257) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1258) 	nr_ranges = nr_pages/FUSE_DAX_PAGES;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1259) 	pr_debug("%s: dax mapped %ld pages. nr_ranges=%ld\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1260) 		__func__, nr_pages, nr_ranges);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1261) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1262) 	for (i = 0; i < nr_ranges; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1263) 		range = kzalloc(sizeof(struct fuse_dax_mapping), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1264) 		ret = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1265) 		if (!range)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1266) 			goto out_err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1267) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1268) 		/* TODO: This offset only works if virtio-fs driver is not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1269) 		 * having some memory hidden at the beginning. This needs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1270) 		 * better handling
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1271) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1272) 		range->window_offset = i * FUSE_DAX_SZ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1273) 		range->length = FUSE_DAX_SZ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1274) 		INIT_LIST_HEAD(&range->busy_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1275) 		refcount_set(&range->refcnt, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1276) 		list_add_tail(&range->list, &fcd->free_ranges);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1277) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1278) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1279) 	fcd->nr_free_ranges = nr_ranges;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1280) 	fcd->nr_ranges = nr_ranges;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1281) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1282) out_err:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1283) 	/* Free All allocated elements */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1284) 	fuse_free_dax_mem_ranges(&fcd->free_ranges);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1285) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1286) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1287) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1288) int fuse_dax_conn_alloc(struct fuse_conn *fc, struct dax_device *dax_dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1289) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1290) 	struct fuse_conn_dax *fcd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1291) 	int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1292) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1293) 	if (!dax_dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1294) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1295) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1296) 	fcd = kzalloc(sizeof(*fcd), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1297) 	if (!fcd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1298) 		return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1299) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1300) 	spin_lock_init(&fcd->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1301) 	fcd->dev = dax_dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1302) 	err = fuse_dax_mem_range_init(fcd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1303) 	if (err) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1304) 		kfree(fcd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1305) 		return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1306) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1307) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1308) 	fc->dax = fcd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1309) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1310) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1311) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1312) bool fuse_dax_inode_alloc(struct super_block *sb, struct fuse_inode *fi)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1313) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1314) 	struct fuse_conn *fc = get_fuse_conn_super(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1315) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1316) 	fi->dax = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1317) 	if (fc->dax) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1318) 		fi->dax = kzalloc(sizeof(*fi->dax), GFP_KERNEL_ACCOUNT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1319) 		if (!fi->dax)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1320) 			return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1321) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1322) 		init_rwsem(&fi->dax->sem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1323) 		fi->dax->tree = RB_ROOT_CACHED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1324) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1325) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1326) 	return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1327) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1328) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1329) static const struct address_space_operations fuse_dax_file_aops  = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1330) 	.writepages	= fuse_dax_writepages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1331) 	.direct_IO	= noop_direct_IO,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1332) 	.set_page_dirty	= noop_set_page_dirty,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1333) 	.invalidatepage	= noop_invalidatepage,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1334) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1335) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1336) void fuse_dax_inode_init(struct inode *inode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1337) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1338) 	struct fuse_conn *fc = get_fuse_conn(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1339) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1340) 	if (!fc->dax)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1341) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1342) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1343) 	inode->i_flags |= S_DAX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1344) 	inode->i_data.a_ops = &fuse_dax_file_aops;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1345) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1346) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1347) bool fuse_dax_check_alignment(struct fuse_conn *fc, unsigned int map_alignment)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1348) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1349) 	if (fc->dax && (map_alignment > FUSE_DAX_SHIFT)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1350) 		pr_warn("FUSE: map_alignment %u incompatible with dax mem range size %u\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1351) 			map_alignment, FUSE_DAX_SZ);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1352) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1353) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1354) 	return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1355) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1356) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1357) void fuse_dax_cancel_work(struct fuse_conn *fc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1358) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1359) 	struct fuse_conn_dax *fcd = fc->dax;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1360) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1361) 	if (fcd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1362) 		cancel_delayed_work_sync(&fcd->free_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1363) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1364) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1365) EXPORT_SYMBOL_GPL(fuse_dax_cancel_work);