Orange Pi 5 kernel

Deprecated Linux kernel 5.10.110 for OrangePi 5/5B/5+ boards

^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    1) // SPDX-License-Identifier: GPL-2.0+
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    2) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    3)  * Copyright (C) 2020 Google, Inc
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    4)  * Copyright (C) 2020 Palmer Dabbelt <palmerdabbelt@google.com>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    5)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    6) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    7) #include <linux/device-mapper.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    8) #include <uapi/linux/dm-user.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    9) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   10) #include <linux/bio.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   11) #include <linux/init.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   12) #include <linux/mempool.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   13) #include <linux/miscdevice.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   14) #include <linux/module.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   15) #include <linux/poll.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   16) #include <linux/uio.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   17) #include <linux/wait.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   18) #include <linux/workqueue.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   19) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   20) #define DM_MSG_PREFIX "user"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   21) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   22) #define MAX_OUTSTANDING_MESSAGES 128
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   23) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   24) static unsigned int daemon_timeout_msec = 4000;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   25) module_param_named(dm_user_daemon_timeout_msec, daemon_timeout_msec, uint,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   26) 		   0644);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   27) MODULE_PARM_DESC(dm_user_daemon_timeout_msec,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   28) 		 "IO Timeout in msec if daemon does not process");
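
/*
 * Usage sketch (the sysfs path below assumes the module ends up named
 * "dm_user"): since the parameter is registered with mode 0644 it should be
 * adjustable at runtime, e.g.
 *
 *	echo 8000 > /sys/module/dm_user/parameters/dm_user_daemon_timeout_msec
 *
 * or, if built in, via dm_user.dm_user_daemon_timeout_msec=8000 on the kernel
 * command line.
 */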
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   29) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   30) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   31)  * dm-user uses four structures:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   32)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   33)  *  - "struct target", the outermost structure, corresponds to a single device
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   34)  *    mapper target.  This contains the set of outstanding BIOs that have been
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   35)  *    provided by DM and are not actively being processed by the user, along
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   36)  *    with a misc device that userspace can open to communicate with the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   37)  *    kernel.  Each time userspace opens the misc device a new channel is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   38)  *    created.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   39)  *  - "struct channel", which represents a single active communication channel
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   40)  *    with userspace.  Userspace may choose arbitrary read/write sizes to use
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   41)  *    when processing messages; channels form these into logical accesses.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   42)  *    When userspace responds to a full message the channel completes the BIO
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   43)  *    and obtains a new message to process from the target.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   44)  *  - "struct message", which wraps a BIO with the additional information
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   45)  *    required by the kernel to sort out what to do with BIOs when they return
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   46)  *    from userspace.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   47)  *  - "struct dm_user_message", which is the exact message format that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   48)  *    userspace sees.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   49)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   50)  * The hot path contains three distinct operations:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   51)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   52)  *  - user_map(), which is provided a BIO from device mapper that is queued
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   53)  *    into the target.  This allocates and enqueues a new message.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   54)  *  - dev_read(), which dequeues a message and copies it to userspace.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   55)  *  - dev_write(), which looks up a message (keyed by sequence number) and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   56)  *    completes the corresponding BIO.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   57)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   58)  * Lock ordering (outer to inner)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   59)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   60)  * 1) miscdevice's global lock.  This is held around dev_open, so it has to be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   61)  *    the outermost lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   62)  * 2) target->lock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   63)  * 3) channel->lock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   64)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   65) 
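/*
 * For orientation only, a rough sketch of the userspace side of this protocol
 * (a minimal single-threaded daemon loop).  Field and constant names are
 * taken from <uapi/linux/dm-user.h>; the device path depends on the name the
 * target was created with, and details such as the units of msg.len should be
 * checked against that header rather than trusted from this sketch:
 *
 *	int fd = open("/dev/dm-user/foo", O_RDWR);
 *	struct dm_user_message msg;
 *	char data[1 << 20];			// illustrative buffer
 *	__u64 req;
 *
 *	for (;;) {
 *		read(fd, &msg, sizeof(msg));	// next request header
 *		req = msg.type;
 *		if (req == DM_USER_REQ_MAP_WRITE)
 *			read(fd, data, msg.len);	// payload to be written
 *		// ... service the request against the backing store ...
 *		msg.type = DM_USER_RESP_SUCCESS;	// or DM_USER_RESP_ERROR
 *		write(fd, &msg, sizeof(msg));	// same seq completes the BIO
 *		if (req == DM_USER_REQ_MAP_READ)
 *			write(fd, data, msg.len);	// data for the reader
 *	}
 */
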
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   66) struct message {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   67) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   68) 	 * Messages themselves do not need a lock; they're protected by either
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   69) 	 * the target's or the channel's lock, depending on which can reference
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   70) 	 * them directly.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   71) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   72) 	struct dm_user_message msg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   73) 	struct bio *bio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   74) 	size_t posn_to_user;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   75) 	size_t total_to_user;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   76) 	size_t posn_from_user;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   77) 	size_t total_from_user;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   78) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   79) 	struct list_head from_user;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   80) 	struct list_head to_user;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   81) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   82) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   83) 	 * These are written back from the user.  They live in the same spot in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   84) 	 * the message, but we need to either keep the old values around or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   85) 	 * call a bunch more BIO helpers.  These are only valid after the write
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   86) 	 * side has adopted the message.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   87) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   88) 	u64 return_type;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   89) 	u64 return_flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   90) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   91) 	struct delayed_work work;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   92) 	bool delayed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   93) 	struct target *t;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   94) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   95) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   96) struct target {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   97) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   98) 	 * A target has a single lock, which protects everything in the target
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   99) 	 * (but does not protect the channels associated with a target).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  100) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  101) 	struct mutex lock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  102) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  103) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  104) 	 * There is only one point at which anything blocks: userspace blocks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  105) 	 * reading a new message, which is woken up by device mapper providing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  106) 	 * a new BIO to process (or tearing down the target).  The
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  107) 	 * corresponding write side doesn't block; instead we treat userspace's
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  108) 	 * response containing a message that has yet to be mapped as an
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  109) 	 * invalid operation.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  110) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  111) 	struct wait_queue_head wq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  112) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  113) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  114) 	 * Messages are delivered to userspace in order, but may be returned
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  115) 	 * out of order.  This allows userspace to schedule IO if it wants to.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  116) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  117) 	mempool_t message_pool;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  118) 	u64 next_seq_to_map;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  119) 	u64 next_seq_to_user;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  120) 	struct list_head to_user;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  121) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  122) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  123) 	 * There is a misc device per target.  The name is selected by
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  124) 	 * userspace (via a DM create ioctl argument), and each ends up in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  125) 	 * /dev/dm-user/.  A better way to do this might be to have a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  126) 	 * filesystem manage these, but this was more expedient.  The
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  127) 	 * current mechanism is functional, but does result in an arbitrary
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  128) 	 * number of dynamically created misc devices.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  129) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  130) 	struct miscdevice miscdev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  131) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  132) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  133) 	 * Device mapper's target destructor triggers tearing this all down,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  134) 	 * but we can't actually free it until every channel associated with this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  135) 	 * target has been destroyed.  Channels each have a reference to their
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  136) 	 * target, and there is an additional single reference that corresponds
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  137) 	 * to both DM and the misc device (both of which are destroyed by DM).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  138) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  139) 	 * In the common case userspace will be asleep waiting for a new
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  140) 	 * message when device mapper decides to destroy the target, which
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  141) 	 * means no new messages will appear.  The destroyed flag triggers a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  142) 	 * wakeup, which will end up removing the reference.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  143) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  144) 	struct kref references;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  145) 	int dm_destroyed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  146) 	bool daemon_terminated;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  147) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  148) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  149) struct channel {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  150) 	struct target *target;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  151) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  152) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  153) 	 * A channel has a single lock, which prevents multiple reads (or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  154) 	 * multiple writes) from conflicting with each other.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  155) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  156) 	struct mutex lock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  157) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  158) 	struct message *cur_to_user;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  159) 	struct message *cur_from_user;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  160) 	ssize_t to_user_error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  161) 	ssize_t from_user_error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  162) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  163) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  164) 	 * Once a message has been forwarded to userspace on a channel it must
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  165) 	 * be responded to on the same channel.  This allows us to error out
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  166) 	 * the messages that have not yet been responded to by a channel when
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  167) 	 * that channel closes, which makes handling errors more reasonable for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  168) 	 * fault-tolerant userspace daemons.  It also happens to make avoiding
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  169) 	 * shared locks between user_map() and dev_read() a lot easier.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  170) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  171) 	 * This does preclude a multi-threaded, work-stealing userspace
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  172) 	 * implementation (or at least forces a degree of head-of-line blocking
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  173) 	 * on the response path).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  174) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  175) 	struct list_head from_user;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  176) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  177) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  178) 	 * Responses from userspace can arrive in arbitrarily small chunks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  179) 	 * We need some place to buffer one up until we can find the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  180) 	 * corresponding kernel-side message to continue processing, so instead
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  181) 	 * of allocating them we just keep one off to the side here.  This can
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  182) 	 * only ever be pointed to by cur_from_user, and will never have a BIO.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  183) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  184) 	struct message scratch_message_from_user;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  185) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  186) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  187) static void message_kill(struct message *m, mempool_t *pool)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  188) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  189) 	m->bio->bi_status = BLK_STS_IOERR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  190) 	bio_endio(m->bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  191) 	bio_put(m->bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  192) 	mempool_free(m, pool);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  193) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  194) 
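/*
 * The target itself always holds one reference (covering both DM and the misc
 * device, see the comment in struct target); anything beyond that means at
 * least one channel, i.e. at least one user-space thread, is attached.
 */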
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  195) static inline bool is_user_space_thread_present(struct target *t)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  196) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  197) 	lockdep_assert_held(&t->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  198) 	return (kref_read(&t->references) > 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  199) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  200) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  201) static void process_delayed_work(struct work_struct *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  202) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  203) 	struct delayed_work *del_work = to_delayed_work(work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  204) 	struct message *msg = container_of(del_work, struct message, work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  205) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  206) 	struct target *t = msg->t;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  207) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  208) 	mutex_lock(&t->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  209) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  210) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  211) 	 * There is at least one user-space thread present to process the IO.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  212) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  213) 	if (is_user_space_thread_present(t)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  214) 		mutex_unlock(&t->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  215) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  216) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  217) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  218) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  219) 	 * Terminate the IO with an error
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  220) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  221) 	list_del(&msg->to_user);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  222) 	pr_err("I/O error: sector %llu: no user-space daemon for %s target\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  223) 	       msg->bio->bi_iter.bi_sector,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  224) 	       t->miscdev.name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  225) 	message_kill(msg, &t->message_pool);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  226) 	mutex_unlock(&t->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  227) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  228) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  229) static void enqueue_delayed_work(struct message *m, bool is_delay)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  230) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  231) 	unsigned long delay = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  232) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  233) 	m->delayed = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  234) 	INIT_DELAYED_WORK(&m->work, process_delayed_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  235) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  236) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  237) 	 * The snapuserd daemon is the user-space process that services IO
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  238) 	 * requests from dm-user while an OTA update is applied.  In the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  239) 	 * current design, when a dm-user target is created the daemon
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  240) 	 * attaches to the target and starts processing IOs, and it is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  241) 	 * terminated only when the dm-user target is destroyed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  242) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  243) 	 * If, for some reason, the daemon crashes or terminates early
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  244) 	 * without the dm-user target being destroyed, there is no
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  245) 	 * mechanism to restart the daemon and resume processing IOs for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  246) 	 * the same target.  Theoretically it is possible, but that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  247) 	 * infrastructure doesn't exist in the Android ecosystem.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  248) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  249) 	 * Thus, once the daemon has terminated, IOs issued on that target
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  250) 	 * can never be processed, so we set the delay to 0 and fail them
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  251) 	 * immediately.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  252) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  253) 	 * On the other hand, when a new dm-user target is created, we wait
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  254) 	 * for the daemon to attach for the first time.  This primarily
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  255) 	 * happens when first-stage init spins up the daemon.  At this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  256) 	 * point, since the snapshot device is mounted as the root
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  257) 	 * filesystem, the dm-user target may receive IO requests even
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  258) 	 * though the daemon is not fully launched.  We don't want to fail
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  259) 	 * those requests immediately, so we queue them with a timeout to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  260) 	 * give the daemon time to become ready.  If the daemon fails to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  261) 	 * launch within the timeout period, the IOs are failed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  266) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  267) 	if (is_delay)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  268) 		delay = msecs_to_jiffies(daemon_timeout_msec);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  269) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  270) 	queue_delayed_work(system_wq, &m->work, delay);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  271) }
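
/*
 * In other words: with is_delay == true the message is failed only if no
 * user-space thread has attached within daemon_timeout_msec (msg_get_to_user()
 * cancels the work when a daemon does pick the message up), while with
 * is_delay == false, as used from target_put() below, it is failed on the next
 * workqueue run unless a user-space thread is present by then.
 */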
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  272) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  273) static inline struct target *target_from_target(struct dm_target *target)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  274) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  275) 	WARN_ON(target->private == NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  276) 	return target->private;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  277) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  278) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  279) static inline struct target *target_from_miscdev(struct miscdevice *miscdev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  280) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  281) 	return container_of(miscdev, struct target, miscdev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  282) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  283) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  284) static inline struct channel *channel_from_file(struct file *file)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  285) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  286) 	WARN_ON(file->private_data == NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  287) 	return file->private_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  288) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  289) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  290) static inline struct target *target_from_channel(struct channel *c)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  291) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  292) 	WARN_ON(c->target == NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  293) 	return c->target;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  294) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  295) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  296) static inline size_t bio_size(struct bio *bio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  297) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  298) 	struct bio_vec bvec;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  299) 	struct bvec_iter iter;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  300) 	size_t out = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  301) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  302) 	bio_for_each_segment (bvec, bio, iter)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  303) 		out += bio_iter_len(bio, iter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  304) 	return out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  305) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  306) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  307) static inline size_t bio_bytes_needed_to_user(struct bio *bio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  308) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  309) 	switch (bio_op(bio)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  310) 	case REQ_OP_WRITE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  311) 		return sizeof(struct dm_user_message) + bio_size(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  312) 	case REQ_OP_READ:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  313) 	case REQ_OP_FLUSH:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  314) 	case REQ_OP_DISCARD:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  315) 	case REQ_OP_SECURE_ERASE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  316) 	case REQ_OP_WRITE_SAME:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  317) 	case REQ_OP_WRITE_ZEROES:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  318) 		return sizeof(struct dm_user_message);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  319) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  320) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  321) 	 * These ops are not passed to userspace under the assumption that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  322) 	 * they're not going to be particularly useful in that context.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  323) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  324) 	default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  325) 		return -EOPNOTSUPP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  326) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  327) }
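
/*
 * Worked example: a 4 KiB REQ_OP_WRITE is presented to userspace as
 * sizeof(struct dm_user_message) + 4096 bytes (the header plus the payload to
 * be written), while a 4 KiB REQ_OP_READ costs only the header here, since its
 * data travels in the opposite direction (see bio_bytes_needed_from_user()
 * below).
 */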
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  328) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  329) static inline size_t bio_bytes_needed_from_user(struct bio *bio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  330) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  331) 	switch (bio_op(bio)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  332) 	case REQ_OP_READ:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  333) 		return sizeof(struct dm_user_message) + bio_size(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  334) 	case REQ_OP_WRITE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  335) 	case REQ_OP_FLUSH:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  336) 	case REQ_OP_DISCARD:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  337) 	case REQ_OP_SECURE_ERASE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  338) 	case REQ_OP_WRITE_SAME:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  339) 	case REQ_OP_WRITE_ZEROES:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  340) 		return sizeof(struct dm_user_message);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  341) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  342) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  343) 	 * These ops are not passed to userspace under the assumption that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  344) 	 * they're not going to be particularly useful in that context.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  345) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  346) 	default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  347) 		return -EOPNOTSUPP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  348) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  349) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  350) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  351) static inline long bio_type_to_user_type(struct bio *bio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  352) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  353) 	switch (bio_op(bio)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  354) 	case REQ_OP_READ:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  355) 		return DM_USER_REQ_MAP_READ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  356) 	case REQ_OP_WRITE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  357) 		return DM_USER_REQ_MAP_WRITE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  358) 	case REQ_OP_FLUSH:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  359) 		return DM_USER_REQ_MAP_FLUSH;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  360) 	case REQ_OP_DISCARD:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  361) 		return DM_USER_REQ_MAP_DISCARD;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  362) 	case REQ_OP_SECURE_ERASE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  363) 		return DM_USER_REQ_MAP_SECURE_ERASE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  364) 	case REQ_OP_WRITE_SAME:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  365) 		return DM_USER_REQ_MAP_WRITE_SAME;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  366) 	case REQ_OP_WRITE_ZEROES:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  367) 		return DM_USER_REQ_MAP_WRITE_ZEROES;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  368) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  369) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  370) 	 * These ops are not passed to userspace under the assumption that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  371) 	 * they're not going to be particularly useful in that context.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  372) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  373) 	default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  374) 		return -EOPNOTSUPP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  375) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  376) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  377) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  378) static inline long bio_flags_to_user_flags(struct bio *bio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  379) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  380) 	u64 out = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  381) 	typeof(bio->bi_opf) opf = bio->bi_opf & ~REQ_OP_MASK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  382) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  383) 	if (opf & REQ_FAILFAST_DEV) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  384) 		opf &= ~REQ_FAILFAST_DEV;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  385) 		out |= DM_USER_REQ_MAP_FLAG_FAILFAST_DEV;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  386) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  387) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  388) 	if (opf & REQ_FAILFAST_TRANSPORT) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  389) 		opf &= ~REQ_FAILFAST_TRANSPORT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  390) 		out |= DM_USER_REQ_MAP_FLAG_FAILFAST_TRANSPORT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  391) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  392) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  393) 	if (opf & REQ_FAILFAST_DRIVER) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  394) 		opf &= ~REQ_FAILFAST_DRIVER;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  395) 		out |= DM_USER_REQ_MAP_FLAG_FAILFAST_DRIVER;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  396) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  397) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  398) 	if (opf & REQ_SYNC) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  399) 		opf &= ~REQ_SYNC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  400) 		out |= DM_USER_REQ_MAP_FLAG_SYNC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  401) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  402) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  403) 	if (opf & REQ_META) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  404) 		opf &= ~REQ_META;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  405) 		out |= DM_USER_REQ_MAP_FLAG_META;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  406) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  407) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  408) 	if (opf & REQ_PRIO) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  409) 		opf &= ~REQ_PRIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  410) 		out |= DM_USER_REQ_MAP_FLAG_PRIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  411) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  412) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  413) 	if (opf & REQ_NOMERGE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  414) 		opf &= ~REQ_NOMERGE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  415) 		out |= DM_USER_REQ_MAP_FLAG_NOMERGE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  416) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  417) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  418) 	if (opf & REQ_IDLE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  419) 		opf &= ~REQ_IDLE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  420) 		out |= DM_USER_REQ_MAP_FLAG_IDLE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  421) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  422) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  423) 	if (opf & REQ_INTEGRITY) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  424) 		opf &= ~REQ_INTEGRITY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  425) 		out |= DM_USER_REQ_MAP_FLAG_INTEGRITY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  426) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  427) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  428) 	if (opf & REQ_FUA) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  429) 		opf &= ~REQ_FUA;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  430) 		out |= DM_USER_REQ_MAP_FLAG_FUA;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  431) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  432) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  433) 	if (opf & REQ_PREFLUSH) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  434) 		opf &= ~REQ_PREFLUSH;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  435) 		out |= DM_USER_REQ_MAP_FLAG_PREFLUSH;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  436) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  437) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  438) 	if (opf & REQ_RAHEAD) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  439) 		opf &= ~REQ_RAHEAD;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  440) 		out |= DM_USER_REQ_MAP_FLAG_RAHEAD;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  441) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  442) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  443) 	if (opf & REQ_BACKGROUND) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  444) 		opf &= ~REQ_BACKGROUND;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  445) 		out |= DM_USER_REQ_MAP_FLAG_BACKGROUND;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  446) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  447) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  448) 	if (opf & REQ_NOWAIT) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  449) 		opf &= ~REQ_NOWAIT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  450) 		out |= DM_USER_REQ_MAP_FLAG_NOWAIT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  451) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  452) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  453) 	if (opf & REQ_NOUNMAP) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  454) 		opf &= ~REQ_NOUNMAP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  455) 		out |= DM_USER_REQ_MAP_FLAG_NOUNMAP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  456) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  457) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  458) 	if (unlikely(opf)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  459) 		pr_warn("unsupported BIO type %x\n", opf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  460) 		return -EOPNOTSUPP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  461) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  462) 	WARN_ON(out < 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  463) 	return out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  464) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  465) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  466) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  467)  * Not quite what's in blk-map.c, but instead what I thought the functions in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  468)  * blk-map did.  This one seems more generally useful and I think we could
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  469)  * write the blk-map version in terms of this one.  The differences are that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  470)  * this has a return value that counts, and blk-map uses the BIO _all iters.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  471)  * Neither advances the BIO iter, but both advance the IOV iter, which is a bit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  472)  * odd here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  473)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  474) static ssize_t bio_copy_from_iter(struct bio *bio, struct iov_iter *iter)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  475) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  476) 	struct bio_vec bvec;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  477) 	struct bvec_iter biter;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  478) 	ssize_t out = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  479) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  480) 	bio_for_each_segment (bvec, bio, biter) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  481) 		ssize_t ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  482) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  483) 		ret = copy_page_from_iter(bvec.bv_page, bvec.bv_offset,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  484) 					  bvec.bv_len, iter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  485) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  486) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  487) 		 * FIXME: I thought that IOV copies had a mechanism for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  488) 		 * terminating early, if for example a signal came in while
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  489) 		 * sleeping waiting for a page to be mapped, but I don't see
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  490) 		 * where that would happen.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  491) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  492) 		WARN_ON(ret < 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  493) 		out += ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  494) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  495) 		if (!iov_iter_count(iter))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  496) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  497) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  498) 		if (ret < bvec.bv_len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  499) 			return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  500) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  501) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  502) 	return out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  503) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  504) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  505) static ssize_t bio_copy_to_iter(struct bio *bio, struct iov_iter *iter)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  506) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  507) 	struct bio_vec bvec;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  508) 	struct bvec_iter biter;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  509) 	ssize_t out = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  510) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  511) 	bio_for_each_segment (bvec, bio, biter) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  512) 		ssize_t ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  513) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  514) 		ret = copy_page_to_iter(bvec.bv_page, bvec.bv_offset,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  515) 					bvec.bv_len, iter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  516) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  517) 		/* as above */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  518) 		WARN_ON(ret < 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  519) 		out += ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  520) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  521) 		if (!iov_iter_count(iter))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  522) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  523) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  524) 		if (ret < bvec.bv_len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  525) 			return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  526) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  527) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  528) 	return out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  529) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  530) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  531) static ssize_t msg_copy_to_iov(struct message *msg, struct iov_iter *to)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  532) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  533) 	ssize_t copied = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  534) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  535) 	if (!iov_iter_count(to))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  536) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  537) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  538) 	if (msg->posn_to_user < sizeof(msg->msg)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  539) 		copied = copy_to_iter((char *)(&msg->msg) + msg->posn_to_user,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  540) 				      sizeof(msg->msg) - msg->posn_to_user, to);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  541) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  542) 		copied = bio_copy_to_iter(msg->bio, to);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  543) 		if (copied > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  544) 			bio_advance(msg->bio, copied);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  545) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  546) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  547) 	if (copied < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  548) 		return copied;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  549) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  550) 	msg->posn_to_user += copied;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  551) 	return copied;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  552) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  553) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  554) static ssize_t msg_copy_from_iov(struct message *msg, struct iov_iter *from)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  555) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  556) 	ssize_t copied = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  557) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  558) 	if (!iov_iter_count(from))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  559) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  560) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  561) 	if (msg->posn_from_user < sizeof(msg->msg)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  562) 		copied = copy_from_iter(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  563) 			(char *)(&msg->msg) + msg->posn_from_user,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  564) 			sizeof(msg->msg) - msg->posn_from_user, from);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  565) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  566) 		copied = bio_copy_from_iter(msg->bio, from);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  567) 		if (copied > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  568) 			bio_advance(msg->bio, copied);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  569) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  570) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  571) 	if (copied < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  572) 		return copied;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  573) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  574) 	msg->posn_from_user += copied;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  575) 	return copied;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  576) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  577) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  578) static struct message *msg_get_map(struct target *t)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  579) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  580) 	struct message *m;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  581) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  582) 	lockdep_assert_held(&t->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  583) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  584) 	m = mempool_alloc(&t->message_pool, GFP_NOIO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  585) 	m->msg.seq = t->next_seq_to_map++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  586) 	INIT_LIST_HEAD(&m->to_user);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  587) 	INIT_LIST_HEAD(&m->from_user);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  588) 	return m;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  589) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  590) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  591) static struct message *msg_get_to_user(struct target *t)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  592) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  593) 	struct message *m;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  594) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  595) 	lockdep_assert_held(&t->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  596) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  597) 	if (list_empty(&t->to_user))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  598) 		return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  599) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  600) 	m = list_first_entry(&t->to_user, struct message, to_user);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  601) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  602) 	list_del(&m->to_user);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  603) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  604) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  605) 	 * If the IO was queued to the workqueue because there was no
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  606) 	 * daemon to service it, we have to cancel the delayed work, as the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  607) 	 * IO will now be processed by this user-space thread.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  608) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  609) 	 * If the delayed work has already been picked up for processing,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  610) 	 * wait for it to complete.  Note that the IO will not be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  611) 	 * terminated by the workqueue thread.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  614) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  615) 	if (unlikely(m->delayed)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  616) 		mutex_unlock(&t->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  617) 		cancel_delayed_work_sync(&m->work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  618) 		mutex_lock(&t->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  619) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  620) 	return m;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  621) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  622) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  623) static struct message *msg_get_from_user(struct channel *c, u64 seq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  624) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  625) 	struct message *m;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  626) 	struct list_head *cur, *tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  627) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  628) 	lockdep_assert_held(&c->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  629) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  630) 	list_for_each_safe (cur, tmp, &c->from_user) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  631) 		m = list_entry(cur, struct message, from_user);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  632) 		if (m->msg.seq == seq) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  633) 			list_del(&m->from_user);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  634) 			return m;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  635) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  636) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  637) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  638) 	return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  639) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  640) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  641) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  642)  * Returns 0 when there is no work left to do.  This must be callable without
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  643)  * holding the target lock, as it is part of the waitqueue's check expression.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  644)  * When called without the lock it may spuriously indicate there is remaining
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  645)  * work, but when called with the lock it must be accurate.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  646)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  647) int target_poll(struct target *t)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  648) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  649) 	return !list_empty(&t->to_user) || t->dm_destroyed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  650) }
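
/*
 * Typically used as the condition of a wait, e.g. (sketch; this is how the
 * read path below presumably waits for work):
 *
 *	wait_event_interruptible(t->wq, target_poll(t));
 */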
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  651) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  652) void target_release(struct kref *ref)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  653) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  654) 	struct target *t = container_of(ref, struct target, references);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  655) 	struct list_head *cur, *tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  656) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  657) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  658) 	 * There may be outstanding BIOs that have not yet been given to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  659) 	 * userspace.  At this point there's nothing we can do about them, as
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  660) 	 * there are no channels and never will be.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  661) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  662) 	list_for_each_safe (cur, tmp, &t->to_user) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  663) 		struct message *m = list_entry(cur, struct message, to_user);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  664) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  665) 		if (unlikely(m->delayed)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  666) 			bool ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  667) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  668) 			mutex_unlock(&t->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  669) 			ret = cancel_delayed_work_sync(&m->work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  670) 			mutex_lock(&t->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  671) 			if (!ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  672) 				continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  673) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  674) 		message_kill(m, &t->message_pool);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  675) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  676) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  677) 	mempool_exit(&t->message_pool);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  678) 	mutex_unlock(&t->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  679) 	mutex_destroy(&t->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  680) 	kfree(t);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  681) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  682) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  683) void target_put(struct target *t)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  684) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  685) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  686) 	 * This releases both a reference to the target and the lock.  We leave
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  687) 	 * it up to the caller to hold the lock, as they probably needed it for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  688) 	 * something else.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  689) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  690) 	lockdep_assert_held(&t->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  691) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  692) 	if (!kref_put(&t->references, target_release)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  693) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  694) 		 * A user-space thread is being terminated.  We need to scan
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  695) 		 * the list for all pending IOs that have not been processed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  696) 		 * yet and put them on the workqueue for delayed processing.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  699) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  700) 		if (!is_user_space_thread_present(t)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  701) 			struct list_head *cur, *tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  702) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  703) 			list_for_each_safe(cur, tmp, &t->to_user) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  704) 				struct message *m = list_entry(cur,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  705) 							       struct message,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  706) 							       to_user);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  707) 				if (!m->delayed)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  708) 					enqueue_delayed_work(m, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  709) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  710) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  711) 			 * The daemon attached to this target has terminated.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  712) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  713) 			t->daemon_terminated = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  714) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  715) 		mutex_unlock(&t->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  716) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  717) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  718) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  719) static struct channel *channel_alloc(struct target *t)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  720) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  721) 	struct channel *c;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  722) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  723) 	lockdep_assert_held(&t->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  724) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  725) 	c = kzalloc(sizeof(*c), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  726) 	if (c == NULL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  727) 		return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  728) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  729) 	kref_get(&t->references);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  730) 	c->target = t;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  731) 	c->cur_from_user = &c->scratch_message_from_user;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  732) 	mutex_init(&c->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  733) 	INIT_LIST_HEAD(&c->from_user);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  734) 	return c;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  735) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  736) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  737) void channel_free(struct channel *c)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  738) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  739) 	struct list_head *cur, *tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  740) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  741) 	lockdep_assert_held(&c->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  742) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  743) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  744) 	 * There may be outstanding BIOs that have been given to userspace but
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  745) 	 * have not yet been completed.  The channel has been shut down so
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  746) 	 * there's no way to process the rest of those messages, so we just go
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  747) 	 * ahead and error out the BIOs.  Hopefully whatever's on the other end
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  748) 	 * can handle the errors.  One could imagine splitting the BIOs and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  749) 	 * completing as much as we got, but that seems like overkill here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  750) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  751) 	 * Our only other options would be to let the BIO hang around (which
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  752) 	 * seems way worse) or to resubmit it to userspace in the hope there's
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  753) 	 * another channel.  I don't really like the idea of submitting a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  754) 	 * message twice.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  755) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  756) 	if (c->cur_to_user != NULL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  757) 		message_kill(c->cur_to_user, &c->target->message_pool);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  758) 	if (c->cur_from_user != &c->scratch_message_from_user)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  759) 		message_kill(c->cur_from_user, &c->target->message_pool);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  760) 	list_for_each_safe (cur, tmp, &c->from_user)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  761) 		message_kill(list_entry(cur, struct message, from_user),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  762) 			     &c->target->message_pool);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  763) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  764) 	mutex_lock(&c->target->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  765) 	target_put(c->target);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  766) 	mutex_unlock(&c->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  767) 	mutex_destroy(&c->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  768) 	kfree(c);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  769) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  770) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  771) static int dev_open(struct inode *inode, struct file *file)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  772) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  773) 	struct channel *c;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  774) 	struct target *t;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  775) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  776) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  777) 	 * This is called by miscdev, which sets private_data to point to the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  778) 	 * struct miscdevice that was opened.  The rest of our file operations
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  779) 	 * want to refer to the channel that's been opened, so we swap that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  780) 	 * pointer out with a fresh channel.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  781) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  782) 	 * This is called with the miscdev lock held, which is also held while
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  783) 	 * registering/unregistering the miscdev.  The miscdev must be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  784) 	 * registered for this to get called, which means there must be an
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  785) 	 * outstanding reference to the target, which means it cannot be freed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  786) 	 * out from under us despite us not holding a reference yet.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  787) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  788) 	t = container_of(file->private_data, struct target, miscdev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  789) 	mutex_lock(&t->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  790) 	file->private_data = c = channel_alloc(t);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  791) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  792) 	if (c == NULL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  793) 		mutex_unlock(&t->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  794) 		return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  795) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  796) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  797) 	mutex_unlock(&t->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  798) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  799) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  800) 
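/*
 * Hands one outstanding message to userspace.  Blocks until the target has
 * something to send, then copies as much of the current message (the header
 * plus any to-user payload) as the iov will hold.  Once the whole message has
 * been consumed it is parked on the channel's from_user list to wait for the
 * matching reply via dev_write().
 */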
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  801) static ssize_t dev_read(struct kiocb *iocb, struct iov_iter *to)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  802) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  803) 	struct channel *c = channel_from_file(iocb->ki_filp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  804) 	ssize_t total_processed = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  805) 	ssize_t processed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  806) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  807) 	mutex_lock(&c->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  808) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  809) 	if (unlikely(c->to_user_error)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  810) 		total_processed = c->to_user_error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  811) 		goto cleanup_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  812) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  813) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  814) 	if (c->cur_to_user == NULL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  815) 		struct target *t = target_from_channel(c);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  816) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  817) 		mutex_lock(&t->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  818) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  819) 		while (!target_poll(t)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  820) 			int e;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  821) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  822) 			mutex_unlock(&t->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  823) 			mutex_unlock(&c->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  824) 			e = wait_event_interruptible(t->wq, target_poll(t));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  825) 			mutex_lock(&c->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  826) 			mutex_lock(&t->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  827) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  828) 			if (unlikely(e != 0)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  829) 				/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  830) 				 * We haven't processed any bytes in either the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  831) 				 * BIO or the IOV, so we can just terminate
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  832) 				 * right now.  Code elsewhere in the kernel
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  833) 				 * handles restarting the syscall when appropriate.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  834) 				 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  835) 				total_processed = e;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  836) 				mutex_unlock(&t->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  837) 				goto cleanup_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  838) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  839) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  840) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  841) 		if (unlikely(t->dm_destroyed)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  842) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  843) 			 * DM has destroyed this target, so just lock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  844) 			 * the user out.  There's really nothing else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  845) 			 * we can do here.  Note that we don't actually
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  846) 			 * tear anything down until userspace has
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  847) 			 * closed the FD, as there may still be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  848) 			 * outstanding BIOs.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  849) 			 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  850) 			 * This is kind of a wacky error code to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  851) 			 * return.  My goal was really just to try and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  852) 			 * find something that wasn't likely to be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  853) 			 * returned by anything else in the miscdev
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  854) 			 * path.  The message "block device required"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  855) 			 * seems like a somewhat reasonable thing to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  856) 			 * say when the target has disappeared out from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  857) 			 * under us, but "not block" isn't sensible.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  858) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  859) 			c->to_user_error = total_processed = -ENOTBLK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  860) 			mutex_unlock(&t->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  861) 			goto cleanup_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  862) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  863) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  864) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  865) 		 * Ensures that accesses to the message data are not ordered
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  866) 		 * before the remote accesses that produce that message data.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  867) 		 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  868) 		 * This pairs with the barrier in user_map(), via the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  869) 		 * conditional within the while loop above. Also see the lack
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  870) 		 * of barrier in user_dtr(), which is why this can be after the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  871) 		 * destroyed check.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  872) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  873) 		smp_rmb();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  874) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  875) 		c->cur_to_user = msg_get_to_user(t);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  876) 		WARN_ON(c->cur_to_user == NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  877) 		mutex_unlock(&t->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  878) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  879) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  880) 	processed = msg_copy_to_iov(c->cur_to_user, to);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  881) 	total_processed += processed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  882) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  883) 	WARN_ON(c->cur_to_user->posn_to_user > c->cur_to_user->total_to_user);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  884) 	if (c->cur_to_user->posn_to_user == c->cur_to_user->total_to_user) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  885) 		struct message *m = c->cur_to_user;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  886) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  887) 		c->cur_to_user = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  888) 		list_add_tail(&m->from_user, &c->from_user);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  889) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  890) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  891) cleanup_unlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  892) 	mutex_unlock(&c->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  893) 	return total_processed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  894) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  895) 
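/*
 * Accepts a reply from userspace.  The first sizeof(struct dm_user_message)
 * bytes accumulate in the channel's scratch message and are matched by seq
 * against a message previously handed out by dev_read(); anything after the
 * header is data payload.  Once the reply is complete the BIO is finished
 * with a status derived from the reply's type field.
 */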
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  896) static ssize_t dev_write(struct kiocb *iocb, struct iov_iter *from)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  897) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  898) 	struct channel *c = channel_from_file(iocb->ki_filp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  899) 	ssize_t total_processed = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  900) 	ssize_t processed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  901) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  902) 	mutex_lock(&c->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  903) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  904) 	if (unlikely(c->from_user_error)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  905) 		total_processed = c->from_user_error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  906) 		goto cleanup_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  907) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  908) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  909) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  910) 	 * cur_from_user can never be NULL.  If there's no real message it must
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  911) 	 * point to the scratch space.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  912) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  913) 	WARN_ON(c->cur_from_user == NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  914) 	if (c->cur_from_user->posn_from_user < sizeof(struct dm_user_message)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  915) 		struct message *msg, *old;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  916) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  917) 		processed = msg_copy_from_iov(c->cur_from_user, from);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  918) 		if (processed <= 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  919) 		pr_warn("msg_copy_from_iov() returned %zd\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  920) 				processed);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  921) 			c->from_user_error = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  922) 			goto cleanup_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  923) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  924) 		total_processed += processed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  925) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  926) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  927) 		 * In the unlikely event the user has provided us a very short
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  928) 		 * write, not even big enough to fill a message, just succeed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  929) 		 * We'll eventually build up enough bytes to do something.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  930) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  931) 		if (unlikely(c->cur_from_user->posn_from_user <
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  932) 			     sizeof(struct dm_user_message)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  933) 			goto cleanup_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  934) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  935) 		old = c->cur_from_user;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  936) 		mutex_lock(&c->target->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  937) 		msg = msg_get_from_user(c, c->cur_from_user->msg.seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  938) 		if (msg == NULL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  939) 			pr_info("user provided an invalid message seq of %llx\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  940) 				old->msg.seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  941) 			mutex_unlock(&c->target->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  942) 			c->from_user_error = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  943) 			goto cleanup_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  944) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  945) 		mutex_unlock(&c->target->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  946) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  947) 		WARN_ON(old->posn_from_user != sizeof(struct dm_user_message));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  948) 		msg->posn_from_user = sizeof(struct dm_user_message);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  949) 		msg->return_type = old->msg.type;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  950) 		msg->return_flags = old->msg.flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  951) 		WARN_ON(msg->posn_from_user > msg->total_from_user);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  952) 		c->cur_from_user = msg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  953) 		WARN_ON(old != &c->scratch_message_from_user);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  954) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  955) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  956) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  957) 	 * Userspace can signal an error for single requests by overwriting the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  958) 	 * type field of its reply.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  959) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  960) 	switch (c->cur_from_user->return_type) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  961) 	case DM_USER_RESP_SUCCESS:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  962) 		c->cur_from_user->bio->bi_status = BLK_STS_OK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  963) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  964) 	case DM_USER_RESP_ERROR:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  965) 	case DM_USER_RESP_UNSUPPORTED:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  966) 	default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  967) 		c->cur_from_user->bio->bi_status = BLK_STS_IOERR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  968) 		goto finish_bio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  969) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  970) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  971) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  972) 	 * The op was a success as far as userspace is concerned, so process
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  973) 	 * whatever data may come along with it.  The user may provide the BIO
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  974) 	 * data in multiple chunks, in which case we don't need to finish the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  975) 	 * BIO.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  976) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  977) 	processed = msg_copy_from_iov(c->cur_from_user, from);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  978) 	total_processed += processed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  979) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  980) 	if (c->cur_from_user->posn_from_user <
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  981) 	    c->cur_from_user->total_from_user)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  982) 		goto cleanup_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  983) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  984) finish_bio:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  985) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  986) 	 * When we set up this message the BIO's size matched the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  987) 	 * message size; if that's not still the case then something
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  988) 	 * has gone off the rails.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  989) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  990) 	WARN_ON(bio_size(c->cur_from_user->bio) != 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  991) 	bio_endio(c->cur_from_user->bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  992) 	bio_put(c->cur_from_user->bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  993) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  994) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  995) 	 * We don't actually need to take the target lock here, as all
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  996) 	 * we're doing is freeing the message and mempools have their
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  997) 	 * own lock.  Each channel has its own scratch message.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  998) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  999) 	WARN_ON(c->cur_from_user == &c->scratch_message_from_user);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000) 	mempool_free(c->cur_from_user, &c->target->message_pool);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) 	c->scratch_message_from_user.posn_from_user = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002) 	c->cur_from_user = &c->scratch_message_from_user;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004) cleanup_unlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005) 	mutex_unlock(&c->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006) 	return total_processed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008) 
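/*
 * Called when userspace closes its channel.  Any messages the channel still
 * owns are errored out by channel_free().
 */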
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009) static int dev_release(struct inode *inode, struct file *file)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011) 	struct channel *c;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013) 	c = channel_from_file(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014) 	mutex_lock(&c->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015) 	channel_free(c);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020) static const struct file_operations file_operations = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) 	.owner = THIS_MODULE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022) 	.open = dev_open,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023) 	.llseek = no_llseek,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) 	.read_iter = dev_read,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025) 	.write_iter = dev_write,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026) 	.release = dev_release,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027) };
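
/*
 * For illustration only (not part of the original sources): a rough sketch of
 * the userspace side of the protocol implemented by the read/write handlers
 * above, assuming the daemon has opened the misc device registered by
 * user_ctr() below.  Error handling and short read/write handling are
 * omitted; "foo", buf, buf_size and reply_payload_len are placeholders, and
 * buf is assumed large enough for the header plus payload.
 *
 *	struct dm_user_message *msg = (struct dm_user_message *)buf;
 *	int fd = open("/dev/dm-user/foo", O_RDWR);
 *
 *	for (;;) {
 *		// Blocks until the kernel queues a request; fills in the
 *		// header and, for write requests, the data to be written.
 *		read(fd, buf, buf_size);
 *
 *		// Service the request, then echo the header back with the
 *		// same seq.  The type field carries the result
 *		// (DM_USER_RESP_SUCCESS, DM_USER_RESP_ERROR, ...); for read
 *		// requests the data follows the header.
 *		msg->type = DM_USER_RESP_SUCCESS;
 *		write(fd, buf, sizeof(*msg) + reply_payload_len);
 *	}
 */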
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028) 
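/*
 * Constructor for a "user" target.  Takes three arguments, the last of which
 * names the misc device ("dm-user/" argv[2]) that the userspace daemon opens
 * to service IO for this target.
 */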
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029) static int user_ctr(struct dm_target *ti, unsigned int argc, char **argv)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031) 	struct target *t;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) 	int r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034) 	if (argc != 3) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035) 		ti->error = "Invalid argument count";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036) 		r = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037) 		goto cleanup_none;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040) 	t = kzalloc(sizeof(*t), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041) 	if (t == NULL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042) 		r = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) 		goto cleanup_none;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045) 	ti->private = t;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047) 	/* Enable more BIO types. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) 	ti->num_discard_bios = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049) 	ti->discards_supported = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050) 	ti->num_flush_bios = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051) 	ti->flush_supported = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) 	 * We begin with a single reference to the target, which is miscdev's
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055) 	 * reference.  This ensures that the target won't be freed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056) 	 * until after the miscdev has been unregistered and all extant
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057) 	 * channels have been closed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059) 	kref_init(&t->references);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061) 	t->daemon_terminated = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062) 	mutex_init(&t->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063) 	init_waitqueue_head(&t->wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064) 	INIT_LIST_HEAD(&t->to_user);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065) 	mempool_init_kmalloc_pool(&t->message_pool, MAX_OUTSTANDING_MESSAGES,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066) 				  sizeof(struct message));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068) 	t->miscdev.minor = MISC_DYNAMIC_MINOR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069) 	t->miscdev.fops = &file_operations;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070) 	t->miscdev.name = kasprintf(GFP_KERNEL, "dm-user/%s", argv[2]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071) 	if (t->miscdev.name == NULL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072) 		r = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073) 		goto cleanup_message_pool;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077) 	 * Once the miscdev is registered it can be opened, and therefore
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078) 	 * concurrent references to the channel can happen.  Holding the target
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079) 	 * lock during misc_register() could deadlock.  If registration
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080) 	 * succeeds then we will not access the target again so we just stick a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081) 	 * barrier here, which pairs with taking the target lock everywhere
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082) 	 * else the target is accessed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084) 	 * I forgot where we ended up on the RCpc/RCsc locks.  IIUC, RCsc locks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085) 	 * would mean that we could take the target lock earlier and release it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086) 	 * here instead of the memory barrier.  I'm not sure that's any better,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087) 	 * though, and this isn't on a hot path so it probably doesn't matter
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088) 	 * either way.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090) 	smp_mb();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092) 	r = misc_register(&t->miscdev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093) 	if (r) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094) 		DMERR("Unable to register miscdev %s for dm-user",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095) 		      t->miscdev.name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097) 		goto cleanup_misc_name;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102) cleanup_misc_name:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103) 	kfree(t->miscdev.name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104) cleanup_message_pool:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105) 	mempool_exit(&t->message_pool);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106) 	kfree(t);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107) cleanup_none:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108) 	return r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111) static void user_dtr(struct dm_target *ti)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1113) 	struct target *t = target_from_target(ti);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116) 	 * Removes the miscdev.  This must be called without the target lock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117) 	 * held to avoid a possible deadlock because our open implementation is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118) 	 * called holding the miscdev lock and must later take the target lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120) 	 * There is no race here because only DM can register/unregister the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121) 	 * miscdev, and DM ensures that doesn't happen twice.  The internal
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122) 	 * miscdev lock is sufficient to ensure there are no races between
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123) 	 * deregistering the miscdev and open.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125) 	misc_deregister(&t->miscdev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128) 	 * We are now free to take the target's lock and drop our reference to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129) 	 * the target.  There are almost certainly tasks sleeping in read on at
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130) 	 * least one of the channels associated with this target; this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131) 	 * explicitly wakes them up and terminates the read.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133) 	mutex_lock(&t->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135) 	 * No barrier here, as wait/wake ensures that the flag visibility is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136) 	 * correct WRT the wake/sleep state of the target tasks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138) 	t->dm_destroyed = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139) 	wake_up_all(&t->wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140) 	target_put(t);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144)  * Consumes a BIO from device mapper, queueing it up for userspace.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146) static int user_map(struct dm_target *ti, struct bio *bio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148) 	struct target *t;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149) 	struct message *entry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151) 	t = target_from_target(ti);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153) 	 * FIXME
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155) 	 * This seems like a bad idea.  Specifically, here we're
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156) 	 * directly on the IO path when we take the target lock, which may also
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157) 	 * be taken from a user context.  The user context doesn't actively
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158) 	 * trigger anything that may sleep while holding the lock, but this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159) 	 * still seems like a bad idea.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161) 	 * The obvious way to fix this would be to use a proper queue, which
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162) 	 * would result in no shared locks between the direct IO path and user
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163) 	 * tasks.  I had a version that did this, but the head-of-line blocking
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164) 	 * from the circular buffer resulted in us needing a fairly large
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165) 	 * allocation in order to avoid situations in which the queue fills up
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166) 	 * and everything goes off the rails.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168) 	 * I could jump through some hoops to avoid a shared lock while still
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169) 	 * allowing for a large queue, but I'm not actually sure that allowing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170) 	 * for very large queues is the right thing to do here.  Intuitively it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171) 	 * seems better to keep the queues small in here (essentially sized to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172) 	 * the user latency for performance reasons only) and rely on returning
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173) 	 * DM_MAPIO_REQUEUE regularly, as that would give the rest of the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174) 	 * kernel more information.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176) 	 * I'll spend some time trying to figure out what's going on with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177) 	 * DM_MAPIO_REQUEUE, but if someone has a better idea of how to fix
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178) 	 * this I'm all ears.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180) 	mutex_lock(&t->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183) 	 * FIXME
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185) 	 * The assumption here is that there's no benefit to returning
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186) 	 * DM_MAPIO_KILL as opposed to just erroring out the BIO, but I'm not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187) 	 * sure that's actually true -- for example, I could imagine users
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188) 	 * expecting that submitted BIOs are unlikely to fail and therefore
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1189) 	 * relying on submission failure to indicate an unsupported type.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191) 	 * There are two ways I can think of to fix this:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192) 	 *   - Add DM arguments that are parsed during the constructor that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193) 	 *     allow various dm_target flags to be set that indicate the op
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194) 	 *     types supported by this target.  This may make sense for things
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195) 	 *     like discard, where DM can already transform the BIOs to a form
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196) 	 *     that's likely to be supported.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197) 	 *   - Some sort of pre-filter that allows userspace to hook in here
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198) 	 *     and kill BIOs before marking them as submitted.  My guess would
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199) 	 *     be that a userspace round trip is a bad idea here, but a BPF
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200) 	 *     call seems reasonable.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202) 	 * My guess is that we'd likely want to do both.  The first one is easy
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203) 	 * and gives DM the proper info, so it seems better.  The BPF call
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204) 	 * seems overly complex for just this, but one could imagine wanting to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205) 	 * sometimes return _MAPPED and a BPF filter would be the way to do
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206) 	 * that.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208) 	 * For example, in Android we have an in-kernel DM device called
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1209) 	 * "dm-bow" that takes advantage of some portion of the space that has
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210) 	 * been discarded on a device to provide opportunistic block-level
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211) 	 * backups.  While one could imagine just implementing this entirely in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212) 	 * userspace, that would come with an appreciable performance penalty.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213) 	 * Instead one could keep a BPF program that forwards most accesses
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214) 	 * directly to the backing block device while informing a userspace
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215) 	 * daemon of any discarded space and on writes to blocks that are to be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216) 	 * backed up.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218) 	if (unlikely((bio_type_to_user_type(bio) < 0) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219) 		     (bio_flags_to_user_flags(bio) < 0))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220) 		mutex_unlock(&t->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221) 		return DM_MAPIO_KILL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224) 	entry = msg_get_map(t);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225) 	if (unlikely(entry == NULL)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226) 		mutex_unlock(&t->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227) 		return DM_MAPIO_REQUEUE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230) 	bio_get(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231) 	entry->msg.type = bio_type_to_user_type(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232) 	entry->msg.flags = bio_flags_to_user_flags(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233) 	entry->msg.sector = bio->bi_iter.bi_sector;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234) 	entry->msg.len = bio_size(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235) 	entry->bio = bio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236) 	entry->posn_to_user = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1237) 	entry->total_to_user = bio_bytes_needed_to_user(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1238) 	entry->posn_from_user = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1239) 	entry->total_from_user = bio_bytes_needed_from_user(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1240) 	entry->delayed = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1241) 	entry->t = t;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1242) 	/* Pairs with the barrier in dev_read() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1243) 	smp_wmb();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1244) 	list_add_tail(&entry->to_user, &t->to_user);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1245) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1246) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1247) 	 * If there is no daemon to process the IOs,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1248) 	 * queue these messages into a workqueue with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1249) 	 * a timeout.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1250) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1251) 	if (!is_user_space_thread_present(t))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1252) 		enqueue_delayed_work(entry, !t->daemon_terminated);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1253) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1254) 	wake_up_interruptible(&t->wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1255) 	mutex_unlock(&t->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1256) 	return DM_MAPIO_SUBMITTED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1257) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1258) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1259) static struct target_type user_target = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1260) 	.name = "user",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1261) 	.version = { 1, 0, 0 },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1262) 	.module = THIS_MODULE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1263) 	.ctr = user_ctr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1264) 	.dtr = user_dtr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1265) 	.map = user_map,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1266) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1267) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1268) static int __init dm_user_init(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1269) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1270) 	int r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1271) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1272) 	r = dm_register_target(&user_target);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1273) 	if (r) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1274) 		DMERR("register failed %d", r);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1275) 		goto error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1276) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1277) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1278) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1279) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1280) error:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1281) 	return r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1282) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1283) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1284) static void __exit dm_user_exit(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1285) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1286) 	dm_unregister_target(&user_target);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1287) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1288) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1289) module_init(dm_user_init);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1290) module_exit(dm_user_exit);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1291) MODULE_AUTHOR("Palmer Dabbelt <palmerdabbelt@google.com>");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1292) MODULE_DESCRIPTION(DM_NAME " target returning blocks from userspace");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1293) MODULE_LICENSE("GPL");