// SPDX-License-Identifier: GPL-2.0+
/*
 * Copyright (C) 2020 Google, Inc
 * Copyright (C) 2020 Palmer Dabbelt <palmerdabbelt@google.com>
 */

#include <linux/device-mapper.h>
#include <uapi/linux/dm-user.h>

#include <linux/bio.h>
#include <linux/init.h>
#include <linux/mempool.h>
#include <linux/miscdevice.h>
#include <linux/module.h>
#include <linux/poll.h>
#include <linux/uio.h>
#include <linux/wait.h>
#include <linux/workqueue.h>

#define DM_MSG_PREFIX "user"

#define MAX_OUTSTANDING_MESSAGES 128

static unsigned int daemon_timeout_msec = 4000;
module_param_named(dm_user_daemon_timeout_msec, daemon_timeout_msec, uint,
		   0644);
MODULE_PARM_DESC(dm_user_daemon_timeout_msec,
		 "IO timeout in msec if the daemon does not process the IO");
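
/*
 * A usage note (added commentary, not from the original source): since the
 * parameter above is registered with mode 0644, the standard module_param
 * behavior is to expose it under sysfs, so the timeout can typically be
 * tuned at runtime with something like:
 *
 *	echo 8000 > /sys/module/dm_user/parameters/dm_user_daemon_timeout_msec
 *
 * The exact path assumes the module is named dm_user; adjust as needed.
 */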

/*
 * dm-user uses four structures:
 *
 * - "struct target", the outermost structure, corresponds to a single device
 *   mapper target. This contains the set of outstanding BIOs that have been
 *   provided by DM and are not actively being processed by the user, along
 *   with a misc device that userspace can open to communicate with the
 *   kernel. Each time userspace opens the misc device a new channel is
 *   created.
 * - "struct channel", which represents a single active communication channel
 *   with userspace. Userspace may choose arbitrary read/write sizes to use
 *   when processing messages; channels form these into logical accesses.
 *   When userspace responds to a full message the channel completes the BIO
 *   and obtains a new message to process from the target.
 * - "struct message", which wraps a BIO with the additional information
 *   required by the kernel to sort out what to do with BIOs when they return
 *   from userspace.
 * - "struct dm_user_message", which is the exact message format that
 *   userspace sees.
 *
 * The hot path contains three distinct operations:
 *
 * - user_map(), which is provided a BIO from device mapper that is queued
 *   into the target. This allocates and enqueues a new message.
 * - dev_read(), which dequeues a message and copies it to userspace.
 * - dev_write(), which looks up a message (keyed by sequence number) and
 *   completes the corresponding BIO.
 *
 * Lock ordering (outer to inner)
 *
 * 1) miscdevice's global lock. This is held around dev_open, so it has to be
 *    the outermost lock.
 * 2) target->lock
 * 3) channel->lock
 */
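
/*
 * To make the message protocol concrete, here is a minimal sketch of the
 * userspace side (added commentary, not from the original source). It
 * assumes the uapi header's struct dm_user_message layout (seq/type/flags/
 * sector/len followed by the data buffer) and a device node under
 * /dev/dm-user/; the buffer size, device name, and response_size() (a
 * hypothetical helper returning sizeof(*msg) plus any READ payload) are
 * illustrative assumptions, not a definitive daemon:
 *
 *	int fd = open("/dev/dm-user/example", O_RDWR);
 *	char buf[sizeof(struct dm_user_message) + 65536];
 *	struct dm_user_message *msg = (struct dm_user_message *)buf;
 *
 *	for (;;) {
 *		// One read may return the header plus any WRITE payload.
 *		if (read(fd, buf, sizeof(buf)) < 0)
 *			break;
 *		// ... service msg->type for msg->len bytes at msg->sector,
 *		// filling buf's data area for READs ...
 *		// Respond on the same channel, keyed by msg->seq.
 *		if (write(fd, buf, response_size(msg)) < 0)
 *			break;
 *	}
 *
 * Note that reads and writes may also be split into arbitrarily small
 * chunks; the kernel side below reassembles them.
 */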

struct message {
	/*
	 * Messages themselves do not need a lock, they're protected by either
	 * the target or channel's lock, depending on which can reference them
	 * directly.
	 */
	struct dm_user_message msg;
	struct bio *bio;
	size_t posn_to_user;
	size_t total_to_user;
	size_t posn_from_user;
	size_t total_from_user;

	struct list_head from_user;
	struct list_head to_user;

	/*
	 * These are written back from the user. They live in the same spot in
	 * the message, but we need to either keep the old values around or
	 * call a bunch more BIO helpers. These are only valid after write has
	 * adopted the message.
	 */
	u64 return_type;
	u64 return_flags;

	struct delayed_work work;
	bool delayed;
	struct target *t;
};

struct target {
	/*
	 * A target has a single lock, which protects everything in the target
	 * (but does not protect the channels associated with a target).
	 */
	struct mutex lock;

	/*
	 * There is only one point at which anything blocks: userspace blocks
	 * reading a new message, which is woken up by device mapper providing
	 * a new BIO to process (or tearing down the target). The
	 * corresponding write side doesn't block, instead we treat userspace's
	 * response containing a message that has yet to be mapped as an
	 * invalid operation.
	 */
	struct wait_queue_head wq;

	/*
	 * Messages are delivered to userspace in order, but may be returned
	 * out of order. This allows userspace to schedule IO if it wants to.
	 */
	mempool_t message_pool;
	u64 next_seq_to_map;
	u64 next_seq_to_user;
	struct list_head to_user;

	/*
	 * There is a misc device per target. The name is selected by
	 * userspace (via a DM create ioctl argument), and each ends up in
	 * /dev/dm-user/. It looks like a better way to do this may be to have
	 * a filesystem to manage these, but this was more expedient. The
	 * current mechanism is functional, but does result in an arbitrary
	 * number of dynamically created misc devices.
	 */
	struct miscdevice miscdev;

	/*
	 * Device mapper's target destructor triggers tearing this all down,
	 * but we can't actually free until every channel associated with this
	 * target has been destroyed. Channels each have a reference to their
	 * target, and there is an additional single reference that corresponds
	 * to both DM and the misc device (both of which are destroyed by DM).
	 *
	 * In the common case userspace will be asleep waiting for a new
	 * message when device mapper decides to destroy the target, which
	 * means no new messages will appear. The destroyed flag triggers a
	 * wakeup, which will end up removing the reference.
	 */
	struct kref references;
	int dm_destroyed;
	bool daemon_terminated;
};
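
/*
 * Summarizing the reference flow described above (added commentary, not from
 * the original source): channel_alloc() takes a reference on behalf of each
 * open channel and channel_free() drops it via target_put(); the one
 * remaining reference, shared by DM and the misc device, goes away when DM
 * destroys the target. target_release() therefore runs only once every
 * channel has been torn down.
 */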

struct channel {
	struct target *target;

	/*
	 * A channel has a single lock, which prevents multiple reads (or
	 * multiple writes) from conflicting with each other.
	 */
	struct mutex lock;

	struct message *cur_to_user;
	struct message *cur_from_user;
	ssize_t to_user_error;
	ssize_t from_user_error;

	/*
	 * Once a message has been forwarded to userspace on a channel it must
	 * be responded to on the same channel. This allows us to error out
	 * the messages that have not yet been responded to by a channel when
	 * that channel closes, which makes handling errors more reasonable for
	 * fault-tolerant userspace daemons. It also happens to make avoiding
	 * shared locks between user_map() and dev_read() a lot easier.
	 *
	 * This does preclude a multi-threaded work stealing userspace
	 * implementation (or at least, force a degree of head-of-line blocking
	 * on the response path).
	 */
	struct list_head from_user;

	/*
	 * Responses from userspace can arrive in arbitrarily small chunks.
	 * We need some place to buffer one up until we can find the
	 * corresponding kernel-side message to continue processing, so instead
	 * of allocating them we just keep one off to the side here. This can
	 * only ever be pointed to by cur_from_user, and will never have a BIO.
	 */
	struct message scratch_message_from_user;
};
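
/*
 * For illustration (added commentary, not from the original source): a
 * userspace daemon might legitimately respond in two write() calls, e.g.
 * the header first and a READ payload second. The scratch message above
 * buffers the partially received header until the sequence number is known
 * and the real message can be looked up on c->from_user:
 *
 *	write(fd, msg, sizeof(struct dm_user_message));	// header only
 *	write(fd, data, data_len);			// payload later
 *
 * fd, data, and data_len are illustrative names.
 */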

static void message_kill(struct message *m, mempool_t *pool)
{
	m->bio->bi_status = BLK_STS_IOERR;
	bio_endio(m->bio);
	bio_put(m->bio);
	mempool_free(m, pool);
}

static inline bool is_user_space_thread_present(struct target *t)
{
	lockdep_assert_held(&t->lock);
	return (kref_read(&t->references) > 1);
}

static void process_delayed_work(struct work_struct *work)
{
	struct delayed_work *del_work = to_delayed_work(work);
	struct message *msg = container_of(del_work, struct message, work);

	struct target *t = msg->t;

	mutex_lock(&t->lock);

	/*
	 * There is at least one thread to process the IO.
	 */
	if (is_user_space_thread_present(t)) {
		mutex_unlock(&t->lock);
		return;
	}

	/*
	 * Terminate the IO with an error
	 */
	list_del(&msg->to_user);
	pr_err("I/O error: sector %llu: no user-space daemon for %s target\n",
	       msg->bio->bi_iter.bi_sector,
	       t->miscdev.name);
	message_kill(msg, &t->message_pool);
	mutex_unlock(&t->lock);
}

static void enqueue_delayed_work(struct message *m, bool is_delay)
{
	unsigned long delay = 0;

	m->delayed = true;
	INIT_DELAYED_WORK(&m->work, process_delayed_work);

	/*
	 * The snapuserd daemon is the user-space process that services IO
	 * requests from dm-user while an OTA is applied. Per the current
	 * design, when a dm-user target is created, the daemon attaches to
	 * the target and starts processing IOs. The daemon is terminated
	 * only when the dm-user target is destroyed.
	 *
	 * If for some reason the daemon crashes or terminates early without
	 * destroying the dm-user target, there is no mechanism to restart
	 * the daemon and resume processing IOs on the same target.
	 * Theoretically it is possible, but that infrastructure doesn't
	 * exist in the Android ecosystem.
	 *
	 * Thus, once the daemon terminates, there is no way the IOs issued
	 * on that target will ever be processed. Hence, we set the delay to
	 * 0 and fail the IOs immediately.
	 *
	 * On the other hand, when a new dm-user target is created, we wait
	 * for the daemon to attach for the first time. This primarily
	 * happens when first-stage init spins up the daemon. At that point,
	 * since the snapshot device is mounted as part of the root
	 * filesystem, the dm-user target may receive IO requests even
	 * though the daemon is not fully launched. We don't want to fail
	 * those IO requests immediately, so we queue them with a timeout
	 * that gives the daemon time to become ready to process them.
	 * Again, if the daemon fails to launch within the timeout period,
	 * the IOs are failed.
	 */
	if (is_delay)
		delay = msecs_to_jiffies(daemon_timeout_msec);

	queue_delayed_work(system_wq, &m->work, delay);
}

static inline struct target *target_from_target(struct dm_target *target)
{
	WARN_ON(target->private == NULL);
	return target->private;
}

static inline struct target *target_from_miscdev(struct miscdevice *miscdev)
{
	return container_of(miscdev, struct target, miscdev);
}

static inline struct channel *channel_from_file(struct file *file)
{
	WARN_ON(file->private_data == NULL);
	return file->private_data;
}

static inline struct target *target_from_channel(struct channel *c)
{
	WARN_ON(c->target == NULL);
	return c->target;
}

static inline size_t bio_size(struct bio *bio)
{
	struct bio_vec bvec;
	struct bvec_iter iter;
	size_t out = 0;

	bio_for_each_segment (bvec, bio, iter)
		out += bio_iter_len(bio, iter);
	return out;
}

static inline size_t bio_bytes_needed_to_user(struct bio *bio)
{
	switch (bio_op(bio)) {
	case REQ_OP_WRITE:
		return sizeof(struct dm_user_message) + bio_size(bio);
	case REQ_OP_READ:
	case REQ_OP_FLUSH:
	case REQ_OP_DISCARD:
	case REQ_OP_SECURE_ERASE:
	case REQ_OP_WRITE_SAME:
	case REQ_OP_WRITE_ZEROES:
		return sizeof(struct dm_user_message);

	/*
	 * These ops are not passed to userspace under the assumption that
	 * they're not going to be particularly useful in that context.
	 */
	default:
		return -EOPNOTSUPP;
	}
}
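
/*
 * A worked example (added commentary): for a 4096-byte WRITE, userspace
 * reads sizeof(struct dm_user_message) + 4096 bytes (the header plus the
 * data to be written); for a 4096-byte READ it reads only the header, and
 * the 4096 data bytes instead flow back in the response, as computed by
 * bio_bytes_needed_from_user() below.
 */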

static inline size_t bio_bytes_needed_from_user(struct bio *bio)
{
	switch (bio_op(bio)) {
	case REQ_OP_READ:
		return sizeof(struct dm_user_message) + bio_size(bio);
	case REQ_OP_WRITE:
	case REQ_OP_FLUSH:
	case REQ_OP_DISCARD:
	case REQ_OP_SECURE_ERASE:
	case REQ_OP_WRITE_SAME:
	case REQ_OP_WRITE_ZEROES:
		return sizeof(struct dm_user_message);

	/*
	 * These ops are not passed to userspace under the assumption that
	 * they're not going to be particularly useful in that context.
	 */
	default:
		return -EOPNOTSUPP;
	}
}

static inline long bio_type_to_user_type(struct bio *bio)
{
	switch (bio_op(bio)) {
	case REQ_OP_READ:
		return DM_USER_REQ_MAP_READ;
	case REQ_OP_WRITE:
		return DM_USER_REQ_MAP_WRITE;
	case REQ_OP_FLUSH:
		return DM_USER_REQ_MAP_FLUSH;
	case REQ_OP_DISCARD:
		return DM_USER_REQ_MAP_DISCARD;
	case REQ_OP_SECURE_ERASE:
		return DM_USER_REQ_MAP_SECURE_ERASE;
	case REQ_OP_WRITE_SAME:
		return DM_USER_REQ_MAP_WRITE_SAME;
	case REQ_OP_WRITE_ZEROES:
		return DM_USER_REQ_MAP_WRITE_ZEROES;

	/*
	 * These ops are not passed to userspace under the assumption that
	 * they're not going to be particularly useful in that context.
	 */
	default:
		return -EOPNOTSUPP;
	}
}

static inline long bio_flags_to_user_flags(struct bio *bio)
{
	u64 out = 0;
	typeof(bio->bi_opf) opf = bio->bi_opf & ~REQ_OP_MASK;

	if (opf & REQ_FAILFAST_DEV) {
		opf &= ~REQ_FAILFAST_DEV;
		out |= DM_USER_REQ_MAP_FLAG_FAILFAST_DEV;
	}

	if (opf & REQ_FAILFAST_TRANSPORT) {
		opf &= ~REQ_FAILFAST_TRANSPORT;
		out |= DM_USER_REQ_MAP_FLAG_FAILFAST_TRANSPORT;
	}

	if (opf & REQ_FAILFAST_DRIVER) {
		opf &= ~REQ_FAILFAST_DRIVER;
		out |= DM_USER_REQ_MAP_FLAG_FAILFAST_DRIVER;
	}

	if (opf & REQ_SYNC) {
		opf &= ~REQ_SYNC;
		out |= DM_USER_REQ_MAP_FLAG_SYNC;
	}

	if (opf & REQ_META) {
		opf &= ~REQ_META;
		out |= DM_USER_REQ_MAP_FLAG_META;
	}

	if (opf & REQ_PRIO) {
		opf &= ~REQ_PRIO;
		out |= DM_USER_REQ_MAP_FLAG_PRIO;
	}

	if (opf & REQ_NOMERGE) {
		opf &= ~REQ_NOMERGE;
		out |= DM_USER_REQ_MAP_FLAG_NOMERGE;
	}

	if (opf & REQ_IDLE) {
		opf &= ~REQ_IDLE;
		out |= DM_USER_REQ_MAP_FLAG_IDLE;
	}

	if (opf & REQ_INTEGRITY) {
		opf &= ~REQ_INTEGRITY;
		out |= DM_USER_REQ_MAP_FLAG_INTEGRITY;
	}

	if (opf & REQ_FUA) {
		opf &= ~REQ_FUA;
		out |= DM_USER_REQ_MAP_FLAG_FUA;
	}

	if (opf & REQ_PREFLUSH) {
		opf &= ~REQ_PREFLUSH;
		out |= DM_USER_REQ_MAP_FLAG_PREFLUSH;
	}

	if (opf & REQ_RAHEAD) {
		opf &= ~REQ_RAHEAD;
		out |= DM_USER_REQ_MAP_FLAG_RAHEAD;
	}

	if (opf & REQ_BACKGROUND) {
		opf &= ~REQ_BACKGROUND;
		out |= DM_USER_REQ_MAP_FLAG_BACKGROUND;
	}

	if (opf & REQ_NOWAIT) {
		opf &= ~REQ_NOWAIT;
		out |= DM_USER_REQ_MAP_FLAG_NOWAIT;
	}

	if (opf & REQ_NOUNMAP) {
		opf &= ~REQ_NOUNMAP;
		out |= DM_USER_REQ_MAP_FLAG_NOUNMAP;
	}

	if (unlikely(opf)) {
		pr_warn("unsupported BIO type %x\n", opf);
		return -EOPNOTSUPP;
	}
	WARN_ON(out < 0);
	return out;
}

/*
 * Not quite what's in blk-map.c, but instead what I thought the functions in
 * blk-map did. This one seems more generally useful and I think we could
 * write the blk-map version in terms of this one. The differences are that
 * this has a return value that counts, and blk-map uses the BIO _all iters.
 * Neither advances the BIO iter, though both advance the IOV iter, which is
 * a bit odd here.
 */
static ssize_t bio_copy_from_iter(struct bio *bio, struct iov_iter *iter)
{
	struct bio_vec bvec;
	struct bvec_iter biter;
	ssize_t out = 0;

	bio_for_each_segment (bvec, bio, biter) {
		ssize_t ret;

		ret = copy_page_from_iter(bvec.bv_page, bvec.bv_offset,
					  bvec.bv_len, iter);

		/*
		 * FIXME: I thought that IOV copies had a mechanism for
		 * terminating early, if for example a signal came in while
		 * sleeping waiting for a page to be mapped, but I don't see
		 * where that would happen.
		 */
		WARN_ON(ret < 0);
		out += ret;

		if (!iov_iter_count(iter))
			break;

		if (ret < bvec.bv_len)
			return ret;
	}

	return out;
}

static ssize_t bio_copy_to_iter(struct bio *bio, struct iov_iter *iter)
{
	struct bio_vec bvec;
	struct bvec_iter biter;
	ssize_t out = 0;

	bio_for_each_segment (bvec, bio, biter) {
		ssize_t ret;

		ret = copy_page_to_iter(bvec.bv_page, bvec.bv_offset,
					bvec.bv_len, iter);

		/* as above */
		WARN_ON(ret < 0);
		out += ret;

		if (!iov_iter_count(iter))
			break;

		if (ret < bvec.bv_len)
			return ret;
	}

	return out;
}

static ssize_t msg_copy_to_iov(struct message *msg, struct iov_iter *to)
{
	ssize_t copied = 0;

	if (!iov_iter_count(to))
		return 0;

	if (msg->posn_to_user < sizeof(msg->msg)) {
		copied = copy_to_iter((char *)(&msg->msg) + msg->posn_to_user,
				      sizeof(msg->msg) - msg->posn_to_user, to);
	} else {
		copied = bio_copy_to_iter(msg->bio, to);
		if (copied > 0)
			bio_advance(msg->bio, copied);
	}

	if (copied < 0)
		return copied;

	msg->posn_to_user += copied;
	return copied;
}

static ssize_t msg_copy_from_iov(struct message *msg, struct iov_iter *from)
{
	ssize_t copied = 0;

	if (!iov_iter_count(from))
		return 0;

	if (msg->posn_from_user < sizeof(msg->msg)) {
		copied = copy_from_iter(
			(char *)(&msg->msg) + msg->posn_from_user,
			sizeof(msg->msg) - msg->posn_from_user, from);
	} else {
		copied = bio_copy_from_iter(msg->bio, from);
		if (copied > 0)
			bio_advance(msg->bio, copied);
	}

	if (copied < 0)
		return copied;

	msg->posn_from_user += copied;
	return copied;
}

static struct message *msg_get_map(struct target *t)
{
	struct message *m;

	lockdep_assert_held(&t->lock);

	m = mempool_alloc(&t->message_pool, GFP_NOIO);
	m->msg.seq = t->next_seq_to_map++;
	INIT_LIST_HEAD(&m->to_user);
	INIT_LIST_HEAD(&m->from_user);
	return m;
}

static struct message *msg_get_to_user(struct target *t)
{
	struct message *m;

	lockdep_assert_held(&t->lock);

	if (list_empty(&t->to_user))
		return NULL;

	m = list_first_entry(&t->to_user, struct message, to_user);

	list_del(&m->to_user);

	/*
	 * If the IO was queued to the workqueue because there was no daemon
	 * to service it, we have to cancel the delayed work, as the IO will
	 * now be processed by this user-space thread.
	 *
	 * If the delayed work was already picked up for processing, wait
	 * for it to complete. Note that the IO will not be terminated by
	 * the work-queue thread in that case.
	 */
	if (unlikely(m->delayed)) {
		mutex_unlock(&t->lock);
		cancel_delayed_work_sync(&m->work);
		mutex_lock(&t->lock);
	}
	return m;
}

static struct message *msg_get_from_user(struct channel *c, u64 seq)
{
	struct message *m;
	struct list_head *cur, *tmp;

	lockdep_assert_held(&c->lock);

	list_for_each_safe (cur, tmp, &c->from_user) {
		m = list_entry(cur, struct message, from_user);
		if (m->msg.seq == seq) {
			list_del(&m->from_user);
			return m;
		}
	}

	return NULL;
}

/*
 * Returns 0 when there is no work left to do. This must be callable without
 * holding the target lock, as it is part of the waitqueue's check expression.
 * When called without the lock it may spuriously indicate there is remaining
 * work, but when called with the lock it must be accurate.
 */
int target_poll(struct target *t)
{
	return !list_empty(&t->to_user) || t->dm_destroyed;
}

void target_release(struct kref *ref)
{
	struct target *t = container_of(ref, struct target, references);
	struct list_head *cur, *tmp;

	/*
	 * There may be outstanding BIOs that have not yet been given to
	 * userspace. At this point there's nothing we can do about them,
	 * as there are no channels and there never will be any.
	 */
	list_for_each_safe (cur, tmp, &t->to_user) {
		struct message *m = list_entry(cur, struct message, to_user);

		if (unlikely(m->delayed)) {
			bool ret;

			mutex_unlock(&t->lock);
			ret = cancel_delayed_work_sync(&m->work);
			mutex_lock(&t->lock);
			if (!ret)
				continue;
		}
		message_kill(m, &t->message_pool);
	}

	mempool_exit(&t->message_pool);
	mutex_unlock(&t->lock);
	mutex_destroy(&t->lock);
	kfree(t);
}

void target_put(struct target *t)
{
	/*
	 * This releases both a reference to the target and the target's
	 * lock. We leave it to the caller to take the lock, as they probably
	 * needed it for something else.
	 */
	lockdep_assert_held(&t->lock);

	if (!kref_put(&t->references, target_release)) {
		/*
		 * The user-space thread is being terminated. We need to scan
		 * the list for all the pending IOs that have not yet been
		 * processed and put them back on the work-queue for delayed
		 * processing.
		 */
		if (!is_user_space_thread_present(t)) {
			struct list_head *cur, *tmp;

			list_for_each_safe(cur, tmp, &t->to_user) {
				struct message *m = list_entry(cur,
							       struct message,
							       to_user);
				if (!m->delayed)
					enqueue_delayed_work(m, false);
			}
			/*
			 * The daemon attached to this target has terminated.
			 */
			t->daemon_terminated = true;
		}
		mutex_unlock(&t->lock);
	}
}

static struct channel *channel_alloc(struct target *t)
{
	struct channel *c;

	lockdep_assert_held(&t->lock);

	c = kzalloc(sizeof(*c), GFP_KERNEL);
	if (c == NULL)
		return NULL;

	kref_get(&t->references);
	c->target = t;
	c->cur_from_user = &c->scratch_message_from_user;
	mutex_init(&c->lock);
	INIT_LIST_HEAD(&c->from_user);
	return c;
}

void channel_free(struct channel *c)
{
	struct list_head *cur, *tmp;

	lockdep_assert_held(&c->lock);

	/*
	 * There may be outstanding BIOs that have been given to userspace but
	 * have not yet been completed. The channel has been shut down so
	 * there's no way to process the rest of those messages, so we just go
	 * ahead and error out the BIOs. Hopefully whatever's on the other end
	 * can handle the errors. One could imagine splitting the BIOs and
	 * completing as much as we got, but that seems like overkill here.
	 *
	 * Our only other options would be to let the BIO hang around (which
	 * seems way worse) or to resubmit it to userspace in the hope there's
	 * another channel. I don't really like the idea of submitting a
	 * message twice.
	 */
	if (c->cur_to_user != NULL)
		message_kill(c->cur_to_user, &c->target->message_pool);
	if (c->cur_from_user != &c->scratch_message_from_user)
		message_kill(c->cur_from_user, &c->target->message_pool);
	list_for_each_safe (cur, tmp, &c->from_user)
		message_kill(list_entry(cur, struct message, from_user),
			     &c->target->message_pool);

	mutex_lock(&c->target->lock);
	target_put(c->target);
	mutex_unlock(&c->lock);
	mutex_destroy(&c->lock);
	kfree(c);
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 770)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 771) static int dev_open(struct inode *inode, struct file *file)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 772) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 773) struct channel *c;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 774) struct target *t;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 775)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 776) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 777) * This is called by miscdev, which sets private_data to point to the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 778) * struct miscdevice that was opened. The rest of our file operations
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 779) * want to refer to the channel that's been opened, so we swap that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 780) * pointer out with a fresh channel.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 781) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 782) * This is called with the miscdev lock held, which is also held while
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 783) * registering/unregistering the miscdev. The miscdev must be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 784) * registered for this to get called, which means there must be an
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 785) * outstanding reference to the target, which means it cannot be freed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 786) * out from under us despite us not holding a reference yet.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 787) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 788) t = container_of(file->private_data, struct target, miscdev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 789) mutex_lock(&t->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 790) file->private_data = c = channel_alloc(t);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 791)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 792) if (c == NULL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 793) mutex_unlock(&t->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 794) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 795) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 796)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 797) mutex_unlock(&t->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 798) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 799) }
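
/*
 * A hedged userspace sketch of the lifecycle dev_open() sets up.  The
 * "/dev/dm-user/foo" path and the buffer sizing are illustrative assumptions,
 * not something this driver mandates:
 *
 *	int fd = open("/dev/dm-user/foo", O_RDWR);
 *	char buf[sizeof(struct dm_user_message) + 65536];
 *
 *	for (;;) {
 *		ssize_t n = read(fd, buf, sizeof(buf));	// fetch a request
 *		// ...service it, then write() a reply (see dev_write())...
 *	}
 *
 * Since every open() creates a fresh channel, a daemon can scale by simply
 * opening the misc device once per worker.
 */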
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 800)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 801) static ssize_t dev_read(struct kiocb *iocb, struct iov_iter *to)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 802) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 803) struct channel *c = channel_from_file(iocb->ki_filp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 804) ssize_t total_processed = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 805) ssize_t processed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 806)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 807) mutex_lock(&c->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 808)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 809) if (unlikely(c->to_user_error)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 810) total_processed = c->to_user_error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 811) goto cleanup_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 812) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 813)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 814) if (c->cur_to_user == NULL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 815) struct target *t = target_from_channel(c);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 816)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 817) mutex_lock(&t->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 818)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 819) while (!target_poll(t)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 820) int e;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 821)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 822) mutex_unlock(&t->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 823) mutex_unlock(&c->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 824) e = wait_event_interruptible(t->wq, target_poll(t));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 825) mutex_lock(&c->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 826) mutex_lock(&t->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 827)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 828) if (unlikely(e != 0)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 829) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 830) * We haven't processed any bytes in either the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 831) * BIO or the IOV, so we can just terminate
				 * right now. Code elsewhere in the kernel
				 * handles restarting the syscall when appropriate.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 834) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 835) total_processed = e;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 836) mutex_unlock(&t->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 837) goto cleanup_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 838) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 839) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 840)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 841) if (unlikely(t->dm_destroyed)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 842) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 843) * DM has destroyed this target, so just lock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 844) * the user out. There's really nothing else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 845) * we can do here. Note that we don't actually
			 * tear anything down until userspace has
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 847) * closed the FD, as there may still be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 848) * outstanding BIOs.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 849) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 850) * This is kind of a wacky error code to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 851) * return. My goal was really just to try and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 852) * find something that wasn't likely to be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 853) * returned by anything else in the miscdev
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 854) * path. The message "block device required"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 855) * seems like a somewhat reasonable thing to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 856) * say when the target has disappeared out from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 857) * under us, but "not block" isn't sensible.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 858) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 859) c->to_user_error = total_processed = -ENOTBLK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 860) mutex_unlock(&t->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 861) goto cleanup_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 862) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 863)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 864) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 865) * Ensures that accesses to the message data are not ordered
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 866) * before the remote accesses that produce that message data.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 867) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 868) * This pairs with the barrier in user_map(), via the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 869) * conditional within the while loop above. Also see the lack
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 870) * of barrier in user_dtr(), which is why this can be after the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 871) * destroyed check.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 872) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 873) smp_rmb();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 874)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 875) c->cur_to_user = msg_get_to_user(t);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 876) WARN_ON(c->cur_to_user == NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 877) mutex_unlock(&t->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 878) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 879)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 880) processed = msg_copy_to_iov(c->cur_to_user, to);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 881) total_processed += processed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 882)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 883) WARN_ON(c->cur_to_user->posn_to_user > c->cur_to_user->total_to_user);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 884) if (c->cur_to_user->posn_to_user == c->cur_to_user->total_to_user) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 885) struct message *m = c->cur_to_user;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 886)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 887) c->cur_to_user = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 888) list_add_tail(&m->from_user, &c->from_user);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 889) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 890)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 891) cleanup_unlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 892) mutex_unlock(&c->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 893) return total_processed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 894) }
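
/*
 * Read-side protocol, as implemented by dev_read() above: each request is a
 * struct dm_user_message header followed by the BIO payload when the op
 * carries data to the daemon.  Userspace may read in arbitrary-sized chunks;
 * posn_to_user tracks progress, so a short read simply resumes where it left
 * off.  A hedged consumer sketch, where read_all() and payload_len() are
 * hypothetical helpers that loop over short reads and derive the data length
 * from the header:
 *
 *	struct dm_user_message hdr;
 *
 *	read_all(fd, &hdr, sizeof(hdr));
 *	read_all(fd, payload, payload_len(&hdr));	// ops with data only
 */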
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 895)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 896) static ssize_t dev_write(struct kiocb *iocb, struct iov_iter *from)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 897) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 898) struct channel *c = channel_from_file(iocb->ki_filp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 899) ssize_t total_processed = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 900) ssize_t processed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 901)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 902) mutex_lock(&c->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 903)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 904) if (unlikely(c->from_user_error)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 905) total_processed = c->from_user_error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 906) goto cleanup_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 907) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 908)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 909) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 910) * cur_from_user can never be NULL. If there's no real message it must
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 911) * point to the scratch space.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 912) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 913) WARN_ON(c->cur_from_user == NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 914) if (c->cur_from_user->posn_from_user < sizeof(struct dm_user_message)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 915) struct message *msg, *old;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 916)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 917) processed = msg_copy_from_iov(c->cur_from_user, from);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 918) if (processed <= 0) {
			pr_warn("msg_copy_from_iov() returned %zd\n",
				processed);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 921) c->from_user_error = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 922) goto cleanup_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 923) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 924) total_processed += processed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 925)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 926) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 927) * In the unlikely event the user has provided us a very short
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 928) * write, not even big enough to fill a message, just succeed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 929) * We'll eventually build up enough bytes to do something.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 930) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 931) if (unlikely(c->cur_from_user->posn_from_user <
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 932) sizeof(struct dm_user_message)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 933) goto cleanup_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 934)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 935) old = c->cur_from_user;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 936) mutex_lock(&c->target->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 937) msg = msg_get_from_user(c, c->cur_from_user->msg.seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 938) if (msg == NULL) {
			pr_info("user provided an invalid message seq of %llx\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 940) old->msg.seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 941) mutex_unlock(&c->target->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 942) c->from_user_error = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 943) goto cleanup_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 944) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 945) mutex_unlock(&c->target->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 946)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 947) WARN_ON(old->posn_from_user != sizeof(struct dm_user_message));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 948) msg->posn_from_user = sizeof(struct dm_user_message);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 949) msg->return_type = old->msg.type;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 950) msg->return_flags = old->msg.flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 951) WARN_ON(msg->posn_from_user > msg->total_from_user);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 952) c->cur_from_user = msg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 953) WARN_ON(old != &c->scratch_message_from_user);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 954) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 955)
	/*
	 * Userspace can signal an error for single requests by overwriting
	 * the type field when it echoes the message header back.
	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 960) switch (c->cur_from_user->return_type) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 961) case DM_USER_RESP_SUCCESS:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 962) c->cur_from_user->bio->bi_status = BLK_STS_OK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 963) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 964) case DM_USER_RESP_ERROR:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 965) case DM_USER_RESP_UNSUPPORTED:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 966) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 967) c->cur_from_user->bio->bi_status = BLK_STS_IOERR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 968) goto finish_bio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 969) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 970)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 971) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 972) * The op was a success as far as userspace is concerned, so process
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 973) * whatever data may come along with it. The user may provide the BIO
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 974) * data in multiple chunks, in which case we don't need to finish the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 975) * BIO.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 976) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 977) processed = msg_copy_from_iov(c->cur_from_user, from);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 978) total_processed += processed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 979)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 980) if (c->cur_from_user->posn_from_user <
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 981) c->cur_from_user->total_from_user)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 982) goto cleanup_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 983)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 984) finish_bio:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 985) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 986) * When we set up this message the BIO's size matched the
	 * message size; if that's no longer the case then something
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 988) * has gone off the rails.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 989) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 990) WARN_ON(bio_size(c->cur_from_user->bio) != 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 991) bio_endio(c->cur_from_user->bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 992) bio_put(c->cur_from_user->bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 993)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 994) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 995) * We don't actually need to take the target lock here, as all
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 996) * we're doing is freeing the message and mempools have their
	 * own lock. Each channel has its own scratch message.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 998) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 999) WARN_ON(c->cur_from_user == &c->scratch_message_from_user);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000) mempool_free(c->cur_from_user, &c->target->message_pool);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) c->scratch_message_from_user.posn_from_user = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002) c->cur_from_user = &c->scratch_message_from_user;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004) cleanup_unlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005) mutex_unlock(&c->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006) return total_processed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007) }
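
/*
 * Write-side protocol, as implemented by dev_write() above: the daemon
 * replies with a struct dm_user_message whose seq matches a request it has
 * already read, with the type field set to one of the DM_USER_RESP_* values.
 * When a successful op returns data to the kernel, the payload follows the
 * header and may again be split across several write()s.  A hedged sketch,
 * where write_all() is a hypothetical helper that loops over short writes:
 *
 *	hdr.seq = request_seq;			// must echo the request
 *	hdr.type = DM_USER_RESP_SUCCESS;
 *	write_all(fd, &hdr, sizeof(hdr));
 *	write_all(fd, payload, payload_len);	// ops with data only
 */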
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009) static int dev_release(struct inode *inode, struct file *file)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011) struct channel *c;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013) c = channel_from_file(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014) mutex_lock(&c->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015) channel_free(c);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020) static const struct file_operations file_operations = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) .owner = THIS_MODULE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022) .open = dev_open,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023) .llseek = no_llseek,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) .read_iter = dev_read,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025) .write_iter = dev_write,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026) .release = dev_release,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029) static int user_ctr(struct dm_target *ti, unsigned int argc, char **argv)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031) struct target *t;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) int r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034) if (argc != 3) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035) ti->error = "Invalid argument count";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036) r = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037) goto cleanup_none;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040) t = kzalloc(sizeof(*t), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041) if (t == NULL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042) r = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) goto cleanup_none;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045) ti->private = t;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047) /* Enable more BIO types. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) ti->num_discard_bios = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049) ti->discards_supported = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050) ti->num_flush_bios = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051) ti->flush_supported = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) * We begin with a single reference to the target, which is miscdev's
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055) * reference. This ensures that the target won't be freed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056) * until after the miscdev has been unregistered and all extant
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057) * channels have been closed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059) kref_init(&t->references);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061) t->daemon_terminated = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062) mutex_init(&t->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063) init_waitqueue_head(&t->wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064) INIT_LIST_HEAD(&t->to_user);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065) mempool_init_kmalloc_pool(&t->message_pool, MAX_OUTSTANDING_MESSAGES,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066) sizeof(struct message));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068) t->miscdev.minor = MISC_DYNAMIC_MINOR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069) t->miscdev.fops = &file_operations;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070) t->miscdev.name = kasprintf(GFP_KERNEL, "dm-user/%s", argv[2]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071) if (t->miscdev.name == NULL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072) r = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073) goto cleanup_message_pool;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076) /*
	 * Once the miscdev is registered it can be opened and therefore
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078) * concurrent references to the channel can happen. Holding the target
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079) * lock during misc_register() could deadlock. If registration
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080) * succeeds then we will not access the target again so we just stick a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081) * barrier here, which pairs with taking the target lock everywhere
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082) * else the target is accessed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083) *
	 * I forgot where we ended up on the RCpc/RCsc locks. IIUC, RCsc locks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085) * would mean that we could take the target lock earlier and release it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086) * here instead of the memory barrier. I'm not sure that's any better,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087) * though, and this isn't on a hot path so it probably doesn't matter
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088) * either way.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090) smp_mb();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091)
	r = misc_register(&t->miscdev);
	if (r) {
		DMERR("Unable to register miscdev %s for dm-user",
		      t->miscdev.name);
		goto cleanup_misc_name;
	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102) cleanup_misc_name:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103) kfree(t->miscdev.name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104) cleanup_message_pool:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105) mempool_exit(&t->message_pool);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106) kfree(t);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107) cleanup_none:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108) return r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109) }
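
/*
 * Constructor usage sketch: the table line must carry exactly three target
 * arguments, of which only argv[2] is consumed above, as the name of the
 * control node (registered as "dm-user/<name>").  Something like the
 * following, where ARG0 and ARG1 stand in for the two arguments this
 * constructor currently ignores:
 *
 *	dmsetup create foo --table "0 <sectors> user ARG0 ARG1 foo"
 */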
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111) static void user_dtr(struct dm_target *ti)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1113) struct target *t = target_from_target(ti);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116) * Removes the miscdev. This must be called without the target lock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117) * held to avoid a possible deadlock because our open implementation is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118) * called holding the miscdev lock and must later take the target lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120) * There is no race here because only DM can register/unregister the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121) * miscdev, and DM ensures that doesn't happen twice. The internal
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122) * miscdev lock is sufficient to ensure there are no races between
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123) * deregistering the miscdev and open.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125) misc_deregister(&t->miscdev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128) * We are now free to take the target's lock and drop our reference to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129) * the target. There are almost certainly tasks sleeping in read on at
	 * least one of the channels associated with this target; this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131) * explicitly wakes them up and terminates the read.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133) mutex_lock(&t->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135) * No barrier here, as wait/wake ensures that the flag visibility is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136) * correct WRT the wake/sleep state of the target tasks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138) t->dm_destroyed = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139) wake_up_all(&t->wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140) target_put(t);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144) * Consumes a BIO from device mapper, queueing it up for userspace.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146) static int user_map(struct dm_target *ti, struct bio *bio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148) struct target *t;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149) struct message *entry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151) t = target_from_target(ti);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153) * FIXME
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155) * This seems like a bad idea. Specifically, here we're
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156) * directly on the IO path when we take the target lock, which may also
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157) * be taken from a user context. The user context doesn't actively
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158) * trigger anything that may sleep while holding the lock, but this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159) * still seems like a bad idea.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161) * The obvious way to fix this would be to use a proper queue, which
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162) * would result in no shared locks between the direct IO path and user
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163) * tasks. I had a version that did this, but the head-of-line blocking
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164) * from the circular buffer resulted in us needing a fairly large
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165) * allocation in order to avoid situations in which the queue fills up
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166) * and everything goes off the rails.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167) *
	 * I could jump through some hoops to avoid a shared lock while still
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169) * allowing for a large queue, but I'm not actually sure that allowing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170) * for very large queues is the right thing to do here. Intuitively it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171) * seems better to keep the queues small in here (essentially sized to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172) * the user latency for performance reasons only) and rely on returning
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173) * DM_MAPIO_REQUEUE regularly, as that would give the rest of the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174) * kernel more information.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176) * I'll spend some time trying to figure out what's going on with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177) * DM_MAPIO_REQUEUE, but if someone has a better idea of how to fix
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178) * this I'm all ears.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180) mutex_lock(&t->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183) * FIXME
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185) * The assumption here is that there's no benefit to returning
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186) * DM_MAPIO_KILL as opposed to just erroring out the BIO, but I'm not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187) * sure that's actually true -- for example, I could imagine users
	 * expecting that submitted BIOs are unlikely to fail and therefore
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1189) * relying on submission failure to indicate an unsupported type.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191) * There's two ways I can think of to fix this:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192) * - Add DM arguments that are parsed during the constructor that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193) * allow various dm_target flags to be set that indicate the op
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194) * types supported by this target. This may make sense for things
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195) * like discard, where DM can already transform the BIOs to a form
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196) * that's likely to be supported.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197) * - Some sort of pre-filter that allows userspace to hook in here
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198) * and kill BIOs before marking them as submitted. My guess would
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199) * be that a userspace round trip is a bad idea here, but a BPF
	 *   call seems reasonable.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202) * My guess is that we'd likely want to do both. The first one is easy
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203) * and gives DM the proper info, so it seems better. The BPF call
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204) * seems overly complex for just this, but one could imagine wanting to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205) * sometimes return _MAPPED and a BPF filter would be the way to do
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206) * that.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208) * For example, in Android we have an in-kernel DM device called
	 * "dm-bow" that takes advantage of some portion of the space that has
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210) * been discarded on a device to provide opportunistic block-level
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211) * backups. While one could imagine just implementing this entirely in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212) * userspace, that would come with an appreciable performance penalty.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213) * Instead one could keep a BPF program that forwards most accesses
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214) * directly to the backing block device while informing a userspace
	 * daemon of any discarded space and of writes to blocks that are to be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216) * backed up.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218) if (unlikely((bio_type_to_user_type(bio) < 0) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219) (bio_flags_to_user_flags(bio) < 0))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220) mutex_unlock(&t->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221) return DM_MAPIO_KILL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224) entry = msg_get_map(t);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225) if (unlikely(entry == NULL)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226) mutex_unlock(&t->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227) return DM_MAPIO_REQUEUE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230) bio_get(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231) entry->msg.type = bio_type_to_user_type(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232) entry->msg.flags = bio_flags_to_user_flags(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233) entry->msg.sector = bio->bi_iter.bi_sector;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234) entry->msg.len = bio_size(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235) entry->bio = bio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236) entry->posn_to_user = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1237) entry->total_to_user = bio_bytes_needed_to_user(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1238) entry->posn_from_user = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1239) entry->total_from_user = bio_bytes_needed_from_user(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1240) entry->delayed = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1241) entry->t = t;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1242) /* Pairs with the barrier in dev_read() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1243) smp_wmb();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1244) list_add_tail(&entry->to_user, &t->to_user);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1245)
	/*
	 * If there is no daemon to process the IOs, queue the message on a
	 * delayed workqueue so it can be timed out.
	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1251) if (!is_user_space_thread_present(t))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1252) enqueue_delayed_work(entry, !t->daemon_terminated);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1253)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1254) wake_up_interruptible(&t->wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1255) mutex_unlock(&t->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1256) return DM_MAPIO_SUBMITTED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1257) }
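
/*
 * End-to-end lifecycle of a message, tying the pieces above together:
 *
 *	user_map()  - wraps the BIO in a message, queues it on t->to_user and
 *		      wakes any sleeping readers
 *	dev_read()  - a channel claims the message and copies it (header, then
 *		      any payload) out to the daemon
 *	dev_write() - the daemon's reply is matched back by seq, payload (if
 *		      any) is copied in, and the BIO completes via bio_endio()
 */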
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1258)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1259) static struct target_type user_target = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1260) .name = "user",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1261) .version = { 1, 0, 0 },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1262) .module = THIS_MODULE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1263) .ctr = user_ctr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1264) .dtr = user_dtr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1265) .map = user_map,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1266) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1267)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1268) static int __init dm_user_init(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1269) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1270) int r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1271)
	r = dm_register_target(&user_target);
	if (r)
		DMERR("register failed %d", r);

	return r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1282) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1283)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1284) static void __exit dm_user_exit(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1285) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1286) dm_unregister_target(&user_target);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1287) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1288)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1289) module_init(dm_user_init);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1290) module_exit(dm_user_exit);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1291) MODULE_AUTHOR("Palmer Dabbelt <palmerdabbelt@google.com>");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1292) MODULE_DESCRIPTION(DM_NAME " target returning blocks from userspace");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1293) MODULE_LICENSE("GPL");