// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (C) 2009 Red Hat, Inc.
 * Copyright (C) 2006 Rusty Russell IBM Corporation
 *
 * Author: Michael S. Tsirkin <mst@redhat.com>
 *
 * Inspiration, some code, and most witty comments come from
 * Documentation/virtual/lguest/lguest.c, by Rusty Russell
 *
 * Generic code for virtio server in host kernel.
 */

#include <linux/eventfd.h>
#include <linux/vhost.h>
#include <linux/uio.h>
#include <linux/mm.h>
#include <linux/miscdevice.h>
#include <linux/mutex.h>
#include <linux/poll.h>
#include <linux/file.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/kthread.h>
#include <linux/cgroup.h>
#include <linux/module.h>
#include <linux/sort.h>
#include <linux/sched/mm.h>
#include <linux/sched/signal.h>
#include <linux/interval_tree_generic.h>
#include <linux/nospec.h>
#include <linux/kcov.h>

#include "vhost.h"

static ushort max_mem_regions = 64;
module_param(max_mem_regions, ushort, 0444);
MODULE_PARM_DESC(max_mem_regions,
	"Maximum number of memory regions in memory map. (default: 64)");
static int max_iotlb_entries = 2048;
module_param(max_iotlb_entries, int, 0444);
MODULE_PARM_DESC(max_iotlb_entries,
	"Maximum number of iotlb entries. (default: 2048)");

enum {
	VHOST_MEMORY_F_LOG = 0x1,
};

#define vhost_used_event(vq) ((__virtio16 __user *)&vq->avail->ring[vq->num])
#define vhost_avail_event(vq) ((__virtio16 __user *)&vq->used->ring[vq->num])
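/* With VIRTIO_RING_F_EVENT_IDX, the split ring layout places used_event
 * in the two bytes just past the last avail ring entry and avail_event
 * just past the last used ring entry; the casts above compute exactly
 * those slots.
 */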

#ifdef CONFIG_VHOST_CROSS_ENDIAN_LEGACY
static void vhost_disable_cross_endian(struct vhost_virtqueue *vq)
{
	vq->user_be = !virtio_legacy_is_little_endian();
}

static void vhost_enable_cross_endian_big(struct vhost_virtqueue *vq)
{
	vq->user_be = true;
}

static void vhost_enable_cross_endian_little(struct vhost_virtqueue *vq)
{
	vq->user_be = false;
}

static long vhost_set_vring_endian(struct vhost_virtqueue *vq, int __user *argp)
{
	struct vhost_vring_state s;

	if (vq->private_data)
		return -EBUSY;

	if (copy_from_user(&s, argp, sizeof(s)))
		return -EFAULT;

	if (s.num != VHOST_VRING_LITTLE_ENDIAN &&
	    s.num != VHOST_VRING_BIG_ENDIAN)
		return -EINVAL;

	if (s.num == VHOST_VRING_BIG_ENDIAN)
		vhost_enable_cross_endian_big(vq);
	else
		vhost_enable_cross_endian_little(vq);

	return 0;
}

static long vhost_get_vring_endian(struct vhost_virtqueue *vq, u32 idx,
				   int __user *argp)
{
	struct vhost_vring_state s = {
		.index = idx,
		.num = vq->user_be
	};

	if (copy_to_user(argp, &s, sizeof(s)))
		return -EFAULT;

	return 0;
}

static void vhost_init_is_le(struct vhost_virtqueue *vq)
{
	/* Note for legacy virtio: user_be is initialized at reset time
	 * according to the host endianness. If userspace does not set an
	 * explicit endianness, the default behavior is native endian, as
	 * expected by legacy virtio.
	 */
	vq->is_le = vhost_has_feature(vq, VIRTIO_F_VERSION_1) || !vq->user_be;
}
#else
static void vhost_disable_cross_endian(struct vhost_virtqueue *vq)
{
}

static long vhost_set_vring_endian(struct vhost_virtqueue *vq, int __user *argp)
{
	return -ENOIOCTLCMD;
}

static long vhost_get_vring_endian(struct vhost_virtqueue *vq, u32 idx,
				   int __user *argp)
{
	return -ENOIOCTLCMD;
}

static void vhost_init_is_le(struct vhost_virtqueue *vq)
{
	vq->is_le = vhost_has_feature(vq, VIRTIO_F_VERSION_1)
		|| virtio_legacy_is_little_endian();
}
#endif /* CONFIG_VHOST_CROSS_ENDIAN_LEGACY */

static void vhost_reset_is_le(struct vhost_virtqueue *vq)
{
	vhost_init_is_le(vq);
}

struct vhost_flush_struct {
	struct vhost_work work;
	struct completion wait_event;
};

static void vhost_flush_work(struct vhost_work *work)
{
	struct vhost_flush_struct *s;

	s = container_of(work, struct vhost_flush_struct, work);
	complete(&s->wait_event);
}

static void vhost_poll_func(struct file *file, wait_queue_head_t *wqh,
			    poll_table *pt)
{
	struct vhost_poll *poll;

	poll = container_of(pt, struct vhost_poll, table);
	poll->wqh = wqh;
	add_wait_queue(wqh, &poll->wait);
}

static int vhost_poll_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync,
			     void *key)
{
	struct vhost_poll *poll = container_of(wait, struct vhost_poll, wait);
	struct vhost_work *work = &poll->work;

	if (!(key_to_poll(key) & poll->mask))
		return 0;

	if (!poll->dev->use_worker)
		work->fn(work);
	else
		vhost_poll_queue(poll);

	return 0;
}

void vhost_work_init(struct vhost_work *work, vhost_work_fn_t fn)
{
	clear_bit(VHOST_WORK_QUEUED, &work->flags);
	work->fn = fn;
}
EXPORT_SYMBOL_GPL(vhost_work_init);

/* Init poll structure */
void vhost_poll_init(struct vhost_poll *poll, vhost_work_fn_t fn,
		     __poll_t mask, struct vhost_dev *dev)
{
	init_waitqueue_func_entry(&poll->wait, vhost_poll_wakeup);
	init_poll_funcptr(&poll->table, vhost_poll_func);
	poll->mask = mask;
	poll->dev = dev;
	poll->wqh = NULL;

	vhost_work_init(&poll->work, fn);
}
EXPORT_SYMBOL_GPL(vhost_poll_init);
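/* Illustrative usage (a sketch, not code from this file): a backend
 * typically embeds a vhost_poll per queue and wires it to its kick
 * handler, roughly:
 *
 *	vhost_poll_init(&n->poll, handle_tx_kick, EPOLLOUT, &n->dev);
 *
 * where n, handle_tx_kick and the EPOLLOUT mask are hypothetical
 * backend choices; handle_tx_kick must be a vhost_work_fn_t.
 */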

/* Start polling a file. We add ourselves to file's wait queue. The caller must
 * keep a reference to the file until after vhost_poll_stop is called. */
int vhost_poll_start(struct vhost_poll *poll, struct file *file)
{
	__poll_t mask;

	if (poll->wqh)
		return 0;

	mask = vfs_poll(file, &poll->table);
	if (mask)
		vhost_poll_wakeup(&poll->wait, 0, 0, poll_to_key(mask));
	if (mask & EPOLLERR) {
		vhost_poll_stop(poll);
		return -EINVAL;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(vhost_poll_start);

/* Stop polling a file. After this function returns, it becomes safe to drop
 * the file reference. You must also flush afterwards. */
void vhost_poll_stop(struct vhost_poll *poll)
{
	if (poll->wqh) {
		remove_wait_queue(poll->wqh, &poll->wait);
		poll->wqh = NULL;
	}
}
EXPORT_SYMBOL_GPL(vhost_poll_stop);

void vhost_work_flush(struct vhost_dev *dev, struct vhost_work *work)
{
	struct vhost_flush_struct flush;

	if (dev->worker) {
		init_completion(&flush.wait_event);
		vhost_work_init(&flush.work, vhost_flush_work);

		vhost_work_queue(dev, &flush.work);
		wait_for_completion(&flush.wait_event);
	}
}
EXPORT_SYMBOL_GPL(vhost_work_flush);
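/* The flush above relies on the worker running items in queue order:
 * it queues a fresh vhost_flush_struct work and sleeps on its
 * completion, so by the time the wait returns, every work item queued
 * before the flush has already executed.
 */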

/* Flush any work that has been scheduled. When calling this, don't hold any
 * locks that are also used by the callback. */
void vhost_poll_flush(struct vhost_poll *poll)
{
	vhost_work_flush(poll->dev, &poll->work);
}
EXPORT_SYMBOL_GPL(vhost_poll_flush);

void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work)
{
	if (!dev->worker)
		return;

	if (!test_and_set_bit(VHOST_WORK_QUEUED, &work->flags)) {
		/* We can only add the work to the list after we're
		 * sure it was not in the list.
		 * test_and_set_bit() implies a memory barrier.
		 */
		llist_add(&work->node, &dev->work_list);
		wake_up_process(dev->worker);
	}
}
EXPORT_SYMBOL_GPL(vhost_work_queue);

/* A lockless hint for busy polling code to exit the loop */
bool vhost_has_work(struct vhost_dev *dev)
{
	return !llist_empty(&dev->work_list);
}
EXPORT_SYMBOL_GPL(vhost_has_work);

void vhost_poll_queue(struct vhost_poll *poll)
{
	vhost_work_queue(poll->dev, &poll->work);
}
EXPORT_SYMBOL_GPL(vhost_poll_queue);

static void __vhost_vq_meta_reset(struct vhost_virtqueue *vq)
{
	int j;

	for (j = 0; j < VHOST_NUM_ADDRS; j++)
		vq->meta_iotlb[j] = NULL;
}

static void vhost_vq_meta_reset(struct vhost_dev *d)
{
	int i;

	for (i = 0; i < d->nvqs; ++i)
		__vhost_vq_meta_reset(d->vqs[i]);
}

static void vhost_vring_call_reset(struct vhost_vring_call *call_ctx)
{
	call_ctx->ctx = NULL;
	memset(&call_ctx->producer, 0x0, sizeof(struct irq_bypass_producer));
}

bool vhost_vq_is_setup(struct vhost_virtqueue *vq)
{
	return vq->avail && vq->desc && vq->used && vhost_vq_access_ok(vq);
}
EXPORT_SYMBOL_GPL(vhost_vq_is_setup);

static void vhost_vq_reset(struct vhost_dev *dev,
			   struct vhost_virtqueue *vq)
{
	vq->num = 1;
	vq->desc = NULL;
	vq->avail = NULL;
	vq->used = NULL;
	vq->last_avail_idx = 0;
	vq->avail_idx = 0;
	vq->last_used_idx = 0;
	vq->signalled_used = 0;
	vq->signalled_used_valid = false;
	vq->used_flags = 0;
	vq->log_used = false;
	vq->log_addr = -1ull;
	vq->private_data = NULL;
	vq->acked_features = 0;
	vq->acked_backend_features = 0;
	vq->log_base = NULL;
	vq->error_ctx = NULL;
	vq->kick = NULL;
	vq->log_ctx = NULL;
	vhost_disable_cross_endian(vq);
	vhost_reset_is_le(vq);
	vq->busyloop_timeout = 0;
	vq->umem = NULL;
	vq->iotlb = NULL;
	vhost_vring_call_reset(&vq->call_ctx);
	__vhost_vq_meta_reset(vq);
}

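/* Worker thread: runs with the owner's mm so userspace ring accesses
 * resolve in the owner process address space. Pending items are drained
 * with llist_del_all() and replayed oldest-first via
 * llist_reverse_order(), since llist_add() builds the list in LIFO
 * order.
 */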
static int vhost_worker(void *data)
{
	struct vhost_dev *dev = data;
	struct vhost_work *work, *work_next;
	struct llist_node *node;

	kthread_use_mm(dev->mm);

	for (;;) {
		/* mb paired w/ kthread_stop */
		set_current_state(TASK_INTERRUPTIBLE);

		if (kthread_should_stop()) {
			__set_current_state(TASK_RUNNING);
			break;
		}

		node = llist_del_all(&dev->work_list);
		if (!node)
			schedule();

		node = llist_reverse_order(node);
		/* make sure flag is seen after deletion */
		smp_wmb();
		llist_for_each_entry_safe(work, work_next, node, node) {
			clear_bit(VHOST_WORK_QUEUED, &work->flags);
			__set_current_state(TASK_RUNNING);
			kcov_remote_start_common(dev->kcov_handle);
			work->fn(work);
			kcov_remote_stop();
			if (need_resched())
				schedule();
		}
	}
	kthread_unuse_mm(dev->mm);
	return 0;
}

static void vhost_vq_free_iovecs(struct vhost_virtqueue *vq)
{
	kfree(vq->indirect);
	vq->indirect = NULL;
	kfree(vq->log);
	vq->log = NULL;
	kfree(vq->heads);
	vq->heads = NULL;
}

/* Helper to allocate iovec buffers for all vqs. */
static long vhost_dev_alloc_iovecs(struct vhost_dev *dev)
{
	struct vhost_virtqueue *vq;
	int i;

	for (i = 0; i < dev->nvqs; ++i) {
		vq = dev->vqs[i];
		vq->indirect = kmalloc_array(UIO_MAXIOV,
					     sizeof(*vq->indirect),
					     GFP_KERNEL);
		vq->log = kmalloc_array(dev->iov_limit, sizeof(*vq->log),
					GFP_KERNEL);
		vq->heads = kmalloc_array(dev->iov_limit, sizeof(*vq->heads),
					  GFP_KERNEL);
		if (!vq->indirect || !vq->log || !vq->heads)
			goto err_nomem;
	}
	return 0;

err_nomem:
	for (; i >= 0; --i)
		vhost_vq_free_iovecs(dev->vqs[i]);
	return -ENOMEM;
}

static void vhost_dev_free_iovecs(struct vhost_dev *dev)
{
	int i;

	for (i = 0; i < dev->nvqs; ++i)
		vhost_vq_free_iovecs(dev->vqs[i]);
}

bool vhost_exceeds_weight(struct vhost_virtqueue *vq,
			  int pkts, int total_len)
{
	struct vhost_dev *dev = vq->dev;

	if ((dev->byte_weight && total_len >= dev->byte_weight) ||
	    pkts >= dev->weight) {
		vhost_poll_queue(&vq->poll);
		return true;
	}

	return false;
}
EXPORT_SYMBOL_GPL(vhost_exceeds_weight);
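/* Illustrative handler loop (a sketch; process_one_buf and the counters
 * are hypothetical): backends call vhost_exceeds_weight() from their
 * kick handlers to bound the work done per invocation, relying on it to
 * requeue the poll when the budget runs out:
 *
 *	do {
 *		len = process_one_buf(vq);
 *		if (len <= 0)
 *			break;
 *		total_len += len;
 *	} while (likely(!vhost_exceeds_weight(vq, ++pkts, total_len)));
 */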

static size_t vhost_get_avail_size(struct vhost_virtqueue *vq,
				   unsigned int num)
{
	size_t event __maybe_unused =
		vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;

	return sizeof(*vq->avail) +
	       sizeof(*vq->avail->ring) * num + event;
}

static size_t vhost_get_used_size(struct vhost_virtqueue *vq,
				  unsigned int num)
{
	size_t event __maybe_unused =
		vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;

	return sizeof(*vq->used) +
	       sizeof(*vq->used->ring) * num + event;
}

static size_t vhost_get_desc_size(struct vhost_virtqueue *vq,
				  unsigned int num)
{
	return sizeof(*vq->desc) * num;
}
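/* Worked example of the split ring sizes these compute: with num = 256
 * and VIRTIO_RING_F_EVENT_IDX negotiated, the avail area is
 * 4 + 2 * 256 + 2 bytes, the used area is 4 + 8 * 256 + 2 bytes, and
 * the descriptor table is 16 * 256 = 4096 bytes.
 */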

void vhost_dev_init(struct vhost_dev *dev,
		    struct vhost_virtqueue **vqs, int nvqs,
		    int iov_limit, int weight, int byte_weight,
		    bool use_worker,
		    int (*msg_handler)(struct vhost_dev *dev,
				       struct vhost_iotlb_msg *msg))
{
	struct vhost_virtqueue *vq;
	int i;

	dev->vqs = vqs;
	dev->nvqs = nvqs;
	mutex_init(&dev->mutex);
	dev->log_ctx = NULL;
	dev->umem = NULL;
	dev->iotlb = NULL;
	dev->mm = NULL;
	dev->worker = NULL;
	dev->iov_limit = iov_limit;
	dev->weight = weight;
	dev->byte_weight = byte_weight;
	dev->use_worker = use_worker;
	dev->msg_handler = msg_handler;
	init_llist_head(&dev->work_list);
	init_waitqueue_head(&dev->wait);
	INIT_LIST_HEAD(&dev->read_list);
	INIT_LIST_HEAD(&dev->pending_list);
	spin_lock_init(&dev->iotlb_lock);

	for (i = 0; i < dev->nvqs; ++i) {
		vq = dev->vqs[i];
		vq->log = NULL;
		vq->indirect = NULL;
		vq->heads = NULL;
		vq->dev = dev;
		mutex_init(&vq->mutex);
		vhost_vq_reset(dev, vq);
		if (vq->handle_kick)
			vhost_poll_init(&vq->poll, vq->handle_kick,
					EPOLLIN, dev);
	}
}
EXPORT_SYMBOL_GPL(vhost_dev_init);
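/* Illustrative setup (a sketch; the names and weight constants are
 * hypothetical, not from this file): a backend fills in handle_kick for
 * each vq before handing the array over, e.g.:
 *
 *	n->vqs[0].handle_kick = handle_rx_kick;
 *	vhost_dev_init(&n->dev, vqs, nvqs, UIO_MAXIOV,
 *		       MY_WEIGHT, MY_BYTE_WEIGHT, true, NULL);
 *
 * Passing NULL for msg_handler leaves IOTLB messages to the default
 * handling.
 */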

/* Caller should have device mutex */
long vhost_dev_check_owner(struct vhost_dev *dev)
{
	/* Are you the owner? If not, I don't think you mean to do that */
	return dev->mm == current->mm ? 0 : -EPERM;
}
EXPORT_SYMBOL_GPL(vhost_dev_check_owner);

struct vhost_attach_cgroups_struct {
	struct vhost_work work;
	struct task_struct *owner;
	int ret;
};

static void vhost_attach_cgroups_work(struct vhost_work *work)
{
	struct vhost_attach_cgroups_struct *s;

	s = container_of(work, struct vhost_attach_cgroups_struct, work);
	s->ret = cgroup_attach_task_all(s->owner, current);
}

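/* The attach has to run on the worker thread itself, because
 * cgroup_attach_task_all() moves 'current' into the owner's cgroups;
 * hence vhost_attach_cgroups() dispatches it as a work item and then
 * flushes to pick up the result.
 */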
static int vhost_attach_cgroups(struct vhost_dev *dev)
{
	struct vhost_attach_cgroups_struct attach;

	attach.owner = current;
	vhost_work_init(&attach.work, vhost_attach_cgroups_work);
	vhost_work_queue(dev, &attach.work);
	vhost_work_flush(dev, &attach.work);
	return attach.ret;
}

/* Caller should have device mutex */
bool vhost_dev_has_owner(struct vhost_dev *dev)
{
	return dev->mm;
}
EXPORT_SYMBOL_GPL(vhost_dev_has_owner);

static void vhost_attach_mm(struct vhost_dev *dev)
{
	/* No owner, become one */
	if (dev->use_worker) {
		dev->mm = get_task_mm(current);
	} else {
		/* vDPA device does not use a worker thread, so there's
		 * no need to hold the address space for mm. This helps
		 * to avoid deadlock in the case of mmap(), which may
		 * hold the refcnt of the file and depend on the release
		 * method to remove the vma.
		 */
		dev->mm = current->mm;
		mmgrab(dev->mm);
	}
}

static void vhost_detach_mm(struct vhost_dev *dev)
{
	if (!dev->mm)
		return;

	if (dev->use_worker)
		mmput(dev->mm);
	else
		mmdrop(dev->mm);

	dev->mm = NULL;
}

/* Caller should have device mutex */
long vhost_dev_set_owner(struct vhost_dev *dev)
{
	struct task_struct *worker;
	int err;

	/* Is there an owner already? */
	if (vhost_dev_has_owner(dev)) {
		err = -EBUSY;
		goto err_mm;
	}

	vhost_attach_mm(dev);

	dev->kcov_handle = kcov_common_handle();
	if (dev->use_worker) {
		worker = kthread_create(vhost_worker, dev,
					"vhost-%d", current->pid);
		if (IS_ERR(worker)) {
			err = PTR_ERR(worker);
			goto err_worker;
		}

		dev->worker = worker;
		wake_up_process(worker); /* avoid contributing to loadavg */

		err = vhost_attach_cgroups(dev);
		if (err)
			goto err_cgroup;
	}

	err = vhost_dev_alloc_iovecs(dev);
	if (err)
		goto err_cgroup;

	return 0;
err_cgroup:
	if (dev->worker) {
		kthread_stop(dev->worker);
		dev->worker = NULL;
	}
err_worker:
	vhost_detach_mm(dev);
	dev->kcov_handle = 0;
err_mm:
	return err;
}
EXPORT_SYMBOL_GPL(vhost_dev_set_owner);

static struct vhost_iotlb *iotlb_alloc(void)
{
	return vhost_iotlb_alloc(max_iotlb_entries,
				 VHOST_IOTLB_FLAG_RETIRE);
}

struct vhost_iotlb *vhost_dev_reset_owner_prepare(void)
{
	return iotlb_alloc();
}
EXPORT_SYMBOL_GPL(vhost_dev_reset_owner_prepare);

/* Caller should have device mutex */
void vhost_dev_reset_owner(struct vhost_dev *dev, struct vhost_iotlb *umem)
{
	int i;

	vhost_dev_cleanup(dev);

	dev->umem = umem;
	/* We don't need VQ locks below since vhost_dev_cleanup makes sure
	 * VQs aren't running.
	 */
	for (i = 0; i < dev->nvqs; ++i)
		dev->vqs[i]->umem = umem;
}
EXPORT_SYMBOL_GPL(vhost_dev_reset_owner);

void vhost_dev_stop(struct vhost_dev *dev)
{
	int i;

	for (i = 0; i < dev->nvqs; ++i) {
		if (dev->vqs[i]->kick && dev->vqs[i]->handle_kick) {
			vhost_poll_stop(&dev->vqs[i]->poll);
			vhost_poll_flush(&dev->vqs[i]->poll);
		}
	}
}
EXPORT_SYMBOL_GPL(vhost_dev_stop);

static void vhost_clear_msg(struct vhost_dev *dev)
{
	struct vhost_msg_node *node, *n;

	spin_lock(&dev->iotlb_lock);

	list_for_each_entry_safe(node, n, &dev->read_list, node) {
		list_del(&node->node);
		kfree(node);
	}

	list_for_each_entry_safe(node, n, &dev->pending_list, node) {
		list_del(&node->node);
		kfree(node);
	}

	spin_unlock(&dev->iotlb_lock);
}

void vhost_dev_cleanup(struct vhost_dev *dev)
{
	int i;

	for (i = 0; i < dev->nvqs; ++i) {
		if (dev->vqs[i]->error_ctx)
			eventfd_ctx_put(dev->vqs[i]->error_ctx);
		if (dev->vqs[i]->kick)
			fput(dev->vqs[i]->kick);
		if (dev->vqs[i]->call_ctx.ctx)
			eventfd_ctx_put(dev->vqs[i]->call_ctx.ctx);
		vhost_vq_reset(dev, dev->vqs[i]);
	}
	vhost_dev_free_iovecs(dev);
	if (dev->log_ctx)
		eventfd_ctx_put(dev->log_ctx);
	dev->log_ctx = NULL;
	/* No one will access memory at this point */
	vhost_iotlb_free(dev->umem);
	dev->umem = NULL;
	vhost_iotlb_free(dev->iotlb);
	dev->iotlb = NULL;
	vhost_clear_msg(dev);
	wake_up_interruptible_poll(&dev->wait, EPOLLIN | EPOLLRDNORM);
	WARN_ON(!llist_empty(&dev->work_list));
	if (dev->worker) {
		kthread_stop(dev->worker);
		dev->worker = NULL;
		dev->kcov_handle = 0;
	}
	vhost_detach_mm(dev);
}
EXPORT_SYMBOL_GPL(vhost_dev_cleanup);

static bool log_access_ok(void __user *log_base, u64 addr, unsigned long sz)
{
	u64 a = addr / VHOST_PAGE_SIZE / 8;

	/* Make sure 64 bit math will not overflow. */
	if (a > ULONG_MAX - (unsigned long)log_base ||
	    a + (unsigned long)log_base > ULONG_MAX)
		return false;

	return access_ok(log_base + a,
			 (sz + VHOST_PAGE_SIZE * 8 - 1) / VHOST_PAGE_SIZE / 8);
}
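/* Worked example of the math above: the dirty log is a userspace bitmap
 * with one bit per VHOST_PAGE_SIZE page, so guest address addr lands in
 * byte addr / VHOST_PAGE_SIZE / 8 of the bitmap, and covering sz bytes
 * takes DIV_ROUND_UP(sz, VHOST_PAGE_SIZE * 8) bitmap bytes, which is
 * the length passed to access_ok().
 */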

/* Make sure 64 bit math will not overflow. */
static bool vhost_overflow(u64 uaddr, u64 size)
{
	if (uaddr > ULONG_MAX || size > ULONG_MAX)
		return true;

	if (!size)
		return false;

	return uaddr > ULONG_MAX - size + 1;
}

/* Caller should have vq mutex and device mutex. */
static bool vq_memory_access_ok(void __user *log_base, struct vhost_iotlb *umem,
				int log_all)
{
	struct vhost_iotlb_map *map;

	if (!umem)
		return false;

	list_for_each_entry(map, &umem->list, link) {
		unsigned long a = map->addr;

		if (vhost_overflow(map->addr, map->size))
			return false;

		if (!access_ok((void __user *)a, map->size))
			return false;
		else if (log_all && !log_access_ok(log_base,
						   map->start,
						   map->size))
			return false;
	}
	return true;
}
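/* meta_iotlb is a small per-vq cache of the iotlb translations that
 * cover the descriptor, avail and used rings (indexed by VHOST_ADDR_*).
 * A hit lets the copy helpers below use plain __copy_to/from_user()
 * instead of walking the full iotlb via translate_desc().
 */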
static inline void __user *vhost_vq_meta_fetch(struct vhost_virtqueue *vq,
					       u64 addr, unsigned int size,
					       int type)
{
	const struct vhost_iotlb_map *map = vq->meta_iotlb[type];

	if (!map)
		return NULL;

	return (void __user *)(uintptr_t)(map->addr + addr - map->start);
}

/* Can we switch to this memory table? */
/* Caller should have device mutex but not vq mutex */
static bool memory_access_ok(struct vhost_dev *d, struct vhost_iotlb *umem,
			     int log_all)
{
	int i;

	for (i = 0; i < d->nvqs; ++i) {
		bool ok;
		bool log;

		mutex_lock(&d->vqs[i]->mutex);
		log = log_all || vhost_has_feature(d->vqs[i], VHOST_F_LOG_ALL);
		/* If ring is inactive, will check when it's enabled. */
		if (d->vqs[i]->private_data)
			ok = vq_memory_access_ok(d->vqs[i]->log_base,
						 umem, log);
		else
			ok = true;
		mutex_unlock(&d->vqs[i]->mutex);
		if (!ok)
			return false;
	}
	return true;
}

static int translate_desc(struct vhost_virtqueue *vq, u64 addr, u32 len,
			  struct iovec iov[], int iov_size, int access);

static int vhost_copy_to_user(struct vhost_virtqueue *vq, void __user *to,
			      const void *from, unsigned size)
{
	int ret;

	if (!vq->iotlb)
		return __copy_to_user(to, from, size);
	else {
		/* This function should be called after iotlb
		 * prefetch, which means we're sure that all vq
		 * memory is accessible through the iotlb. So -EAGAIN
		 * should not happen in this case.
		 */
		struct iov_iter t;
		void __user *uaddr = vhost_vq_meta_fetch(vq,
				     (u64)(uintptr_t)to, size,
				     VHOST_ADDR_USED);

		if (uaddr)
			return __copy_to_user(uaddr, from, size);

		ret = translate_desc(vq, (u64)(uintptr_t)to, size, vq->iotlb_iov,
				     ARRAY_SIZE(vq->iotlb_iov),
				     VHOST_ACCESS_WO);
		if (ret < 0)
			goto out;
		iov_iter_init(&t, WRITE, vq->iotlb_iov, ret, size);
		ret = copy_to_iter(from, size, &t);
		if (ret == size)
			ret = 0;
	}
out:
	return ret;
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 851)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 852) static int vhost_copy_from_user(struct vhost_virtqueue *vq, void *to,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 853) void __user *from, unsigned size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 854) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 855) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 856)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 857) if (!vq->iotlb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 858) return __copy_from_user(to, from, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 859) else {
		/* This function should be called after iotlb
		 * prefetch, which means we're sure that the vq
		 * memory can be accessed through the iotlb, so -EAGAIN
		 * should not happen in this case.
		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 865) void __user *uaddr = vhost_vq_meta_fetch(vq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 866) (u64)(uintptr_t)from, size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 867) VHOST_ADDR_DESC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 868) struct iov_iter f;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 869)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 870) if (uaddr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 871) return __copy_from_user(to, uaddr, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 872)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 873) ret = translate_desc(vq, (u64)(uintptr_t)from, size, vq->iotlb_iov,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 874) ARRAY_SIZE(vq->iotlb_iov),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 875) VHOST_ACCESS_RO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 876) if (ret < 0) {
			vq_err(vq, "IOTLB translation failure: uaddr %p size 0x%llx\n",
			       from, (unsigned long long) size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 880) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 881) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 882) iov_iter_init(&f, READ, vq->iotlb_iov, ret, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 883) ret = copy_from_iter(to, size, &f);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 884) if (ret == size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 885) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 886) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 887)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 888) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 889) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 890) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 891)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 892) static void __user *__vhost_get_user_slow(struct vhost_virtqueue *vq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 893) void __user *addr, unsigned int size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 894) int type)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 895) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 896) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 897)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 898) ret = translate_desc(vq, (u64)(uintptr_t)addr, size, vq->iotlb_iov,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 899) ARRAY_SIZE(vq->iotlb_iov),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 900) VHOST_ACCESS_RO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 901) if (ret < 0) {
		vq_err(vq, "IOTLB translation failure: uaddr %p size 0x%llx\n",
		       addr, (unsigned long long) size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 905) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 906) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 907)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 908) if (ret != 1 || vq->iotlb_iov[0].iov_len != size) {
		vq_err(vq, "Non-atomic userspace memory access: uaddr %p size 0x%llx\n",
		       addr, (unsigned long long) size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 912) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 913) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 914)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 915) return vq->iotlb_iov[0].iov_base;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 916) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 917)
/* This function should be called after iotlb prefetch, which means
 * we're sure that the vq memory can be accessed through the iotlb, so
 * -EAGAIN should not happen in this case.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 923) static inline void __user *__vhost_get_user(struct vhost_virtqueue *vq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 924) void __user *addr, unsigned int size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 925) int type)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 926) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 927) void __user *uaddr = vhost_vq_meta_fetch(vq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 928) (u64)(uintptr_t)addr, size, type);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 929) if (uaddr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 930) return uaddr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 931)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 932) return __vhost_get_user_slow(vq, addr, size, type);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 933) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 934)
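/*
 * vhost_put_user() stores one ring field: a direct __put_user() when
 * no IOTLB is in use, otherwise the GIOVA is resolved through
 * __vhost_get_user() first and the store fails with -EFAULT if no
 * writable mapping covers it.
 */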
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 935) #define vhost_put_user(vq, x, ptr) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 936) ({ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 937) int ret; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 938) if (!vq->iotlb) { \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 939) ret = __put_user(x, ptr); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 940) } else { \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 941) __typeof__(ptr) to = \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 942) (__typeof__(ptr)) __vhost_get_user(vq, ptr, \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 943) sizeof(*ptr), VHOST_ADDR_USED); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 944) if (to != NULL) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 945) ret = __put_user(x, to); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 946) else \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 947) ret = -EFAULT; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 948) } \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 949) ret; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 950) })
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 951)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 952) static inline int vhost_put_avail_event(struct vhost_virtqueue *vq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 953) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 954) return vhost_put_user(vq, cpu_to_vhost16(vq, vq->avail_idx),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 955) vhost_avail_event(vq));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 956) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 957)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 958) static inline int vhost_put_used(struct vhost_virtqueue *vq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 959) struct vring_used_elem *head, int idx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 960) int count)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 961) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 962) return vhost_copy_to_user(vq, vq->used->ring + idx, head,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 963) count * sizeof(*head));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 964) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 965)
static inline int vhost_put_used_flags(struct vhost_virtqueue *vq)
{
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 969) return vhost_put_user(vq, cpu_to_vhost16(vq, vq->used_flags),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 970) &vq->used->flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 971) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 972)
static inline int vhost_put_used_idx(struct vhost_virtqueue *vq)
{
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 976) return vhost_put_user(vq, cpu_to_vhost16(vq, vq->last_used_idx),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 977) &vq->used->idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 978) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 979)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 980) #define vhost_get_user(vq, x, ptr, type) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 981) ({ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 982) int ret; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 983) if (!vq->iotlb) { \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 984) ret = __get_user(x, ptr); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 985) } else { \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 986) __typeof__(ptr) from = \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 987) (__typeof__(ptr)) __vhost_get_user(vq, ptr, \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 988) sizeof(*ptr), \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 989) type); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 990) if (from != NULL) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 991) ret = __get_user(x, from); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 992) else \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 993) ret = -EFAULT; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 994) } \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 995) ret; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 996) })
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 997)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 998) #define vhost_get_avail(vq, x, ptr) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 999) vhost_get_user(vq, x, ptr, VHOST_ADDR_AVAIL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) #define vhost_get_used(vq, x, ptr) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002) vhost_get_user(vq, x, ptr, VHOST_ADDR_USED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003)
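/*
 * Take all virtqueue mutexes in index order so that IOTLB updates can
 * be applied atomically with respect to every ring.  Each mutex uses
 * its index as the lockdep subclass, which makes the nested locking
 * legal in lockdep's eyes.  Caller must hold the device mutex.
 */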
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004) static void vhost_dev_lock_vqs(struct vhost_dev *d)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005) {
	int i;

	for (i = 0; i < d->nvqs; ++i)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008) mutex_lock_nested(&d->vqs[i]->mutex, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011) static void vhost_dev_unlock_vqs(struct vhost_dev *d)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012) {
	int i;

	for (i = 0; i < d->nvqs; ++i)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015) mutex_unlock(&d->vqs[i]->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018) static inline int vhost_get_avail_idx(struct vhost_virtqueue *vq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019) __virtio16 *idx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) return vhost_get_avail(vq, *idx, &vq->avail->idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) static inline int vhost_get_avail_head(struct vhost_virtqueue *vq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025) __virtio16 *head, int idx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027) return vhost_get_avail(vq, *head,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028) &vq->avail->ring[idx & (vq->num - 1)]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031) static inline int vhost_get_avail_flags(struct vhost_virtqueue *vq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) __virtio16 *flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034) return vhost_get_avail(vq, *flags, &vq->avail->flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037) static inline int vhost_get_used_event(struct vhost_virtqueue *vq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038) __virtio16 *event)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040) return vhost_get_avail(vq, *event, vhost_used_event(vq));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) static inline int vhost_get_used_idx(struct vhost_virtqueue *vq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044) __virtio16 *idx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046) return vhost_get_used(vq, *idx, &vq->used->idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049) static inline int vhost_get_desc(struct vhost_virtqueue *vq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050) struct vring_desc *desc, int idx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052) return vhost_copy_from_user(vq, desc, vq->desc + idx, sizeof(*desc));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054)
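/*
 * A new IOTLB mapping has arrived; wake up any virtqueue whose pending
 * miss request is satisfied by the range described in @msg, and drop
 * the corresponding node from the pending list.
 */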
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055) static void vhost_iotlb_notify_vq(struct vhost_dev *d,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056) struct vhost_iotlb_msg *msg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058) struct vhost_msg_node *node, *n;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060) spin_lock(&d->iotlb_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062) list_for_each_entry_safe(node, n, &d->pending_list, node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063) struct vhost_iotlb_msg *vq_msg = &node->msg.iotlb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064) if (msg->iova <= vq_msg->iova &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065) msg->iova + msg->size - 1 >= vq_msg->iova &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066) vq_msg->type == VHOST_IOTLB_MISS) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067) vhost_poll_queue(&node->vq->poll);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068) list_del(&node->node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069) kfree(node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073) spin_unlock(&d->iotlb_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075)
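/*
 * Sanity-check the userspace half of an IOTLB update: the range must
 * not wrap around and must pass access_ok() for the requested
 * permissions.
 */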
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076) static bool umem_access_ok(u64 uaddr, u64 size, int access)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078) unsigned long a = uaddr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080) /* Make sure 64 bit math will not overflow. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081) if (vhost_overflow(uaddr, size))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084) if ((access & VHOST_ACCESS_RO) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085) !access_ok((void __user *)a, size))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087) if ((access & VHOST_ACCESS_WO) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088) !access_ok((void __user *)a, size))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092)
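/*
 * Default handler for IOTLB messages written by userspace:
 * VHOST_IOTLB_UPDATE installs a new translation (after validating the
 * backing user memory) and kicks any virtqueue waiting on it, while
 * VHOST_IOTLB_INVALIDATE tears a range down.  Both reset the per-vq
 * metadata cache, since cached pointers may refer to stale mappings.
 */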
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093) static int vhost_process_iotlb_msg(struct vhost_dev *dev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094) struct vhost_iotlb_msg *msg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098) mutex_lock(&dev->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099) vhost_dev_lock_vqs(dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100) switch (msg->type) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101) case VHOST_IOTLB_UPDATE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102) if (!dev->iotlb) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103) ret = -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106) if (!umem_access_ok(msg->uaddr, msg->size, msg->perm)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107) ret = -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110) vhost_vq_meta_reset(dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111) if (vhost_iotlb_add_range(dev->iotlb, msg->iova,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112) msg->iova + msg->size - 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1113) msg->uaddr, msg->perm)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114) ret = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117) vhost_iotlb_notify_vq(dev, msg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119) case VHOST_IOTLB_INVALIDATE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120) if (!dev->iotlb) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121) ret = -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124) vhost_vq_meta_reset(dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125) vhost_iotlb_del_range(dev->iotlb, msg->iova,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126) msg->iova + msg->size - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129) ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133) vhost_dev_unlock_vqs(dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134) mutex_unlock(&dev->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137) }
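
/*
 * Parse one IOTLB message from a write() to the vhost char device.
 * Both the legacy vhost_msg layout and the vhost_msg_v2 layout are
 * accepted; the payload is dispatched to the device's msg_handler if
 * one is set, or to vhost_process_iotlb_msg() otherwise.  On success
 * the number of bytes consumed for the detected layout is returned.
 *
 * Illustrative userspace sketch (assuming a vhost device fd with
 * VHOST_BACKEND_F_IOTLB_MSG_V2 negotiated; error handling omitted):
 *
 *	struct vhost_msg_v2 msg = {
 *		.type = VHOST_IOTLB_MSG_V2,
 *		.iotlb = {
 *			.iova  = iova,
 *			.size  = size,
 *			.uaddr = (__u64)(uintptr_t)buf,
 *			.perm  = VHOST_ACCESS_RW,
 *			.type  = VHOST_IOTLB_UPDATE,
 *		},
 *	};
 *	write(fd, &msg, sizeof(msg));
 */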
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138) ssize_t vhost_chr_write_iter(struct vhost_dev *dev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139) struct iov_iter *from)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141) struct vhost_iotlb_msg msg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142) size_t offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143) int type, ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145) ret = copy_from_iter(&type, sizeof(type), from);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146) if (ret != sizeof(type)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147) ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148) goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151) switch (type) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152) case VHOST_IOTLB_MSG:
		/* There may be a hole after the type field for the V1
		 * message layout, so skip it here.
		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156) offset = offsetof(struct vhost_msg, iotlb) - sizeof(int);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158) case VHOST_IOTLB_MSG_V2:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159) offset = sizeof(__u32);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162) ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163) goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166) iov_iter_advance(from, offset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167) ret = copy_from_iter(&msg, sizeof(msg), from);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168) if (ret != sizeof(msg)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169) ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170) goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173) if (dev->msg_handler)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174) ret = dev->msg_handler(dev, &msg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176) ret = vhost_process_iotlb_msg(dev, &msg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178) ret = -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179) goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182) ret = (type == VHOST_IOTLB_MSG) ? sizeof(struct vhost_msg) :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183) sizeof(struct vhost_msg_v2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184) done:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187) EXPORT_SYMBOL(vhost_chr_write_iter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1189) __poll_t vhost_chr_poll(struct file *file, struct vhost_dev *dev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190) poll_table *wait)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192) __poll_t mask = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194) poll_wait(file, &dev->wait, wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196) if (!list_empty(&dev->read_list))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197) mask |= EPOLLIN | EPOLLRDNORM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199) return mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201) EXPORT_SYMBOL(vhost_chr_poll);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202)
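/*
 * Read one pending message (e.g. an IOTLB miss report) from the vhost
 * char device.  Blocks unless @noblock is set.  A MISS message is
 * moved to the pending list after being copied out, so the device can
 * match it against the translation userspace eventually writes back.
 */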
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203) ssize_t vhost_chr_read_iter(struct vhost_dev *dev, struct iov_iter *to,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204) int noblock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206) DEFINE_WAIT(wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207) struct vhost_msg_node *node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208) ssize_t ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1209) unsigned size = sizeof(struct vhost_msg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211) if (iov_iter_count(to) < size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214) while (1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215) if (!noblock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216) prepare_to_wait(&dev->wait, &wait,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217) TASK_INTERRUPTIBLE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219) node = vhost_dequeue_msg(dev, &dev->read_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220) if (node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222) if (noblock) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223) ret = -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226) if (signal_pending(current)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227) ret = -ERESTARTSYS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230) if (!dev->iotlb) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231) ret = -EBADFD;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235) schedule();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1237)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1238) if (!noblock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1239) finish_wait(&dev->wait, &wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1240)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1241) if (node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1242) struct vhost_iotlb_msg *msg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1243) void *start = &node->msg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1244)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1245) switch (node->msg.type) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1246) case VHOST_IOTLB_MSG:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1247) size = sizeof(node->msg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1248) msg = &node->msg.iotlb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1249) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1250) case VHOST_IOTLB_MSG_V2:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1251) size = sizeof(node->msg_v2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1252) msg = &node->msg_v2.iotlb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1253) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1254) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1255) BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1256) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1257) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1258)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1259) ret = copy_to_iter(start, size, to);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1260) if (ret != size || msg->type != VHOST_IOTLB_MISS) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1261) kfree(node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1262) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1263) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1264) vhost_enqueue_msg(dev, &dev->pending_list, node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1265) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1266)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1267) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1268) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1269) EXPORT_SYMBOL_GPL(vhost_chr_read_iter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1270)
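/*
 * Queue an IOTLB miss report for userspace, using the V1 or V2 message
 * layout depending on the negotiated backend features.  The caller is
 * expected to bail out and retry the access once a matching
 * VHOST_IOTLB_UPDATE arrives.
 */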
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1271) static int vhost_iotlb_miss(struct vhost_virtqueue *vq, u64 iova, int access)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1272) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1273) struct vhost_dev *dev = vq->dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1274) struct vhost_msg_node *node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1275) struct vhost_iotlb_msg *msg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1276) bool v2 = vhost_backend_has_feature(vq, VHOST_BACKEND_F_IOTLB_MSG_V2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1277)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1278) node = vhost_new_msg(vq, v2 ? VHOST_IOTLB_MSG_V2 : VHOST_IOTLB_MSG);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1279) if (!node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1280) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1281)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1282) if (v2) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1283) node->msg_v2.type = VHOST_IOTLB_MSG_V2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1284) msg = &node->msg_v2.iotlb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1285) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1286) msg = &node->msg.iotlb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1287) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1288)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1289) msg->type = VHOST_IOTLB_MISS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1290) msg->iova = iova;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1291) msg->perm = access;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1292)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1293) vhost_enqueue_msg(dev, &dev->read_list, node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1294)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1295) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1296) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1297)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1298) static bool vq_access_ok(struct vhost_virtqueue *vq, unsigned int num,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1299) vring_desc_t __user *desc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1300) vring_avail_t __user *avail,
			 vring_used_t __user *used)
{
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1304) /* If an IOTLB device is present, the vring addresses are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1305) * GIOVAs. Access validation occurs at prefetch time. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1306) if (vq->iotlb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1307) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1308)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1309) return access_ok(desc, vhost_get_desc_size(vq, num)) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1310) access_ok(avail, vhost_get_avail_size(vq, num)) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1311) access_ok(used, vhost_get_used_size(vq, num));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1312) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1313)
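/*
 * Cache the IOTLB mapping that covers one of the ring's metadata areas
 * (descriptor, avail or used) so later accesses can skip the interval
 * tree lookup.  Only mappings with sufficient permissions are cached.
 */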
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1314) static void vhost_vq_meta_update(struct vhost_virtqueue *vq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1315) const struct vhost_iotlb_map *map,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1316) int type)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1317) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1318) int access = (type == VHOST_ADDR_USED) ?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1319) VHOST_ACCESS_WO : VHOST_ACCESS_RO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1320)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1321) if (likely(map->perm & access))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1322) vq->meta_iotlb[type] = map;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1323) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1324)
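/*
 * Check that [addr, addr + len) is fully covered by IOTLB mappings
 * with the requested permissions, reporting a miss to userspace if a
 * hole is found.  When a single mapping covers the whole range, it is
 * cached as the ring metadata of the given type.
 */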
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1325) static bool iotlb_access_ok(struct vhost_virtqueue *vq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1326) int access, u64 addr, u64 len, int type)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1327) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1328) const struct vhost_iotlb_map *map;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1329) struct vhost_iotlb *umem = vq->iotlb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1330) u64 s = 0, size, orig_addr = addr, last = addr + len - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1331)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1332) if (vhost_vq_meta_fetch(vq, addr, len, type))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1333) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1334)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1335) while (len > s) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1336) map = vhost_iotlb_itree_first(umem, addr, last);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1337) if (map == NULL || map->start > addr) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1338) vhost_iotlb_miss(vq, addr, access);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1339) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1340) } else if (!(map->perm & access)) {
			/* Report the possible access violation by
			 * requesting another translation from
			 * userspace.
			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1344) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1345) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1346)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1347) size = map->size - addr + map->start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1348)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1349) if (orig_addr == addr && size >= len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1350) vhost_vq_meta_update(vq, map, type);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1351)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1352) s += size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1353) addr += size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1354) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1355)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1356) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1357) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1358)
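/*
 * Pre-translate the descriptor, avail and used rings.  Returns nonzero
 * when all three are reachable; on failure, miss reports have already
 * been queued and the caller should wait for userspace to respond.
 */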
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1359) int vq_meta_prefetch(struct vhost_virtqueue *vq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1360) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1361) unsigned int num = vq->num;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1362)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1363) if (!vq->iotlb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1364) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1365)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1366) return iotlb_access_ok(vq, VHOST_MAP_RO, (u64)(uintptr_t)vq->desc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1367) vhost_get_desc_size(vq, num), VHOST_ADDR_DESC) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1368) iotlb_access_ok(vq, VHOST_MAP_RO, (u64)(uintptr_t)vq->avail,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1369) vhost_get_avail_size(vq, num),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1370) VHOST_ADDR_AVAIL) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1371) iotlb_access_ok(vq, VHOST_MAP_WO, (u64)(uintptr_t)vq->used,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1372) vhost_get_used_size(vq, num), VHOST_ADDR_USED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1373) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1374) EXPORT_SYMBOL_GPL(vq_meta_prefetch);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1375)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1376) /* Can we log writes? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1377) /* Caller should have device mutex but not vq mutex */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1378) bool vhost_log_access_ok(struct vhost_dev *dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1379) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1380) return memory_access_ok(dev, dev->umem, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1381) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1382) EXPORT_SYMBOL_GPL(vhost_log_access_ok);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1383)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1384) static bool vq_log_used_access_ok(struct vhost_virtqueue *vq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1385) void __user *log_base,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1386) bool log_used,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1387) u64 log_addr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1388) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1389) /* If an IOTLB device is present, log_addr is a GIOVA that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1390) * will never be logged by log_used(). */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1391) if (vq->iotlb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1392) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1393)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1394) return !log_used || log_access_ok(log_base, log_addr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1395) vhost_get_used_size(vq, vq->num));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1396) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1397)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1398) /* Verify access for write logging. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1399) /* Caller should have vq mutex and device mutex */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1400) static bool vq_log_access_ok(struct vhost_virtqueue *vq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1401) void __user *log_base)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1402) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1403) return vq_memory_access_ok(log_base, vq->umem,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1404) vhost_has_feature(vq, VHOST_F_LOG_ALL)) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1405) vq_log_used_access_ok(vq, log_base, vq->log_used, vq->log_addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1406) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1407)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1408) /* Can we start vq? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1409) /* Caller should have vq mutex and device mutex */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1410) bool vhost_vq_access_ok(struct vhost_virtqueue *vq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1411) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1412) if (!vq_log_access_ok(vq, vq->log_base))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1413) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1414)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1415) return vq_access_ok(vq, vq->num, vq->desc, vq->avail, vq->used);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1416) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1417) EXPORT_SYMBOL_GPL(vhost_vq_access_ok);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1418)
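/*
 * Replace the device's memory map (VHOST_SET_MEM_TABLE).  The new
 * table is built and validated before being swapped in, and every
 * virtqueue picks up the new map under its own mutex, so in-flight
 * accesses always see a consistent table.
 *
 * Illustrative userspace sketch (single region, error handling
 * omitted):
 *
 *	struct vhost_memory *mem =
 *		calloc(1, sizeof(*mem) + sizeof(mem->regions[0]));
 *	mem->nregions = 1;
 *	mem->regions[0] = (struct vhost_memory_region) {
 *		.guest_phys_addr = gpa,
 *		.memory_size	 = size,
 *		.userspace_addr  = (__u64)(uintptr_t)buf,
 *	};
 *	ioctl(fd, VHOST_SET_MEM_TABLE, mem);
 */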
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1419) static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1420) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1421) struct vhost_memory mem, *newmem;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1422) struct vhost_memory_region *region;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1423) struct vhost_iotlb *newumem, *oldumem;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1424) unsigned long size = offsetof(struct vhost_memory, regions);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1425) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1426)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1427) if (copy_from_user(&mem, m, size))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1428) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1429) if (mem.padding)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1430) return -EOPNOTSUPP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1431) if (mem.nregions > max_mem_regions)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1432) return -E2BIG;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1433) newmem = kvzalloc(struct_size(newmem, regions, mem.nregions),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1434) GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1435) if (!newmem)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1436) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1437)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1438) memcpy(newmem, &mem, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1439) if (copy_from_user(newmem->regions, m->regions,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1440) flex_array_size(newmem, regions, mem.nregions))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1441) kvfree(newmem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1442) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1443) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1444)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1445) newumem = iotlb_alloc();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1446) if (!newumem) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1447) kvfree(newmem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1448) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1449) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1450)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1451) for (region = newmem->regions;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1452) region < newmem->regions + mem.nregions;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1453) region++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1454) if (vhost_iotlb_add_range(newumem,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1455) region->guest_phys_addr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1456) region->guest_phys_addr +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1457) region->memory_size - 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1458) region->userspace_addr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1459) VHOST_MAP_RW))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1460) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1461) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1462)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1463) if (!memory_access_ok(d, newumem, 0))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1464) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1465)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1466) oldumem = d->umem;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1467) d->umem = newumem;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1468)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1469) /* All memory accesses are done under some VQ mutex. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1470) for (i = 0; i < d->nvqs; ++i) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1471) mutex_lock(&d->vqs[i]->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1472) d->vqs[i]->umem = newumem;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1473) mutex_unlock(&d->vqs[i]->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1474) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1475)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1476) kvfree(newmem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1477) vhost_iotlb_free(oldumem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1478) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1479)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1480) err:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1481) vhost_iotlb_free(newumem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1482) kvfree(newmem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1483) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1484) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1485)
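/*
 * VHOST_SET_VRING_NUM: the ring size must be a power of two in
 * [1, 65535], matching what the virtio ring layout allows.
 */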
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1486) static long vhost_vring_set_num(struct vhost_dev *d,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1487) struct vhost_virtqueue *vq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1488) void __user *argp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1489) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1490) struct vhost_vring_state s;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1491)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1492) /* Resizing ring with an active backend?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1493) * You don't want to do that. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1494) if (vq->private_data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1495) return -EBUSY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1496)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1497) if (copy_from_user(&s, argp, sizeof s))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1498) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1499)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1500) if (!s.num || s.num > 0xffff || (s.num & (s.num - 1)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1501) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1502) vq->num = s.num;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1503)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1504) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1505) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1506)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1507) static long vhost_vring_set_addr(struct vhost_dev *d,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1508) struct vhost_virtqueue *vq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1509) void __user *argp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1510) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1511) struct vhost_vring_addr a;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1512)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1513) if (copy_from_user(&a, argp, sizeof a))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1514) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1515) if (a.flags & ~(0x1 << VHOST_VRING_F_LOG))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1516) return -EOPNOTSUPP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1517)
	/* For 32-bit, verify that the top 32 bits of the user
	   data are set to zero. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1520) if ((u64)(unsigned long)a.desc_user_addr != a.desc_user_addr ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1521) (u64)(unsigned long)a.used_user_addr != a.used_user_addr ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1522) (u64)(unsigned long)a.avail_user_addr != a.avail_user_addr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1523) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1524)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1525) /* Make sure it's safe to cast pointers to vring types. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1526) BUILD_BUG_ON(__alignof__ *vq->avail > VRING_AVAIL_ALIGN_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1527) BUILD_BUG_ON(__alignof__ *vq->used > VRING_USED_ALIGN_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1528) if ((a.avail_user_addr & (VRING_AVAIL_ALIGN_SIZE - 1)) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1529) (a.used_user_addr & (VRING_USED_ALIGN_SIZE - 1)) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1530) (a.log_guest_addr & (VRING_USED_ALIGN_SIZE - 1)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1531) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1532)
	/* We only verify access here if a backend is configured.
	 * If it is not, we skip the check, as the ring size might not
	 * have been set up yet; access is verified once the backend
	 * is configured. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1536) if (vq->private_data) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1537) if (!vq_access_ok(vq, vq->num,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1538) (void __user *)(unsigned long)a.desc_user_addr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1539) (void __user *)(unsigned long)a.avail_user_addr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1540) (void __user *)(unsigned long)a.used_user_addr))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1541) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1542)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1543) /* Also validate log access for used ring if enabled. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1544) if (!vq_log_used_access_ok(vq, vq->log_base,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1545) a.flags & (0x1 << VHOST_VRING_F_LOG),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1546) a.log_guest_addr))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1547) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1548) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1549)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1550) vq->log_used = !!(a.flags & (0x1 << VHOST_VRING_F_LOG));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1551) vq->desc = (void __user *)(unsigned long)a.desc_user_addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1552) vq->avail = (void __user *)(unsigned long)a.avail_user_addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1553) vq->log_addr = a.log_guest_addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1554) vq->used = (void __user *)(unsigned long)a.used_user_addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1555)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1556) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1557) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1558)
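/*
 * VHOST_SET_VRING_NUM and VHOST_SET_VRING_ADDR are handled together
 * here, under the vq mutex, since both reconfigure the ring layout.
 */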
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1559) static long vhost_vring_set_num_addr(struct vhost_dev *d,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1560) struct vhost_virtqueue *vq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1561) unsigned int ioctl,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1562) void __user *argp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1563) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1564) long r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1565)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1566) mutex_lock(&vq->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1567)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1568) switch (ioctl) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1569) case VHOST_SET_VRING_NUM:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1570) r = vhost_vring_set_num(d, vq, argp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1571) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1572) case VHOST_SET_VRING_ADDR:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1573) r = vhost_vring_set_addr(d, vq, argp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1574) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1575) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1576) BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1577) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1578)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1579) mutex_unlock(&vq->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1580)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1581) return r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1582) }
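
/*
 * Per-virtqueue ioctl handler.  The first u32 of @argp selects the
 * queue index; the index is sanitized with array_index_nospec() before
 * being used to pick the vq.  Most commands run under the vq mutex;
 * eventfd updates may stop and restart the kick poll around the
 * switch.
 */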
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1583) long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1584) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1585) struct file *eventfp, *filep = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1586) bool pollstart = false, pollstop = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1587) struct eventfd_ctx *ctx = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1588) u32 __user *idxp = argp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1589) struct vhost_virtqueue *vq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1590) struct vhost_vring_state s;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1591) struct vhost_vring_file f;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1592) u32 idx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1593) long r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1594)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1595) r = get_user(idx, idxp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1596) if (r < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1597) return r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1598) if (idx >= d->nvqs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1599) return -ENOBUFS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1600)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1601) idx = array_index_nospec(idx, d->nvqs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1602) vq = d->vqs[idx];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1603)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1604) if (ioctl == VHOST_SET_VRING_NUM ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1605) ioctl == VHOST_SET_VRING_ADDR) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1606) return vhost_vring_set_num_addr(d, vq, ioctl, argp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1607) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1608)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1609) mutex_lock(&vq->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1610)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1611) switch (ioctl) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1612) case VHOST_SET_VRING_BASE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1613) /* Moving base with an active backend?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1614) * You don't want to do that. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1615) if (vq->private_data) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1616) r = -EBUSY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1617) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1618) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1619) if (copy_from_user(&s, argp, sizeof s)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1620) r = -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1621) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1622) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1623) if (s.num > 0xffff) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1624) r = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1625) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1626) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1627) vq->last_avail_idx = s.num;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1628) /* Forget the cached index value. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1629) vq->avail_idx = vq->last_avail_idx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1630) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1631) case VHOST_GET_VRING_BASE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1632) s.index = idx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1633) s.num = vq->last_avail_idx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1634) if (copy_to_user(argp, &s, sizeof s))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1635) r = -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1636) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1637) case VHOST_SET_VRING_KICK:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1638) if (copy_from_user(&f, argp, sizeof f)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1639) r = -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1640) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1641) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1642) eventfp = f.fd == VHOST_FILE_UNBIND ? NULL : eventfd_fget(f.fd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1643) if (IS_ERR(eventfp)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1644) r = PTR_ERR(eventfp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1645) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1646) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1647) if (eventfp != vq->kick) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1648) pollstop = (filep = vq->kick) != NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1649) pollstart = (vq->kick = eventfp) != NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1650) } else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1651) filep = eventfp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1652) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1653) case VHOST_SET_VRING_CALL:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1654) if (copy_from_user(&f, argp, sizeof f)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1655) r = -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1656) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1657) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1658) ctx = f.fd == VHOST_FILE_UNBIND ? NULL : eventfd_ctx_fdget(f.fd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1659) if (IS_ERR(ctx)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1660) r = PTR_ERR(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1661) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1662) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1663)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1664) swap(ctx, vq->call_ctx.ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1665) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1666) case VHOST_SET_VRING_ERR:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1667) if (copy_from_user(&f, argp, sizeof f)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1668) r = -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1669) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1670) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1671) ctx = f.fd == VHOST_FILE_UNBIND ? NULL : eventfd_ctx_fdget(f.fd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1672) if (IS_ERR(ctx)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1673) r = PTR_ERR(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1674) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1675) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1676) swap(ctx, vq->error_ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1677) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1678) case VHOST_SET_VRING_ENDIAN:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1679) r = vhost_set_vring_endian(vq, argp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1680) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1681) case VHOST_GET_VRING_ENDIAN:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1682) r = vhost_get_vring_endian(vq, idx, argp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1683) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1684) case VHOST_SET_VRING_BUSYLOOP_TIMEOUT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1685) if (copy_from_user(&s, argp, sizeof(s))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1686) r = -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1687) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1688) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1689) vq->busyloop_timeout = s.num;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1690) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1691) case VHOST_GET_VRING_BUSYLOOP_TIMEOUT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1692) s.index = idx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1693) s.num = vq->busyloop_timeout;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1694) if (copy_to_user(argp, &s, sizeof(s)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1695) r = -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1696) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1697) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1698) r = -ENOIOCTLCMD;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1699) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1700)
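	/* Teardown/startup ordering: stop polling the old kick file,
	 * drop the replaced eventfd/file references, start polling the
	 * new file, and flush outstanding work only after the vq mutex
	 * is released.
	 */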
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1701) if (pollstop && vq->handle_kick)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1702) vhost_poll_stop(&vq->poll);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1703)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1704) if (!IS_ERR_OR_NULL(ctx))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1705) eventfd_ctx_put(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1706) if (filep)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1707) fput(filep);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1708)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1709) if (pollstart && vq->handle_kick)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1710) r = vhost_poll_start(&vq->poll, vq->kick);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1711)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1712) mutex_unlock(&vq->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1713)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1714) if (pollstop && vq->handle_kick)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1715) vhost_poll_flush(&vq->poll);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1716) return r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1717) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1718) EXPORT_SYMBOL_GPL(vhost_vring_ioctl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1719)
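/* Install a fresh device IOTLB: point every virtqueue at it and drop
 * any cached metadata translations before the old IOTLB is freed.
 */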
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1720) int vhost_init_device_iotlb(struct vhost_dev *d, bool enabled)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1721) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1722) struct vhost_iotlb *niotlb, *oiotlb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1723) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1724)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1725) niotlb = iotlb_alloc();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1726) if (!niotlb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1727) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1728)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1729) oiotlb = d->iotlb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1730) d->iotlb = niotlb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1731)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1732) for (i = 0; i < d->nvqs; ++i) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1733) struct vhost_virtqueue *vq = d->vqs[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1734)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1735) mutex_lock(&vq->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1736) vq->iotlb = niotlb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1737) __vhost_vq_meta_reset(vq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1738) mutex_unlock(&vq->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1739) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1740)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1741) vhost_iotlb_free(oiotlb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1742)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1743) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1744) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1745) EXPORT_SYMBOL_GPL(vhost_init_device_iotlb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1746)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1747) /* Caller must have device mutex */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1748) long vhost_dev_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1749) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1750) struct eventfd_ctx *ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1751) u64 p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1752) long r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1753) int i, fd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1754)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1755) /* If you are not the owner, you can become one */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1756) if (ioctl == VHOST_SET_OWNER) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1757) r = vhost_dev_set_owner(d);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1758) goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1759) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1760)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1761) /* You must be the owner to do anything else */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1762) r = vhost_dev_check_owner(d);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1763) if (r)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1764) goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1765)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1766) switch (ioctl) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1767) case VHOST_SET_MEM_TABLE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1768) r = vhost_set_memory(d, argp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1769) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1770) case VHOST_SET_LOG_BASE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1771) if (copy_from_user(&p, argp, sizeof p)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1772) r = -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1773) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1774) }
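		/* Reject a log base that does not fit in a host pointer
		 * (possible for a 64-bit value on a 32-bit kernel).
		 */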
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1775) if ((u64)(unsigned long)p != p) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1776) r = -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1777) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1778) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1779) for (i = 0; i < d->nvqs; ++i) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1780) struct vhost_virtqueue *vq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1781) void __user *base = (void __user *)(unsigned long)p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1782) vq = d->vqs[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1783) mutex_lock(&vq->mutex);
			/* If the ring is inactive, we'll check when it's enabled. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1785) if (vq->private_data && !vq_log_access_ok(vq, base))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1786) r = -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1787) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1788) vq->log_base = base;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1789) mutex_unlock(&vq->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1790) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1791) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1792) case VHOST_SET_LOG_FD:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1793) r = get_user(fd, (int __user *)argp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1794) if (r < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1795) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1796) ctx = fd == VHOST_FILE_UNBIND ? NULL : eventfd_ctx_fdget(fd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1797) if (IS_ERR(ctx)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1798) r = PTR_ERR(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1799) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1800) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1801) swap(ctx, d->log_ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1802) for (i = 0; i < d->nvqs; ++i) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1803) mutex_lock(&d->vqs[i]->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1804) d->vqs[i]->log_ctx = d->log_ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1805) mutex_unlock(&d->vqs[i]->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1806) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1807) if (ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1808) eventfd_ctx_put(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1809) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1810) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1811) r = -ENOIOCTLCMD;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1812) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1813) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1814) done:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1815) return r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1816) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1817) EXPORT_SYMBOL_GPL(vhost_dev_ioctl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1818)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1819) /* TODO: This is really inefficient. We need something like get_user()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1820) * (instruction directly accesses the data, with an exception table entry
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1821) * returning -EFAULT). See Documentation/x86/exception-tables.rst.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1822) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1823) static int set_bit_to_user(int nr, void __user *addr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1824) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1825) unsigned long log = (unsigned long)addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1826) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1827) void *base;
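	/* Bit offset within the pinned page: bytes into the page times
	 * eight, plus the bit number within the target byte.
	 */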
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1828) int bit = nr + (log % PAGE_SIZE) * 8;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1829) int r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1830)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1831) r = pin_user_pages_fast(log, 1, FOLL_WRITE, &page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1832) if (r < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1833) return r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1834) BUG_ON(r != 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1835) base = kmap_atomic(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1836) set_bit(bit, base);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1837) kunmap_atomic(base);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1838) unpin_user_pages_dirty_lock(&page, 1, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1839) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1840) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1841)
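/* Mark the range [write_address, write_address + write_length) dirty in
 * the userspace log: one bit per VHOST_PAGE_SIZE page, so page N maps
 * to bit N % 8 of the byte at log_base + N / 8.
 */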
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1842) static int log_write(void __user *log_base,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1843) u64 write_address, u64 write_length)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1844) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1845) u64 write_page = write_address / VHOST_PAGE_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1846) int r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1847)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1848) if (!write_length)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1849) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1850) write_length += write_address % VHOST_PAGE_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1851) for (;;) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1852) u64 base = (u64)(unsigned long)log_base;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1853) u64 log = base + write_page / 8;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1854) int bit = write_page % 8;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1855) if ((u64)(unsigned long)log != log)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1856) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1857) r = set_bit_to_user(bit, (void __user *)(unsigned long)log);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1858) if (r < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1859) return r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1860) if (write_length <= VHOST_PAGE_SIZE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1861) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1862) write_length -= VHOST_PAGE_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1863) write_page += 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1864) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1865) return r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1866) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1867)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1868) static int log_write_hva(struct vhost_virtqueue *vq, u64 hva, u64 len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1869) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1870) struct vhost_iotlb *umem = vq->umem;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1871) struct vhost_iotlb_map *u;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1872) u64 start, end, l, min;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1873) int r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1874) bool hit = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1875)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1876) while (len) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1877) min = len;
		/* More than one GPA can be mapped into a single HVA, so
		 * iterate over all possible umems here to be safe.
		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1881) list_for_each_entry(u, &umem->list, link) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1882) if (u->addr > hva - 1 + len ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1883) u->addr - 1 + u->size < hva)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1884) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1885) start = max(u->addr, hva);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1886) end = min(u->addr - 1 + u->size, hva - 1 + len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1887) l = end - start + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1888) r = log_write(vq->log_base,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1889) u->start + start - u->addr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1890) l);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1891) if (r < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1892) return r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1893) hit = true;
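			/* Advance by the smallest chunk any overlapping
			 * region logged, so nothing is skipped.
			 */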
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1894) min = min(l, min);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1895) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1896)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1897) if (!hit)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1898) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1899)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1900) len -= min;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1901) hva += min;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1902) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1903)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1904) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1905) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1906)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1907) static int log_used(struct vhost_virtqueue *vq, u64 used_offset, u64 len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1908) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1909) struct iovec *iov = vq->log_iov;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1910) int i, ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1911)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1912) if (!vq->iotlb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1913) return log_write(vq->log_base, vq->log_addr + used_offset, len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1914)
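	/* With an IOTLB the used ring is addressed by guest IOVA:
	 * translate the written range into host VAs (vq->log_iov holds
	 * up to 64 entries) and log each chunk by HVA.
	 */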
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1915) ret = translate_desc(vq, (uintptr_t)vq->used + used_offset,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1916) len, iov, 64, VHOST_ACCESS_WO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1917) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1918) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1919)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1920) for (i = 0; i < ret; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1921) ret = log_write_hva(vq, (uintptr_t)iov[i].iov_base,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1922) iov[i].iov_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1923) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1924) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1925) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1926)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1927) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1928) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1929)
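/* Log a completed write for dirty-page tracking. When an IOTLB is in
 * use, @iov/@count describe the written ranges by host VA; otherwise
 * @log/@log_num carry guest addresses and @len bounds how much of them
 * was actually written.
 */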
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1930) int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1931) unsigned int log_num, u64 len, struct iovec *iov, int count)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1932) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1933) int i, r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1934)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1935) /* Make sure data written is seen before log. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1936) smp_wmb();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1937)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1938) if (vq->iotlb) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1939) for (i = 0; i < count; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1940) r = log_write_hva(vq, (uintptr_t)iov[i].iov_base,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1941) iov[i].iov_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1942) if (r < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1943) return r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1944) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1945) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1946) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1947)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1948) for (i = 0; i < log_num; ++i) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1949) u64 l = min(log[i].len, len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1950) r = log_write(vq->log_base, log[i].addr, l);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1951) if (r < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1952) return r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1953) len -= l;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1954) if (!len) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1955) if (vq->log_ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1956) eventfd_signal(vq->log_ctx, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1957) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1958) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1959) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1960) /* Length written exceeds what we have stored. This is a bug. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1961) BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1962) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1963) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1964) EXPORT_SYMBOL_GPL(vhost_log_write);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1965)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1966) static int vhost_update_used_flags(struct vhost_virtqueue *vq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1967) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1968) void __user *used;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1969) if (vhost_put_used_flags(vq))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1970) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1971) if (unlikely(vq->log_used)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1972) /* Make sure the flag is seen before log. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1973) smp_wmb();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1974) /* Log used flag write. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1975) used = &vq->used->flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1976) log_used(vq, (used - (void __user *)vq->used),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1977) sizeof vq->used->flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1978) if (vq->log_ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1979) eventfd_signal(vq->log_ctx, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1980) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1981) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1982) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1983)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1984) static int vhost_update_avail_event(struct vhost_virtqueue *vq, u16 avail_event)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1985) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1986) if (vhost_put_avail_event(vq))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1987) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1988) if (unlikely(vq->log_used)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1989) void __user *used;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1990) /* Make sure the event is seen before log. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1991) smp_wmb();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1992) /* Log avail event write */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1993) used = vhost_avail_event(vq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1994) log_used(vq, (used - (void __user *)vq->used),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1995) sizeof *vhost_avail_event(vq));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1996) if (vq->log_ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1997) eventfd_signal(vq->log_ctx, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1998) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1999) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2000) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2001)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2002) int vhost_vq_init_access(struct vhost_virtqueue *vq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2003) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2004) __virtio16 last_used_idx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2005) int r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2006) bool is_le = vq->is_le;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2007)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2008) if (!vq->private_data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2009) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2010)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2011) vhost_init_is_le(vq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2012)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2013) r = vhost_update_used_flags(vq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2014) if (r)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2015) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2016) vq->signalled_used_valid = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2017) if (!vq->iotlb &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2018) !access_ok(&vq->used->idx, sizeof vq->used->idx)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2019) r = -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2020) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2021) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2022) r = vhost_get_used_idx(vq, &last_used_idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2023) if (r) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2024) vq_err(vq, "Can't access used idx at %p\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2025) &vq->used->idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2026) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2027) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2028) vq->last_used_idx = vhost16_to_cpu(vq, last_used_idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2029) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2030)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2031) err:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2032) vq->is_le = is_le;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2033) return r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2034) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2035) EXPORT_SYMBOL_GPL(vhost_vq_init_access);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2036)
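/* Translate a guest address range into host-VA iovecs. Returns the
 * number of iovecs used, -ENOBUFS if more than @iov_size would be
 * needed, -EPERM on an access-permission mismatch, -EFAULT for an
 * address outside the memory map, or -EAGAIN after queueing an IOTLB
 * miss for userspace to service.
 */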
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2037) static int translate_desc(struct vhost_virtqueue *vq, u64 addr, u32 len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2038) struct iovec iov[], int iov_size, int access)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2039) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2040) const struct vhost_iotlb_map *map;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2041) struct vhost_dev *dev = vq->dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2042) struct vhost_iotlb *umem = dev->iotlb ? dev->iotlb : dev->umem;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2043) struct iovec *_iov;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2044) u64 s = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2045) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2046)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2047) while ((u64)len > s) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2048) u64 size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2049) if (unlikely(ret >= iov_size)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2050) ret = -ENOBUFS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2051) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2052) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2053)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2054) map = vhost_iotlb_itree_first(umem, addr, addr + len - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2055) if (map == NULL || map->start > addr) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2056) if (umem != dev->iotlb) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2057) ret = -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2058) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2059) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2060) ret = -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2061) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2062) } else if (!(map->perm & access)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2063) ret = -EPERM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2064) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2065) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2066)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2067) _iov = iov + ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2068) size = map->size - addr + map->start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2069) _iov->iov_len = min((u64)len - s, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2070) _iov->iov_base = (void __user *)(unsigned long)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2071) (map->addr + addr - map->start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2072) s += size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2073) addr += size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2074) ++ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2075) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2076)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2077) if (ret == -EAGAIN)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2078) vhost_iotlb_miss(vq, addr, access);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2079) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2080) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2081)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2082) /* Each buffer in the virtqueues is actually a chain of descriptors. This
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2083) * function returns the next descriptor in the chain,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2084) * or -1U if we're at the end. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2085) static unsigned next_desc(struct vhost_virtqueue *vq, struct vring_desc *desc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2086) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2087) unsigned int next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2088)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2089) /* If this descriptor says it doesn't chain, we're done. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2090) if (!(desc->flags & cpu_to_vhost16(vq, VRING_DESC_F_NEXT)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2091) return -1U;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2092)
	/* Check they're not leading us off the end of the descriptor table. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2094) next = vhost16_to_cpu(vq, READ_ONCE(desc->next));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2095) return next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2096) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2097)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2098) static int get_indirect(struct vhost_virtqueue *vq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2099) struct iovec iov[], unsigned int iov_size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2100) unsigned int *out_num, unsigned int *in_num,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2101) struct vhost_log *log, unsigned int *log_num,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2102) struct vring_desc *indirect)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2103) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2104) struct vring_desc desc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2105) unsigned int i = 0, count, found = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2106) u32 len = vhost32_to_cpu(vq, indirect->len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2107) struct iov_iter from;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2108) int ret, access;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2109)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2110) /* Sanity check */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2111) if (unlikely(len % sizeof desc)) {
		vq_err(vq, "Invalid length in indirect descriptor: len 0x%llx not multiple of 0x%zx\n",
		       (unsigned long long)len, sizeof desc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2116) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2117) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2118)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2119) ret = translate_desc(vq, vhost64_to_cpu(vq, indirect->addr), len, vq->indirect,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2120) UIO_MAXIOV, VHOST_ACCESS_RO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2121) if (unlikely(ret < 0)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2122) if (ret != -EAGAIN)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2123) vq_err(vq, "Translation failure %d in indirect.\n", ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2124) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2125) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2126) iov_iter_init(&from, READ, vq->indirect, ret, len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2127) count = len / sizeof desc;
	/* Buffers are chained via a 16-bit next field, so
	 * we can have at most 2^16 of these. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2130) if (unlikely(count > USHRT_MAX + 1)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2131) vq_err(vq, "Indirect buffer length too big: %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2132) indirect->len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2133) return -E2BIG;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2134) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2135)
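	/* Walk the chain; seeing more than count descriptors means the
	 * next fields form a loop.
	 */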
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2136) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2137) unsigned iov_count = *in_num + *out_num;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2138) if (unlikely(++found > count)) {
			vq_err(vq, "Loop detected: last one at %u indirect size %u\n",
			       i, count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2142) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2143) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2144) if (unlikely(!copy_from_iter_full(&desc, sizeof(desc), &from))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2145) vq_err(vq, "Failed indirect descriptor: idx %d, %zx\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2146) i, (size_t)vhost64_to_cpu(vq, indirect->addr) + i * sizeof desc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2147) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2148) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2149) if (unlikely(desc.flags & cpu_to_vhost16(vq, VRING_DESC_F_INDIRECT))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2150) vq_err(vq, "Nested indirect descriptor: idx %d, %zx\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2151) i, (size_t)vhost64_to_cpu(vq, indirect->addr) + i * sizeof desc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2152) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2153) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2154)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2155) if (desc.flags & cpu_to_vhost16(vq, VRING_DESC_F_WRITE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2156) access = VHOST_ACCESS_WO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2157) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2158) access = VHOST_ACCESS_RO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2159)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2160) ret = translate_desc(vq, vhost64_to_cpu(vq, desc.addr),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2161) vhost32_to_cpu(vq, desc.len), iov + iov_count,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2162) iov_size - iov_count, access);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2163) if (unlikely(ret < 0)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2164) if (ret != -EAGAIN)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2165) vq_err(vq, "Translation failure %d indirect idx %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2166) ret, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2167) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2168) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2169) /* If this is an input descriptor, increment that count. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2170) if (access == VHOST_ACCESS_WO) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2171) *in_num += ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2172) if (unlikely(log && ret)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2173) log[*log_num].addr = vhost64_to_cpu(vq, desc.addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2174) log[*log_num].len = vhost32_to_cpu(vq, desc.len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2175) ++*log_num;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2176) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2177) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2178) /* If it's an output descriptor, they're all supposed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2179) * to come before any input descriptors. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2180) if (unlikely(*in_num)) {
				vq_err(vq, "Indirect descriptor has out after in: idx %d\n",
				       i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2183) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2184) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2185) *out_num += ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2186) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2187) } while ((i = next_desc(vq, &desc)) != -1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2188) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2189) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2190)
/* This looks in the virtqueue for the first available buffer, and converts
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2192) * it to an iovec for convenient access. Since descriptors consist of some
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2193) * number of output then some number of input descriptors, it's actually two
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2194) * iovecs, but we pack them into one and note how many of each there were.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2195) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2196) * This function returns the descriptor number found, or vq->num (which is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2197) * never a valid descriptor number) if none was found. A negative code is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2198) * returned on error. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2199) int vhost_get_vq_desc(struct vhost_virtqueue *vq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2200) struct iovec iov[], unsigned int iov_size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2201) unsigned int *out_num, unsigned int *in_num,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2202) struct vhost_log *log, unsigned int *log_num)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2203) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2204) struct vring_desc desc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2205) unsigned int i, head, found = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2206) u16 last_avail_idx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2207) __virtio16 avail_idx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2208) __virtio16 ring_head;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2209) int ret, access;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2210)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2211) /* Check it isn't doing very strange things with descriptor numbers. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2212) last_avail_idx = vq->last_avail_idx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2213)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2214) if (vq->avail_idx == vq->last_avail_idx) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2215) if (unlikely(vhost_get_avail_idx(vq, &avail_idx))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2216) vq_err(vq, "Failed to access avail idx at %p\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2217) &vq->avail->idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2218) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2219) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2220) vq->avail_idx = vhost16_to_cpu(vq, avail_idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2221)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2222) if (unlikely((u16)(vq->avail_idx - last_avail_idx) > vq->num)) {
			vq_err(vq, "Guest moved avail index from %u to %u",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2224) last_avail_idx, vq->avail_idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2225) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2226) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2227)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2228) /* If there's nothing new since last we looked, return
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2229) * invalid.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2230) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2231) if (vq->avail_idx == last_avail_idx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2232) return vq->num;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2233)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2234) /* Only get avail ring entries after they have been
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2235) * exposed by guest.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2236) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2237) smp_rmb();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2238) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2239)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2240) /* Grab the next descriptor number they're advertising, and increment
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2241) * the index we've seen. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2242) if (unlikely(vhost_get_avail_head(vq, &ring_head, last_avail_idx))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2243) vq_err(vq, "Failed to read head: idx %d address %p\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2244) last_avail_idx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2245) &vq->avail->ring[last_avail_idx % vq->num]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2246) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2247) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2248)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2249) head = vhost16_to_cpu(vq, ring_head);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2250)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2251) /* If their number is silly, that's an error. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2252) if (unlikely(head >= vq->num)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2253) vq_err(vq, "Guest says index %u > %u is available",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2254) head, vq->num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2255) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2256) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2257)
	/* When we start there are neither input nor output descriptors. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2259) *out_num = *in_num = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2260) if (unlikely(log))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2261) *log_num = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2262)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2263) i = head;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2264) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2265) unsigned iov_count = *in_num + *out_num;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2266) if (unlikely(i >= vq->num)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2267) vq_err(vq, "Desc index is %u > %u, head = %u",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2268) i, vq->num, head);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2269) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2270) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2271) if (unlikely(++found > vq->num)) {
			vq_err(vq, "Loop detected: last one at %u vq size %u head %u\n",
			       i, vq->num, head);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2275) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2276) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2277) ret = vhost_get_desc(vq, &desc, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2278) if (unlikely(ret)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2279) vq_err(vq, "Failed to get descriptor: idx %d addr %p\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2280) i, vq->desc + i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2281) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2282) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2283) if (desc.flags & cpu_to_vhost16(vq, VRING_DESC_F_INDIRECT)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2284) ret = get_indirect(vq, iov, iov_size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2285) out_num, in_num,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2286) log, log_num, &desc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2287) if (unlikely(ret < 0)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2288) if (ret != -EAGAIN)
					vq_err(vq, "Failure detected in indirect descriptor at idx %d\n",
					       i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2291) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2292) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2293) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2294) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2295)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2296) if (desc.flags & cpu_to_vhost16(vq, VRING_DESC_F_WRITE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2297) access = VHOST_ACCESS_WO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2298) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2299) access = VHOST_ACCESS_RO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2300) ret = translate_desc(vq, vhost64_to_cpu(vq, desc.addr),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2301) vhost32_to_cpu(vq, desc.len), iov + iov_count,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2302) iov_size - iov_count, access);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2303) if (unlikely(ret < 0)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2304) if (ret != -EAGAIN)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2305) vq_err(vq, "Translation failure %d descriptor idx %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2306) ret, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2307) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2308) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2309) if (access == VHOST_ACCESS_WO) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2310) /* If this is an input descriptor,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2311) * increment that count. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2312) *in_num += ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2313) if (unlikely(log && ret)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2314) log[*log_num].addr = vhost64_to_cpu(vq, desc.addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2315) log[*log_num].len = vhost32_to_cpu(vq, desc.len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2316) ++*log_num;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2317) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2318) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2319) /* If it's an output descriptor, they're all supposed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2320) * to come before any input descriptors. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2321) if (unlikely(*in_num)) {
				vq_err(vq, "Descriptor has out after in: idx %d\n",
				       i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2324) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2325) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2326) *out_num += ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2327) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2328) } while ((i = next_desc(vq, &desc)) != -1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2329)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2330) /* On success, increment avail index. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2331) vq->last_avail_idx++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2332)
	/* Assume notifications from the guest are disabled at this point;
	 * if they aren't, we would need to update the avail_event index. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2335) BUG_ON(!(vq->used_flags & VRING_USED_F_NO_NOTIFY));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2336) return head;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2337) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2338) EXPORT_SYMBOL_GPL(vhost_get_vq_desc);
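/* A minimal consumer sketch (assuming the handler holds vq->mutex and a
 * backend is set), roughly the pattern vhost-net follows; the names and
 * loop shape here are illustrative only:
 *
 *	for (;;) {
 *		int head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
 *					     &out, &in, NULL, NULL);
 *		if (head < 0)
 *			break;			// error, already logged via vq_err
 *		if (head == vq->num) {		// ring drained
 *			if (unlikely(vhost_enable_notify(dev, vq))) {
 *				vhost_disable_notify(dev, vq);
 *				continue;	// new buffers raced in
 *			}
 *			break;			// wait for the next kick
 *		}
 *		// ... consume iov[0..out) as output, iov[out..out+in) as input ...
 *		vhost_add_used_and_signal(dev, vq, head, len);
 *	}
 */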
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2339)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2340) /* Reverse the effect of vhost_get_vq_desc. Useful for error handling. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2341) void vhost_discard_vq_desc(struct vhost_virtqueue *vq, int n)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2342) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2343) vq->last_avail_idx -= n;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2344) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2345) EXPORT_SYMBOL_GPL(vhost_discard_vq_desc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2346)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2347) /* After we've used one of their buffers, we tell them about it. We'll then
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2348) * want to notify the guest, using eventfd. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2349) int vhost_add_used(struct vhost_virtqueue *vq, unsigned int head, int len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2350) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2351) struct vring_used_elem heads = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2352) cpu_to_vhost32(vq, head),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2353) cpu_to_vhost32(vq, len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2354) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2355)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2356) return vhost_add_used_n(vq, &heads, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2357) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2358) EXPORT_SYMBOL_GPL(vhost_add_used);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2359)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2360) static int __vhost_add_used_n(struct vhost_virtqueue *vq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2361) struct vring_used_elem *heads,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2362) unsigned count)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2363) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2364) vring_used_elem_t __user *used;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2365) u16 old, new;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2366) int start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2367)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2368) start = vq->last_used_idx & (vq->num - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2369) used = vq->used->ring + start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2370) if (vhost_put_used(vq, heads, start, count)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2371) vq_err(vq, "Failed to write used");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2372) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2373) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2374) if (unlikely(vq->log_used)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2375) /* Make sure data is seen before log. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2376) smp_wmb();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2377) /* Log used ring entry write. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2378) log_used(vq, ((void __user *)used - (void __user *)vq->used),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2379) count * sizeof *used);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2380) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2381) old = vq->last_used_idx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2382) new = (vq->last_used_idx += count);
	/* If the driver never bothers to signal for a very long while,
	 * the used index might wrap around. If that happens, invalidate
	 * the signalled_used index we stored. TODO: make sure the driver
	 * signals at least once every 2^16 entries and remove this. */
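	/* Example: old = 0xfff0 and count = 0x20 give new = 0x0010; a
	 * stored signalled_used of 0xfff8 falls inside (old, new] and
	 * is therefore stale.
	 */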
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2387) if (unlikely((u16)(new - vq->signalled_used) < (u16)(new - old)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2388) vq->signalled_used_valid = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2389) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2390) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2391)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2392) /* After we've used one of their buffers, we tell them about it. We'll then
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2393) * want to notify the guest, using eventfd. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2394) int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2395) unsigned count)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2396) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2397) int start, n, r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2398)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2399) start = vq->last_used_idx & (vq->num - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2400) n = vq->num - start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2401) if (n < count) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2402) r = __vhost_add_used_n(vq, heads, n);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2403) if (r < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2404) return r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2405) heads += n;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2406) count -= n;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2407) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2408) r = __vhost_add_used_n(vq, heads, count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2409)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2410) /* Make sure buffer is written before we update index. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2411) smp_wmb();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2412) if (vhost_put_used_idx(vq)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2413) vq_err(vq, "Failed to increment used idx");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2414) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2415) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2416) if (unlikely(vq->log_used)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2417) /* Make sure used idx is seen before log. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2418) smp_wmb();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2419) /* Log used index update. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2420) log_used(vq, offsetof(struct vring_used, idx),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2421) sizeof vq->used->idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2422) if (vq->log_ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2423) eventfd_signal(vq->log_ctx, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2424) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2425) return r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2426) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2427) EXPORT_SYMBOL_GPL(vhost_add_used_n);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2428)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2429) static bool vhost_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2430) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2431) __u16 old, new;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2432) __virtio16 event;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2433) bool v;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2434) /* Flush out used index updates. This is paired
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2435) * with the barrier that the Guest executes when enabling
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2436) * interrupts. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2437) smp_mb();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2438)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2439) if (vhost_has_feature(vq, VIRTIO_F_NOTIFY_ON_EMPTY) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2440) unlikely(vq->avail_idx == vq->last_avail_idx))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2441) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2442)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2443) if (!vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2444) __virtio16 flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2445) if (vhost_get_avail_flags(vq, &flags)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2446) vq_err(vq, "Failed to get flags");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2447) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2448) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2449) return !(flags & cpu_to_vhost16(vq, VRING_AVAIL_F_NO_INTERRUPT));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2450) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2451) old = vq->signalled_used;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2452) v = vq->signalled_used_valid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2453) new = vq->signalled_used = vq->last_used_idx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2454) vq->signalled_used_valid = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2455)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2456) if (unlikely(!v))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2457) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2458)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2459) if (vhost_get_used_event(vq, &event)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2460) vq_err(vq, "Failed to get used event idx");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2461) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2462) }
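	/* Signal only if the used index has moved past the guest's
	 * event index since we last signalled; vring_need_event() does
	 * the modulo-2^16 window check.
	 */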
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2463) return vring_need_event(vhost16_to_cpu(vq, event), new, old);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2464) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2465)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2466) /* This actually signals the guest, using eventfd. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2467) void vhost_signal(struct vhost_dev *dev, struct vhost_virtqueue *vq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2468) {
	/* Signal the Guest to tell them we used something up. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2470) if (vq->call_ctx.ctx && vhost_notify(dev, vq))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2471) eventfd_signal(vq->call_ctx.ctx, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2472) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2473) EXPORT_SYMBOL_GPL(vhost_signal);

/* And here's the combo meal deal. Supersize me! */
void vhost_add_used_and_signal(struct vhost_dev *dev,
			       struct vhost_virtqueue *vq,
			       unsigned int head, int len)
{
	vhost_add_used(vq, head, len);
	vhost_signal(dev, vq);
}
EXPORT_SYMBOL_GPL(vhost_add_used_and_signal);
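
/* Example (a sketch only, not lifted from an in-tree backend): a device
 * work function typically pops a descriptor chain, consumes it, then
 * hands it back and signals in one call; process_buf() is hypothetical:
 *
 *	head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
 *				 &out, &in, NULL, NULL);
 *	if (head < 0 || head == vq->num)
 *		return;
 *	len = process_buf(vq->iov, out, in);
 *	vhost_add_used_and_signal(dev, vq, head, len);
 */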

/* multi-buffer version of vhost_add_used_and_signal */
void vhost_add_used_and_signal_n(struct vhost_dev *dev,
				 struct vhost_virtqueue *vq,
				 struct vring_used_elem *heads, unsigned count)
{
	vhost_add_used_n(vq, heads, count);
	vhost_signal(dev, vq);
}
EXPORT_SYMBOL_GPL(vhost_add_used_and_signal_n);
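
/* Backends that retire buffers in batches (for instance, receive paths
 * that collect several chains into a vring_used_elem array such as
 * vq->heads) use the _n variant so the guest is signalled once per
 * batch rather than once per buffer.
 */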

/* Return true if we're sure that the available ring is empty. */
bool vhost_vq_avail_empty(struct vhost_dev *dev, struct vhost_virtqueue *vq)
{
	__virtio16 avail_idx;
	int r;

	if (vq->avail_idx != vq->last_avail_idx)
		return false;

	r = vhost_get_avail_idx(vq, &avail_idx);
	if (unlikely(r))
		return false;
	vq->avail_idx = vhost16_to_cpu(vq, avail_idx);

	return vq->avail_idx == vq->last_avail_idx;
}
EXPORT_SYMBOL_GPL(vhost_vq_avail_empty);
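
/* A hedged usage sketch: busy-polling backends can spin on this helper
 * instead of sleeping on a guest kick, bounded by some backend-defined
 * deadline ("endtime" and busy_poll_allowed() are illustrative names):
 *
 *	while (busy_poll_allowed(endtime) &&
 *	       vhost_vq_avail_empty(dev, vq))
 *		cpu_relax();
 */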

/* OK, now we need to know about added descriptors. */
bool vhost_enable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
{
	__virtio16 avail_idx;
	int r;

	if (!(vq->used_flags & VRING_USED_F_NO_NOTIFY))
		return false;
	vq->used_flags &= ~VRING_USED_F_NO_NOTIFY;
	if (!vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX)) {
		r = vhost_update_used_flags(vq);
		if (r) {
			vq_err(vq, "Failed to enable notification at %p: %d\n",
			       &vq->used->flags, r);
			return false;
		}
	} else {
		r = vhost_update_avail_event(vq, vq->avail_idx);
		if (r) {
			vq_err(vq, "Failed to update avail event index at %p: %d\n",
			       vhost_avail_event(vq), r);
			return false;
		}
	}
	/* They could have slipped one in as we were doing that: make
	 * sure it's written, then check again. */
	smp_mb();
	r = vhost_get_avail_idx(vq, &avail_idx);
	if (r) {
		vq_err(vq, "Failed to check avail idx at %p: %d\n",
		       &vq->avail->idx, r);
		return false;
	}

	return vhost16_to_cpu(vq, avail_idx) != vq->avail_idx;
}
EXPORT_SYMBOL_GPL(vhost_enable_notify);
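
/* The canonical race-free poll loop (a sketch; handle_buf() and the
 * surrounding names are hypothetical) pairs this with
 * vhost_disable_notify(). When vhost_enable_notify() returns true, a
 * buffer slipped in after the final check, so suppress notifications
 * again and keep polling instead of going to sleep:
 *
 *	vhost_disable_notify(dev, vq);
 *	for (;;) {
 *		head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
 *					 &out, &in, NULL, NULL);
 *		if (head == vq->num) {
 *			if (unlikely(vhost_enable_notify(dev, vq))) {
 *				vhost_disable_notify(dev, vq);
 *				continue;
 *			}
 *			break;
 *		}
 *		handle_buf(vq, head, out, in);
 *	}
 */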

/* We don't need to be notified again. */
void vhost_disable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
{
	int r;

	if (vq->used_flags & VRING_USED_F_NO_NOTIFY)
		return;
	vq->used_flags |= VRING_USED_F_NO_NOTIFY;
	if (!vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX)) {
		r = vhost_update_used_flags(vq);
		if (r)
			vq_err(vq, "Failed to disable notification at %p: %d\n",
			       &vq->used->flags, r);
	}
}
EXPORT_SYMBOL_GPL(vhost_disable_notify);
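
/* Suppression is only a hint: the guest may legitimately ignore
 * VRING_USED_F_NO_NOTIFY and kick anyway, so backends must tolerate
 * spurious notifications rather than treat them as errors.
 */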

/* Create a new message. */
struct vhost_msg_node *vhost_new_msg(struct vhost_virtqueue *vq, int type)
{
	struct vhost_msg_node *node = kmalloc(sizeof(*node), GFP_KERNEL);

	if (!node)
		return NULL;

	/* Make sure all padding within the structure is initialized. */
	memset(&node->msg, 0, sizeof(node->msg));
	node->vq = vq;
	node->msg.type = type;
	return node;
}
EXPORT_SYMBOL_GPL(vhost_new_msg);
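
/* A sketch of the message path, modelled on this driver's IOTLB miss
 * reporting (the field values shown are illustrative): allocate a node,
 * fill in the payload, then queue it for userspace to read:
 *
 *	struct vhost_msg_node *node;
 *
 *	node = vhost_new_msg(vq, dev->msg_type);
 *	if (!node)
 *		return -ENOMEM;
 *	node->msg.iotlb.type = VHOST_IOTLB_MISS;
 *	node->msg.iotlb.iova = iova;
 *	node->msg.iotlb.perm = access;
 *	vhost_enqueue_msg(dev, &dev->read_list, node);
 */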

void vhost_enqueue_msg(struct vhost_dev *dev, struct list_head *head,
		       struct vhost_msg_node *node)
{
	spin_lock(&dev->iotlb_lock);
	list_add_tail(&node->node, head);
	spin_unlock(&dev->iotlb_lock);

	wake_up_interruptible_poll(&dev->wait, EPOLLIN | EPOLLRDNORM);
}
EXPORT_SYMBOL_GPL(vhost_enqueue_msg);

struct vhost_msg_node *vhost_dequeue_msg(struct vhost_dev *dev,
					 struct list_head *head)
{
	struct vhost_msg_node *node = NULL;

	spin_lock(&dev->iotlb_lock);
	if (!list_empty(head)) {
		node = list_first_entry(head, struct vhost_msg_node,
					node);
		list_del(&node->node);
	}
	spin_unlock(&dev->iotlb_lock);

	return node;
}
EXPORT_SYMBOL_GPL(vhost_dequeue_msg);
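
/* The consumer side: the vhost character device read path dequeues
 * nodes from dev->read_list, copies node->msg out to userspace and,
 * roughly speaking, parks nodes that expect a reply (such as IOTLB
 * misses) on dev->pending_list until the answer arrives.
 */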

void vhost_set_backend_features(struct vhost_dev *dev, u64 features)
{
	struct vhost_virtqueue *vq;
	int i;

	mutex_lock(&dev->mutex);
	for (i = 0; i < dev->nvqs; ++i) {
		vq = dev->vqs[i];
		mutex_lock(&vq->mutex);
		vq->acked_backend_features = features;
		mutex_unlock(&vq->mutex);
	}
	mutex_unlock(&dev->mutex);
}
EXPORT_SYMBOL_GPL(vhost_set_backend_features);
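
/* A sketch of the caller side, patterned on how a backend's ioctl
 * handler might forward VHOST_SET_BACKEND_FEATURES (the handler and
 * its BACKEND_FEATURES_SUPPORTED mask are hypothetical):
 *
 *	if (copy_from_user(&features, featurep, sizeof(features)))
 *		return -EFAULT;
 *	if (features & ~BACKEND_FEATURES_SUPPORTED)
 *		return -EOPNOTSUPP;
 *	vhost_set_backend_features(&n->dev, features);
 *	return 0;
 */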

static int __init vhost_init(void)
{
	return 0;
}

static void __exit vhost_exit(void)
{
}

module_init(vhost_init);
module_exit(vhost_exit);

MODULE_VERSION("0.0.1");
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Michael S. Tsirkin");
MODULE_DESCRIPTION("Host kernel accelerator for virtio");