^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) // SPDX-License-Identifier: GPL-2.0-only
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) * VFIO generic eventfd code for IRQFD support.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) * Derived from drivers/vfio/pci/vfio_pci_intrs.c
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) * Copyright (C) 2012 Red Hat, Inc. All rights reserved.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) * Author: Alex Williamson <alex.williamson@redhat.com>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) #include <linux/vfio.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11) #include <linux/eventfd.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12) #include <linux/file.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) #include <linux/module.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) #include <linux/slab.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15)
/* Strings fed to the MODULE_*() metadata macros at the bottom of this file. */
#define DRIVER_DESC	"IRQFD support for VFIO bus drivers"
#define DRIVER_AUTHOR	"Alex Williamson <alex.williamson@redhat.com>"
#define DRIVER_VERSION	"0.1"

/* Guards every read-modify-write of a caller-owned virqfd pointer (*pvirqfd). */
static DEFINE_SPINLOCK(virqfd_lock);

/* Workqueue on which virqfd shutdown jobs (see virqfd_shutdown()) are run. */
static struct workqueue_struct *vfio_irqfd_cleanup_wq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23) static int __init vfio_virqfd_init(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25) vfio_irqfd_cleanup_wq =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) create_singlethread_workqueue("vfio-irqfd-cleanup");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27) if (!vfio_irqfd_cleanup_wq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33) static void __exit vfio_virqfd_exit(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35) destroy_workqueue(vfio_irqfd_cleanup_wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38) static void virqfd_deactivate(struct virqfd *virqfd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40) queue_work(vfio_irqfd_cleanup_wq, &virqfd->shutdown);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) static int virqfd_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45) struct virqfd *virqfd = container_of(wait, struct virqfd, wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46) __poll_t flags = key_to_poll(key);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48) if (flags & EPOLLIN) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49) /* An event has been signaled, call function */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50) if ((!virqfd->handler ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51) virqfd->handler(virqfd->opaque, virqfd->data)) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52) virqfd->thread)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53) schedule_work(&virqfd->inject);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56) if (flags & EPOLLHUP) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57) unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58) spin_lock_irqsave(&virqfd_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61) * The eventfd is closing, if the virqfd has not yet been
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62) * queued for release, as determined by testing whether the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63) * virqfd pointer to it is still valid, queue it now. As
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64) * with kvm irqfds, we know we won't race against the virqfd
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65) * going away because we hold the lock to get here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67) if (*(virqfd->pvirqfd) == virqfd) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68) *(virqfd->pvirqfd) = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69) virqfd_deactivate(virqfd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72) spin_unlock_irqrestore(&virqfd_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78) static void virqfd_ptable_queue_proc(struct file *file,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79) wait_queue_head_t *wqh, poll_table *pt)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81) struct virqfd *virqfd = container_of(pt, struct virqfd, pt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82) add_wait_queue(wqh, &virqfd->wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 83) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 84)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 85) static void virqfd_shutdown(struct work_struct *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 86) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 87) struct virqfd *virqfd = container_of(work, struct virqfd, shutdown);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88) u64 cnt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90) eventfd_ctx_remove_wait_queue(virqfd->eventfd, &virqfd->wait, &cnt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91) flush_work(&virqfd->inject);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92) eventfd_ctx_put(virqfd->eventfd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94) kfree(virqfd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 96)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97) static void virqfd_inject(struct work_struct *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 98) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 99) struct virqfd *virqfd = container_of(work, struct virqfd, inject);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100) if (virqfd->thread)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) virqfd->thread(virqfd->opaque, virqfd->data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104) int vfio_virqfd_enable(void *opaque,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105) int (*handler)(void *, void *),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) void (*thread)(void *, void *),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) void *data, struct virqfd **pvirqfd, int fd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) struct fd irqfd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) struct eventfd_ctx *ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) struct virqfd *virqfd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113) __poll_t events;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) virqfd = kzalloc(sizeof(*virqfd), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) if (!virqfd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) virqfd->pvirqfd = pvirqfd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120) virqfd->opaque = opaque;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121) virqfd->handler = handler;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122) virqfd->thread = thread;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) virqfd->data = data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125) INIT_WORK(&virqfd->shutdown, virqfd_shutdown);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) INIT_WORK(&virqfd->inject, virqfd_inject);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128) irqfd = fdget(fd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129) if (!irqfd.file) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130) ret = -EBADF;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131) goto err_fd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134) ctx = eventfd_ctx_fileget(irqfd.file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135) if (IS_ERR(ctx)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136) ret = PTR_ERR(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137) goto err_ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140) virqfd->eventfd = ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143) * virqfds can be released by closing the eventfd or directly
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144) * through ioctl. These are both done through a workqueue, so
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145) * we update the pointer to the virqfd under lock to avoid
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146) * pushing multiple jobs to release the same virqfd.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148) spin_lock_irq(&virqfd_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150) if (*pvirqfd) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151) spin_unlock_irq(&virqfd_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152) ret = -EBUSY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153) goto err_busy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155) *pvirqfd = virqfd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157) spin_unlock_irq(&virqfd_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 160) * Install our own custom wake-up handling so we are notified via
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 161) * a callback whenever someone signals the underlying eventfd.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 162) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 163) init_waitqueue_func_entry(&virqfd->wait, virqfd_wakeup);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 164) init_poll_funcptr(&virqfd->pt, virqfd_ptable_queue_proc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 165)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 166) events = vfs_poll(irqfd.file, &virqfd->pt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 168) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169) * Check if there was an event already pending on the eventfd
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170) * before we registered and trigger it as if we didn't miss it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172) if (events & EPOLLIN) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 173) if ((!handler || handler(opaque, data)) && thread)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 174) schedule_work(&virqfd->inject);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 175) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 176)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 177) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 178) * Do not drop the file until the irqfd is fully initialized,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 179) * otherwise we might race against the EPOLLHUP.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 180) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 181) fdput(irqfd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 182)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 183) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 184) err_busy:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 185) eventfd_ctx_put(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 186) err_ctx:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 187) fdput(irqfd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 188) err_fd:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 189) kfree(virqfd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 190)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 191) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 192) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 193) EXPORT_SYMBOL_GPL(vfio_virqfd_enable);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 194)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 195) void vfio_virqfd_disable(struct virqfd **pvirqfd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 196) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 197) unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 198)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 199) spin_lock_irqsave(&virqfd_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 200)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 201) if (*pvirqfd) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 202) virqfd_deactivate(*pvirqfd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 203) *pvirqfd = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 204) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 205)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 206) spin_unlock_irqrestore(&virqfd_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 207)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 208) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 209) * Block until we know all outstanding shutdown jobs have completed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 210) * Even if we don't queue the job, flush the wq to be sure it's
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 211) * been released.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 212) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 213) flush_workqueue(vfio_irqfd_cleanup_wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 214) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 215) EXPORT_SYMBOL_GPL(vfio_virqfd_disable);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 216)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 217) module_init(vfio_virqfd_init);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 218) module_exit(vfio_virqfd_exit);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 219)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 220) MODULE_VERSION(DRIVER_VERSION);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 221) MODULE_LICENSE("GPL v2");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 222) MODULE_AUTHOR(DRIVER_AUTHOR);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 223) MODULE_DESCRIPTION(DRIVER_DESC);