// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
 *     Author: Alex Williamson <alex.williamson@redhat.com>
 *
 * Derived from original vfio:
 * Copyright 2010 Cisco Systems, Inc.  All rights reserved.
 * Author: Tom Lyon, pugs@cisco.com
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/device.h>
#include <linux/eventfd.h>
#include <linux/file.h>
#include <linux/interrupt.h>
#include <linux/iommu.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/notifier.h>
#include <linux/pci.h>
#include <linux/pm_runtime.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/uaccess.h>
#include <linux/vfio.h>
#include <linux/vgaarb.h>
#include <linux/nospec.h>
#include <linux/sched/mm.h>

#include "vfio_pci_private.h"

#define DRIVER_VERSION	"0.2"
#define DRIVER_AUTHOR	"Alex Williamson <alex.williamson@redhat.com>"
#define DRIVER_DESC	"VFIO PCI - User Level meta-driver"

static char ids[1024] __initdata;
module_param_string(ids, ids, sizeof(ids), 0);
MODULE_PARM_DESC(ids, "Initial PCI IDs to add to the vfio driver, format is \"vendor:device[:subvendor[:subdevice[:class[:class_mask]]]]\" and multiple comma separated entries can be specified");

static bool nointxmask;
module_param_named(nointxmask, nointxmask, bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(nointxmask,
		 "Disable support for PCI 2.3 style INTx masking.  If this resolves problems for specific devices, report lspci -vvvxxx to linux-pci@vger.kernel.org so the device can be fixed automatically via the broken_intx_masking flag.");

#ifdef CONFIG_VFIO_PCI_VGA
static bool disable_vga;
module_param(disable_vga, bool, S_IRUGO);
MODULE_PARM_DESC(disable_vga, "Disable VGA resource access through vfio-pci");
#endif

static bool disable_idle_d3;
module_param(disable_idle_d3, bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(disable_idle_d3,
		 "Disable using the PCI D3 low power state for idle, unused devices");

static bool enable_sriov;
#ifdef CONFIG_PCI_IOV
module_param(enable_sriov, bool, 0644);
MODULE_PARM_DESC(enable_sriov, "Enable support for SR-IOV configuration.  Enabling SR-IOV on a PF typically requires support of the userspace PF driver, enabling VFs without such support may result in non-functional VFs or PF.");
#endif

static bool disable_denylist;
module_param(disable_denylist, bool, 0444);
MODULE_PARM_DESC(disable_denylist, "Disable use of device denylist. Disabling the denylist allows binding to devices with known errata that may lead to exploitable stability or security issues when accessed by untrusted users.");

static inline bool vfio_vga_disabled(void)
{
#ifdef CONFIG_VFIO_PCI_VGA
	return disable_vga;
#else
	return true;
#endif
}

static bool vfio_pci_dev_in_denylist(struct pci_dev *pdev)
{
	switch (pdev->vendor) {
	case PCI_VENDOR_ID_INTEL:
		switch (pdev->device) {
		case PCI_DEVICE_ID_INTEL_QAT_C3XXX:
		case PCI_DEVICE_ID_INTEL_QAT_C3XXX_VF:
		case PCI_DEVICE_ID_INTEL_QAT_C62X:
		case PCI_DEVICE_ID_INTEL_QAT_C62X_VF:
		case PCI_DEVICE_ID_INTEL_QAT_DH895XCC:
		case PCI_DEVICE_ID_INTEL_QAT_DH895XCC_VF:
			return true;
		default:
			return false;
		}
	}

	return false;
}

static bool vfio_pci_is_denylisted(struct pci_dev *pdev)
{
	if (!vfio_pci_dev_in_denylist(pdev))
		return false;

	if (disable_denylist) {
		pci_warn(pdev,
			 "device denylist disabled - allowing device %04x:%04x.\n",
			 pdev->vendor, pdev->device);
		return false;
	}

	pci_warn(pdev, "%04x:%04x exists in vfio-pci device denylist, driver probing disallowed.\n",
		 pdev->vendor, pdev->device);

	return true;
}

/*
 * Our VGA arbiter participation is limited since we don't know anything
 * about the device itself.  However, if the device is the only VGA device
 * downstream of a bridge and VFIO VGA support is disabled, then we can
 * safely return legacy VGA IO and memory as not decoded since the user
 * has no way to get to it and routing can be disabled externally at the
 * bridge.
 */
static unsigned int vfio_pci_set_vga_decode(void *opaque, bool single_vga)
{
	struct vfio_pci_device *vdev = opaque;
	struct pci_dev *tmp = NULL, *pdev = vdev->pdev;
	unsigned char max_busnr;
	unsigned int decodes;

	if (single_vga || !vfio_vga_disabled() || pci_is_root_bus(pdev->bus))
		return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM |
		       VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM;

	max_busnr = pci_bus_max_busnr(pdev->bus);
	decodes = VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;

	while ((tmp = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, tmp)) != NULL) {
		if (tmp == pdev ||
		    pci_domain_nr(tmp->bus) != pci_domain_nr(pdev->bus) ||
		    pci_is_root_bus(tmp->bus))
			continue;

		if (tmp->bus->number >= pdev->bus->number &&
		    tmp->bus->number <= max_busnr) {
			pci_dev_put(tmp);
			decodes |= VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM;
			break;
		}
	}

	return decodes;
}

static inline bool vfio_pci_is_vga(struct pci_dev *pdev)
{
	return (pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA;
}

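/*
 * A BAR is considered mmap'able if it is at least PAGE_SIZE, or if it is
 * smaller but page aligned and the remainder of its host page can be
 * reserved so no other device shares the page.
 */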
static void vfio_pci_probe_mmaps(struct vfio_pci_device *vdev)
{
	struct resource *res;
	int i;
	struct vfio_pci_dummy_resource *dummy_res;

	for (i = 0; i < PCI_STD_NUM_BARS; i++) {
		int bar = i + PCI_STD_RESOURCES;

		res = &vdev->pdev->resource[bar];

		if (!IS_ENABLED(CONFIG_VFIO_PCI_MMAP))
			goto no_mmap;

		if (!(res->flags & IORESOURCE_MEM))
			goto no_mmap;

		/*
		 * The PCI core shouldn't set up a resource with a
		 * type but zero size.  But there may be bugs that
		 * cause us to do that.
		 */
		if (!resource_size(res))
			goto no_mmap;

		if (resource_size(res) >= PAGE_SIZE) {
			vdev->bar_mmap_supported[bar] = true;
			continue;
		}

		if (!(res->start & ~PAGE_MASK)) {
			/*
			 * Add a dummy resource to reserve the remainder
			 * of the exclusive page in case a hot-added
			 * device's BAR is assigned into it.
			 */
			dummy_res = kzalloc(sizeof(*dummy_res), GFP_KERNEL);
			if (dummy_res == NULL)
				goto no_mmap;

			dummy_res->resource.name = "vfio sub-page reserved";
			dummy_res->resource.start = res->end + 1;
			dummy_res->resource.end = res->start + PAGE_SIZE - 1;
			dummy_res->resource.flags = res->flags;
			if (request_resource(res->parent,
						&dummy_res->resource)) {
				kfree(dummy_res);
				goto no_mmap;
			}
			dummy_res->index = bar;
			list_add(&dummy_res->res_next,
					&vdev->dummy_resources_list);
			vdev->bar_mmap_supported[bar] = true;
			continue;
		}
		/*
		 * We don't handle the case where the BAR is not page
		 * aligned because we can't expect the BAR to be
		 * assigned to the same location within a page in the
		 * guest when we pass it through.  Such a BAR is also
		 * hard to access from userspace because we have no way
		 * to communicate the BAR's offset within the page.
		 */
no_mmap:
		vdev->bar_mmap_supported[bar] = false;
	}
}

static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev);
static void vfio_pci_disable(struct vfio_pci_device *vdev);
static int vfio_pci_try_zap_and_vma_lock_cb(struct pci_dev *pdev, void *data);

/*
 * INTx masking requires the ability to disable INTx signaling via PCI_COMMAND
 * _and_ the ability to detect when the device is asserting INTx via PCI_STATUS.
 * If a device implements the former but not the latter we would typically
 * expect broken_intx_masking to be set and require an exclusive interrupt.
 * However, since we do have control over the device's ability to assert INTx,
 * we can instead pretend that the device does not implement INTx, virtualizing
 * the pin register to report zero and maintaining DisINTx set on the host.
 */
static bool vfio_pci_nointx(struct pci_dev *pdev)
{
	switch (pdev->vendor) {
	case PCI_VENDOR_ID_INTEL:
		switch (pdev->device) {
		/* All i40e (XL710/X710/XXV710) 10/20/25/40GbE NICs */
		case 0x1572:
		case 0x1574:
		case 0x1580 ... 0x1581:
		case 0x1583 ... 0x158b:
		case 0x37d0 ... 0x37d2:
		/* X550 */
		case 0x1563:
			return true;
		default:
			return false;
		}
	}

	return false;
}

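/*
 * Record whether the device advertises No_Soft_Reset.  If it does not
 * (NoSoftRst-), it may perform an internal reset on the D3hot->D0
 * transition, so config state must be saved and restored around D3.
 */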
static void vfio_pci_probe_power_state(struct vfio_pci_device *vdev)
{
	struct pci_dev *pdev = vdev->pdev;
	u16 pmcsr;

	if (!pdev->pm_cap)
		return;

	pci_read_config_word(pdev, pdev->pm_cap + PCI_PM_CTRL, &pmcsr);

	vdev->needs_pm_restore = !(pmcsr & PCI_PM_CTRL_NO_SOFT_RESET);
}

/*
 * pci_set_power_state() wrapper handling devices which perform a soft reset on
 * D3->D0 transition.  Save state prior to D0/1/2->D3, stash it on the vdev,
 * restore when returned to D0.  Saved separately from pci_saved_state for use
 * by PM capability emulation and separately from pci_dev internal saved state
 * to avoid it being overwritten and consumed around other resets.
 */
int vfio_pci_set_power_state(struct vfio_pci_device *vdev, pci_power_t state)
{
	struct pci_dev *pdev = vdev->pdev;
	bool needs_restore = false, needs_save = false;
	int ret;

	if (vdev->needs_pm_restore) {
		if (pdev->current_state < PCI_D3hot && state >= PCI_D3hot) {
			pci_save_state(pdev);
			needs_save = true;
		}

		if (pdev->current_state >= PCI_D3hot && state <= PCI_D0)
			needs_restore = true;
	}

	ret = pci_set_power_state(pdev, state);

	if (!ret) {
		/* D3 might be unsupported via quirk, skip unless in D3 */
		if (needs_save && pdev->current_state >= PCI_D3hot) {
			vdev->pm_save = pci_store_saved_state(pdev);
		} else if (needs_restore) {
			pci_load_and_free_saved_state(pdev, &vdev->pm_save);
			pci_restore_state(pdev);
		}
	}

	return ret;
}

static int vfio_pci_enable(struct vfio_pci_device *vdev)
{
	struct pci_dev *pdev = vdev->pdev;
	int ret;
	u16 cmd;
	u8 msix_pos;

	vfio_pci_set_power_state(vdev, PCI_D0);

	/* Don't allow our initial saved state to include busmaster */
	pci_clear_master(pdev);

	ret = pci_enable_device(pdev);
	if (ret)
		return ret;

	/* If reset fails because of the device lock, fail this path entirely */
	ret = pci_try_reset_function(pdev);
	if (ret == -EAGAIN) {
		pci_disable_device(pdev);
		return ret;
	}

	vdev->reset_works = !ret;
	pci_save_state(pdev);
	vdev->pci_saved_state = pci_store_saved_state(pdev);
	if (!vdev->pci_saved_state)
		pci_dbg(pdev, "%s: Couldn't store saved state\n", __func__);

	if (likely(!nointxmask)) {
		if (vfio_pci_nointx(pdev)) {
			pci_info(pdev, "Masking broken INTx support\n");
			vdev->nointx = true;
			pci_intx(pdev, 0);
		} else
			vdev->pci_2_3 = pci_intx_mask_supported(pdev);
	}

	pci_read_config_word(pdev, PCI_COMMAND, &cmd);
	if (vdev->pci_2_3 && (cmd & PCI_COMMAND_INTX_DISABLE)) {
		cmd &= ~PCI_COMMAND_INTX_DISABLE;
		pci_write_config_word(pdev, PCI_COMMAND, cmd);
	}

	ret = vfio_config_init(vdev);
	if (ret) {
		kfree(vdev->pci_saved_state);
		vdev->pci_saved_state = NULL;
		pci_disable_device(pdev);
		return ret;
	}

	msix_pos = pdev->msix_cap;
	if (msix_pos) {
		u16 flags;
		u32 table;

		pci_read_config_word(pdev, msix_pos + PCI_MSIX_FLAGS, &flags);
		pci_read_config_dword(pdev, msix_pos + PCI_MSIX_TABLE, &table);

		vdev->msix_bar = table & PCI_MSIX_TABLE_BIR;
		vdev->msix_offset = table & PCI_MSIX_TABLE_OFFSET;
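		/* QSIZE encodes table size - 1; each MSI-X entry is 16 bytes */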
		vdev->msix_size = ((flags & PCI_MSIX_FLAGS_QSIZE) + 1) * 16;
	} else
		vdev->msix_bar = 0xFF;

	if (!vfio_vga_disabled() && vfio_pci_is_vga(pdev))
		vdev->has_vga = true;

	if (vfio_pci_is_vga(pdev) &&
	    pdev->vendor == PCI_VENDOR_ID_INTEL &&
	    IS_ENABLED(CONFIG_VFIO_PCI_IGD)) {
		ret = vfio_pci_igd_init(vdev);
		if (ret && ret != -ENODEV) {
			pci_warn(pdev, "Failed to setup Intel IGD regions\n");
			goto disable_exit;
		}
	}

	if (pdev->vendor == PCI_VENDOR_ID_NVIDIA &&
	    IS_ENABLED(CONFIG_VFIO_PCI_NVLINK2)) {
		ret = vfio_pci_nvdia_v100_nvlink2_init(vdev);
		if (ret && ret != -ENODEV) {
			pci_warn(pdev, "Failed to setup NVIDIA NV2 RAM region\n");
			goto disable_exit;
		}
	}

	if (pdev->vendor == PCI_VENDOR_ID_IBM &&
	    IS_ENABLED(CONFIG_VFIO_PCI_NVLINK2)) {
		ret = vfio_pci_ibm_npu2_init(vdev);
		if (ret && ret != -ENODEV) {
			pci_warn(pdev, "Failed to setup NVIDIA NV2 ATSD region\n");
			goto disable_exit;
		}
	}

	vfio_pci_probe_mmaps(vdev);

	return 0;

disable_exit:
	vfio_pci_disable(vdev);
	return ret;
}

static void vfio_pci_disable(struct vfio_pci_device *vdev)
{
	struct pci_dev *pdev = vdev->pdev;
	struct vfio_pci_dummy_resource *dummy_res, *tmp;
	struct vfio_pci_ioeventfd *ioeventfd, *ioeventfd_tmp;
	int i, bar;

	/* Stop the device from further DMA */
	pci_clear_master(pdev);

	vfio_pci_set_irqs_ioctl(vdev, VFIO_IRQ_SET_DATA_NONE |
				VFIO_IRQ_SET_ACTION_TRIGGER,
				vdev->irq_type, 0, 0, NULL);

	/* Device closed, don't need mutex here */
	list_for_each_entry_safe(ioeventfd, ioeventfd_tmp,
				 &vdev->ioeventfds_list, next) {
		vfio_virqfd_disable(&ioeventfd->virqfd);
		list_del(&ioeventfd->next);
		kfree(ioeventfd);
	}
	vdev->ioeventfds_nr = 0;

	vdev->virq_disabled = false;

	for (i = 0; i < vdev->num_regions; i++)
		vdev->region[i].ops->release(vdev, &vdev->region[i]);

	vdev->num_regions = 0;
	kfree(vdev->region);
	vdev->region = NULL; /* don't krealloc a freed pointer */

	vfio_config_free(vdev);

	for (i = 0; i < PCI_STD_NUM_BARS; i++) {
		bar = i + PCI_STD_RESOURCES;
		if (!vdev->barmap[bar])
			continue;
		pci_iounmap(pdev, vdev->barmap[bar]);
		pci_release_selected_regions(pdev, 1 << bar);
		vdev->barmap[bar] = NULL;
	}

	list_for_each_entry_safe(dummy_res, tmp,
				 &vdev->dummy_resources_list, res_next) {
		list_del(&dummy_res->res_next);
		release_resource(&dummy_res->resource);
		kfree(dummy_res);
	}

	vdev->needs_reset = true;

	/*
	 * If we have saved state, restore it.  If we can reset the device,
	 * even better.  Resetting with current state seems better than
	 * nothing, but saving and restoring current state without reset
	 * is just busy work.
	 */
	if (pci_load_and_free_saved_state(pdev, &vdev->pci_saved_state)) {
		pci_info(pdev, "%s: Couldn't reload saved state\n", __func__);

		if (!vdev->reset_works)
			goto out;

		pci_save_state(pdev);
	}

	/*
	 * Disable INTx and MSI, presumably to avoid spurious interrupts
	 * during reset.  Stolen from pci_reset_function()
	 */
	pci_write_config_word(pdev, PCI_COMMAND, PCI_COMMAND_INTX_DISABLE);

	/*
	 * Try to get the locks ourselves to prevent a deadlock.  Whether
	 * this succeeds depends on being able to lock the device, which is
	 * not always possible.
	 * We cannot use the "try" reset interface here, as it would
	 * overwrite the previously restored configuration information.
	 */
	if (vdev->reset_works && pci_cfg_access_trylock(pdev)) {
		if (device_trylock(&pdev->dev)) {
			if (!__pci_reset_function_locked(pdev))
				vdev->needs_reset = false;
			device_unlock(&pdev->dev);
		}
		pci_cfg_access_unlock(pdev);
	}

	pci_restore_state(pdev);
out:
	pci_disable_device(pdev);

	vfio_pci_try_bus_reset(vdev);

	if (!disable_idle_d3)
		vfio_pci_set_power_state(vdev, PCI_D3hot);
}

static struct pci_driver vfio_pci_driver;

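/*
 * For a VF, return the vfio_pci_device of its PF if the PF is also bound
 * to vfio-pci, taking a reference on the PF's vfio_device.  The caller is
 * responsible for releasing it with vfio_device_put(*pf_dev).
 */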
static struct vfio_pci_device *get_pf_vdev(struct vfio_pci_device *vdev,
					   struct vfio_device **pf_dev)
{
	struct pci_dev *physfn = pci_physfn(vdev->pdev);

	if (!vdev->pdev->is_virtfn)
		return NULL;

	*pf_dev = vfio_device_get_from_dev(&physfn->dev);
	if (!*pf_dev)
		return NULL;

	if (pci_dev_driver(physfn) != &vfio_pci_driver) {
		vfio_device_put(*pf_dev);
		return NULL;
	}

	return vfio_device_data(*pf_dev);
}

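/* Adjust the PF's count of VF token users by val (+1 on open, -1 on release) */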
static void vfio_pci_vf_token_user_add(struct vfio_pci_device *vdev, int val)
{
	struct vfio_device *pf_dev;
	struct vfio_pci_device *pf_vdev = get_pf_vdev(vdev, &pf_dev);

	if (!pf_vdev)
		return;

	mutex_lock(&pf_vdev->vf_token->lock);
	pf_vdev->vf_token->users += val;
	WARN_ON(pf_vdev->vf_token->users < 0);
	mutex_unlock(&pf_vdev->vf_token->lock);

	vfio_device_put(pf_dev);
}

static void vfio_pci_release(void *device_data)
{
	struct vfio_pci_device *vdev = device_data;

	mutex_lock(&vdev->reflck->lock);

	if (!(--vdev->refcnt)) {
		vfio_pci_vf_token_user_add(vdev, -1);
		vfio_spapr_pci_eeh_release(vdev->pdev);
		vfio_pci_disable(vdev);

		mutex_lock(&vdev->igate);
		if (vdev->err_trigger) {
			eventfd_ctx_put(vdev->err_trigger);
			vdev->err_trigger = NULL;
		}
		if (vdev->req_trigger) {
			eventfd_ctx_put(vdev->req_trigger);
			vdev->req_trigger = NULL;
		}
		mutex_unlock(&vdev->igate);
	}

	mutex_unlock(&vdev->reflck->lock);

	module_put(THIS_MODULE);
}

static int vfio_pci_open(void *device_data)
{
	struct vfio_pci_device *vdev = device_data;
	int ret = 0;

	if (!try_module_get(THIS_MODULE))
		return -ENODEV;

	mutex_lock(&vdev->reflck->lock);

	if (!vdev->refcnt) {
		ret = vfio_pci_enable(vdev);
		if (ret)
			goto error;

		vfio_spapr_pci_eeh_open(vdev->pdev);
		vfio_pci_vf_token_user_add(vdev, 1);
	}
	vdev->refcnt++;
error:
	mutex_unlock(&vdev->reflck->lock);
	if (ret)
		module_put(THIS_MODULE);
	return ret;
}

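/* Number of interrupts the device can signal for the given VFIO IRQ index */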
static int vfio_pci_get_irq_count(struct vfio_pci_device *vdev, int irq_type)
{
	if (irq_type == VFIO_PCI_INTX_IRQ_INDEX) {
		u8 pin;

		if (!IS_ENABLED(CONFIG_VFIO_PCI_INTX) ||
		    vdev->nointx || vdev->pdev->is_virtfn)
			return 0;

		pci_read_config_byte(vdev->pdev, PCI_INTERRUPT_PIN, &pin);

		return pin ? 1 : 0;
	} else if (irq_type == VFIO_PCI_MSI_IRQ_INDEX) {
		u8 pos;
		u16 flags;

		pos = vdev->pdev->msi_cap;
		if (pos) {
			pci_read_config_word(vdev->pdev,
					     pos + PCI_MSI_FLAGS, &flags);
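			/* The Multiple Message Capable field is log2-encoded */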
			return 1 << ((flags & PCI_MSI_FLAGS_QMASK) >> 1);
		}
	} else if (irq_type == VFIO_PCI_MSIX_IRQ_INDEX) {
		u8 pos;
		u16 flags;

		pos = vdev->pdev->msix_cap;
		if (pos) {
			pci_read_config_word(vdev->pdev,
					     pos + PCI_MSIX_FLAGS, &flags);

			return (flags & PCI_MSIX_FLAGS_QSIZE) + 1;
		}
	} else if (irq_type == VFIO_PCI_ERR_IRQ_INDEX) {
		if (pci_is_pcie(vdev->pdev))
			return 1;
	} else if (irq_type == VFIO_PCI_REQ_IRQ_INDEX) {
		return 1;
	}

	return 0;
}

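/* Bus-walk callback: count devices affected by a bus or slot reset */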
static int vfio_pci_count_devs(struct pci_dev *pdev, void *data)
{
	(*(int *)data)++;
	return 0;
}

struct vfio_pci_fill_info {
	int max;
	int cur;
	struct vfio_pci_dependent_device *devices;
};

static int vfio_pci_fill_devs(struct pci_dev *pdev, void *data)
{
	struct vfio_pci_fill_info *fill = data;
	struct iommu_group *iommu_group;

	if (fill->cur == fill->max)
		return -EAGAIN; /* Something changed, try again */

	iommu_group = iommu_group_get(&pdev->dev);
	if (!iommu_group)
		return -EPERM; /* Cannot reset non-isolated devices */

	fill->devices[fill->cur].group_id = iommu_group_id(iommu_group);
	fill->devices[fill->cur].segment = pci_domain_nr(pdev->bus);
	fill->devices[fill->cur].bus = pdev->bus->number;
	fill->devices[fill->cur].devfn = pdev->devfn;
	fill->cur++;
	iommu_group_put(iommu_group);
	return 0;
}

struct vfio_pci_group_entry {
	struct vfio_group *group;
	int id;
};

struct vfio_pci_group_info {
	int count;
	struct vfio_pci_group_entry *groups;
};

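/*
 * Bus-walk callback: a reset is only permitted if every affected device
 * belongs to one of the IOMMU groups the user has provided.
 */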
static int vfio_pci_validate_devs(struct pci_dev *pdev, void *data)
{
	struct vfio_pci_group_info *info = data;
	struct iommu_group *group;
	int id, i;

	group = iommu_group_get(&pdev->dev);
	if (!group)
		return -EPERM;

	id = iommu_group_id(group);

	for (i = 0; i < info->count; i++)
		if (info->groups[i].id == id)
			break;

	iommu_group_put(group);

	return (i == info->count) ? -EINVAL : 0;
}

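/*
 * Walk upstream from pdev through its parent bridges; true if the device
 * sits below the given slot, i.e. a slot reset would affect it.
 */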
static bool vfio_pci_dev_below_slot(struct pci_dev *pdev, struct pci_slot *slot)
{
	for (; pdev; pdev = pdev->bus->self)
		if (pdev->bus == slot->bus)
			return (pdev->slot == slot);
	return false;
}

struct vfio_pci_walk_info {
	int (*fn)(struct pci_dev *, void *data);
	void *data;
	struct pci_dev *pdev;
	bool slot;
	int ret;
};

static int vfio_pci_walk_wrapper(struct pci_dev *pdev, void *data)
{
	struct vfio_pci_walk_info *walk = data;

	if (!walk->slot || vfio_pci_dev_below_slot(pdev, walk->pdev->slot))
		walk->ret = walk->fn(pdev, walk->data);

	return walk->ret;
}

static int vfio_pci_for_each_slot_or_bus(struct pci_dev *pdev,
					 int (*fn)(struct pci_dev *,
						   void *data), void *data,
					 bool slot)
{
	struct vfio_pci_walk_info walk = {
		.fn = fn, .data = data, .pdev = pdev, .slot = slot, .ret = 0,
	};

	pci_walk_bus(pdev->bus, vfio_pci_walk_wrapper, &walk);

	return walk.ret;
}

static int msix_mmappable_cap(struct vfio_pci_device *vdev,
			      struct vfio_info_cap *caps)
{
	struct vfio_info_cap_header header = {
		.id = VFIO_REGION_INFO_CAP_MSIX_MAPPABLE,
		.version = 1
	};

	return vfio_info_add_capability(caps, &header, sizeof(header));
}

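/*
 * Append a device-specific region to vdev->region; userspace sees it at
 * region index VFIO_PCI_NUM_REGIONS + n, in registration order.
 */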
int vfio_pci_register_dev_region(struct vfio_pci_device *vdev,
				 unsigned int type, unsigned int subtype,
				 const struct vfio_pci_regops *ops,
				 size_t size, u32 flags, void *data)
{
	struct vfio_pci_region *region;

	region = krealloc(vdev->region,
			  (vdev->num_regions + 1) * sizeof(*region),
			  GFP_KERNEL);
	if (!region)
		return -ENOMEM;

	vdev->region = region;
	vdev->region[vdev->num_regions].type = type;
	vdev->region[vdev->num_regions].subtype = subtype;
	vdev->region[vdev->num_regions].ops = ops;
	vdev->region[vdev->num_regions].size = size;
	vdev->region[vdev->num_regions].flags = flags;
	vdev->region[vdev->num_regions].data = data;

	vdev->num_regions++;

	return 0;
}

struct vfio_devices {
	struct vfio_device **devices;
	int cur_index;
	int max_index;
};

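/*
 * Device ioctl handler: each command copies in a fixed-size header with
 * argsz/flags first, validates argsz against the minimum size, then acts
 * on the remainder of the user's buffer.
 */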
static long vfio_pci_ioctl(void *device_data,
			   unsigned int cmd, unsigned long arg)
{
	struct vfio_pci_device *vdev = device_data;
	unsigned long minsz;

	if (cmd == VFIO_DEVICE_GET_INFO) {
		struct vfio_device_info info;
		struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
		unsigned long capsz;

		minsz = offsetofend(struct vfio_device_info, num_irqs);

		/* For backward compatibility, we cannot require cap_offset */
		capsz = offsetofend(struct vfio_device_info, cap_offset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 815)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 816) if (copy_from_user(&info, (void __user *)arg, minsz))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 817) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 818)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 819) if (info.argsz < minsz)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 820) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 821)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 822) if (info.argsz >= capsz) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 823) minsz = capsz;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 824) info.cap_offset = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 825) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 826)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 827) info.flags = VFIO_DEVICE_FLAGS_PCI;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 828)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 829) if (vdev->reset_works)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 830) info.flags |= VFIO_DEVICE_FLAGS_RESET;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 831)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 832) info.num_regions = VFIO_PCI_NUM_REGIONS + vdev->num_regions;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 833) info.num_irqs = VFIO_PCI_NUM_IRQS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 834)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 835) if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 836) int ret = vfio_pci_info_zdev_add_caps(vdev, &caps);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 837)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 838) if (ret && ret != -ENODEV) {
				pci_warn(vdev->pdev, "Failed to set up zPCI info capabilities\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 840) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 841) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 842) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 843)
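		/*
		 * Capability chain handoff: if the user buffer is too small
		 * for the capabilities built above, report the required
		 * argsz (leaving cap_offset at 0) so userspace can
		 * reallocate and retry; otherwise copy the chain out
		 * directly after the fixed-size structure.
		 */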
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 844) if (caps.size) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 845) info.flags |= VFIO_DEVICE_FLAGS_CAPS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 846) if (info.argsz < sizeof(info) + caps.size) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 847) info.argsz = sizeof(info) + caps.size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 848) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 849) vfio_info_cap_shift(&caps, sizeof(info));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 850) if (copy_to_user((void __user *)arg +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 851) sizeof(info), caps.buf,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 852) caps.size)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 853) kfree(caps.buf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 854) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 855) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 856) info.cap_offset = sizeof(info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 857) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 858)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 859) kfree(caps.buf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 860) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 861)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 862) return copy_to_user((void __user *)arg, &info, minsz) ?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 863) -EFAULT : 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 864)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 865) } else if (cmd == VFIO_DEVICE_GET_REGION_INFO) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 866) struct pci_dev *pdev = vdev->pdev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 867) struct vfio_region_info info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 868) struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 869) int i, ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 870)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 871) minsz = offsetofend(struct vfio_region_info, offset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 872)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 873) if (copy_from_user(&info, (void __user *)arg, minsz))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 874) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 875)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 876) if (info.argsz < minsz)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 877) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 878)
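		/*
		 * Fixed index layout: BARs 0-5, then the ROM, config space
		 * and (optionally) VGA. Device-specific regions registered
		 * via vfio_pci_register_dev_region() follow from
		 * VFIO_PCI_NUM_REGIONS onward and are handled by the
		 * default case.
		 */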
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 879) switch (info.index) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 880) case VFIO_PCI_CONFIG_REGION_INDEX:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 881) info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 882) info.size = pdev->cfg_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 883) info.flags = VFIO_REGION_INFO_FLAG_READ |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 884) VFIO_REGION_INFO_FLAG_WRITE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 885) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 886) case VFIO_PCI_BAR0_REGION_INDEX ... VFIO_PCI_BAR5_REGION_INDEX:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 887) info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 888) info.size = pci_resource_len(pdev, info.index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 889) if (!info.size) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 890) info.flags = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 891) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 892) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 893)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 894) info.flags = VFIO_REGION_INFO_FLAG_READ |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 895) VFIO_REGION_INFO_FLAG_WRITE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 896) if (vdev->bar_mmap_supported[info.index]) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 897) info.flags |= VFIO_REGION_INFO_FLAG_MMAP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 898) if (info.index == vdev->msix_bar) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 899) ret = msix_mmappable_cap(vdev, &caps);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 900) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 901) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 902) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 903) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 904)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 905) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 906) case VFIO_PCI_ROM_REGION_INDEX:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 907) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 908) void __iomem *io;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 909) size_t size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 910) u16 cmd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 911)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 912) info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 913) info.flags = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 914)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 915) /* Report the BAR size, not the ROM size */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 916) info.size = pci_resource_len(pdev, info.index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 917) if (!info.size) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 918) /* Shadow ROMs appear as PCI option ROMs */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 919) if (pdev->resource[PCI_ROM_RESOURCE].flags &
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 920) IORESOURCE_ROM_SHADOW)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 921) info.size = 0x20000;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 922) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 923) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 924) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 925)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 926) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 927) * Is it really there? Enable memory decode for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 928) * implicit access in pci_map_rom().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 929) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 930) cmd = vfio_pci_memory_lock_and_enable(vdev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 931) io = pci_map_rom(pdev, &size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 932) if (io) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 933) info.flags = VFIO_REGION_INFO_FLAG_READ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 934) pci_unmap_rom(pdev, io);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 935) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 936) info.size = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 937) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 938) vfio_pci_memory_unlock_and_restore(vdev, cmd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 939)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 940) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 941) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 942) case VFIO_PCI_VGA_REGION_INDEX:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 943) if (!vdev->has_vga)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 944) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 945)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 946) info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 947) info.size = 0xc0000;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 948) info.flags = VFIO_REGION_INFO_FLAG_READ |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 949) VFIO_REGION_INFO_FLAG_WRITE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 950)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 951) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 952) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 953) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 954) struct vfio_region_info_cap_type cap_type = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 955) .header.id = VFIO_REGION_INFO_CAP_TYPE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 956) .header.version = 1 };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 957)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 958) if (info.index >=
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 959) VFIO_PCI_NUM_REGIONS + vdev->num_regions)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 960) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 961) info.index = array_index_nospec(info.index,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 962) VFIO_PCI_NUM_REGIONS +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 963) vdev->num_regions);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 964)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 965) i = info.index - VFIO_PCI_NUM_REGIONS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 966)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 967) info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 968) info.size = vdev->region[i].size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 969) info.flags = vdev->region[i].flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 970)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 971) cap_type.type = vdev->region[i].type;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 972) cap_type.subtype = vdev->region[i].subtype;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 973)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 974) ret = vfio_info_add_capability(&caps, &cap_type.header,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 975) sizeof(cap_type));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 976) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 977) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 978)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 979) if (vdev->region[i].ops->add_capability) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 980) ret = vdev->region[i].ops->add_capability(vdev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 981) &vdev->region[i], &caps);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 982) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 983) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 984) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 985) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 986) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 987)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 988) if (caps.size) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 989) info.flags |= VFIO_REGION_INFO_FLAG_CAPS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 990) if (info.argsz < sizeof(info) + caps.size) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 991) info.argsz = sizeof(info) + caps.size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 992) info.cap_offset = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 993) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 994) vfio_info_cap_shift(&caps, sizeof(info));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 995) if (copy_to_user((void __user *)arg +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 996) sizeof(info), caps.buf,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 997) caps.size)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 998) kfree(caps.buf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 999) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) info.cap_offset = sizeof(info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004) kfree(caps.buf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007) return copy_to_user((void __user *)arg, &info, minsz) ?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008) -EFAULT : 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010) } else if (cmd == VFIO_DEVICE_GET_IRQ_INFO) {
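		/*
		 * Fixed IRQ indexes: INTx, MSI and MSI-X, plus the virtual
		 * error and device-request indexes. The ERR index is only
		 * advertised for PCIe devices, as checked below.
		 */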
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011) struct vfio_irq_info info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013) minsz = offsetofend(struct vfio_irq_info, count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015) if (copy_from_user(&info, (void __user *)arg, minsz))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018) if (info.argsz < minsz || info.index >= VFIO_PCI_NUM_IRQS)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) switch (info.index) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022) case VFIO_PCI_INTX_IRQ_INDEX ... VFIO_PCI_MSIX_IRQ_INDEX:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023) case VFIO_PCI_REQ_IRQ_INDEX:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025) case VFIO_PCI_ERR_IRQ_INDEX:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026) if (pci_is_pcie(vdev->pdev))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028) fallthrough;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033) info.flags = VFIO_IRQ_INFO_EVENTFD;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035) info.count = vfio_pci_get_irq_count(vdev, info.index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037) if (info.index == VFIO_PCI_INTX_IRQ_INDEX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038) info.flags |= (VFIO_IRQ_INFO_MASKABLE |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039) VFIO_IRQ_INFO_AUTOMASKED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041) info.flags |= VFIO_IRQ_INFO_NORESIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) return copy_to_user((void __user *)arg, &info, minsz) ?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044) -EFAULT : 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046) } else if (cmd == VFIO_DEVICE_SET_IRQS) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047) struct vfio_irq_set hdr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) u8 *data = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049) int max, ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050) size_t data_size = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051)
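		/*
		 * The header is followed by hdr.count elements whose layout
		 * depends on hdr.flags (eventfds, a bool array, or nothing).
		 * A plausible userspace sketch wiring an eventfd (efd) to
		 * MSI vector 0 (names are illustrative):
		 *
		 *	size_t sz = sizeof(struct vfio_irq_set) + sizeof(int32_t);
		 *	struct vfio_irq_set *set = malloc(sz);
		 *
		 *	set->argsz = sz;
		 *	set->flags = VFIO_IRQ_SET_DATA_EVENTFD |
		 *		     VFIO_IRQ_SET_ACTION_TRIGGER;
		 *	set->index = VFIO_PCI_MSI_IRQ_INDEX;
		 *	set->start = 0;
		 *	set->count = 1;
		 *	memcpy(set->data, &efd, sizeof(int32_t));
		 *	ioctl(device_fd, VFIO_DEVICE_SET_IRQS, set);
		 */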
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052) minsz = offsetofend(struct vfio_irq_set, count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) if (copy_from_user(&hdr, (void __user *)arg, minsz))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057) max = vfio_pci_get_irq_count(vdev, hdr.index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059) ret = vfio_set_irqs_validate_and_prepare(&hdr, max,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060) VFIO_PCI_NUM_IRQS, &data_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064) if (data_size) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065) data = memdup_user((void __user *)(arg + minsz),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066) data_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067) if (IS_ERR(data))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068) return PTR_ERR(data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071) mutex_lock(&vdev->igate);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073) ret = vfio_pci_set_irqs_ioctl(vdev, hdr.flags, hdr.index,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074) hdr.start, hdr.count, data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076) mutex_unlock(&vdev->igate);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077) kfree(data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081) } else if (cmd == VFIO_DEVICE_RESET) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084) if (!vdev->reset_works)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087) vfio_pci_zap_and_down_write_memory_lock(vdev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088) ret = pci_try_reset_function(vdev->pdev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089) up_write(&vdev->memory_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093) } else if (cmd == VFIO_DEVICE_GET_PCI_HOT_RESET_INFO) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094) struct vfio_pci_hot_reset_info hdr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095) struct vfio_pci_fill_info fill = { 0 };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096) struct vfio_pci_dependent_device *devices = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097) bool slot = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099)
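		/*
		 * Two-call protocol: a call with too small a buffer fails
		 * with -ENOSPC but still returns the number of affected
		 * devices in hdr.count, letting userspace size a second
		 * call that receives the full dependent-device list.
		 */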
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100) minsz = offsetofend(struct vfio_pci_hot_reset_info, count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102) if (copy_from_user(&hdr, (void __user *)arg, minsz))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105) if (hdr.argsz < minsz)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108) hdr.flags = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109)
		/* Can we do a slot reset, a bus reset, or neither? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111) if (!pci_probe_reset_slot(vdev->pdev->slot))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112) slot = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1113) else if (pci_probe_reset_bus(vdev->pdev->bus))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114) return -ENODEV;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116) /* How many devices are affected? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117) ret = vfio_pci_for_each_slot_or_bus(vdev->pdev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118) vfio_pci_count_devs,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119) &fill.max, slot);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123) WARN_ON(!fill.max); /* Should always be at least one */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126) * If there's enough space, fill it now, otherwise return
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127) * -ENOSPC and the number of devices affected.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129) if (hdr.argsz < sizeof(hdr) + (fill.max * sizeof(*devices))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130) ret = -ENOSPC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131) hdr.count = fill.max;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132) goto reset_info_exit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135) devices = kcalloc(fill.max, sizeof(*devices), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136) if (!devices)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139) fill.devices = devices;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141) ret = vfio_pci_for_each_slot_or_bus(vdev->pdev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142) vfio_pci_fill_devs,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143) &fill, slot);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144)
		/*
		 * If a device was removed between counting and filling, we
		 * may come up short of fill.max. If a device was added, the
		 * fill callback above will have returned -EAGAIN.
		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150) if (!ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151) hdr.count = fill.cur;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153) reset_info_exit:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154) if (copy_to_user((void __user *)arg, &hdr, minsz))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155) ret = -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157) if (!ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158) if (copy_to_user((void __user *)(arg + minsz), devices,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159) hdr.count * sizeof(*devices)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160) ret = -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163) kfree(devices);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166) } else if (cmd == VFIO_DEVICE_PCI_HOT_RESET) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167) struct vfio_pci_hot_reset hdr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168) int32_t *group_fds;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169) struct vfio_pci_group_entry *groups;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170) struct vfio_pci_group_info info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171) struct vfio_devices devs = { .cur_index = 0 };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172) bool slot = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173) int i, group_idx, mem_idx = 0, count = 0, ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175) minsz = offsetofend(struct vfio_pci_hot_reset, count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177) if (copy_from_user(&hdr, (void __user *)arg, minsz))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180) if (hdr.argsz < minsz || hdr.flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182)
		/* Can we do a slot reset, a bus reset, or neither? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184) if (!pci_probe_reset_slot(vdev->pdev->slot))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185) slot = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186) else if (pci_probe_reset_bus(vdev->pdev->bus))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187) return -ENODEV;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188)
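		/*
		 * Overall flow: count the affected devices, copy in the
		 * group fds, take a reference on each group, verify the
		 * groups cover every affected device, block memory access
		 * on each device, and only then issue the reset.
		 */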
		/*
		 * We can't let userspace give us an arbitrarily large buffer
		 * to copy, so bound the count by the number of affected
		 * devices. Groups can contain multiple devices, so one group
		 * per device is the maximum.
		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195) ret = vfio_pci_for_each_slot_or_bus(vdev->pdev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196) vfio_pci_count_devs,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197) &count, slot);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201) /* Somewhere between 1 and count is OK */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202) if (!hdr.count || hdr.count > count)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205) group_fds = kcalloc(hdr.count, sizeof(*group_fds), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206) groups = kcalloc(hdr.count, sizeof(*groups), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207) if (!group_fds || !groups) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208) kfree(group_fds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1209) kfree(groups);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213) if (copy_from_user(group_fds, (void __user *)(arg + minsz),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214) hdr.count * sizeof(*group_fds))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215) kfree(group_fds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216) kfree(groups);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221) * For each group_fd, get the group through the vfio external
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222) * user interface and store the group and iommu ID. This
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223) * ensures the group is held across the reset.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225) for (group_idx = 0; group_idx < hdr.count; group_idx++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226) struct vfio_group *group;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227) struct fd f = fdget(group_fds[group_idx]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228) if (!f.file) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229) ret = -EBADF;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233) group = vfio_group_get_external_user(f.file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234) fdput(f);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235) if (IS_ERR(group)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236) ret = PTR_ERR(group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1237) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1238) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1239)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1240) groups[group_idx].group = group;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1241) groups[group_idx].id =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1242) vfio_external_user_iommu_id(group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1243) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1244)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1245) kfree(group_fds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1246)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1247) /* release reference to groups on error */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1248) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1249) goto hot_reset_release;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1250)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1251) info.count = hdr.count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1252) info.groups = groups;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1253)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1254) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1255) * Test whether all the affected devices are contained
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1256) * by the set of groups provided by the user.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1257) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1258) ret = vfio_pci_for_each_slot_or_bus(vdev->pdev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1259) vfio_pci_validate_devs,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1260) &info, slot);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1261) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1262) goto hot_reset_release;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1263)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1264) devs.max_index = count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1265) devs.devices = kcalloc(count, sizeof(struct vfio_device *),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1266) GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1267) if (!devs.devices) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1268) ret = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1269) goto hot_reset_release;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1270) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1271)
		/*
		 * We need to take memory_lock for each device, but devices
		 * can share mmap_lock; to respect the lock ordering we must
		 * therefore zap the mappings and hold the vma_lock of each
		 * device first, and only then take each memory_lock.
		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1278) ret = vfio_pci_for_each_slot_or_bus(vdev->pdev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1279) vfio_pci_try_zap_and_vma_lock_cb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1280) &devs, slot);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1281) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1282) goto hot_reset_release;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1283)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1284) for (; mem_idx < devs.cur_index; mem_idx++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1285) struct vfio_pci_device *tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1286)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1287) tmp = vfio_device_data(devs.devices[mem_idx]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1288)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1289) ret = down_write_trylock(&tmp->memory_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1290) if (!ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1291) ret = -EBUSY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1292) goto hot_reset_release;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1293) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1294) mutex_unlock(&tmp->vma_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1295) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1296)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1297) /* User has access, do the reset */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1298) ret = pci_reset_bus(vdev->pdev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1299)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1300) hot_reset_release:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1301) for (i = 0; i < devs.cur_index; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1302) struct vfio_device *device;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1303) struct vfio_pci_device *tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1304)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1305) device = devs.devices[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1306) tmp = vfio_device_data(device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1307)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1308) if (i < mem_idx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1309) up_write(&tmp->memory_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1310) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1311) mutex_unlock(&tmp->vma_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1312) vfio_device_put(device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1313) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1314) kfree(devs.devices);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1315)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1316) for (group_idx--; group_idx >= 0; group_idx--)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1317) vfio_group_put_external_user(groups[group_idx].group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1318)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1319) kfree(groups);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1320) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1321) } else if (cmd == VFIO_DEVICE_IOEVENTFD) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1322) struct vfio_device_ioeventfd ioeventfd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1323) int count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1324)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1325) minsz = offsetofend(struct vfio_device_ioeventfd, fd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1326)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1327) if (copy_from_user(&ioeventfd, (void __user *)arg, minsz))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1328) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1329)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1330) if (ioeventfd.argsz < minsz)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1331) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1332)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1333) if (ioeventfd.flags & ~VFIO_DEVICE_IOEVENTFD_SIZE_MASK)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1334) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1335)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1336) count = ioeventfd.flags & VFIO_DEVICE_IOEVENTFD_SIZE_MASK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1337)
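		/*
		 * count is the access size in bytes and must have exactly
		 * one size bit set (1, 2, 4 or 8). An fd of -1 is passed
		 * through to tear down an existing ioeventfd.
		 */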
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1338) if (hweight8(count) != 1 || ioeventfd.fd < -1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1339) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1340)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1341) return vfio_pci_ioeventfd(vdev, ioeventfd.offset,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1342) ioeventfd.data, count, ioeventfd.fd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1343) } else if (cmd == VFIO_DEVICE_FEATURE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1344) struct vfio_device_feature feature;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1345) uuid_t uuid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1346)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1347) minsz = offsetofend(struct vfio_device_feature, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1348)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1349) if (copy_from_user(&feature, (void __user *)arg, minsz))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1350) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1351)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1352) if (feature.argsz < minsz)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1353) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1354)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1355) /* Check unknown flags */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1356) if (feature.flags & ~(VFIO_DEVICE_FEATURE_MASK |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1357) VFIO_DEVICE_FEATURE_SET |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1358) VFIO_DEVICE_FEATURE_GET |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1359) VFIO_DEVICE_FEATURE_PROBE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1360) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1361)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1362) /* GET & SET are mutually exclusive except with PROBE */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1363) if (!(feature.flags & VFIO_DEVICE_FEATURE_PROBE) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1364) (feature.flags & VFIO_DEVICE_FEATURE_SET) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1365) (feature.flags & VFIO_DEVICE_FEATURE_GET))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1366) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1367)
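		/*
		 * Only the VF token feature is handled here. A plausible
		 * userspace sketch programming a new token (device_fd open
		 * on the PF, uuid a 16-byte value; names illustrative):
		 *
		 *	size_t sz = sizeof(struct vfio_device_feature) + 16;
		 *	struct vfio_device_feature *feat = malloc(sz);
		 *
		 *	feat->argsz = sz;
		 *	feat->flags = VFIO_DEVICE_FEATURE_SET |
		 *		      VFIO_DEVICE_FEATURE_PCI_VF_TOKEN;
		 *	memcpy(feat->data, uuid, 16);
		 *	ioctl(device_fd, VFIO_DEVICE_FEATURE, feat);
		 */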
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1368) switch (feature.flags & VFIO_DEVICE_FEATURE_MASK) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1369) case VFIO_DEVICE_FEATURE_PCI_VF_TOKEN:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1370) if (!vdev->vf_token)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1371) return -ENOTTY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1372)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1373) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1374) * We do not support GET of the VF Token UUID as this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1375) * could expose the token of the previous device user.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1376) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1377) if (feature.flags & VFIO_DEVICE_FEATURE_GET)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1378) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1379)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1380) if (feature.flags & VFIO_DEVICE_FEATURE_PROBE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1381) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1382)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1383) /* Don't SET unless told to do so */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1384) if (!(feature.flags & VFIO_DEVICE_FEATURE_SET))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1385) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1386)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1387) if (feature.argsz < minsz + sizeof(uuid))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1388) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1389)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1390) if (copy_from_user(&uuid, (void __user *)(arg + minsz),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1391) sizeof(uuid)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1392) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1393)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1394) mutex_lock(&vdev->vf_token->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1395) uuid_copy(&vdev->vf_token->uuid, &uuid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1396) mutex_unlock(&vdev->vf_token->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1397)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1398) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1399) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1400) return -ENOTTY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1401) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1402) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1403)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1404) return -ENOTTY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1405) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1406)
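/*
 * Common read/write path. The file offset encodes the target region in
 * its upper bits (index = offset >> VFIO_PCI_OFFSET_SHIFT), which is why
 * VFIO_DEVICE_GET_REGION_INFO reports a distinct offset per region. A
 * userspace sketch reading the vendor ID from config space (reg obtained
 * from that ioctl for VFIO_PCI_CONFIG_REGION_INDEX; names illustrative):
 *
 *	uint16_t vendor;
 *
 *	pread(device_fd, &vendor, sizeof(vendor), reg.offset + PCI_VENDOR_ID);
 */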
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1407) static ssize_t vfio_pci_rw(void *device_data, char __user *buf,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1408) size_t count, loff_t *ppos, bool iswrite)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1409) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1410) unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1411) struct vfio_pci_device *vdev = device_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1412)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1413) if (index >= VFIO_PCI_NUM_REGIONS + vdev->num_regions)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1414) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1415)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1416) switch (index) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1417) case VFIO_PCI_CONFIG_REGION_INDEX:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1418) return vfio_pci_config_rw(vdev, buf, count, ppos, iswrite);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1419)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1420) case VFIO_PCI_ROM_REGION_INDEX:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1421) if (iswrite)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1422) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1423) return vfio_pci_bar_rw(vdev, buf, count, ppos, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1424)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1425) case VFIO_PCI_BAR0_REGION_INDEX ... VFIO_PCI_BAR5_REGION_INDEX:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1426) return vfio_pci_bar_rw(vdev, buf, count, ppos, iswrite);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1427)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1428) case VFIO_PCI_VGA_REGION_INDEX:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1429) return vfio_pci_vga_rw(vdev, buf, count, ppos, iswrite);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1430) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1431) index -= VFIO_PCI_NUM_REGIONS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1432) return vdev->region[index].ops->rw(vdev, buf,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1433) count, ppos, iswrite);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1434) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1435)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1436) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1437) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1438)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1439) static ssize_t vfio_pci_read(void *device_data, char __user *buf,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1440) size_t count, loff_t *ppos)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1441) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1442) if (!count)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1443) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1444)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1445) return vfio_pci_rw(device_data, buf, count, ppos, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1446) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1447)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1448) static ssize_t vfio_pci_write(void *device_data, const char __user *buf,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1449) size_t count, loff_t *ppos)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1450) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1451) if (!count)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1452) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1453)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1454) return vfio_pci_rw(device_data, (char __user *)buf, count, ppos, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1455) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1456)
/* Return 1 with mappings zapped and vma_lock held, 0 on contention (only with @try) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1458) static int vfio_pci_zap_and_vma_lock(struct vfio_pci_device *vdev, bool try)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1459) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1460) struct vfio_pci_mmap_vma *mmap_vma, *tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1461)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1462) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1463) * Lock ordering:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1464) * vma_lock is nested under mmap_lock for vm_ops callback paths.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1465) * The memory_lock semaphore is used by both code paths calling
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1466) * into this function to zap vmas and the vm_ops.fault callback
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1467) * to protect the memory enable state of the device.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1468) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1469) * When zapping vmas we need to maintain the mmap_lock => vma_lock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1470) * ordering, which requires using vma_lock to walk vma_list to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1471) * acquire an mm, then dropping vma_lock to get the mmap_lock and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1472) * reacquiring vma_lock. This logic is derived from similar
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1473) * requirements in uverbs_user_mmap_disassociate().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1474) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1475) * mmap_lock must always be the top-level lock when it is taken.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1476) * Therefore we can only hold the memory_lock write lock when
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1477) * vma_list is empty, as we'd need to take mmap_lock to clear
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1478) * entries. vma_list can only be guaranteed empty when holding
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1479) * vma_lock, thus memory_lock is nested under vma_lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1480) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1481) * This enables the vm_ops.fault callback to acquire vma_lock,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1482) * followed by memory_lock read lock, while already holding
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1483) * mmap_lock without risk of deadlock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1484) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1485) while (1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1486) struct mm_struct *mm = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1487)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1488) if (try) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1489) if (!mutex_trylock(&vdev->vma_lock))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1490) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1491) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1492) mutex_lock(&vdev->vma_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1493) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1494) while (!list_empty(&vdev->vma_list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1495) mmap_vma = list_first_entry(&vdev->vma_list,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1496) struct vfio_pci_mmap_vma,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1497) vma_next);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1498) mm = mmap_vma->vma->vm_mm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1499) if (mmget_not_zero(mm))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1500) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1501)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1502) list_del(&mmap_vma->vma_next);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1503) kfree(mmap_vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1504) mm = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1505) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1506) if (!mm)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1507) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1508) mutex_unlock(&vdev->vma_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1509)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1510) if (try) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1511) if (!mmap_read_trylock(mm)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1512) mmput(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1513) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1514) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1515) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1516) mmap_read_lock(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1517) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1518) if (try) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1519) if (!mutex_trylock(&vdev->vma_lock)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1520) mmap_read_unlock(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1521) mmput(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1522) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1523) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1524) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1525) mutex_lock(&vdev->vma_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1526) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1527) list_for_each_entry_safe(mmap_vma, tmp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1528) &vdev->vma_list, vma_next) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1529) struct vm_area_struct *vma = mmap_vma->vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1530)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1531) if (vma->vm_mm != mm)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1532) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1533)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1534) list_del(&mmap_vma->vma_next);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1535) kfree(mmap_vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1536)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1537) zap_vma_ptes(vma, vma->vm_start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1538) vma->vm_end - vma->vm_start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1539) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1540) mutex_unlock(&vdev->vma_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1541) mmap_read_unlock(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1542) mmput(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1543) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1544) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1545)
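/*
 * Zap all mappings and return with memory_lock held for write, blocking
 * both new faults and memory-decode toggles while the caller (e.g. a
 * reset path) runs; the caller drops it with up_write(). The vma_lock
 * acquired by the zap is released here since vma_list is now empty.
 */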
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1546) void vfio_pci_zap_and_down_write_memory_lock(struct vfio_pci_device *vdev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1547) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1548) vfio_pci_zap_and_vma_lock(vdev, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1549) down_write(&vdev->memory_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1550) mutex_unlock(&vdev->vma_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1551) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1552)
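/*
 * Take memory_lock for write and ensure memory decode is enabled so the
 * caller can safely perform MMIO-backed operations (e.g. pci_map_rom()).
 * Returns the original PCI_COMMAND word for the paired
 * vfio_pci_memory_unlock_and_restore() to write back.
 */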
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1553) u16 vfio_pci_memory_lock_and_enable(struct vfio_pci_device *vdev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1554) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1555) u16 cmd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1556)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1557) down_write(&vdev->memory_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1558) pci_read_config_word(vdev->pdev, PCI_COMMAND, &cmd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1559) if (!(cmd & PCI_COMMAND_MEMORY))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1560) pci_write_config_word(vdev->pdev, PCI_COMMAND,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1561) cmd | PCI_COMMAND_MEMORY);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1562)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1563) return cmd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1564) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1565)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1566) void vfio_pci_memory_unlock_and_restore(struct vfio_pci_device *vdev, u16 cmd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1567) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1568) pci_write_config_word(vdev->pdev, PCI_COMMAND, cmd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1569) up_write(&vdev->memory_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1570) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1571)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1572) /* Caller holds vma_lock */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1573) static int __vfio_pci_add_vma(struct vfio_pci_device *vdev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1574) struct vm_area_struct *vma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1575) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1576) struct vfio_pci_mmap_vma *mmap_vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1577)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1578) mmap_vma = kmalloc(sizeof(*mmap_vma), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1579) if (!mmap_vma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1580) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1581)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1582) mmap_vma->vma = vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1583) list_add(&mmap_vma->vma_next, &vdev->vma_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1584)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1585) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1586) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1587)
/*
 * vm_ops.open runs when a vma is duplicated or split; zap it so that the
 * mapping is faulted back in on access and our vma_list only tracks
 * mappings accessed since the last zap.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1592) static void vfio_pci_mmap_open(struct vm_area_struct *vma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1593) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1594) zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1595) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1596)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1597) static void vfio_pci_mmap_close(struct vm_area_struct *vma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1598) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1599) struct vfio_pci_device *vdev = vma->vm_private_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1600) struct vfio_pci_mmap_vma *mmap_vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1601)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1602) mutex_lock(&vdev->vma_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1603) list_for_each_entry(mmap_vma, &vdev->vma_list, vma_next) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1604) if (mmap_vma->vma == vma) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1605) list_del(&mmap_vma->vma_next);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1606) kfree(mmap_vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1607) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1608) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1609) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1610) mutex_unlock(&vdev->vma_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1611) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1612)
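/*
 * Fault handler: with vma_lock and a read hold on memory_lock, verify
 * memory decode is still enabled, populate the entire vma in one shot,
 * and record it on vma_list so a later zap can tear it down again.
 */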
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1613) static vm_fault_t vfio_pci_mmap_fault(struct vm_fault *vmf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1614) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1615) struct vm_area_struct *vma = vmf->vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1616) struct vfio_pci_device *vdev = vma->vm_private_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1617) struct vfio_pci_mmap_vma *mmap_vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1618) vm_fault_t ret = VM_FAULT_NOPAGE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1619)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1620) mutex_lock(&vdev->vma_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1621) down_read(&vdev->memory_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1622)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1623) if (!__vfio_pci_memory_enabled(vdev)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1624) ret = VM_FAULT_SIGBUS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1625) goto up_out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1626) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1627)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1628) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1629) * We populate the whole vma on fault, so we need to test whether
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1630) * the vma has already been mapped, such as for concurrent faults
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1631) * to the same vma. io_remap_pfn_range() will trigger a BUG_ON if
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1632) * we ask it to fill the same range again.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1633) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1634) list_for_each_entry(mmap_vma, &vdev->vma_list, vma_next) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1635) if (mmap_vma->vma == vma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1636) goto up_out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1637) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1638)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1639) if (io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1640) vma->vm_end - vma->vm_start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1641) vma->vm_page_prot)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1642) ret = VM_FAULT_SIGBUS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1643) zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1644) goto up_out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1645) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1646)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1647) if (__vfio_pci_add_vma(vdev, vma)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1648) ret = VM_FAULT_OOM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1649) zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1650) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1651)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1652) up_out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1653) up_read(&vdev->memory_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1654) mutex_unlock(&vdev->vma_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1655) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1656) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1657)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1658) static const struct vm_operations_struct vfio_pci_mmap_ops = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1659) .open = vfio_pci_mmap_open,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1660) .close = vfio_pci_mmap_close,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1661) .fault = vfio_pci_mmap_fault,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1662) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1663)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1664) static int vfio_pci_mmap(void *device_data, struct vm_area_struct *vma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1665) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1666) struct vfio_pci_device *vdev = device_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1667) struct pci_dev *pdev = vdev->pdev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1668) unsigned int index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1669) u64 phys_len, req_len, pgoff, req_start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1670) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1671)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1672) index = vma->vm_pgoff >> (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1673)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1674) if (index >= VFIO_PCI_NUM_REGIONS + vdev->num_regions)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1675) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1676) if (vma->vm_end < vma->vm_start)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1677) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1678) if ((vma->vm_flags & VM_SHARED) == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1679) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1680) if (index >= VFIO_PCI_NUM_REGIONS) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1681) int regnum = index - VFIO_PCI_NUM_REGIONS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1682) struct vfio_pci_region *region = vdev->region + regnum;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1683)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1684) if (region->ops && region->ops->mmap &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1685) (region->flags & VFIO_REGION_INFO_FLAG_MMAP))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1686) return region->ops->mmap(vdev, region, vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1687) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1688) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1689) if (index >= VFIO_PCI_ROM_REGION_INDEX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1690) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1691) if (!vdev->bar_mmap_supported[index])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1692) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1693)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1694) phys_len = PAGE_ALIGN(pci_resource_len(pdev, index));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1695) req_len = vma->vm_end - vma->vm_start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1696) pgoff = vma->vm_pgoff &
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1697) ((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1698) req_start = pgoff << PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1699)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1700) if (req_start + req_len > phys_len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1701) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1702)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1703) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1704) * Even though we don't make use of the barmap for the mmap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1705) * we need to request the region and the barmap tracks that.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1706) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1707) if (!vdev->barmap[index]) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1708) ret = pci_request_selected_regions(pdev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1709) 1 << index, "vfio-pci");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1710) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1711) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1712)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1713) vdev->barmap[index] = pci_iomap(pdev, index, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1714) if (!vdev->barmap[index]) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1715) pci_release_selected_regions(pdev, 1 << index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1716) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1717) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1718) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1719)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1720) vma->vm_private_data = vdev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1721) vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1722) vma->vm_pgoff = (pci_resource_start(pdev, index) >> PAGE_SHIFT) + pgoff;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1723)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1724) /*
* See remap_pfn_range(), called from vfio_pci_mmap_fault(), but we
* can't change vm_flags within the fault handler. Set them now.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1727) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1728) vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1729) vma->vm_ops = &vfio_pci_mmap_ops;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1730)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1731) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1732) }
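
/*
 * Illustrative userspace sketch (not part of the driver): mapping BAR0
 * through the offset encoding decoded above. The offset is normally
 * taken from VFIO_DEVICE_GET_REGION_INFO rather than computed by hand,
 * since the kernel places the region index above VFIO_PCI_OFFSET_SHIFT:
 *
 *	struct vfio_region_info info = {
 *		.argsz = sizeof(info),
 *		.index = VFIO_PCI_BAR0_REGION_INDEX,
 *	};
 *
 *	ioctl(device_fd, VFIO_DEVICE_GET_REGION_INFO, &info);
 *	void *bar0 = mmap(NULL, info.size, PROT_READ | PROT_WRITE,
 *			  MAP_SHARED, device_fd, info.offset);
 */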
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1733)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1734) static void vfio_pci_request(void *device_data, unsigned int count)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1735) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1736) struct vfio_pci_device *vdev = device_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1737) struct pci_dev *pdev = vdev->pdev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1738)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1739) mutex_lock(&vdev->igate);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1740)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1741) if (vdev->req_trigger) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1742) if (!(count % 10))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1743) pci_notice_ratelimited(pdev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1744) "Relaying device request to user (#%u)\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1745) count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1746) eventfd_signal(vdev->req_trigger, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1747) } else if (count == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1748) pci_warn(pdev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1749) "No device request channel registered, blocked until released by user\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1750) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1751)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1752) mutex_unlock(&vdev->igate);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1753) }
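
/*
 * Illustrative userspace sketch (not part of the driver): registering
 * the req_trigger eventfd signaled above, using the standard
 * VFIO_DEVICE_SET_IRQS encoding for VFIO_PCI_REQ_IRQ_INDEX:
 *
 *	struct { struct vfio_irq_set set; int32_t fd; } req = {
 *		.set = {
 *			.argsz = sizeof(req),
 *			.flags = VFIO_IRQ_SET_DATA_EVENTFD |
 *				 VFIO_IRQ_SET_ACTION_TRIGGER,
 *			.index = VFIO_PCI_REQ_IRQ_INDEX,
 *			.count = 1,
 *		},
 *		.fd = eventfd(0, EFD_CLOEXEC),
 *	};
 *
 *	ioctl(device_fd, VFIO_DEVICE_SET_IRQS, &req.set);
 */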
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1754)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1755) static int vfio_pci_validate_vf_token(struct vfio_pci_device *vdev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1756) bool vf_token, uuid_t *uuid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1757) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1758) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1759) * There's always some degree of trust or collaboration between SR-IOV
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1760) * PF and VFs, even if just that the PF hosts the SR-IOV capability and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1761) * can disrupt VFs with a reset, but often the PF has more explicit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1762) * access to deny service to the VF or access data passed through the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1763) * VF. We therefore require an opt-in via a shared VF token (UUID) to
* represent this trust. This both prevents a VF driver from wrongly
* assuming the PF driver is a trusted, in-kernel driver and prevents
* a PF driver from being replaced by a rogue driver unknown to in-use
* VF drivers.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1768) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1769) * Therefore when presented with a VF, if the PF is a vfio device and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1770) * it is bound to the vfio-pci driver, the user needs to provide a VF
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1771) * token to access the device, in the form of appending a vf_token to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1772) * the device name, for example:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1773) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1774) * "0000:04:10.0 vf_token=bd8d9d2b-5a5f-4f5a-a211-f591514ba1f3"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1775) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1776) * When presented with a PF which has VFs in use, the user must also
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1777) * provide the current VF token to prove collaboration with existing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1778) * VF users. If VFs are not in use, the VF token provided for the PF
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1779) * device will act to set the VF token.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1780) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1781) * If the VF token is provided but unused, an error is generated.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1782) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1783) if (!vdev->pdev->is_virtfn && !vdev->vf_token && !vf_token)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1784) return 0; /* No VF token provided or required */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1785)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1786) if (vdev->pdev->is_virtfn) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1787) struct vfio_device *pf_dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1788) struct vfio_pci_device *pf_vdev = get_pf_vdev(vdev, &pf_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1789) bool match;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1790)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1791) if (!pf_vdev) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1792) if (!vf_token)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1793) return 0; /* PF is not vfio-pci, no VF token */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1794)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1795) pci_info_ratelimited(vdev->pdev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1796) "VF token incorrectly provided, PF not bound to vfio-pci\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1797) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1798) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1799)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1800) if (!vf_token) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1801) vfio_device_put(pf_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1802) pci_info_ratelimited(vdev->pdev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1803) "VF token required to access device\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1804) return -EACCES;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1805) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1806)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1807) mutex_lock(&pf_vdev->vf_token->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1808) match = uuid_equal(uuid, &pf_vdev->vf_token->uuid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1809) mutex_unlock(&pf_vdev->vf_token->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1810)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1811) vfio_device_put(pf_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1812)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1813) if (!match) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1814) pci_info_ratelimited(vdev->pdev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1815) "Incorrect VF token provided for device\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1816) return -EACCES;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1817) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1818) } else if (vdev->vf_token) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1819) mutex_lock(&vdev->vf_token->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1820) if (vdev->vf_token->users) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1821) if (!vf_token) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1822) mutex_unlock(&vdev->vf_token->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1823) pci_info_ratelimited(vdev->pdev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1824) "VF token required to access device\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1825) return -EACCES;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1826) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1827)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1828) if (!uuid_equal(uuid, &vdev->vf_token->uuid)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1829) mutex_unlock(&vdev->vf_token->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1830) pci_info_ratelimited(vdev->pdev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1831) "Incorrect VF token provided for device\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1832) return -EACCES;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1833) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1834) } else if (vf_token) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1835) uuid_copy(&vdev->vf_token->uuid, uuid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1836) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1837)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1838) mutex_unlock(&vdev->vf_token->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1839) } else if (vf_token) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1840) pci_info_ratelimited(vdev->pdev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1841) "VF token incorrectly provided, not a PF or VF\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1842) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1843) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1844)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1845) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1846) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1847)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1848) #define VF_TOKEN_ARG "vf_token="
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1849)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1850) static int vfio_pci_match(void *device_data, char *buf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1851) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1852) struct vfio_pci_device *vdev = device_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1853) bool vf_token = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1854) uuid_t uuid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1855) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1856)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1857) if (strncmp(pci_name(vdev->pdev), buf, strlen(pci_name(vdev->pdev))))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1858) return 0; /* No match */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1859)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1860) if (strlen(buf) > strlen(pci_name(vdev->pdev))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1861) buf += strlen(pci_name(vdev->pdev));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1862)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1863) if (*buf != ' ')
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1864) return 0; /* No match: non-whitespace after name */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1865)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1866) while (*buf) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1867) if (*buf == ' ') {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1868) buf++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1869) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1870) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1871)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1872) if (!vf_token && !strncmp(buf, VF_TOKEN_ARG,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1873) strlen(VF_TOKEN_ARG))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1874) buf += strlen(VF_TOKEN_ARG);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1875)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1876) if (strlen(buf) < UUID_STRING_LEN)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1877) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1878)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1879) ret = uuid_parse(buf, &uuid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1880) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1881) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1882)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1883) vf_token = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1884) buf += UUID_STRING_LEN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1885) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1886) /* Unknown/duplicate option */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1887) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1888) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1889) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1890) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1891)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1892) ret = vfio_pci_validate_vf_token(vdev, vf_token, &uuid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1893) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1894) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1895)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1896) return 1; /* Match */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1897) }
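
/*
 * Match strings accepted above for device "0000:04:10.0"; the second
 * form carries the VF token consumed by vfio_pci_validate_vf_token():
 *
 *	"0000:04:10.0"
 *	"0000:04:10.0 vf_token=bd8d9d2b-5a5f-4f5a-a211-f591514ba1f3"
 */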
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1898)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1899) static const struct vfio_device_ops vfio_pci_ops = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1900) .name = "vfio-pci",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1901) .open = vfio_pci_open,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1902) .release = vfio_pci_release,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1903) .ioctl = vfio_pci_ioctl,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1904) .read = vfio_pci_read,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1905) .write = vfio_pci_write,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1906) .mmap = vfio_pci_mmap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1907) .request = vfio_pci_request,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1908) .match = vfio_pci_match,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1909) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1910)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1911) static int vfio_pci_reflck_attach(struct vfio_pci_device *vdev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1912) static void vfio_pci_reflck_put(struct vfio_pci_reflck *reflck);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1913)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1914) static int vfio_pci_bus_notifier(struct notifier_block *nb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1915) unsigned long action, void *data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1916) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1917) struct vfio_pci_device *vdev = container_of(nb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1918) struct vfio_pci_device, nb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1919) struct device *dev = data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1920) struct pci_dev *pdev = to_pci_dev(dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1921) struct pci_dev *physfn = pci_physfn(pdev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1922)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1923) if (action == BUS_NOTIFY_ADD_DEVICE &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1924) pdev->is_virtfn && physfn == vdev->pdev) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1925) pci_info(vdev->pdev, "Captured SR-IOV VF %s driver_override\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1926) pci_name(pdev));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1927) pdev->driver_override = kasprintf(GFP_KERNEL, "%s",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1928) vfio_pci_ops.name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1929) } else if (action == BUS_NOTIFY_BOUND_DRIVER &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1930) pdev->is_virtfn && physfn == vdev->pdev) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1931) struct pci_driver *drv = pci_dev_driver(pdev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1932)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1933) if (drv && drv != &vfio_pci_driver)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1934) pci_warn(vdev->pdev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1935) "VF %s bound to driver %s while PF bound to vfio-pci\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1936) pci_name(pdev), drv->name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1937) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1938)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1939) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1940) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1941)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1942) static int vfio_pci_vf_init(struct vfio_pci_device *vdev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1943) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1944) struct pci_dev *pdev = vdev->pdev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1945) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1946)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1947) if (!pdev->is_physfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1948) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1949)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1950) vdev->vf_token = kzalloc(sizeof(*vdev->vf_token), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1951) if (!vdev->vf_token)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1952) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1953)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1954) mutex_init(&vdev->vf_token->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1955) uuid_gen(&vdev->vf_token->uuid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1956)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1957) vdev->nb.notifier_call = vfio_pci_bus_notifier;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1958) ret = bus_register_notifier(&pci_bus_type, &vdev->nb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1959) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1960) kfree(vdev->vf_token);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1961) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1962) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1963) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1964) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1965)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1966) static void vfio_pci_vf_uninit(struct vfio_pci_device *vdev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1967) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1968) if (!vdev->vf_token)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1969) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1970)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1971) bus_unregister_notifier(&pci_bus_type, &vdev->nb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1972) WARN_ON(vdev->vf_token->users);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1973) mutex_destroy(&vdev->vf_token->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1974) kfree(vdev->vf_token);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1975) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1976)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1977) static int vfio_pci_vga_init(struct vfio_pci_device *vdev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1978) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1979) struct pci_dev *pdev = vdev->pdev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1980) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1981)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1982) if (!vfio_pci_is_vga(pdev))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1983) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1984)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1985) ret = vga_client_register(pdev, vdev, NULL, vfio_pci_set_vga_decode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1986) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1987) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1988) vga_set_legacy_decoding(pdev, vfio_pci_set_vga_decode(vdev, false));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1989) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1990) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1991)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1992) static void vfio_pci_vga_uninit(struct vfio_pci_device *vdev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1993) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1994) struct pci_dev *pdev = vdev->pdev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1995)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1996) if (!vfio_pci_is_vga(pdev))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1997) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1998) vga_client_register(pdev, NULL, NULL, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1999) vga_set_legacy_decoding(pdev, VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2000) VGA_RSRC_LEGACY_IO |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2001) VGA_RSRC_LEGACY_MEM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2002) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2003)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2004) static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2005) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2006) struct vfio_pci_device *vdev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2007) struct iommu_group *group;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2008) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2009)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2010) if (vfio_pci_is_denylisted(pdev))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2011) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2012)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2013) if (pdev->hdr_type != PCI_HEADER_TYPE_NORMAL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2014) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2015)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2016) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2017) * Prevent binding to PFs with VFs enabled, the VFs might be in use
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2018) * by the host or other users. We cannot capture the VFs if they
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2019) * already exist, nor can we track VF users. Disabling SR-IOV here
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2020) * would initiate removing the VFs, which would unbind the driver,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2021) * which is prone to blocking if that VF is also in use by vfio-pci.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2022) * Just reject these PFs and let the user sort it out.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2023) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2024) if (pci_num_vf(pdev)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2025) pci_warn(pdev, "Cannot bind to PF with SR-IOV enabled\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2026) return -EBUSY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2027) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2028)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2029) group = vfio_iommu_group_get(&pdev->dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2030) if (!group)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2031) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2032)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2033) vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2034) if (!vdev) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2035) ret = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2036) goto out_group_put;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2037) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2038)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2039) vdev->pdev = pdev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2040) vdev->irq_type = VFIO_PCI_NUM_IRQS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2041) mutex_init(&vdev->igate);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2042) spin_lock_init(&vdev->irqlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2043) mutex_init(&vdev->ioeventfds_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2044) INIT_LIST_HEAD(&vdev->dummy_resources_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2045) INIT_LIST_HEAD(&vdev->ioeventfds_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2046) mutex_init(&vdev->vma_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2047) INIT_LIST_HEAD(&vdev->vma_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2048) init_rwsem(&vdev->memory_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2049)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2050) ret = vfio_pci_reflck_attach(vdev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2051) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2052) goto out_free;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2053) ret = vfio_pci_vf_init(vdev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2054) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2055) goto out_reflck;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2056) ret = vfio_pci_vga_init(vdev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2057) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2058) goto out_vf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2059)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2060) vfio_pci_probe_power_state(vdev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2061)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2062) if (!disable_idle_d3) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2063) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2064) * pci-core sets the device power state to an unknown value at
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2065) * bootup and after being removed from a driver. The only
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2066) * transition it allows from this unknown state is to D0, which
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2067) * typically happens when a driver calls pci_enable_device().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2068) * We're not ready to enable the device yet, but we do want to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2069) * be able to get to D3. Therefore first do a D0 transition
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2070) * before going to D3.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2071) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2072) vfio_pci_set_power_state(vdev, PCI_D0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2073) vfio_pci_set_power_state(vdev, PCI_D3hot);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2074) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2075)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2076) ret = vfio_add_group_dev(&pdev->dev, &vfio_pci_ops, vdev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2077) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2078) goto out_power;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2079) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2080)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2081) out_power:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2082) if (!disable_idle_d3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2083) vfio_pci_set_power_state(vdev, PCI_D0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2084) out_vf:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2085) vfio_pci_vf_uninit(vdev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2086) out_reflck:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2087) vfio_pci_reflck_put(vdev->reflck);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2088) out_free:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2089) kfree(vdev->pm_save);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2090) kfree(vdev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2091) out_group_put:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2092) vfio_iommu_group_put(group, &pdev->dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2093) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2094) }
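
/*
 * Illustrative sketch (not part of the driver): two common ways to get
 * vfio_pci_probe() to run for a device, via a dynamic ID or by
 * overriding driver matching for a single device:
 *
 *	# echo 8086:10fb > /sys/bus/pci/drivers/vfio-pci/new_id
 * or:
 *	# echo vfio-pci > /sys/bus/pci/devices/0000:04:10.0/driver_override
 *	# echo 0000:04:10.0 > /sys/bus/pci/drivers_probe
 */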
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2095)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2096) static void vfio_pci_remove(struct pci_dev *pdev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2097) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2098) struct vfio_pci_device *vdev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2099)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2100) pci_disable_sriov(pdev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2101)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2102) vdev = vfio_del_group_dev(&pdev->dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2103) if (!vdev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2104) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2105)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2106) vfio_pci_vf_uninit(vdev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2107) vfio_pci_reflck_put(vdev->reflck);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2108) vfio_pci_vga_uninit(vdev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2109)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2110) vfio_iommu_group_put(pdev->dev.iommu_group, &pdev->dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2111)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2112) if (!disable_idle_d3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2113) vfio_pci_set_power_state(vdev, PCI_D0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2114)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2115) mutex_destroy(&vdev->ioeventfds_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2116) kfree(vdev->region);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2117) kfree(vdev->pm_save);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2118) kfree(vdev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2119) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2120)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2121) static pci_ers_result_t vfio_pci_aer_err_detected(struct pci_dev *pdev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2122) pci_channel_state_t state)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2123) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2124) struct vfio_pci_device *vdev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2125) struct vfio_device *device;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2126)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2127) device = vfio_device_get_from_dev(&pdev->dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2128) if (device == NULL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2129) return PCI_ERS_RESULT_DISCONNECT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2130)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2131) vdev = vfio_device_data(device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2132) if (vdev == NULL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2133) vfio_device_put(device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2134) return PCI_ERS_RESULT_DISCONNECT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2135) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2136)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2137) mutex_lock(&vdev->igate);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2138)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2139) if (vdev->err_trigger)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2140) eventfd_signal(vdev->err_trigger, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2141)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2142) mutex_unlock(&vdev->igate);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2143)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2144) vfio_device_put(device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2145)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2146) return PCI_ERS_RESULT_CAN_RECOVER;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2147) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2148)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2149) static int vfio_pci_sriov_configure(struct pci_dev *pdev, int nr_virtfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2150) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2151) struct vfio_pci_device *vdev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2152) struct vfio_device *device;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2153) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2154)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2155) might_sleep();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2156)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2157) if (!enable_sriov)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2158) return -ENOENT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2159)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2160) device = vfio_device_get_from_dev(&pdev->dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2161) if (!device)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2162) return -ENODEV;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2163)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2164) vdev = vfio_device_data(device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2165) if (!vdev) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2166) vfio_device_put(device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2167) return -ENODEV;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2168) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2169)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2170) if (nr_virtfn == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2171) pci_disable_sriov(pdev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2172) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2173) ret = pci_enable_sriov(pdev, nr_virtfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2174)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2175) vfio_device_put(device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2176)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2177) return ret < 0 ? ret : nr_virtfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2178) }
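
/*
 * Illustrative sketch (not part of the driver): with the enable_sriov
 * module parameter set, VF creation reaches vfio_pci_sriov_configure()
 * through the standard sysfs interface:
 *
 *	# echo 4 > /sys/bus/pci/devices/0000:01:00.0/sriov_numvfs
 */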
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2179)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2180) static const struct pci_error_handlers vfio_err_handlers = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2181) .error_detected = vfio_pci_aer_err_detected,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2182) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2183)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2184) static struct pci_driver vfio_pci_driver = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2185) .name = "vfio-pci",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2186) .id_table = NULL, /* only dynamic ids */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2187) .probe = vfio_pci_probe,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2188) .remove = vfio_pci_remove,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2189) .sriov_configure = vfio_pci_sriov_configure,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2190) .err_handler = &vfio_err_handlers,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2191) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2192)
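/*
 * A reflck is shared by every vfio-pci device within the same bus or
 * slot reset domain, serializing opens and releases across the group:
 * vfio_pci_reflck_attach() reuses an existing reflck found on a sibling
 * device via vfio_pci_reflck_find(), else allocates a fresh one.
 */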
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2193) static DEFINE_MUTEX(reflck_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2194)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2195) static struct vfio_pci_reflck *vfio_pci_reflck_alloc(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2196) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2197) struct vfio_pci_reflck *reflck;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2198)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2199) reflck = kzalloc(sizeof(*reflck), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2200) if (!reflck)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2201) return ERR_PTR(-ENOMEM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2202)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2203) kref_init(&reflck->kref);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2204) mutex_init(&reflck->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2205)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2206) return reflck;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2207) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2208)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2209) static void vfio_pci_reflck_get(struct vfio_pci_reflck *reflck)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2210) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2211) kref_get(&reflck->kref);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2212) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2213)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2214) static int vfio_pci_reflck_find(struct pci_dev *pdev, void *data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2215) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2216) struct vfio_pci_reflck **preflck = data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2217) struct vfio_device *device;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2218) struct vfio_pci_device *vdev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2219)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2220) device = vfio_device_get_from_dev(&pdev->dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2221) if (!device)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2222) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2223)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2224) if (pci_dev_driver(pdev) != &vfio_pci_driver) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2225) vfio_device_put(device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2226) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2227) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2228)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2229) vdev = vfio_device_data(device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2230)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2231) if (vdev->reflck) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2232) vfio_pci_reflck_get(vdev->reflck);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2233) *preflck = vdev->reflck;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2234) vfio_device_put(device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2235) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2236) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2237)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2238) vfio_device_put(device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2239) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2240) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2241)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2242) static int vfio_pci_reflck_attach(struct vfio_pci_device *vdev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2243) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2244) bool slot = !pci_probe_reset_slot(vdev->pdev->slot);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2245)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2246) mutex_lock(&reflck_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2247)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2248) if (pci_is_root_bus(vdev->pdev->bus) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2249) vfio_pci_for_each_slot_or_bus(vdev->pdev, vfio_pci_reflck_find,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2250) &vdev->reflck, slot) <= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2251) vdev->reflck = vfio_pci_reflck_alloc();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2252)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2253) mutex_unlock(&reflck_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2254)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2255) return PTR_ERR_OR_ZERO(vdev->reflck);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2256) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2257)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2258) static void vfio_pci_reflck_release(struct kref *kref)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2259) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2260) struct vfio_pci_reflck *reflck = container_of(kref,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2261) struct vfio_pci_reflck,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2262) kref);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2263)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2264) kfree(reflck);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2265) mutex_unlock(&reflck_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2266) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2267)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2268) static void vfio_pci_reflck_put(struct vfio_pci_reflck *reflck)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2269) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2270) kref_put_mutex(&reflck->kref, vfio_pci_reflck_release, &reflck_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2271) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2272)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2273) static int vfio_pci_get_unused_devs(struct pci_dev *pdev, void *data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2274) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2275) struct vfio_devices *devs = data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2276) struct vfio_device *device;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2277) struct vfio_pci_device *vdev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2278)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2279) if (devs->cur_index == devs->max_index)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2280) return -ENOSPC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2281)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2282) device = vfio_device_get_from_dev(&pdev->dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2283) if (!device)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2284) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2285)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2286) if (pci_dev_driver(pdev) != &vfio_pci_driver) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2287) vfio_device_put(device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2288) return -EBUSY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2289) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2290)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2291) vdev = vfio_device_data(device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2292)
/* Fail if the device is in use */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2294) if (vdev->refcnt) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2295) vfio_device_put(device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2296) return -EBUSY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2297) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2298)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2299) devs->devices[devs->cur_index++] = device;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2300) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2301) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2302)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2303) static int vfio_pci_try_zap_and_vma_lock_cb(struct pci_dev *pdev, void *data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2304) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2305) struct vfio_devices *devs = data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2306) struct vfio_device *device;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2307) struct vfio_pci_device *vdev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2308)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2309) if (devs->cur_index == devs->max_index)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2310) return -ENOSPC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2311)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2312) device = vfio_device_get_from_dev(&pdev->dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2313) if (!device)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2314) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2315)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2316) if (pci_dev_driver(pdev) != &vfio_pci_driver) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2317) vfio_device_put(device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2318) return -EBUSY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2319) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2320)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2321) vdev = vfio_device_data(device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2322)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2323) /*
* Locking multiple devices is prone to deadlock; run away and
* unwind if we hit contention.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2326) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2327) if (!vfio_pci_zap_and_vma_lock(vdev, true)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2328) vfio_device_put(device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2329) return -EBUSY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2330) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2331)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2332) devs->devices[devs->cur_index++] = device;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2333) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2334) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2335)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2336) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2337) * If a bus or slot reset is available for the provided device and:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2338) * - All of the devices affected by that bus or slot reset are unused
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2339) * (!refcnt)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2340) * - At least one of the affected devices is marked dirty via
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2341) * needs_reset (such as by lack of FLR support)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2342) * Then attempt to perform that bus or slot reset. Callers are required
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2343) * to hold vdev->reflck->lock, protecting the bus/slot reset group from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2344) * concurrent opens. A vfio_device reference is acquired for each device
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2345) * to prevent unbinds during the reset operation.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2346) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2347) * NB: vfio-core considers a group to be viable even if some devices are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2348) * bound to drivers like pci-stub or pcieport. Here we require all devices
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2349) * to be bound to vfio_pci since that's the only way we can be sure they
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2350) * stay put.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2351) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2352) static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2353) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2354) struct vfio_devices devs = { .cur_index = 0 };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2355) int i = 0, ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2356) bool slot = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2357) struct vfio_pci_device *tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2358)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2359) if (!pci_probe_reset_slot(vdev->pdev->slot))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2360) slot = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2361) else if (pci_probe_reset_bus(vdev->pdev->bus))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2362) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2363)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2364) if (vfio_pci_for_each_slot_or_bus(vdev->pdev, vfio_pci_count_devs,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2365) &i, slot) || !i)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2366) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2367)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2368) devs.max_index = i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2369) devs.devices = kcalloc(i, sizeof(struct vfio_device *), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2370) if (!devs.devices)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2371) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2372)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2373) if (vfio_pci_for_each_slot_or_bus(vdev->pdev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2374) vfio_pci_get_unused_devs,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2375) &devs, slot))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2376) goto put_devs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2377)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2378) /* Does at least one need a reset? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2379) for (i = 0; i < devs.cur_index; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2380) tmp = vfio_device_data(devs.devices[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2381) if (tmp->needs_reset) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2382) ret = pci_reset_bus(vdev->pdev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2383) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2384) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2385) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2386)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2387) put_devs:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2388) for (i = 0; i < devs.cur_index; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2389) tmp = vfio_device_data(devs.devices[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2390)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2391) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2392) * If reset was successful, affected devices no longer need
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2393) * a reset and we should return all the collateral devices
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2394) * to low power. If not successful, we either didn't reset
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2395) * the bus or timed out waiting for it, so let's not touch
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2396) * the power state.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2397) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2398) if (!ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2399) tmp->needs_reset = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2400)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2401) if (tmp != vdev && !disable_idle_d3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2402) vfio_pci_set_power_state(tmp, PCI_D3hot);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2403) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2404)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2405) vfio_device_put(devs.devices[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2406) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2407)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2408) kfree(devs.devices);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2409) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2410)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2411) static void __exit vfio_pci_cleanup(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2412) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2413) pci_unregister_driver(&vfio_pci_driver);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2414) vfio_pci_uninit_perm_bits();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2415) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2416)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2417) static void __init vfio_pci_fill_ids(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2418) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2419) char *p, *id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2420) int rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2421)
/* nothing to do if no ids were passed */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2423) if (ids[0] == '\0')
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2424) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2425)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2426) /* add ids specified in the module parameter */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2427) p = ids;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2428) while ((id = strsep(&p, ","))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2429) unsigned int vendor, device, subvendor = PCI_ANY_ID,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2430) subdevice = PCI_ANY_ID, class = 0, class_mask = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2431) int fields;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2432)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2433) if (!strlen(id))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2434) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2435)
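/*
 * sscanf() only writes the fields it matches, so any field
 * omitted from the entry keeps its default from above.
 */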
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2436) fields = sscanf(id, "%x:%x:%x:%x:%x:%x",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2437) &vendor, &device, &subvendor, &subdevice,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2438) &class, &class_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2439)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2440) if (fields < 2) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2441) pr_warn("invalid id string \"%s\"\n", id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2442) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2443) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2444)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2445) rc = pci_add_dynid(&vfio_pci_driver, vendor, device,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2446) subvendor, subdevice, class, class_mask, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2447) if (rc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2448) pr_warn("failed to add dynamic id [%04x:%04x[%04x:%04x]] class %#08x/%08x (%d)\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2449) vendor, device, subvendor, subdevice,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2450) class, class_mask, rc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2451) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2452) pr_info("add [%04x:%04x[%04x:%04x]] class %#08x/%08x\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2453) vendor, device, subvendor, subdevice,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2454) class, class_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2455) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2456) }
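
/*
 * Illustrative usage (device IDs hypothetical): IDs can be supplied at
 * load time via the module parameter parsed above,
 *     modprobe vfio-pci ids=10de:1b80
 * or after loading through the sysfs new_id interface, which
 * pci_add_dynid() mirrors:
 *     echo 10de 1b80 > /sys/bus/pci/drivers/vfio-pci/new_id
 */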
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2457)
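/*
 * The shared permission bits must exist before any device can be
 * probed, so allocate them before registering the driver; the error
 * path unwinds in reverse.
 */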
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2458) static int __init vfio_pci_init(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2459) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2460) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2461)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2462) /* Allocate shared config space permission data used by all devices */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2463) ret = vfio_pci_init_perm_bits();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2464) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2465) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2466)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2467) /* Register and scan for devices */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2468) ret = pci_register_driver(&vfio_pci_driver);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2469) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2470) goto out_driver;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2471)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2472) vfio_pci_fill_ids();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2473)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2474) if (disable_denylist)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2475) pr_warn("device denylist disabled.\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2476)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2477) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2478)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2479) out_driver:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2480) vfio_pci_uninit_perm_bits();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2481) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2482) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2483)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2484) module_init(vfio_pci_init);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2485) module_exit(vfio_pci_cleanup);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2486)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2487) MODULE_VERSION(DRIVER_VERSION);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2488) MODULE_LICENSE("GPL v2");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2489) MODULE_AUTHOR(DRIVER_AUTHOR);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2490) MODULE_DESCRIPTION(DRIVER_DESC);