^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) // SPDX-License-Identifier: GPL-2.0-or-later
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) * Copyright IBM Corporation 2001, 2005, 2006
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) * Copyright Dave Engebretsen & Todd Inglett 2001
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) * Copyright Linas Vepstas 2005, 2006
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) * Copyright 2001-2012 IBM Corporation.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) * Please address comments and feedback to Linas Vepstas <linas@austin.ibm.com>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11) #include <linux/delay.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12) #include <linux/sched.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) #include <linux/init.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) #include <linux/list.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15) #include <linux/pci.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) #include <linux/iommu.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) #include <linux/proc_fs.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18) #include <linux/rbtree.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19) #include <linux/reboot.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20) #include <linux/seq_file.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21) #include <linux/spinlock.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22) #include <linux/export.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23) #include <linux/of.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25) #include <linux/atomic.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) #include <asm/debugfs.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27) #include <asm/eeh.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28) #include <asm/eeh_event.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29) #include <asm/io.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30) #include <asm/iommu.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31) #include <asm/machdep.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32) #include <asm/ppc-pci.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33) #include <asm/rtas.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34) #include <asm/pte-walk.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37) /** Overview:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38) * EEH, or "Enhanced Error Handling" is a PCI bridge technology for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39) * dealing with PCI bus errors that can't be dealt with within the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40) * usual PCI framework, except by check-stopping the CPU. Systems
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41) * that are designed for high-availability/reliability cannot afford
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42) * to crash due to a "mere" PCI error, thus the need for EEH.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) * An EEH-capable bridge operates by converting a detected error
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44) * into a "slot freeze", taking the PCI adapter off-line, making
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45) * the slot behave, from the OS'es point of view, as if the slot
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46) * were "empty": all reads return 0xff's and all writes are silently
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47) * ignored. EEH slot isolation events can be triggered by parity
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48) * errors on the address or data busses (e.g. during posted writes),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49) * which in turn might be caused by low voltage on the bus, dust,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50) * vibration, humidity, radioactivity or plain-old failed hardware.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52) * Note, however, that one of the leading causes of EEH slot
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53) * freeze events are buggy device drivers, buggy device microcode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54) * or buggy device hardware. This is because any attempt by the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55) * device to bus-master data to a memory address that is not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56) * assigned to the device will trigger a slot freeze. (The idea
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57) * is to prevent devices-gone-wild from corrupting system memory).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58) * Buggy hardware/drivers will have a miserable time co-existing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59) * with EEH.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61) * Ideally, a PCI device driver, when suspecting that an isolation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62) * event has occurred (e.g. by reading 0xff's), will then ask EEH
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63) * whether this is the case, and then take appropriate steps to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64) * reset the PCI slot, the PCI device, and then resume operations.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65) * However, until that day, the checking is done here, with the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66) * eeh_check_failure() routine embedded in the MMIO macros. If
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67) * the slot is found to be isolated, an "EEH Event" is synthesized
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68) * and sent out for processing.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71) /* If a device driver keeps reading an MMIO register in an interrupt
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72) * handler after a slot isolation event, it might be broken.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73) * This sets the threshold for how many read attempts we allow
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74) * before printing an error message.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76) #define EEH_MAX_FAILS 2100000
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78) /* Time to wait for a PCI slot to report status, in milliseconds */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79) #define PCI_BUS_RESET_WAIT_MSEC (5*60*1000)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82) * EEH probe mode support, which is part of the flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 83) * is to support multiple platforms for EEH. Some platforms
 * like pSeries do PCI emulation based on device tree.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 85) * However, other platforms like powernv probe PCI devices
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 86) * from hardware. The flag is used to distinguish that.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 87) * In addition, struct eeh_ops::probe would be invoked for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88) * particular OF node or PCI device so that the corresponding
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89) * PE would be created there.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90) */
int eeh_subsystem_flags;
EXPORT_SYMBOL(eeh_subsystem_flags);

/*
 * Maximum number of allowed PE freezes. If one particular PE's
 * freeze count in the last hour exceeds this limit, the PE will
 * be forced offline permanently.
 */
u32 eeh_max_freezes = 5;

/*
 * Controls whether a recovery event should be scheduled when an
 * isolated device is discovered. This is only really useful for
 * debugging problems with the EEH core.
 */
bool eeh_debugfs_no_recover;

/* Platform dependent EEH operations */
struct eeh_ops *eeh_ops = NULL;

/* Lock to avoid races due to multiple reports of an error */
DEFINE_RAW_SPINLOCK(confirm_error_lock);
EXPORT_SYMBOL_GPL(confirm_error_lock);

/* Lock to protect passed flags */
static DEFINE_MUTEX(eeh_dev_mutex);

/* Buffer for reporting pci register dumps. It's here in the BSS, and
 * not dynamically allocated, so that it ends up in the RMO where RTAS
 * can access it.
 */
#define EEH_PCI_REGS_LOG_LEN 8192
static unsigned char pci_regs_buf[EEH_PCI_REGS_LOG_LEN];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124)
/*
 * The struct is used to maintain the EEH global statistic
 * information. Besides, the EEH global statistics will be
 * exported to user space through procfs.
 */
struct eeh_stats {
	u64 no_device;		/* PCI device not found		*/
	u64 no_dn;		/* OF node not found		*/
	u64 no_cfg_addr;	/* Config address not found	*/
	u64 ignored_check;	/* EEH check skipped		*/
	u64 total_mmio_ffs;	/* Total EEH checks		*/
	u64 false_positives;	/* Unnecessary EEH checks	*/
	u64 slot_resets;	/* PE reset			*/
};

static struct eeh_stats eeh_stats;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142) static int __init eeh_setup(char *str)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144) if (!strcmp(str, "off"))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145) eeh_add_flag(EEH_FORCE_DISABLED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146) else if (!strcmp(str, "early_log"))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147) eeh_add_flag(EEH_EARLY_DUMP_LOG);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151) __setup("eeh=", eeh_setup);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153) void eeh_show_enabled(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155) if (eeh_has_flag(EEH_FORCE_DISABLED))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156) pr_info("EEH: Recovery disabled by kernel parameter.\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157) else if (eeh_has_flag(EEH_ENABLED))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158) pr_info("EEH: Capable adapter found: recovery enabled.\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 160) pr_info("EEH: No capable adapters found: recovery disabled.\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 161) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 162)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 163) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 164) * This routine captures assorted PCI configuration space data
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 165) * for the indicated PCI device, and puts them into a buffer
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 166) * for RTAS error logging.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 168) static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170) u32 cfg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171) int cap, i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172) int n = 0, l = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 173) char buffer[128];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 174)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 175) n += scnprintf(buf+n, len-n, "%04x:%02x:%02x.%01x\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 176) edev->pe->phb->global_number, edev->bdfn >> 8,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 177) PCI_SLOT(edev->bdfn), PCI_FUNC(edev->bdfn));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 178) pr_warn("EEH: of node=%04x:%02x:%02x.%01x\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 179) edev->pe->phb->global_number, edev->bdfn >> 8,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 180) PCI_SLOT(edev->bdfn), PCI_FUNC(edev->bdfn));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 181)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 182) eeh_ops->read_config(edev, PCI_VENDOR_ID, 4, &cfg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 183) n += scnprintf(buf+n, len-n, "dev/vend:%08x\n", cfg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 184) pr_warn("EEH: PCI device/vendor: %08x\n", cfg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 185)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 186) eeh_ops->read_config(edev, PCI_COMMAND, 4, &cfg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 187) n += scnprintf(buf+n, len-n, "cmd/stat:%x\n", cfg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 188) pr_warn("EEH: PCI cmd/status register: %08x\n", cfg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 189)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 190) /* Gather bridge-specific registers */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 191) if (edev->mode & EEH_DEV_BRIDGE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 192) eeh_ops->read_config(edev, PCI_SEC_STATUS, 2, &cfg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 193) n += scnprintf(buf+n, len-n, "sec stat:%x\n", cfg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 194) pr_warn("EEH: Bridge secondary status: %04x\n", cfg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 195)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 196) eeh_ops->read_config(edev, PCI_BRIDGE_CONTROL, 2, &cfg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 197) n += scnprintf(buf+n, len-n, "brdg ctl:%x\n", cfg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 198) pr_warn("EEH: Bridge control: %04x\n", cfg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 199) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 200)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 201) /* Dump out the PCI-X command and status regs */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 202) cap = edev->pcix_cap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 203) if (cap) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 204) eeh_ops->read_config(edev, cap, 4, &cfg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 205) n += scnprintf(buf+n, len-n, "pcix-cmd:%x\n", cfg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 206) pr_warn("EEH: PCI-X cmd: %08x\n", cfg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 207)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 208) eeh_ops->read_config(edev, cap+4, 4, &cfg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 209) n += scnprintf(buf+n, len-n, "pcix-stat:%x\n", cfg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 210) pr_warn("EEH: PCI-X status: %08x\n", cfg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 211) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 212)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 213) /* If PCI-E capable, dump PCI-E cap 10 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 214) cap = edev->pcie_cap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 215) if (cap) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 216) n += scnprintf(buf+n, len-n, "pci-e cap10:\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 217) pr_warn("EEH: PCI-E capabilities and status follow:\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 218)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 219) for (i=0; i<=8; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 220) eeh_ops->read_config(edev, cap+4*i, 4, &cfg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 221) n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 222)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 223) if ((i % 4) == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 224) if (i != 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 225) pr_warn("%s\n", buffer);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 226)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 227) l = scnprintf(buffer, sizeof(buffer),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 228) "EEH: PCI-E %02x: %08x ",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 229) 4*i, cfg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 230) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 231) l += scnprintf(buffer+l, sizeof(buffer)-l,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 232) "%08x ", cfg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 233) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 234)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 235) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 236)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 237) pr_warn("%s\n", buffer);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 238) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 239)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 240) /* If AER capable, dump it */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 241) cap = edev->aer_cap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 242) if (cap) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 243) n += scnprintf(buf+n, len-n, "pci-e AER:\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 244) pr_warn("EEH: PCI-E AER capability register set follows:\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 245)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 246) for (i=0; i<=13; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 247) eeh_ops->read_config(edev, cap+4*i, 4, &cfg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 248) n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 249)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 250) if ((i % 4) == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 251) if (i != 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 252) pr_warn("%s\n", buffer);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 253)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 254) l = scnprintf(buffer, sizeof(buffer),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 255) "EEH: PCI-E AER %02x: %08x ",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 256) 4*i, cfg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 257) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 258) l += scnprintf(buffer+l, sizeof(buffer)-l,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 259) "%08x ", cfg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 260) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 261) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 262)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 263) pr_warn("%s\n", buffer);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 264) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 265)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 266) return n;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 267) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 268)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 269) static void *eeh_dump_pe_log(struct eeh_pe *pe, void *flag)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 270) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 271) struct eeh_dev *edev, *tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 272) size_t *plen = flag;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 273)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 274) eeh_pe_for_each_dev(pe, edev, tmp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 275) *plen += eeh_dump_dev_log(edev, pci_regs_buf + *plen,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 276) EEH_PCI_REGS_LOG_LEN - *plen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 277)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 278) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 279) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 280)
/**
 * eeh_slot_error_detail - Generate combined log including driver log and error log
 * @pe: EEH PE
 * @severity: temporary or permanent error log
 *
 * This routine should be called to generate the combined log, which
 * is comprised of driver log and error log. The driver log is figured
 * out from the config space of the corresponding PCI device, while
 * the error log is fetched through platform dependent function call
 * (eeh_ops->get_log).
 */
void eeh_slot_error_detail(struct eeh_pe *pe, int severity)
{
	size_t loglen = 0;

	/*
	 * When the PHB is fenced or dead, it's pointless to collect
	 * the data from PCI config space because it should return
	 * 0xFF's. For ER, we still retrieve the data from the PCI
	 * config space.
	 *
	 * For pHyp, we have to enable IO for log retrieval. Otherwise,
	 * 0xFF's is always returned from PCI config space
	 * (EEH_ENABLE_IO_FOR_LOG).
	 *
	 * When the @severity is EEH_LOG_PERM, the PE is going to be
	 * removed. Prior to that, the drivers for devices included in
	 * the PE will be closed. The drivers rely on working IO path
	 * to bring the devices to quiet state. Otherwise, PCI traffic
	 * from those devices after they are removed is likely to cause
	 * another unexpected EEH error.
	 */
	if (!(pe->type & EEH_PE_PHB)) {
		if (eeh_has_flag(EEH_ENABLE_IO_FOR_LOG) ||
		    severity == EEH_LOG_PERM)
			eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);

		/*
		 * The config space of some PCI devices can't be accessed
		 * when their PEs are in frozen state. Otherwise, fenced
		 * PHB might be seen. Those PEs are identified with flag
		 * EEH_PE_CFG_RESTRICTED, indicating EEH_PE_CFG_BLOCKED
		 * is set automatically when the PE is put to EEH_PE_ISOLATED.
		 *
		 * Restoring BARs possibly triggers PCI config access in
		 * (OPAL) firmware and then causes fenced PHB. If the
		 * PCI config is blocked with flag EEH_PE_CFG_BLOCKED, it's
		 * pointless to restore BARs and dump config space, so
		 * both are skipped.
		 */
		eeh_ops->configure_bridge(pe);
		if (!(pe->state & EEH_PE_CFG_BLOCKED)) {
			eeh_pe_restore_bars(pe);

			/* Reset the buffer, then fill it device by device */
			pci_regs_buf[0] = 0;
			eeh_pe_traverse(pe, eeh_dump_pe_log, &loglen);
		}
	}

	eeh_ops->get_log(pe, severity, pci_regs_buf, loglen);
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 339)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 340) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 341) * eeh_token_to_phys - Convert EEH address token to phys address
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 342) * @token: I/O token, should be address in the form 0xA....
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 343) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 344) * This routine should be called to convert virtual I/O address
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 345) * to physical one.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 346) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 347) static inline unsigned long eeh_token_to_phys(unsigned long token)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 348) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 349) pte_t *ptep;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 350) unsigned long pa;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 351) int hugepage_shift;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 352)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 353) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 354) * We won't find hugepages here(this is iomem). Hence we are not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 355) * worried about _PAGE_SPLITTING/collapse. Also we will not hit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 356) * page table free, because of init_mm.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 357) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 358) ptep = find_init_mm_pte(token, &hugepage_shift);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 359) if (!ptep)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 360) return token;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 361)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 362) pa = pte_pfn(*ptep);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 363)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 364) /* On radix we can do hugepage mappings for io, so handle that */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 365) if (!hugepage_shift)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 366) hugepage_shift = PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 367)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 368) pa <<= PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 369) pa |= token & ((1ul << hugepage_shift) - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 370) return pa;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 371) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 372)
/*
 * On the PowerNV platform, we might already have a fenced PHB here.
 * In that case, it's meaningless to recover a frozen PE. Instead,
 * we have to handle the fenced PHB first.
 */
static int eeh_phb_check_failure(struct eeh_pe *pe)
{
	struct eeh_pe *phb_pe;
	unsigned long flags;
	int ret;

	/* PHB-level checking only applies when PEs are probed from PCI
	 * devices rather than from the device tree. */
	if (!eeh_has_flag(EEH_PROBE_MODE_DEV))
		return -EPERM;

	/* Find the PHB PE */
	phb_pe = eeh_phb_pe_get(pe->phb);
	if (!phb_pe) {
		pr_warn("%s Can't find PE for PHB#%x\n",
			__func__, pe->phb->global_number);
		return -EEXIST;
	}

	/* If the PHB has been in problematic state */
	eeh_serialize_lock(&flags);
	if (phb_pe->state & EEH_PE_ISOLATED) {
		/* Already isolated: the failure is (or was) being
		 * handled, so don't report it again. */
		ret = 0;
		goto out;
	}

	/* Check PHB state */
	ret = eeh_ops->get_state(phb_pe, NULL);
	if ((ret < 0) ||
	    (ret == EEH_STATE_NOT_SUPPORT) || eeh_state_active(ret)) {
		/* Query failure, unsupported query, or a still-active
		 * PHB: no fenced-PHB condition to handle here. */
		ret = 0;
		goto out;
	}

	/* Isolate the PHB and send event; drop the serialization lock
	 * before queueing the event. */
	eeh_pe_mark_isolated(phb_pe);
	eeh_serialize_unlock(flags);

	pr_debug("EEH: PHB#%x failure detected, location: %s\n",
		phb_pe->phb->global_number, eeh_pe_loc_get(phb_pe));
	eeh_send_failure_event(phb_pe);
	/* 1 == a PHB failure was detected and queued for recovery */
	return 1;
out:
	eeh_serialize_unlock(flags);
	return ret;
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 422)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 423) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 424) * eeh_dev_check_failure - Check if all 1's data is due to EEH slot freeze
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 425) * @edev: eeh device
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 426) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 427) * Check for an EEH failure for the given device node. Call this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 428) * routine if the result of a read was all 0xff's and you want to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 429) * find out if this is due to an EEH slot freeze. This routine
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 430) * will query firmware for the EEH status.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 431) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 432) * Returns 0 if there has not been an EEH error; otherwise returns
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 433) * a non-zero value and queues up a slot isolation event notification.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 434) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 435) * It is safe to call this routine in an interrupt context.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 436) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 437) int eeh_dev_check_failure(struct eeh_dev *edev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 438) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 439) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 440) unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 441) struct device_node *dn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 442) struct pci_dev *dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 443) struct eeh_pe *pe, *parent_pe;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 444) int rc = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 445) const char *location = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 446)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 447) eeh_stats.total_mmio_ffs++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 448)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 449) if (!eeh_enabled())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 450) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 451)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 452) if (!edev) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 453) eeh_stats.no_dn++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 454) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 455) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 456) dev = eeh_dev_to_pci_dev(edev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 457) pe = eeh_dev_to_pe(edev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 458)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 459) /* Access to IO BARs might get this far and still not want checking. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 460) if (!pe) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 461) eeh_stats.ignored_check++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 462) eeh_edev_dbg(edev, "Ignored check\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 463) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 464) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 465)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 466) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 467) * On PowerNV platform, we might already have fenced PHB
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 468) * there and we need take care of that firstly.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 469) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 470) ret = eeh_phb_check_failure(pe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 471) if (ret > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 472) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 473)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 474) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 475) * If the PE isn't owned by us, we shouldn't check the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 476) * state. Instead, let the owner handle it if the PE has
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 477) * been frozen.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 478) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 479) if (eeh_pe_passed(pe))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 480) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 481)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 482) /* If we already have a pending isolation event for this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 483) * slot, we know it's bad already, we don't need to check.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 484) * Do this checking under a lock; as multiple PCI devices
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 485) * in one slot might report errors simultaneously, and we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 486) * only want one error recovery routine running.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 487) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 488) eeh_serialize_lock(&flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 489) rc = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 490) if (pe->state & EEH_PE_ISOLATED) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 491) pe->check_count++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 492) if (pe->check_count == EEH_MAX_FAILS) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 493) dn = pci_device_to_OF_node(dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 494) if (dn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 495) location = of_get_property(dn, "ibm,loc-code",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 496) NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 497) eeh_edev_err(edev, "%d reads ignored for recovering device at location=%s driver=%s\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 498) pe->check_count,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 499) location ? location : "unknown",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 500) eeh_driver_name(dev));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 501) eeh_edev_err(edev, "Might be infinite loop in %s driver\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 502) eeh_driver_name(dev));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 503) dump_stack();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 504) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 505) goto dn_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 506) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 507)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 508) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 509) * Now test for an EEH failure. This is VERY expensive.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 510) * Note that the eeh_config_addr may be a parent device
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 511) * in the case of a device behind a bridge, or it may be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 512) * function zero of a multi-function device.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 513) * In any case they must share a common PHB.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 514) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 515) ret = eeh_ops->get_state(pe, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 516)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 517) /* Note that config-io to empty slots may fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 518) * they are empty when they don't have children.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 519) * We will punt with the following conditions: Failure to get
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 520) * PE's state, EEH not support and Permanently unavailable
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 521) * state, PE is in good state.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 522) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 523) if ((ret < 0) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 524) (ret == EEH_STATE_NOT_SUPPORT) || eeh_state_active(ret)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 525) eeh_stats.false_positives++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 526) pe->false_positives++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 527) rc = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 528) goto dn_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 529) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 530)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 531) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 532) * It should be corner case that the parent PE has been
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 533) * put into frozen state as well. We should take care
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 534) * that at first.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 535) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 536) parent_pe = pe->parent;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 537) while (parent_pe) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 538) /* Hit the ceiling ? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 539) if (parent_pe->type & EEH_PE_PHB)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 540) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 541)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 542) /* Frozen parent PE ? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 543) ret = eeh_ops->get_state(parent_pe, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 544) if (ret > 0 && !eeh_state_active(ret)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 545) pe = parent_pe;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 546) pr_err("EEH: Failure of PHB#%x-PE#%x will be handled at parent PHB#%x-PE#%x.\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 547) pe->phb->global_number, pe->addr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 548) pe->phb->global_number, parent_pe->addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 549) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 550)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 551) /* Next parent level */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 552) parent_pe = parent_pe->parent;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 553) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 554)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 555) eeh_stats.slot_resets++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 556)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 557) /* Avoid repeated reports of this failure, including problems
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 558) * with other functions on this device, and functions under
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 559) * bridges.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 560) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 561) eeh_pe_mark_isolated(pe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 562) eeh_serialize_unlock(flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 563)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 564) /* Most EEH events are due to device driver bugs. Having
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 565) * a stack trace will help the device-driver authors figure
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 566) * out what happened. So print that out.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 567) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 568) pr_debug("EEH: %s: Frozen PHB#%x-PE#%x detected\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 569) __func__, pe->phb->global_number, pe->addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 570) eeh_send_failure_event(pe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 571)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 572) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 573)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 574) dn_unlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 575) eeh_serialize_unlock(flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 576) return rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 577) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 578)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 579) EXPORT_SYMBOL_GPL(eeh_dev_check_failure);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 580)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 581) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 582) * eeh_check_failure - Check if all 1's data is due to EEH slot freeze
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 583) * @token: I/O address
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 584) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 585) * Check for an EEH failure at the given I/O address. Call this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 586) * routine if the result of a read was all 0xff's and you want to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 587) * find out if this is due to an EEH slot freeze event. This routine
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 588) * will query firmware for the EEH status.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 589) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 590) * Note this routine is safe to call in an interrupt context.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 591) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 592) int eeh_check_failure(const volatile void __iomem *token)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 593) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 594) unsigned long addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 595) struct eeh_dev *edev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 596)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 597) /* Finding the phys addr + pci device; this is pretty quick. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 598) addr = eeh_token_to_phys((unsigned long __force) token);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 599) edev = eeh_addr_cache_get_dev(addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 600) if (!edev) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 601) eeh_stats.no_device++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 602) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 603) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 604)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 605) return eeh_dev_check_failure(edev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 606) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 607) EXPORT_SYMBOL(eeh_check_failure);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 608)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 609)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 610) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 611) * eeh_pci_enable - Enable MMIO or DMA transfers for this slot
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 612) * @pe: EEH PE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 613) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 614) * This routine should be called to reenable frozen MMIO or DMA
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 615) * so that it would work correctly again. It's useful while doing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 616) * recovery or log collection on the indicated device.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 617) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 618) int eeh_pci_enable(struct eeh_pe *pe, int function)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 619) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 620) int active_flag, rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 621)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 622) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 623) * pHyp doesn't allow to enable IO or DMA on unfrozen PE.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 624) * Also, it's pointless to enable them on unfrozen PE. So
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 625) * we have to check before enabling IO or DMA.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 626) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 627) switch (function) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 628) case EEH_OPT_THAW_MMIO:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 629) active_flag = EEH_STATE_MMIO_ACTIVE | EEH_STATE_MMIO_ENABLED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 630) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 631) case EEH_OPT_THAW_DMA:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 632) active_flag = EEH_STATE_DMA_ACTIVE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 633) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 634) case EEH_OPT_DISABLE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 635) case EEH_OPT_ENABLE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 636) case EEH_OPT_FREEZE_PE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 637) active_flag = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 638) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 639) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 640) pr_warn("%s: Invalid function %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 641) __func__, function);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 642) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 643) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 644)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 645) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 646) * Check if IO or DMA has been enabled before
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 647) * enabling them.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 648) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 649) if (active_flag) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 650) rc = eeh_ops->get_state(pe, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 651) if (rc < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 652) return rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 653)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 654) /* Needn't enable it at all */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 655) if (rc == EEH_STATE_NOT_SUPPORT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 656) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 657)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 658) /* It's already enabled */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 659) if (rc & active_flag)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 660) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 661) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 662)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 663)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 664) /* Issue the request */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 665) rc = eeh_ops->set_option(pe, function);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 666) if (rc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 667) pr_warn("%s: Unexpected state change %d on "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 668) "PHB#%x-PE#%x, err=%d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 669) __func__, function, pe->phb->global_number,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 670) pe->addr, rc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 671)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 672) /* Check if the request is finished successfully */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 673) if (active_flag) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 674) rc = eeh_wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 675) if (rc < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 676) return rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 677)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 678) if (rc & active_flag)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 679) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 680)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 681) return -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 682) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 683)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 684) return rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 685) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 686)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 687) static void eeh_disable_and_save_dev_state(struct eeh_dev *edev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 688) void *userdata)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 689) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 690) struct pci_dev *pdev = eeh_dev_to_pci_dev(edev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 691) struct pci_dev *dev = userdata;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 692)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 693) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 694) * The caller should have disabled and saved the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 695) * state for the specified device
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 696) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 697) if (!pdev || pdev == dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 698) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 699)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 700) /* Ensure we have D0 power state */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 701) pci_set_power_state(pdev, PCI_D0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 702)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 703) /* Save device state */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 704) pci_save_state(pdev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 705)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 706) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 707) * Disable device to avoid any DMA traffic and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 708) * interrupt from the device
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 709) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 710) pci_write_config_word(pdev, PCI_COMMAND, PCI_COMMAND_INTX_DISABLE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 711) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 712)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 713) static void eeh_restore_dev_state(struct eeh_dev *edev, void *userdata)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 714) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 715) struct pci_dev *pdev = eeh_dev_to_pci_dev(edev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 716) struct pci_dev *dev = userdata;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 717)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 718) if (!pdev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 719) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 720)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 721) /* Apply customization from firmware */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 722) if (eeh_ops->restore_config)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 723) eeh_ops->restore_config(edev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 724)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 725) /* The caller should restore state for the specified device */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 726) if (pdev != dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 727) pci_restore_state(pdev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 728) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 729)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 730) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 731) * pcibios_set_pcie_reset_state - Set PCI-E reset state
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 732) * @dev: pci device struct
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 733) * @state: reset state to enter
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 734) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 735) * Return value:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 736) * 0 if success
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 737) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 738) int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 739) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 740) struct eeh_dev *edev = pci_dev_to_eeh_dev(dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 741) struct eeh_pe *pe = eeh_dev_to_pe(edev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 742)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 743) if (!pe) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 744) pr_err("%s: No PE found on PCI device %s\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 745) __func__, pci_name(dev));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 746) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 747) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 748)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 749) switch (state) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 750) case pcie_deassert_reset:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 751) eeh_ops->reset(pe, EEH_RESET_DEACTIVATE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 752) eeh_unfreeze_pe(pe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 753) if (!(pe->type & EEH_PE_VF))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 754) eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 755) eeh_pe_dev_traverse(pe, eeh_restore_dev_state, dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 756) eeh_pe_state_clear(pe, EEH_PE_ISOLATED, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 757) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 758) case pcie_hot_reset:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 759) eeh_pe_mark_isolated(pe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 760) eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 761) eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 762) eeh_pe_dev_traverse(pe, eeh_disable_and_save_dev_state, dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 763) if (!(pe->type & EEH_PE_VF))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 764) eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 765) eeh_ops->reset(pe, EEH_RESET_HOT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 766) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 767) case pcie_warm_reset:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 768) eeh_pe_mark_isolated(pe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 769) eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 770) eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 771) eeh_pe_dev_traverse(pe, eeh_disable_and_save_dev_state, dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 772) if (!(pe->type & EEH_PE_VF))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 773) eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 774) eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 775) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 776) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 777) eeh_pe_state_clear(pe, EEH_PE_ISOLATED | EEH_PE_CFG_BLOCKED, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 778) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 779) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 780)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 781) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 782) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 783)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 784) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 785) * eeh_set_pe_freset - Check the required reset for the indicated device
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 786) * @data: EEH device
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 787) * @flag: return value
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 788) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 789) * Each device might have its preferred reset type: fundamental or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 790) * hot reset. The routine is used to collected the information for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 791) * the indicated device and its children so that the bunch of the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 792) * devices could be reset properly.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 793) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 794) static void eeh_set_dev_freset(struct eeh_dev *edev, void *flag)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 795) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 796) struct pci_dev *dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 797) unsigned int *freset = (unsigned int *)flag;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 798)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 799) dev = eeh_dev_to_pci_dev(edev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 800) if (dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 801) *freset |= dev->needs_freset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 802) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 803)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 804) static void eeh_pe_refreeze_passed(struct eeh_pe *root)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 805) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 806) struct eeh_pe *pe;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 807) int state;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 808)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 809) eeh_for_each_pe(root, pe) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 810) if (eeh_pe_passed(pe)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 811) state = eeh_ops->get_state(pe, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 812) if (state &
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 813) (EEH_STATE_MMIO_ACTIVE | EEH_STATE_MMIO_ENABLED)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 814) pr_info("EEH: Passed-through PE PHB#%x-PE#%x was thawed by reset, re-freezing for safety.\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 815) pe->phb->global_number, pe->addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 816) eeh_pe_set_option(pe, EEH_OPT_FREEZE_PE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 817) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 818) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 819) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 820) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 821)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 822) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 823) * eeh_pe_reset_full - Complete a full reset process on the indicated PE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 824) * @pe: EEH PE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 825) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 826) * This function executes a full reset procedure on a PE, including setting
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 827) * the appropriate flags, performing a fundamental or hot reset, and then
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 828) * deactivating the reset status. It is designed to be used within the EEH
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 829) * subsystem, as opposed to eeh_pe_reset which is exported to drivers and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 830) * only performs a single operation at a time.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 831) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 832) * This function will attempt to reset a PE three times before failing.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 833) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 834) int eeh_pe_reset_full(struct eeh_pe *pe, bool include_passed)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 835) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 836) int reset_state = (EEH_PE_RESET | EEH_PE_CFG_BLOCKED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 837) int type = EEH_RESET_HOT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 838) unsigned int freset = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 839) int i, state = 0, ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 840)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 841) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 842) * Determine the type of reset to perform - hot or fundamental.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 843) * Hot reset is the default operation, unless any device under the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 844) * PE requires a fundamental reset.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 845) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 846) eeh_pe_dev_traverse(pe, eeh_set_dev_freset, &freset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 847)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 848) if (freset)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 849) type = EEH_RESET_FUNDAMENTAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 850)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 851) /* Mark the PE as in reset state and block config space accesses */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 852) eeh_pe_state_mark(pe, reset_state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 853)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 854) /* Make three attempts at resetting the bus */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 855) for (i = 0; i < 3; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 856) ret = eeh_pe_reset(pe, type, include_passed);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 857) if (!ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 858) ret = eeh_pe_reset(pe, EEH_RESET_DEACTIVATE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 859) include_passed);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 860) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 861) ret = -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 862) pr_warn("EEH: Failure %d resetting PHB#%x-PE#%x (attempt %d)\n\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 863) state, pe->phb->global_number, pe->addr, i + 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 864) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 865) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 866) if (i)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 867) pr_warn("EEH: PHB#%x-PE#%x: Successful reset (attempt %d)\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 868) pe->phb->global_number, pe->addr, i + 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 869)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 870) /* Wait until the PE is in a functioning state */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 871) state = eeh_wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 872) if (state < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 873) pr_warn("EEH: Unrecoverable slot failure on PHB#%x-PE#%x",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 874) pe->phb->global_number, pe->addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 875) ret = -ENOTRECOVERABLE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 876) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 877) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 878) if (eeh_state_active(state))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 879) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 880) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 881) pr_warn("EEH: PHB#%x-PE#%x: Slot inactive after reset: 0x%x (attempt %d)\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 882) pe->phb->global_number, pe->addr, state, i + 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 883) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 884)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 885) /* Resetting the PE may have unfrozen child PEs. If those PEs have been
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 886) * (potentially) passed through to a guest, re-freeze them:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 887) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 888) if (!include_passed)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 889) eeh_pe_refreeze_passed(pe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 890)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 891) eeh_pe_state_clear(pe, reset_state, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 892) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 893) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 894)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 895) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 896) * eeh_save_bars - Save device bars
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 897) * @edev: PCI device associated EEH device
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 898) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 899) * Save the values of the device bars. Unlike the restore
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 900) * routine, this routine is *not* recursive. This is because
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 901) * PCI devices are added individually; but, for the restore,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 902) * an entire slot is reset at a time.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 903) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 904) void eeh_save_bars(struct eeh_dev *edev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 905) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 906) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 907)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 908) if (!edev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 909) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 910)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 911) for (i = 0; i < 16; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 912) eeh_ops->read_config(edev, i * 4, 4, &edev->config_space[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 913)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 914) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 915) * For PCI bridges including root port, we need enable bus
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 916) * master explicitly. Otherwise, it can't fetch IODA table
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 917) * entries correctly. So we cache the bit in advance so that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 918) * we can restore it after reset, either PHB range or PE range.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 919) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 920) if (edev->mode & EEH_DEV_BRIDGE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 921) edev->config_space[1] |= PCI_COMMAND_MASTER;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 922) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 923)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 924) static int eeh_reboot_notifier(struct notifier_block *nb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 925) unsigned long action, void *unused)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 926) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 927) eeh_clear_flag(EEH_ENABLED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 928) return NOTIFY_DONE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 929) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 930)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 931) static struct notifier_block eeh_reboot_nb = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 932) .notifier_call = eeh_reboot_notifier,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 933) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 934)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 935) static int eeh_device_notifier(struct notifier_block *nb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 936) unsigned long action, void *data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 937) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 938) struct device *dev = data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 939)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 940) switch (action) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 941) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 942) * Note: It's not possible to perform EEH device addition (i.e.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 943) * {pseries,pnv}_pcibios_bus_add_device()) here because it depends on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 944) * the device's resources, which have not yet been set up.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 945) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 946) case BUS_NOTIFY_DEL_DEVICE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 947) eeh_remove_device(to_pci_dev(dev));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 948) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 949) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 950) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 951) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 952) return NOTIFY_DONE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 953) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 954)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 955) static struct notifier_block eeh_device_nb = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 956) .notifier_call = eeh_device_notifier,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 957) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 958)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 959) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 960) * eeh_init - System wide EEH initialization
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 961) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 962) * It's the platform's job to call this from an arch_initcall().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 963) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 964) int eeh_init(struct eeh_ops *ops)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 965) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 966) struct pci_controller *hose, *tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 967) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 968)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 969) /* the platform should only initialise EEH once */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 970) if (WARN_ON(eeh_ops))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 971) return -EEXIST;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 972) if (WARN_ON(!ops))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 973) return -ENOENT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 974) eeh_ops = ops;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 975)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 976) /* Register reboot notifier */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 977) ret = register_reboot_notifier(&eeh_reboot_nb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 978) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 979) pr_warn("%s: Failed to register reboot notifier (%d)\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 980) __func__, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 981) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 982) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 983)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 984) ret = bus_register_notifier(&pci_bus_type, &eeh_device_nb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 985) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 986) pr_warn("%s: Failed to register bus notifier (%d)\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 987) __func__, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 988) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 989) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 990)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 991) /* Initialize PHB PEs */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 992) list_for_each_entry_safe(hose, tmp, &hose_list, list_node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 993) eeh_phb_pe_create(hose);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 994)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 995) eeh_addr_cache_init();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 996)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 997) /* Initialize EEH event */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 998) return eeh_event_init();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 999) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002) * eeh_probe_device() - Perform EEH initialization for the indicated pci device
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003) * @dev: pci device for which to set up EEH
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005) * This routine must be used to complete EEH initialization for PCI
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006) * devices that were added after system boot (e.g. hotplug, dlpar).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008) void eeh_probe_device(struct pci_dev *dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010) struct eeh_dev *edev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012) pr_debug("EEH: Adding device %s\n", pci_name(dev));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015) * pci_dev_to_eeh_dev() can only work if eeh_probe_dev() was
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016) * already called for this device.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018) if (WARN_ON_ONCE(pci_dev_to_eeh_dev(dev))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019) pci_dbg(dev, "Already bound to an eeh_dev!\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023) edev = eeh_ops->probe(dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) if (!edev) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025) pr_debug("EEH: Adding device failed\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030) * FIXME: We rely on pcibios_release_device() to remove the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031) * existing EEH state. The release function is only called if
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) * the pci_dev's refcount drops to zero so if something is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033) * keeping a ref to a device (e.g. a filesystem) we need to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034) * remove the old EEH state.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036) * FIXME: HEY MA, LOOK AT ME, NO LOCKING!
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038) if (edev->pdev && edev->pdev != dev) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039) eeh_pe_tree_remove(edev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040) eeh_addr_cache_rmv_dev(edev->pdev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041) eeh_sysfs_remove_device(edev->pdev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044) * We definitely should have the PCI device removed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045) * though it wasn't correctly. So we needn't call
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046) * into error handler afterwards.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) edev->mode |= EEH_DEV_NO_HANDLER;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051) /* bind the pdev and the edev together */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052) edev->pdev = dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053) dev->dev.archdata.edev = edev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) eeh_addr_cache_insert_dev(dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055) eeh_sysfs_add_device(dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059) * eeh_remove_device - Undo EEH setup for the indicated pci device
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060) * @dev: pci device to be removed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062) * This routine should be called when a device is removed from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063) * a running system (e.g. by hotplug or dlpar). It unregisters
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064) * the PCI device from the EEH subsystem. I/O errors affecting
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065) * this device will no longer be detected after this call; thus,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066) * i/o errors affecting this slot may leave this device unusable.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068) void eeh_remove_device(struct pci_dev *dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070) struct eeh_dev *edev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072) if (!dev || !eeh_enabled())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074) edev = pci_dev_to_eeh_dev(dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076) /* Unregister the device with the EEH/PCI address search system */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077) dev_dbg(&dev->dev, "EEH: Removing device\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079) if (!edev || !edev->pdev || !edev->pe) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080) dev_dbg(&dev->dev, "EEH: Device not referenced!\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085) * During the hotplug for EEH error recovery, we need the EEH
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086) * device attached to the parent PE in order for BAR restore
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087) * a bit later. So we keep it for BAR restore and remove it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088) * from the parent PE during the BAR resotre.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090) edev->pdev = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093) * eeh_sysfs_remove_device() uses pci_dev_to_eeh_dev() so we need to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094) * remove the sysfs files before clearing dev.archdata.edev
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096) if (edev->mode & EEH_DEV_SYSFS)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097) eeh_sysfs_remove_device(dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100) * We're removing from the PCI subsystem, that means
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101) * the PCI device driver can't support EEH or not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102) * well. So we rely on hotplug completely to do recovery
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103) * for the specific PCI device.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105) edev->mode |= EEH_DEV_NO_HANDLER;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107) eeh_addr_cache_rmv_dev(dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110) * The flag "in_error" is used to trace EEH devices for VFs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111) * in error state or not. It's set in eeh_report_error(). If
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112) * it's not set, eeh_report_{reset,resume}() won't be called
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1113) * for the VF EEH device.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115) edev->in_error = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116) dev->dev.archdata.edev = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117) if (!(edev->pe->state & EEH_PE_KEEP))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118) eeh_pe_tree_remove(edev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120) edev->mode |= EEH_DEV_DISCONNECTED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123) int eeh_unfreeze_pe(struct eeh_pe *pe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127) ret = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129) pr_warn("%s: Failure %d enabling IO on PHB#%x-PE#%x\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130) __func__, ret, pe->phb->global_number, pe->addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134) ret = eeh_pci_enable(pe, EEH_OPT_THAW_DMA);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136) pr_warn("%s: Failure %d enabling DMA on PHB#%x-PE#%x\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137) __func__, ret, pe->phb->global_number, pe->addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145) static struct pci_device_id eeh_reset_ids[] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146) { PCI_DEVICE(0x19a2, 0x0710) }, /* Emulex, BE */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147) { PCI_DEVICE(0x10df, 0xe220) }, /* Emulex, Lancer */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148) { PCI_DEVICE(0x14e4, 0x1657) }, /* Broadcom BCM5719 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149) { 0 }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152) static int eeh_pe_change_owner(struct eeh_pe *pe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154) struct eeh_dev *edev, *tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155) struct pci_dev *pdev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156) struct pci_device_id *id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159) /* Check PE state */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160) ret = eeh_ops->get_state(pe, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161) if (ret < 0 || ret == EEH_STATE_NOT_SUPPORT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164) /* Unfrozen PE, nothing to do */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165) if (eeh_state_active(ret))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168) /* Frozen PE, check if it needs PE level reset */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169) eeh_pe_for_each_dev(pe, edev, tmp) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170) pdev = eeh_dev_to_pci_dev(edev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171) if (!pdev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174) for (id = &eeh_reset_ids[0]; id->vendor != 0; id++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175) if (id->vendor != PCI_ANY_ID &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176) id->vendor != pdev->vendor)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178) if (id->device != PCI_ANY_ID &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179) id->device != pdev->device)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181) if (id->subvendor != PCI_ANY_ID &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182) id->subvendor != pdev->subsystem_vendor)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184) if (id->subdevice != PCI_ANY_ID &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185) id->subdevice != pdev->subsystem_device)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188) return eeh_pe_reset_and_recover(pe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1189) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192) ret = eeh_unfreeze_pe(pe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193) if (!ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194) eeh_pe_state_clear(pe, EEH_PE_ISOLATED, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199) * eeh_dev_open - Increase count of pass through devices for PE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200) * @pdev: PCI device
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202) * Increase count of passed through devices for the indicated
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203) * PE. In the result, the EEH errors detected on the PE won't be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204) * reported. The PE owner will be responsible for detection
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205) * and recovery.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207) int eeh_dev_open(struct pci_dev *pdev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1209) struct eeh_dev *edev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210) int ret = -ENODEV;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212) mutex_lock(&eeh_dev_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214) /* No PCI device ? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215) if (!pdev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218) /* No EEH device or PE ? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219) edev = pci_dev_to_eeh_dev(pdev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220) if (!edev || !edev->pe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224) * The PE might have been put into frozen state, but we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225) * didn't detect that yet. The passed through PCI devices
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226) * in frozen PE won't work properly. Clear the frozen state
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227) * in advance.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229) ret = eeh_pe_change_owner(edev->pe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233) /* Increase PE's pass through count */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234) atomic_inc(&edev->pe->pass_dev_cnt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235) mutex_unlock(&eeh_dev_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1237) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1238) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1239) mutex_unlock(&eeh_dev_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1240) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1241) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1242) EXPORT_SYMBOL_GPL(eeh_dev_open);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1243)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1244) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1245) * eeh_dev_release - Decrease count of pass through devices for PE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1246) * @pdev: PCI device
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1247) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1248) * Decrease count of pass through devices for the indicated PE. If
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1249) * there is no passed through device in PE, the EEH errors detected
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1250) * on the PE will be reported and handled as usual.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1251) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1252) void eeh_dev_release(struct pci_dev *pdev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1253) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1254) struct eeh_dev *edev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1255)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1256) mutex_lock(&eeh_dev_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1257)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1258) /* No PCI device ? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1259) if (!pdev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1260) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1261)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1262) /* No EEH device ? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1263) edev = pci_dev_to_eeh_dev(pdev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1264) if (!edev || !edev->pe || !eeh_pe_passed(edev->pe))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1265) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1266)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1267) /* Decrease PE's pass through count */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1268) WARN_ON(atomic_dec_if_positive(&edev->pe->pass_dev_cnt) < 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1269) eeh_pe_change_owner(edev->pe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1270) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1271) mutex_unlock(&eeh_dev_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1272) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1273) EXPORT_SYMBOL(eeh_dev_release);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1274)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1275) #ifdef CONFIG_IOMMU_API
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1276)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1277) static int dev_has_iommu_table(struct device *dev, void *data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1278) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1279) struct pci_dev *pdev = to_pci_dev(dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1280) struct pci_dev **ppdev = data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1281)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1282) if (!dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1283) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1284)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1285) if (device_iommu_mapped(dev)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1286) *ppdev = pdev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1287) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1288) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1289)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1290) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1291) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1292)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1293) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1294) * eeh_iommu_group_to_pe - Convert IOMMU group to EEH PE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1295) * @group: IOMMU group
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1296) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1297) * The routine is called to convert IOMMU group to EEH PE.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1298) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1299) struct eeh_pe *eeh_iommu_group_to_pe(struct iommu_group *group)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1300) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1301) struct pci_dev *pdev = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1302) struct eeh_dev *edev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1303) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1304)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1305) /* No IOMMU group ? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1306) if (!group)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1307) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1308)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1309) ret = iommu_group_for_each_dev(group, &pdev, dev_has_iommu_table);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1310) if (!ret || !pdev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1311) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1312)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1313) /* No EEH device or PE ? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1314) edev = pci_dev_to_eeh_dev(pdev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1315) if (!edev || !edev->pe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1316) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1317)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1318) return edev->pe;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1319) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1320) EXPORT_SYMBOL_GPL(eeh_iommu_group_to_pe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1321)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1322) #endif /* CONFIG_IOMMU_API */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1323)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1324) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1325) * eeh_pe_set_option - Set options for the indicated PE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1326) * @pe: EEH PE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1327) * @option: requested option
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1328) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1329) * The routine is called to enable or disable EEH functionality
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1330) * on the indicated PE, to enable IO or DMA for the frozen PE.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1331) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1332) int eeh_pe_set_option(struct eeh_pe *pe, int option)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1333) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1334) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1335)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1336) /* Invalid PE ? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1337) if (!pe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1338) return -ENODEV;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1339)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1340) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1341) * EEH functionality could possibly be disabled, just
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1342) * return error for the case. And the EEH functinality
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1343) * isn't expected to be disabled on one specific PE.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1344) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1345) switch (option) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1346) case EEH_OPT_ENABLE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1347) if (eeh_enabled()) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1348) ret = eeh_pe_change_owner(pe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1349) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1350) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1351) ret = -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1352) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1353) case EEH_OPT_DISABLE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1354) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1355) case EEH_OPT_THAW_MMIO:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1356) case EEH_OPT_THAW_DMA:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1357) case EEH_OPT_FREEZE_PE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1358) if (!eeh_ops || !eeh_ops->set_option) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1359) ret = -ENOENT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1360) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1361) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1362)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1363) ret = eeh_pci_enable(pe, option);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1364) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1365) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1366) pr_debug("%s: Option %d out of range (%d, %d)\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1367) __func__, option, EEH_OPT_DISABLE, EEH_OPT_THAW_DMA);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1368) ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1369) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1370)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1371) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1372) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1373) EXPORT_SYMBOL_GPL(eeh_pe_set_option);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1374)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1375) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1376) * eeh_pe_get_state - Retrieve PE's state
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1377) * @pe: EEH PE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1378) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1379) * Retrieve the PE's state, which includes 3 aspects: enabled
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1380) * DMA, enabled IO and asserted reset.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1381) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1382) int eeh_pe_get_state(struct eeh_pe *pe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1383) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1384) int result, ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1385) bool rst_active, dma_en, mmio_en;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1386)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1387) /* Existing PE ? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1388) if (!pe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1389) return -ENODEV;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1390)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1391) if (!eeh_ops || !eeh_ops->get_state)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1392) return -ENOENT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1393)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1394) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1395) * If the parent PE is owned by the host kernel and is undergoing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1396) * error recovery, we should return the PE state as temporarily
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1397) * unavailable so that the error recovery on the guest is suspended
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1398) * until the recovery completes on the host.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1399) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1400) if (pe->parent &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1401) !(pe->state & EEH_PE_REMOVED) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1402) (pe->parent->state & (EEH_PE_ISOLATED | EEH_PE_RECOVERING)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1403) return EEH_PE_STATE_UNAVAIL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1404)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1405) result = eeh_ops->get_state(pe, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1406) rst_active = !!(result & EEH_STATE_RESET_ACTIVE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1407) dma_en = !!(result & EEH_STATE_DMA_ENABLED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1408) mmio_en = !!(result & EEH_STATE_MMIO_ENABLED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1409)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1410) if (rst_active)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1411) ret = EEH_PE_STATE_RESET;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1412) else if (dma_en && mmio_en)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1413) ret = EEH_PE_STATE_NORMAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1414) else if (!dma_en && !mmio_en)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1415) ret = EEH_PE_STATE_STOPPED_IO_DMA;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1416) else if (!dma_en && mmio_en)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1417) ret = EEH_PE_STATE_STOPPED_DMA;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1418) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1419) ret = EEH_PE_STATE_UNAVAIL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1420)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1421) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1422) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1423) EXPORT_SYMBOL_GPL(eeh_pe_get_state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1424)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1425) static int eeh_pe_reenable_devices(struct eeh_pe *pe, bool include_passed)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1426) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1427) struct eeh_dev *edev, *tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1428) struct pci_dev *pdev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1429) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1430)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1431) eeh_pe_restore_bars(pe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1432)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1433) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1434) * Reenable PCI devices as the devices passed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1435) * through are always enabled before the reset.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1436) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1437) eeh_pe_for_each_dev(pe, edev, tmp) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1438) pdev = eeh_dev_to_pci_dev(edev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1439) if (!pdev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1440) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1441)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1442) ret = pci_reenable_device(pdev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1443) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1444) pr_warn("%s: Failure %d reenabling %s\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1445) __func__, ret, pci_name(pdev));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1446) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1447) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1448) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1449)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1450) /* The PE is still in frozen state */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1451) if (include_passed || !eeh_pe_passed(pe)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1452) ret = eeh_unfreeze_pe(pe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1453) } else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1454) pr_info("EEH: Note: Leaving passthrough PHB#%x-PE#%x frozen.\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1455) pe->phb->global_number, pe->addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1456) if (!ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1457) eeh_pe_state_clear(pe, EEH_PE_ISOLATED, include_passed);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1458) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1459) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1460)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1461)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1462) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1463) * eeh_pe_reset - Issue PE reset according to specified type
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1464) * @pe: EEH PE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1465) * @option: reset type
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1466) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1467) * The routine is called to reset the specified PE with the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1468) * indicated type, either fundamental reset or hot reset.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1469) * PE reset is the most important part for error recovery.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1470) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1471) int eeh_pe_reset(struct eeh_pe *pe, int option, bool include_passed)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1472) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1473) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1474)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1475) /* Invalid PE ? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1476) if (!pe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1477) return -ENODEV;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1478)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1479) if (!eeh_ops || !eeh_ops->set_option || !eeh_ops->reset)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1480) return -ENOENT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1481)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1482) switch (option) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1483) case EEH_RESET_DEACTIVATE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1484) ret = eeh_ops->reset(pe, option);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1485) eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED, include_passed);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1486) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1487) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1488)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1489) ret = eeh_pe_reenable_devices(pe, include_passed);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1490) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1491) case EEH_RESET_HOT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1492) case EEH_RESET_FUNDAMENTAL:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1493) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1494) * Proactively freeze the PE to drop all MMIO access
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1495) * during reset, which should be banned as it's always
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1496) * cause recursive EEH error.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1497) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1498) eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1499)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1500) eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1501) ret = eeh_ops->reset(pe, option);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1502) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1503) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1504) pr_debug("%s: Unsupported option %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1505) __func__, option);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1506) ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1507) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1508)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1509) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1510) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1511) EXPORT_SYMBOL_GPL(eeh_pe_reset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1512)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1513) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1514) * eeh_pe_configure - Configure PCI bridges after PE reset
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1515) * @pe: EEH PE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1516) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1517) * The routine is called to restore the PCI config space for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1518) * those PCI devices, especially PCI bridges affected by PE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1519) * reset issued previously.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1520) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1521) int eeh_pe_configure(struct eeh_pe *pe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1522) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1523) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1524)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1525) /* Invalid PE ? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1526) if (!pe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1527) return -ENODEV;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1528)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1529) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1530) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1531) EXPORT_SYMBOL_GPL(eeh_pe_configure);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1532)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1533) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1534) * eeh_pe_inject_err - Injecting the specified PCI error to the indicated PE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1535) * @pe: the indicated PE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1536) * @type: error type
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1537) * @function: error function
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1538) * @addr: address
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1539) * @mask: address mask
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1540) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1541) * The routine is called to inject the specified PCI error, which
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1542) * is determined by @type and @function, to the indicated PE for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1543) * testing purpose.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1544) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1545) int eeh_pe_inject_err(struct eeh_pe *pe, int type, int func,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1546) unsigned long addr, unsigned long mask)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1547) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1548) /* Invalid PE ? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1549) if (!pe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1550) return -ENODEV;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1551)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1552) /* Unsupported operation ? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1553) if (!eeh_ops || !eeh_ops->err_inject)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1554) return -ENOENT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1555)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1556) /* Check on PCI error type */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1557) if (type != EEH_ERR_TYPE_32 && type != EEH_ERR_TYPE_64)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1558) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1559)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1560) /* Check on PCI error function */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1561) if (func < EEH_ERR_FUNC_MIN || func > EEH_ERR_FUNC_MAX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1562) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1563)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1564) return eeh_ops->err_inject(pe, type, func, addr, mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1565) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1566) EXPORT_SYMBOL_GPL(eeh_pe_inject_err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1567)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1568) static int proc_eeh_show(struct seq_file *m, void *v)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1569) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1570) if (!eeh_enabled()) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1571) seq_printf(m, "EEH Subsystem is globally disabled\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1572) seq_printf(m, "eeh_total_mmio_ffs=%llu\n", eeh_stats.total_mmio_ffs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1573) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1574) seq_printf(m, "EEH Subsystem is enabled\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1575) seq_printf(m,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1576) "no device=%llu\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1577) "no device node=%llu\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1578) "no config address=%llu\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1579) "check not wanted=%llu\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1580) "eeh_total_mmio_ffs=%llu\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1581) "eeh_false_positives=%llu\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1582) "eeh_slot_resets=%llu\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1583) eeh_stats.no_device,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1584) eeh_stats.no_dn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1585) eeh_stats.no_cfg_addr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1586) eeh_stats.ignored_check,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1587) eeh_stats.total_mmio_ffs,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1588) eeh_stats.false_positives,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1589) eeh_stats.slot_resets);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1590) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1591)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1592) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1593) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1594)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1595) #ifdef CONFIG_DEBUG_FS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1596) static int eeh_enable_dbgfs_set(void *data, u64 val)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1597) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1598) if (val)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1599) eeh_clear_flag(EEH_FORCE_DISABLED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1600) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1601) eeh_add_flag(EEH_FORCE_DISABLED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1602)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1603) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1604) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1605)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1606) static int eeh_enable_dbgfs_get(void *data, u64 *val)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1607) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1608) if (eeh_enabled())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1609) *val = 0x1ul;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1610) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1611) *val = 0x0ul;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1612) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1613) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1614)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1615) DEFINE_DEBUGFS_ATTRIBUTE(eeh_enable_dbgfs_ops, eeh_enable_dbgfs_get,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1616) eeh_enable_dbgfs_set, "0x%llx\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1617)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1618) static ssize_t eeh_force_recover_write(struct file *filp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1619) const char __user *user_buf,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1620) size_t count, loff_t *ppos)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1621) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1622) struct pci_controller *hose;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1623) uint32_t phbid, pe_no;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1624) struct eeh_pe *pe;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1625) char buf[20];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1626) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1627)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1628) ret = simple_write_to_buffer(buf, sizeof(buf), ppos, user_buf, count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1629) if (!ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1630) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1631)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1632) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1633) * When PE is NULL the event is a "special" event. Rather than
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1634) * recovering a specific PE it forces the EEH core to scan for failed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1635) * PHBs and recovers each. This needs to be done before any device
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1636) * recoveries can occur.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1637) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1638) if (!strncmp(buf, "hwcheck", 7)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1639) __eeh_send_failure_event(NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1640) return count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1641) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1642)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1643) ret = sscanf(buf, "%x:%x", &phbid, &pe_no);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1644) if (ret != 2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1645) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1646)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1647) hose = pci_find_controller_for_domain(phbid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1648) if (!hose)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1649) return -ENODEV;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1650)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1651) /* Retrieve PE */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1652) pe = eeh_pe_get(hose, pe_no);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1653) if (!pe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1654) return -ENODEV;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1655)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1656) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1657) * We don't do any state checking here since the detection
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1658) * process is async to the recovery process. The recovery
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1659) * thread *should* not break even if we schedule a recovery
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1660) * from an odd state (e.g. PE removed, or recovery of a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1661) * non-isolated PE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1662) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1663) __eeh_send_failure_event(pe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1664)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1665) return ret < 0 ? ret : count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1666) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1667)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1668) static const struct file_operations eeh_force_recover_fops = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1669) .open = simple_open,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1670) .llseek = no_llseek,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1671) .write = eeh_force_recover_write,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1672) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1673)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1674) static ssize_t eeh_debugfs_dev_usage(struct file *filp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1675) char __user *user_buf,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1676) size_t count, loff_t *ppos)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1677) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1678) static const char usage[] = "input format: <domain>:<bus>:<dev>.<fn>\n";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1679)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1680) return simple_read_from_buffer(user_buf, count, ppos,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1681) usage, sizeof(usage) - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1682) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1683)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1684) static ssize_t eeh_dev_check_write(struct file *filp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1685) const char __user *user_buf,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1686) size_t count, loff_t *ppos)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1687) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1688) uint32_t domain, bus, dev, fn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1689) struct pci_dev *pdev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1690) struct eeh_dev *edev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1691) char buf[20];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1692) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1693)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1694) memset(buf, 0, sizeof(buf));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1695) ret = simple_write_to_buffer(buf, sizeof(buf)-1, ppos, user_buf, count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1696) if (!ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1697) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1698)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1699) ret = sscanf(buf, "%x:%x:%x.%x", &domain, &bus, &dev, &fn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1700) if (ret != 4) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1701) pr_err("%s: expected 4 args, got %d\n", __func__, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1702) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1703) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1704)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1705) pdev = pci_get_domain_bus_and_slot(domain, bus, (dev << 3) | fn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1706) if (!pdev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1707) return -ENODEV;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1708)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1709) edev = pci_dev_to_eeh_dev(pdev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1710) if (!edev) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1711) pci_err(pdev, "No eeh_dev for this device!\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1712) pci_dev_put(pdev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1713) return -ENODEV;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1714) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1715)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1716) ret = eeh_dev_check_failure(edev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1717) pci_info(pdev, "eeh_dev_check_failure(%04x:%02x:%02x.%01x) = %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1718) domain, bus, dev, fn, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1719)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1720) pci_dev_put(pdev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1721)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1722) return count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1723) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1724)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1725) static const struct file_operations eeh_dev_check_fops = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1726) .open = simple_open,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1727) .llseek = no_llseek,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1728) .write = eeh_dev_check_write,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1729) .read = eeh_debugfs_dev_usage,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1730) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1731)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1732) static int eeh_debugfs_break_device(struct pci_dev *pdev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1733) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1734) struct resource *bar = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1735) void __iomem *mapped;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1736) u16 old, bit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1737) int i, pos;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1738)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1739) /* Do we have an MMIO BAR to disable? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1740) for (i = 0; i <= PCI_STD_RESOURCE_END; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1741) struct resource *r = &pdev->resource[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1742)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1743) if (!r->flags || !r->start)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1744) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1745) if (r->flags & IORESOURCE_IO)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1746) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1747) if (r->flags & IORESOURCE_UNSET)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1748) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1749)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1750) bar = r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1751) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1752) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1753)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1754) if (!bar) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1755) pci_err(pdev, "Unable to find Memory BAR to cause EEH with\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1756) return -ENXIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1757) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1758)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1759) pci_err(pdev, "Going to break: %pR\n", bar);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1760)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1761) if (pdev->is_virtfn) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1762) #ifndef CONFIG_PCI_IOV
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1763) return -ENXIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1764) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1765) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1766) * VFs don't have a per-function COMMAND register, so the best
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1767) * we can do is clear the Memory Space Enable bit in the PF's
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1768) * SRIOV control reg.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1769) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1770) * Unfortunately, this requires that we have a PF (i.e doesn't
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1771) * work for a passed-through VF) and it has the potential side
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1772) * effect of also causing an EEH on every other VF under the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1773) * PF. Oh well.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1774) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1775) pdev = pdev->physfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1776) if (!pdev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1777) return -ENXIO; /* passed through VFs have no PF */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1778)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1779) pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_SRIOV);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1780) pos += PCI_SRIOV_CTRL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1781) bit = PCI_SRIOV_CTRL_MSE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1782) #endif /* !CONFIG_PCI_IOV */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1783) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1784) bit = PCI_COMMAND_MEMORY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1785) pos = PCI_COMMAND;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1786) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1787)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1788) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1789) * Process here is:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1790) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1791) * 1. Disable Memory space.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1792) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1793) * 2. Perform an MMIO to the device. This should result in an error
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1794) * (CA / UR) being raised by the device which results in an EEH
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1795) * PE freeze. Using the in_8() accessor skips the eeh detection hook
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1796) * so the freeze hook so the EEH Detection machinery won't be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1797) * triggered here. This is to match the usual behaviour of EEH
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1798) * where the HW will asyncronously freeze a PE and it's up to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1799) * the kernel to notice and deal with it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1800) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1801) * 3. Turn Memory space back on. This is more important for VFs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1802) * since recovery will probably fail if we don't. For normal
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1803) * the COMMAND register is reset as a part of re-initialising
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1804) * the device.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1805) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1806) * Breaking stuff is the point so who cares if it's racy ;)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1807) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1808) pci_read_config_word(pdev, pos, &old);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1809)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1810) mapped = ioremap(bar->start, PAGE_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1811) if (!mapped) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1812) pci_err(pdev, "Unable to map MMIO BAR %pR\n", bar);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1813) return -ENXIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1814) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1815)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1816) pci_write_config_word(pdev, pos, old & ~bit);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1817) in_8(mapped);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1818) pci_write_config_word(pdev, pos, old);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1819)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1820) iounmap(mapped);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1821)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1822) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1823) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1824)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1825) static ssize_t eeh_dev_break_write(struct file *filp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1826) const char __user *user_buf,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1827) size_t count, loff_t *ppos)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1828) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1829) uint32_t domain, bus, dev, fn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1830) struct pci_dev *pdev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1831) char buf[20];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1832) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1833)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1834) memset(buf, 0, sizeof(buf));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1835) ret = simple_write_to_buffer(buf, sizeof(buf)-1, ppos, user_buf, count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1836) if (!ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1837) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1838)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1839) ret = sscanf(buf, "%x:%x:%x.%x", &domain, &bus, &dev, &fn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1840) if (ret != 4) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1841) pr_err("%s: expected 4 args, got %d\n", __func__, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1842) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1843) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1844)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1845) pdev = pci_get_domain_bus_and_slot(domain, bus, (dev << 3) | fn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1846) if (!pdev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1847) return -ENODEV;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1848)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1849) ret = eeh_debugfs_break_device(pdev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1850) pci_dev_put(pdev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1851)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1852) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1853) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1854)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1855) return count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1856) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1857)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1858) static const struct file_operations eeh_dev_break_fops = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1859) .open = simple_open,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1860) .llseek = no_llseek,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1861) .write = eeh_dev_break_write,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1862) .read = eeh_debugfs_dev_usage,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1863) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1864)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1865) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1866)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1867) static int __init eeh_init_proc(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1868) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1869) if (machine_is(pseries) || machine_is(powernv)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1870) proc_create_single("powerpc/eeh", 0, NULL, proc_eeh_show);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1871) #ifdef CONFIG_DEBUG_FS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1872) debugfs_create_file_unsafe("eeh_enable", 0600,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1873) powerpc_debugfs_root, NULL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1874) &eeh_enable_dbgfs_ops);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1875) debugfs_create_u32("eeh_max_freezes", 0600,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1876) powerpc_debugfs_root, &eeh_max_freezes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1877) debugfs_create_bool("eeh_disable_recovery", 0600,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1878) powerpc_debugfs_root,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1879) &eeh_debugfs_no_recover);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1880) debugfs_create_file_unsafe("eeh_dev_check", 0600,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1881) powerpc_debugfs_root, NULL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1882) &eeh_dev_check_fops);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1883) debugfs_create_file_unsafe("eeh_dev_break", 0600,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1884) powerpc_debugfs_root, NULL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1885) &eeh_dev_break_fops);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1886) debugfs_create_file_unsafe("eeh_force_recover", 0600,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1887) powerpc_debugfs_root, NULL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1888) &eeh_force_recover_fops);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1889) eeh_cache_debugfs_init();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1890) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1891) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1892)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1893) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1894) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1895) __initcall(eeh_init_proc);