^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) // SPDX-License-Identifier: GPL-2.0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) * This file implements the error recovery as a core part of PCIe error
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) * reporting. When a PCIe error is delivered, an error message will be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) * collected and printed to console, then, an error recovery procedure
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) * will be executed by following the PCI error recovery rules.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) * Copyright (C) 2006 Intel Corp.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) * Tom Long Nguyen (tom.l.nguyen@intel.com)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) * Zhang Yanmin (yanmin.zhang@intel.com)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) #define dev_fmt(fmt) "AER: " fmt
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15) #include <linux/pci.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) #include <linux/module.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) #include <linux/kernel.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18) #include <linux/errno.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19) #include <linux/aer.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20) #include "portdrv.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21) #include "../pci.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23) static pci_ers_result_t merge_result(enum pci_ers_result orig,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24) enum pci_ers_result new)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) if (new == PCI_ERS_RESULT_NO_AER_DRIVER)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27) return PCI_ERS_RESULT_NO_AER_DRIVER;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29) if (new == PCI_ERS_RESULT_NONE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30) return orig;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32) switch (orig) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33) case PCI_ERS_RESULT_CAN_RECOVER:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34) case PCI_ERS_RESULT_RECOVERED:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35) orig = new;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37) case PCI_ERS_RESULT_DISCONNECT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38) if (new == PCI_ERS_RESULT_NEED_RESET)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39) orig = PCI_ERS_RESULT_NEED_RESET;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45) return orig;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48) static int report_error_detected(struct pci_dev *dev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49) pci_channel_state_t state,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50) enum pci_ers_result *result)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52) pci_ers_result_t vote;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53) const struct pci_error_handlers *err_handler;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55) device_lock(&dev->dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56) if (!pci_dev_set_io_state(dev, state) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57) !dev->driver ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58) !dev->driver->err_handler ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59) !dev->driver->err_handler->error_detected) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61) * If any device in the subtree does not have an error_detected
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62) * callback, PCI_ERS_RESULT_NO_AER_DRIVER prevents subsequent
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63) * error callbacks of "any" device in the subtree, and will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64) * exit in the disconnected error state.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66) if (dev->hdr_type != PCI_HEADER_TYPE_BRIDGE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67) vote = PCI_ERS_RESULT_NO_AER_DRIVER;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68) pci_info(dev, "can't recover (no error_detected callback)\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70) vote = PCI_ERS_RESULT_NONE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73) err_handler = dev->driver->err_handler;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74) vote = err_handler->error_detected(dev, state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76) pci_uevent_ers(dev, vote);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77) *result = merge_result(*result, vote);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78) device_unlock(&dev->dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82) static int report_frozen_detected(struct pci_dev *dev, void *data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 83) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 84) return report_error_detected(dev, pci_channel_io_frozen, data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 85) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 86)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 87) static int report_normal_detected(struct pci_dev *dev, void *data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89) return report_error_detected(dev, pci_channel_io_normal, data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92) static int report_mmio_enabled(struct pci_dev *dev, void *data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94) pci_ers_result_t vote, *result = data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95) const struct pci_error_handlers *err_handler;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 96)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97) device_lock(&dev->dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 98) if (!dev->driver ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 99) !dev->driver->err_handler ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100) !dev->driver->err_handler->mmio_enabled)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) err_handler = dev->driver->err_handler;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104) vote = err_handler->mmio_enabled(dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105) *result = merge_result(*result, vote);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) device_unlock(&dev->dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) static int report_slot_reset(struct pci_dev *dev, void *data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113) pci_ers_result_t vote, *result = data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) const struct pci_error_handlers *err_handler;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) device_lock(&dev->dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117) if (!dev->driver ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118) !dev->driver->err_handler ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) !dev->driver->err_handler->slot_reset)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122) err_handler = dev->driver->err_handler;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) vote = err_handler->slot_reset(dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124) *result = merge_result(*result, vote);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) device_unlock(&dev->dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130) static int report_resume(struct pci_dev *dev, void *data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) const struct pci_error_handlers *err_handler;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134) device_lock(&dev->dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135) if (!pci_dev_set_io_state(dev, pci_channel_io_normal) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136) !dev->driver ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137) !dev->driver->err_handler ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138) !dev->driver->err_handler->resume)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141) err_handler = dev->driver->err_handler;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142) err_handler->resume(dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144) pci_uevent_ers(dev, PCI_ERS_RESULT_RECOVERED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145) device_unlock(&dev->dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149) pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150) pci_channel_state_t state,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151) pci_ers_result_t (*reset_link)(struct pci_dev *pdev))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153) pci_ers_result_t status = PCI_ERS_RESULT_CAN_RECOVER;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154) struct pci_bus *bus;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157) * Error recovery runs on all subordinates of the first downstream port.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158) * If the downstream port detected the error, it is cleared at the end.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 160) if (!(pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 161) pci_pcie_type(dev) == PCI_EXP_TYPE_DOWNSTREAM))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 162) dev = dev->bus->self;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 163) bus = dev->subordinate;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 164)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 165) pci_dbg(dev, "broadcast error_detected message\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 166) if (state == pci_channel_io_frozen) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167) pci_walk_bus(bus, report_frozen_detected, &status);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 168) status = reset_link(dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169) if (status != PCI_ERS_RESULT_RECOVERED) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170) pci_warn(dev, "link reset failed\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171) goto failed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 173) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 174) pci_walk_bus(bus, report_normal_detected, &status);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 175) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 176)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 177) if (status == PCI_ERS_RESULT_CAN_RECOVER) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 178) status = PCI_ERS_RESULT_RECOVERED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 179) pci_dbg(dev, "broadcast mmio_enabled message\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 180) pci_walk_bus(bus, report_mmio_enabled, &status);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 181) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 182)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 183) if (status == PCI_ERS_RESULT_NEED_RESET) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 184) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 185) * TODO: Should call platform-specific
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 186) * functions to reset slot before calling
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 187) * drivers' slot_reset callbacks?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 188) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 189) status = PCI_ERS_RESULT_RECOVERED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 190) pci_dbg(dev, "broadcast slot_reset message\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 191) pci_walk_bus(bus, report_slot_reset, &status);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 192) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 193)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 194) if (status != PCI_ERS_RESULT_RECOVERED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 195) goto failed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 196)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 197) pci_dbg(dev, "broadcast resume message\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 198) pci_walk_bus(bus, report_resume, &status);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 199)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 200) if (pcie_aer_is_native(dev))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 201) pcie_clear_device_status(dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 202) pci_aer_clear_nonfatal_status(dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 203) pci_info(dev, "device recovery successful\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 204) return status;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 205)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 206) failed:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 207) pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 208)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 209) /* TODO: Should kernel panic here? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 210) pci_info(dev, "device recovery failed\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 211)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 212) return status;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 213) }