^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) // SPDX-License-Identifier: GPL-2.0-only
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) * GHES/EDAC Linux driver
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) * Copyright (c) 2013 by Mauro Carvalho Chehab
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) * Red Hat Inc. https://www.redhat.com
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12) #include <acpi/ghes.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) #include <linux/edac.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) #include <linux/dmi.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15) #include "edac_module.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) #include <ras/ras_event.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17)
/*
 * Driver-private state.
 *
 * @mci:          the memory controller instance errors are reported against
 * @other_detail: scratch buffer that backs edac_raw_error_desc.other_detail
 * @msg:          scratch buffer that backs edac_raw_error_desc.msg
 *
 * Both character arrays are reused for every reported error, so they are
 * only valid while the error-handling routine is running.
 */
struct ghes_pvt {
	struct mem_ctl_info *mci;

	/* Scratch space for the error handling routine */
	char other_detail[400];
	char msg[80];
};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) static refcount_t ghes_refcount = REFCOUNT_INIT(0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29) * Access to ghes_pvt must be protected by ghes_lock. The spinlock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30) * also provides the necessary (implicit) memory barrier for the SMP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31) * case to make the pointer visible on another CPU.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33) static struct ghes_pvt *ghes_pvt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34)
/*
 * This driver's representation of the system hardware, as collected
 * from DMI.
 *
 * @num_dimms: number of valid entries in @dimms
 * @dimms:     heap-allocated array of DIMM descriptors, grown on demand
 *             while the DMI table is walked
 *
 * The single instance is private to this file, hence static: leaving it
 * with external linkage needlessly exports the symbol "ghes_hw" into the
 * kernel's global namespace.
 */
static struct ghes_hw_desc {
	int num_dimms;
	struct dimm_info *dimms;
} ghes_hw;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44) /* GHES registration mutex */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45) static DEFINE_MUTEX(ghes_reg_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48) * Sync with other, potentially concurrent callers of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49) * ghes_edac_report_mem_error(). We don't know what the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50) * "inventive" firmware would do.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52) static DEFINE_SPINLOCK(ghes_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54) /* "ghes_edac.force_load=1" skips the platform check */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55) static bool __read_mostly force_load;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56) module_param(force_load, bool, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58) static bool system_scanned;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60) /* Memory Device - Type 17 of SMBIOS spec */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61) struct memdev_dmi_entry {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62) u8 type;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63) u8 length;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64) u16 handle;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65) u16 phys_mem_array_handle;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66) u16 mem_err_info_handle;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67) u16 total_width;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68) u16 data_width;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69) u16 size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70) u8 form_factor;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71) u8 device_set;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72) u8 device_locator;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73) u8 bank_locator;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74) u8 memory_type;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75) u16 type_detail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76) u16 speed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77) u8 manufacturer;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78) u8 serial_number;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79) u8 asset_tag;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80) u8 part_number;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81) u8 attributes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82) u32 extended_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 83) u16 conf_mem_clk_speed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 84) } __attribute__((__packed__));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 85)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 86) static struct dimm_info *find_dimm_by_handle(struct mem_ctl_info *mci, u16 handle)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 87) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88) struct dimm_info *dimm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90) mci_for_each_dimm(mci, dimm) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91) if (dimm->smbios_handle == handle)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92) return dimm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 96) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 98) static void dimm_setup_label(struct dimm_info *dimm, u16 handle)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 99) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100) const char *bank = NULL, *device = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) dmi_memdev_name(handle, &bank, &device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104) /* both strings must be non-zero */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105) if (bank && *bank && device && *device)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) snprintf(dimm->label, sizeof(dimm->label), "%s %s", bank, device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) static void assign_dmi_dimm_info(struct dimm_info *dimm, struct memdev_dmi_entry *entry)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) u16 rdr_mask = BIT(7) | BIT(13);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113) if (entry->size == 0xffff) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) pr_info("Can't get DIMM%i size\n", dimm->idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) dimm->nr_pages = MiB_TO_PAGES(32);/* Unknown */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) } else if (entry->size == 0x7fff) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117) dimm->nr_pages = MiB_TO_PAGES(entry->extended_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) if (entry->size & BIT(15))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120) dimm->nr_pages = MiB_TO_PAGES((entry->size & 0x7fff) << 10);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122) dimm->nr_pages = MiB_TO_PAGES(entry->size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125) switch (entry->memory_type) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) case 0x12:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127) if (entry->type_detail & BIT(13))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128) dimm->mtype = MEM_RDDR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130) dimm->mtype = MEM_DDR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) case 0x13:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133) if (entry->type_detail & BIT(13))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134) dimm->mtype = MEM_RDDR2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136) dimm->mtype = MEM_DDR2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138) case 0x14:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139) dimm->mtype = MEM_FB_DDR2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141) case 0x18:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142) if (entry->type_detail & BIT(12))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143) dimm->mtype = MEM_NVDIMM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144) else if (entry->type_detail & BIT(13))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145) dimm->mtype = MEM_RDDR3;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147) dimm->mtype = MEM_DDR3;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149) case 0x1a:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150) if (entry->type_detail & BIT(12))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151) dimm->mtype = MEM_NVDIMM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152) else if (entry->type_detail & BIT(13))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153) dimm->mtype = MEM_RDDR4;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155) dimm->mtype = MEM_DDR4;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158) if (entry->type_detail & BIT(6))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159) dimm->mtype = MEM_RMBS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 160) else if ((entry->type_detail & rdr_mask) == rdr_mask)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 161) dimm->mtype = MEM_RDR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 162) else if (entry->type_detail & BIT(7))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 163) dimm->mtype = MEM_SDR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 164) else if (entry->type_detail & BIT(9))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 165) dimm->mtype = MEM_EDO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 166) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167) dimm->mtype = MEM_UNKNOWN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 168) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171) * Actually, we can only detect if the memory has bits for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172) * checksum or not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 173) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 174) if (entry->total_width == entry->data_width)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 175) dimm->edac_mode = EDAC_NONE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 176) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 177) dimm->edac_mode = EDAC_SECDED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 178)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 179) dimm->dtype = DEV_UNKNOWN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 180) dimm->grain = 128; /* Likely, worse case */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 181)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 182) dimm_setup_label(dimm, entry->handle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 183)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 184) if (dimm->nr_pages) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 185) edac_dbg(1, "DIMM%i: %s size = %d MB%s\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 186) dimm->idx, edac_mem_types[dimm->mtype],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 187) PAGES_TO_MiB(dimm->nr_pages),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 188) (dimm->edac_mode != EDAC_NONE) ? "(ECC)" : "");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 189) edac_dbg(2, "\ttype %d, detail 0x%02x, width %d(total %d)\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 190) entry->memory_type, entry->type_detail,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 191) entry->total_width, entry->data_width);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 192) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 193)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 194) dimm->smbios_handle = entry->handle;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 195) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 196)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 197) static void enumerate_dimms(const struct dmi_header *dh, void *arg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 198) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 199) struct memdev_dmi_entry *entry = (struct memdev_dmi_entry *)dh;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 200) struct ghes_hw_desc *hw = (struct ghes_hw_desc *)arg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 201) struct dimm_info *d;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 202)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 203) if (dh->type != DMI_ENTRY_MEM_DEVICE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 204) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 205)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 206) /* Enlarge the array with additional 16 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 207) if (!hw->num_dimms || !(hw->num_dimms % 16)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 208) struct dimm_info *new;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 209)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 210) new = krealloc(hw->dimms, (hw->num_dimms + 16) * sizeof(struct dimm_info),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 211) GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 212) if (!new) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 213) WARN_ON_ONCE(1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 214) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 215) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 216)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 217) hw->dimms = new;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 218) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 219)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 220) d = &hw->dimms[hw->num_dimms];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 221) d->idx = hw->num_dimms;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 222)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 223) assign_dmi_dimm_info(d, entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 224)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 225) hw->num_dimms++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 226) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 227)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 228) static void ghes_scan_system(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 229) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 230) if (system_scanned)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 231) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 232)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 233) dmi_walk(enumerate_dimms, &ghes_hw);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 234)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 235) system_scanned = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 236) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 237)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 238) void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 239) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 240) struct edac_raw_error_desc *e;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 241) struct mem_ctl_info *mci;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 242) struct ghes_pvt *pvt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 243) unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 244) char *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 245)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 246) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 247) * We can do the locking below because GHES defers error processing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 248) * from NMI to IRQ context. Whenever that changes, we'd at least
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 249) * know.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 250) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 251) if (WARN_ON_ONCE(in_nmi()))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 252) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 253)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 254) spin_lock_irqsave(&ghes_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 255)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 256) pvt = ghes_pvt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 257) if (!pvt)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 258) goto unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 259)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 260) mci = pvt->mci;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 261) e = &mci->error_desc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 262)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 263) /* Cleans the error report buffer */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 264) memset(e, 0, sizeof (*e));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 265) e->error_count = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 266) e->grain = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 267) e->msg = pvt->msg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 268) e->other_detail = pvt->other_detail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 269) e->top_layer = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 270) e->mid_layer = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 271) e->low_layer = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 272) *pvt->other_detail = '\0';
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 273) *pvt->msg = '\0';
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 274)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 275) switch (sev) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 276) case GHES_SEV_CORRECTED:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 277) e->type = HW_EVENT_ERR_CORRECTED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 278) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 279) case GHES_SEV_RECOVERABLE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 280) e->type = HW_EVENT_ERR_UNCORRECTED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 281) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 282) case GHES_SEV_PANIC:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 283) e->type = HW_EVENT_ERR_FATAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 284) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 285) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 286) case GHES_SEV_NO:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 287) e->type = HW_EVENT_ERR_INFO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 288) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 289)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 290) edac_dbg(1, "error validation_bits: 0x%08llx\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 291) (long long)mem_err->validation_bits);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 292)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 293) /* Error type, mapped on e->msg */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 294) if (mem_err->validation_bits & CPER_MEM_VALID_ERROR_TYPE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 295) p = pvt->msg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 296) switch (mem_err->error_type) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 297) case 0:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 298) p += sprintf(p, "Unknown");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 299) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 300) case 1:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 301) p += sprintf(p, "No error");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 302) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 303) case 2:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 304) p += sprintf(p, "Single-bit ECC");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 305) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 306) case 3:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 307) p += sprintf(p, "Multi-bit ECC");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 308) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 309) case 4:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 310) p += sprintf(p, "Single-symbol ChipKill ECC");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 311) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 312) case 5:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 313) p += sprintf(p, "Multi-symbol ChipKill ECC");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 314) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 315) case 6:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 316) p += sprintf(p, "Master abort");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 317) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 318) case 7:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 319) p += sprintf(p, "Target abort");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 320) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 321) case 8:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 322) p += sprintf(p, "Parity Error");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 323) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 324) case 9:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 325) p += sprintf(p, "Watchdog timeout");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 326) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 327) case 10:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 328) p += sprintf(p, "Invalid address");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 329) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 330) case 11:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 331) p += sprintf(p, "Mirror Broken");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 332) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 333) case 12:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 334) p += sprintf(p, "Memory Sparing");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 335) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 336) case 13:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 337) p += sprintf(p, "Scrub corrected error");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 338) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 339) case 14:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 340) p += sprintf(p, "Scrub uncorrected error");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 341) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 342) case 15:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 343) p += sprintf(p, "Physical Memory Map-out event");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 344) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 345) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 346) p += sprintf(p, "reserved error (%d)",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 347) mem_err->error_type);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 348) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 349) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 350) strcpy(pvt->msg, "unknown error");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 351) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 352)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 353) /* Error address */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 354) if (mem_err->validation_bits & CPER_MEM_VALID_PA) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 355) e->page_frame_number = PHYS_PFN(mem_err->physical_addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 356) e->offset_in_page = offset_in_page(mem_err->physical_addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 357) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 358)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 359) /* Error grain */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 360) if (mem_err->validation_bits & CPER_MEM_VALID_PA_MASK)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 361) e->grain = ~mem_err->physical_addr_mask + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 362)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 363) /* Memory error location, mapped on e->location */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 364) p = e->location;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 365) if (mem_err->validation_bits & CPER_MEM_VALID_NODE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 366) p += sprintf(p, "node:%d ", mem_err->node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 367) if (mem_err->validation_bits & CPER_MEM_VALID_CARD)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 368) p += sprintf(p, "card:%d ", mem_err->card);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 369) if (mem_err->validation_bits & CPER_MEM_VALID_MODULE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 370) p += sprintf(p, "module:%d ", mem_err->module);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 371) if (mem_err->validation_bits & CPER_MEM_VALID_RANK_NUMBER)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 372) p += sprintf(p, "rank:%d ", mem_err->rank);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 373) if (mem_err->validation_bits & CPER_MEM_VALID_BANK)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 374) p += sprintf(p, "bank:%d ", mem_err->bank);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 375) if (mem_err->validation_bits & CPER_MEM_VALID_BANK_GROUP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 376) p += sprintf(p, "bank_group:%d ",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 377) mem_err->bank >> CPER_MEM_BANK_GROUP_SHIFT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 378) if (mem_err->validation_bits & CPER_MEM_VALID_BANK_ADDRESS)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 379) p += sprintf(p, "bank_address:%d ",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 380) mem_err->bank & CPER_MEM_BANK_ADDRESS_MASK);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 381) if (mem_err->validation_bits & (CPER_MEM_VALID_ROW | CPER_MEM_VALID_ROW_EXT)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 382) u32 row = mem_err->row;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 383)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 384) row |= cper_get_mem_extension(mem_err->validation_bits, mem_err->extended);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 385) p += sprintf(p, "row:%d ", row);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 386) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 387) if (mem_err->validation_bits & CPER_MEM_VALID_COLUMN)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 388) p += sprintf(p, "col:%d ", mem_err->column);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 389) if (mem_err->validation_bits & CPER_MEM_VALID_BIT_POSITION)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 390) p += sprintf(p, "bit_pos:%d ", mem_err->bit_pos);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 391) if (mem_err->validation_bits & CPER_MEM_VALID_MODULE_HANDLE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 392) const char *bank = NULL, *device = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 393) struct dimm_info *dimm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 394)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 395) dmi_memdev_name(mem_err->mem_dev_handle, &bank, &device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 396) if (bank != NULL && device != NULL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 397) p += sprintf(p, "DIMM location:%s %s ", bank, device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 398) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 399) p += sprintf(p, "DIMM DMI handle: 0x%.4x ",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 400) mem_err->mem_dev_handle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 401)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 402) dimm = find_dimm_by_handle(mci, mem_err->mem_dev_handle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 403) if (dimm) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 404) e->top_layer = dimm->idx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 405) strcpy(e->label, dimm->label);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 406) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 407) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 408) if (mem_err->validation_bits & CPER_MEM_VALID_CHIP_ID)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 409) p += sprintf(p, "chipID: %d ",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 410) mem_err->extended >> CPER_MEM_CHIP_ID_SHIFT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 411) if (p > e->location)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 412) *(p - 1) = '\0';
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 413)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 414) if (!*e->label)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 415) strcpy(e->label, "unknown memory");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 416)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 417) /* All other fields are mapped on e->other_detail */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 418) p = pvt->other_detail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 419) p += snprintf(p, sizeof(pvt->other_detail),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 420) "APEI location: %s ", e->location);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 421) if (mem_err->validation_bits & CPER_MEM_VALID_ERROR_STATUS) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 422) u64 status = mem_err->error_status;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 423)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 424) p += sprintf(p, "status(0x%016llx): ", (long long)status);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 425) switch ((status >> 8) & 0xff) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 426) case 1:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 427) p += sprintf(p, "Error detected internal to the component ");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 428) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 429) case 16:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 430) p += sprintf(p, "Error detected in the bus ");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 431) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 432) case 4:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 433) p += sprintf(p, "Storage error in DRAM memory ");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 434) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 435) case 5:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 436) p += sprintf(p, "Storage error in TLB ");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 437) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 438) case 6:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 439) p += sprintf(p, "Storage error in cache ");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 440) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 441) case 7:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 442) p += sprintf(p, "Error in one or more functional units ");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 443) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 444) case 8:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 445) p += sprintf(p, "component failed self test ");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 446) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 447) case 9:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 448) p += sprintf(p, "Overflow or undervalue of internal queue ");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 449) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 450) case 17:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 451) p += sprintf(p, "Virtual address not found on IO-TLB or IO-PDIR ");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 452) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 453) case 18:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 454) p += sprintf(p, "Improper access error ");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 455) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 456) case 19:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 457) p += sprintf(p, "Access to a memory address which is not mapped to any component ");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 458) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 459) case 20:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 460) p += sprintf(p, "Loss of Lockstep ");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 461) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 462) case 21:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 463) p += sprintf(p, "Response not associated with a request ");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 464) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 465) case 22:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 466) p += sprintf(p, "Bus parity error - must also set the A, C, or D Bits ");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 467) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 468) case 23:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 469) p += sprintf(p, "Detection of a PATH_ERROR ");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 470) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 471) case 25:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 472) p += sprintf(p, "Bus operation timeout ");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 473) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 474) case 26:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 475) p += sprintf(p, "A read was issued to data that has been poisoned ");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 476) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 477) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 478) p += sprintf(p, "reserved ");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 479) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 480) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 481) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 482) if (mem_err->validation_bits & CPER_MEM_VALID_REQUESTOR_ID)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 483) p += sprintf(p, "requestorID: 0x%016llx ",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 484) (long long)mem_err->requestor_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 485) if (mem_err->validation_bits & CPER_MEM_VALID_RESPONDER_ID)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 486) p += sprintf(p, "responderID: 0x%016llx ",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 487) (long long)mem_err->responder_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 488) if (mem_err->validation_bits & CPER_MEM_VALID_TARGET_ID)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 489) p += sprintf(p, "targetID: 0x%016llx ",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 490) (long long)mem_err->responder_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 491) if (p > pvt->other_detail)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 492) *(p - 1) = '\0';
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 493)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 494) edac_raw_mc_handle_error(e);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 495)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 496) unlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 497) spin_unlock_irqrestore(&ghes_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 498) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 499)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 500) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 501) * Known systems that are safe to enable this module.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 502) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 503) static struct acpi_platform_list plat_list[] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 504) {"HPE ", "Server ", 0, ACPI_SIG_FADT, all_versions},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 505) { } /* End */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 506) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 507)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 508) int ghes_edac_register(struct ghes *ghes, struct device *dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 509) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 510) bool fake = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 511) struct mem_ctl_info *mci;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 512) struct ghes_pvt *pvt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 513) struct edac_mc_layer layers[1];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 514) unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 515) int idx = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 516) int rc = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 517)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 518) if (IS_ENABLED(CONFIG_X86)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 519) /* Check if safe to enable on this system */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 520) idx = acpi_match_platform_list(plat_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 521) if (!force_load && idx < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 522) return -ENODEV;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 523) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 524) force_load = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 525) idx = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 526) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 527)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 528) /* finish another registration/unregistration instance first */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 529) mutex_lock(&ghes_reg_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 530)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 531) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 532) * We have only one logical memory controller to which all DIMMs belong.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 533) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 534) if (refcount_inc_not_zero(&ghes_refcount))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 535) goto unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 536)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 537) ghes_scan_system();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 538)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 539) /* Check if we've got a bogus BIOS */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 540) if (!ghes_hw.num_dimms) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 541) fake = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 542) ghes_hw.num_dimms = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 543) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 544)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 545) layers[0].type = EDAC_MC_LAYER_ALL_MEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 546) layers[0].size = ghes_hw.num_dimms;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 547) layers[0].is_virt_csrow = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 548)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 549) mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, sizeof(struct ghes_pvt));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 550) if (!mci) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 551) pr_info("Can't allocate memory for EDAC data\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 552) rc = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 553) goto unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 554) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 555)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 556) pvt = mci->pvt_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 557) pvt->mci = mci;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 558)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 559) mci->pdev = dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 560) mci->mtype_cap = MEM_FLAG_EMPTY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 561) mci->edac_ctl_cap = EDAC_FLAG_NONE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 562) mci->edac_cap = EDAC_FLAG_NONE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 563) mci->mod_name = "ghes_edac.c";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 564) mci->ctl_name = "ghes_edac";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 565) mci->dev_name = "ghes";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 566)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 567) if (fake) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 568) pr_info("This system has a very crappy BIOS: It doesn't even list the DIMMS.\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 569) pr_info("Its SMBIOS info is wrong. It is doubtful that the error report would\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 570) pr_info("work on such system. Use this driver with caution\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 571) } else if (idx < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 572) pr_info("This EDAC driver relies on BIOS to enumerate memory and get error reports.\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 573) pr_info("Unfortunately, not all BIOSes reflect the memory layout correctly.\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 574) pr_info("So, the end result of using this driver varies from vendor to vendor.\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 575) pr_info("If you find incorrect reports, please contact your hardware vendor\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 576) pr_info("to correct its BIOS.\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 577) pr_info("This system has %d DIMM sockets.\n", ghes_hw.num_dimms);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 578) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 579)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 580) if (!fake) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 581) struct dimm_info *src, *dst;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 582) int i = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 583)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 584) mci_for_each_dimm(mci, dst) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 585) src = &ghes_hw.dimms[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 586)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 587) dst->idx = src->idx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 588) dst->smbios_handle = src->smbios_handle;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 589) dst->nr_pages = src->nr_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 590) dst->mtype = src->mtype;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 591) dst->edac_mode = src->edac_mode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 592) dst->dtype = src->dtype;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 593) dst->grain = src->grain;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 594)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 595) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 596) * If no src->label, preserve default label assigned
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 597) * from EDAC core.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 598) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 599) if (strlen(src->label))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 600) memcpy(dst->label, src->label, sizeof(src->label));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 601)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 602) i++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 603) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 604)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 605) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 606) struct dimm_info *dimm = edac_get_dimm(mci, 0, 0, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 607)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 608) dimm->nr_pages = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 609) dimm->grain = 128;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 610) dimm->mtype = MEM_UNKNOWN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 611) dimm->dtype = DEV_UNKNOWN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 612) dimm->edac_mode = EDAC_SECDED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 613) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 614)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 615) rc = edac_mc_add_mc(mci);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 616) if (rc < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 617) pr_info("Can't register with the EDAC core\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 618) edac_mc_free(mci);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 619) rc = -ENODEV;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 620) goto unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 621) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 622)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 623) spin_lock_irqsave(&ghes_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 624) ghes_pvt = pvt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 625) spin_unlock_irqrestore(&ghes_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 626)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 627) /* only set on success */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 628) refcount_set(&ghes_refcount, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 629)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 630) unlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 631)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 632) /* Not needed anymore */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 633) kfree(ghes_hw.dimms);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 634) ghes_hw.dimms = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 635)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 636) mutex_unlock(&ghes_reg_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 637)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 638) return rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 639) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 640)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 641) void ghes_edac_unregister(struct ghes *ghes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 642) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 643) struct mem_ctl_info *mci;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 644) unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 645)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 646) if (!force_load)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 647) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 648)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 649) mutex_lock(&ghes_reg_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 650)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 651) system_scanned = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 652) memset(&ghes_hw, 0, sizeof(struct ghes_hw_desc));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 653)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 654) if (!refcount_dec_and_test(&ghes_refcount))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 655) goto unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 656)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 657) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 658) * Wait for the irq handler being finished.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 659) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 660) spin_lock_irqsave(&ghes_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 661) mci = ghes_pvt ? ghes_pvt->mci : NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 662) ghes_pvt = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 663) spin_unlock_irqrestore(&ghes_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 664)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 665) if (!mci)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 666) goto unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 667)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 668) mci = edac_mc_del_mc(mci->pdev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 669) if (mci)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 670) edac_mc_free(mci);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 671)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 672) unlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 673) mutex_unlock(&ghes_reg_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 674) }