^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) // SPDX-License-Identifier: GPL-2.0-only
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) * File: mca_drv.c
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) * Purpose: Generic MCA handling layer
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) * Copyright (C) 2004 FUJITSU LIMITED
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) * Copyright (C) 2004 Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) * Copyright (C) 2005 Silicon Graphics, Inc
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) * Copyright (C) 2005 Keith Owens <kaos@sgi.com>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) * Copyright (C) 2006 Russ Anderson <rja@sgi.com>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12) #include <linux/types.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) #include <linux/init.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) #include <linux/sched.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15) #include <linux/interrupt.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) #include <linux/irq.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) #include <linux/kallsyms.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18) #include <linux/memblock.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19) #include <linux/acpi.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20) #include <linux/timer.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21) #include <linux/module.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22) #include <linux/kernel.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23) #include <linux/smp.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24) #include <linux/workqueue.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25) #include <linux/mm.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) #include <linux/slab.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28) #include <asm/delay.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29) #include <asm/page.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30) #include <asm/ptrace.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31) #include <asm/sal.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32) #include <asm/mca.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34) #include <asm/irq.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35) #include <asm/hw_irq.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37) #include "mca_drv.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39) /* max size of SAL error record (default) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40) static int sal_rec_max = 10000;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42) /* from mca_drv_asm.S */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) extern void *mca_handler_bhhook(void);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45) static DEFINE_SPINLOCK(mca_bh_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47) typedef enum {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48) MCA_IS_LOCAL = 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49) MCA_IS_GLOBAL = 1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50) } mca_type_t;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52) #define MAX_PAGE_ISOLATE 1024
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54) static struct page *page_isolate[MAX_PAGE_ISOLATE];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55) static int num_page_isolate = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57) typedef enum {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58) ISOLATE_NG,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59) ISOLATE_OK,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60) ISOLATE_NONE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61) } isolate_status_t;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63) typedef enum {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64) MCA_NOT_RECOVERED = 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65) MCA_RECOVERED = 1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66) } recovery_status_t;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69) * This pool keeps pointers to the section part of SAL error record
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71) static struct {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72) slidx_list_t *buffer; /* section pointer list pool */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73) int cur_idx; /* Current index of section pointer list pool */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74) int max_idx; /* Maximum index of section pointer list pool */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75) } slidx_pool;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77) static int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78) fatal_mca(const char *fmt, ...)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80) va_list args;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81) char buf[256];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 83) va_start(args, fmt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 84) vsnprintf(buf, sizeof(buf), fmt, args);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 85) va_end(args);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 86) ia64_mca_printk(KERN_ALERT "MCA: %s\n", buf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 87)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88) return MCA_NOT_RECOVERED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91) static int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92) mca_recovered(const char *fmt, ...)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94) va_list args;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95) char buf[256];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 96)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97) va_start(args, fmt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 98) vsnprintf(buf, sizeof(buf), fmt, args);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 99) va_end(args);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100) ia64_mca_printk(KERN_INFO "MCA: %s\n", buf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) return MCA_RECOVERED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) * mca_page_isolate - isolate a poisoned page in order not to use it later
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) * @paddr: poisoned memory location
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) * Return value:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) * one of isolate_status_t, ISOLATE_OK/NG/NONE.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113) static isolate_status_t
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) mca_page_isolate(unsigned long paddr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117) struct page *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) /* whether physical address is valid or not */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120) if (!ia64_phys_addr_valid(paddr))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121) return ISOLATE_NONE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) if (!pfn_valid(paddr >> PAGE_SHIFT))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124) return ISOLATE_NONE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) /* convert physical address to physical page number */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127) p = pfn_to_page(paddr>>PAGE_SHIFT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129) /* check whether a page number have been already registered or not */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130) for (i = 0; i < num_page_isolate; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131) if (page_isolate[i] == p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) return ISOLATE_OK; /* already listed */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134) /* limitation check */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135) if (num_page_isolate == MAX_PAGE_ISOLATE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136) return ISOLATE_NG;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138) /* kick pages having attribute 'SLAB' or 'Reserved' */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139) if (PageSlab(p) || PageReserved(p))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140) return ISOLATE_NG;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142) /* add attribute 'Reserved' and register the page */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143) get_page(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144) SetPageReserved(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145) page_isolate[num_page_isolate++] = p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147) return ISOLATE_OK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151) * mca_hanlder_bh - Kill the process which occurred memory read error
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152) * @paddr: poisoned address received from MCA Handler
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155) void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156) mca_handler_bh(unsigned long paddr, void *iip, unsigned long ipsr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158) ia64_mlogbuf_dump();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159) printk(KERN_ERR "OS_MCA: process [cpu %d, pid: %d, uid: %d, "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 160) "iip: %p, psr: 0x%lx,paddr: 0x%lx](%s) encounters MCA.\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 161) raw_smp_processor_id(), current->pid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 162) from_kuid(&init_user_ns, current_uid()),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 163) iip, ipsr, paddr, current->comm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 164)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 165) spin_lock(&mca_bh_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 166) switch (mca_page_isolate(paddr)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167) case ISOLATE_OK:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 168) printk(KERN_DEBUG "Page isolation: ( %lx ) success.\n", paddr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170) case ISOLATE_NG:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171) printk(KERN_CRIT "Page isolation: ( %lx ) failure.\n", paddr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 173) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 174) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 175) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 176) spin_unlock(&mca_bh_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 177)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 178) /* This process is about to be killed itself */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 179) do_exit(SIGKILL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 180) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 181)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 182) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 183) * mca_make_peidx - Make index of processor error section
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 184) * @slpi: pointer to record of processor error section
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 185) * @peidx: pointer to index of processor error section
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 186) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 187)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 188) static void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 189) mca_make_peidx(sal_log_processor_info_t *slpi, peidx_table_t *peidx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 190) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 191) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 192) * calculate the start address of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 193) * "struct cpuid_info" and "sal_processor_static_info_t".
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 194) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 195) u64 total_check_num = slpi->valid.num_cache_check
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 196) + slpi->valid.num_tlb_check
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 197) + slpi->valid.num_bus_check
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 198) + slpi->valid.num_reg_file_check
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 199) + slpi->valid.num_ms_check;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 200) u64 head_size = sizeof(sal_log_mod_error_info_t) * total_check_num
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 201) + sizeof(sal_log_processor_info_t);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 202) u64 mid_size = slpi->valid.cpuid_info * sizeof(struct sal_cpuid_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 203)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 204) peidx_head(peidx) = slpi;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 205) peidx_mid(peidx) = (struct sal_cpuid_info *)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 206) (slpi->valid.cpuid_info ? ((char*)slpi + head_size) : NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 207) peidx_bottom(peidx) = (sal_processor_static_info_t *)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 208) (slpi->valid.psi_static_struct ?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 209) ((char*)slpi + head_size + mid_size) : NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 210) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 211)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 212) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 213) * mca_make_slidx - Make index of SAL error record
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 214) * @buffer: pointer to SAL error record
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 215) * @slidx: pointer to index of SAL error record
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 216) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 217) * Return value:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 218) * 1 if record has platform error / 0 if not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 219) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 220) #define LOG_INDEX_ADD_SECT_PTR(sect, ptr) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 221) {slidx_list_t *hl = &slidx_pool.buffer[slidx_pool.cur_idx]; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 222) hl->hdr = ptr; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 223) list_add(&hl->list, &(sect)); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 224) slidx_pool.cur_idx = (slidx_pool.cur_idx + 1)%slidx_pool.max_idx; }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 225)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 226) static int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 227) mca_make_slidx(void *buffer, slidx_table_t *slidx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 228) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 229) int platform_err = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 230) int record_len = ((sal_log_record_header_t*)buffer)->len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 231) u32 ercd_pos;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 232) int sects;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 233) sal_log_section_hdr_t *sp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 234)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 235) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 236) * Initialize index referring current record
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 237) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 238) INIT_LIST_HEAD(&(slidx->proc_err));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 239) INIT_LIST_HEAD(&(slidx->mem_dev_err));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 240) INIT_LIST_HEAD(&(slidx->sel_dev_err));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 241) INIT_LIST_HEAD(&(slidx->pci_bus_err));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 242) INIT_LIST_HEAD(&(slidx->smbios_dev_err));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 243) INIT_LIST_HEAD(&(slidx->pci_comp_err));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 244) INIT_LIST_HEAD(&(slidx->plat_specific_err));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 245) INIT_LIST_HEAD(&(slidx->host_ctlr_err));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 246) INIT_LIST_HEAD(&(slidx->plat_bus_err));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 247) INIT_LIST_HEAD(&(slidx->unsupported));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 248)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 249) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 250) * Extract a Record Header
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 251) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 252) slidx->header = buffer;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 253)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 254) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 255) * Extract each section records
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 256) * (arranged from "int ia64_log_platform_info_print()")
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 257) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 258) for (ercd_pos = sizeof(sal_log_record_header_t), sects = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 259) ercd_pos < record_len; ercd_pos += sp->len, sects++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 260) sp = (sal_log_section_hdr_t *)((char*)buffer + ercd_pos);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 261) if (!efi_guidcmp(sp->guid, SAL_PROC_DEV_ERR_SECT_GUID)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 262) LOG_INDEX_ADD_SECT_PTR(slidx->proc_err, sp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 263) } else if (!efi_guidcmp(sp->guid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 264) SAL_PLAT_MEM_DEV_ERR_SECT_GUID)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 265) platform_err = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 266) LOG_INDEX_ADD_SECT_PTR(slidx->mem_dev_err, sp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 267) } else if (!efi_guidcmp(sp->guid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 268) SAL_PLAT_SEL_DEV_ERR_SECT_GUID)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 269) platform_err = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 270) LOG_INDEX_ADD_SECT_PTR(slidx->sel_dev_err, sp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 271) } else if (!efi_guidcmp(sp->guid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 272) SAL_PLAT_PCI_BUS_ERR_SECT_GUID)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 273) platform_err = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 274) LOG_INDEX_ADD_SECT_PTR(slidx->pci_bus_err, sp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 275) } else if (!efi_guidcmp(sp->guid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 276) SAL_PLAT_SMBIOS_DEV_ERR_SECT_GUID)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 277) platform_err = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 278) LOG_INDEX_ADD_SECT_PTR(slidx->smbios_dev_err, sp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 279) } else if (!efi_guidcmp(sp->guid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 280) SAL_PLAT_PCI_COMP_ERR_SECT_GUID)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 281) platform_err = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 282) LOG_INDEX_ADD_SECT_PTR(slidx->pci_comp_err, sp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 283) } else if (!efi_guidcmp(sp->guid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 284) SAL_PLAT_SPECIFIC_ERR_SECT_GUID)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 285) platform_err = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 286) LOG_INDEX_ADD_SECT_PTR(slidx->plat_specific_err, sp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 287) } else if (!efi_guidcmp(sp->guid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 288) SAL_PLAT_HOST_CTLR_ERR_SECT_GUID)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 289) platform_err = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 290) LOG_INDEX_ADD_SECT_PTR(slidx->host_ctlr_err, sp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 291) } else if (!efi_guidcmp(sp->guid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 292) SAL_PLAT_BUS_ERR_SECT_GUID)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 293) platform_err = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 294) LOG_INDEX_ADD_SECT_PTR(slidx->plat_bus_err, sp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 295) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 296) LOG_INDEX_ADD_SECT_PTR(slidx->unsupported, sp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 297) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 298) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 299) slidx->n_sections = sects;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 300)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 301) return platform_err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 302) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 303)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 304) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 305) * init_record_index_pools - Initialize pool of lists for SAL record index
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 306) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 307) * Return value:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 308) * 0 on Success / -ENOMEM on Failure
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 309) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 310) static int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 311) init_record_index_pools(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 312) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 313) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 314) int rec_max_size; /* Maximum size of SAL error records */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 315) int sect_min_size; /* Minimum size of SAL error sections */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 316) /* minimum size table of each section */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 317) static int sal_log_sect_min_sizes[] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 318) sizeof(sal_log_processor_info_t)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 319) + sizeof(sal_processor_static_info_t),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 320) sizeof(sal_log_mem_dev_err_info_t),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 321) sizeof(sal_log_sel_dev_err_info_t),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 322) sizeof(sal_log_pci_bus_err_info_t),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 323) sizeof(sal_log_smbios_dev_err_info_t),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 324) sizeof(sal_log_pci_comp_err_info_t),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 325) sizeof(sal_log_plat_specific_err_info_t),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 326) sizeof(sal_log_host_ctlr_err_info_t),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 327) sizeof(sal_log_plat_bus_err_info_t),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 328) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 329)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 330) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 331) * MCA handler cannot allocate new memory on flight,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 332) * so we preallocate enough memory to handle a SAL record.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 333) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 334) * Initialize a handling set of slidx_pool:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 335) * 1. Pick up the max size of SAL error records
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 336) * 2. Pick up the min size of SAL error sections
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 337) * 3. Allocate the pool as enough to 2 SAL records
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 338) * (now we can estimate the maxinum of section in a record.)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 339) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 340)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 341) /* - 1 - */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 342) rec_max_size = sal_rec_max;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 343)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 344) /* - 2 - */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 345) sect_min_size = sal_log_sect_min_sizes[0];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 346) for (i = 1; i < ARRAY_SIZE(sal_log_sect_min_sizes); i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 347) if (sect_min_size > sal_log_sect_min_sizes[i])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 348) sect_min_size = sal_log_sect_min_sizes[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 349)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 350) /* - 3 - */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 351) slidx_pool.max_idx = (rec_max_size/sect_min_size) * 2 + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 352) slidx_pool.buffer =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 353) kmalloc_array(slidx_pool.max_idx, sizeof(slidx_list_t),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 354) GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 355)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 356) return slidx_pool.buffer ? 0 : -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 357) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 358)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 359)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 360) /*****************************************************************************
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 361) * Recovery functions *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 362) *****************************************************************************/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 363)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 364) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 365) * is_mca_global - Check whether this MCA is global or not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 366) * @peidx: pointer of index of processor error section
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 367) * @pbci: pointer to pal_bus_check_info_t
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 368) * @sos: pointer to hand off struct between SAL and OS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 369) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 370) * Return value:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 371) * MCA_IS_LOCAL / MCA_IS_GLOBAL
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 372) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 373)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 374) static mca_type_t
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 375) is_mca_global(peidx_table_t *peidx, pal_bus_check_info_t *pbci,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 376) struct ia64_sal_os_state *sos)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 377) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 378) pal_processor_state_info_t *psp =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 379) (pal_processor_state_info_t*)peidx_psp(peidx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 380)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 381) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 382) * PAL can request a rendezvous, if the MCA has a global scope.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 383) * If "rz_always" flag is set, SAL requests MCA rendezvous
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 384) * in spite of global MCA.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 385) * Therefore it is local MCA when rendezvous has not been requested.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 386) * Failed to rendezvous, the system must be down.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 387) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 388) switch (sos->rv_rc) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 389) case -1: /* SAL rendezvous unsuccessful */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 390) return MCA_IS_GLOBAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 391) case 0: /* SAL rendezvous not required */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 392) return MCA_IS_LOCAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 393) case 1: /* SAL rendezvous successful int */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 394) case 2: /* SAL rendezvous successful int with init */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 395) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 396) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 397) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 398)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 399) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 400) * If One or more Cache/TLB/Reg_File/Uarch_Check is here,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 401) * it would be a local MCA. (i.e. processor internal error)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 402) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 403) if (psp->tc || psp->cc || psp->rc || psp->uc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 404) return MCA_IS_LOCAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 405)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 406) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 407) * Bus_Check structure with Bus_Check.ib (internal bus error) flag set
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 408) * would be a global MCA. (e.g. a system bus address parity error)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 409) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 410) if (!pbci || pbci->ib)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 411) return MCA_IS_GLOBAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 412)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 413) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 414) * Bus_Check structure with Bus_Check.eb (external bus error) flag set
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 415) * could be either a local MCA or a global MCA.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 416) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 417) * Referring Bus_Check.bsi:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 418) * 0: Unknown/unclassified
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 419) * 1: BERR#
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 420) * 2: BINIT#
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 421) * 3: Hard Fail
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 422) * (FIXME: Are these SGI specific or generic bsi values?)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 423) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 424) if (pbci->eb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 425) switch (pbci->bsi) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 426) case 0:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 427) /* e.g. a load from poisoned memory */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 428) return MCA_IS_LOCAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 429) case 1:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 430) case 2:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 431) case 3:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 432) return MCA_IS_GLOBAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 433) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 434)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 435) return MCA_IS_GLOBAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 436) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 437)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 438) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 439) * get_target_identifier - Get the valid Cache or Bus check target identifier.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 440) * @peidx: pointer of index of processor error section
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 441) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 442) * Return value:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 443) * target address on Success / 0 on Failure
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 444) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 445) static u64
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 446) get_target_identifier(peidx_table_t *peidx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 447) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 448) u64 target_address = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 449) sal_log_mod_error_info_t *smei;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 450) pal_cache_check_info_t *pcci;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 451) int i, level = 9;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 452)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 453) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 454) * Look through the cache checks for a valid target identifier
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 455) * If more than one valid target identifier, return the one
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 456) * with the lowest cache level.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 457) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 458) for (i = 0; i < peidx_cache_check_num(peidx); i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 459) smei = (sal_log_mod_error_info_t *)peidx_cache_check(peidx, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 460) if (smei->valid.target_identifier && smei->target_identifier) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 461) pcci = (pal_cache_check_info_t *)&(smei->check_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 462) if (!target_address || (pcci->level < level)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 463) target_address = smei->target_identifier;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 464) level = pcci->level;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 465) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 466) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 467) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 468) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 469) if (target_address)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 470) return target_address;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 471)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 472) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 473) * Look at the bus check for a valid target identifier
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 474) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 475) smei = peidx_bus_check(peidx, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 476) if (smei && smei->valid.target_identifier)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 477) return smei->target_identifier;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 478)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 479) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 480) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 481)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 482) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 483) * recover_from_read_error - Try to recover the errors which type are "read"s.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 484) * @slidx: pointer of index of SAL error record
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 485) * @peidx: pointer of index of processor error section
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 486) * @pbci: pointer of pal_bus_check_info
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 487) * @sos: pointer to hand off struct between SAL and OS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 488) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 489) * Return value:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 490) * 1 on Success / 0 on Failure
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 491) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 492)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 493) static int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 494) recover_from_read_error(slidx_table_t *slidx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 495) peidx_table_t *peidx, pal_bus_check_info_t *pbci,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 496) struct ia64_sal_os_state *sos)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 497) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 498) u64 target_identifier;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 499) pal_min_state_area_t *pmsa;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 500) struct ia64_psr *psr1, *psr2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 501) ia64_fptr_t *mca_hdlr_bh = (ia64_fptr_t*)mca_handler_bhhook;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 502)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 503) /* Is target address valid? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 504) target_identifier = get_target_identifier(peidx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 505) if (!target_identifier)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 506) return fatal_mca("target address not valid");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 507)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 508) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 509) * cpu read or memory-mapped io read
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 510) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 511) * offending process affected process OS MCA do
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 512) * kernel mode kernel mode down system
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 513) * kernel mode user mode kill the process
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 514) * user mode kernel mode down system (*)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 515) * user mode user mode kill the process
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 516) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 517) * (*) You could terminate offending user-mode process
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 518) * if (pbci->pv && pbci->pl != 0) *and* if you sure
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 519) * the process not have any locks of kernel.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 520) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 521)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 522) /* Is minstate valid? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 523) if (!peidx_bottom(peidx) || !(peidx_bottom(peidx)->valid.minstate))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 524) return fatal_mca("minstate not valid");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 525) psr1 =(struct ia64_psr *)&(peidx_minstate_area(peidx)->pmsa_ipsr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 526) psr2 =(struct ia64_psr *)&(peidx_minstate_area(peidx)->pmsa_xpsr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 527)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 528) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 529) * Check the privilege level of interrupted context.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 530) * If it is user-mode, then terminate affected process.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 531) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 532)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 533) pmsa = sos->pal_min_state;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 534) if (psr1->cpl != 0 ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 535) ((psr2->cpl != 0) && mca_recover_range(pmsa->pmsa_iip))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 536) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 537) * setup for resume to bottom half of MCA,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 538) * "mca_handler_bhhook"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 539) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 540) /* pass to bhhook as argument (gr8, ...) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 541) pmsa->pmsa_gr[8-1] = target_identifier;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 542) pmsa->pmsa_gr[9-1] = pmsa->pmsa_iip;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 543) pmsa->pmsa_gr[10-1] = pmsa->pmsa_ipsr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 544) /* set interrupted return address (but no use) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 545) pmsa->pmsa_br0 = pmsa->pmsa_iip;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 546) /* change resume address to bottom half */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 547) pmsa->pmsa_iip = mca_hdlr_bh->fp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 548) pmsa->pmsa_gr[1-1] = mca_hdlr_bh->gp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 549) /* set cpl with kernel mode */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 550) psr2 = (struct ia64_psr *)&pmsa->pmsa_ipsr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 551) psr2->cpl = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 552) psr2->ri = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 553) psr2->bn = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 554) psr2->i = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 555)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 556) return mca_recovered("user memory corruption. "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 557) "kill affected process - recovered.");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 558) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 559)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 560) return fatal_mca("kernel context not recovered, iip 0x%lx\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 561) pmsa->pmsa_iip);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 562) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 563)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 564) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 565) * recover_from_platform_error - Recover from platform error.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 566) * @slidx: pointer of index of SAL error record
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 567) * @peidx: pointer of index of processor error section
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 568) * @pbci: pointer of pal_bus_check_info
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 569) * @sos: pointer to hand off struct between SAL and OS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 570) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 571) * Return value:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 572) * 1 on Success / 0 on Failure
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 573) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 574)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 575) static int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 576) recover_from_platform_error(slidx_table_t *slidx, peidx_table_t *peidx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 577) pal_bus_check_info_t *pbci,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 578) struct ia64_sal_os_state *sos)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 579) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 580) int status = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 581) pal_processor_state_info_t *psp =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 582) (pal_processor_state_info_t*)peidx_psp(peidx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 583)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 584) if (psp->bc && pbci->eb && pbci->bsi == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 585) switch(pbci->type) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 586) case 1: /* partial read */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 587) case 3: /* full line(cpu) read */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 588) case 9: /* I/O space read */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 589) status = recover_from_read_error(slidx, peidx, pbci,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 590) sos);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 591) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 592) case 0: /* unknown */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 593) case 2: /* partial write */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 594) case 4: /* full line write */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 595) case 5: /* implicit or explicit write-back operation */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 596) case 6: /* snoop probe */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 597) case 7: /* incoming or outgoing ptc.g */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 598) case 8: /* write coalescing transactions */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 599) case 10: /* I/O space write */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 600) case 11: /* inter-processor interrupt message(IPI) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 601) case 12: /* interrupt acknowledge or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 602) external task priority cycle */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 603) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 604) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 605) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 606) } else if (psp->cc && !psp->bc) { /* Cache error */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 607) status = recover_from_read_error(slidx, peidx, pbci, sos);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 608) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 609)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 610) return status;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 611) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 612)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 613) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 614) * recover_from_tlb_check
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 615) * @peidx: pointer of index of processor error section
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 616) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 617) * Return value:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 618) * 1 on Success / 0 on Failure
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 619) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 620) static int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 621) recover_from_tlb_check(peidx_table_t *peidx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 622) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 623) sal_log_mod_error_info_t *smei;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 624) pal_tlb_check_info_t *ptci;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 625)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 626) smei = (sal_log_mod_error_info_t *)peidx_tlb_check(peidx, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 627) ptci = (pal_tlb_check_info_t *)&(smei->check_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 628)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 629) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 630) * Look for signature of a duplicate TLB DTC entry, which is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 631) * a SW bug and always fatal.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 632) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 633) if (ptci->op == PAL_TLB_CHECK_OP_PURGE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 634) && !(ptci->itr || ptci->dtc || ptci->itc))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 635) return fatal_mca("Duplicate TLB entry");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 636)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 637) return mca_recovered("TLB check recovered");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 638) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 639)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 640) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 641) * recover_from_processor_error
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 642) * @platform: whether there are some platform error section or not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 643) * @slidx: pointer of index of SAL error record
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 644) * @peidx: pointer of index of processor error section
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 645) * @pbci: pointer of pal_bus_check_info
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 646) * @sos: pointer to hand off struct between SAL and OS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 647) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 648) * Return value:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 649) * 1 on Success / 0 on Failure
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 650) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 651)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 652) static int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 653) recover_from_processor_error(int platform, slidx_table_t *slidx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 654) peidx_table_t *peidx, pal_bus_check_info_t *pbci,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 655) struct ia64_sal_os_state *sos)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 656) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 657) pal_processor_state_info_t *psp =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 658) (pal_processor_state_info_t*)peidx_psp(peidx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 659)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 660) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 661) * Processor recovery status must key off of the PAL recovery
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 662) * status in the Processor State Parameter.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 663) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 664)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 665) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 666) * The machine check is corrected.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 667) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 668) if (psp->cm == 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 669) return mca_recovered("machine check is already corrected.");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 670)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 671) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 672) * The error was not contained. Software must be reset.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 673) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 674) if (psp->us || psp->ci == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 675) return fatal_mca("error not contained");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 676)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 677) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 678) * Look for recoverable TLB check
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 679) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 680) if (psp->tc && !(psp->cc || psp->bc || psp->rc || psp->uc))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 681) return recover_from_tlb_check(peidx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 682)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 683) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 684) * The cache check and bus check bits have four possible states
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 685) * cc bc
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 686) * 1 1 Memory error, attempt recovery
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 687) * 1 0 Cache error, attempt recovery
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 688) * 0 1 I/O error, attempt recovery
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 689) * 0 0 Other error type, not recovered
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 690) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 691) if (psp->cc == 0 && (psp->bc == 0 || pbci == NULL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 692) return fatal_mca("No cache or bus check");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 693)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 694) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 695) * Cannot handle more than one bus check.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 696) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 697) if (peidx_bus_check_num(peidx) > 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 698) return fatal_mca("Too many bus checks");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 699)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 700) if (pbci->ib)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 701) return fatal_mca("Internal Bus error");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 702) if (pbci->eb && pbci->bsi > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 703) return fatal_mca("External bus check fatal status");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 704)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 705) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 706) * This is a local MCA and estimated as a recoverable error.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 707) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 708) if (platform)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 709) return recover_from_platform_error(slidx, peidx, pbci, sos);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 710)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 711) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 712) * On account of strange SAL error record, we cannot recover.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 713) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 714) return fatal_mca("Strange SAL record");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 715) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 716)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 717) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 718) * mca_try_to_recover - Try to recover from MCA
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 719) * @rec: pointer to a SAL error record
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 720) * @sos: pointer to hand off struct between SAL and OS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 721) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 722) * Return value:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 723) * 1 on Success / 0 on Failure
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 724) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 725)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 726) static int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 727) mca_try_to_recover(void *rec, struct ia64_sal_os_state *sos)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 728) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 729) int platform_err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 730) int n_proc_err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 731) slidx_table_t slidx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 732) peidx_table_t peidx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 733) pal_bus_check_info_t pbci;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 734)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 735) /* Make index of SAL error record */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 736) platform_err = mca_make_slidx(rec, &slidx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 737)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 738) /* Count processor error sections */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 739) n_proc_err = slidx_count(&slidx, proc_err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 740)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 741) /* Now, OS can recover when there is one processor error section */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 742) if (n_proc_err > 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 743) return fatal_mca("Too Many Errors");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 744) else if (n_proc_err == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 745) /* Weird SAL record ... We can't do anything */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 746) return fatal_mca("Weird SAL record");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 747)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 748) /* Make index of processor error section */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 749) mca_make_peidx((sal_log_processor_info_t*)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 750) slidx_first_entry(&slidx.proc_err)->hdr, &peidx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 751)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 752) /* Extract Processor BUS_CHECK[0] */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 753) *((u64*)&pbci) = peidx_check_info(&peidx, bus_check, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 754)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 755) /* Check whether MCA is global or not */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 756) if (is_mca_global(&peidx, &pbci, sos))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 757) return fatal_mca("global MCA");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 758)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 759) /* Try to recover a processor error */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 760) return recover_from_processor_error(platform_err, &slidx, &peidx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 761) &pbci, sos);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 762) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 763)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 764) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 765) * =============================================================================
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 766) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 767)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 768) int __init mca_external_handler_init(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 769) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 770) if (init_record_index_pools())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 771) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 772)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 773) /* register external mca handlers */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 774) if (ia64_reg_MCA_extension(mca_try_to_recover)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 775) printk(KERN_ERR "ia64_reg_MCA_extension failed.\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 776) kfree(slidx_pool.buffer);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 777) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 778) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 779) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 780) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 781)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 782) void __exit mca_external_handler_exit(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 783) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 784) /* unregister external mca handlers */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 785) ia64_unreg_MCA_extension();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 786) kfree(slidx_pool.buffer);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 787) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 788)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 789) module_init(mca_external_handler_init);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 790) module_exit(mca_external_handler_exit);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 791)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 792) module_param(sal_rec_max, int, 0644);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 793) MODULE_PARM_DESC(sal_rec_max, "Max size of SAL error record");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 794)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 795) MODULE_DESCRIPTION("ia64 platform dependent mca handler driver");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 796) MODULE_LICENSE("GPL");