// SPDX-License-Identifier: GPL-2.0
/*
 * Core of Xen paravirt_ops implementation.
 *
 * This file contains the xen_paravirt_ops structure itself, and the
 * implementations for:
 * - privileged instructions
 * - interrupt flags
 * - segment operations
 * - booting and setup
 *
 * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
 */

#include <linux/cpu.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/smp.h>
#include <linux/preempt.h>
#include <linux/hardirq.h>
#include <linux/percpu.h>
#include <linux/delay.h>
#include <linux/start_kernel.h>
#include <linux/sched.h>
#include <linux/kprobes.h>
#include <linux/memblock.h>
#include <linux/export.h>
#include <linux/mm.h>
#include <linux/page-flags.h>
#include <linux/highmem.h>
#include <linux/console.h>
#include <linux/pci.h>
#include <linux/gfp.h>
#include <linux/edd.h>
#include <linux/objtool.h>

#include <xen/xen.h>
#include <xen/events.h>
#include <xen/interface/xen.h>
#include <xen/interface/version.h>
#include <xen/interface/physdev.h>
#include <xen/interface/vcpu.h>
#include <xen/interface/memory.h>
#include <xen/interface/nmi.h>
#include <xen/interface/xen-mca.h>
#include <xen/features.h>
#include <xen/page.h>
#include <xen/hvc-console.h>
#include <xen/acpi.h>

#include <asm/paravirt.h>
#include <asm/apic.h>
#include <asm/page.h>
#include <asm/xen/pci.h>
#include <asm/xen/hypercall.h>
#include <asm/xen/hypervisor.h>
#include <asm/xen/cpuid.h>
#include <asm/fixmap.h>
#include <asm/processor.h>
#include <asm/proto.h>
#include <asm/msr-index.h>
#include <asm/traps.h>
#include <asm/setup.h>
#include <asm/desc.h>
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include <asm/reboot.h>
#include <asm/stackprotector.h>
#include <asm/hypervisor.h>
#include <asm/mach_traps.h>
#include <asm/mwait.h>
#include <asm/pci_x86.h>
#include <asm/cpu.h>
#ifdef CONFIG_X86_IOPL_IOPERM
#include <asm/io_bitmap.h>
#endif

#ifdef CONFIG_ACPI
#include <linux/acpi.h>
#include <asm/acpi.h>
#include <acpi/pdc_intel.h>
#include <acpi/processor.h>
#include <xen/interface/platform.h>
#endif

#include "xen-ops.h"
#include "mmu.h"
#include "smp.h"
#include "multicalls.h"
#include "pmu.h"

#include "../kernel/cpu/cpu.h" /* get_cpu_cap() */

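/* Initial GDT of the boot CPU, set up during early boot. */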
void *xen_initial_gdt;

static int xen_cpu_up_prepare_pv(unsigned int cpu);
static int xen_cpu_dead_pv(unsigned int cpu);

struct tls_descs {
	struct desc_struct desc[3];
};

/*
 * Updating the 3 TLS descriptors in the GDT on every task switch is
 * surprisingly expensive so we avoid updating them if they haven't
 * changed.  Since Xen writes different descriptors than the ones passed
 * in the update_descriptor hypercall, we keep shadow copies to compare
 * against.
 */
static DEFINE_PER_CPU(struct tls_descs, shadow_tls_desc);

static void __init xen_banner(void)
{
	unsigned int version = HYPERVISOR_xen_version(XENVER_version, NULL);
	struct xen_extraversion extra;

	HYPERVISOR_xen_version(XENVER_extraversion, &extra);

	pr_info("Booting paravirtualized kernel on %s\n", pv_info.name);
	pr_info("Xen version: %d.%d%s%s\n",
		version >> 16, version & 0xffff, extra.extraversion,
		xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : "");
}

static void __init xen_pv_init_platform(void)
{
	populate_extra_pte(fix_to_virt(FIX_PARAVIRT_BOOTMAP));

	set_fixmap(FIX_PARAVIRT_BOOTMAP, xen_start_info->shared_info);
	HYPERVISOR_shared_info = (void *)fix_to_virt(FIX_PARAVIRT_BOOTMAP);

	/* xen clock uses per-cpu vcpu_info, need to init it for boot cpu */
	xen_vcpu_info_reset(0);

	/* pvclock is in shared info area */
	xen_init_time_ops();
}

static void __init xen_pv_guest_late_init(void)
{
#ifndef CONFIG_SMP
	/* Set up shared vcpu info for non-SMP configurations. */
	xen_setup_vcpu_info_placement();
#endif
}

/* Check if running on Xen version (major, minor) or later */
bool xen_running_on_version_or_later(unsigned int major, unsigned int minor)
{
	unsigned int version;

	if (!xen_domain())
		return false;

	version = HYPERVISOR_xen_version(XENVER_version, NULL);
	if ((((version >> 16) == major) && ((version & 0xffff) >= minor)) ||
	    ((version >> 16) > major))
		return true;
	return false;
}

static __read_mostly unsigned int cpuid_leaf5_ecx_val;
static __read_mostly unsigned int cpuid_leaf5_edx_val;

static void xen_cpuid(unsigned int *ax, unsigned int *bx,
		      unsigned int *cx, unsigned int *dx)
{
	unsigned int maskebx = ~0;

	/*
	 * Mask out inconvenient features, to try and disable as many
	 * unsupported kernel subsystems as possible.
	 */
	switch (*ax) {
	case CPUID_MWAIT_LEAF:
		/* Synthesize the values.. */
		*ax = 0;
		*bx = 0;
		*cx = cpuid_leaf5_ecx_val;
		*dx = cpuid_leaf5_edx_val;
		return;

	case 0xb:
		/* Suppress extended topology stuff */
		maskebx = 0;
		break;
	}

	asm(XEN_EMULATE_PREFIX "cpuid"
		: "=a" (*ax),
		  "=b" (*bx),
		  "=c" (*cx),
		  "=d" (*dx)
		: "0" (*ax), "2" (*cx));

	*bx &= maskebx;
}
STACK_FRAME_NON_STANDARD(xen_cpuid); /* XEN_EMULATE_PREFIX */

static bool __init xen_check_mwait(void)
{
#ifdef CONFIG_ACPI
	struct xen_platform_op op = {
		.cmd = XENPF_set_processor_pminfo,
		.u.set_pminfo.id = -1,
		.u.set_pminfo.type = XEN_PM_PDC,
	};
	uint32_t buf[3];
	unsigned int ax, bx, cx, dx;
	unsigned int mwait_mask;

	/*
	 * We need to determine whether it is OK to expose the MWAIT
	 * capability to the kernel to harvest deeper than C3 states from ACPI
	 * _CST using the processor_harvest_xen.c module. For this to work, we
	 * need to gather the MWAIT_LEAF values (which the cstate.c code
	 * checks against). The hypervisor won't expose the MWAIT flag because
	 * it would break backwards compatibility; so we will find out directly
	 * from the hardware and hypercall.
	 */
	if (!xen_initial_domain())
		return false;

	/*
	 * When running on a platform earlier than Xen 4.2, do not expose
	 * MWAIT, to avoid the risk of loading the native ACPI PAD driver.
	 */
	if (!xen_running_on_version_or_later(4, 2))
		return false;

	ax = 1;
	cx = 0;

	native_cpuid(&ax, &bx, &cx, &dx);

	mwait_mask = (1 << (X86_FEATURE_EST % 32)) |
		     (1 << (X86_FEATURE_MWAIT % 32));

	if ((cx & mwait_mask) != mwait_mask)
		return false;

	/*
	 * We need to emulate the MWAIT_LEAF and for that we need both
	 * ecx and edx. The hypercall provides only partial information.
	 */

	ax = CPUID_MWAIT_LEAF;
	bx = 0;
	cx = 0;
	dx = 0;

	native_cpuid(&ax, &bx, &cx, &dx);

	/*
	 * Ask the Hypervisor whether to clear ACPI_PDC_C_C2C3_FFH. If so,
	 * don't expose MWAIT_LEAF and let ACPI pick the IOPORT version of C3.
	 */
	buf[0] = ACPI_PDC_REVISION_ID;
	buf[1] = 1;
	buf[2] = (ACPI_PDC_C_CAPABILITY_SMP | ACPI_PDC_EST_CAPABILITY_SWSMP);

	set_xen_guest_handle(op.u.set_pminfo.pdc, buf);

	if ((HYPERVISOR_platform_op(&op) == 0) &&
	    (buf[2] & (ACPI_PDC_C_C1_FFH | ACPI_PDC_C_C2C3_FFH))) {
		cpuid_leaf5_ecx_val = cx;
		cpuid_leaf5_edx_val = dx;
	}
	return true;
#else
	return false;
#endif
}

static bool __init xen_check_xsave(void)
{
	unsigned int cx, xsave_mask;

	cx = cpuid_ecx(1);

	xsave_mask = (1 << (X86_FEATURE_XSAVE % 32)) |
		     (1 << (X86_FEATURE_OSXSAVE % 32));

	/* Xen will set CR4.OSXSAVE if supported and not disabled by force */
	return (cx & xsave_mask) == xsave_mask;
}

static void __init xen_init_capabilities(void)
{
	setup_force_cpu_cap(X86_FEATURE_XENPV);
	setup_clear_cpu_cap(X86_FEATURE_DCA);
	setup_clear_cpu_cap(X86_FEATURE_APERFMPERF);
	setup_clear_cpu_cap(X86_FEATURE_MTRR);
	setup_clear_cpu_cap(X86_FEATURE_ACC);
	setup_clear_cpu_cap(X86_FEATURE_X2APIC);
	setup_clear_cpu_cap(X86_FEATURE_SME);

	/*
	 * Xen PV would need some work to support PCID: CR3 handling as well
	 * as xen_flush_tlb_others() would need updating.
	 */
	setup_clear_cpu_cap(X86_FEATURE_PCID);

	if (!xen_initial_domain())
		setup_clear_cpu_cap(X86_FEATURE_ACPI);

	if (xen_check_mwait())
		setup_force_cpu_cap(X86_FEATURE_MWAIT);
	else
		setup_clear_cpu_cap(X86_FEATURE_MWAIT);

	if (!xen_check_xsave()) {
		setup_clear_cpu_cap(X86_FEATURE_XSAVE);
		setup_clear_cpu_cap(X86_FEATURE_OSXSAVE);
	}
}

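/*
 * Accessing the debug registers is a privileged operation, so a
 * deprivileged PV guest has to go through the hypervisor for it.
 */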
static void xen_set_debugreg(int reg, unsigned long val)
{
	HYPERVISOR_set_debugreg(reg, val);
}

static unsigned long xen_get_debugreg(int reg)
{
	return HYPERVISOR_get_debugreg(reg);
}

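/*
 * Flush any hypercalls still batched for the outgoing task before the
 * context switch is declared finished and lazy batching mode is left.
 */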
static void xen_end_context_switch(struct task_struct *next)
{
	xen_mc_flush();
	paravirt_end_context_switch(next);
}

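/*
 * The TSS is managed by the hypervisor for a PV guest, so there is no
 * meaningful task register selector to read back; report 0 instead.
 */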
static unsigned long xen_store_tr(void)
{
	return 0;
}

/*
 * Set the page permissions for a particular virtual address.  If the
 * address is a vmalloc mapping (or other non-linear mapping), then
 * find the linear mapping of the page and also set its protections to
 * match.
 */
static void set_aliased_prot(void *v, pgprot_t prot)
{
	int level;
	pte_t *ptep;
	pte_t pte;
	unsigned long pfn;
	unsigned char dummy;
	void *va;

	ptep = lookup_address((unsigned long)v, &level);
	BUG_ON(ptep == NULL);

	pfn = pte_pfn(*ptep);
	pte = pfn_pte(pfn, prot);

	/*
	 * Careful: update_va_mapping() will fail if the virtual address
	 * we're poking isn't populated in the page tables. We don't
	 * need to worry about the direct map (that's always in the page
	 * tables), but we need to be careful about vmap space. In
	 * particular, the top level page table can lazily propagate
	 * entries between processes, so if we've switched mms since we
	 * vmapped the target in the first place, we might not have the
	 * top-level page table entry populated.
	 *
	 * We disable preemption because we want the same mm active when
	 * we probe the target and when we issue the hypercall. We'll
	 * have the same nominal mm, but if we're a kernel thread, lazy
	 * mm dropping could change our pgd.
	 *
	 * Out of an abundance of caution, this uses
	 * copy_from_kernel_nofault() to fault in the target address just
	 * in case there's some obscure case in which the target address
	 * isn't readable.
	 */

	preempt_disable();

	copy_from_kernel_nofault(&dummy, v, 1);

	if (HYPERVISOR_update_va_mapping((unsigned long)v, pte, 0))
		BUG();

	va = __va(PFN_PHYS(pfn));

	if (va != v && HYPERVISOR_update_va_mapping((unsigned long)va, pte, 0))
		BUG();

	preempt_enable();
}

static void xen_alloc_ldt(struct desc_struct *ldt, unsigned int entries)
{
	const unsigned int entries_per_page = PAGE_SIZE / LDT_ENTRY_SIZE;
	int i;

	/*
	 * We need to mark all aliases of the LDT pages RO.  We
	 * don't need to call vm_flush_aliases(), though, since that's
	 * only responsible for flushing aliases out of the TLBs, not the
	 * page tables, and Xen will flush the TLB for us if needed.
	 *
	 * To avoid confusing future readers: none of this is necessary
	 * to load the LDT.  The hypervisor only checks this when the
	 * LDT is faulted in due to subsequent descriptor access.
	 */

	for (i = 0; i < entries; i += entries_per_page)
		set_aliased_prot(ldt + i, PAGE_KERNEL_RO);
}

static void xen_free_ldt(struct desc_struct *ldt, unsigned int entries)
{
	const unsigned int entries_per_page = PAGE_SIZE / LDT_ENTRY_SIZE;
	int i;

	for (i = 0; i < entries; i += entries_per_page)
		set_aliased_prot(ldt + i, PAGE_KERNEL);
}

static void xen_set_ldt(const void *addr, unsigned int entries)
{
	struct mmuext_op *op;
	struct multicall_space mcs = xen_mc_entry(sizeof(*op));

	trace_xen_cpu_set_ldt(addr, entries);

	op = mcs.args;
	op->cmd = MMUEXT_SET_LDT;
	op->arg1.linear_addr = (unsigned long)addr;
	op->arg2.nr_ents = entries;

	MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);

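	/*
	 * Issue the batched request now, unless a lazy-CPU section is
	 * active, in which case the flush is deferred until it ends.
	 */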
	xen_mc_issue(PARAVIRT_LAZY_CPU);
}

static void xen_load_gdt(const struct desc_ptr *dtr)
{
	unsigned long va = dtr->address;
	unsigned int size = dtr->size + 1;
	unsigned long pfn, mfn;
	int level;
	pte_t *ptep;
	void *virt;

	/* @size should be at most GDT_SIZE which is smaller than PAGE_SIZE. */
	BUG_ON(size > PAGE_SIZE);
	BUG_ON(va & ~PAGE_MASK);

	/*
	 * The GDT is per-cpu and is in the percpu data area.
	 * That can be virtually mapped, so we need to do a
	 * page-walk to get the underlying MFN for the
	 * hypercall.  The page can also be in the kernel's
	 * linear range, so we need to RO that mapping too.
	 */
	ptep = lookup_address(va, &level);
	BUG_ON(ptep == NULL);

	pfn = pte_pfn(*ptep);
	mfn = pfn_to_mfn(pfn);
	virt = __va(PFN_PHYS(pfn));

	make_lowmem_page_readonly((void *)va);
	make_lowmem_page_readonly(virt);

	if (HYPERVISOR_set_gdt(&mfn, size / sizeof(struct desc_struct)))
		BUG();
}

/*
 * load_gdt for early boot, when the gdt is only mapped once
 */
static void __init xen_load_gdt_boot(const struct desc_ptr *dtr)
{
	unsigned long va = dtr->address;
	unsigned int size = dtr->size + 1;
	unsigned long pfn, mfn;
	pte_t pte;

	/* @size should be at most GDT_SIZE which is smaller than PAGE_SIZE. */
	BUG_ON(size > PAGE_SIZE);
	BUG_ON(va & ~PAGE_MASK);

	pfn = virt_to_pfn(va);
	mfn = pfn_to_mfn(pfn);

	pte = pfn_pte(pfn, PAGE_KERNEL_RO);

	if (HYPERVISOR_update_va_mapping((unsigned long)va, pte, 0))
		BUG();

	if (HYPERVISOR_set_gdt(&mfn, size / sizeof(struct desc_struct)))
		BUG();
}

static inline bool desc_equal(const struct desc_struct *d1,
			      const struct desc_struct *d2)
{
	return !memcmp(d1, d2, sizeof(*d1));
}

static void load_TLS_descriptor(struct thread_struct *t,
				unsigned int cpu, unsigned int i)
{
	struct desc_struct *shadow = &per_cpu(shadow_tls_desc, cpu).desc[i];
	struct desc_struct *gdt;
	xmaddr_t maddr;
	struct multicall_space mc;

	if (desc_equal(shadow, &t->tls_array[i]))
		return;

	*shadow = t->tls_array[i];

	gdt = get_cpu_gdt_rw(cpu);
	maddr = arbitrary_virt_to_machine(&gdt[GDT_ENTRY_TLS_MIN + i]);
	mc = __xen_mc_entry(0);

	MULTI_update_descriptor(mc.mc, maddr.maddr, t->tls_array[i]);
}

static void xen_load_tls(struct thread_struct *t, unsigned int cpu)
{
	/*
	 * In lazy mode we need to zero %fs, otherwise we may get an
	 * exception between the new %fs descriptor being loaded and
	 * %fs being effectively cleared at __switch_to().
	 */
	if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU)
		loadsegment(fs, 0);

	xen_mc_batch();

	load_TLS_descriptor(t, cpu, 0);
	load_TLS_descriptor(t, cpu, 1);
	load_TLS_descriptor(t, cpu, 2);

	xen_mc_issue(PARAVIRT_LAZY_CPU);
}

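/*
 * swapgs is not usable by a PV guest, so the user %gs selector has to
 * be switched through the hypervisor (SEGBASE_GS_USER_SEL).
 */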
static void xen_load_gs_index(unsigned int idx)
{
	if (HYPERVISOR_set_segment_base(SEGBASE_GS_USER_SEL, idx))
		BUG();
}

static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum,
				const void *ptr)
{
	xmaddr_t mach_lp = arbitrary_virt_to_machine(&dt[entrynum]);
	u64 entry = *(u64 *)ptr;

	trace_xen_cpu_write_ldt_entry(dt, entrynum, entry);

	preempt_disable();

	xen_mc_flush();
	if (HYPERVISOR_update_descriptor(mach_lp.maddr, entry))
		BUG();

	preempt_enable();
}

void noist_exc_debug(struct pt_regs *regs);

DEFINE_IDTENTRY_RAW(xenpv_exc_nmi)
{
	/* On Xen PV, NMI doesn't use IST.  The C part is the same as native. */
	exc_nmi(regs);
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 573)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 574) DEFINE_IDTENTRY_RAW(xenpv_exc_debug)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 575) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 576) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 577) * There's no IST on Xen PV, but we still need to dispatch
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 578) * to the correct handler.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 579) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 580) if (user_mode(regs))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 581) noist_exc_debug(regs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 582) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 583) exc_debug(regs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 584) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 585)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 586) DEFINE_IDTENTRY_RAW(exc_xen_unknown_trap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 587) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 588) /* This should never happen and there is no way to handle it. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 589) instrumentation_begin();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 590) pr_err("Unknown trap in Xen PV mode.");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 591) BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 592) instrumentation_end();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 593) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 594)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 595) struct trap_array_entry {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 596) void (*orig)(void);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 597) void (*xen)(void);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 598) bool ist_okay;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 599) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 600)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 601) #define TRAP_ENTRY(func, ist_ok) { \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 602) .orig = asm_##func, \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 603) .xen = xen_asm_##func, \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 604) .ist_okay = ist_ok }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 605)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 606) #define TRAP_ENTRY_REDIR(func, ist_ok) { \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 607) .orig = asm_##func, \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 608) .xen = xen_asm_xenpv_##func, \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 609) .ist_okay = ist_ok }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 610)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 611) static struct trap_array_entry trap_array[] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 612) TRAP_ENTRY_REDIR(exc_debug, true ),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 613) TRAP_ENTRY(exc_double_fault, true ),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 614) #ifdef CONFIG_X86_MCE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 615) TRAP_ENTRY(exc_machine_check, true ),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 616) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 617) TRAP_ENTRY_REDIR(exc_nmi, true ),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 618) TRAP_ENTRY(exc_int3, false ),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 619) TRAP_ENTRY(exc_overflow, false ),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 620) #ifdef CONFIG_IA32_EMULATION
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 621) { entry_INT80_compat, xen_entry_INT80_compat, false },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 622) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 623) TRAP_ENTRY(exc_page_fault, false ),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 624) TRAP_ENTRY(exc_divide_error, false ),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 625) TRAP_ENTRY(exc_bounds, false ),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 626) TRAP_ENTRY(exc_invalid_op, false ),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 627) TRAP_ENTRY(exc_device_not_available, false ),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 628) TRAP_ENTRY(exc_coproc_segment_overrun, false ),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 629) TRAP_ENTRY(exc_invalid_tss, false ),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 630) TRAP_ENTRY(exc_segment_not_present, false ),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 631) TRAP_ENTRY(exc_stack_segment, false ),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 632) TRAP_ENTRY(exc_general_protection, false ),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 633) TRAP_ENTRY(exc_spurious_interrupt_bug, false ),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 634) TRAP_ENTRY(exc_coprocessor_error, false ),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 635) TRAP_ENTRY(exc_alignment_check, false ),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 636) TRAP_ENTRY(exc_simd_coprocessor_error, false ),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 637) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 638)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 639) static bool __ref get_trap_addr(void **addr, unsigned int ist)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 640) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 641) unsigned int nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 642) bool ist_okay = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 643) bool found = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 644)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 645) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 646) * Replace trap handler addresses by Xen specific ones.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 647) * Check for known traps using IST and whitelist them.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 648) * The debugger ones are the only ones we care about.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 649) * Xen will handle faults like double_fault, so we should never see
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 650) * them. Warn if there's an unexpected IST-using fault handler.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 651) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 652) for (nr = 0; nr < ARRAY_SIZE(trap_array); nr++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 653) struct trap_array_entry *entry = trap_array + nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 654)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 655) if (*addr == entry->orig) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 656) *addr = entry->xen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 657) ist_okay = entry->ist_okay;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 658) found = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 659) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 660) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 661) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 662)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 663) if (nr == ARRAY_SIZE(trap_array) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 664) *addr >= (void *)early_idt_handler_array[0] &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 665) *addr < (void *)early_idt_handler_array[NUM_EXCEPTION_VECTORS]) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 666) nr = (*addr - (void *)early_idt_handler_array[0]) /
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 667) EARLY_IDT_HANDLER_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 668) *addr = (void *)xen_early_idt_handler_array[nr];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 669) found = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 670) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 671)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 672) if (!found)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 673) *addr = (void *)xen_asm_exc_xen_unknown_trap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 674)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 675) if (WARN_ON(found && ist != 0 && !ist_okay))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 676) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 677)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 678) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 679) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 680)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 681) static int cvt_gate_to_trap(int vector, const gate_desc *val,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 682) struct trap_info *info)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 683) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 684) unsigned long addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 685)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 686) if (val->bits.type != GATE_TRAP && val->bits.type != GATE_INTERRUPT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 687) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 688)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 689) info->vector = vector;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 690)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 691) addr = gate_offset(val);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 692) if (!get_trap_addr((void **)&addr, val->bits.ist))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 693) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 694) info->address = addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 695)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 696) info->cs = gate_segment(val);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 697) info->flags = val->bits.dpl;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 698) /* interrupt gates clear IF */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 699) if (val->bits.type == GATE_INTERRUPT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 700) info->flags |= 1 << 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 701)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 702) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 703) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 704)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 705) /* Locations of each CPU's IDT */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 706) static DEFINE_PER_CPU(struct desc_ptr, idt_desc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 707)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 708) /* Set an IDT entry. If the entry is part of the current IDT, then
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 709) also update Xen. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 710) static void xen_write_idt_entry(gate_desc *dt, int entrynum, const gate_desc *g)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 711) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 712) unsigned long p = (unsigned long)&dt[entrynum];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 713) unsigned long start, end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 714)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 715) trace_xen_cpu_write_idt_entry(dt, entrynum, g);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 716)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 717) preempt_disable();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 718)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 719) start = __this_cpu_read(idt_desc.address);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 720) end = start + __this_cpu_read(idt_desc.size) + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 721)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 722) xen_mc_flush();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 723)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 724) native_write_idt_entry(dt, entrynum, g);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 725)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 726) if (p >= start && (p + 8) <= end) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 727) struct trap_info info[2];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 728)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 729) info[1].address = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 730)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 731) if (cvt_gate_to_trap(entrynum, g, &info[0]))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 732) if (HYPERVISOR_set_trap_table(info))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 733) BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 734) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 735)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 736) preempt_enable();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 737) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 738)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 739) static unsigned xen_convert_trap_info(const struct desc_ptr *desc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 740) struct trap_info *traps, bool full)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 741) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 742) unsigned in, out, count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 743)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 744) count = (desc->size+1) / sizeof(gate_desc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 745) BUG_ON(count > 256);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 746)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 747) for (in = out = 0; in < count; in++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 748) gate_desc *entry = (gate_desc *)(desc->address) + in;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 749)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 750) if (cvt_gate_to_trap(in, entry, &traps[out]) || full)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 751) out++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 752) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 753)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 754) return out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 755) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 756)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 757) void xen_copy_trap_info(struct trap_info *traps)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 758) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 759) const struct desc_ptr *desc = this_cpu_ptr(&idt_desc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 760)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 761) xen_convert_trap_info(desc, traps, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 762) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 763)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 764) /* Load a new IDT into Xen. In principle this can be per-CPU, so we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 765) hold a spinlock to protect the static traps[] array (static because
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 766) it avoids allocation, and saves stack space). */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 767) static void xen_load_idt(const struct desc_ptr *desc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 768) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 769) static DEFINE_SPINLOCK(lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 770) static struct trap_info traps[257];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 771) unsigned out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 772)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 773) trace_xen_cpu_load_idt(desc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 774)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 775) spin_lock(&lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 776)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 777) memcpy(this_cpu_ptr(&idt_desc), desc, sizeof(idt_desc));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 778)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 779) out = xen_convert_trap_info(desc, traps, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 780) memset(&traps[out], 0, sizeof(traps[0]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 781)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 782) xen_mc_flush();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 783) if (HYPERVISOR_set_trap_table(traps))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 784) BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 785)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 786) spin_unlock(&lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 787) }

/*
 * Write a GDT descriptor entry.  Ignore LDT descriptors, since
 * they're handled differently.
 */
static void xen_write_gdt_entry(struct desc_struct *dt, int entry,
				const void *desc, int type)
{
	trace_xen_cpu_write_gdt_entry(dt, entry, desc, type);

	preempt_disable();

	switch (type) {
	case DESC_LDT:
	case DESC_TSS:
		/* ignore */
		break;

	default: {
		xmaddr_t maddr = arbitrary_virt_to_machine(&dt[entry]);

		xen_mc_flush();
		if (HYPERVISOR_update_descriptor(maddr.maddr, *(u64 *)desc))
			BUG();
	}
	}

	preempt_enable();
}
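
/*
 * Illustrative sketch, not part of the build: a PV guest cannot poke
 * a live GDT page directly (Xen keeps those frames read-only), so the
 * update is a hypercall on the *machine* address of the entry.  The
 * pattern is roughly:
 *
 *	xmaddr_t maddr = arbitrary_virt_to_machine(&dt[entry]);
 *	HYPERVISOR_update_descriptor(maddr.maddr, *(u64 *)desc);
 *
 * arbitrary_virt_to_machine() walks the page tables and so handles
 * any mapped address (percpu, vmalloc, fixmap); the boot-time variant
 * below can use the cheaper virt_to_machine(), which only covers the
 * direct kernel mapping.
 */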

/*
 * Version of write_gdt_entry for use at early boot time, when an
 * entry needs to be updated as simply as possible.
 */
static void __init xen_write_gdt_entry_boot(struct desc_struct *dt, int entry,
					    const void *desc, int type)
{
	trace_xen_cpu_write_gdt_entry(dt, entry, desc, type);

	switch (type) {
	case DESC_LDT:
	case DESC_TSS:
		/* ignore */
		break;

	default: {
		xmaddr_t maddr = virt_to_machine(&dt[entry]);

		if (HYPERVISOR_update_descriptor(maddr.maddr, *(u64 *)desc))
			dt[entry] = *(struct desc_struct *)desc;
	}
	}
}

static void xen_load_sp0(unsigned long sp0)
{
	struct multicall_space mcs;

	mcs = xen_mc_entry(0);
	MULTI_stack_switch(mcs.mc, __KERNEL_DS, sp0);
	xen_mc_issue(PARAVIRT_LAZY_CPU);
	this_cpu_write(cpu_tss_rw.x86_tss.sp0, sp0);
}
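
/*
 * Illustrative sketch, not part of the build: the xen_mc_entry() /
 * MULTI_*() / xen_mc_issue() triple is the standard multicall pattern
 * in this file.  Rather than trapping into Xen immediately, the call
 * is queued in a per-CPU batch; inside a lazy-CPU section
 * xen_mc_issue(PARAVIRT_LAZY_CPU) leaves it queued so several updates
 * go down in a single hypercall, otherwise it flushes right away:
 *
 *	mcs = xen_mc_entry(0);				(slot + 0 bytes of args)
 *	MULTI_stack_switch(mcs.mc, __KERNEL_DS, sp0);	(encode the call)
 *	xen_mc_issue(PARAVIRT_LAZY_CPU);		(flush unless batching)
 */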

#ifdef CONFIG_X86_IOPL_IOPERM
static void xen_invalidate_io_bitmap(void)
{
	struct physdev_set_iobitmap iobitmap = {
		.bitmap = NULL,
		.nr_ports = 0,
	};

	native_tss_invalidate_io_bitmap();
	HYPERVISOR_physdev_op(PHYSDEVOP_set_iobitmap, &iobitmap);
}

static void xen_update_io_bitmap(void)
{
	struct physdev_set_iobitmap iobitmap;
	struct tss_struct *tss = this_cpu_ptr(&cpu_tss_rw);

	native_tss_update_io_bitmap();

	iobitmap.bitmap = (uint8_t *)(&tss->x86_tss) +
			  tss->x86_tss.io_bitmap_base;
	if (tss->x86_tss.io_bitmap_base == IO_BITMAP_OFFSET_INVALID)
		iobitmap.nr_ports = 0;
	else
		iobitmap.nr_ports = IO_BITMAP_BITS;

	HYPERVISOR_physdev_op(PHYSDEVOP_set_iobitmap, &iobitmap);
}
#endif
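
/*
 * Illustrative note, not part of the build: under Xen PV the TSS the
 * CPU actually uses is Xen's, not ours, so updating the in-kernel TSS
 * I/O bitmap is not enough -- the guest must also tell the hypervisor
 * via PHYSDEVOP_set_iobitmap.  An invalid io_bitmap_base is reported
 * as nr_ports == 0 (no ports permitted), a valid one as the full
 * range backed by the bitmap that follows x86_tss:
 *
 *	iobitmap.bitmap   = (uint8_t *)&tss->x86_tss
 *			    + tss->x86_tss.io_bitmap_base;
 *	iobitmap.nr_ports = valid ? IO_BITMAP_BITS : 0;
 *
 * where "valid" is shorthand for
 * io_bitmap_base != IO_BITMAP_OFFSET_INVALID.
 */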

static void xen_io_delay(void)
{
}

static DEFINE_PER_CPU(unsigned long, xen_cr0_value);

static unsigned long xen_read_cr0(void)
{
	unsigned long cr0 = this_cpu_read(xen_cr0_value);

	if (unlikely(cr0 == 0)) {
		cr0 = native_read_cr0();
		this_cpu_write(xen_cr0_value, cr0);
	}

	return cr0;
}
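
/*
 * Illustrative note, not part of the build: reading %cr0 from a PV
 * guest traps to the hypervisor, so the value is cached per-CPU and
 * filled lazily on first use.  Zero doubles as "not yet cached",
 * which is safe because a live cr0 always has at least PE set.  The
 * read-through-cache shape:
 *
 *	cr0 = this_cpu_read(xen_cr0_value);
 *	if (unlikely(cr0 == 0))
 *		this_cpu_write(xen_cr0_value, cr0 = native_read_cr0());
 */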

static void xen_write_cr0(unsigned long cr0)
{
	struct multicall_space mcs;

	this_cpu_write(xen_cr0_value, cr0);

	/* Only pay attention to cr0.TS; everything else is ignored. */
	mcs = xen_mc_entry(0);

	MULTI_fpu_taskswitch(mcs.mc, (cr0 & X86_CR0_TS) != 0);

	xen_mc_issue(PARAVIRT_LAZY_CPU);
}

static void xen_write_cr4(unsigned long cr4)
{
	cr4 &= ~(X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PCE);

	native_write_cr4(cr4);
}
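
/*
 * Illustrative note, not part of the build: PGE, PSE and PCE are
 * filtered out before the write because these cr4 bits are managed by
 * the hypervisor rather than the PV guest (global/large-page paging
 * control for PGE/PSE, performance-counter enablement for PCE);
 * letting them through would not take effect.  Conceptually:
 *
 *	cr4 &= ~(X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PCE);
 *	native_write_cr4(cr4);	(only the remaining bits reach Xen)
 */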

static u64 xen_read_msr_safe(unsigned int msr, int *err)
{
	u64 val;

	if (pmu_msr_read(msr, &val, err))
		return val;

	val = native_read_msr_safe(msr, err);
	switch (msr) {
	case MSR_IA32_APICBASE:
		val &= ~X2APIC_ENABLE;
		break;
	}
	return val;
}

static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high)
{
	int ret;
	unsigned int which;
	u64 base;

	ret = 0;

	switch (msr) {
	case MSR_FS_BASE:	which = SEGBASE_FS; goto set;
	case MSR_KERNEL_GS_BASE: which = SEGBASE_GS_USER; goto set;
	case MSR_GS_BASE:	which = SEGBASE_GS_KERNEL; goto set;

	set:
		base = ((u64)high << 32) | low;
		if (HYPERVISOR_set_segment_base(which, base) != 0)
			ret = -EIO;
		break;

	case MSR_STAR:
	case MSR_CSTAR:
	case MSR_LSTAR:
	case MSR_SYSCALL_MASK:
	case MSR_IA32_SYSENTER_CS:
	case MSR_IA32_SYSENTER_ESP:
	case MSR_IA32_SYSENTER_EIP:
		/*
		 * Fast syscall setup is all done in hypercalls, so
		 * these are all ignored.  Stub them out here to stop
		 * Xen console noise.
		 */
		break;

	default:
		if (!pmu_msr_write(msr, low, high, &ret))
			ret = native_write_msr_safe(msr, low, high);
	}

	return ret;
}
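
/*
 * Illustrative note, not part of the build: the segment-base MSRs are
 * routed to the dedicated HYPERVISOR_set_segment_base() hypercall
 * rather than WRMSR, with the 64-bit base reassembled from the
 * conventional WRMSR register split:
 *
 *	u64 base = ((u64)high << 32) | low;	(EDX:EAX -> one value)
 *	HYPERVISOR_set_segment_base(SEGBASE_FS, base);
 *
 * The naming lines up once you recall that MSR_KERNEL_GS_BASE is the
 * swapgs shadow holding the *user* GS base while the kernel runs,
 * hence it maps to SEGBASE_GS_USER, and MSR_GS_BASE (the active,
 * kernel-side base) maps to SEGBASE_GS_KERNEL.
 */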

static u64 xen_read_msr(unsigned int msr)
{
	/*
	 * This will silently swallow a #GP from RDMSR.  It may be worth
	 * changing that.
	 */
	int err;

	return xen_read_msr_safe(msr, &err);
}

static void xen_write_msr(unsigned int msr, unsigned low, unsigned high)
{
	/*
	 * This will silently swallow a #GP from WRMSR.  It may be worth
	 * changing that.
	 */
	xen_write_msr_safe(msr, low, high);
}

/* This is called once we have the cpu_possible_mask */
void __init xen_setup_vcpu_info_placement(void)
{
	int cpu;

	for_each_possible_cpu(cpu) {
		/* Set up direct vCPU id mapping for PV guests. */
		per_cpu(xen_vcpu_id, cpu) = cpu;

		/*
		 * xen_vcpu_setup(cpu) can fail -- in which case it
		 * falls back to the shared_info version for cpus
		 * where xen_vcpu_nr(cpu) < MAX_VIRT_CPUS.
		 *
		 * xen_cpu_up_prepare_pv() handles the rest by failing
		 * them in hotplug.
		 */
		(void) xen_vcpu_setup(cpu);
	}

	/*
	 * xen_vcpu_setup managed to place the vcpu_info within the
	 * percpu area for all cpus, so make use of it.
	 */
	if (xen_have_vcpu_info_placement) {
		pv_ops.irq.save_fl = __PV_IS_CALLEE_SAVE(xen_save_fl_direct);
		pv_ops.irq.restore_fl =
			__PV_IS_CALLEE_SAVE(xen_restore_fl_direct);
		pv_ops.irq.irq_disable =
			__PV_IS_CALLEE_SAVE(xen_irq_disable_direct);
		pv_ops.irq.irq_enable =
			__PV_IS_CALLEE_SAVE(xen_irq_enable_direct);
		pv_ops.mmu.read_cr2 =
			__PV_IS_CALLEE_SAVE(xen_read_cr2_direct);
	}
}
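
/*
 * Illustrative note, not part of the build: the "_direct" variants
 * installed above are small callee-save stubs that read or write the
 * vcpu_info copy in the percpu area instead of going through the
 * xen_vcpu pointer, which is only possible once vcpu_info has been
 * placed there.  Swapping them in is plain pv_ops assignment, e.g.:
 *
 *	pv_ops.irq.save_fl = __PV_IS_CALLEE_SAVE(xen_save_fl_direct);
 *
 * after which arch_local_save_flags() callers hit the percpu fast
 * path rather than the generic implementation.
 */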

static const struct pv_info xen_info __initconst = {
	.extra_user_64bit_cs = FLAT_USER_CS64,
	.name = "Xen",
};

static const struct pv_cpu_ops xen_cpu_ops __initconst = {
	.cpuid = xen_cpuid,

	.set_debugreg = xen_set_debugreg,
	.get_debugreg = xen_get_debugreg,

	.read_cr0 = xen_read_cr0,
	.write_cr0 = xen_write_cr0,

	.write_cr4 = xen_write_cr4,

	.wbinvd = native_wbinvd,

	.read_msr = xen_read_msr,
	.write_msr = xen_write_msr,

	.read_msr_safe = xen_read_msr_safe,
	.write_msr_safe = xen_write_msr_safe,

	.read_pmc = xen_read_pmc,

	.iret = xen_iret,
	.usergs_sysret64 = xen_sysret64,

	.load_tr_desc = paravirt_nop,
	.set_ldt = xen_set_ldt,
	.load_gdt = xen_load_gdt,
	.load_idt = xen_load_idt,
	.load_tls = xen_load_tls,
	.load_gs_index = xen_load_gs_index,

	.alloc_ldt = xen_alloc_ldt,
	.free_ldt = xen_free_ldt,

	.store_tr = xen_store_tr,

	.write_ldt_entry = xen_write_ldt_entry,
	.write_gdt_entry = xen_write_gdt_entry,
	.write_idt_entry = xen_write_idt_entry,
	.load_sp0 = xen_load_sp0,

#ifdef CONFIG_X86_IOPL_IOPERM
	.invalidate_io_bitmap = xen_invalidate_io_bitmap,
	.update_io_bitmap = xen_update_io_bitmap,
#endif
	.io_delay = xen_io_delay,

	.start_context_switch = paravirt_start_context_switch,
	.end_context_switch = xen_end_context_switch,
};

static void xen_restart(char *msg)
{
	xen_reboot(SHUTDOWN_reboot);
}

static void xen_machine_halt(void)
{
	xen_reboot(SHUTDOWN_poweroff);
}

static void xen_machine_power_off(void)
{
	if (pm_power_off)
		pm_power_off();
	xen_reboot(SHUTDOWN_poweroff);
}

static void xen_crash_shutdown(struct pt_regs *regs)
{
	xen_reboot(SHUTDOWN_crash);
}

static const struct machine_ops xen_machine_ops __initconst = {
	.restart = xen_restart,
	.halt = xen_machine_halt,
	.power_off = xen_machine_power_off,
	.shutdown = xen_machine_halt,
	.crash_shutdown = xen_crash_shutdown,
	.emergency_restart = xen_emergency_restart,
};

static unsigned char xen_get_nmi_reason(void)
{
	unsigned char reason = 0;

	/* Construct a value which looks like it came from port 0x61. */
	if (test_bit(_XEN_NMIREASON_io_error,
		     &HYPERVISOR_shared_info->arch.nmi_reason))
		reason |= NMI_REASON_IOCHK;
	if (test_bit(_XEN_NMIREASON_pci_serr,
		     &HYPERVISOR_shared_info->arch.nmi_reason))
		reason |= NMI_REASON_SERR;

	return reason;
}
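
/*
 * Illustrative note, not part of the build: on bare metal the NMI
 * handler reads system-control port 0x61 to learn whether the NMI was
 * an IOCHK or SERR event.  A PV guest has no such port, so the two
 * reason bits Xen publishes in shared_info are translated into the
 * bit layout the port read would have produced:
 *
 *	_XEN_NMIREASON_io_error  ->  NMI_REASON_IOCHK
 *	_XEN_NMIREASON_pci_serr  ->  NMI_REASON_SERR
 */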

static void __init xen_boot_params_init_edd(void)
{
#if IS_ENABLED(CONFIG_EDD)
	struct xen_platform_op op;
	struct edd_info *edd_info;
	u32 *mbr_signature;
	unsigned nr;
	int ret;

	edd_info = boot_params.eddbuf;
	mbr_signature = boot_params.edd_mbr_sig_buffer;

	op.cmd = XENPF_firmware_info;

	op.u.firmware_info.type = XEN_FW_DISK_INFO;
	for (nr = 0; nr < EDDMAXNR; nr++) {
		struct edd_info *info = edd_info + nr;

		op.u.firmware_info.index = nr;
		info->params.length = sizeof(info->params);
		set_xen_guest_handle(op.u.firmware_info.u.disk_info.edd_params,
				     &info->params);
		ret = HYPERVISOR_platform_op(&op);
		if (ret)
			break;

#define C(x) info->x = op.u.firmware_info.u.disk_info.x
		C(device);
		C(version);
		C(interface_support);
		C(legacy_max_cylinder);
		C(legacy_max_head);
		C(legacy_sectors_per_track);
#undef C
	}
	boot_params.eddbuf_entries = nr;

	op.u.firmware_info.type = XEN_FW_DISK_MBR_SIGNATURE;
	for (nr = 0; nr < EDD_MBR_SIG_MAX; nr++) {
		op.u.firmware_info.index = nr;
		ret = HYPERVISOR_platform_op(&op);
		if (ret)
			break;
		mbr_signature[nr] = op.u.firmware_info.u.disk_mbr_signature.mbr_signature;
	}
	boot_params.edd_mbr_sig_buf_entries = nr;
#endif
}
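
/*
 * Illustrative note, not part of the build: dom0 cannot invoke the
 * BIOS EDD services itself, so the firmware data is pulled from Xen
 * one index at a time until the hypercall reports no more entries.
 * The local C() macro is just field-copy shorthand; each use such as
 *
 *	C(device);
 *
 * expands to
 *
 *	info->device = op.u.firmware_info.u.disk_info.device;
 */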

/*
 * Set up the GDT and segment registers for -fstack-protector.  Until
 * we do this, we have to be careful not to call any stack-protected
 * function, which is most of the kernel.
 */
static void __init xen_setup_gdt(int cpu)
{
	pv_ops.cpu.write_gdt_entry = xen_write_gdt_entry_boot;
	pv_ops.cpu.load_gdt = xen_load_gdt_boot;

	setup_stack_canary_segment(cpu);
	switch_to_new_gdt(cpu);

	pv_ops.cpu.write_gdt_entry = xen_write_gdt_entry;
	pv_ops.cpu.load_gdt = xen_load_gdt;
}
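
/*
 * Illustrative note, not part of the build: the temporary swap above
 * is a common pv_ops idiom -- install the simplified "_boot" variants,
 * perform the one operation that needs them, then restore the normal
 * ops.  The shape is:
 *
 *	pv_ops.cpu.write_gdt_entry = xen_write_gdt_entry_boot;
 *	pv_ops.cpu.load_gdt = xen_load_gdt_boot;
 *	switch_to_new_gdt(cpu);		(runs with the boot variants)
 *	pv_ops.cpu.write_gdt_entry = xen_write_gdt_entry;
 *	pv_ops.cpu.load_gdt = xen_load_gdt;
 */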

static void __init xen_dom0_set_legacy_features(void)
{
	x86_platform.legacy.rtc = 1;
}

static void __init xen_domu_set_legacy_features(void)
{
	x86_platform.legacy.rtc = 0;
}

/* First C function to be called on Xen boot */
asmlinkage __visible void __init xen_start_kernel(void)
{
	struct physdev_set_iopl set_iopl;
	unsigned long initrd_start = 0;
	int rc;

	if (!xen_start_info)
		return;

	xen_domain_type = XEN_PV_DOMAIN;
	xen_start_flags = xen_start_info->flags;

	xen_setup_features();

	/* Install Xen paravirt ops */
	pv_info = xen_info;
	pv_ops.init.patch = paravirt_patch_default;
	pv_ops.cpu = xen_cpu_ops;
	xen_init_irq_ops();

	/*
	 * Setup xen_vcpu early because it is needed for
	 * local_irq_disable(), irqs_disabled(), e.g. in printk().
	 *
	 * Don't do the full vcpu_info placement stuff until we have
	 * the cpu_possible_mask and a non-dummy shared_info.
	 */
	xen_vcpu_info_reset(0);

	x86_platform.get_nmi_reason = xen_get_nmi_reason;

	x86_init.resources.memory_setup = xen_memory_setup;
	x86_init.irqs.intr_mode_select = x86_init_noop;
	x86_init.irqs.intr_mode_init = x86_init_noop;
	x86_init.oem.arch_setup = xen_arch_setup;
	x86_init.oem.banner = xen_banner;
	x86_init.hyper.init_platform = xen_pv_init_platform;
	x86_init.hyper.guest_late_init = xen_pv_guest_late_init;

	/*
	 * Set up some pagetable state before starting to set any ptes.
	 */

	xen_setup_machphys_mapping();
	xen_init_mmu_ops();

	/* Prevent unwanted bits from being set in PTEs. */
	__supported_pte_mask &= ~_PAGE_GLOBAL;
	__default_kernel_pte_mask &= ~_PAGE_GLOBAL;

	/*
	 * Prevent page tables from being allocated in highmem, even
	 * if CONFIG_HIGHPTE is enabled.
	 */
	__userpte_alloc_gfp &= ~__GFP_HIGHMEM;

	/* Get mfn list */
	xen_build_dynamic_phys_to_machine();

	/* Work out if we support NX */
	get_cpu_cap(&boot_cpu_data);
	x86_configure_nx();

	/*
	 * Set up kernel GDT and segment registers, mainly so that
	 * -fstack-protector code can be executed.
	 */
	xen_setup_gdt(0);

	/* Determine virtual and physical address sizes */
	get_cpu_address_sizes(&boot_cpu_data);

	/* Let's presume PV guests always boot on vCPU with id 0. */
	per_cpu(xen_vcpu_id, 0) = 0;

	idt_setup_early_handler();

	xen_init_capabilities();

#ifdef CONFIG_X86_LOCAL_APIC
	/* Set up the basic APIC ops. */
	xen_init_apic();
#endif

	if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) {
		pv_ops.mmu.ptep_modify_prot_start =
			xen_ptep_modify_prot_start;
		pv_ops.mmu.ptep_modify_prot_commit =
			xen_ptep_modify_prot_commit;
	}

	machine_ops = xen_machine_ops;

	/*
	 * The only reliable way to retain the initial address of the
	 * percpu gdt_page is to remember it here, so we can go and
	 * mark it RW later, when the initial percpu area is freed.
	 */
	xen_initial_gdt = &per_cpu(gdt_page, 0);

	xen_smp_init();

#ifdef CONFIG_ACPI_NUMA
	/*
	 * The pages we get from Xen are not related to machine pages,
	 * so any NUMA information the kernel tries to get from ACPI
	 * will be meaningless.  Prevent it from trying.
	 */
	disable_srat();
#endif
	WARN_ON(xen_cpuhp_setup(xen_cpu_up_prepare_pv, xen_cpu_dead_pv));

	local_irq_disable();
	early_boot_irqs_disabled = true;

	xen_raw_console_write("mapping kernel into physical memory\n");
	xen_setup_kernel_pagetable((pgd_t *)xen_start_info->pt_base,
				   xen_start_info->nr_pages);
	xen_reserve_special_pages();

	/*
	 * We used to do this in xen_arch_setup, but that is too late
	 * on AMD, where early_cpu_init (run before ->arch_setup())
	 * calls early_amd_init, which pokes port 0xcf8.
	 */
	set_iopl.iopl = 1;
	rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
	if (rc != 0)
		xen_raw_printk("physdev_op failed %d\n", rc);

	if (xen_start_info->mod_start) {
		if (xen_start_info->flags & SIF_MOD_START_PFN)
			initrd_start = PFN_PHYS(xen_start_info->mod_start);
		else
			initrd_start = __pa(xen_start_info->mod_start);
	}

	/* Poke various useful things into boot_params */
	boot_params.hdr.type_of_loader = (9 << 4) | 0;
	boot_params.hdr.ramdisk_image = initrd_start;
	boot_params.hdr.ramdisk_size = xen_start_info->mod_len;
	boot_params.hdr.cmd_line_ptr = __pa(xen_start_info->cmd_line);
	boot_params.hdr.hardware_subarch = X86_SUBARCH_XEN;
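
	/*
	 * Illustrative note, not part of the build: type_of_loader
	 * packs a loader ID in the high nibble and a loader version in
	 * the low nibble, per the x86 boot protocol, so (9 << 4) | 0
	 * reads as "loader 9 (Xen), version 0".
	 */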

	if (!xen_initial_domain()) {
		add_preferred_console("xenboot", 0, NULL);
		if (pci_xen)
			x86_init.pci.arch_init = pci_xen_init;
		x86_platform.set_legacy_features =
				xen_domu_set_legacy_features;
	} else {
		const struct dom0_vga_console_info *info =
			(void *)((char *)xen_start_info +
				 xen_start_info->console.dom0.info_off);
		struct xen_platform_op op = {
			.cmd = XENPF_firmware_info,
			.interface_version = XENPF_INTERFACE_VERSION,
			.u.firmware_info.type = XEN_FW_KBD_SHIFT_FLAGS,
		};

		x86_platform.set_legacy_features =
				xen_dom0_set_legacy_features;
		xen_init_vga(info, xen_start_info->console.dom0.info_size);
		xen_start_info->console.domU.mfn = 0;
		xen_start_info->console.domU.evtchn = 0;

		if (HYPERVISOR_platform_op(&op) == 0)
			boot_params.kbd_status = op.u.firmware_info.u.kbd_shift_flags;

		/* Make sure ACS will be enabled */
		pci_request_acs();

		xen_acpi_sleep_register();

		xen_boot_params_init_edd();

#ifdef CONFIG_ACPI
		/*
		 * Disable selecting "Firmware First mode" for correctable
		 * memory errors, as it is the hypervisor's duty to decide
		 * this.
		 */
		acpi_disable_cmcff = 1;
#endif
	}

	/*
	 * The console registered last takes precedence, so prefer hvc
	 * unless real VGA hardware is present, in which case tty wins.
	 */
	if (!boot_params.screen_info.orig_video_isVGA)
		add_preferred_console("tty", 0, NULL);
	add_preferred_console("hvc", 0, NULL);
	if (boot_params.screen_info.orig_video_isVGA)
		add_preferred_console("tty", 0, NULL);

#ifdef CONFIG_PCI
	/* PCI BIOS service won't work from a PV guest. */
	pci_probe &= ~PCI_PROBE_BIOS;
#endif
	xen_raw_console_write("about to get started...\n");

	/* We need this for printk timestamps */
	xen_setup_runstate_info(0);

	xen_efi_init(&boot_params);

	/* Start the world */
	cr4_init_shadow(); /* 32b kernel does this in i386_start_kernel() */
	x86_64_start_reservations((char *)__pa_symbol(&boot_params));
}

static int xen_cpu_up_prepare_pv(unsigned int cpu)
{
	int rc;

	if (per_cpu(xen_vcpu, cpu) == NULL)
		return -ENODEV;

	xen_setup_timer(cpu);

	rc = xen_smp_intr_init(cpu);
	if (rc) {
		WARN(1, "xen_smp_intr_init() for CPU %d failed: %d\n",
		     cpu, rc);
		return rc;
	}

	rc = xen_smp_intr_init_pv(cpu);
	if (rc) {
		WARN(1, "xen_smp_intr_init_pv() for CPU %d failed: %d\n",
		     cpu, rc);
		return rc;
	}

	return 0;
}

static int xen_cpu_dead_pv(unsigned int cpu)
{
	xen_smp_intr_free(cpu);
	xen_smp_intr_free_pv(cpu);

	xen_teardown_timer(cpu);

	return 0;
}

static uint32_t __init xen_platform_pv(void)
{
	if (xen_pv_domain())
		return xen_cpuid_base();

	return 0;
}
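
/*
 * Illustrative note, not part of the build: a hypervisor_x86 .detect
 * callback returns the hypervisor's CPUID leaf base, with non-zero
 * meaning "detected".  For PV the domain type was already set in
 * xen_start_kernel(), so the check above is just a gate around
 * xen_cpuid_base(), which scans the 0x40000000+ leaf range for Xen's
 * "XenVMMXenVMM" signature.
 */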

const __initconst struct hypervisor_x86 x86_hyper_xen_pv = {
	.name			= "Xen PV",
	.detect			= xen_platform_pv,
	.type			= X86_HYPER_XEN_PV,
	.runtime.pin_vcpu	= xen_pin_vcpu,
	.ignore_nopv		= true,
};