^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) // SPDX-License-Identifier: GPL-2.0-only
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) * kexec.c - kexec_load system call
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) * Copyright (C) 2002-2004 Eric Biederman <ebiederm@xmission.com>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) #include <linux/capability.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) #include <linux/mm.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11) #include <linux/file.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12) #include <linux/security.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) #include <linux/kexec.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) #include <linux/mutex.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15) #include <linux/list.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) #include <linux/syscalls.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) #include <linux/vmalloc.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18) #include <linux/slab.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20) #include "kexec_internal.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22) static int copy_user_segment_list(struct kimage *image,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23) unsigned long nr_segments,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24) struct kexec_segment __user *segments)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27) size_t segment_bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29) /* Read in the segments */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30) image->nr_segments = nr_segments;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31) segment_bytes = nr_segments * sizeof(*segments);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32) ret = copy_from_user(image->segment, segments, segment_bytes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34) ret = -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39) static int kimage_alloc_init(struct kimage **rimage, unsigned long entry,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40) unsigned long nr_segments,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41) struct kexec_segment __user *segments,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42) unsigned long flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45) struct kimage *image;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46) bool kexec_on_panic = flags & KEXEC_ON_CRASH;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48) if (kexec_on_panic) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49) /* Verify we have a valid entry point */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50) if ((entry < phys_to_boot_phys(crashk_res.start)) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51) (entry > phys_to_boot_phys(crashk_res.end)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52) return -EADDRNOTAVAIL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55) /* Allocate and initialize a controlling structure */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56) image = do_kimage_alloc_init();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57) if (!image)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60) image->start = entry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62) ret = copy_user_segment_list(image, nr_segments, segments);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64) goto out_free_image;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66) if (kexec_on_panic) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67) /* Enable special crash kernel control page alloc policy. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68) image->control_page = crashk_res.start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69) image->type = KEXEC_TYPE_CRASH;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72) ret = sanity_check_segment_list(image);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74) goto out_free_image;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77) * Find a location for the control code buffer, and add it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78) * the vector of segments so that it's pages will also be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79) * counted as destination pages.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81) ret = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82) image->control_code_page = kimage_alloc_control_pages(image,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 83) get_order(KEXEC_CONTROL_PAGE_SIZE));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 84) if (!image->control_code_page) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 85) pr_err("Could not allocate control_code_buffer\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 86) goto out_free_image;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 87) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89) if (!kexec_on_panic) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90) image->swap_page = kimage_alloc_control_pages(image, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91) if (!image->swap_page) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92) pr_err("Could not allocate swap buffer\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93) goto out_free_control_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 96)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97) *rimage = image;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 98) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 99) out_free_control_pages:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100) kimage_free_page_list(&image->control_pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) out_free_image:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) kfree(image);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) static int do_kexec_load(unsigned long entry, unsigned long nr_segments,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) struct kexec_segment __user *segments, unsigned long flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) struct kimage **dest_image, *image;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) unsigned long i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113) if (flags & KEXEC_ON_CRASH) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) dest_image = &kexec_crash_image;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) if (kexec_crash_image)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) arch_kexec_unprotect_crashkres();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118) dest_image = &kexec_image;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121) if (nr_segments == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122) /* Uninstall image */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) kimage_free(xchg(dest_image, NULL));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) if (flags & KEXEC_ON_CRASH) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128) * Loading another kernel to switch to if this one
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129) * crashes. Free any current crash dump kernel before
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130) * we corrupt it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) kimage_free(xchg(&kexec_crash_image, NULL));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135) ret = kimage_alloc_init(&image, entry, nr_segments, segments, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139) if (flags & KEXEC_PRESERVE_CONTEXT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140) image->preserve_context = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142) ret = machine_kexec_prepare(image);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147) * Some architecture(like S390) may touch the crash memory before
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148) * machine_kexec_prepare(), we must copy vmcoreinfo data after it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150) ret = kimage_crash_copy_vmcoreinfo(image);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154) for (i = 0; i < nr_segments; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155) ret = kimage_load_segment(image, &image->segment[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 160) kimage_terminate(image);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 161)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 162) ret = machine_kexec_post_load(image);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 163) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 164) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 165)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 166) /* Install the new kernel and uninstall the old */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167) image = xchg(dest_image, image);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 168)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170) if ((flags & KEXEC_ON_CRASH) && kexec_crash_image)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171) arch_kexec_protect_crashkres();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 173) kimage_free(image);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 174) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 175) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 176)
/*
 * Exec Kernel system call: for obvious reasons only root may call it.
 *
 * This call breaks up into three pieces.
 * - A generic part which loads the new kernel from the current
 *   address space, and very carefully places the data in the
 *   allocated pages.
 *
 * - A generic part that interacts with the kernel and tells all of
 *   the devices to shut down, preventing on-going DMAs and placing
 *   the devices in a consistent state so a later kernel can
 *   reinitialize them.
 *
 * - A machine specific part that includes the syscall number
 *   and then copies the image to its final destination, and
 *   jumps into the image at entry.
 *
 * kexec does not sync, or unmount filesystems so if you need
 * that to happen you need to do that yourself.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 197)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 198) static inline int kexec_load_check(unsigned long nr_segments,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 199) unsigned long flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 200) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 201) int result;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 202)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 203) /* We only trust the superuser with rebooting the system. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 204) if (!capable(CAP_SYS_BOOT) || kexec_load_disabled)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 205) return -EPERM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 206)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 207) /* Permit LSMs and IMA to fail the kexec */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 208) result = security_kernel_load_data(LOADING_KEXEC_IMAGE, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 209) if (result < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 210) return result;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 211)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 212) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 213) * kexec can be used to circumvent module loading restrictions, so
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 214) * prevent loading in that case
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 215) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 216) result = security_locked_down(LOCKDOWN_KEXEC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 217) if (result)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 218) return result;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 219)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 220) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 221) * Verify we have a legal set of flags
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 222) * This leaves us room for future extensions.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 223) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 224) if ((flags & KEXEC_FLAGS) != (flags & ~KEXEC_ARCH_MASK))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 225) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 226)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 227) /* Put an artificial cap on the number
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 228) * of segments passed to kexec_load.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 229) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 230) if (nr_segments > KEXEC_SEGMENT_MAX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 231) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 232)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 233) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 234) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 235)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 236) SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 237) struct kexec_segment __user *, segments, unsigned long, flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 238) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 239) int result;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 240)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 241) result = kexec_load_check(nr_segments, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 242) if (result)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 243) return result;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 244)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 245) /* Verify we are on the appropriate architecture */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 246) if (((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 247) ((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH_DEFAULT))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 248) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 249)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 250) /* Because we write directly to the reserved memory
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 251) * region when loading crash kernels we need a mutex here to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 252) * prevent multiple crash kernels from attempting to load
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 253) * simultaneously, and to prevent a crash kernel from loading
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 254) * over the top of a in use crash kernel.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 255) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 256) * KISS: always take the mutex.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 257) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 258) if (!mutex_trylock(&kexec_mutex))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 259) return -EBUSY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 260)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 261) result = do_kexec_load(entry, nr_segments, segments, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 262)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 263) mutex_unlock(&kexec_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 264)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 265) return result;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 266) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 267)
#ifdef CONFIG_COMPAT
/*
 * Compat kexec_load entry point.  Each compat segment descriptor is
 * converted to the native layout in a scratch user-space array obtained
 * from compat_alloc_user_space(), and that array is then handed to
 * do_kexec_load() under kexec_mutex.
 *
 * Returns the kexec_load_check() error if the request is refused, -EINVAL
 * if the architecture bits in @flags equal KEXEC_ARCH_DEFAULT, -EFAULT if a
 * descriptor cannot be read or written, -EBUSY if kexec_mutex could not be
 * taken without waiting, otherwise the result of do_kexec_load().
 *
 * NOTE(review): 'result' is unsigned long, so the int error codes above
 * pass through an unsigned conversion before being returned.
 */
COMPAT_SYSCALL_DEFINE4(kexec_load, compat_ulong_t, entry,
		       compat_ulong_t, nr_segments,
		       struct compat_kexec_segment __user *, segments,
		       compat_ulong_t, flags)
{
	struct compat_kexec_segment cseg;
	struct kexec_segment nseg, __user *native;
	unsigned long idx, result;

	result = kexec_load_check(nr_segments, flags);
	if (result)
		return result;

	/*
	 * Don't allow clients that don't understand the native
	 * architecture to do anything.
	 */
	if ((flags & KEXEC_ARCH_MASK) == KEXEC_ARCH_DEFAULT)
		return -EINVAL;

	native = compat_alloc_user_space(nr_segments * sizeof(nseg));
	for (idx = 0; idx < nr_segments; idx++) {
		if (copy_from_user(&cseg, segments + idx, sizeof(cseg)))
			return -EFAULT;

		/* Widen the compat descriptor field by field. */
		nseg.buf   = compat_ptr(cseg.buf);
		nseg.bufsz = cseg.bufsz;
		nseg.mem   = cseg.mem;
		nseg.memsz = cseg.memsz;

		if (copy_to_user(native + idx, &nseg, sizeof(nseg)))
			return -EFAULT;
	}

	/*
	 * Because we write directly to the reserved memory region when
	 * loading crash kernels we need a mutex here to prevent multiple
	 * crash kernels from attempting to load simultaneously, and to
	 * prevent a crash kernel from loading over the top of an in-use
	 * crash kernel.
	 *
	 * KISS: always take the mutex.
	 */
	if (!mutex_trylock(&kexec_mutex))
		return -EBUSY;

	result = do_kexec_load(entry, nr_segments, native, flags);

	mutex_unlock(&kexec_mutex);

	return result;
}
#endif