^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) // SPDX-License-Identifier: GPL-2.0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) * IA-32 Huge TLB Page Support for Kernel.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) * Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) #include <linux/init.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) #include <linux/fs.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) #include <linux/mm.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11) #include <linux/sched/mm.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12) #include <linux/hugetlb.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) #include <linux/pagemap.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) #include <linux/err.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15) #include <linux/sysctl.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) #include <linux/compat.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) #include <asm/mman.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18) #include <asm/tlb.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19) #include <asm/tlbflush.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20) #include <asm/elf.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22) #if 0 /* This is just for testing */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23) struct page *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24) follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) unsigned long start = address;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27) int length = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28) int nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30) struct vm_area_struct *vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32) vma = find_vma(mm, addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33) if (!vma || !is_vm_hugetlb_page(vma))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34) return ERR_PTR(-EINVAL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36) pte = huge_pte_offset(mm, address, vma_mmu_pagesize(vma));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38) /* hugetlb should be locked, and hence, prefaulted */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39) WARN_ON(!pte || pte_none(*pte));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41) page = &pte_page(*pte)[vpfn % (HPAGE_SIZE/PAGE_SIZE)];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) WARN_ON(!PageHead(page));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45) return page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48) int pmd_huge(pmd_t pmd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53) int pud_huge(pud_t pud)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61) * pmd_huge() returns 1 if @pmd is hugetlb related entry, that is normal
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62) * hugetlb entry or non-present (migration or hwpoisoned) hugetlb entry.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63) * Otherwise, returns 0.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65) int pmd_huge(pmd_t pmd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67) return !pmd_none(pmd) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68) (pmd_val(pmd) & (_PAGE_PRESENT|_PAGE_PSE)) != _PAGE_PRESENT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71) int pud_huge(pud_t pud)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73) return !!(pud_val(pud) & _PAGE_PSE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77) #ifdef CONFIG_HUGETLB_PAGE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78) static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79) unsigned long addr, unsigned long len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80) unsigned long pgoff, unsigned long flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82) struct hstate *h = hstate_file(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 83) struct vm_unmapped_area_info info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 84)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 85) info.flags = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 86) info.length = len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 87) info.low_limit = get_mmap_base(1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90) * If hint address is above DEFAULT_MAP_WINDOW, look for unmapped area
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91) * in the full address space.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93) info.high_limit = in_32bit_syscall() ?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94) task_size_32bit() : task_size_64bit(addr > DEFAULT_MAP_WINDOW);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 96) info.align_mask = PAGE_MASK & ~huge_page_mask(h);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97) info.align_offset = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 98) return vm_unmapped_area(&info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 99) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) unsigned long addr, unsigned long len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) unsigned long pgoff, unsigned long flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105) struct hstate *h = hstate_file(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) struct vm_unmapped_area_info info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108) info.flags = VM_UNMAPPED_AREA_TOPDOWN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) info.length = len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) info.low_limit = PAGE_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) info.high_limit = get_mmap_base(0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) * If hint address is above DEFAULT_MAP_WINDOW, look for unmapped area
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) * in the full address space.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117) if (addr > DEFAULT_MAP_WINDOW && !in_32bit_syscall())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118) info.high_limit += TASK_SIZE_MAX - DEFAULT_MAP_WINDOW;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120) info.align_mask = PAGE_MASK & ~huge_page_mask(h);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121) info.align_offset = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122) addr = vm_unmapped_area(&info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125) * A failed mmap() very likely causes application failure,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) * so fall back to the bottom-up function here. This scenario
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127) * can happen with large stack limits and large mmap()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128) * allocations.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130) if (addr & ~PAGE_MASK) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131) VM_BUG_ON(addr != -ENOMEM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) info.flags = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133) info.low_limit = TASK_UNMAPPED_BASE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134) info.high_limit = TASK_SIZE_LOW;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135) addr = vm_unmapped_area(&info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138) return addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141) unsigned long
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142) hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143) unsigned long len, unsigned long pgoff, unsigned long flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145) struct hstate *h = hstate_file(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146) struct mm_struct *mm = current->mm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147) struct vm_area_struct *vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149) if (len & ~huge_page_mask(h))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152) if (len > TASK_SIZE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155) /* No address checking. See comment at mmap_address_hint_valid() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156) if (flags & MAP_FIXED) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157) if (prepare_hugepage_range(file, addr, len))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159) return addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 160) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 161)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 162) if (addr) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 163) addr &= huge_page_mask(h);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 164) if (!mmap_address_hint_valid(addr, len))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 165) goto get_unmapped_area;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 166)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167) vma = find_vma(mm, addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 168) if (!vma || addr + len <= vm_start_gap(vma))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169) return addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172) get_unmapped_area:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 173) if (mm->get_unmapped_area == arch_get_unmapped_area)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 174) return hugetlb_get_unmapped_area_bottomup(file, addr, len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 175) pgoff, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 176) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 177) return hugetlb_get_unmapped_area_topdown(file, addr, len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 178) pgoff, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 179) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 180) #endif /* CONFIG_HUGETLB_PAGE */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 181)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 182) #ifdef CONFIG_X86_64
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 183) bool __init arch_hugetlb_valid_size(unsigned long size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 184) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 185) if (size == PMD_SIZE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 186) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 187) else if (size == PUD_SIZE && boot_cpu_has(X86_FEATURE_GBPAGES))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 188) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 189) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 190) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 191) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 192)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 193) #ifdef CONFIG_CONTIG_ALLOC
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 194) static __init int gigantic_pages_init(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 195) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 196) /* With compaction or CMA we can allocate gigantic pages at runtime */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 197) if (boot_cpu_has(X86_FEATURE_GBPAGES))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 198) hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 199) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 200) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 201) arch_initcall(gigantic_pages_init);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 202) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 203) #endif