^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) // SPDX-License-Identifier: GPL-2.0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) * IA-64 Huge TLB Page Support for Kernel.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) * Copyright (C) 2002-2004 Rohit Seth <rohit.seth@intel.com>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) * Copyright (C) 2003-2004 Ken Chen <kenneth.w.chen@intel.com>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) * Sep, 2003: add numa support
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) * Feb, 2004: dynamic hugetlb page size via boot parameter
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12) #include <linux/init.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) #include <linux/fs.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) #include <linux/mm.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15) #include <linux/hugetlb.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) #include <linux/pagemap.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) #include <linux/module.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18) #include <linux/sysctl.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19) #include <linux/log2.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20) #include <asm/mman.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21) #include <asm/tlb.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22) #include <asm/tlbflush.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24) unsigned int hpage_shift = HPAGE_SHIFT_DEFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25) EXPORT_SYMBOL(hpage_shift);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27) pte_t *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28) huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29) unsigned long addr, unsigned long sz)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31) unsigned long taddr = htlbpage_to_page(addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32) pgd_t *pgd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33) p4d_t *p4d;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34) pud_t *pud;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35) pmd_t *pmd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36) pte_t *pte = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38) pgd = pgd_offset(mm, taddr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39) p4d = p4d_offset(pgd, taddr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40) pud = pud_alloc(mm, p4d, taddr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41) if (pud) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42) pmd = pmd_alloc(mm, pud, taddr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) if (pmd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44) pte = pte_alloc_map(mm, pmd, taddr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46) return pte;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49) pte_t *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50) huge_pte_offset (struct mm_struct *mm, unsigned long addr, unsigned long sz)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52) unsigned long taddr = htlbpage_to_page(addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53) pgd_t *pgd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54) p4d_t *p4d;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55) pud_t *pud;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56) pmd_t *pmd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57) pte_t *pte = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59) pgd = pgd_offset(mm, taddr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60) if (pgd_present(*pgd)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61) p4d = p4d_offset(pgd, addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62) if (p4d_present(*p4d)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63) pud = pud_offset(p4d, taddr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64) if (pud_present(*pud)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65) pmd = pmd_offset(pud, taddr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66) if (pmd_present(*pmd))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67) pte = pte_offset_map(pmd, taddr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72) return pte;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74)
/*
 * Set the _PAGE_P bit in @entry.
 *
 * The body is wrapped in do { } while (0) so that "mk_pte_huge(e);" expands
 * to exactly one statement and remains valid inside an unbraced if/else.
 */
#define mk_pte_huge(entry)	do { pte_val(entry) |= _PAGE_P; } while (0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78) * Don't actually need to do any preparation, but need to make sure
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79) * the address is in the right region.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81) int prepare_hugepage_range(struct file *file,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82) unsigned long addr, unsigned long len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 83) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 84) if (len & ~HPAGE_MASK)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 85) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 86) if (addr & ~HPAGE_MASK)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 87) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88) if (REGION_NUMBER(addr) != RGN_HPAGE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94) struct page *follow_huge_addr(struct mm_struct *mm, unsigned long addr, int write)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 96) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97) pte_t *ptep;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 98)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 99) if (REGION_NUMBER(addr) != RGN_HPAGE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100) return ERR_PTR(-EINVAL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) ptep = huge_pte_offset(mm, addr, HPAGE_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) if (!ptep || pte_none(*ptep))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105) page = pte_page(*ptep);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) page += ((addr & ~HPAGE_MASK) >> PAGE_SHIFT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) return page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) int pmd_huge(pmd_t pmd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) int pud_huge(pud_t pud)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) void hugetlb_free_pgd_range(struct mmu_gather *tlb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120) unsigned long addr, unsigned long end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121) unsigned long floor, unsigned long ceiling)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124) * This is called to free hugetlb page tables.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) * The offset of these addresses from the base of the hugetlb
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127) * region must be scaled down by HPAGE_SIZE/PAGE_SIZE so that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128) * the standard free_pgd_range will free the right page tables.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130) * If floor and ceiling are also in the hugetlb region, they
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131) * must likewise be scaled down; but if outside, left unchanged.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134) addr = htlbpage_to_page(addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135) end = htlbpage_to_page(end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136) if (REGION_NUMBER(floor) == RGN_HPAGE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137) floor = htlbpage_to_page(floor);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138) if (REGION_NUMBER(ceiling) == RGN_HPAGE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139) ceiling = htlbpage_to_page(ceiling);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141) free_pgd_range(tlb, addr, end, floor, ceiling);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144) unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145) unsigned long pgoff, unsigned long flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147) struct vm_unmapped_area_info info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149) if (len > RGN_MAP_LIMIT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151) if (len & ~HPAGE_MASK)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154) /* Handle MAP_FIXED */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155) if (flags & MAP_FIXED) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156) if (prepare_hugepage_range(file, addr, len))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158) return addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 160)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 161) /* This code assumes that RGN_HPAGE != 0. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 162) if ((REGION_NUMBER(addr) != RGN_HPAGE) || (addr & (HPAGE_SIZE - 1)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 163) addr = HPAGE_REGION_BASE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 164)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 165) info.flags = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 166) info.length = len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167) info.low_limit = addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 168) info.high_limit = HPAGE_REGION_BASE + RGN_MAP_LIMIT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169) info.align_mask = PAGE_MASK & (HPAGE_SIZE - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170) info.align_offset = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171) return vm_unmapped_area(&info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 173)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 174) static int __init hugetlb_setup_sz(char *str)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 175) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 176) u64 tr_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 177) unsigned long long size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 178)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 179) if (ia64_pal_vm_page_size(&tr_pages, NULL) != 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 180) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 181) * shouldn't happen, but just in case.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 182) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 183) tr_pages = 0x15557000UL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 184)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 185) size = memparse(str, &str);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 186) if (*str || !is_power_of_2(size) || !(tr_pages & size) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 187) size <= PAGE_SIZE ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 188) size >= (1UL << PAGE_SHIFT << MAX_ORDER)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 189) printk(KERN_WARNING "Invalid huge page size specified\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 190) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 191) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 192)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 193) hpage_shift = __ffs(size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 194) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 195) * boot cpu already executed ia64_mmu_init, and has HPAGE_SHIFT_DEFAULT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 196) * override here with new page shift.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 197) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 198) ia64_set_rr(HPAGE_REGION_BASE, hpage_shift << 2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 199) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 200) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 201) early_param("hugepagesz", hugetlb_setup_sz);