/*
 * hugetlbpage-backed filesystem. Based on ramfs.
 *
 * Nadia Yvette Chambers, 2002
 *
 * Copyright (C) 2002 Linus Torvalds.
 * License: GPL
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/thread_info.h>
#include <asm/current.h>
#include <linux/sched/signal.h>	/* remove ASAP */
#include <linux/falloc.h>
#include <linux/fs.h>
#include <linux/mount.h>
#include <linux/file.h>
#include <linux/kernel.h>
#include <linux/writeback.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
#include <linux/init.h>
#include <linux/string.h>
#include <linux/capability.h>
#include <linux/ctype.h>
#include <linux/backing-dev.h>
#include <linux/hugetlb.h>
#include <linux/pagevec.h>
#include <linux/fs_parser.h>
#include <linux/mman.h>
#include <linux/slab.h>
#include <linux/dnotify.h>
#include <linux/statfs.h>
#include <linux/security.h>
#include <linux/magic.h>
#include <linux/migrate.h>
#include <linux/uio.h>

#include <linux/uaccess.h>
#include <linux/sched/mm.h>

static const struct super_operations hugetlbfs_ops;
static const struct address_space_operations hugetlbfs_aops;
const struct file_operations hugetlbfs_file_operations;
static const struct inode_operations hugetlbfs_dir_inode_operations;
static const struct inode_operations hugetlbfs_inode_operations;

enum hugetlbfs_size_type { NO_SIZE, SIZE_STD, SIZE_PERCENT };

struct hugetlbfs_fs_context {
	struct hstate		*hstate;
	unsigned long long	max_size_opt;
	unsigned long long	min_size_opt;
	long			max_hpages;
	long			nr_inodes;
	long			min_hpages;
	enum hugetlbfs_size_type max_val_type;
	enum hugetlbfs_size_type min_val_type;
	kuid_t			uid;
	kgid_t			gid;
	umode_t			mode;
};

int sysctl_hugetlb_shm_group;

enum hugetlb_param {
	Opt_gid,
	Opt_min_size,
	Opt_mode,
	Opt_nr_inodes,
	Opt_pagesize,
	Opt_size,
	Opt_uid,
};

static const struct fs_parameter_spec hugetlb_fs_parameters[] = {
	fsparam_u32   ("gid",		Opt_gid),
	fsparam_string("min_size",	Opt_min_size),
	fsparam_u32oct("mode",		Opt_mode),
	fsparam_string("nr_inodes",	Opt_nr_inodes),
	fsparam_string("pagesize",	Opt_pagesize),
	fsparam_string("size",		Opt_size),
	fsparam_u32   ("uid",		Opt_uid),
	{}
};
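
/*
 * Illustrative only, not part of the original source: the parameter
 * table above corresponds to mount options along the lines of
 *
 *	mount -t hugetlbfs -o pagesize=2M,size=512M,min_size=256M,\
 *		uid=1000,gid=1000,mode=0700,nr_inodes=64 none /mnt/huge
 *
 * where the mount point and option values are hypothetical. The
 * string-typed options (size, min_size, nr_inodes, pagesize) are
 * parsed by the fs_context code later in this file; size and min_size
 * may also be given as a percentage of the huge page pool (see
 * SIZE_PERCENT above).
 */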

#ifdef CONFIG_NUMA
static inline void hugetlb_set_vma_policy(struct vm_area_struct *vma,
					struct inode *inode, pgoff_t index)
{
	vma->vm_policy = mpol_shared_policy_lookup(&HUGETLBFS_I(inode)->policy,
							index);
}

static inline void hugetlb_drop_vma_policy(struct vm_area_struct *vma)
{
	mpol_cond_put(vma->vm_policy);
}
#else
static inline void hugetlb_set_vma_policy(struct vm_area_struct *vma,
					struct inode *inode, pgoff_t index)
{
}

static inline void hugetlb_drop_vma_policy(struct vm_area_struct *vma)
{
}
#endif

static void huge_pagevec_release(struct pagevec *pvec)
{
	int i;

	for (i = 0; i < pagevec_count(pvec); ++i)
		put_page(pvec->pages[i]);

	pagevec_reinit(pvec);
}

/*
 * Mask used when checking the page offset value passed in via system
 * calls. This value will be converted to a loff_t which is signed.
 * Therefore, we want to check the upper PAGE_SHIFT + 1 bits of the
 * value. The extra bit (- 1 in the shift value) is to take the sign
 * bit into account.
 */
#define PGOFF_LOFFT_MAX \
	(((1UL << (PAGE_SHIFT + 1)) - 1) << (BITS_PER_LONG - (PAGE_SHIFT + 1)))
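
/*
 * Worked example (illustrative, not from the original source): on a
 * 64-bit arch with 4K pages, BITS_PER_LONG = 64 and PAGE_SHIFT = 12,
 * so PGOFF_LOFFT_MAX expands to ((1UL << 13) - 1) << 51, i.e. a mask
 * of the upper 13 bits. A vm_pgoff with any of those bits set would,
 * after the << PAGE_SHIFT conversion to a byte offset, spill into the
 * sign bit (or beyond) of a signed loff_t.
 */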

static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct inode *inode = file_inode(file);
	struct hugetlbfs_inode_info *info = HUGETLBFS_I(inode);
	loff_t len, vma_len;
	int ret;
	struct hstate *h = hstate_file(file);

	/*
	 * vma address alignment (but not the pgoff alignment) has
	 * already been checked by prepare_hugepage_range. If you add
	 * any error returns here, do so after setting VM_HUGETLB, so
	 * that the is_vm_hugetlb_page tests below unmap_region go the
	 * right way when do_mmap unwinds (may be important on powerpc
	 * and ia64).
	 */
	vma->vm_flags |= VM_HUGETLB | VM_DONTEXPAND;
	vma->vm_ops = &hugetlb_vm_ops;

	ret = seal_check_future_write(info->seals, vma);
	if (ret)
		return ret;

	/*
	 * page based offset in vm_pgoff could be sufficiently large to
	 * overflow a loff_t when converted to byte offset. This can
	 * only happen on architectures where sizeof(loff_t) ==
	 * sizeof(unsigned long). So, only check in those instances.
	 */
	if (sizeof(unsigned long) == sizeof(loff_t)) {
		if (vma->vm_pgoff & PGOFF_LOFFT_MAX)
			return -EINVAL;
	}

	/* must be huge page aligned */
	if (vma->vm_pgoff & (~huge_page_mask(h) >> PAGE_SHIFT))
		return -EINVAL;

	vma_len = (loff_t)(vma->vm_end - vma->vm_start);
	len = vma_len + ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
	/* check for overflow */
	if (len < vma_len)
		return -EINVAL;

	inode_lock(inode);
	file_accessed(file);

	ret = -ENOMEM;
	if (hugetlb_reserve_pages(inode,
				vma->vm_pgoff >> huge_page_order(h),
				len >> huge_page_shift(h), vma,
				vma->vm_flags))
		goto out;

	ret = 0;
	if (vma->vm_flags & VM_WRITE && inode->i_size < len)
		i_size_write(inode, len);
out:
	inode_unlock(inode);

	return ret;
}
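
/*
 * Illustrative userspace usage (an assumption, not part of the original
 * source): a file on a hugetlbfs mount is mapped like any other file,
 * with the length and offset aligned to the huge page size as checked
 * above, e.g. with 2MB huge pages:
 *
 *	int fd = open("/mnt/huge/buf", O_CREAT | O_RDWR, 0600);
 *	void *p = mmap(NULL, 2 * 1024 * 1024, PROT_READ | PROT_WRITE,
 *		       MAP_SHARED, fd, 0);
 *
 * The path and size are hypothetical. hugetlbfs_file_mmap() then
 * reserves the huge pages and, for writable mappings, extends i_size
 * to cover the mapped range.
 */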

/*
 * Called under mmap_write_lock(mm).
 */

#ifndef HAVE_ARCH_HUGETLB_UNMAPPED_AREA
static unsigned long
hugetlb_get_unmapped_area_bottomup(struct file *file, unsigned long addr,
		unsigned long len, unsigned long pgoff, unsigned long flags)
{
	struct hstate *h = hstate_file(file);
	struct vm_unmapped_area_info info;

	info.flags = 0;
	info.length = len;
	info.low_limit = current->mm->mmap_base;
	info.high_limit = TASK_SIZE;
	info.align_mask = PAGE_MASK & ~huge_page_mask(h);
	info.align_offset = 0;
	return vm_unmapped_area(&info);
}

static unsigned long
hugetlb_get_unmapped_area_topdown(struct file *file, unsigned long addr,
		unsigned long len, unsigned long pgoff, unsigned long flags)
{
	struct hstate *h = hstate_file(file);
	struct vm_unmapped_area_info info;

	info.flags = VM_UNMAPPED_AREA_TOPDOWN;
	info.length = len;
	info.low_limit = max(PAGE_SIZE, mmap_min_addr);
	info.high_limit = current->mm->mmap_base;
	info.align_mask = PAGE_MASK & ~huge_page_mask(h);
	info.align_offset = 0;
	addr = vm_unmapped_area(&info);

	/*
	 * A failed mmap() very likely causes application failure,
	 * so fall back to the bottom-up function here. This scenario
	 * can happen with large stack limits and large mmap()
	 * allocations.
	 */
	if (unlikely(offset_in_page(addr))) {
		VM_BUG_ON(addr != -ENOMEM);
		info.flags = 0;
		info.low_limit = current->mm->mmap_base;
		info.high_limit = TASK_SIZE;
		addr = vm_unmapped_area(&info);
	}

	return addr;
}

static unsigned long
hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
		unsigned long len, unsigned long pgoff, unsigned long flags)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	struct hstate *h = hstate_file(file);

	if (len & ~huge_page_mask(h))
		return -EINVAL;
	if (len > TASK_SIZE)
		return -ENOMEM;

	if (flags & MAP_FIXED) {
		if (prepare_hugepage_range(file, addr, len))
			return -EINVAL;
		return addr;
	}

	if (addr) {
		addr = ALIGN(addr, huge_page_size(h));
		vma = find_vma(mm, addr);
		if (TASK_SIZE - len >= addr &&
		    (!vma || addr + len <= vm_start_gap(vma)))
			return addr;
	}

	/*
	 * Use mm->get_unmapped_area value as a hint to use topdown routine.
	 * If architectures have special needs, they should define their own
	 * version of hugetlb_get_unmapped_area.
	 */
	if (mm->get_unmapped_area == arch_get_unmapped_area_topdown)
		return hugetlb_get_unmapped_area_topdown(file, addr, len,
				pgoff, flags);
	return hugetlb_get_unmapped_area_bottomup(file, addr, len,
			pgoff, flags);
}
#endif

static size_t
hugetlbfs_read_actor(struct page *page, unsigned long offset,
			struct iov_iter *to, unsigned long size)
{
	size_t copied = 0;
	int i, chunksize;

	/* Find which base-page chunk and the offset within that chunk */
	i = offset >> PAGE_SHIFT;
	offset = offset & ~PAGE_MASK;

	while (size) {
		size_t n;
		chunksize = PAGE_SIZE;
		if (offset)
			chunksize -= offset;
		if (chunksize > size)
			chunksize = size;
		n = copy_page_to_iter(&page[i], offset, chunksize, to);
		copied += n;
		if (n != chunksize)
			return copied;
		offset = 0;
		size -= chunksize;
		i++;
	}
	return copied;
}
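
/*
 * Worked example (illustrative, not from the original source): with
 * 4K base pages, offset = 5000 and size = 10000 start in chunk i = 1
 * with an in-chunk offset of 904 (5000 - 4096). The first copy moves
 * 4096 - 904 = 3192 bytes, the second a full 4096, and the third the
 * remaining 2712, after which copied == 10000.
 */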

/*
 * Support for read() - Find the page attached to f_mapping and copy out the
 * data. It's *very* similar to do_generic_mapping_read(), but we can't use
 * that since it has PAGE_SIZE assumptions.
 */
static ssize_t hugetlbfs_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
	struct file *file = iocb->ki_filp;
	struct hstate *h = hstate_file(file);
	struct address_space *mapping = file->f_mapping;
	struct inode *inode = mapping->host;
	unsigned long index = iocb->ki_pos >> huge_page_shift(h);
	unsigned long offset = iocb->ki_pos & ~huge_page_mask(h);
	unsigned long end_index;
	loff_t isize;
	ssize_t retval = 0;

	while (iov_iter_count(to)) {
		struct page *page;
		size_t nr, copied;

		/* nr is the maximum number of bytes to copy from this page */
		nr = huge_page_size(h);
		isize = i_size_read(inode);
		if (!isize)
			break;
		end_index = (isize - 1) >> huge_page_shift(h);
		if (index > end_index)
			break;
		if (index == end_index) {
			nr = ((isize - 1) & ~huge_page_mask(h)) + 1;
			if (nr <= offset)
				break;
		}
		nr = nr - offset;

		/* Find the page */
		page = find_lock_page(mapping, index);
		if (unlikely(page == NULL)) {
			/*
			 * We have a HOLE: zero out the user buffer for the
			 * length of the hole or the request.
			 */
			copied = iov_iter_zero(nr, to);
		} else {
			unlock_page(page);

			/*
			 * We have the page, copy it to user space buffer.
			 */
			copied = hugetlbfs_read_actor(page, offset, to, nr);
			put_page(page);
		}
		offset += copied;
		retval += copied;
		if (copied != nr && iov_iter_count(to)) {
			if (!retval)
				retval = -EFAULT;
			break;
		}
		index += offset >> huge_page_shift(h);
		offset &= ~huge_page_mask(h);
	}
	iocb->ki_pos = ((loff_t)index << huge_page_shift(h)) + offset;
	return retval;
}

static int hugetlbfs_write_begin(struct file *file,
			struct address_space *mapping,
			loff_t pos, unsigned len, unsigned flags,
			struct page **pagep, void **fsdata)
{
	return -EINVAL;
}

static int hugetlbfs_write_end(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned copied,
			struct page *page, void *fsdata)
{
	BUG();
	return -EINVAL;
}

static void remove_huge_page(struct page *page)
{
	ClearPageDirty(page);
	ClearPageUptodate(page);
	delete_from_page_cache(page);
}

static void
hugetlb_vmdelete_list(struct rb_root_cached *root, pgoff_t start, pgoff_t end)
{
	struct vm_area_struct *vma;

	/*
	 * end == 0 indicates that the entire range after
	 * start should be unmapped.
	 */
	vma_interval_tree_foreach(vma, root, start, end ? end : ULONG_MAX) {
		unsigned long v_offset;
		unsigned long v_end;

		/*
		 * Can the expression below overflow on 32-bit arches?
		 * No, because the interval tree returns us only those vmas
		 * which overlap the truncated area starting at pgoff,
		 * and no vma on a 32-bit arch can span beyond 4GB.
		 */
		if (vma->vm_pgoff < start)
			v_offset = (start - vma->vm_pgoff) << PAGE_SHIFT;
		else
			v_offset = 0;

		if (!end)
			v_end = vma->vm_end;
		else {
			v_end = ((end - vma->vm_pgoff) << PAGE_SHIFT)
							+ vma->vm_start;
			if (v_end > vma->vm_end)
				v_end = vma->vm_end;
		}

		unmap_hugepage_range(vma, vma->vm_start + v_offset, v_end,
									NULL);
	}
}

/*
 * remove_inode_hugepages handles two distinct cases: truncation and hole
 * punch. There are subtle differences in operation for each case.
 *
 * truncation is indicated by the end of range being LLONG_MAX
 *	In this case, we first scan the range and release found pages.
 *	After releasing pages, hugetlb_unreserve_pages cleans up region/reserv
 *	maps and global counts. Page faults cannot race with truncation
 *	in this routine. hugetlb_no_page() holds i_mmap_rwsem and prevents
 *	page faults in the truncated range by checking i_size. i_size is
 *	modified while holding i_mmap_rwsem.
 * hole punch is indicated if end is not LLONG_MAX
 *	In the hole punch case we scan the range and release found pages.
 *	Only when releasing a page is the associated region/reserv map
 *	deleted. The region/reserv maps for ranges without associated
 *	pages are not modified. Page faults can race with hole punch.
 *	This is indicated if we find a mapped page.
 * Note: If the passed end of range value is beyond the end of file, but
 * not LLONG_MAX this routine still performs a hole punch operation.
 */
static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
				   loff_t lend)
{
	struct hstate *h = hstate_inode(inode);
	struct address_space *mapping = &inode->i_data;
	const pgoff_t start = lstart >> huge_page_shift(h);
	const pgoff_t end = lend >> huge_page_shift(h);
	struct vm_area_struct pseudo_vma;
	struct pagevec pvec;
	pgoff_t next, index;
	int i, freed = 0;
	bool truncate_op = (lend == LLONG_MAX);

	vma_init(&pseudo_vma, current->mm);
	pseudo_vma.vm_flags = (VM_HUGETLB | VM_MAYSHARE | VM_SHARED);
	pagevec_init(&pvec);
	next = start;
	while (next < end) {
		/*
		 * When no more pages are found, we are done.
		 */
		if (!pagevec_lookup_range(&pvec, mapping, &next, end - 1))
			break;

		for (i = 0; i < pagevec_count(&pvec); ++i) {
			struct page *page = pvec.pages[i];
			u32 hash;

			index = page->index;
			hash = hugetlb_fault_mutex_hash(mapping, index);
			if (!truncate_op) {
				/*
				 * Only need to hold the fault mutex in the
				 * hole punch case. This prevents races with
				 * page faults. Races are not possible in the
				 * case of truncation.
				 */
				mutex_lock(&hugetlb_fault_mutex_table[hash]);
			}

			/*
			 * If page is mapped, it was faulted in after being
			 * unmapped in caller. Unmap (again) now after taking
			 * the fault mutex. The mutex will prevent faults
			 * until we finish removing the page.
			 *
			 * This race can only happen in the hole punch case.
			 * Getting here in a truncate operation is a bug.
			 */
			if (unlikely(page_mapped(page))) {
				BUG_ON(truncate_op);

				mutex_unlock(&hugetlb_fault_mutex_table[hash]);
				i_mmap_lock_write(mapping);
				mutex_lock(&hugetlb_fault_mutex_table[hash]);
				hugetlb_vmdelete_list(&mapping->i_mmap,
					index * pages_per_huge_page(h),
					(index + 1) * pages_per_huge_page(h));
				i_mmap_unlock_write(mapping);
			}

			lock_page(page);
			/*
			 * We must free the huge page and remove it from page
			 * cache (remove_huge_page) BEFORE removing the
			 * region/reserve map (hugetlb_unreserve_pages). In
			 * rare out of memory conditions, removal of the
			 * region/reserve map could fail. Correspondingly,
			 * the subpool and global reserve usage counts may
			 * need to be adjusted.
			 */
			VM_BUG_ON(PagePrivate(page));
			remove_huge_page(page);
			freed++;
			if (!truncate_op) {
				if (unlikely(hugetlb_unreserve_pages(inode,
							index, index + 1, 1)))
					hugetlb_fix_reserve_counts(inode);
			}

			unlock_page(page);
			if (!truncate_op)
				mutex_unlock(&hugetlb_fault_mutex_table[hash]);
		}
		huge_pagevec_release(&pvec);
		cond_resched();
	}

	if (truncate_op)
		(void)hugetlb_unreserve_pages(inode, start, LONG_MAX, freed);
}

static void hugetlbfs_evict_inode(struct inode *inode)
{
	struct resv_map *resv_map;

	remove_inode_hugepages(inode, 0, LLONG_MAX);

	/*
	 * Get the resv_map from the address space embedded in the inode.
	 * This is the address space which points to any resv_map allocated
	 * at inode creation time. If this is a device special inode,
	 * i_mapping may not point to the original address space.
	 */
	resv_map = (struct resv_map *)(&inode->i_data)->private_data;
	/* Only regular and link inodes have associated reserve maps */
	if (resv_map)
		resv_map_release(&resv_map->refs);
	clear_inode(inode);
}

static int hugetlb_vmtruncate(struct inode *inode, loff_t offset)
{
	pgoff_t pgoff;
	struct address_space *mapping = inode->i_mapping;
	struct hstate *h = hstate_inode(inode);

	BUG_ON(offset & ~huge_page_mask(h));
	pgoff = offset >> PAGE_SHIFT;

	i_mmap_lock_write(mapping);
	i_size_write(inode, offset);
	if (!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root))
		hugetlb_vmdelete_list(&mapping->i_mmap, pgoff, 0);
	i_mmap_unlock_write(mapping);
	remove_inode_hugepages(inode, offset, LLONG_MAX);
	return 0;
}

static long hugetlbfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
{
	struct hstate *h = hstate_inode(inode);
	loff_t hpage_size = huge_page_size(h);
	loff_t hole_start, hole_end;

	/*
	 * For hole punch, round up the beginning offset of the hole and
	 * round down the end.
	 */
	hole_start = round_up(offset, hpage_size);
	hole_end = round_down(offset + len, hpage_size);

	if (hole_end > hole_start) {
		struct address_space *mapping = inode->i_mapping;
		struct hugetlbfs_inode_info *info = HUGETLBFS_I(inode);

		inode_lock(inode);

		/* protected by i_mutex */
		if (info->seals & (F_SEAL_WRITE | F_SEAL_FUTURE_WRITE)) {
			inode_unlock(inode);
			return -EPERM;
		}

		i_mmap_lock_write(mapping);
		if (!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root))
			hugetlb_vmdelete_list(&mapping->i_mmap,
						hole_start >> PAGE_SHIFT,
						hole_end >> PAGE_SHIFT);
		i_mmap_unlock_write(mapping);
		remove_inode_hugepages(inode, hole_start, hole_end);
		inode_unlock(inode);
	}

	return 0;
}
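
/*
 * Illustrative userspace usage (an assumption, not part of the original
 * source): the hole punch path above is reached via
 *
 *	fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
 *		  offset, len);
 *
 * Worked rounding example: with 2MB huge pages, offset = 1MB and
 * len = 4MB give hole_start = 2MB and hole_end = 4MB, so only the
 * whole huge page in [2MB, 4MB) is removed; the partially covered
 * huge pages at either end are left intact.
 */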

static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
				loff_t len)
{
	struct inode *inode = file_inode(file);
	struct hugetlbfs_inode_info *info = HUGETLBFS_I(inode);
	struct address_space *mapping = inode->i_mapping;
	struct hstate *h = hstate_inode(inode);
	struct vm_area_struct pseudo_vma;
	struct mm_struct *mm = current->mm;
	loff_t hpage_size = huge_page_size(h);
	unsigned long hpage_shift = huge_page_shift(h);
	pgoff_t start, index, end;
	int error;
	u32 hash;

	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
		return -EOPNOTSUPP;

	if (mode & FALLOC_FL_PUNCH_HOLE)
		return hugetlbfs_punch_hole(inode, offset, len);

	/*
	 * Default preallocate case.
	 * For this range, start is rounded down, end is rounded up,
	 * and both are converted to huge page offsets.
	 */
	start = offset >> hpage_shift;
	end = (offset + len + hpage_size - 1) >> hpage_shift;

	inode_lock(inode);

	/* We need to check rlimit even when FALLOC_FL_KEEP_SIZE */
	error = inode_newsize_ok(inode, offset + len);
	if (error)
		goto out;

	if ((info->seals & F_SEAL_GROW) && offset + len > inode->i_size) {
		error = -EPERM;
		goto out;
	}

	/*
	 * Initialize a pseudo vma as this is required by the huge page
	 * allocation routines. If NUMA is configured, use page index
	 * as input to create an allocation policy.
	 */
	vma_init(&pseudo_vma, mm);
	pseudo_vma.vm_flags = (VM_HUGETLB | VM_MAYSHARE | VM_SHARED);
	pseudo_vma.vm_file = file;

	for (index = start; index < end; index++) {
		/*
		 * This is supposed to be the vaddr where the page is being
		 * faulted in, but we have no vaddr here.
		 */
		struct page *page;
		unsigned long addr;
		int avoid_reserve = 0;

		cond_resched();

		/*
		 * fallocate(2) manpage permits EINTR; we may have been
		 * interrupted because we are using up too much memory.
		 */
		if (signal_pending(current)) {
			error = -EINTR;
			break;
		}

		/* Set numa allocation policy based on index */
		hugetlb_set_vma_policy(&pseudo_vma, inode, index);

		/* addr is the offset within the file (zero based) */
		addr = index * hpage_size;

		/*
		 * fault mutex taken here, protects against fault path
		 * and hole punch. inode_lock previously taken protects
		 * against truncation.
		 */
		hash = hugetlb_fault_mutex_hash(mapping, index);
		mutex_lock(&hugetlb_fault_mutex_table[hash]);

		/* See if already present in mapping to avoid alloc/free */
		page = find_get_page(mapping, index);
		if (page) {
			put_page(page);
			mutex_unlock(&hugetlb_fault_mutex_table[hash]);
			hugetlb_drop_vma_policy(&pseudo_vma);
			continue;
		}

		/* Allocate page and add to page cache */
		page = alloc_huge_page(&pseudo_vma, addr, avoid_reserve);
		hugetlb_drop_vma_policy(&pseudo_vma);
		if (IS_ERR(page)) {
			mutex_unlock(&hugetlb_fault_mutex_table[hash]);
			error = PTR_ERR(page);
			goto out;
		}
		clear_huge_page(page, addr, pages_per_huge_page(h));
		__SetPageUptodate(page);
		error = huge_add_to_page_cache(page, mapping, index);
		if (unlikely(error)) {
			put_page(page);
			mutex_unlock(&hugetlb_fault_mutex_table[hash]);
			goto out;
		}

		mutex_unlock(&hugetlb_fault_mutex_table[hash]);

		set_page_huge_active(page);
		/*
		 * unlock_page because the page was locked by add_to_page_cache();
		 * put_page drops the reference taken in alloc_huge_page().
		 */
		unlock_page(page);
		put_page(page);
	}

	if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > inode->i_size)
		i_size_write(inode, offset + len);
	inode->i_ctime = current_time(inode);
out:
	inode_unlock(inode);
	return error;
}

static int hugetlbfs_setattr(struct dentry *dentry, struct iattr *attr)
{
	struct inode *inode = d_inode(dentry);
	struct hstate *h = hstate_inode(inode);
	int error;
	unsigned int ia_valid = attr->ia_valid;
	struct hugetlbfs_inode_info *info = HUGETLBFS_I(inode);

	BUG_ON(!inode);

	error = setattr_prepare(dentry, attr);
	if (error)
		return error;

	if (ia_valid & ATTR_SIZE) {
		loff_t oldsize = inode->i_size;
		loff_t newsize = attr->ia_size;

		if (newsize & ~huge_page_mask(h))
			return -EINVAL;
		/* protected by i_mutex */
		if ((newsize < oldsize && (info->seals & F_SEAL_SHRINK)) ||
		    (newsize > oldsize && (info->seals & F_SEAL_GROW)))
			return -EPERM;
		error = hugetlb_vmtruncate(inode, newsize);
		if (error)
			return error;
	}

	setattr_copy(inode, attr);
	mark_inode_dirty(inode);
	return 0;
}

static struct inode *hugetlbfs_get_root(struct super_block *sb,
					struct hugetlbfs_fs_context *ctx)
{
	struct inode *inode;

	inode = new_inode(sb);
	if (inode) {
		inode->i_ino = get_next_ino();
		inode->i_mode = S_IFDIR | ctx->mode;
		inode->i_uid = ctx->uid;
		inode->i_gid = ctx->gid;
		inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
		inode->i_op = &hugetlbfs_dir_inode_operations;
		inode->i_fop = &simple_dir_operations;
		/* directory inodes start off with i_nlink == 2 (for "." entry) */
		inc_nlink(inode);
		lockdep_annotate_inode_mutex_key(inode);
	}
	return inode;
}

/*
 * Hugetlbfs is not reclaimable; therefore its i_mmap_rwsem will never
 * be taken from reclaim -- unlike regular filesystems. This needs an
 * annotation because huge_pmd_share() does an allocation under hugetlb's
 * i_mmap_rwsem.
 */
static struct lock_class_key hugetlbfs_i_mmap_rwsem_key;

static struct inode *hugetlbfs_get_inode(struct super_block *sb,
					struct inode *dir,
					umode_t mode, dev_t dev)
{
	struct inode *inode;
	struct resv_map *resv_map = NULL;

	/*
	 * Reserve maps are only needed for inodes that can have associated
	 * page allocations.
	 */
	if (S_ISREG(mode) || S_ISLNK(mode)) {
		resv_map = resv_map_alloc();
		if (!resv_map)
			return NULL;
	}

	inode = new_inode(sb);
	if (inode) {
		struct hugetlbfs_inode_info *info = HUGETLBFS_I(inode);

		inode->i_ino = get_next_ino();
		inode_init_owner(inode, dir, mode);
		lockdep_set_class(&inode->i_mapping->i_mmap_rwsem,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 847) &hugetlbfs_i_mmap_rwsem_key);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 848) inode->i_mapping->a_ops = &hugetlbfs_aops;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 849) inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 850) inode->i_mapping->private_data = resv_map;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 851) info->seals = F_SEAL_SEAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 852) switch (mode & S_IFMT) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 853) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 854) init_special_inode(inode, mode, dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 855) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 856) case S_IFREG:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 857) inode->i_op = &hugetlbfs_inode_operations;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 858) inode->i_fop = &hugetlbfs_file_operations;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 859) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 860) case S_IFDIR:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 861) inode->i_op = &hugetlbfs_dir_inode_operations;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 862) inode->i_fop = &simple_dir_operations;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 863)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 864) /* directory inodes start off with i_nlink == 2 (for "." entry) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 865) inc_nlink(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 866) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 867) case S_IFLNK:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 868) inode->i_op = &page_symlink_inode_operations;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 869) inode_nohighmem(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 870) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 871) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 872) lockdep_annotate_inode_mutex_key(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 873) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 874) if (resv_map)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 875) kref_put(&resv_map->refs, resv_map_release);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 876) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 877)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 878) return inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 879) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 880)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 881) /*
 * File creation. Allocate an inode, and we're done.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 883) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 884) static int do_hugetlbfs_mknod(struct inode *dir,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 885) struct dentry *dentry,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 886) umode_t mode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 887) dev_t dev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 888) bool tmpfile)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 889) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 890) struct inode *inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 891) int error = -ENOSPC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 892)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 893) inode = hugetlbfs_get_inode(dir->i_sb, dir, mode, dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 894) if (inode) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 895) dir->i_ctime = dir->i_mtime = current_time(dir);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 896) if (tmpfile) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 897) d_tmpfile(dentry, inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 898) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 899) d_instantiate(dentry, inode);
			dget(dentry);	/* Extra count - pin the dentry in core */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 901) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 902) error = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 903) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 904) return error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 905) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 906)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 907) static int hugetlbfs_mknod(struct inode *dir,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 908) struct dentry *dentry, umode_t mode, dev_t dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 909) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 910) return do_hugetlbfs_mknod(dir, dentry, mode, dev, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 911) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 912)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 913) static int hugetlbfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 914) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 915) int retval = hugetlbfs_mknod(dir, dentry, mode | S_IFDIR, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 916) if (!retval)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 917) inc_nlink(dir);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 918) return retval;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 919) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 920)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 921) static int hugetlbfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool excl)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 922) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 923) return hugetlbfs_mknod(dir, dentry, mode | S_IFREG, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 924) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 925)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 926) static int hugetlbfs_tmpfile(struct inode *dir,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 927) struct dentry *dentry, umode_t mode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 928) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 929) return do_hugetlbfs_mknod(dir, dentry, mode | S_IFREG, 0, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 930) }
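
/*
 * Userspace view (illustrative sketch; the mount point is
 * hypothetical): the tmpfile hook above backs O_TMPFILE, creating an
 * unlinked hugetlbfs file:
 *
 *	fd = open("/mnt/huge", O_TMPFILE | O_RDWR, 0600);
 */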
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 931)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 932) static int hugetlbfs_symlink(struct inode *dir,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 933) struct dentry *dentry, const char *symname)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 934) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 935) struct inode *inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 936) int error = -ENOSPC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 937)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 938) inode = hugetlbfs_get_inode(dir->i_sb, dir, S_IFLNK|S_IRWXUGO, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 939) if (inode) {
		int l = strlen(symname) + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 941) error = page_symlink(inode, symname, l);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 942) if (!error) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 943) d_instantiate(dentry, inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 944) dget(dentry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 945) } else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 946) iput(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 947) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 948) dir->i_ctime = dir->i_mtime = current_time(dir);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 949)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 950) return error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 951) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 952)
/*
 * Mark the head page dirty; the dirty bit of a hugetlb page is
 * tracked on its compound head.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 956) static int hugetlbfs_set_page_dirty(struct page *page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 957) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 958) struct page *head = compound_head(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 959)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 960) SetPageDirty(head);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 961) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 962) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 963)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 964) static int hugetlbfs_migrate_page(struct address_space *mapping,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 965) struct page *newpage, struct page *page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 966) enum migrate_mode mode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 967) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 968) int rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 969)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 970) rc = migrate_huge_page_move_mapping(mapping, newpage, page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 971) if (rc != MIGRATEPAGE_SUCCESS)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 972) return rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 973)
	/*
	 * page_private is the subpool pointer in hugetlb pages.  Transfer
	 * it to the new page.  PagePrivate is not associated with
	 * page_private for hugetlb pages and cannot be set here, as only
	 * page_huge_active pages can be migrated.
	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 980) if (page_private(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 981) set_page_private(newpage, page_private(page));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 982) set_page_private(page, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 983) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 984)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 985) if (mode != MIGRATE_SYNC_NO_COPY)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 986) migrate_page_copy(newpage, page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 987) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 988) migrate_page_states(newpage, page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 989)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 990) return MIGRATEPAGE_SUCCESS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 991) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 992)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 993) static int hugetlbfs_error_remove_page(struct address_space *mapping,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 994) struct page *page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 995) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 996) struct inode *inode = mapping->host;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 997) pgoff_t index = page->index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 998)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 999) remove_huge_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000) if (unlikely(hugetlb_unreserve_pages(inode, index, index + 1, 1)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) hugetlb_fix_reserve_counts(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004) }
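
/*
 * Note: this is reached from the memory-failure (hwpoison) path via
 * mapping->a_ops->error_remove_page; the poisoned page is dropped from
 * the page cache and the reservation accounting is fixed up above.
 */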
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007) * Display the mount options in /proc/mounts.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009) static int hugetlbfs_show_options(struct seq_file *m, struct dentry *root)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011) struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(root->d_sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012) struct hugepage_subpool *spool = sbinfo->spool;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013) unsigned long hpage_size = huge_page_size(sbinfo->hstate);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014) unsigned hpage_shift = huge_page_shift(sbinfo->hstate);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015) char mod;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017) if (!uid_eq(sbinfo->uid, GLOBAL_ROOT_UID))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018) seq_printf(m, ",uid=%u",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019) from_kuid_munged(&init_user_ns, sbinfo->uid));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020) if (!gid_eq(sbinfo->gid, GLOBAL_ROOT_GID))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) seq_printf(m, ",gid=%u",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022) from_kgid_munged(&init_user_ns, sbinfo->gid));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023) if (sbinfo->mode != 0755)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) seq_printf(m, ",mode=%o", sbinfo->mode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025) if (sbinfo->max_inodes != -1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026) seq_printf(m, ",nr_inodes=%lu", sbinfo->max_inodes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028) hpage_size /= 1024;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029) mod = 'K';
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030) if (hpage_size >= 1024) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031) hpage_size /= 1024;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) mod = 'M';
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034) seq_printf(m, ",pagesize=%lu%c", hpage_size, mod);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035) if (spool) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036) if (spool->max_hpages != -1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037) seq_printf(m, ",size=%llu",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038) (unsigned long long)spool->max_hpages << hpage_shift);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039) if (spool->min_hpages != -1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040) seq_printf(m, ",min_size=%llu",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041) (unsigned long long)spool->min_hpages << hpage_shift);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044) }
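
/*
 * Example /proc/mounts line produced by the function above (mount
 * point and values illustrative; options matching the defaults are
 * suppressed):
 *
 *	none /mnt/huge hugetlbfs rw,relatime,pagesize=2M,size=1073741824 0 0
 */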
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046) static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(dentry->d_sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049) struct hstate *h = hstate_inode(d_inode(dentry));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051) buf->f_type = HUGETLBFS_MAGIC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052) buf->f_bsize = huge_page_size(h);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053) if (sbinfo) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) spin_lock(&sbinfo->stat_lock);
		/*
		 * If no limits set, just report 0 for max/free/used
		 * blocks, like simple_statfs()
		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057) if (sbinfo->spool) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058) long free_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060) spin_lock(&sbinfo->spool->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061) buf->f_blocks = sbinfo->spool->max_hpages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062) free_pages = sbinfo->spool->max_hpages
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063) - sbinfo->spool->used_hpages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064) buf->f_bavail = buf->f_bfree = free_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065) spin_unlock(&sbinfo->spool->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066) buf->f_files = sbinfo->max_inodes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067) buf->f_ffree = sbinfo->free_inodes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069) spin_unlock(&sbinfo->stat_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071) buf->f_namelen = NAME_MAX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073) }
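
/*
 * Consequence of f_bsize above (illustrative): statfs() counts in
 * huge-page units, so a 2 MB hstate mounted with size=1G reports
 * f_bsize == 2097152 and f_blocks == 512, and tools like df scale
 * accordingly.
 */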
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075) static void hugetlbfs_put_super(struct super_block *sb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077) struct hugetlbfs_sb_info *sbi = HUGETLBFS_SB(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079) if (sbi) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080) sb->s_fs_info = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082) if (sbi->spool)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083) hugepage_put_subpool(sbi->spool);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085) kfree(sbi);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089) static inline int hugetlbfs_dec_free_inodes(struct hugetlbfs_sb_info *sbinfo)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091) if (sbinfo->free_inodes >= 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092) spin_lock(&sbinfo->stat_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093) if (unlikely(!sbinfo->free_inodes)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094) spin_unlock(&sbinfo->stat_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097) sbinfo->free_inodes--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098) spin_unlock(&sbinfo->stat_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104) static void hugetlbfs_inc_free_inodes(struct hugetlbfs_sb_info *sbinfo)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106) if (sbinfo->free_inodes >= 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107) spin_lock(&sbinfo->stat_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108) sbinfo->free_inodes++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109) spin_unlock(&sbinfo->stat_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114) static struct kmem_cache *hugetlbfs_inode_cachep;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116) static struct inode *hugetlbfs_alloc_inode(struct super_block *sb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118) struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119) struct hugetlbfs_inode_info *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121) if (unlikely(!hugetlbfs_dec_free_inodes(sbinfo)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123) p = kmem_cache_alloc(hugetlbfs_inode_cachep, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124) if (unlikely(!p)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125) hugetlbfs_inc_free_inodes(sbinfo);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130) * Any time after allocation, hugetlbfs_destroy_inode can be called
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131) * for the inode. mpol_free_shared_policy is unconditionally called
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132) * as part of hugetlbfs_destroy_inode. So, initialize policy here
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133) * in case of a quick call to destroy.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135) * Note that the policy is initialized even if we are creating a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136) * private inode. This simplifies hugetlbfs_destroy_inode.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138) mpol_shared_policy_init(&p->policy, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140) return &p->vfs_inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143) static void hugetlbfs_free_inode(struct inode *inode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145) kmem_cache_free(hugetlbfs_inode_cachep, HUGETLBFS_I(inode));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148) static void hugetlbfs_destroy_inode(struct inode *inode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150) hugetlbfs_inc_free_inodes(HUGETLBFS_SB(inode->i_sb));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151) mpol_free_shared_policy(&HUGETLBFS_I(inode)->policy);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154) static const struct address_space_operations hugetlbfs_aops = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155) .write_begin = hugetlbfs_write_begin,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156) .write_end = hugetlbfs_write_end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157) .set_page_dirty = hugetlbfs_set_page_dirty,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158) .migratepage = hugetlbfs_migrate_page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159) .error_remove_page = hugetlbfs_error_remove_page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163) static void init_once(void *foo)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165) struct hugetlbfs_inode_info *ei = (struct hugetlbfs_inode_info *)foo;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167) inode_init_once(&ei->vfs_inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170) const struct file_operations hugetlbfs_file_operations = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171) .read_iter = hugetlbfs_read_iter,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172) .mmap = hugetlbfs_file_mmap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173) .fsync = noop_fsync,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174) .get_unmapped_area = hugetlb_get_unmapped_area,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175) .llseek = default_llseek,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176) .fallocate = hugetlbfs_fallocate,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179) static const struct inode_operations hugetlbfs_dir_inode_operations = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180) .create = hugetlbfs_create,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181) .lookup = simple_lookup,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182) .link = simple_link,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183) .unlink = simple_unlink,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184) .symlink = hugetlbfs_symlink,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185) .mkdir = hugetlbfs_mkdir,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186) .rmdir = simple_rmdir,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187) .mknod = hugetlbfs_mknod,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188) .rename = simple_rename,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1189) .setattr = hugetlbfs_setattr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190) .tmpfile = hugetlbfs_tmpfile,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193) static const struct inode_operations hugetlbfs_inode_operations = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194) .setattr = hugetlbfs_setattr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197) static const struct super_operations hugetlbfs_ops = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198) .alloc_inode = hugetlbfs_alloc_inode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199) .free_inode = hugetlbfs_free_inode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200) .destroy_inode = hugetlbfs_destroy_inode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201) .evict_inode = hugetlbfs_evict_inode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202) .statfs = hugetlbfs_statfs,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203) .put_super = hugetlbfs_put_super,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204) .show_options = hugetlbfs_show_options,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208) * Convert size option passed from command line to number of huge pages
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1209) * in the pool specified by hstate. Size option could be in bytes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210) * (val_type == SIZE_STD) or percentage of the pool (val_type == SIZE_PERCENT).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212) static long
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213) hugetlbfs_size_to_hpages(struct hstate *h, unsigned long long size_opt,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214) enum hugetlbfs_size_type val_type)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216) if (val_type == NO_SIZE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217) return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219) if (val_type == SIZE_PERCENT) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220) size_opt <<= huge_page_shift(h);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221) size_opt *= h->max_huge_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222) do_div(size_opt, 100);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225) size_opt >>= huge_page_shift(h);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226) return size_opt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227) }
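
/*
 * Worked example for the function above (illustrative): "size=50%"
 * with a 2 MB hstate and h->max_huge_pages == 512:
 *
 *	size_opt = 50 << 21;			// percent -> byte scale
 *	size_opt *= 512; do_div(size_opt, 100);	// 50% of the pool, in bytes
 *	size_opt >>= 21;			// 256 huge pages
 */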
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230) * Parse one mount parameter.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232) static int hugetlbfs_parse_param(struct fs_context *fc, struct fs_parameter *param)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234) struct hugetlbfs_fs_context *ctx = fc->fs_private;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235) struct fs_parse_result result;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236) char *rest;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1237) unsigned long ps;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1238) int opt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1239)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1240) opt = fs_parse(fc, hugetlb_fs_parameters, param, &result);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1241) if (opt < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1242) return opt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1243)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1244) switch (opt) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1245) case Opt_uid:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1246) ctx->uid = make_kuid(current_user_ns(), result.uint_32);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1247) if (!uid_valid(ctx->uid))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1248) goto bad_val;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1249) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1250)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1251) case Opt_gid:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1252) ctx->gid = make_kgid(current_user_ns(), result.uint_32);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1253) if (!gid_valid(ctx->gid))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1254) goto bad_val;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1255) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1256)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1257) case Opt_mode:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1258) ctx->mode = result.uint_32 & 01777U;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1259) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1260)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1261) case Opt_size:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1262) /* memparse() will accept a K/M/G without a digit */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1263) if (!isdigit(param->string[0]))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1264) goto bad_val;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1265) ctx->max_size_opt = memparse(param->string, &rest);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1266) ctx->max_val_type = SIZE_STD;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1267) if (*rest == '%')
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1268) ctx->max_val_type = SIZE_PERCENT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1269) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1270)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1271) case Opt_nr_inodes:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1272) /* memparse() will accept a K/M/G without a digit */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1273) if (!isdigit(param->string[0]))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1274) goto bad_val;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1275) ctx->nr_inodes = memparse(param->string, &rest);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1276) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1277)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1278) case Opt_pagesize:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1279) ps = memparse(param->string, &rest);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1280) ctx->hstate = size_to_hstate(ps);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1281) if (!ctx->hstate) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1282) pr_err("Unsupported page size %lu MB\n", ps >> 20);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1283) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1284) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1285) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1286)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1287) case Opt_min_size:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1288) /* memparse() will accept a K/M/G without a digit */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1289) if (!isdigit(param->string[0]))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1290) goto bad_val;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1291) ctx->min_size_opt = memparse(param->string, &rest);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1292) ctx->min_val_type = SIZE_STD;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1293) if (*rest == '%')
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1294) ctx->min_val_type = SIZE_PERCENT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1295) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1296)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1297) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1298) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1299) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1300)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1301) bad_val:
	return invalfc(fc, "Bad value '%s' for mount option '%s'",
		       param->string, param->key);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1304) }
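
/*
 * Example invocation exercising the options parsed above (mount point
 * and values illustrative):
 *
 *	mount -t hugetlbfs \
 *	      -o pagesize=2M,size=512M,min_size=50%,nr_inodes=64,uid=1000,gid=1000,mode=1770 \
 *	      none /mnt/huge
 */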
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1305)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1306) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1307) * Validate the parsed options.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1308) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1309) static int hugetlbfs_validate(struct fs_context *fc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1310) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1311) struct hugetlbfs_fs_context *ctx = fc->fs_private;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1312)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1313) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1314) * Use huge page pool size (in hstate) to convert the size
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1315) * options to number of huge pages. If NO_SIZE, -1 is returned.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1316) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1317) ctx->max_hpages = hugetlbfs_size_to_hpages(ctx->hstate,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1318) ctx->max_size_opt,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1319) ctx->max_val_type);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1320) ctx->min_hpages = hugetlbfs_size_to_hpages(ctx->hstate,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1321) ctx->min_size_opt,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1322) ctx->min_val_type);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1323)
	/*
	 * If max_size was specified, then min_size must not be larger.
	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1327) if (ctx->max_val_type > NO_SIZE &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1328) ctx->min_hpages > ctx->max_hpages) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1329) pr_err("Minimum size can not be greater than maximum size\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1330) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1331) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1332)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1333) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1334) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1335)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1336) static int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1337) hugetlbfs_fill_super(struct super_block *sb, struct fs_context *fc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1338) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1339) struct hugetlbfs_fs_context *ctx = fc->fs_private;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1340) struct hugetlbfs_sb_info *sbinfo;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1341)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1342) sbinfo = kmalloc(sizeof(struct hugetlbfs_sb_info), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1343) if (!sbinfo)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1344) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1345) sb->s_fs_info = sbinfo;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1346) spin_lock_init(&sbinfo->stat_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1347) sbinfo->hstate = ctx->hstate;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1348) sbinfo->max_inodes = ctx->nr_inodes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1349) sbinfo->free_inodes = ctx->nr_inodes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1350) sbinfo->spool = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1351) sbinfo->uid = ctx->uid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1352) sbinfo->gid = ctx->gid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1353) sbinfo->mode = ctx->mode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1354)
	/*
	 * Allocate and initialize subpool if maximum or minimum size is
	 * specified.  Any needed reservations (for minimum size) are taken
	 * when the subpool is created.
	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1360) if (ctx->max_hpages != -1 || ctx->min_hpages != -1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1361) sbinfo->spool = hugepage_new_subpool(ctx->hstate,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1362) ctx->max_hpages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1363) ctx->min_hpages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1364) if (!sbinfo->spool)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1365) goto out_free;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1366) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1367) sb->s_maxbytes = MAX_LFS_FILESIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1368) sb->s_blocksize = huge_page_size(ctx->hstate);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1369) sb->s_blocksize_bits = huge_page_shift(ctx->hstate);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1370) sb->s_magic = HUGETLBFS_MAGIC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1371) sb->s_op = &hugetlbfs_ops;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1372) sb->s_time_gran = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1373)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1374) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1375) * Due to the special and limited functionality of hugetlbfs, it does
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1376) * not work well as a stacking filesystem.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1377) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1378) sb->s_stack_depth = FILESYSTEM_MAX_STACK_DEPTH;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1379) sb->s_root = d_make_root(hugetlbfs_get_root(sb, ctx));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1380) if (!sb->s_root)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1381) goto out_free;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1382) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1383) out_free:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1384) kfree(sbinfo->spool);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1385) kfree(sbinfo);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1386) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1387) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1388)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1389) static int hugetlbfs_get_tree(struct fs_context *fc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1390) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1391) int err = hugetlbfs_validate(fc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1392) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1393) return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1394) return get_tree_nodev(fc, hugetlbfs_fill_super);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1395) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1396)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1397) static void hugetlbfs_fs_context_free(struct fs_context *fc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1398) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1399) kfree(fc->fs_private);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1400) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1401)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1402) static const struct fs_context_operations hugetlbfs_fs_context_ops = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1403) .free = hugetlbfs_fs_context_free,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1404) .parse_param = hugetlbfs_parse_param,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1405) .get_tree = hugetlbfs_get_tree,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1406) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1407)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1408) static int hugetlbfs_init_fs_context(struct fs_context *fc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1409) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1410) struct hugetlbfs_fs_context *ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1411)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1412) ctx = kzalloc(sizeof(struct hugetlbfs_fs_context), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1413) if (!ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1414) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1415)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1416) ctx->max_hpages = -1; /* No limit on size by default */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1417) ctx->nr_inodes = -1; /* No limit on number of inodes by default */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1418) ctx->uid = current_fsuid();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1419) ctx->gid = current_fsgid();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1420) ctx->mode = 0755;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1421) ctx->hstate = &default_hstate;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1422) ctx->min_hpages = -1; /* No default minimum size */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1423) ctx->max_val_type = NO_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1424) ctx->min_val_type = NO_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1425) fc->fs_private = ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1426) fc->ops = &hugetlbfs_fs_context_ops;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1427) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1428) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1429)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1430) static struct file_system_type hugetlbfs_fs_type = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1431) .name = "hugetlbfs",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1432) .init_fs_context = hugetlbfs_init_fs_context,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1433) .parameters = hugetlb_fs_parameters,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1434) .kill_sb = kill_litter_super,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1435) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1436)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1437) static struct vfsmount *hugetlbfs_vfsmount[HUGE_MAX_HSTATE];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1438)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1439) static int can_do_hugetlb_shm(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1440) {
	kgid_t shm_group;

	shm_group = make_kgid(&init_user_ns, sysctl_hugetlb_shm_group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1443) return capable(CAP_IPC_LOCK) || in_group_p(shm_group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1444) }
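
/*
 * Administration note (illustrative gid): the group checked above is
 * set through the vm.hugetlb_shm_group sysctl, e.g.:
 *
 *	sysctl -w vm.hugetlb_shm_group=1000
 */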
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1445)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1446) static int get_hstate_idx(int page_size_log)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1447) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1448) struct hstate *h = hstate_sizelog(page_size_log);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1449)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1450) if (!h)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1451) return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1452) return h - hstates;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1453) }
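
/*
 * Example (illustrative): page_size_log == 21 selects a 2 MB hstate
 * where one is configured, while page_size_log == 0 makes
 * hstate_sizelog() fall back to the default hstate.
 */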
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1454)
/*
 * Note that size should be aligned to the proper hugepage size on the
 * caller's side; otherwise hugetlb_reserve_pages() reserves one fewer
 * huge page than intended.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1459) struct file *hugetlb_file_setup(const char *name, size_t size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1460) vm_flags_t acctflag, struct user_struct **user,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1461) int creat_flags, int page_size_log)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1462) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1463) struct inode *inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1464) struct vfsmount *mnt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1465) int hstate_idx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1466) struct file *file;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1467)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1468) hstate_idx = get_hstate_idx(page_size_log);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1469) if (hstate_idx < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1470) return ERR_PTR(-ENODEV);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1471)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1472) *user = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1473) mnt = hugetlbfs_vfsmount[hstate_idx];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1474) if (!mnt)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1475) return ERR_PTR(-ENOENT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1476)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1477) if (creat_flags == HUGETLB_SHMFS_INODE && !can_do_hugetlb_shm()) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1478) *user = current_user();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1479) if (user_shm_lock(size, *user)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1480) task_lock(current);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1481) pr_warn_once("%s (%d): Using mlock ulimits for SHM_HUGETLB is deprecated\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1482) current->comm, current->pid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1483) task_unlock(current);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1484) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1485) *user = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1486) return ERR_PTR(-EPERM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1487) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1488) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1489)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1490) file = ERR_PTR(-ENOSPC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1491) inode = hugetlbfs_get_inode(mnt->mnt_sb, NULL, S_IFREG | S_IRWXUGO, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1492) if (!inode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1493) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1494) if (creat_flags == HUGETLB_SHMFS_INODE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1495) inode->i_flags |= S_PRIVATE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1496)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1497) inode->i_size = size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1498) clear_nlink(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1499)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1500) if (hugetlb_reserve_pages(inode, 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1501) size >> huge_page_shift(hstate_inode(inode)), NULL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1502) acctflag))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1503) file = ERR_PTR(-ENOMEM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1504) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1505) file = alloc_file_pseudo(inode, mnt, name, O_RDWR,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1506) &hugetlbfs_file_operations);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1507) if (!IS_ERR(file))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1508) return file;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1509)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1510) iput(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1511) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1512) if (*user) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1513) user_shm_unlock(size, *user);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1514) *user = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1515) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1516) return file;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1517) }
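
/*
 * Example caller (a sketch patterned on the MAP_HUGETLB path in
 * mm/mmap.c; len must already be rounded up to the huge page size as
 * the comment above requires):
 *
 *	file = hugetlb_file_setup(HUGETLB_ANON_FILE, len, VM_NORESERVE,
 *				  &user, HUGETLB_ANONHUGE_INODE,
 *				  (flags >> MAP_HUGE_SHIFT) & MAP_HUGE_MASK);
 */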
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1518)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1519) static struct vfsmount *__init mount_one_hugetlbfs(struct hstate *h)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1520) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1521) struct fs_context *fc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1522) struct vfsmount *mnt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1523)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1524) fc = fs_context_for_mount(&hugetlbfs_fs_type, SB_KERNMOUNT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1525) if (IS_ERR(fc)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1526) mnt = ERR_CAST(fc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1527) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1528) struct hugetlbfs_fs_context *ctx = fc->fs_private;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1529) ctx->hstate = h;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1530) mnt = fc_mount(fc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1531) put_fs_context(fc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1532) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1533) if (IS_ERR(mnt))
		pr_err("Cannot mount internal hugetlbfs for page size %uK\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1535) 1U << (h->order + PAGE_SHIFT - 10));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1536) return mnt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1537) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1538)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1539) static int __init init_hugetlbfs_fs(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1540) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1541) struct vfsmount *mnt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1542) struct hstate *h;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1543) int error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1544) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1545)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1546) if (!hugepages_supported()) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1547) pr_info("disabling because there are no supported hugepage sizes\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1548) return -ENOTSUPP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1549) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1550)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1551) error = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1552) hugetlbfs_inode_cachep = kmem_cache_create("hugetlbfs_inode_cache",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1553) sizeof(struct hugetlbfs_inode_info),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1554) 0, SLAB_ACCOUNT, init_once);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1555) if (hugetlbfs_inode_cachep == NULL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1556) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1557)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1558) error = register_filesystem(&hugetlbfs_fs_type);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1559) if (error)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1560) goto out_free;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1561)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1562) /* default hstate mount is required */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1563) mnt = mount_one_hugetlbfs(&hstates[default_hstate_idx]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1564) if (IS_ERR(mnt)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1565) error = PTR_ERR(mnt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1566) goto out_unreg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1567) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1568) hugetlbfs_vfsmount[default_hstate_idx] = mnt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1569)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1570) /* other hstates are optional */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1571) i = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1572) for_each_hstate(h) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1573) if (i == default_hstate_idx) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1574) i++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1575) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1576) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1577)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1578) mnt = mount_one_hugetlbfs(h);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1579) if (IS_ERR(mnt))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1580) hugetlbfs_vfsmount[i] = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1581) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1582) hugetlbfs_vfsmount[i] = mnt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1583) i++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1584) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1585)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1586) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1587)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1588) out_unreg:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1589) (void)unregister_filesystem(&hugetlbfs_fs_type);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1590) out_free:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1591) kmem_cache_destroy(hugetlbfs_inode_cachep);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1592) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1593) return error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1594) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1595) fs_initcall(init_hugetlbfs_fs)