/*
 * Resizable virtual memory filesystem for Linux.
 *
 * Copyright (C) 2000 Linus Torvalds.
 *		 2000 Transmeta Corp.
 *		 2000-2001 Christoph Rohland
 *		 2000-2001 SAP AG
 *		 2002 Red Hat Inc.
 * Copyright (C) 2002-2011 Hugh Dickins.
 * Copyright (C) 2011 Google Inc.
 * Copyright (C) 2002-2005 VERITAS Software Corporation.
 * Copyright (C) 2004 Andi Kleen, SuSE Labs
 *
 * Extended attribute support for tmpfs:
 * Copyright (c) 2004, Luke Kenneth Casson Leighton <lkcl@lkcl.net>
 * Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com>
 *
 * tiny-shmem:
 * Copyright (c) 2004, 2008 Matt Mackall <mpm@selenic.com>
 *
 * This file is released under the GPL.
 */

#include <linux/fs.h>
#include <linux/init.h>
#include <linux/vfs.h>
#include <linux/mount.h>
#include <linux/ramfs.h>
#include <linux/pagemap.h>
#include <linux/file.h>
#include <linux/mm.h>
#include <linux/random.h>
#include <linux/sched/signal.h>
#include <linux/export.h>
#include <linux/swap.h>
#include <linux/uio.h>
#include <linux/khugepaged.h>
#include <linux/hugetlb.h>
#include <linux/frontswap.h>
#include <linux/fs_parser.h>
#include <linux/mm_inline.h>

#include <asm/tlbflush.h> /* for arch/microblaze update_mmu_cache() */

#include "internal.h"

#undef CREATE_TRACE_POINTS
#include <trace/hooks/shmem_fs.h>

static struct vfsmount *shm_mnt;

#ifdef CONFIG_SHMEM
/*
 * This virtual memory filesystem is heavily based on the ramfs. It
 * extends ramfs by the ability to use swap and honor resource limits
 * which makes it a completely usable filesystem.
 */

#include <linux/xattr.h>
#include <linux/exportfs.h>
#include <linux/posix_acl.h>
#include <linux/posix_acl_xattr.h>
#include <linux/mman.h>
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/backing-dev.h>
#include <linux/shmem_fs.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/pagevec.h>
#include <linux/percpu_counter.h>
#include <linux/falloc.h>
#include <linux/splice.h>
#include <linux/security.h>
#include <linux/swapops.h>
#include <linux/mempolicy.h>
#include <linux/namei.h>
#include <linux/ctype.h>
#include <linux/migrate.h>
#include <linux/highmem.h>
#include <linux/seq_file.h>
#include <linux/magic.h>
#include <linux/syscalls.h>
#include <linux/fcntl.h>
#include <uapi/linux/memfd.h>
#include <linux/userfaultfd_k.h>
#include <linux/rmap.h>
#include <linux/uuid.h>

#include <linux/uaccess.h>

#include "internal.h"

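/*
 * i_blocks is counted in 512-byte units, so BLOCKS_PER_PAGE is the number
 * of such units per page; VM_ACCT() rounds a byte count up to whole pages
 * for the overcommit accounting below (e.g. VM_ACCT(PAGE_SIZE) == 1).
 */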
#define BLOCKS_PER_PAGE  (PAGE_SIZE/512)
#define VM_ACCT(size)    (PAGE_ALIGN(size) >> PAGE_SHIFT)

/* Pretend that each entry is of this size in directory's i_size */
#define BOGO_DIRENT_SIZE 20

/* Symlink up to this size is kmalloc'ed instead of using a swappable page */
#define SHORT_SYMLINK_LEN 128

/*
 * shmem_fallocate communicates with shmem_fault or shmem_writepage via
 * inode->i_private (with i_mutex making sure that it has only one user at
 * a time): we would prefer not to enlarge the shmem inode just for that.
 */
struct shmem_falloc {
	wait_queue_head_t *waitq; /* faults into hole wait for punch to end */
	pgoff_t start;		/* start of range currently being fallocated */
	pgoff_t next;		/* the next page offset to be fallocated */
	pgoff_t nr_falloced;	/* how many new pages have been fallocated */
	pgoff_t nr_unswapped;	/* how often writepage refused to swap out */
};

struct shmem_options {
	unsigned long long blocks;
	unsigned long long inodes;
	struct mempolicy *mpol;
	kuid_t uid;
	kgid_t gid;
	umode_t mode;
	bool full_inums;
	int huge;
	int seen;
#define SHMEM_SEEN_BLOCKS 1
#define SHMEM_SEEN_INODES 2
#define SHMEM_SEEN_HUGE 4
#define SHMEM_SEEN_INUMS 8
};

#ifdef CONFIG_TMPFS
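/*
 * Defaults used when "size=" / "nr_inodes=" are not given on the mount:
 * at most half of RAM for data pages, and an inode limit bounded by the
 * number of lowmem pages but never more than half of all RAM pages.
 */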
static unsigned long shmem_default_max_blocks(void)
{
	return totalram_pages() / 2;
}

static unsigned long shmem_default_max_inodes(void)
{
	unsigned long nr_pages = totalram_pages();

	return min(nr_pages - totalhigh_pages(), nr_pages / 2);
}
#endif

static bool shmem_should_replace_page(struct page *page, gfp_t gfp);
static int shmem_replace_page(struct page **pagep, gfp_t gfp,
				struct shmem_inode_info *info, pgoff_t index);
static int shmem_swapin_page(struct inode *inode, pgoff_t index,
			     struct page **pagep, enum sgp_type sgp,
			     gfp_t gfp, struct vm_area_struct *vma,
			     vm_fault_t *fault_type);
static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
		struct page **pagep, enum sgp_type sgp,
		gfp_t gfp, struct vm_area_struct *vma,
		struct vm_fault *vmf, vm_fault_t *fault_type);

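/*
 * Convenience wrapper around shmem_getpage_gfp(): find or allocate the page
 * at @index using the mapping's default gfp mask, with no vma/fault context.
 */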
int shmem_getpage(struct inode *inode, pgoff_t index,
		struct page **pagep, enum sgp_type sgp)
{
	return shmem_getpage_gfp(inode, index, pagep, sgp,
		mapping_gfp_mask(inode->i_mapping), NULL, NULL, NULL);
}

static inline struct shmem_sb_info *SHMEM_SB(struct super_block *sb)
{
	return sb->s_fs_info;
}

/*
 * shmem_file_setup pre-accounts the whole fixed size of a VM object,
 * for shared memory and for shared anonymous (/dev/zero) mappings
 * (unless MAP_NORESERVE and sysctl_overcommit_memory <= 1),
 * consistent with the pre-accounting of private mappings ...
 */
static inline int shmem_acct_size(unsigned long flags, loff_t size)
{
	return (flags & VM_NORESERVE) ?
		0 : security_vm_enough_memory_mm(current->mm, VM_ACCT(size));
}

static inline void shmem_unacct_size(unsigned long flags, loff_t size)
{
	if (!(flags & VM_NORESERVE))
		vm_unacct_memory(VM_ACCT(size));
}

static inline int shmem_reacct_size(unsigned long flags,
		loff_t oldsize, loff_t newsize)
{
	if (!(flags & VM_NORESERVE)) {
		if (VM_ACCT(newsize) > VM_ACCT(oldsize))
			return security_vm_enough_memory_mm(current->mm,
					VM_ACCT(newsize) - VM_ACCT(oldsize));
		else if (VM_ACCT(newsize) < VM_ACCT(oldsize))
			vm_unacct_memory(VM_ACCT(oldsize) - VM_ACCT(newsize));
	}
	return 0;
}

/*
 * ... whereas tmpfs objects are accounted incrementally as
 * pages are allocated, in order to allow large sparse files.
 * shmem_getpage reports shmem_acct_block failure as -ENOSPC not -ENOMEM,
 * so that a failure on a sparse tmpfs mapping will give SIGBUS not OOM.
 */
static inline int shmem_acct_block(unsigned long flags, long pages)
{
	if (!(flags & VM_NORESERVE))
		return 0;

	return security_vm_enough_memory_mm(current->mm,
			pages * VM_ACCT(PAGE_SIZE));
}

static inline void shmem_unacct_blocks(unsigned long flags, long pages)
{
	if (flags & VM_NORESERVE)
		vm_unacct_memory(pages * VM_ACCT(PAGE_SIZE));
}

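/*
 * Charge @pages against both the overcommit accounting above and, when the
 * mount has a "size=" limit, the per-superblock used_blocks counter; if the
 * second charge fails the first is undone, so callers see a plain bool.
 */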
static inline bool shmem_inode_acct_block(struct inode *inode, long pages)
{
	struct shmem_inode_info *info = SHMEM_I(inode);
	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);

	if (shmem_acct_block(info->flags, pages))
		return false;

	if (sbinfo->max_blocks) {
		if (percpu_counter_compare(&sbinfo->used_blocks,
					   sbinfo->max_blocks - pages) > 0)
			goto unacct;
		percpu_counter_add(&sbinfo->used_blocks, pages);
	}

	return true;

unacct:
	shmem_unacct_blocks(info->flags, pages);
	return false;
}

static inline void shmem_inode_unacct_blocks(struct inode *inode, long pages)
{
	struct shmem_inode_info *info = SHMEM_I(inode);
	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);

	if (sbinfo->max_blocks)
		percpu_counter_sub(&sbinfo->used_blocks, pages);
	shmem_unacct_blocks(info->flags, pages);
}

static const struct super_operations shmem_ops;
static const struct address_space_operations shmem_aops;
static const struct file_operations shmem_file_operations;
static const struct inode_operations shmem_inode_operations;
static const struct inode_operations shmem_dir_inode_operations;
static const struct inode_operations shmem_special_inode_operations;
static const struct vm_operations_struct shmem_vm_ops;
static struct file_system_type shmem_fs_type;

bool vma_is_shmem(struct vm_area_struct *vma)
{
	return vma->vm_ops == &shmem_vm_ops;
}

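/*
 * All shmem inodes that currently have pages out on swap are kept on
 * shmem_swaplist, under shmem_swaplist_mutex, so that shmem_unuse() can
 * find them again at swapoff time.
 */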
static LIST_HEAD(shmem_swaplist);
static DEFINE_MUTEX(shmem_swaplist_mutex);

/*
 * shmem_reserve_inode() performs bookkeeping to reserve a shmem inode, and
 * produces a novel ino for the newly allocated inode.
 *
 * It may also be called when making a hard link to permit the space needed by
 * each dentry. However, in that case, no new inode number is needed since that
 * internally draws from another pool of inode numbers (currently global
 * get_next_ino()). This case is indicated by passing NULL as inop.
 */
#define SHMEM_INO_BATCH 1024
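/* Per-cpu batch size for SB_KERNMOUNT inode numbers (see comment below). */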
static int shmem_reserve_inode(struct super_block *sb, ino_t *inop)
{
	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
	ino_t ino;

	if (!(sb->s_flags & SB_KERNMOUNT)) {
		spin_lock(&sbinfo->stat_lock);
		if (sbinfo->max_inodes) {
			if (!sbinfo->free_inodes) {
				spin_unlock(&sbinfo->stat_lock);
				return -ENOSPC;
			}
			sbinfo->free_inodes--;
		}
		if (inop) {
			ino = sbinfo->next_ino++;
			if (unlikely(is_zero_ino(ino)))
				ino = sbinfo->next_ino++;
			if (unlikely(!sbinfo->full_inums &&
				     ino > UINT_MAX)) {
				/*
				 * Emulate get_next_ino uint wraparound for
				 * compatibility
				 */
				if (IS_ENABLED(CONFIG_64BIT))
					pr_warn("%s: inode number overflow on device %d, consider using inode64 mount option\n",
						__func__, MINOR(sb->s_dev));
				sbinfo->next_ino = 1;
				ino = sbinfo->next_ino++;
			}
			*inop = ino;
		}
		spin_unlock(&sbinfo->stat_lock);
	} else if (inop) {
		/*
		 * __shmem_file_setup, one of our callers, is lock-free: it
		 * doesn't hold stat_lock in shmem_reserve_inode since
		 * max_inodes is always 0, and is called from potentially
		 * unknown contexts. As such, use a per-cpu batched allocator
		 * which doesn't require the per-sb stat_lock unless we are at
		 * the batch boundary.
		 *
		 * We don't need to worry about inode{32,64} since SB_KERNMOUNT
		 * shmem mounts are not exposed to userspace, so we don't need
		 * to worry about things like glibc compatibility.
		 */
		ino_t *next_ino;
		next_ino = per_cpu_ptr(sbinfo->ino_batch, get_cpu());
		ino = *next_ino;
		if (unlikely(ino % SHMEM_INO_BATCH == 0)) {
			spin_lock(&sbinfo->stat_lock);
			ino = sbinfo->next_ino;
			sbinfo->next_ino += SHMEM_INO_BATCH;
			spin_unlock(&sbinfo->stat_lock);
			if (unlikely(is_zero_ino(ino)))
				ino++;
		}
		*inop = ino;
		*next_ino = ++ino;
		put_cpu();
	}

	return 0;
}

static void shmem_free_inode(struct super_block *sb)
{
	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
	if (sbinfo->max_inodes) {
		spin_lock(&sbinfo->stat_lock);
		sbinfo->free_inodes++;
		spin_unlock(&sbinfo->stat_lock);
	}
}

/**
 * shmem_recalc_inode - recalculate the block usage of an inode
 * @inode: inode to recalc
 *
 * We have to calculate the free blocks since the mm can drop
 * undirtied hole pages behind our back.
 *
 * But normally   info->alloced == inode->i_mapping->nrpages + info->swapped
 * So mm freed is info->alloced - (inode->i_mapping->nrpages + info->swapped)
 *
 * It has to be called with the spinlock held.
 */
static void shmem_recalc_inode(struct inode *inode)
{
	struct shmem_inode_info *info = SHMEM_I(inode);
	long freed;

	freed = info->alloced - info->swapped - inode->i_mapping->nrpages;
	if (freed > 0) {
		info->alloced -= freed;
		inode->i_blocks -= freed * BLOCKS_PER_PAGE;
		shmem_inode_unacct_blocks(inode, freed);
	}
}

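/*
 * shmem_charge()/shmem_uncharge() adjust an inode's accounting when pages
 * are added to or removed from its page cache by code outside shmem itself,
 * e.g. when khugepaged collapses existing pages into a huge page.
 */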
bool shmem_charge(struct inode *inode, long pages)
{
	struct shmem_inode_info *info = SHMEM_I(inode);
	unsigned long flags;

	if (!shmem_inode_acct_block(inode, pages))
		return false;

	/* nrpages adjustment first, then shmem_recalc_inode() when balanced */
	inode->i_mapping->nrpages += pages;

	spin_lock_irqsave(&info->lock, flags);
	info->alloced += pages;
	inode->i_blocks += pages * BLOCKS_PER_PAGE;
	shmem_recalc_inode(inode);
	spin_unlock_irqrestore(&info->lock, flags);

	return true;
}

void shmem_uncharge(struct inode *inode, long pages)
{
	struct shmem_inode_info *info = SHMEM_I(inode);
	unsigned long flags;

	/* nrpages adjustment done by __delete_from_page_cache() or caller */

	spin_lock_irqsave(&info->lock, flags);
	info->alloced -= pages;
	inode->i_blocks -= pages * BLOCKS_PER_PAGE;
	shmem_recalc_inode(inode);
	spin_unlock_irqrestore(&info->lock, flags);

	shmem_inode_unacct_blocks(inode, pages);
}

/*
 * Replace item expected in xarray by a new item, while holding xa_lock.
 */
static int shmem_replace_entry(struct address_space *mapping,
			pgoff_t index, void *expected, void *replacement)
{
	XA_STATE(xas, &mapping->i_pages, index);
	void *item;

	VM_BUG_ON(!expected);
	VM_BUG_ON(!replacement);
	item = xas_load(&xas);
	if (item != expected)
		return -ENOENT;
	xas_store(&xas, replacement);
	return 0;
}

/*
 * Sometimes, before we decide whether to proceed or to fail, we must check
 * that an entry was not already brought back from swap by a racing thread.
 *
 * Checking page is not enough: by the time a SwapCache page is locked, it
 * might be reused, and again be SwapCache, using the same swap as before.
 */
static bool shmem_confirm_swap(struct address_space *mapping,
			       pgoff_t index, swp_entry_t swap)
{
	return xa_load(&mapping->i_pages, index) == swp_to_radix_entry(swap);
}

/*
 * Definitions for "huge tmpfs": tmpfs mounted with the huge= option
 *
 * SHMEM_HUGE_NEVER:
 *	disables huge pages for the mount;
 * SHMEM_HUGE_ALWAYS:
 *	enables huge pages for the mount;
 * SHMEM_HUGE_WITHIN_SIZE:
 *	only allocate huge pages if the page will be fully within i_size,
 *	also respect fadvise()/madvise() hints;
 * SHMEM_HUGE_ADVISE:
 *	only allocate huge pages if requested with fadvise()/madvise();
 */

#define SHMEM_HUGE_NEVER	0
#define SHMEM_HUGE_ALWAYS	1
#define SHMEM_HUGE_WITHIN_SIZE	2
#define SHMEM_HUGE_ADVISE	3

/*
 * Special values.
 * Only can be set via /sys/kernel/mm/transparent_hugepage/shmem_enabled:
 *
 * SHMEM_HUGE_DENY:
 *	disables huge on shm_mnt and all mounts, for emergency use;
 * SHMEM_HUGE_FORCE:
 *	enables huge on shm_mnt and all mounts, w/o needing option, for testing;
 *
 */
#define SHMEM_HUGE_DENY		(-1)
#define SHMEM_HUGE_FORCE	(-2)

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/* ifdef here to avoid bloating shmem.o when not necessary */

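/* Current value of the shmem_enabled sysfs knob documented above. */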
static int shmem_huge __read_mostly;

#if defined(CONFIG_SYSFS)
static int shmem_parse_huge(const char *str)
{
	if (!strcmp(str, "never"))
		return SHMEM_HUGE_NEVER;
	if (!strcmp(str, "always"))
		return SHMEM_HUGE_ALWAYS;
	if (!strcmp(str, "within_size"))
		return SHMEM_HUGE_WITHIN_SIZE;
	if (!strcmp(str, "advise"))
		return SHMEM_HUGE_ADVISE;
	if (!strcmp(str, "deny"))
		return SHMEM_HUGE_DENY;
	if (!strcmp(str, "force"))
		return SHMEM_HUGE_FORCE;
	return -EINVAL;
}
#endif

#if defined(CONFIG_SYSFS) || defined(CONFIG_TMPFS)
static const char *shmem_format_huge(int huge)
{
	switch (huge) {
	case SHMEM_HUGE_NEVER:
		return "never";
	case SHMEM_HUGE_ALWAYS:
		return "always";
	case SHMEM_HUGE_WITHIN_SIZE:
		return "within_size";
	case SHMEM_HUGE_ADVISE:
		return "advise";
	case SHMEM_HUGE_DENY:
		return "deny";
	case SHMEM_HUGE_FORCE:
		return "force";
	default:
		VM_BUG_ON(1);
		return "bad_val";
	}
}
#endif

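/*
 * Scan the superblock's shrinklist of inodes that may still hold a huge
 * page extending beyond i_size, and try to split those pages so the unused
 * tail beyond EOF can be freed.  Splitting stops after @nr_to_split pages
 * when that is non-zero; returns the number of pages split, or SHRINK_STOP
 * if the list was empty.
 */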
static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo,
		struct shrink_control *sc, unsigned long nr_to_split)
{
	LIST_HEAD(list), *pos, *next;
	LIST_HEAD(to_remove);
	struct inode *inode;
	struct shmem_inode_info *info;
	struct page *page;
	unsigned long batch = sc ? sc->nr_to_scan : 128;
	int split = 0;

	if (list_empty(&sbinfo->shrinklist))
		return SHRINK_STOP;

	spin_lock(&sbinfo->shrinklist_lock);
	list_for_each_safe(pos, next, &sbinfo->shrinklist) {
		info = list_entry(pos, struct shmem_inode_info, shrinklist);

		/* pin the inode */
		inode = igrab(&info->vfs_inode);

		/* inode is about to be evicted */
		if (!inode) {
			list_del_init(&info->shrinklist);
			goto next;
		}

		/* Check if there's anything to gain */
		if (round_up(inode->i_size, PAGE_SIZE) ==
				round_up(inode->i_size, HPAGE_PMD_SIZE)) {
			list_move(&info->shrinklist, &to_remove);
			goto next;
		}

		list_move(&info->shrinklist, &list);
next:
		sbinfo->shrinklist_len--;
		if (!--batch)
			break;
	}
	spin_unlock(&sbinfo->shrinklist_lock);

	list_for_each_safe(pos, next, &to_remove) {
		info = list_entry(pos, struct shmem_inode_info, shrinklist);
		inode = &info->vfs_inode;
		list_del_init(&info->shrinklist);
		iput(inode);
	}

	list_for_each_safe(pos, next, &list) {
		int ret;

		info = list_entry(pos, struct shmem_inode_info, shrinklist);
		inode = &info->vfs_inode;

		if (nr_to_split && split >= nr_to_split)
			goto move_back;

		page = find_get_page(inode->i_mapping,
				(inode->i_size & HPAGE_PMD_MASK) >> PAGE_SHIFT);
		if (!page)
			goto drop;

		/* No huge page at the end of the file: nothing to split */
		if (!PageTransHuge(page)) {
			put_page(page);
			goto drop;
		}

		/*
		 * Move the inode on the list back to shrinklist if we failed
		 * to lock the page at this time.
		 *
		 * Waiting for the lock may lead to deadlock in the
		 * reclaim path.
		 */
		if (!trylock_page(page)) {
			put_page(page);
			goto move_back;
		}

		ret = split_huge_page(page);
		unlock_page(page);
		put_page(page);

		/* If split failed move the inode on the list back to shrinklist */
		if (ret)
			goto move_back;

		split++;
drop:
		list_del_init(&info->shrinklist);
		goto put;
move_back:
		/*
		 * Make sure the inode is either on the global list or deleted
		 * from any local list before iput() since it could be deleted
		 * in another thread once we put the inode (then the local list
		 * is corrupted).
		 */
		spin_lock(&sbinfo->shrinklist_lock);
		list_move(&info->shrinklist, &sbinfo->shrinklist);
		sbinfo->shrinklist_len++;
		spin_unlock(&sbinfo->shrinklist_lock);
put:
		iput(inode);
	}

	return split;
}

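/*
 * These two helpers are used as the superblock's ->nr_cached_objects and
 * ->free_cached_objects callbacks, letting the generic superblock shrinker
 * drive shmem_unused_huge_shrink() under memory pressure.
 */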
static long shmem_unused_huge_scan(struct super_block *sb,
		struct shrink_control *sc)
{
	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);

	if (!READ_ONCE(sbinfo->shrinklist_len))
		return SHRINK_STOP;

	return shmem_unused_huge_shrink(sbinfo, sc, 0);
}

static long shmem_unused_huge_count(struct super_block *sb,
		struct shrink_control *sc)
{
	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
	return READ_ONCE(sbinfo->shrinklist_len);
}
#else /* !CONFIG_TRANSPARENT_HUGEPAGE */

#define shmem_huge SHMEM_HUGE_DENY

static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo,
		struct shrink_control *sc, unsigned long nr_to_split)
{
	return 0;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

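/*
 * Whether huge pages may be used at all for this mount: either the global
 * "force" override or the mount's own huge= option asks for them, and the
 * global "deny" override is not set.
 */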
static inline bool is_huge_enabled(struct shmem_sb_info *sbinfo)
{
	if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) &&
	    (shmem_huge == SHMEM_HUGE_FORCE || sbinfo->huge) &&
	    shmem_huge != SHMEM_HUGE_DENY)
		return true;
	return false;
}

/*
 * Like add_to_page_cache_locked, but error if expected item has gone.
 */
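/*
 * The page (possibly a compound THP) is stored at @index; unless it is
 * already in the swap cache it is first charged to @charge_mm's memcg, and
 * on failure the references taken here are dropped and the error returned.
 */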
static int shmem_add_to_page_cache(struct page *page,
				   struct address_space *mapping,
				   pgoff_t index, void *expected, gfp_t gfp,
				   struct mm_struct *charge_mm)
{
	XA_STATE_ORDER(xas, &mapping->i_pages, index, compound_order(page));
	unsigned long i = 0;
	unsigned long nr = compound_nr(page);
	int error;

	VM_BUG_ON_PAGE(PageTail(page), page);
	VM_BUG_ON_PAGE(index != round_down(index, nr), page);
	VM_BUG_ON_PAGE(!PageLocked(page), page);
	VM_BUG_ON_PAGE(!PageSwapBacked(page), page);
	VM_BUG_ON(expected && PageTransHuge(page));

	page_ref_add(page, nr);
	page->mapping = mapping;
	page->index = index;

	if (!PageSwapCache(page)) {
		error = mem_cgroup_charge(page, charge_mm, gfp);
		if (error) {
			if (PageTransHuge(page)) {
				count_vm_event(THP_FILE_FALLBACK);
				count_vm_event(THP_FILE_FALLBACK_CHARGE);
			}
			goto error;
		}
	}
	cgroup_throttle_swaprate(page, gfp);

	do {
		void *entry;
		xas_lock_irq(&xas);
		entry = xas_find_conflict(&xas);
		if (entry != expected)
			xas_set_err(&xas, -EEXIST);
		xas_create_range(&xas);
		if (xas_error(&xas))
			goto unlock;
next:
		xas_store(&xas, page);
		if (++i < nr) {
			xas_next(&xas);
			goto next;
		}
		if (PageTransHuge(page)) {
			count_vm_event(THP_FILE_ALLOC);
			__inc_node_page_state(page, NR_SHMEM_THPS);
		}
		mapping->nrpages += nr;
		__mod_lruvec_page_state(page, NR_FILE_PAGES, nr);
		__mod_lruvec_page_state(page, NR_SHMEM, nr);
unlock:
		xas_unlock_irq(&xas);
	} while (xas_nomem(&xas, gfp));

	if (xas_error(&xas)) {
		error = xas_error(&xas);
		goto error;
	}

	return 0;
error:
	page->mapping = NULL;
	page_ref_sub(page, nr);
	return error;
}

/*
 * Like delete_from_page_cache, but substitutes swap for page.
 */
static void shmem_delete_from_page_cache(struct page *page, void *radswap)
{
	struct address_space *mapping = page->mapping;
	int error;

	VM_BUG_ON_PAGE(PageCompound(page), page);

	xa_lock_irq(&mapping->i_pages);
	error = shmem_replace_entry(mapping, page->index, page, radswap);
	page->mapping = NULL;
	mapping->nrpages--;
	__dec_lruvec_page_state(page, NR_FILE_PAGES);
	__dec_lruvec_page_state(page, NR_SHMEM);
	xa_unlock_irq(&mapping->i_pages);
	put_page(page);
	BUG_ON(error);
}

/*
 * Remove swap entry from page cache, free the swap and its page cache.
 */
static int shmem_free_swap(struct address_space *mapping,
			   pgoff_t index, void *radswap)
{
	void *old;

	old = xa_cmpxchg_irq(&mapping->i_pages, index, radswap, NULL, 0);
	if (old != radswap)
		return -ENOENT;
	free_swap_and_cache(radix_to_swp_entry(radswap));
	return 0;
}

/*
 * Determine (in bytes) how many of the shmem object's pages mapped by the
 * given offsets are swapped out.
 *
 * This is safe to call without i_mutex or the i_pages lock thanks to RCU,
 * as long as the inode doesn't go away and racy results are not a problem.
 */
unsigned long shmem_partial_swap_usage(struct address_space *mapping,
						pgoff_t start, pgoff_t end)
{
	XA_STATE(xas, &mapping->i_pages, start);
	struct page *page;
	unsigned long swapped = 0;

	rcu_read_lock();
	xas_for_each(&xas, page, end - 1) {
		if (xas_retry(&xas, page))
			continue;
		if (xa_is_value(page))
			swapped++;

		if (need_resched()) {
			xas_pause(&xas);
			cond_resched_rcu();
		}
	}

	rcu_read_unlock();

	return swapped << PAGE_SHIFT;
}

/*
 * Determine (in bytes) how many of the shmem object's pages mapped by the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 818) * given vma are swapped out.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 819) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 820) * This is safe to call without i_mutex or the i_pages lock thanks to RCU,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 821) * as long as the inode doesn't go away and racy results are not a problem.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 822) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 823) unsigned long shmem_swap_usage(struct vm_area_struct *vma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 824) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 825) struct inode *inode = file_inode(vma->vm_file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 826) struct shmem_inode_info *info = SHMEM_I(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 827) struct address_space *mapping = inode->i_mapping;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 828) unsigned long swapped;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 829)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 830) /* Be careful as we don't hold info->lock */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 831) swapped = READ_ONCE(info->swapped);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 832)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 833) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 834) * The easier cases are when the shmem object has nothing in swap, or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 835) * the vma maps it whole. Then we can simply use the stats that we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 836) * already track.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 837) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 838) if (!swapped)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 839) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 840)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 841) if (!vma->vm_pgoff && vma->vm_end - vma->vm_start >= inode->i_size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 842) return swapped << PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 843)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 844) /* Here comes the more involved part */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 845) return shmem_partial_swap_usage(mapping,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 846) linear_page_index(vma, vma->vm_start),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 847) linear_page_index(vma, vma->vm_end));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 848) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 849)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 850) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 851) * SysV IPC SHM_UNLOCK: restore Unevictable pages to their evictable lists.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 852) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 853) void shmem_unlock_mapping(struct address_space *mapping)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 854) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 855) struct pagevec pvec;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 856) pgoff_t indices[PAGEVEC_SIZE];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 857) pgoff_t index = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 858)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 859) pagevec_init(&pvec);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 860) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 861) * Minor point, but we might as well stop if someone else SHM_LOCKs it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 862) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 863) while (!mapping_unevictable(mapping)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 864) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 865) * Avoid pagevec_lookup(): find_get_pages() would return 0, as if it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 866) * had finished, when it hits a run of PAGEVEC_SIZE swap entries.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 867) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 868) pvec.nr = find_get_entries(mapping, index,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 869) PAGEVEC_SIZE, pvec.pages, indices);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 870) if (!pvec.nr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 871) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 872) index = indices[pvec.nr - 1] + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 873) pagevec_remove_exceptionals(&pvec);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 874) check_move_unevictable_pages(&pvec);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 875) pagevec_release(&pvec);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 876) cond_resched();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 877) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 878) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 879)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 880) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 881) * Check whether a hole-punch or truncation needs to split a huge page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 882) * returning true if no split was required, or the split has been successful.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 883) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 884) * Eviction (or truncation to 0 size) should never need to split a huge page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 885) * but in rare cases it might, if shmem_undo_range() failed to trylock the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 886) * head page, and then succeeded in trylocking a tail page.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 887) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 888) * A split can only succeed when there are no additional references on the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 889) * huge page: so the split below relies upon find_get_entries() having stopped
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 890) * when it found a subpage of the huge page, without getting further references.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 891) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 892) static bool shmem_punch_compound(struct page *page, pgoff_t start, pgoff_t end)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 893) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 894) if (!PageTransCompound(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 895) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 896)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 897) /* Just proceed to delete a huge page wholly within the range punched */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 898) if (PageHead(page) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 899) page->index >= start && page->index + HPAGE_PMD_NR <= end)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 900) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 901)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 902) /* Try to split huge page, so we can truly punch the hole or truncate */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 903) return split_huge_page(page) >= 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 904) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 905)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 906) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 907) * Remove range of pages and swap entries from page cache, and free them.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 908) * If !unfalloc, truncate or punch hole; if unfalloc, undo failed fallocate.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 909) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 910) static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 911) bool unfalloc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 912) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 913) struct address_space *mapping = inode->i_mapping;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 914) struct shmem_inode_info *info = SHMEM_I(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 915) pgoff_t start = (lstart + PAGE_SIZE - 1) >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 916) pgoff_t end = (lend + 1) >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 917) unsigned int partial_start = lstart & (PAGE_SIZE - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 918) unsigned int partial_end = (lend + 1) & (PAGE_SIZE - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 919) struct pagevec pvec;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 920) pgoff_t indices[PAGEVEC_SIZE];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 921) long nr_swaps_freed = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 922) pgoff_t index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 923) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 924)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 925) if (lend == -1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 926) end = -1; /* unsigned, so actually very big */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 927)
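/*
 * First pass: scan the range with trylock only, removing whatever pages
 * and swap entries can be dropped without sleeping; anything we fail to
 * lock here is picked up again by the second pass further below.
 */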
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 928) pagevec_init(&pvec);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 929) index = start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 930) while (index < end) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 931) pvec.nr = find_get_entries(mapping, index,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 932) min(end - index, (pgoff_t)PAGEVEC_SIZE),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 933) pvec.pages, indices);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 934) if (!pvec.nr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 935) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 936) for (i = 0; i < pagevec_count(&pvec); i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 937) struct page *page = pvec.pages[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 938)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 939) index = indices[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 940) if (index >= end)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 941) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 942)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 943) if (xa_is_value(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 944) if (unfalloc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 945) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 946) nr_swaps_freed += !shmem_free_swap(mapping,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 947) index, page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 948) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 949) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 950)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 951) VM_BUG_ON_PAGE(page_to_pgoff(page) != index, page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 952)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 953) if (!trylock_page(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 954) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 955)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 956) if ((!unfalloc || !PageUptodate(page)) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 957) page_mapping(page) == mapping) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 958) VM_BUG_ON_PAGE(PageWriteback(page), page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 959) if (shmem_punch_compound(page, start, end))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 960) truncate_inode_page(mapping, page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 961) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 962) unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 963) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 964) pagevec_remove_exceptionals(&pvec);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 965) pagevec_release(&pvec);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 966) cond_resched();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 967) index++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 968) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 969)
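/*
 * The hole may begin or end in the middle of a page: zero out just the
 * partial first and last pages rather than removing them entirely.
 */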
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 970) if (partial_start) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 971) struct page *page = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 972) shmem_getpage(inode, start - 1, &page, SGP_READ);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 973) if (page) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 974) unsigned int top = PAGE_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 975) if (start > end) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 976) top = partial_end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 977) partial_end = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 978) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 979) zero_user_segment(page, partial_start, top);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 980) set_page_dirty(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 981) unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 982) put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 983) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 984) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 985) if (partial_end) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 986) struct page *page = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 987) shmem_getpage(inode, end, &page, SGP_READ);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 988) if (page) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 989) zero_user_segment(page, 0, partial_end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 990) set_page_dirty(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 991) unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 992) put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 993) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 994) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 995) if (start >= end)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 996) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 997)
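/*
 * Second pass: whatever the trylock pass above could not remove is
 * retried here with a sleeping lock_page(); restart if a page or swap
 * entry was replaced under us, and when truncating, loop until the
 * range really is empty.
 */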
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 998) index = start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 999) while (index < end) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000) cond_resched();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002) pvec.nr = find_get_entries(mapping, index,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003) min(end - index, (pgoff_t)PAGEVEC_SIZE),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004) pvec.pages, indices);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005) if (!pvec.nr) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006) /* If all gone or hole-punch or unfalloc, we're done */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007) if (index == start || end != -1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009) /* But if truncating, restart to make sure all gone */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010) index = start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013) for (i = 0; i < pagevec_count(&pvec); i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014) struct page *page = pvec.pages[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016) index = indices[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017) if (index >= end)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020) if (xa_is_value(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) if (unfalloc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023) if (shmem_free_swap(mapping, index, page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) /* Swap was replaced by page: retry */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025) index--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028) nr_swaps_freed++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) lock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034) if (!unfalloc || !PageUptodate(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035) if (page_mapping(page) != mapping) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036) /* Page was replaced by swap: retry */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037) unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038) index--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041) VM_BUG_ON_PAGE(PageWriteback(page), page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042) if (shmem_punch_compound(page, start, end))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) truncate_inode_page(mapping, page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044) else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045) /* Wipe the page and don't get stuck */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046) clear_highpage(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047) flush_dcache_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) set_page_dirty(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049) if (index <
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050) round_up(start, HPAGE_PMD_NR))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051) start = index + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056) pagevec_remove_exceptionals(&pvec);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057) pagevec_release(&pvec);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058) index++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060)
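/* Fold the freed swap entries back into the inode's accounting. */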
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061) spin_lock_irq(&info->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062) info->swapped -= nr_swaps_freed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063) shmem_recalc_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064) spin_unlock_irq(&info->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067) void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069) shmem_undo_range(inode, lstart, lend, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070) inode->i_ctime = inode->i_mtime = current_time(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072) EXPORT_SYMBOL_GPL(shmem_truncate_range);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074) static int shmem_getattr(const struct path *path, struct kstat *stat,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075) u32 request_mask, unsigned int query_flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077) struct inode *inode = path->dentry->d_inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078) struct shmem_inode_info *info = SHMEM_I(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079) struct shmem_sb_info *sb_info = SHMEM_SB(inode->i_sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081) if (info->alloced - info->swapped != inode->i_mapping->nrpages) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082) spin_lock_irq(&info->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083) shmem_recalc_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084) spin_unlock_irq(&info->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086) generic_fillattr(inode, stat);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088) if (is_huge_enabled(sb_info))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089) stat->blksize = HPAGE_PMD_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094) static int shmem_setattr(struct dentry *dentry, struct iattr *attr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096) struct inode *inode = d_inode(dentry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097) struct shmem_inode_info *info = SHMEM_I(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098) struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099) int error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101) error = setattr_prepare(dentry, attr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102) if (error)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103) return error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105) if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106) loff_t oldsize = inode->i_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107) loff_t newsize = attr->ia_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109) /* protected by i_mutex */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110) if ((newsize < oldsize && (info->seals & F_SEAL_SHRINK)) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111) (newsize > oldsize && (info->seals & F_SEAL_GROW)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112) return -EPERM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1113)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114) if (newsize != oldsize) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115) error = shmem_reacct_size(SHMEM_I(inode)->flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116) oldsize, newsize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117) if (error)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118) return error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119) i_size_write(inode, newsize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120) inode->i_ctime = inode->i_mtime = current_time(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122) if (newsize <= oldsize) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123) loff_t holebegin = round_up(newsize, PAGE_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124) if (oldsize > holebegin)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125) unmap_mapping_range(inode->i_mapping,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126) holebegin, 0, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127) if (info->alloced)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128) shmem_truncate_range(inode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129) newsize, (loff_t)-1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130) /* unmap again to remove racily COWed private pages */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131) if (oldsize > holebegin)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132) unmap_mapping_range(inode->i_mapping,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133) holebegin, 0, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136) * Part of the huge page can be beyond i_size: it is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137) * subject to shrinking under memory pressure.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139) if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140) spin_lock(&sbinfo->shrinklist_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142) * Use list_empty_careful() to defend against unlocked
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143) * access to ->shrinklist in shmem_unused_huge_shrink()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145) if (list_empty_careful(&info->shrinklist)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146) list_add_tail(&info->shrinklist,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147) &sbinfo->shrinklist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148) sbinfo->shrinklist_len++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150) spin_unlock(&sbinfo->shrinklist_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155) setattr_copy(inode, attr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156) if (attr->ia_valid & ATTR_MODE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157) error = posix_acl_chmod(inode, inode->i_mode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158) return error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161) static void shmem_evict_inode(struct inode *inode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163) struct shmem_inode_info *info = SHMEM_I(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164) struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166) if (inode->i_mapping->a_ops == &shmem_aops) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167) shmem_unacct_size(info->flags, inode->i_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168) inode->i_size = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169) shmem_truncate_range(inode, 0, (loff_t)-1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170) if (!list_empty(&info->shrinklist)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171) spin_lock(&sbinfo->shrinklist_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172) if (!list_empty(&info->shrinklist)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173) list_del_init(&info->shrinklist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174) sbinfo->shrinklist_len--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176) spin_unlock(&sbinfo->shrinklist_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178) while (!list_empty(&info->swaplist)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179) /* Wait while shmem_unuse() is scanning this inode... */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180) wait_var_event(&info->stop_eviction,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181) !atomic_read(&info->stop_eviction));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182) mutex_lock(&shmem_swaplist_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183) /* ...but beware of the race if we peeked too early */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184) if (!atomic_read(&info->stop_eviction))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185) list_del_init(&info->swaplist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186) mutex_unlock(&shmem_swaplist_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1189)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190) simple_xattrs_free(&info->xattrs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191) WARN_ON(inode->i_blocks);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192) shmem_free_inode(inode->i_sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193) clear_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196) extern struct swap_info_struct *swap_info[];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197)
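/*
 * Scan the page cache of @mapping from @start for swap entries belonging
 * to swap device @type (optionally only those tracked by frontswap),
 * filling at most @nr_entries slots of @entries/@indices; returns the
 * number of entries found.
 */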
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198) static int shmem_find_swap_entries(struct address_space *mapping,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199) pgoff_t start, unsigned int nr_entries,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200) struct page **entries, pgoff_t *indices,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201) unsigned int type, bool frontswap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203) XA_STATE(xas, &mapping->i_pages, start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205) swp_entry_t entry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206) unsigned int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208) if (!nr_entries)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1209) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212) xas_for_each(&xas, page, ULONG_MAX) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213) if (xas_retry(&xas, page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216) if (!xa_is_value(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219) entry = radix_to_swp_entry(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220) if (swp_type(entry) != type)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222) if (frontswap &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223) !frontswap_test(swap_info[type], swp_offset(entry)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226) indices[ret] = xas.xa_index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227) entries[ret] = page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229) if (need_resched()) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230) xas_pause(&xas);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231) cond_resched_rcu();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233) if (++ret == nr_entries)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1237)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1238) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1239) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1240)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1241) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1242) * Move the swapped pages for an inode to page cache. Returns the count
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1243) * of pages swapped in, or the error in case of failure.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1244) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1245) static int shmem_unuse_swap_entries(struct inode *inode, struct pagevec pvec,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1246) pgoff_t *indices)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1247) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1248) int i = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1249) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1250) int error = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1251) struct address_space *mapping = inode->i_mapping;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1252)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1253) for (i = 0; i < pvec.nr; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1254) struct page *page = pvec.pages[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1255)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1256) if (!xa_is_value(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1257) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1258) error = shmem_swapin_page(inode, indices[i],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1259) &page, SGP_CACHE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1260) mapping_gfp_mask(mapping),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1261) NULL, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1262) if (error == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1263) unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1264) put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1265) ret++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1266) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1267) if (error == -ENOMEM)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1268) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1269) error = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1270) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1271) return error ? error : ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1272) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1273)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1274) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1275) * If swap found in inode, free it and move page from swapcache to filecache.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1276) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1277) static int shmem_unuse_inode(struct inode *inode, unsigned int type,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1278) bool frontswap, unsigned long *fs_pages_to_unuse)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1279) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1280) struct address_space *mapping = inode->i_mapping;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1281) pgoff_t start = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1282) struct pagevec pvec;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1283) pgoff_t indices[PAGEVEC_SIZE];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1284) bool frontswap_partial = (frontswap && *fs_pages_to_unuse > 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1285) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1286)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1287) pagevec_init(&pvec);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1288) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1289) unsigned int nr_entries = PAGEVEC_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1290)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1291) if (frontswap_partial && *fs_pages_to_unuse < PAGEVEC_SIZE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1292) nr_entries = *fs_pages_to_unuse;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1293)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1294) pvec.nr = shmem_find_swap_entries(mapping, start, nr_entries,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1295) pvec.pages, indices,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1296) type, frontswap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1297) if (pvec.nr == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1298) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1299) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1300) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1301)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1302) ret = shmem_unuse_swap_entries(inode, pvec, indices);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1303) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1304) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1305)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1306) if (frontswap_partial) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1307) *fs_pages_to_unuse -= ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1308) if (*fs_pages_to_unuse == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1309) ret = FRONTSWAP_PAGES_UNUSED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1310) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1311) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1312) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1313)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1314) start = indices[pvec.nr - 1];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1315) } while (true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1316)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1317) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1318) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1319)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1320) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1321) * Read all the shared memory data that resides in the swap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1322) * device 'type' back into memory, so that the swap device
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1323) * can be removed (swapoff).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1324) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1325) int shmem_unuse(unsigned int type, bool frontswap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1326) unsigned long *fs_pages_to_unuse)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1327) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1328) struct shmem_inode_info *info, *next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1329) int error = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1330)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1331) if (list_empty(&shmem_swaplist))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1332) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1333)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1334) mutex_lock(&shmem_swaplist_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1335) list_for_each_entry_safe(info, next, &shmem_swaplist, swaplist) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1336) if (!info->swapped) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1337) list_del_init(&info->swaplist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1338) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1339) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1340) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1341) * Drop the swaplist mutex while searching the inode for swap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1342) * but before doing so, make sure shmem_evict_inode() will not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1343) * remove placeholder inode from swaplist, nor let it be freed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1344) * (igrab() would protect from unlink, but not from unmount).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1345) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1346) atomic_inc(&info->stop_eviction);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1347) mutex_unlock(&shmem_swaplist_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1348)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1349) error = shmem_unuse_inode(&info->vfs_inode, type, frontswap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1350) fs_pages_to_unuse);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1351) cond_resched();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1352)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1353) mutex_lock(&shmem_swaplist_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1354) next = list_next_entry(info, swaplist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1355) if (!info->swapped)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1356) list_del_init(&info->swaplist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1357) if (atomic_dec_and_test(&info->stop_eviction))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1358) wake_up_var(&info->stop_eviction);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1359) if (error)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1360) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1361) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1362) mutex_unlock(&shmem_swaplist_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1363)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1364) return error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1365) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1366)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1367) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1368) * Move the page from the page cache to the swap cache.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1369) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1370) static int shmem_writepage(struct page *page, struct writeback_control *wbc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1371) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1372) struct shmem_inode_info *info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1373) struct address_space *mapping;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1374) struct inode *inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1375) swp_entry_t swap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1376) pgoff_t index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1377)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1378) VM_BUG_ON_PAGE(PageCompound(page), page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1379) BUG_ON(!PageLocked(page));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1380) mapping = page->mapping;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1381) index = page->index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1382) inode = mapping->host;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1383) info = SHMEM_I(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1384) if (info->flags & VM_LOCKED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1385) goto redirty;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1386) if (!total_swap_pages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1387) goto redirty;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1388)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1389) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1390) * Our capabilities prevent regular writeback or sync from ever calling
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1391) * shmem_writepage; but a stacking filesystem might use ->writepage of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1392) * its underlying filesystem, in which case tmpfs should write out to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1393) * swap only in response to memory pressure, and not for the writeback
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1394) * threads or sync.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1395) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1396) if (!wbc->for_reclaim) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1397) WARN_ON_ONCE(1); /* Still happens? Tell us about it! */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1398) goto redirty;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1399) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1400)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1401) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1402) * This is somewhat ridiculous, but without plumbing a SWAP_MAP_FALLOC
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1403) * value into swapfile.c, the only way we can correctly account for a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1404) * fallocated page arriving here is now to initialize it and write it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1405) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1406) * That's okay for a page already fallocated earlier, but if we have
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1407) * not yet completed the fallocation, then (a) we want to keep track
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1408) * of this page in case we have to undo it, and (b) it may not be a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1409) * good idea to continue anyway, once we're pushing into swap. So
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1410) * reactivate the page, and let shmem_fallocate() quit when too many.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1411) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1412) if (!PageUptodate(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1413) if (inode->i_private) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1414) struct shmem_falloc *shmem_falloc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1415) spin_lock(&inode->i_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1416) shmem_falloc = inode->i_private;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1417) if (shmem_falloc &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1418) !shmem_falloc->waitq &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1419) index >= shmem_falloc->start &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1420) index < shmem_falloc->next)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1421) shmem_falloc->nr_unswapped++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1422) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1423) shmem_falloc = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1424) spin_unlock(&inode->i_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1425) if (shmem_falloc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1426) goto redirty;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1427) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1428) clear_highpage(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1429) flush_dcache_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1430) SetPageUptodate(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1431) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1432)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1433) swap = get_swap_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1434) if (!swap.val)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1435) goto redirty;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1436)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1437) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1438) * Add inode to shmem_unuse()'s list of swapped-out inodes,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1439) * if it's not already there. Do it now before the page is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1440) * moved to swap cache, when its pagelock no longer protects
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1441) * the inode from eviction. But don't unlock the mutex until
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1442) * we've incremented swapped, because shmem_unuse_inode() will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1443) * prune a !swapped inode from the swaplist under this mutex.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1444) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1445) mutex_lock(&shmem_swaplist_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1446) if (list_empty(&info->swaplist))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1447) list_add(&info->swaplist, &shmem_swaplist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1448)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1449) if (add_to_swap_cache(page, swap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1450) __GFP_HIGH | __GFP_NOMEMALLOC | __GFP_NOWARN,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1451) NULL) == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1452) spin_lock_irq(&info->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1453) shmem_recalc_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1454) info->swapped++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1455) spin_unlock_irq(&info->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1456)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1457) swap_shmem_alloc(swap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1458) shmem_delete_from_page_cache(page, swp_to_radix_entry(swap));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1459)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1460) mutex_unlock(&shmem_swaplist_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1461) BUG_ON(page_mapped(page));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1462) swap_writepage(page, wbc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1463) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1464) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1465)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1466) mutex_unlock(&shmem_swaplist_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1467) put_swap_page(page, swap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1468) redirty:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1469) set_page_dirty(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1470) if (wbc->for_reclaim)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1471) return AOP_WRITEPAGE_ACTIVATE; /* Return with page locked */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1472) unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1473) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1474) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1475)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1476) #if defined(CONFIG_NUMA) && defined(CONFIG_TMPFS)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1477) static void shmem_show_mpol(struct seq_file *seq, struct mempolicy *mpol)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1478) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1479) char buffer[64];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1480)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1481) if (!mpol || mpol->mode == MPOL_DEFAULT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1482) return; /* show nothing */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1483)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1484) mpol_to_str(buffer, sizeof(buffer), mpol);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1485)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1486) seq_printf(seq, ",mpol=%s", buffer);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1487) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1488)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1489) static struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1490) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1491) struct mempolicy *mpol = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1492) if (sbinfo->mpol) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1493) spin_lock(&sbinfo->stat_lock); /* prevent replace/use races */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1494) mpol = sbinfo->mpol;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1495) mpol_get(mpol);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1496) spin_unlock(&sbinfo->stat_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1497) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1498) return mpol;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1499) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1500) #else /* !CONFIG_NUMA || !CONFIG_TMPFS */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1501) static inline void shmem_show_mpol(struct seq_file *seq, struct mempolicy *mpol)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1502) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1503) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1504) static inline struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1505) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1506) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1507) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1508) #endif /* CONFIG_NUMA && CONFIG_TMPFS */
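/*
 * Without CONFIG_NUMA, vm_area_struct has no vm_policy member, so the
 * pseudo-vma helpers below stash the policy in vm_private_data instead.
 */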
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1509) #ifndef CONFIG_NUMA
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1510) #define vm_policy vm_private_data
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1511) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1512)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1513) static void shmem_pseudo_vma_init(struct vm_area_struct *vma,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1514) struct shmem_inode_info *info, pgoff_t index)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1515) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1516) /* Create a pseudo vma that just contains the policy */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1517) vma_init(vma, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1518) /* Bias interleave by inode number to distribute better across nodes */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1519) vma->vm_pgoff = index + info->vfs_inode.i_ino;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1520) vma->vm_policy = mpol_shared_policy_lookup(&info->policy, index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1521) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1522)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1523) static void shmem_pseudo_vma_destroy(struct vm_area_struct *vma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1524) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1525) /* Drop reference taken by mpol_shared_policy_lookup() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1526) mpol_cond_put(vma->vm_policy);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1527) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1528)
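/*
 * Read the page for @swap back in from the swap device, with cluster
 * readahead, using a pseudo-vma to supply the inode's NUMA policy.
 */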
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1529) static struct page *shmem_swapin(swp_entry_t swap, gfp_t gfp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1530) struct shmem_inode_info *info, pgoff_t index)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1531) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1532) struct vm_area_struct pvma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1533) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1534) struct vm_fault vmf = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1535) .vma = &pvma,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1536) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1537)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1538) shmem_pseudo_vma_init(&pvma, info, index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1539) page = swap_cluster_readahead(swap, gfp, &vmf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1540) shmem_pseudo_vma_destroy(&pvma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1541)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1542) return page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1543) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1544)
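/*
 * Allocate a transparent huge page for @index, but only if the whole
 * PMD-aligned block of the page cache around it is still empty; returns
 * NULL if the block is already partly populated, or if the huge
 * allocation itself fails (counted as THP_FILE_FALLBACK).
 */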
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1545) static struct page *shmem_alloc_hugepage(gfp_t gfp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1546) struct shmem_inode_info *info, pgoff_t index)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1547) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1548) struct vm_area_struct pvma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1549) struct address_space *mapping = info->vfs_inode.i_mapping;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1550) pgoff_t hindex;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1551) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1552)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1553) hindex = round_down(index, HPAGE_PMD_NR);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1554) if (xa_find(&mapping->i_pages, &hindex, hindex + HPAGE_PMD_NR - 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1555) XA_PRESENT))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1556) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1557)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1558) shmem_pseudo_vma_init(&pvma, info, hindex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1559) page = alloc_pages_vma(gfp | __GFP_COMP | __GFP_NORETRY | __GFP_NOWARN,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1560) HPAGE_PMD_ORDER, &pvma, 0, numa_node_id(), true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1561) shmem_pseudo_vma_destroy(&pvma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1562) if (page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1563) prep_transhuge_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1564) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1565) count_vm_event(THP_FILE_FALLBACK);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1566) return page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1567) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1568)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1569) static struct page *shmem_alloc_page(gfp_t gfp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1570) struct shmem_inode_info *info, pgoff_t index)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1571) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1572) struct vm_area_struct pvma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1573) struct page *page = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1574)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1575) trace_android_vh_shmem_alloc_page(&page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1576) if (page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1577) return page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1578)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1579) shmem_pseudo_vma_init(&pvma, info, index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1580) page = alloc_page_vma(gfp, &pvma, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1581) shmem_pseudo_vma_destroy(&pvma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1582)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1583) return page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1584) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1585)
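/*
 * Charge the inode's block accounting for the allocation (HPAGE_PMD_NR
 * pages if huge, else one), then allocate the page(s); returns
 * ERR_PTR(-ENOSPC) if the accounting limit is hit, or ERR_PTR(-ENOMEM)
 * if the allocation itself fails.
 */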
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1586) static struct page *shmem_alloc_and_acct_page(gfp_t gfp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1587) struct inode *inode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1588) pgoff_t index, bool huge)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1589) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1590) struct shmem_inode_info *info = SHMEM_I(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1591) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1592) int nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1593) int err = -ENOSPC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1594)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1595) if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1596) huge = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1597) nr = huge ? HPAGE_PMD_NR : 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1598)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1599) if (!shmem_inode_acct_block(inode, nr))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1600) goto failed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1601)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1602) if (huge)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1603) page = shmem_alloc_hugepage(gfp, info, index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1604) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1605) page = shmem_alloc_page(gfp, info, index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1606) if (page) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1607) __SetPageLocked(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1608) __SetPageSwapBacked(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1609) return page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1610) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1611)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1612) err = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1613) shmem_inode_unacct_blocks(inode, nr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1614) failed:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1615) return ERR_PTR(err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1616) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1617)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1618) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1619) * When a page is moved from swapcache to shmem filecache (either by the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1620) * usual swapin of shmem_getpage_gfp(), or by the less common swapoff of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1621) * shmem_unuse_inode()), it may have been read in earlier from swap, in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1622) * ignorance of the mapping it belongs to. If that mapping has special
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1623) * constraints (like the gma500 GEM driver, which requires RAM below 4GB),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1624) * we may need to copy to a suitable page before moving to filecache.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1625) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1626) * In a future release, this may well be extended to respect cpuset and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1627) * NUMA mempolicy, and applied also to anonymous pages in do_swap_page();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1628) * but for now it is a simple matter of zone.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1629) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1630) static bool shmem_should_replace_page(struct page *page, gfp_t gfp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1631) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1632) return page_zonenum(page) > gfp_zone(gfp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1633) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1634)
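/*
 * Replace *pagep, still in swap cache, by a new page suited to the zone
 * restrictions of @gfp: copy its contents and swap state across, and swap
 * the two pages over in the swap-cache mapping.  On success *pagep points
 * to the new page and the old page is released; on failure the new page
 * is freed and *pagep is left pointing at the old (still locked) page.
 */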
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1635) static int shmem_replace_page(struct page **pagep, gfp_t gfp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1636) struct shmem_inode_info *info, pgoff_t index)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1637) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1638) struct page *oldpage, *newpage;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1639) struct address_space *swap_mapping;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1640) swp_entry_t entry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1641) pgoff_t swap_index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1642) int error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1643)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1644) oldpage = *pagep;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1645) entry.val = page_private(oldpage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1646) swap_index = swp_offset(entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1647) swap_mapping = page_mapping(oldpage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1648)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1649) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1650) * We have arrived here because our zones are constrained, so don't
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1651) * limit chance of success by further cpuset and node constraints.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1652) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1653) gfp &= ~GFP_CONSTRAINT_MASK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1654) newpage = shmem_alloc_page(gfp, info, index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1655) if (!newpage)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1656) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1657)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1658) get_page(newpage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1659) copy_highpage(newpage, oldpage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1660) flush_dcache_page(newpage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1661)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1662) __SetPageLocked(newpage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1663) __SetPageSwapBacked(newpage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1664) SetPageUptodate(newpage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1665) set_page_private(newpage, entry.val);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1666) SetPageSwapCache(newpage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1667)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1668) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1669) * Our caller will very soon move newpage out of swapcache, but it's
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1670) * a nice clean interface for us to replace oldpage by newpage there.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1671) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1672) xa_lock_irq(&swap_mapping->i_pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1673) error = shmem_replace_entry(swap_mapping, swap_index, oldpage, newpage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1674) if (!error) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1675) mem_cgroup_migrate(oldpage, newpage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1676) __inc_lruvec_page_state(newpage, NR_FILE_PAGES);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1677) __dec_lruvec_page_state(oldpage, NR_FILE_PAGES);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1678) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1679) xa_unlock_irq(&swap_mapping->i_pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1680)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1681) if (unlikely(error)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1682) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1683) * Is this possible? I think not, now that our callers check
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1684) * both PageSwapCache and page_private after getting page lock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1685) * but be defensive. Reverse old to newpage for clear and free.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1686) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1687) oldpage = newpage;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1688) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1689) lru_cache_add(newpage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1690) *pagep = newpage;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1691) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1692)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1693) ClearPageSwapCache(oldpage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1694) set_page_private(oldpage, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1695)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1696) unlock_page(oldpage);
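	/*
	 * Drop both remaining references: normally on the old page, but on
	 * the new page instead if the replacement above failed.
	 */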
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1697) put_page(oldpage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1698) put_page(oldpage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1699) return error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1700) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1701)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1702) /*
 * Swap in the page for the swap entry found at *pagep.
 * The caller has to make sure that *pagep contains a valid swap entry.
 * Returns 0 with the (locked) page in *pagep on success; on failure,
 * returns the error code and sets *pagep to NULL.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1707) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1708) static int shmem_swapin_page(struct inode *inode, pgoff_t index,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1709) struct page **pagep, enum sgp_type sgp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1710) gfp_t gfp, struct vm_area_struct *vma,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1711) vm_fault_t *fault_type)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1712) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1713) struct address_space *mapping = inode->i_mapping;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1714) struct shmem_inode_info *info = SHMEM_I(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1715) struct mm_struct *charge_mm = vma ? vma->vm_mm : current->mm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1716) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1717) swp_entry_t swap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1718) int error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1719)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1720) VM_BUG_ON(!*pagep || !xa_is_value(*pagep));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1721) swap = radix_to_swp_entry(*pagep);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1722) *pagep = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1723)
	/* Look it up and read it in... */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1725) page = lookup_swap_cache(swap, NULL, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1726) if (!page) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1727) /* Or update major stats only when swapin succeeds?? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1728) if (fault_type) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1729) *fault_type |= VM_FAULT_MAJOR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1730) count_vm_event(PGMAJFAULT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1731) count_memcg_event_mm(charge_mm, PGMAJFAULT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1732) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1733) /* Here we actually start the io */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1734) page = shmem_swapin(swap, gfp, info, index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1735) if (!page) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1736) error = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1737) goto failed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1738) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1739) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1740)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1741) /* We have to do this with page locked to prevent races */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1742) lock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1743) if (!PageSwapCache(page) || page_private(page) != swap.val ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1744) !shmem_confirm_swap(mapping, index, swap)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1745) error = -EEXIST;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1746) goto unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1747) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1748) if (!PageUptodate(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1749) error = -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1750) goto failed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1751) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1752) wait_on_page_writeback(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1753)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1754) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1755) * Some architectures may have to restore extra metadata to the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1756) * physical page after reading from swap.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1757) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1758) arch_swap_restore(swap, page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1759)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1760) if (shmem_should_replace_page(page, gfp)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1761) error = shmem_replace_page(&page, gfp, info, index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1762) if (error)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1763) goto failed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1764) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1765)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1766) error = shmem_add_to_page_cache(page, mapping, index,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1767) swp_to_radix_entry(swap), gfp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1768) charge_mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1769) if (error)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1770) goto failed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1771)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1772) spin_lock_irq(&info->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1773) info->swapped--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1774) shmem_recalc_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1775) spin_unlock_irq(&info->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1776)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1777) if (sgp == SGP_WRITE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1778) mark_page_accessed(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1779)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1780) delete_from_swap_cache(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1781) set_page_dirty(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1782) swap_free(swap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1783)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1784) *pagep = page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1785) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1786) failed:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1787) if (!shmem_confirm_swap(mapping, index, swap))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1788) error = -EEXIST;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1789) unlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1790) if (page) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1791) unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1792) put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1793) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1794)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1795) return error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1796) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1797)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1798) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1799) * shmem_getpage_gfp - find page in cache, or get from swap, or allocate
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1800) *
 * If we allocate a new one we do not mark it dirty. That's up to the
 * vm. If we swap it in we mark it dirty, since we also free the swap
 * entry: a page cannot live in both swap and the page cache.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1804) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1805) * vma, vmf, and fault_type are only supplied by shmem_fault:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1806) * otherwise they are NULL.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1807) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1808) static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1809) struct page **pagep, enum sgp_type sgp, gfp_t gfp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1810) struct vm_area_struct *vma, struct vm_fault *vmf,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1811) vm_fault_t *fault_type)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1812) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1813) struct address_space *mapping = inode->i_mapping;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1814) struct shmem_inode_info *info = SHMEM_I(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1815) struct shmem_sb_info *sbinfo;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1816) struct mm_struct *charge_mm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1817) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1818) enum sgp_type sgp_huge = sgp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1819) pgoff_t hindex = index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1820) int error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1821) int once = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1822) int alloced = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1823)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1824) if (index > (MAX_LFS_FILESIZE >> PAGE_SHIFT))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1825) return -EFBIG;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1826) if (sgp == SGP_NOHUGE || sgp == SGP_HUGE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1827) sgp = SGP_CACHE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1828) repeat:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1829) if (sgp <= SGP_CACHE &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1830) ((loff_t)index << PAGE_SHIFT) >= i_size_read(inode)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1831) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1832) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1833)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1834) sbinfo = SHMEM_SB(inode->i_sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1835) charge_mm = vma ? vma->vm_mm : current->mm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1836)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1837) page = find_lock_entry(mapping, index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1838)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1839) if (page && vma && userfaultfd_minor(vma)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1840) if (!xa_is_value(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1841) unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1842) put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1843) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1844) *fault_type = handle_userfault(vmf, VM_UFFD_MINOR);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1845) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1846) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1847)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1848) if (xa_is_value(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1849) error = shmem_swapin_page(inode, index, &page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1850) sgp, gfp, vma, fault_type);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1851) if (error == -EEXIST)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1852) goto repeat;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1853)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1854) *pagep = page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1855) return error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1856) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1857)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1858) if (page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1859) hindex = page->index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1860) if (page && sgp == SGP_WRITE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1861) mark_page_accessed(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1862)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1863) /* fallocated page? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1864) if (page && !PageUptodate(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1865) if (sgp != SGP_READ)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1866) goto clear;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1867) unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1868) put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1869) page = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1870) hindex = index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1871) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1872) if (page || sgp == SGP_READ)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1873) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1874)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1875) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1876) * Fast cache lookup did not find it:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1877) * bring it back from swap or allocate.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1878) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1879)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1880) if (vma && userfaultfd_missing(vma)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1881) *fault_type = handle_userfault(vmf, VM_UFFD_MISSING);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1882) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1883) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1884)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1885) /* shmem_symlink() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1886) if (mapping->a_ops != &shmem_aops)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1887) goto alloc_nohuge;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1888) if (shmem_huge == SHMEM_HUGE_DENY || sgp_huge == SGP_NOHUGE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1889) goto alloc_nohuge;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1890) if (shmem_huge == SHMEM_HUGE_FORCE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1891) goto alloc_huge;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1892) switch (sbinfo->huge) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1893) case SHMEM_HUGE_NEVER:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1894) goto alloc_nohuge;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1895) case SHMEM_HUGE_WITHIN_SIZE: {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1896) loff_t i_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1897) pgoff_t off;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1898)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1899) off = round_up(index, HPAGE_PMD_NR);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1900) i_size = round_up(i_size_read(inode), PAGE_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1901) if (i_size >= HPAGE_PMD_SIZE &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1902) i_size >> PAGE_SHIFT >= off)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1903) goto alloc_huge;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1904)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1905) fallthrough;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1906) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1907) case SHMEM_HUGE_ADVISE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1908) if (sgp_huge == SGP_HUGE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1909) goto alloc_huge;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1910) /* TODO: implement fadvise() hints */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1911) goto alloc_nohuge;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1912) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1913)
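	/*
	 * Try a huge page first where allowed; if that fails, or if a huge
	 * page is not wanted (the goto alloc_nohuge paths above), fall back
	 * to allocating a single small page.
	 */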
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1914) alloc_huge:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1915) page = shmem_alloc_and_acct_page(gfp, inode, index, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1916) if (IS_ERR(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1917) alloc_nohuge:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1918) page = shmem_alloc_and_acct_page(gfp, inode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1919) index, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1920) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1921) if (IS_ERR(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1922) int retry = 5;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1923)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1924) error = PTR_ERR(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1925) page = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1926) if (error != -ENOSPC)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1927) goto unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1928) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1929) * Try to reclaim some space by splitting a huge page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1930) * beyond i_size on the filesystem.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1931) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1932) while (retry--) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1933) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1934)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1935) ret = shmem_unused_huge_shrink(sbinfo, NULL, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1936) if (ret == SHRINK_STOP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1937) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1938) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1939) goto alloc_nohuge;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1940) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1941) goto unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1942) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1943)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1944) if (PageTransHuge(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1945) hindex = round_down(index, HPAGE_PMD_NR);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1946) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1947) hindex = index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1948)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1949) if (sgp == SGP_WRITE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1950) __SetPageReferenced(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1951)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1952) error = shmem_add_to_page_cache(page, mapping, hindex,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1953) NULL, gfp & GFP_RECLAIM_MASK,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1954) charge_mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1955) if (error)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1956) goto unacct;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1957) lru_cache_add(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1958)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1959) spin_lock_irq(&info->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1960) info->alloced += compound_nr(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1961) inode->i_blocks += BLOCKS_PER_PAGE << compound_order(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1962) shmem_recalc_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1963) spin_unlock_irq(&info->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1964) alloced = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1965)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1966) if (PageTransHuge(page) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1967) DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE) <
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1968) hindex + HPAGE_PMD_NR - 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1969) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1970) * Part of the huge page is beyond i_size: subject
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1971) * to shrink under memory pressure.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1972) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1973) spin_lock(&sbinfo->shrinklist_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1974) /*
		 * Use list_empty_careful() to defend against unlocked
		 * access to ->shrink_list in shmem_unused_huge_shrink().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1977) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1978) if (list_empty_careful(&info->shrinklist)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1979) list_add_tail(&info->shrinklist,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1980) &sbinfo->shrinklist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1981) sbinfo->shrinklist_len++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1982) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1983) spin_unlock(&sbinfo->shrinklist_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1984) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1985)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1986) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1987) * Let SGP_FALLOC use the SGP_WRITE optimization on a new page.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1988) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1989) if (sgp == SGP_FALLOC)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1990) sgp = SGP_WRITE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1991) clear:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1992) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1993) * Let SGP_WRITE caller clear ends if write does not fill page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1994) * but SGP_FALLOC on a page fallocated earlier must initialize
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1995) * it now, lest undo on failure cancel our earlier guarantee.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1996) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1997) if (sgp != SGP_WRITE && !PageUptodate(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1998) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1999)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2000) for (i = 0; i < compound_nr(page); i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2001) clear_highpage(page + i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2002) flush_dcache_page(page + i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2003) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2004) SetPageUptodate(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2005) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2006)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2007) /* Perhaps the file has been truncated since we checked */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2008) if (sgp <= SGP_CACHE &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2009) ((loff_t)index << PAGE_SHIFT) >= i_size_read(inode)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2010) if (alloced) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2011) ClearPageDirty(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2012) delete_from_page_cache(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2013) spin_lock_irq(&info->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2014) shmem_recalc_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2015) spin_unlock_irq(&info->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2016) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2017) error = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2018) goto unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2019) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2020) out:
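	/* hand back the subpage that covers @index (the page itself if small) */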
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2021) *pagep = page + index - hindex;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2022) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2023)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2024) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2025) * Error recovery.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2026) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2027) unacct:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2028) shmem_inode_unacct_blocks(inode, compound_nr(page));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2029)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2030) if (PageTransHuge(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2031) unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2032) put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2033) goto alloc_nohuge;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2034) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2035) unlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2036) if (page) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2037) unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2038) put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2039) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2040) if (error == -ENOSPC && !once++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2041) spin_lock_irq(&info->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2042) shmem_recalc_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2043) spin_unlock_irq(&info->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2044) goto repeat;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2045) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2046) if (error == -EEXIST)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2047) goto repeat;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2048) return error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2049) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2050)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2051) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2052) * This is like autoremove_wake_function, but it removes the wait queue
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2053) * entry unconditionally - even if something else had already woken the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2054) * target.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2055) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2056) static int synchronous_wake_function(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2057) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2058) int ret = default_wake_function(wait, mode, sync, key);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2059) list_del_init(&wait->entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2060) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2061) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2062)
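/*
 * Fault in a page of a shmem/tmpfs mapping: find it in the page cache,
 * bring it back from swap, or allocate it, via shmem_getpage_gfp(),
 * honouring the vma's transparent-huge-page hints.  While fallocate() is
 * punching a hole over the faulting range, back off and wait instead of
 * instantiating new pages in the hole (see the comment below).
 */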
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2063) static vm_fault_t shmem_fault(struct vm_fault *vmf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2064) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2065) struct vm_area_struct *vma = vmf->vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2066) struct inode *inode = file_inode(vma->vm_file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2067) gfp_t gfp = mapping_gfp_mask(inode->i_mapping);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2068) enum sgp_type sgp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2069) int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2070) vm_fault_t ret = VM_FAULT_LOCKED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2071)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2072) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2073) * Trinity finds that probing a hole which tmpfs is punching can
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2074) * prevent the hole-punch from ever completing: which in turn
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2075) * locks writers out with its hold on i_mutex. So refrain from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2076) * faulting pages into the hole while it's being punched. Although
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2077) * shmem_undo_range() does remove the additions, it may be unable to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2078) * keep up, as each new page needs its own unmap_mapping_range() call,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2079) * and the i_mmap tree grows ever slower to scan if new vmas are added.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2080) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2081) * It does not matter if we sometimes reach this check just before the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2082) * hole-punch begins, so that one fault then races with the punch:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2083) * we just need to make racing faults a rare case.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2084) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2085) * The implementation below would be much simpler if we just used a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2086) * standard mutex or completion: but we cannot take i_mutex in fault,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2087) * and bloating every shmem inode for this unlikely case would be sad.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2088) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2089) if (unlikely(inode->i_private)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2090) struct shmem_falloc *shmem_falloc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2091)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2092) spin_lock(&inode->i_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2093) shmem_falloc = inode->i_private;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2094) if (shmem_falloc &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2095) shmem_falloc->waitq &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2096) vmf->pgoff >= shmem_falloc->start &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2097) vmf->pgoff < shmem_falloc->next) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2098) struct file *fpin;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2099) wait_queue_head_t *shmem_falloc_waitq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2100) DEFINE_WAIT_FUNC(shmem_fault_wait, synchronous_wake_function);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2101)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2102) ret = VM_FAULT_NOPAGE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2103) fpin = maybe_unlock_mmap_for_io(vmf, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2104) if (fpin)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2105) ret = VM_FAULT_RETRY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2106)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2107) shmem_falloc_waitq = shmem_falloc->waitq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2108) prepare_to_wait(shmem_falloc_waitq, &shmem_fault_wait,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2109) TASK_UNINTERRUPTIBLE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2110) spin_unlock(&inode->i_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2111) schedule();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2112)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2113) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2114) * shmem_falloc_waitq points into the shmem_fallocate()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2115) * stack of the hole-punching task: shmem_falloc_waitq
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2116) * is usually invalid by the time we reach here, but
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2117) * finish_wait() does not dereference it in that case;
			 * though the i_lock is needed lest we race with wake_up_all().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2119) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2120) spin_lock(&inode->i_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2121) finish_wait(shmem_falloc_waitq, &shmem_fault_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2122) spin_unlock(&inode->i_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2123)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2124) if (fpin)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2125) fput(fpin);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2126) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2127) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2128) spin_unlock(&inode->i_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2129) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2130)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2131) sgp = SGP_CACHE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2132)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2133) if ((vma->vm_flags & VM_NOHUGEPAGE) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2134) test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2135) sgp = SGP_NOHUGE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2136) else if (vma->vm_flags & VM_HUGEPAGE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2137) sgp = SGP_HUGE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2138)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2139) err = shmem_getpage_gfp(inode, vmf->pgoff, &vmf->page, sgp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2140) gfp, vma, vmf, &ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2141) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2142) return vmf_error(err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2143) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2144) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2145)
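/*
 * Choose an unmapped area for a shmem mapping.  On top of the generic
 * search, try to place the mapping so that it is suitable for huge-page
 * use: if the first attempt is not aligned, redo the search with an
 * inflated length and shift the result so that the mapping's alignment
 * within a huge page matches that of the file offset.
 */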
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2146) unsigned long shmem_get_unmapped_area(struct file *file,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2147) unsigned long uaddr, unsigned long len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2148) unsigned long pgoff, unsigned long flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2149) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2150) unsigned long (*get_area)(struct file *,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2151) unsigned long, unsigned long, unsigned long, unsigned long);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2152) unsigned long addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2153) unsigned long offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2154) unsigned long inflated_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2155) unsigned long inflated_addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2156) unsigned long inflated_offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2157)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2158) if (len > TASK_SIZE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2159) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2160)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2161) get_area = current->mm->get_unmapped_area;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2162) addr = get_area(file, uaddr, len, pgoff, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2163)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2164) if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2165) return addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2166) if (IS_ERR_VALUE(addr))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2167) return addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2168) if (addr & ~PAGE_MASK)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2169) return addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2170) if (addr > TASK_SIZE - len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2171) return addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2172)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2173) if (shmem_huge == SHMEM_HUGE_DENY)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2174) return addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2175) if (len < HPAGE_PMD_SIZE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2176) return addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2177) if (flags & MAP_FIXED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2178) return addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2179) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2180) * Our priority is to support MAP_SHARED mapped hugely;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2181) * and support MAP_PRIVATE mapped hugely too, until it is COWed.
	 * But if the caller specified an address hint and we allocated the
	 * area there successfully, respect that as before.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2184) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2185) if (uaddr == addr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2186) return addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2187)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2188) if (shmem_huge != SHMEM_HUGE_FORCE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2189) struct super_block *sb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2190)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2191) if (file) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2192) VM_BUG_ON(file->f_op != &shmem_file_operations);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2193) sb = file_inode(file)->i_sb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2194) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2195) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2196) * Called directly from mm/mmap.c, or drivers/char/mem.c
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2197) * for "/dev/zero", to create a shared anonymous object.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2198) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2199) if (IS_ERR(shm_mnt))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2200) return addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2201) sb = shm_mnt->mnt_sb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2202) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2203) if (SHMEM_SB(sb)->huge == SHMEM_HUGE_NEVER)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2204) return addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2205) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2206)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2207) offset = (pgoff << PAGE_SHIFT) & (HPAGE_PMD_SIZE-1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2208) if (offset && offset + len < 2 * HPAGE_PMD_SIZE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2209) return addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2210) if ((addr & (HPAGE_PMD_SIZE-1)) == offset)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2211) return addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2212)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2213) inflated_len = len + HPAGE_PMD_SIZE - PAGE_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2214) if (inflated_len > TASK_SIZE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2215) return addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2216) if (inflated_len < len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2217) return addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2218)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2219) inflated_addr = get_area(NULL, uaddr, inflated_len, 0, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2220) if (IS_ERR_VALUE(inflated_addr))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2221) return addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2222) if (inflated_addr & ~PAGE_MASK)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2223) return addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2224)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2225) inflated_offset = inflated_addr & (HPAGE_PMD_SIZE-1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2226) inflated_addr += offset - inflated_offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2227) if (inflated_offset > offset)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2228) inflated_addr += HPAGE_PMD_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2229)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2230) if (inflated_addr > TASK_SIZE - len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2231) return addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2232) return inflated_addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2233) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2234)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2235) #ifdef CONFIG_NUMA
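/* NUMA mempolicy: set and look up the shared policy for a mapped range */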
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2236) static int shmem_set_policy(struct vm_area_struct *vma, struct mempolicy *mpol)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2237) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2238) struct inode *inode = file_inode(vma->vm_file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2239) return mpol_set_shared_policy(&SHMEM_I(inode)->policy, vma, mpol);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2240) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2241)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2242) static struct mempolicy *shmem_get_policy(struct vm_area_struct *vma,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2243) unsigned long addr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2244) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2245) struct inode *inode = file_inode(vma->vm_file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2246) pgoff_t index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2247)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2248) index = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2249) return mpol_shared_policy_lookup(&SHMEM_I(inode)->policy, index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2250) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2251) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2252)
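/*
 * SHM_LOCK/SHM_UNLOCK support for SysV shared memory: charge or uncharge
 * the segment against the user's locked-memory limit, and mark its pages
 * unevictable (or evictable again) for reclaim.
 */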
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2253) int shmem_lock(struct file *file, int lock, struct user_struct *user)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2254) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2255) struct inode *inode = file_inode(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2256) struct shmem_inode_info *info = SHMEM_I(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2257) int retval = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2258)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2259) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2260) * What serializes the accesses to info->flags?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2261) * ipc_lock_object() when called from shmctl_do_lock(),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2262) * no serialization needed when called from shm_destroy().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2263) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2264) if (lock && !(info->flags & VM_LOCKED)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2265) if (!user_shm_lock(inode->i_size, user))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2266) goto out_nomem;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2267) info->flags |= VM_LOCKED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2268) mapping_set_unevictable(file->f_mapping);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2269) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2270) if (!lock && (info->flags & VM_LOCKED) && user) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2271) user_shm_unlock(inode->i_size, user);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2272) info->flags &= ~VM_LOCKED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2273) mapping_clear_unevictable(file->f_mapping);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2274) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2275) retval = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2276)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2277) out_nomem:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2278) return retval;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2279) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2280)
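/*
 * mmap() of a shmem file: honour write seals, install shmem_vm_ops, and
 * let khugepaged know when the vma spans at least one huge-page-aligned
 * extent that it could collapse.
 */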
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2281) static int shmem_mmap(struct file *file, struct vm_area_struct *vma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2282) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2283) struct shmem_inode_info *info = SHMEM_I(file_inode(file));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2284) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2285)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2286) ret = seal_check_future_write(info->seals, vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2287) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2288) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2289)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2290) /* arm64 - allow memory tagging on RAM-based files */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2291) vma->vm_flags |= VM_MTE_ALLOWED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2292)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2293) file_accessed(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2294) vma->vm_ops = &shmem_vm_ops;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2295) if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2296) ((vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK) <
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2297) (vma->vm_end & HPAGE_PMD_MASK)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2298) khugepaged_enter(vma, vma->vm_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2299) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2300) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2301) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2302)
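/*
 * Allocate and initialise a new shmem inode of the given type (regular
 * file, directory, symlink or special), setting up its operations,
 * mempolicy and xattr state.  Returns NULL if the filesystem's inode
 * limit has been reached or the allocation fails.
 */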
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2303) static struct inode *shmem_get_inode(struct super_block *sb, const struct inode *dir,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2304) umode_t mode, dev_t dev, unsigned long flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2305) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2306) struct inode *inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2307) struct shmem_inode_info *info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2308) struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2309) ino_t ino;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2310)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2311) if (shmem_reserve_inode(sb, &ino))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2312) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2313)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2314) inode = new_inode(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2315) if (inode) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2316) inode->i_ino = ino;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2317) inode_init_owner(inode, dir, mode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2318) inode->i_blocks = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2319) inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2320) inode->i_generation = prandom_u32();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2321) info = SHMEM_I(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2322) memset(info, 0, (char *)inode - (char *)info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2323) spin_lock_init(&info->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2324) atomic_set(&info->stop_eviction, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2325) info->seals = F_SEAL_SEAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2326) info->flags = flags & VM_NORESERVE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2327) INIT_LIST_HEAD(&info->shrinklist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2328) INIT_LIST_HEAD(&info->swaplist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2329) simple_xattrs_init(&info->xattrs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2330) cache_no_acl(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2331)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2332) switch (mode & S_IFMT) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2333) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2334) inode->i_op = &shmem_special_inode_operations;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2335) init_special_inode(inode, mode, dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2336) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2337) case S_IFREG:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2338) inode->i_mapping->a_ops = &shmem_aops;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2339) inode->i_op = &shmem_inode_operations;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2340) inode->i_fop = &shmem_file_operations;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2341) mpol_shared_policy_init(&info->policy,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2342) shmem_get_sbmpol(sbinfo));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2343) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2344) case S_IFDIR:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2345) inc_nlink(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2346) /* Some things misbehave if size == 0 on a directory */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2347) inode->i_size = 2 * BOGO_DIRENT_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2348) inode->i_op = &shmem_dir_inode_operations;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2349) inode->i_fop = &simple_dir_operations;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2350) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2351) case S_IFLNK:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2352) /*
			 * Must not load anything into the rbtree:
			 * mpol_free_shared_policy() will not be called.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2355) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2356) mpol_shared_policy_init(&info->policy, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2357) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2358) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2359)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2360) lockdep_annotate_inode_mutex_key(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2361) } else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2362) shmem_free_inode(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2363) return inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2364) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2365)
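/* Is this address_space backed by shmem/tmpfs? */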
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2366) bool shmem_mapping(struct address_space *mapping)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2367) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2368) return mapping->a_ops == &shmem_aops;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2369) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2370)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2371) #ifdef CONFIG_USERFAULTFD
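/*
 * Resolve a userfaultfd UFFDIO_COPY/UFFDIO_ZEROPAGE request on a shmem
 * vma: allocate a page, fill it from @src_addr (or clear it), charge it
 * to the inode, and add it to the page cache at @pgoff.  Returns -ENOENT
 * when the user copy must be retried outside mmap_lock, handing the
 * half-prepared page back in *pagep.
 */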
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2372) int shmem_mfill_atomic_pte(struct mm_struct *dst_mm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2373) pmd_t *dst_pmd,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2374) struct vm_area_struct *dst_vma,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2375) unsigned long dst_addr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2376) unsigned long src_addr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2377) bool zeropage,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2378) struct page **pagep)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2379) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2380) struct inode *inode = file_inode(dst_vma->vm_file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2381) struct shmem_inode_info *info = SHMEM_I(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2382) struct address_space *mapping = inode->i_mapping;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2383) gfp_t gfp = mapping_gfp_mask(mapping);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2384) pgoff_t pgoff = linear_page_index(dst_vma, dst_addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2385) void *page_kaddr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2386) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2387) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2388) pgoff_t max_off;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2389)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2390) if (!shmem_inode_acct_block(inode, 1)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2391) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2392) * We may have got a page, returned -ENOENT triggering a retry,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2393) * and now we find ourselves with -ENOMEM. Release the page, to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2394) * avoid a BUG_ON in our caller.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2395) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2396) if (unlikely(*pagep)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2397) put_page(*pagep);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2398) *pagep = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2399) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2400) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2401) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2402)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2403) if (!*pagep) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2404) ret = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2405) page = shmem_alloc_page(gfp, info, pgoff);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2406) if (!page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2407) goto out_unacct_blocks;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2408)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2409) if (!zeropage) { /* COPY */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2410) page_kaddr = kmap_atomic(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2411) ret = copy_from_user(page_kaddr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2412) (const void __user *)src_addr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2413) PAGE_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2414) kunmap_atomic(page_kaddr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2415)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2416) /* fallback to copy_from_user outside mmap_lock */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2417) if (unlikely(ret)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2418) *pagep = page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2419) ret = -ENOENT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2420) /* don't free the page */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2421) goto out_unacct_blocks;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2422) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2423) } else { /* ZEROPAGE */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2424) clear_highpage(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2425) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2426) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2427) page = *pagep;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2428) *pagep = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2429) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2430)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2431) VM_BUG_ON(PageLocked(page));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2432) VM_BUG_ON(PageSwapBacked(page));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2433) __SetPageLocked(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2434) __SetPageSwapBacked(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2435) __SetPageUptodate(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2436)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2437) ret = -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2438) max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2439) if (unlikely(pgoff >= max_off))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2440) goto out_release;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2441)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2442) ret = shmem_add_to_page_cache(page, mapping, pgoff, NULL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2443) gfp & GFP_RECLAIM_MASK, dst_mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2444) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2445) goto out_release;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2446)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2447) ret = mfill_atomic_install_pte(dst_mm, dst_pmd, dst_vma, dst_addr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2448) page, true, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2449) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2450) goto out_delete_from_cache;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2451)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2452) spin_lock_irq(&info->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2453) info->alloced++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2454) inode->i_blocks += BLOCKS_PER_PAGE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2455) shmem_recalc_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2456) spin_unlock_irq(&info->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2457)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2458) SetPageDirty(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2459) unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2460) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2461) out_delete_from_cache:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2462) delete_from_page_cache(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2463) out_release:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2464) unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2465) put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2466) out_unacct_blocks:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2467) shmem_inode_unacct_blocks(inode, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2468) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2469) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2470) #endif /* CONFIG_USERFAULTFD */
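/*
 * Illustrative userspace sketch (not part of this file): shmem_mfill_atomic_pte()
 * above is normally reached through userfaultfd(2), when a monitor thread
 * resolves a missing-page fault on a shmem mapping with UFFDIO_COPY or
 * UFFDIO_ZEROPAGE.  A minimal, hedged example of that flow; names, sizes and
 * the lack of error handling are illustrative only:
 *
 *	#define _GNU_SOURCE
 *	#include <fcntl.h>
 *	#include <linux/userfaultfd.h>
 *	#include <sys/ioctl.h>
 *	#include <sys/mman.h>
 *	#include <sys/syscall.h>
 *	#include <unistd.h>
 *
 *	int uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
 *	struct uffdio_api api = { .api = UFFD_API };
 *	ioctl(uffd, UFFDIO_API, &api);
 *
 *	int fd = memfd_create("demo", 0);		// shmem-backed file
 *	ftruncate(fd, 4096);
 *	char *dst = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
 *
 *	struct uffdio_register reg = {
 *		.range = { .start = (unsigned long)dst, .len = 4096 },
 *		.mode  = UFFDIO_REGISTER_MODE_MISSING,
 *	};
 *	ioctl(uffd, UFFDIO_REGISTER, &reg);
 *
 *	// Issued by the monitor thread after reading a fault event from uffd;
 *	// for a shmem VMA this is serviced by shmem_mfill_atomic_pte() above.
 *	char src[4096];
 *	struct uffdio_copy copy = {
 *		.dst = (unsigned long)dst, .src = (unsigned long)src,
 *		.len = 4096, .mode = 0,
 *	};
 *	ioctl(uffd, UFFDIO_COPY, &copy);
 */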
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2471)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2472) #ifdef CONFIG_TMPFS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2473) static const struct inode_operations shmem_symlink_inode_operations;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2474) static const struct inode_operations shmem_short_symlink_operations;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2475)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2476) #ifdef CONFIG_TMPFS_XATTR
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2477) static int shmem_initxattrs(struct inode *, const struct xattr *, void *);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2478) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2479) #define shmem_initxattrs NULL
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2480) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2481)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2482) static int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2483) shmem_write_begin(struct file *file, struct address_space *mapping,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2484) loff_t pos, unsigned len, unsigned flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2485) struct page **pagep, void **fsdata)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2486) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2487) struct inode *inode = mapping->host;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2488) struct shmem_inode_info *info = SHMEM_I(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2489) pgoff_t index = pos >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2490)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2491) /* i_mutex is held by caller */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2492) if (unlikely(info->seals & (F_SEAL_GROW |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2493) F_SEAL_WRITE | F_SEAL_FUTURE_WRITE))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2494) if (info->seals & (F_SEAL_WRITE | F_SEAL_FUTURE_WRITE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2495) return -EPERM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2496) if ((info->seals & F_SEAL_GROW) && pos + len > inode->i_size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2497) return -EPERM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2498) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2499)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2500) return shmem_getpage(inode, index, pagep, SGP_WRITE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2501) }
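/*
 * Illustrative userspace sketch (not part of this file): the seal checks in
 * shmem_write_begin() above are what make buffered writes fail on a sealed
 * memfd.  A hedged example, assuming a kernel and glibc with memfd sealing
 * support; the name "sealed" and the payload are illustrative:
 *
 *	#define _GNU_SOURCE
 *	#include <errno.h>
 *	#include <fcntl.h>
 *	#include <sys/mman.h>
 *	#include <unistd.h>
 *
 *	int fd = memfd_create("sealed", MFD_ALLOW_SEALING);
 *	write(fd, "hello", 5);				// succeeds
 *	fcntl(fd, F_ADD_SEALS, F_SEAL_GROW | F_SEAL_WRITE);
 *	ssize_t n = write(fd, "world", 5);		// fails
 *	// n == -1 && errno == EPERM, rejected by shmem_write_begin()
 */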
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2502)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2503) static int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2504) shmem_write_end(struct file *file, struct address_space *mapping,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2505) loff_t pos, unsigned len, unsigned copied,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2506) struct page *page, void *fsdata)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2507) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2508) struct inode *inode = mapping->host;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2509)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2510) if (pos + copied > inode->i_size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2511) i_size_write(inode, pos + copied);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2512)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2513) if (!PageUptodate(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2514) struct page *head = compound_head(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2515) if (PageTransCompound(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2516) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2517)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2518) for (i = 0; i < HPAGE_PMD_NR; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2519) if (head + i == page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2520) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2521) clear_highpage(head + i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2522) flush_dcache_page(head + i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2523) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2524) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2525) if (copied < PAGE_SIZE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2526) unsigned from = pos & (PAGE_SIZE - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2527) zero_user_segments(page, 0, from,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2528) from + copied, PAGE_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2529) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2530) SetPageUptodate(head);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2531) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2532) set_page_dirty(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2533) unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2534) put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2535)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2536) return copied;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2537) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2538)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2539) static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2540) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2541) struct file *file = iocb->ki_filp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2542) struct inode *inode = file_inode(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2543) struct address_space *mapping = inode->i_mapping;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2544) pgoff_t index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2545) unsigned long offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2546) enum sgp_type sgp = SGP_READ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2547) int error = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2548) ssize_t retval = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2549) loff_t *ppos = &iocb->ki_pos;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2550)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2551) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2552) * Might this read be for a stacking filesystem? Then when reading
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2553) * holes of a sparse file, we actually need to allocate those pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2554) * and even mark them dirty, so it cannot exceed the max_blocks limit.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2555) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2556) if (!iter_is_iovec(to))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2557) sgp = SGP_CACHE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2558)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2559) index = *ppos >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2560) offset = *ppos & ~PAGE_MASK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2561)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2562) for (;;) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2563) struct page *page = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2564) pgoff_t end_index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2565) unsigned long nr, ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2566) loff_t i_size = i_size_read(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2567)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2568) end_index = i_size >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2569) if (index > end_index)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2570) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2571) if (index == end_index) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2572) nr = i_size & ~PAGE_MASK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2573) if (nr <= offset)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2574) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2575) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2576)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2577) error = shmem_getpage(inode, index, &page, sgp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2578) if (error) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2579) if (error == -EINVAL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2580) error = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2581) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2582) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2583) if (page) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2584) if (sgp == SGP_CACHE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2585) set_page_dirty(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2586) unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2587) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2588)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2589) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2590) * We must re-check i_size after getting the page, since reads (unlike
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2591) * writes) are called without i_mutex protection against truncate.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2592) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2593) nr = PAGE_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2594) i_size = i_size_read(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2595) end_index = i_size >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2596) if (index == end_index) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2597) nr = i_size & ~PAGE_MASK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2598) if (nr <= offset) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2599) if (page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2600) put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2601) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2602) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2603) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2604) nr -= offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2605)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2606) if (page) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2607) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2608) * If users can be writing to this page using arbitrary
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2609) * virtual addresses, take care about potential aliasing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2610) * before reading the page on the kernel side.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2611) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2612) if (mapping_writably_mapped(mapping))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2613) flush_dcache_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2614) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2615) * Mark the page accessed if we read the beginning.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2616) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2617) if (!offset)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2618) mark_page_accessed(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2619) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2620) page = ZERO_PAGE(0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2621) get_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2622) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2623)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2624) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2625) * Ok, we have the page, and it's up-to-date, so
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2626) * now we can copy it to user space...
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2627) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2628) ret = copy_page_to_iter(page, offset, nr, to);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2629) retval += ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2630) offset += ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2631) index += offset >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2632) offset &= ~PAGE_MASK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2633)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2634) put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2635) if (!iov_iter_count(to))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2636) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2637) if (ret < nr) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2638) error = -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2639) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2640) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2641) cond_resched();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2642) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2643)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2644) *ppos = ((loff_t) index << PAGE_SHIFT) + offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2645) file_accessed(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2646) return retval ? retval : error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2647) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2648)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2649) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2650) * llseek SEEK_DATA or SEEK_HOLE through the page cache.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2651) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2652) static pgoff_t shmem_seek_hole_data(struct address_space *mapping,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2653) pgoff_t index, pgoff_t end, int whence)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2654) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2655) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2656) struct pagevec pvec;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2657) pgoff_t indices[PAGEVEC_SIZE];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2658) bool done = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2659) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2660)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2661) pagevec_init(&pvec);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2662) pvec.nr = 1; /* start small: we may be there already */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2663) while (!done) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2664) pvec.nr = find_get_entries(mapping, index,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2665) pvec.nr, pvec.pages, indices);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2666) if (!pvec.nr) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2667) if (whence == SEEK_DATA)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2668) index = end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2669) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2670) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2671) for (i = 0; i < pvec.nr; i++, index++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2672) if (index < indices[i]) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2673) if (whence == SEEK_HOLE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2674) done = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2675) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2676) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2677) index = indices[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2678) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2679) page = pvec.pages[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2680) if (page && !xa_is_value(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2681) if (!PageUptodate(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2682) page = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2683) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2684) if (index >= end ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2685) (page && whence == SEEK_DATA) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2686) (!page && whence == SEEK_HOLE)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2687) done = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2688) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2689) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2690) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2691) pagevec_remove_exceptionals(&pvec);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2692) pagevec_release(&pvec);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2693) pvec.nr = PAGEVEC_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2694) cond_resched();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2695) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2696) return index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2697) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2698)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2699) static loff_t shmem_file_llseek(struct file *file, loff_t offset, int whence)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2700) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2701) struct address_space *mapping = file->f_mapping;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2702) struct inode *inode = mapping->host;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2703) pgoff_t start, end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2704) loff_t new_offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2705)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2706) if (whence != SEEK_DATA && whence != SEEK_HOLE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2707) return generic_file_llseek_size(file, offset, whence,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2708) MAX_LFS_FILESIZE, i_size_read(inode));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2709) inode_lock(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2710) /* We're holding i_mutex so we can access i_size directly */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2711)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2712) if (offset < 0 || offset >= inode->i_size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2713) offset = -ENXIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2714) else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2715) start = offset >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2716) end = (inode->i_size + PAGE_SIZE - 1) >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2717) new_offset = shmem_seek_hole_data(mapping, start, end, whence);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2718) new_offset <<= PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2719) if (new_offset > offset) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2720) if (new_offset < inode->i_size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2721) offset = new_offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2722) else if (whence == SEEK_DATA)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2723) offset = -ENXIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2724) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2725) offset = inode->i_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2726) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2727) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2728)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2729) if (offset >= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2730) offset = vfs_setpos(file, offset, MAX_LFS_FILESIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2731) inode_unlock(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2732) return offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2733) }
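/*
 * Illustrative userspace sketch (not part of this file): how the SEEK_DATA
 * and SEEK_HOLE support above looks on a shmem/tmpfs file.  A hedged example;
 * offsets assume a 4K page size and the file name is illustrative:
 *
 *	#define _GNU_SOURCE
 *	#include <sys/mman.h>
 *	#include <unistd.h>
 *
 *	int fd = memfd_create("sparse", 0);
 *	ftruncate(fd, 8192);				// two-page hole
 *	pwrite(fd, "x", 1, 4096);			// second page becomes data
 *
 *	off_t data = lseek(fd, 0, SEEK_DATA);		// expected: 4096
 *	off_t hole = lseek(fd, 4096, SEEK_HOLE);	// expected: 8192 (EOF)
 *	off_t none = lseek(fd, 8192, SEEK_DATA);	// expected: -1, errno ENXIO
 */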
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2734)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2735) static long shmem_fallocate(struct file *file, int mode, loff_t offset,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2736) loff_t len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2737) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2738) struct inode *inode = file_inode(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2739) struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2740) struct shmem_inode_info *info = SHMEM_I(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2741) struct shmem_falloc shmem_falloc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2742) pgoff_t start, index, end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2743) int error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2744)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2745) if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2746) return -EOPNOTSUPP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2747)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2748) inode_lock(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2749)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2750) if (mode & FALLOC_FL_PUNCH_HOLE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2751) struct address_space *mapping = file->f_mapping;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2752) loff_t unmap_start = round_up(offset, PAGE_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2753) loff_t unmap_end = round_down(offset + len, PAGE_SIZE) - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2754) DECLARE_WAIT_QUEUE_HEAD_ONSTACK(shmem_falloc_waitq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2755)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2756) /* protected by i_mutex */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2757) if (info->seals & (F_SEAL_WRITE | F_SEAL_FUTURE_WRITE)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2758) error = -EPERM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2759) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2760) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2761)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2762) shmem_falloc.waitq = &shmem_falloc_waitq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2763) shmem_falloc.start = (u64)unmap_start >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2764) shmem_falloc.next = (unmap_end + 1) >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2765) spin_lock(&inode->i_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2766) inode->i_private = &shmem_falloc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2767) spin_unlock(&inode->i_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2768)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2769) if ((u64)unmap_end > (u64)unmap_start)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2770) unmap_mapping_range(mapping, unmap_start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2771) 1 + unmap_end - unmap_start, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2772) shmem_truncate_range(inode, offset, offset + len - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2773) /* No need to unmap again: hole-punching leaves COWed pages */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2774)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2775) spin_lock(&inode->i_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2776) inode->i_private = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2777) wake_up_all(&shmem_falloc_waitq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2778) WARN_ON_ONCE(!list_empty(&shmem_falloc_waitq.head));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2779) spin_unlock(&inode->i_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2780) error = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2781) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2782) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2783)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2784) /* We need to check rlimit even when FALLOC_FL_KEEP_SIZE */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2785) error = inode_newsize_ok(inode, offset + len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2786) if (error)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2787) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2788)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2789) if ((info->seals & F_SEAL_GROW) && offset + len > inode->i_size) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2790) error = -EPERM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2791) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2792) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2793)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2794) start = offset >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2795) end = (offset + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2796) /* Try to avoid a swapstorm if len is impossible to satisfy */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2797) if (sbinfo->max_blocks && end - start > sbinfo->max_blocks) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2798) error = -ENOSPC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2799) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2800) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2801)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2802) shmem_falloc.waitq = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2803) shmem_falloc.start = start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2804) shmem_falloc.next = start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2805) shmem_falloc.nr_falloced = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2806) shmem_falloc.nr_unswapped = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2807) spin_lock(&inode->i_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2808) inode->i_private = &shmem_falloc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2809) spin_unlock(&inode->i_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2810)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2811) for (index = start; index < end; index++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2812) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2813)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2814) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2815) * Good, the fallocate(2) manpage permits EINTR: we may have
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2816) * been interrupted because we are using up too much memory.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2817) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2818) if (signal_pending(current))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2819) error = -EINTR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2820) else if (shmem_falloc.nr_unswapped > shmem_falloc.nr_falloced)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2821) error = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2822) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2823) error = shmem_getpage(inode, index, &page, SGP_FALLOC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2824) if (error) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2825) /* Remove the !PageUptodate pages we added */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2826) if (index > start) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2827) shmem_undo_range(inode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2828) (loff_t)start << PAGE_SHIFT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2829) ((loff_t)index << PAGE_SHIFT) - 1, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2830) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2831) goto undone;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2832) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2833)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2834) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2835) * Inform shmem_writepage() how far we have reached.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2836) * No need for lock or barrier: we have the page lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2837) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2838) shmem_falloc.next++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2839) if (!PageUptodate(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2840) shmem_falloc.nr_falloced++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2841)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2842) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2843) * If !PageUptodate, leave it that way so that freeable pages
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2844) * can be recognized if we need to rollback on error later.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2845) * But set_page_dirty so that memory pressure will swap rather
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2846) * than free the pages we are allocating (and SGP_CACHE pages
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2847) * might still be clean: we now need to mark those dirty too).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2848) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2849) set_page_dirty(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2850) unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2851) put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2852) cond_resched();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2853) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2854)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2855) if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > inode->i_size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2856) i_size_write(inode, offset + len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2857) inode->i_ctime = current_time(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2858) undone:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2859) spin_lock(&inode->i_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2860) inode->i_private = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2861) spin_unlock(&inode->i_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2862) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2863) inode_unlock(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2864) return error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2865) }
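/*
 * Illustrative userspace sketch (not part of this file): the two fallocate(2)
 * modes handled above, as seen from userspace.  A hedged example; note that
 * FALLOC_FL_PUNCH_HOLE must be combined with FALLOC_FL_KEEP_SIZE, and the
 * sizes are illustrative:
 *
 *	#define _GNU_SOURCE
 *	#include <fcntl.h>
 *	#include <sys/mman.h>
 *	#include <unistd.h>
 *
 *	int fd = memfd_create("falloc", 0);
 *
 *	// Preallocate 1MiB of shmem pages (may fail with ENOSPC or EINTR).
 *	fallocate(fd, 0, 0, 1 << 20);
 *
 *	// Free the first 64KiB again without changing i_size.
 *	fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, 64 << 10);
 */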
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2866)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2867) static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2868) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2869) struct shmem_sb_info *sbinfo = SHMEM_SB(dentry->d_sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2870)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2871) buf->f_type = TMPFS_MAGIC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2872) buf->f_bsize = PAGE_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2873) buf->f_namelen = NAME_MAX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2874) if (sbinfo->max_blocks) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2875) buf->f_blocks = sbinfo->max_blocks;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2876) buf->f_bavail =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2877) buf->f_bfree = sbinfo->max_blocks -
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2878) percpu_counter_sum(&sbinfo->used_blocks);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2879) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2880) if (sbinfo->max_inodes) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2881) buf->f_files = sbinfo->max_inodes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2882) buf->f_ffree = sbinfo->free_inodes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2883) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2884) /* else leave those fields 0 like simple_statfs */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2885) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2886) }
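/*
 * Illustrative userspace sketch (not part of this file): what the fields
 * filled in by shmem_statfs() above look like from userspace.  A hedged
 * example; /dev/shm stands in for any tmpfs mount, and on an unlimited
 * tmpfs (max_blocks == 0) the block and inode fields stay zero:
 *
 *	#include <linux/magic.h>
 *	#include <sys/vfs.h>
 *
 *	struct statfs st;
 *	statfs("/dev/shm", &st);
 *	// st.f_type   == TMPFS_MAGIC
 *	// st.f_bsize  == PAGE_SIZE, st.f_blocks == the "size=" limit in pages
 *	// st.f_bfree  == blocks not yet consumed by file pages
 */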
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2887)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2888) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2889) * File creation. Allocate an inode, and we're done.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2890) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2891) static int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2892) shmem_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2893) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2894) struct inode *inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2895) int error = -ENOSPC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2896)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2897) inode = shmem_get_inode(dir->i_sb, dir, mode, dev, VM_NORESERVE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2898) if (inode) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2899) error = simple_acl_create(dir, inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2900) if (error)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2901) goto out_iput;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2902) error = security_inode_init_security(inode, dir,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2903) &dentry->d_name,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2904) shmem_initxattrs, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2905) if (error && error != -EOPNOTSUPP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2906) goto out_iput;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2907)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2908) error = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2909) dir->i_size += BOGO_DIRENT_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2910) dir->i_ctime = dir->i_mtime = current_time(dir);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2911) d_instantiate(dentry, inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2912) dget(dentry); /* Extra count - pin the dentry in core */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2913) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2914) return error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2915) out_iput:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2916) iput(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2917) return error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2918) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2919)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2920) static int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2921) shmem_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2922) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2923) struct inode *inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2924) int error = -ENOSPC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2925)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2926) inode = shmem_get_inode(dir->i_sb, dir, mode, 0, VM_NORESERVE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2927) if (inode) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2928) error = security_inode_init_security(inode, dir,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2929) NULL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2930) shmem_initxattrs, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2931) if (error && error != -EOPNOTSUPP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2932) goto out_iput;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2933) error = simple_acl_create(dir, inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2934) if (error)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2935) goto out_iput;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2936) d_tmpfile(dentry, inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2937) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2938) return error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2939) out_iput:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2940) iput(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2941) return error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2942) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2943)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2944) static int shmem_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2945) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2946) int error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2947)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2948) if ((error = shmem_mknod(dir, dentry, mode | S_IFDIR, 0)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2949) return error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2950) inc_nlink(dir);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2951) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2952) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2953)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2954) static int shmem_create(struct inode *dir, struct dentry *dentry, umode_t mode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2955) bool excl)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2956) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2957) return shmem_mknod(dir, dentry, mode | S_IFREG, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2958) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2959)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2960) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2961) * Link a file.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2962) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2963) static int shmem_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2964) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2965) struct inode *inode = d_inode(old_dentry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2966) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2967)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2968) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2969) * No ordinary (disk based) filesystem counts links as inodes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2970) * but each new link needs a new dentry, pinning lowmem, and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2971) * tmpfs dentries cannot be pruned until they are unlinked.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2972) * But if an O_TMPFILE file is linked into the tmpfs, the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2973) * first link must skip that, to get the accounting right.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2974) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2975) if (inode->i_nlink) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2976) ret = shmem_reserve_inode(inode->i_sb, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2977) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2978) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2979) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2980)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2981) dir->i_size += BOGO_DIRENT_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2982) inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2983) inc_nlink(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2984) ihold(inode); /* New dentry reference */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2985) dget(dentry); /* Extra pinning count for the created dentry */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2986) d_instantiate(dentry, inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2987) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2988) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2989) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2990)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2991) static int shmem_unlink(struct inode *dir, struct dentry *dentry)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2992) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2993) struct inode *inode = d_inode(dentry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2994)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2995) if (inode->i_nlink > 1 && !S_ISDIR(inode->i_mode))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2996) shmem_free_inode(inode->i_sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2997)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2998) dir->i_size -= BOGO_DIRENT_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2999) inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3000) drop_nlink(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3001) dput(dentry); /* Undo the count from "create" - this does all the work */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3002) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3003) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3004)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3005) static int shmem_rmdir(struct inode *dir, struct dentry *dentry)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3006) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3007) if (!simple_empty(dentry))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3008) return -ENOTEMPTY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3009)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3010) drop_nlink(d_inode(dentry));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3011) drop_nlink(dir);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3012) return shmem_unlink(dir, dentry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3013) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3014)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3015) static int shmem_exchange(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3016) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3017) bool old_is_dir = d_is_dir(old_dentry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3018) bool new_is_dir = d_is_dir(new_dentry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3019)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3020) if (old_dir != new_dir && old_is_dir != new_is_dir) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3021) if (old_is_dir) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3022) drop_nlink(old_dir);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3023) inc_nlink(new_dir);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3024) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3025) drop_nlink(new_dir);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3026) inc_nlink(old_dir);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3027) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3028) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3029) old_dir->i_ctime = old_dir->i_mtime =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3030) new_dir->i_ctime = new_dir->i_mtime =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3031) d_inode(old_dentry)->i_ctime =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3032) d_inode(new_dentry)->i_ctime = current_time(old_dir);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3033)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3034) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3035) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3036)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3037) static int shmem_whiteout(struct inode *old_dir, struct dentry *old_dentry)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3038) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3039) struct dentry *whiteout;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3040) int error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3041)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3042) whiteout = d_alloc(old_dentry->d_parent, &old_dentry->d_name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3043) if (!whiteout)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3044) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3045)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3046) error = shmem_mknod(old_dir, whiteout,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3047) S_IFCHR | WHITEOUT_MODE, WHITEOUT_DEV);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3048) dput(whiteout);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3049) if (error)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3050) return error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3051)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3052) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3053) * Cheat and hash the whiteout while the old dentry is still in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3054) * place, instead of playing games with FS_RENAME_DOES_D_MOVE.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3055) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3056) * d_lookup() will consistently find one of them at this point,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3057) * not sure which one, but that isn't even important.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3058) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3059) d_rehash(whiteout);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3060) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3061) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3062)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3063) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3064) * The VFS layer already does all the dentry stuff for rename,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3065) * we just have to decrement the usage count for the target if
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3066) * it exists so that the VFS layer correctly frees it when it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3067) * gets overwritten.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3068) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3069) static int shmem_rename2(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3070) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3071) struct inode *inode = d_inode(old_dentry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3072) int they_are_dirs = S_ISDIR(inode->i_mode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3073)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3074) if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3075) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3076)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3077) if (flags & RENAME_EXCHANGE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3078) return shmem_exchange(old_dir, old_dentry, new_dir, new_dentry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3079)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3080) if (!simple_empty(new_dentry))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3081) return -ENOTEMPTY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3082)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3083) if (flags & RENAME_WHITEOUT) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3084) int error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3085)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3086) error = shmem_whiteout(old_dir, old_dentry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3087) if (error)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3088) return error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3089) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3090)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3091) if (d_really_is_positive(new_dentry)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3092) (void) shmem_unlink(new_dir, new_dentry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3093) if (they_are_dirs) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3094) drop_nlink(d_inode(new_dentry));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3095) drop_nlink(old_dir);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3096) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3097) } else if (they_are_dirs) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3098) drop_nlink(old_dir);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3099) inc_nlink(new_dir);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3100) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3101)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3102) old_dir->i_size -= BOGO_DIRENT_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3103) new_dir->i_size += BOGO_DIRENT_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3104) old_dir->i_ctime = old_dir->i_mtime =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3105) new_dir->i_ctime = new_dir->i_mtime =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3106) inode->i_ctime = current_time(old_dir);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3107) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3108) }
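/*
 * Illustrative userspace sketch (not part of this file): the rename flags
 * handled by shmem_rename2() above map to renameat2(2).  A hedged example
 * using the raw syscall, since older glibc may lack a renameat2() wrapper;
 * the paths assume /tmp is a tmpfs mount, and RENAME_WHITEOUT normally
 * requires CAP_MKNOD:
 *
 *	#include <fcntl.h>
 *	#include <linux/fs.h>
 *	#include <sys/syscall.h>
 *	#include <unistd.h>
 *
 *	// Atomically swap two tmpfs entries (shmem_exchange() above).
 *	syscall(SYS_renameat2, AT_FDCWD, "/tmp/a", AT_FDCWD, "/tmp/b",
 *		RENAME_EXCHANGE);
 *
 *	// Rename and leave a whiteout behind (shmem_whiteout() above),
 *	// as overlayfs does for its upper layer.
 *	syscall(SYS_renameat2, AT_FDCWD, "/tmp/a", AT_FDCWD, "/tmp/c",
 *		RENAME_WHITEOUT);
 */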
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3109)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3110) static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3111) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3112) int error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3113) int len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3114) struct inode *inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3115) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3116)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3117) len = strlen(symname) + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3118) if (len > PAGE_SIZE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3119) return -ENAMETOOLONG;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3120)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3121) inode = shmem_get_inode(dir->i_sb, dir, S_IFLNK | 0777, 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3122) VM_NORESERVE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3123) if (!inode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3124) return -ENOSPC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3125)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3126) error = security_inode_init_security(inode, dir, &dentry->d_name,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3127) shmem_initxattrs, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3128) if (error && error != -EOPNOTSUPP) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3129) iput(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3130) return error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3131) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3132)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3133) inode->i_size = len-1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3134) if (len <= SHORT_SYMLINK_LEN) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3135) inode->i_link = kmemdup(symname, len, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3136) if (!inode->i_link) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3137) iput(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3138) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3139) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3140) inode->i_op = &shmem_short_symlink_operations;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3141) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3142) inode_nohighmem(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3143) error = shmem_getpage(inode, 0, &page, SGP_WRITE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3144) if (error) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3145) iput(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3146) return error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3147) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3148) inode->i_mapping->a_ops = &shmem_aops;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3149) inode->i_op = &shmem_symlink_inode_operations;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3150) memcpy(page_address(page), symname, len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3151) SetPageUptodate(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3152) set_page_dirty(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3153) unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3154) put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3155) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3156) dir->i_size += BOGO_DIRENT_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3157) dir->i_ctime = dir->i_mtime = current_time(dir);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3158) d_instantiate(dentry, inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3159) dget(dentry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3160) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3161) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3162)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3163) static void shmem_put_link(void *arg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3164) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3165) mark_page_accessed(arg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3166) put_page(arg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3167) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3168)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3169) static const char *shmem_get_link(struct dentry *dentry,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3170) struct inode *inode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3171) struct delayed_call *done)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3172) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3173) struct page *page = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3174) int error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3175) if (!dentry) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3176) page = find_get_page(inode->i_mapping, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3177) if (!page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3178) return ERR_PTR(-ECHILD);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3179) if (!PageUptodate(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3180) put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3181) return ERR_PTR(-ECHILD);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3182) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3183) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3184) error = shmem_getpage(inode, 0, &page, SGP_READ);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3185) if (error)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3186) return ERR_PTR(error);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3187) unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3188) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3189) set_delayed_call(done, shmem_put_link, page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3190) return page_address(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3191) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3192)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3193) #ifdef CONFIG_TMPFS_XATTR
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3194) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3195) * Superblocks without xattr inode operations may get some security.* xattr
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3196) * support from the LSM "for free". As soon as we have any other xattrs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3197) * like ACLs, we also need to implement the security.* handlers at
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3198) * filesystem level, though.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3199) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3200)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3201) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3202) * Callback for security_inode_init_security() for acquiring xattrs.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3203) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3204) static int shmem_initxattrs(struct inode *inode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3205) const struct xattr *xattr_array,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3206) void *fs_info)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3207) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3208) struct shmem_inode_info *info = SHMEM_I(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3209) const struct xattr *xattr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3210) struct simple_xattr *new_xattr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3211) size_t len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3212)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3213) for (xattr = xattr_array; xattr->name != NULL; xattr++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3214) new_xattr = simple_xattr_alloc(xattr->value, xattr->value_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3215) if (!new_xattr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3216) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3217)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3218) len = strlen(xattr->name) + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3219) new_xattr->name = kmalloc(XATTR_SECURITY_PREFIX_LEN + len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3220) GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3221) if (!new_xattr->name) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3222) kvfree(new_xattr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3223) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3224) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3225)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3226) memcpy(new_xattr->name, XATTR_SECURITY_PREFIX,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3227) XATTR_SECURITY_PREFIX_LEN);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3228) memcpy(new_xattr->name + XATTR_SECURITY_PREFIX_LEN,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3229) xattr->name, len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3230)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3231) simple_xattr_list_add(&info->xattrs, new_xattr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3232) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3233)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3234) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3235) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3236)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3237) static int shmem_xattr_handler_get(const struct xattr_handler *handler,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3238) struct dentry *unused, struct inode *inode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3239) const char *name, void *buffer, size_t size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3240) int flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3241) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3242) struct shmem_inode_info *info = SHMEM_I(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3243)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3244) name = xattr_full_name(handler, name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3245) return simple_xattr_get(&info->xattrs, name, buffer, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3246) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3247)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3248) static int shmem_xattr_handler_set(const struct xattr_handler *handler,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3249) struct dentry *unused, struct inode *inode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3250) const char *name, const void *value,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3251) size_t size, int flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3252) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3253) struct shmem_inode_info *info = SHMEM_I(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3254)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3255) name = xattr_full_name(handler, name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3256) return simple_xattr_set(&info->xattrs, name, value, size, flags, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3257) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3258)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3259) static const struct xattr_handler shmem_security_xattr_handler = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3260) .prefix = XATTR_SECURITY_PREFIX,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3261) .get = shmem_xattr_handler_get,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3262) .set = shmem_xattr_handler_set,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3263) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3264)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3265) static const struct xattr_handler shmem_trusted_xattr_handler = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3266) .prefix = XATTR_TRUSTED_PREFIX,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3267) .get = shmem_xattr_handler_get,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3268) .set = shmem_xattr_handler_set,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3269) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3270)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3271) static const struct xattr_handler *shmem_xattr_handlers[] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3272) #ifdef CONFIG_TMPFS_POSIX_ACL
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3273) &posix_acl_access_xattr_handler,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3274) &posix_acl_default_xattr_handler,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3275) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3276) &shmem_security_xattr_handler,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3277) &shmem_trusted_xattr_handler,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3278) NULL
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3279) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3280)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3281) static ssize_t shmem_listxattr(struct dentry *dentry, char *buffer, size_t size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3282) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3283) struct shmem_inode_info *info = SHMEM_I(d_inode(dentry));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3284) return simple_xattr_list(d_inode(dentry), &info->xattrs, buffer, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3285) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3286) #endif /* CONFIG_TMPFS_XATTR */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3287)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3288) static const struct inode_operations shmem_short_symlink_operations = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3289) .get_link = simple_get_link,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3290) #ifdef CONFIG_TMPFS_XATTR
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3291) .listxattr = shmem_listxattr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3292) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3293) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3294)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3295) static const struct inode_operations shmem_symlink_inode_operations = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3296) .get_link = shmem_get_link,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3297) #ifdef CONFIG_TMPFS_XATTR
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3298) .listxattr = shmem_listxattr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3299) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3300) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3301)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3302) static struct dentry *shmem_get_parent(struct dentry *child)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3303) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3304) return ERR_PTR(-ESTALE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3305) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3306)
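/*
 * NFS export support: the file handle is three 32-bit words,
 * fh[0] = i_generation, fh[1] = low 32 bits of i_ino and fh[2] = high
 * 32 bits of i_ino (see shmem_encode_fh() below).
 */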
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3307) static int shmem_match(struct inode *ino, void *vfh)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3308) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3309) __u32 *fh = vfh;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3310) __u64 inum = fh[2];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3311) inum = (inum << 32) | fh[1];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3312) return ino->i_ino == inum && fh[0] == ino->i_generation;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3313) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3314)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3315) /* Find any alias of inode, but prefer a hashed alias */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3316) static struct dentry *shmem_find_alias(struct inode *inode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3317) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3318) struct dentry *alias = d_find_alias(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3319)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3320) return alias ?: d_find_any_alias(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3321) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3322)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3324) static struct dentry *shmem_fh_to_dentry(struct super_block *sb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3325) struct fid *fid, int fh_len, int fh_type)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3326) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3327) struct inode *inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3328) struct dentry *dentry = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3329) u64 inum;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3330)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3331) if (fh_len < 3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3332) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3333)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3334) inum = fid->raw[2];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3335) inum = (inum << 32) | fid->raw[1];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3336)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3337) inode = ilookup5(sb, (unsigned long)(inum + fid->raw[0]),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3338) shmem_match, fid->raw);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3339) if (inode) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3340) dentry = shmem_find_alias(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3341) iput(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3342) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3343)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3344) return dentry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3345) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3346)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3347) static int shmem_encode_fh(struct inode *inode, __u32 *fh, int *len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3348) struct inode *parent)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3349) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3350) if (*len < 3) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3351) *len = 3;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3352) return FILEID_INVALID;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3353) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3354)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3355) if (inode_unhashed(inode)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3356) 		/* Unfortunately, insert_inode_hash() is not idempotent,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3357) 		 * and since we hash inodes here rather than at creation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3358) 		 * time, we need a lock to ensure we only try
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3359) 		 * to do it once.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3360) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3361) static DEFINE_SPINLOCK(lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3362) spin_lock(&lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3363) if (inode_unhashed(inode))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3364) __insert_inode_hash(inode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3365) inode->i_ino + inode->i_generation);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3366) spin_unlock(&lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3367) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3368)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3369) fh[0] = inode->i_generation;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3370) fh[1] = inode->i_ino;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3371) fh[2] = ((__u64)inode->i_ino) >> 32;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3372)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3373) *len = 3;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3374) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3375) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3376)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3377) static const struct export_operations shmem_export_ops = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3378) .get_parent = shmem_get_parent,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3379) .encode_fh = shmem_encode_fh,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3380) .fh_to_dentry = shmem_fh_to_dentry,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3381) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3382)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3383) enum shmem_param {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3384) Opt_gid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3385) Opt_huge,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3386) Opt_mode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3387) Opt_mpol,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3388) Opt_nr_blocks,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3389) Opt_nr_inodes,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3390) Opt_size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3391) Opt_uid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3392) Opt_inode32,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3393) Opt_inode64,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3394) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3395)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3396) static const struct constant_table shmem_param_enums_huge[] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3397) {"never", SHMEM_HUGE_NEVER },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3398) {"always", SHMEM_HUGE_ALWAYS },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3399) {"within_size", SHMEM_HUGE_WITHIN_SIZE },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3400) {"advise", SHMEM_HUGE_ADVISE },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3401) {}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3402) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3403)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3404) const struct fs_parameter_spec shmem_fs_parameters[] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3405) fsparam_u32 ("gid", Opt_gid),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3406) fsparam_enum ("huge", Opt_huge, shmem_param_enums_huge),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3407) fsparam_u32oct("mode", Opt_mode),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3408) fsparam_string("mpol", Opt_mpol),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3409) fsparam_string("nr_blocks", Opt_nr_blocks),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3410) fsparam_string("nr_inodes", Opt_nr_inodes),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3411) fsparam_string("size", Opt_size),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3412) fsparam_u32 ("uid", Opt_uid),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3413) fsparam_flag ("inode32", Opt_inode32),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3414) fsparam_flag ("inode64", Opt_inode64),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3415) {}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3416) };
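
/*
 * For illustration, a typical tmpfs mount exercising these parameters
 * might look like:
 *
 *	mount -t tmpfs -o size=50%,nr_inodes=1m,mode=1777,huge=within_size tmpfs /mnt
 *
 * "size" and "nr_inodes" go through memparse() (k/m/g suffixes; "size"
 * also accepts a trailing '%' of total RAM), "mode" is octal, and "huge"
 * takes one of the strings from shmem_param_enums_huge[] above.
 */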
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3417)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3418) static int shmem_parse_one(struct fs_context *fc, struct fs_parameter *param)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3419) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3420) struct shmem_options *ctx = fc->fs_private;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3421) struct fs_parse_result result;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3422) unsigned long long size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3423) char *rest;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3424) int opt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3425)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3426) opt = fs_parse(fc, shmem_fs_parameters, param, &result);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3427) if (opt < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3428) return opt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3429)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3430) switch (opt) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3431) case Opt_size:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3432) size = memparse(param->string, &rest);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3433) if (*rest == '%') {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3434) size <<= PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3435) size *= totalram_pages();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3436) do_div(size, 100);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3437) rest++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3438) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3439) if (*rest)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3440) goto bad_value;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3441) ctx->blocks = DIV_ROUND_UP(size, PAGE_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3442) ctx->seen |= SHMEM_SEEN_BLOCKS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3443) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3444) case Opt_nr_blocks:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3445) ctx->blocks = memparse(param->string, &rest);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3446) if (*rest)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3447) goto bad_value;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3448) ctx->seen |= SHMEM_SEEN_BLOCKS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3449) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3450) case Opt_nr_inodes:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3451) ctx->inodes = memparse(param->string, &rest);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3452) if (*rest)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3453) goto bad_value;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3454) ctx->seen |= SHMEM_SEEN_INODES;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3455) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3456) case Opt_mode:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3457) ctx->mode = result.uint_32 & 07777;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3458) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3459) case Opt_uid:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3460) ctx->uid = make_kuid(current_user_ns(), result.uint_32);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3461) if (!uid_valid(ctx->uid))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3462) goto bad_value;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3463) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3464) case Opt_gid:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3465) ctx->gid = make_kgid(current_user_ns(), result.uint_32);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3466) if (!gid_valid(ctx->gid))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3467) goto bad_value;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3468) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3469) case Opt_huge:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3470) ctx->huge = result.uint_32;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3471) if (ctx->huge != SHMEM_HUGE_NEVER &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3472) !(IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3473) has_transparent_hugepage()))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3474) goto unsupported_parameter;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3475) ctx->seen |= SHMEM_SEEN_HUGE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3476) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3477) case Opt_mpol:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3478) if (IS_ENABLED(CONFIG_NUMA)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3479) mpol_put(ctx->mpol);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3480) ctx->mpol = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3481) if (mpol_parse_str(param->string, &ctx->mpol))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3482) goto bad_value;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3483) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3484) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3485) goto unsupported_parameter;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3486) case Opt_inode32:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3487) ctx->full_inums = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3488) ctx->seen |= SHMEM_SEEN_INUMS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3489) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3490) case Opt_inode64:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3491) if (sizeof(ino_t) < 8) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3492) return invalfc(fc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3493) "Cannot use inode64 with <64bit inums in kernel\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3494) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3495) ctx->full_inums = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3496) ctx->seen |= SHMEM_SEEN_INUMS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3497) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3498) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3499) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3500)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3501) unsupported_parameter:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3502) return invalfc(fc, "Unsupported parameter '%s'", param->key);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3503) bad_value:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3504) return invalfc(fc, "Bad value for '%s'", param->key);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3505) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3506)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3507) static int shmem_parse_options(struct fs_context *fc, void *data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3508) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3509) char *options = data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3510)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3511) if (options) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3512) int err = security_sb_eat_lsm_opts(options, &fc->security);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3513) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3514) return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3515) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3516)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3517) while (options != NULL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3518) char *this_char = options;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3519) for (;;) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3520) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3521) * NUL-terminate this option: unfortunately,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3522) * mount options form a comma-separated list,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3523) * but mpol's nodelist may also contain commas.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3524) */
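			/*
			 * For example, in "mpol=bind:0,2,4,huge=always" the
			 * commas before '2' and '4' are followed by digits and
			 * stay inside the nodelist, while the comma before 'h'
			 * terminates the mpol option.
			 */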
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3525) options = strchr(options, ',');
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3526) if (options == NULL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3527) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3528) options++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3529) if (!isdigit(*options)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3530) options[-1] = '\0';
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3531) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3532) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3533) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3534) if (*this_char) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3535) char *value = strchr(this_char,'=');
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3536) size_t len = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3537) int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3538)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3539) if (value) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3540) *value++ = '\0';
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3541) len = strlen(value);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3542) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3543) err = vfs_parse_fs_string(fc, this_char, value, len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3544) if (err < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3545) return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3546) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3547) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3548) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3549) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3550)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3551) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3552) * Reconfigure a shmem filesystem.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3553) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3554) * Note that we disallow change from limited->unlimited blocks/inodes while any
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3555) * are in use; but we must separately disallow unlimited->limited, because in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3556) * that case we have no record of how much is already in use.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3557) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3558) static int shmem_reconfigure(struct fs_context *fc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3559) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3560) struct shmem_options *ctx = fc->fs_private;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3561) struct shmem_sb_info *sbinfo = SHMEM_SB(fc->root->d_sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3562) unsigned long inodes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3563) const char *err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3564)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3565) spin_lock(&sbinfo->stat_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3566) inodes = sbinfo->max_inodes - sbinfo->free_inodes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3567) if ((ctx->seen & SHMEM_SEEN_BLOCKS) && ctx->blocks) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3568) if (!sbinfo->max_blocks) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3569) err = "Cannot retroactively limit size";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3570) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3571) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3572) if (percpu_counter_compare(&sbinfo->used_blocks,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3573) ctx->blocks) > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3574) err = "Too small a size for current use";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3575) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3576) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3577) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3578) if ((ctx->seen & SHMEM_SEEN_INODES) && ctx->inodes) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3579) if (!sbinfo->max_inodes) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3580) err = "Cannot retroactively limit inodes";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3581) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3582) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3583) if (ctx->inodes < inodes) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3584) err = "Too few inodes for current use";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3585) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3586) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3587) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3588)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3589) if ((ctx->seen & SHMEM_SEEN_INUMS) && !ctx->full_inums &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3590) sbinfo->next_ino > UINT_MAX) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3591) err = "Current inum too high to switch to 32-bit inums";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3592) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3593) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3594)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3595) if (ctx->seen & SHMEM_SEEN_HUGE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3596) sbinfo->huge = ctx->huge;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3597) if (ctx->seen & SHMEM_SEEN_INUMS)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3598) sbinfo->full_inums = ctx->full_inums;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3599) if (ctx->seen & SHMEM_SEEN_BLOCKS)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3600) sbinfo->max_blocks = ctx->blocks;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3601) if (ctx->seen & SHMEM_SEEN_INODES) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3602) sbinfo->max_inodes = ctx->inodes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3603) sbinfo->free_inodes = ctx->inodes - inodes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3604) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3605)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3606) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3607) * Preserve previous mempolicy unless mpol remount option was specified.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3608) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3609) if (ctx->mpol) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3610) mpol_put(sbinfo->mpol);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3611) sbinfo->mpol = ctx->mpol; /* transfers initial ref */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3612) ctx->mpol = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3613) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3614) spin_unlock(&sbinfo->stat_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3615) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3616) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3617) spin_unlock(&sbinfo->stat_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3618) return invalfc(fc, "%s", err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3619) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3620)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3621) static int shmem_show_options(struct seq_file *seq, struct dentry *root)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3622) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3623) struct shmem_sb_info *sbinfo = SHMEM_SB(root->d_sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3624)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3625) if (sbinfo->max_blocks != shmem_default_max_blocks())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3626) seq_printf(seq, ",size=%luk",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3627) sbinfo->max_blocks << (PAGE_SHIFT - 10));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3628) if (sbinfo->max_inodes != shmem_default_max_inodes())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3629) seq_printf(seq, ",nr_inodes=%lu", sbinfo->max_inodes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3630) if (sbinfo->mode != (0777 | S_ISVTX))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3631) seq_printf(seq, ",mode=%03ho", sbinfo->mode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3632) if (!uid_eq(sbinfo->uid, GLOBAL_ROOT_UID))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3633) seq_printf(seq, ",uid=%u",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3634) from_kuid_munged(&init_user_ns, sbinfo->uid));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3635) if (!gid_eq(sbinfo->gid, GLOBAL_ROOT_GID))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3636) seq_printf(seq, ",gid=%u",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3637) from_kgid_munged(&init_user_ns, sbinfo->gid));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3638)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3639) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3640) * Showing inode{64,32} might be useful even if it's the system default,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3641) * since then people don't have to resort to checking both here and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3642) * /proc/config.gz to confirm 64-bit inums were successfully applied
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3643) * (which may not even exist if IKCONFIG_PROC isn't enabled).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3644) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3645) * We hide it when inode64 isn't the default and we are using 32-bit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3646) * inodes, since that probably just means the feature isn't even under
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3647) * consideration.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3648) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3649) * As such:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3650) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3651) * +-----------------+-----------------+
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3652) * | TMPFS_INODE64=y | TMPFS_INODE64=n |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3653) * +------------------+-----------------+-----------------+
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3654) * | full_inums=true | show | show |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3655) * | full_inums=false | show | hide |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3656) * +------------------+-----------------+-----------------+
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3657) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3658) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3659) if (IS_ENABLED(CONFIG_TMPFS_INODE64) || sbinfo->full_inums)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3660) seq_printf(seq, ",inode%d", (sbinfo->full_inums ? 64 : 32));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3661) #ifdef CONFIG_TRANSPARENT_HUGEPAGE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3662) /* Rightly or wrongly, show huge mount option unmasked by shmem_huge */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3663) if (sbinfo->huge)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3664) seq_printf(seq, ",huge=%s", shmem_format_huge(sbinfo->huge));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3665) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3666) shmem_show_mpol(seq, sbinfo->mpol);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3667) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3668) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3669)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3670) #endif /* CONFIG_TMPFS */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3671)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3672) static void shmem_put_super(struct super_block *sb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3673) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3674) struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3675)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3676) free_percpu(sbinfo->ino_batch);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3677) percpu_counter_destroy(&sbinfo->used_blocks);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3678) mpol_put(sbinfo->mpol);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3679) kfree(sbinfo);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3680) sb->s_fs_info = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3681) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3682)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3683) static int shmem_fill_super(struct super_block *sb, struct fs_context *fc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3684) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3685) struct shmem_options *ctx = fc->fs_private;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3686) struct inode *inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3687) struct shmem_sb_info *sbinfo;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3688) int err = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3689)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3690) /* Round up to L1_CACHE_BYTES to resist false sharing */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3691) sbinfo = kzalloc(max((int)sizeof(struct shmem_sb_info),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3692) L1_CACHE_BYTES), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3693) if (!sbinfo)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3694) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3695)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3696) sb->s_fs_info = sbinfo;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3697)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3698) #ifdef CONFIG_TMPFS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3699) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3700) 	 * By default we only allow half of the physical RAM per
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3701) * tmpfs instance, limiting inodes to one per page of lowmem;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3702) * but the internal instance is left unlimited.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3703) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3704) if (!(sb->s_flags & SB_KERNMOUNT)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3705) if (!(ctx->seen & SHMEM_SEEN_BLOCKS))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3706) ctx->blocks = shmem_default_max_blocks();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3707) if (!(ctx->seen & SHMEM_SEEN_INODES))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3708) ctx->inodes = shmem_default_max_inodes();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3709) if (!(ctx->seen & SHMEM_SEEN_INUMS))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3710) ctx->full_inums = IS_ENABLED(CONFIG_TMPFS_INODE64);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3711) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3712) sb->s_flags |= SB_NOUSER;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3713) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3714) sb->s_export_op = &shmem_export_ops;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3715) sb->s_flags |= SB_NOSEC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3716) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3717) sb->s_flags |= SB_NOUSER;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3718) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3719) sbinfo->max_blocks = ctx->blocks;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3720) sbinfo->free_inodes = sbinfo->max_inodes = ctx->inodes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3721) if (sb->s_flags & SB_KERNMOUNT) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3722) sbinfo->ino_batch = alloc_percpu(ino_t);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3723) if (!sbinfo->ino_batch)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3724) goto failed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3725) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3726) sbinfo->uid = ctx->uid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3727) sbinfo->gid = ctx->gid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3728) sbinfo->full_inums = ctx->full_inums;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3729) sbinfo->mode = ctx->mode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3730) sbinfo->huge = ctx->huge;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3731) sbinfo->mpol = ctx->mpol;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3732) ctx->mpol = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3733)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3734) spin_lock_init(&sbinfo->stat_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3735) if (percpu_counter_init(&sbinfo->used_blocks, 0, GFP_KERNEL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3736) goto failed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3737) spin_lock_init(&sbinfo->shrinklist_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3738) INIT_LIST_HEAD(&sbinfo->shrinklist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3739)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3740) sb->s_maxbytes = MAX_LFS_FILESIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3741) sb->s_blocksize = PAGE_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3742) sb->s_blocksize_bits = PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3743) sb->s_magic = TMPFS_MAGIC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3744) sb->s_op = &shmem_ops;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3745) sb->s_time_gran = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3746) #ifdef CONFIG_TMPFS_XATTR
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3747) sb->s_xattr = shmem_xattr_handlers;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3748) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3749) #ifdef CONFIG_TMPFS_POSIX_ACL
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3750) sb->s_flags |= SB_POSIXACL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3751) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3752) uuid_gen(&sb->s_uuid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3753)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3754) inode = shmem_get_inode(sb, NULL, S_IFDIR | sbinfo->mode, 0, VM_NORESERVE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3755) if (!inode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3756) goto failed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3757) inode->i_uid = sbinfo->uid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3758) inode->i_gid = sbinfo->gid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3759) sb->s_root = d_make_root(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3760) if (!sb->s_root)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3761) goto failed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3762) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3763)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3764) failed:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3765) shmem_put_super(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3766) return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3767) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3768)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3769) static int shmem_get_tree(struct fs_context *fc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3770) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3771) return get_tree_nodev(fc, shmem_fill_super);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3772) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3773)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3774) static void shmem_free_fc(struct fs_context *fc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3775) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3776) struct shmem_options *ctx = fc->fs_private;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3777)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3778) if (ctx) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3779) mpol_put(ctx->mpol);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3780) kfree(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3781) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3782) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3783)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3784) static const struct fs_context_operations shmem_fs_context_ops = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3785) .free = shmem_free_fc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3786) .get_tree = shmem_get_tree,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3787) #ifdef CONFIG_TMPFS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3788) .parse_monolithic = shmem_parse_options,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3789) .parse_param = shmem_parse_one,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3790) .reconfigure = shmem_reconfigure,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3791) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3792) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3793)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3794) static struct kmem_cache *shmem_inode_cachep;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3795)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3796) static struct inode *shmem_alloc_inode(struct super_block *sb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3797) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3798) struct shmem_inode_info *info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3799) info = kmem_cache_alloc(shmem_inode_cachep, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3800) if (!info)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3801) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3802) return &info->vfs_inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3803) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3804)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3805) static void shmem_free_in_core_inode(struct inode *inode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3806) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3807) if (S_ISLNK(inode->i_mode))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3808) kfree(inode->i_link);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3809) kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3810) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3811)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3812) static void shmem_destroy_inode(struct inode *inode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3813) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3814) if (S_ISREG(inode->i_mode))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3815) mpol_free_shared_policy(&SHMEM_I(inode)->policy);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3816) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3817)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3818) static void shmem_init_inode(void *foo)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3819) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3820) struct shmem_inode_info *info = foo;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3821) inode_init_once(&info->vfs_inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3822) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3823)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3824) static void shmem_init_inodecache(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3825) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3826) shmem_inode_cachep = kmem_cache_create("shmem_inode_cache",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3827) sizeof(struct shmem_inode_info),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3828) 0, SLAB_PANIC|SLAB_ACCOUNT, shmem_init_inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3829) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3830)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3831) static void shmem_destroy_inodecache(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3832) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3833) kmem_cache_destroy(shmem_inode_cachep);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3834) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3835)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3836) static const struct address_space_operations shmem_aops = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3837) .writepage = shmem_writepage,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3838) .set_page_dirty = __set_page_dirty_no_writeback,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3839) #ifdef CONFIG_TMPFS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3840) .write_begin = shmem_write_begin,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3841) .write_end = shmem_write_end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3842) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3843) #ifdef CONFIG_MIGRATION
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3844) .migratepage = migrate_page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3845) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3846) .error_remove_page = generic_error_remove_page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3847) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3848)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3849) static const struct file_operations shmem_file_operations = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3850) .mmap = shmem_mmap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3851) .get_unmapped_area = shmem_get_unmapped_area,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3852) #ifdef CONFIG_TMPFS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3853) .llseek = shmem_file_llseek,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3854) .read_iter = shmem_file_read_iter,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3855) .write_iter = generic_file_write_iter,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3856) .fsync = noop_fsync,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3857) .splice_read = generic_file_splice_read,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3858) .splice_write = iter_file_splice_write,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3859) .fallocate = shmem_fallocate,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3860) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3861) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3862)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3863) static const struct inode_operations shmem_inode_operations = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3864) .getattr = shmem_getattr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3865) .setattr = shmem_setattr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3866) #ifdef CONFIG_TMPFS_XATTR
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3867) .listxattr = shmem_listxattr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3868) .set_acl = simple_set_acl,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3869) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3870) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3871)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3872) static const struct inode_operations shmem_dir_inode_operations = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3873) #ifdef CONFIG_TMPFS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3874) .create = shmem_create,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3875) .lookup = simple_lookup,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3876) .link = shmem_link,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3877) .unlink = shmem_unlink,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3878) .symlink = shmem_symlink,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3879) .mkdir = shmem_mkdir,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3880) .rmdir = shmem_rmdir,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3881) .mknod = shmem_mknod,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3882) .rename = shmem_rename2,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3883) .tmpfile = shmem_tmpfile,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3884) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3885) #ifdef CONFIG_TMPFS_XATTR
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3886) .listxattr = shmem_listxattr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3887) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3888) #ifdef CONFIG_TMPFS_POSIX_ACL
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3889) .setattr = shmem_setattr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3890) .set_acl = simple_set_acl,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3891) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3892) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3893)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3894) static const struct inode_operations shmem_special_inode_operations = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3895) #ifdef CONFIG_TMPFS_XATTR
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3896) .listxattr = shmem_listxattr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3897) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3898) #ifdef CONFIG_TMPFS_POSIX_ACL
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3899) .setattr = shmem_setattr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3900) .set_acl = simple_set_acl,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3901) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3902) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3903)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3904) static const struct super_operations shmem_ops = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3905) .alloc_inode = shmem_alloc_inode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3906) .free_inode = shmem_free_in_core_inode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3907) .destroy_inode = shmem_destroy_inode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3908) #ifdef CONFIG_TMPFS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3909) .statfs = shmem_statfs,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3910) .show_options = shmem_show_options,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3911) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3912) .evict_inode = shmem_evict_inode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3913) .drop_inode = generic_delete_inode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3914) .put_super = shmem_put_super,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3915) #ifdef CONFIG_TRANSPARENT_HUGEPAGE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3916) .nr_cached_objects = shmem_unused_huge_count,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3917) .free_cached_objects = shmem_unused_huge_scan,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3918) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3919) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3920)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3921) static const struct vm_operations_struct shmem_vm_ops = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3922) .fault = shmem_fault,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3923) .map_pages = filemap_map_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3924) #ifdef CONFIG_NUMA
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3925) .set_policy = shmem_set_policy,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3926) .get_policy = shmem_get_policy,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3927) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3928) #ifdef CONFIG_SPECULATIVE_PAGE_FAULT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3929) .allow_speculation = filemap_allow_speculation,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3930) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3931) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3932)
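/*
 * Set up the default mount context: the root directory is world-writable
 * and sticky (mode 01777, like /tmp) and owned by the mounting user; block
 * and inode limits are filled in later by shmem_fill_super().
 */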
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3933) int shmem_init_fs_context(struct fs_context *fc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3934) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3935) struct shmem_options *ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3936)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3937) ctx = kzalloc(sizeof(struct shmem_options), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3938) if (!ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3939) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3940)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3941) ctx->mode = 0777 | S_ISVTX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3942) ctx->uid = current_fsuid();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3943) ctx->gid = current_fsgid();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3944)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3945) fc->fs_private = ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3946) fc->ops = &shmem_fs_context_ops;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3947) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3948) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3949)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3950) static struct file_system_type shmem_fs_type = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3951) .owner = THIS_MODULE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3952) .name = "tmpfs",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3953) .init_fs_context = shmem_init_fs_context,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3954) #ifdef CONFIG_TMPFS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3955) .parameters = shmem_fs_parameters,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3956) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3957) .kill_sb = kill_litter_super,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3958) .fs_flags = FS_USERNS_MOUNT | FS_THP_SUPPORT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3959) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3960)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3961) int __init shmem_init(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3962) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3963) int error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3964)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3965) shmem_init_inodecache();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3966)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3967) error = register_filesystem(&shmem_fs_type);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3968) if (error) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3969) pr_err("Could not register tmpfs\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3970) goto out2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3971) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3972)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3973) shm_mnt = kern_mount(&shmem_fs_type);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3974) if (IS_ERR(shm_mnt)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3975) error = PTR_ERR(shm_mnt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3976) pr_err("Could not kern_mount tmpfs\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3977) goto out1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3978) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3979)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3980) #ifdef CONFIG_TRANSPARENT_HUGEPAGE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3981) if (has_transparent_hugepage() && shmem_huge > SHMEM_HUGE_DENY)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3982) SHMEM_SB(shm_mnt->mnt_sb)->huge = shmem_huge;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3983) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3984) shmem_huge = 0; /* just in case it was patched */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3985) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3986) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3987)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3988) out1:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3989) unregister_filesystem(&shmem_fs_type);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3990) out2:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3991) shmem_destroy_inodecache();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3992) shm_mnt = ERR_PTR(error);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3993) return error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3994) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3995)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3996) #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && defined(CONFIG_SYSFS)
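/*
 * Sysfs knob (typically /sys/kernel/mm/transparent_hugepage/shmem_enabled):
 * "always", "within_size", "advise" and "never" select the default huge
 * page policy for shmem mounts, while "deny" and "force" override the
 * per-mount "huge=" option globally.
 */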
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3997) static ssize_t shmem_enabled_show(struct kobject *kobj,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3998) struct kobj_attribute *attr, char *buf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3999) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4000) static const int values[] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4001) SHMEM_HUGE_ALWAYS,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4002) SHMEM_HUGE_WITHIN_SIZE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4003) SHMEM_HUGE_ADVISE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4004) SHMEM_HUGE_NEVER,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4005) SHMEM_HUGE_DENY,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4006) SHMEM_HUGE_FORCE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4007) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4008) int i, count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4009)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4010) for (i = 0, count = 0; i < ARRAY_SIZE(values); i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4011) const char *fmt = shmem_huge == values[i] ? "[%s] " : "%s ";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4012)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4013) count += sprintf(buf + count, fmt,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4014) shmem_format_huge(values[i]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4015) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4016) buf[count - 1] = '\n';
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4017) return count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4018) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4019)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4020) static ssize_t shmem_enabled_store(struct kobject *kobj,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4021) struct kobj_attribute *attr, const char *buf, size_t count)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4022) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4023) char tmp[16];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4024) int huge;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4025)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4026) if (count + 1 > sizeof(tmp))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4027) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4028) memcpy(tmp, buf, count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4029) tmp[count] = '\0';
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4030) if (count && tmp[count - 1] == '\n')
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4031) tmp[count - 1] = '\0';
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4032)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4033) huge = shmem_parse_huge(tmp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4034) if (huge == -EINVAL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4035) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4036) if (!has_transparent_hugepage() &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4037) huge != SHMEM_HUGE_NEVER && huge != SHMEM_HUGE_DENY)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4038) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4039)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4040) shmem_huge = huge;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4041) if (shmem_huge > SHMEM_HUGE_DENY)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4042) SHMEM_SB(shm_mnt->mnt_sb)->huge = shmem_huge;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4043) return count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4044) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4045)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4046) struct kobj_attribute shmem_enabled_attr =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4047) __ATTR(shmem_enabled, 0644, shmem_enabled_show, shmem_enabled_store);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4048) #endif /* CONFIG_TRANSPARENT_HUGEPAGE && CONFIG_SYSFS */
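/*
 * Illustrative note (comment only, nothing here is built): when both
 * CONFIG_TRANSPARENT_HUGEPAGE and CONFIG_SYSFS are set, the attribute
 * above appears as /sys/kernel/mm/transparent_hugepage/shmem_enabled.
 * Reading it lists every policy with the current one in brackets, and
 * writing a policy name goes through shmem_enabled_store() above, e.g.
 * from a root shell:
 *
 *	# cat /sys/kernel/mm/transparent_hugepage/shmem_enabled
 *	always within_size advise [never] deny force
 *	# echo within_size > /sys/kernel/mm/transparent_hugepage/shmem_enabled
 *
 * Values above SHMEM_HUGE_DENY are also propagated to the internal
 * shm_mnt superblock, matching what shmem_init() does at boot.
 */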
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4049)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4050) #ifdef CONFIG_TRANSPARENT_HUGEPAGE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4051) bool shmem_huge_enabled(struct vm_area_struct *vma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4052) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4053) struct inode *inode = file_inode(vma->vm_file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4054) struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4055) loff_t i_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4056) pgoff_t off;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4057)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4058) if (!transhuge_vma_enabled(vma, vma->vm_flags))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4059) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4060) if (shmem_huge == SHMEM_HUGE_FORCE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4061) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4062) if (shmem_huge == SHMEM_HUGE_DENY)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4063) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4064) switch (sbinfo->huge) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4065) case SHMEM_HUGE_NEVER:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4066) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4067) case SHMEM_HUGE_ALWAYS:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4068) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4069) case SHMEM_HUGE_WITHIN_SIZE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4070) off = round_up(vma->vm_pgoff, HPAGE_PMD_NR);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4071) i_size = round_up(i_size_read(inode), PAGE_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4072) if (i_size >= HPAGE_PMD_SIZE &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4073) i_size >> PAGE_SHIFT >= off)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4074) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4075) fallthrough;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4076) case SHMEM_HUGE_ADVISE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4077) /* TODO: implement fadvise() hints */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4078) return (vma->vm_flags & VM_HUGEPAGE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4079) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4080) VM_BUG_ON(1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4081) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4082) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4083) }
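/*
 * Worked example for the SHMEM_HUGE_WITHIN_SIZE test above (assuming 4KiB
 * pages and 2MiB PMD-sized huge pages, so HPAGE_PMD_NR == 512): for a 3MiB
 * file mapped at vm_pgoff 0, the file is at least one huge page in size
 * (3MiB >= 2MiB), off rounds up to 0, and i_size >> PAGE_SHIFT is 768, so
 * 768 >= 0 and the mapping may use huge pages.  For the same file mapped
 * starting at vm_pgoff 600, off rounds up to 1024, 768 < 1024, and the
 * decision falls through to the SHMEM_HUGE_ADVISE check.
 */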
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4084) #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4085)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4086) #else /* !CONFIG_SHMEM */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4087)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4088) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4089) * tiny-shmem: simple shmemfs and tmpfs using ramfs code
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4090) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4091) * This is intended for small systems where the benefits of the full
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4092) * shmem code (swap-backed and resource-limited) are outweighed by
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4093) * their complexity. On systems without swap this code should be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4094) * effectively equivalent, but much lighter weight.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4095) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4096)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4097) static struct file_system_type shmem_fs_type = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4098) .name = "tmpfs",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4099) .init_fs_context = ramfs_init_fs_context,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4100) .parameters = ramfs_fs_parameters,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4101) .kill_sb = kill_litter_super,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4102) .fs_flags = FS_USERNS_MOUNT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4103) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4104)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4105) int __init shmem_init(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4106) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4107) BUG_ON(register_filesystem(&shmem_fs_type) != 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4108)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4109) shm_mnt = kern_mount(&shmem_fs_type);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4110) BUG_ON(IS_ERR(shm_mnt));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4111)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4112) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4113) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4114)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4115) int shmem_unuse(unsigned int type, bool frontswap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4116) unsigned long *fs_pages_to_unuse)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4117) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4118) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4119) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4120)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4121) int shmem_lock(struct file *file, int lock, struct user_struct *user)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4122) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4123) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4124) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4125)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4126) void shmem_unlock_mapping(struct address_space *mapping)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4127) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4128) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4129)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4130) #ifdef CONFIG_MMU
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4131) unsigned long shmem_get_unmapped_area(struct file *file,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4132) unsigned long addr, unsigned long len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4133) unsigned long pgoff, unsigned long flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4134) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4135) return current->mm->get_unmapped_area(file, addr, len, pgoff, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4136) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4137) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4138)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4139) void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4140) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4141) truncate_inode_pages_range(inode->i_mapping, lstart, lend);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4142) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4143) EXPORT_SYMBOL_GPL(shmem_truncate_range);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4144)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4145) #define shmem_vm_ops generic_file_vm_ops
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4146) #define shmem_file_operations ramfs_file_operations
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4147) #define shmem_get_inode(sb, dir, mode, dev, flags) ramfs_get_inode(sb, dir, mode, dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4148) #define shmem_acct_size(flags, size) 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4149) #define shmem_unacct_size(flags, size) do {} while (0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4150)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4151) #endif /* CONFIG_SHMEM */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4152)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4153) /* common code */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4154)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4155) static struct file *__shmem_file_setup(struct vfsmount *mnt, const char *name, loff_t size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4156) unsigned long flags, unsigned int i_flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4157) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4158) struct inode *inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4159) struct file *res;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4160)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4161) if (IS_ERR(mnt))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4162) return ERR_CAST(mnt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4163)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4164) if (size < 0 || size > MAX_LFS_FILESIZE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4165) return ERR_PTR(-EINVAL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4166)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4167) if (shmem_acct_size(flags, size))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4168) return ERR_PTR(-ENOMEM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4169)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4170) inode = shmem_get_inode(mnt->mnt_sb, NULL, S_IFREG | S_IRWXUGO, 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4171) flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4172) if (unlikely(!inode)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4173) shmem_unacct_size(flags, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4174) return ERR_PTR(-ENOSPC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4175) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4176) inode->i_flags |= i_flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4177) inode->i_size = size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4178) clear_nlink(inode); /* It is unlinked */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4179) res = ERR_PTR(ramfs_nommu_expand_for_mapping(inode, size));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4180) if (!IS_ERR(res))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4181) res = alloc_file_pseudo(inode, mnt, name, O_RDWR,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4182) &shmem_file_operations);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4183) if (IS_ERR(res))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4184) iput(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4185) return res;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4186) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4187)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4188) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4189) * shmem_kernel_file_setup - get an unlinked file living in tmpfs which must be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4190) * kernel internal. There will be NO LSM permission checks against the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4191) * underlying inode. So users of this interface must do LSM checks at a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4192) * higher layer. The users are the big_key and shm implementations. LSM
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4193) * checks are provided at the key or shm level rather than the inode.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4194) * @name: name for dentry (to be seen in /proc/<pid>/maps)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4195) * @size: size to be set for the file
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4196) * @flags: VM_NORESERVE suppresses pre-accounting of the entire object size
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4197) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4198) struct file *shmem_kernel_file_setup(const char *name, loff_t size, unsigned long flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4199) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4200) return __shmem_file_setup(shm_mnt, name, size, flags, S_PRIVATE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4201) }
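/*
 * Illustrative sketch (hypothetical caller, not built): a kernel-internal
 * user such as big_key creates an unlinked, LSM-exempt tmpfs file, uses
 * it, and drops the only reference when done.  The name and size below
 * are placeholders:
 *
 *	struct file *filp;
 *
 *	filp = shmem_kernel_file_setup("example-blob", PAGE_SIZE, 0);
 *	if (IS_ERR(filp))
 *		return PTR_ERR(filp);
 *	... kernel_write()/kernel_read() against filp, or map its pages ...
 *	fput(filp);
 */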
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4202)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4203) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4204) * shmem_file_setup - get an unlinked file living in tmpfs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4205) * @name: name for dentry (to be seen in /proc/<pid>/maps)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4206) * @size: size to be set for the file
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4207) * @flags: VM_NORESERVE suppresses pre-accounting of the entire object size
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4208) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4209) struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4210) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4211) return __shmem_file_setup(shm_mnt, name, size, flags, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4212) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4213) EXPORT_SYMBOL_GPL(shmem_file_setup);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4214)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4215) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4216) * shmem_file_setup_with_mnt - get an unlinked file living in tmpfs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4217) * @mnt: the tmpfs mount where the file will be created
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4218) * @name: name for dentry (to be seen in /proc/<pid>/maps)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4219) * @size: size to be set for the file
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4220) * @flags: VM_NORESERVE suppresses pre-accounting of the entire object size
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4221) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4222) struct file *shmem_file_setup_with_mnt(struct vfsmount *mnt, const char *name,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4223) loff_t size, unsigned long flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4224) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4225) return __shmem_file_setup(mnt, name, size, flags, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4226) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4227) EXPORT_SYMBOL_GPL(shmem_file_setup_with_mnt);
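/*
 * Illustrative sketch (hypothetical caller, not built): the two exported
 * setup variants differ only in which tmpfs mount backs the file.
 * shmem_file_setup() uses the kernel-internal shm_mnt, while a subsystem
 * that maintains its own tmpfs mount passes it explicitly; my_tmpfs_mnt
 * and obj_size below are placeholders:
 *
 *	struct file *filp;
 *
 *	filp = shmem_file_setup_with_mnt(my_tmpfs_mnt, "example-obj",
 *					 obj_size, 0);
 *	if (IS_ERR(filp))
 *		return PTR_ERR(filp);
 *
 * Either way the file is unlinked, so it lives only as long as references
 * to it remain (fput() drops the last one).
 */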
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4228)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4229) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4230) * shmem_zero_setup - setup a shared anonymous mapping
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4231) * @vma: the vma to be mmapped, as prepared by do_mmap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4232) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4233) int shmem_zero_setup(struct vm_area_struct *vma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4234) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4235) struct file *file;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4236) loff_t size = vma->vm_end - vma->vm_start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4237)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4238) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4239) * Cloning a new file under mmap_lock leads to a lock ordering conflict
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4240) * between XFS directory reading and selinux: since this file is only
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4241) * accessible to the user through its mapping, use the S_PRIVATE flag to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4242) * bypass file security, in the same way as shmem_kernel_file_setup().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4243) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4244) file = shmem_kernel_file_setup("dev/zero", size, vma->vm_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4245) if (IS_ERR(file))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4246) return PTR_ERR(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4247)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4248) if (vma->vm_file)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4249) fput(vma->vm_file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4250) vma->vm_file = file;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4251) vma->vm_ops = &shmem_vm_ops;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4252)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4253) if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4254) ((vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK) <
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4255) (vma->vm_end & HPAGE_PMD_MASK)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4256) khugepaged_enter(vma, vma->vm_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4257) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4258)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4259) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4260) }
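/*
 * Illustrative sketch (userspace side, not built): this is the path that
 * ultimately backs a shared anonymous mapping such as
 *
 *	addr = mmap(NULL, len, PROT_READ | PROT_WRITE,
 *		    MAP_SHARED | MAP_ANONYMOUS, -1, 0);
 *
 * where the mmap code calls shmem_zero_setup() to attach the unlinked
 * "dev/zero" tmpfs file before the mapping becomes visible to userspace.
 */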
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4261)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4262) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4263) * shmem_read_mapping_page_gfp - read into page cache, using specified page allocation flags.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4264) * @mapping: the page's address_space
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4265) * @index: the page index
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4266) * @gfp: the page allocator flags to use if allocating
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4267) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4268) * This behaves as a tmpfs "read_cache_page_gfp(mapping, index, gfp)",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4269) * with any new page allocations done using the specified allocation flags.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4270) * But read_cache_page_gfp() uses the ->readpage() method, which does not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4271) * suit tmpfs, since it may have pages in swapcache, and needs to find those
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4272) * for itself; although drivers/gpu/drm i915 and ttm rely upon this support.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4273) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4274) * i915_gem_object_get_pages_gtt() mixes __GFP_NORETRY | __GFP_NOWARN in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4275) * with the mapping_gfp_mask(), to avoid OOMing the machine unnecessarily.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4276) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4277) struct page *shmem_read_mapping_page_gfp(struct address_space *mapping,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4278) pgoff_t index, gfp_t gfp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4279) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4280) #ifdef CONFIG_SHMEM
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4281) struct inode *inode = mapping->host;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4282) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4283) int error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4284)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4285) BUG_ON(mapping->a_ops != &shmem_aops);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4286) error = shmem_getpage_gfp(inode, index, &page, SGP_CACHE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4287) gfp, NULL, NULL, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4288) if (error)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4289) page = ERR_PTR(error);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4290) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4291) unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4292) return page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4293) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4294) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4295) * The tiny !SHMEM case uses ramfs without swap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4296) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4297) return read_cache_page_gfp(mapping, index, gfp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4298) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4299) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4300) EXPORT_SYMBOL_GPL(shmem_read_mapping_page_gfp);
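/*
 * Illustrative sketch (hypothetical caller, not built): a driver pulling
 * pages from a shmem-backed object, in the spirit of the i915 usage noted
 * above, might limit reclaim effort with __GFP_NORETRY | __GFP_NOWARN:
 *
 *	gfp_t gfp = mapping_gfp_mask(mapping) | __GFP_NORETRY | __GFP_NOWARN;
 *	struct page *page;
 *
 *	page = shmem_read_mapping_page_gfp(mapping, index, gfp);
 *	if (IS_ERR(page))
 *		return PTR_ERR(page);
 *	...
 *	put_page(page);
 *
 * The page comes back uptodate and unlocked; put_page() drops the
 * reference once the caller is done with it.
 */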
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4301)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4302) void shmem_mark_page_lazyfree(struct page *page, bool tail)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4303) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4304) mark_page_lazyfree_movetail(page, tail);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4305) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4306) EXPORT_SYMBOL_GPL(shmem_mark_page_lazyfree);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4307)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4308) int reclaim_shmem_address_space(struct address_space *mapping)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4309) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4310) #ifdef CONFIG_SHMEM
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4311) pgoff_t start = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4312) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4313) LIST_HEAD(page_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4314) int reclaimed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4315) XA_STATE(xas, &mapping->i_pages, start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4316)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4317) if (!shmem_mapping(mapping))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4318) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4319)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4320) lru_add_drain();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4321)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4322) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4323) xas_for_each(&xas, page, ULONG_MAX) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4324) if (xas_retry(&xas, page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4325) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4326) if (xa_is_value(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4327) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4328) if (isolate_lru_page(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4329) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4330)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4331) list_add(&page->lru, &page_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4332) inc_node_page_state(page, NR_ISOLATED_ANON +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4333) page_is_file_lru(page));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4334)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4335) if (need_resched()) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4336) xas_pause(&xas);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4337) cond_resched_rcu();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4338) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4339) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4340) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4341) reclaimed = reclaim_pages_from_list(&page_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4342)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4343) return reclaimed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4344) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4345) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4346) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4347) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4348) EXPORT_SYMBOL_GPL(reclaim_shmem_address_space);
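/*
 * Illustrative sketch (hypothetical in-kernel caller, not built): a driver
 * holding a tmpfs-backed file could push its resident pages out with
 *
 *	int nr = reclaim_shmem_address_space(file_inode(filp)->i_mapping);
 *
 * where a negative value (-EINVAL) means the mapping was not a shmem
 * mapping and a non-negative value is the number of pages reclaimed.
 */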