^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) // SPDX-License-Identifier: GPL-2.0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) * mm/fadvise.c
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) * Copyright (C) 2002, Linus Torvalds
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) * 11Jan2003 Andrew Morton
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) * Initial version.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11) #include <linux/kernel.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12) #include <linux/file.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) #include <linux/fs.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) #include <linux/mm.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15) #include <linux/pagemap.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) #include <linux/backing-dev.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) #include <linux/pagevec.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18) #include <linux/fadvise.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19) #include <linux/writeback.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20) #include <linux/syscalls.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21) #include <linux/swap.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23) #include <asm/unistd.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25) #include "internal.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28) * POSIX_FADV_WILLNEED could set PG_Referenced, and POSIX_FADV_NOREUSE could
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29) * deactivate the pages and clear PG_Referenced.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32) int generic_fadvise(struct file *file, loff_t offset, loff_t len, int advice)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34) struct inode *inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35) struct address_space *mapping;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36) struct backing_dev_info *bdi;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37) loff_t endbyte; /* inclusive */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38) pgoff_t start_index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39) pgoff_t end_index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40) unsigned long nrpages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42) inode = file_inode(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) if (S_ISFIFO(inode->i_mode))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44) return -ESPIPE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46) mapping = file->f_mapping;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47) if (!mapping || len < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50) bdi = inode_to_bdi(mapping->host);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52) if (IS_DAX(inode) || (bdi == &noop_backing_dev_info)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53) switch (advice) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54) case POSIX_FADV_NORMAL:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55) case POSIX_FADV_RANDOM:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56) case POSIX_FADV_SEQUENTIAL:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57) case POSIX_FADV_WILLNEED:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58) case POSIX_FADV_NOREUSE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59) case POSIX_FADV_DONTNEED:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60) /* no bad return value, but ignore advice */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69) * Careful about overflows. Len == 0 means "as much as possible". Use
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70) * unsigned math because signed overflows are undefined and UBSan
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71) * complains.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73) endbyte = (u64)offset + (u64)len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74) if (!len || endbyte < len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75) endbyte = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77) endbyte--; /* inclusive */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79) switch (advice) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80) case POSIX_FADV_NORMAL:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81) file->f_ra.ra_pages = bdi->ra_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82) spin_lock(&file->f_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 83) file->f_mode &= ~FMODE_RANDOM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 84) spin_unlock(&file->f_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 85) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 86) case POSIX_FADV_RANDOM:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 87) spin_lock(&file->f_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88) file->f_mode |= FMODE_RANDOM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89) spin_unlock(&file->f_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91) case POSIX_FADV_SEQUENTIAL:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92) file->f_ra.ra_pages = bdi->ra_pages * 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93) spin_lock(&file->f_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94) file->f_mode &= ~FMODE_RANDOM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95) spin_unlock(&file->f_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 96) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97) case POSIX_FADV_WILLNEED:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 98) /* First and last PARTIAL page! */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 99) start_index = offset >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100) end_index = endbyte >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) /* Careful about overflow on the "+1" */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) nrpages = end_index - start_index + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104) if (!nrpages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105) nrpages = ~0UL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) force_page_cache_readahead(mapping, file, start_index, nrpages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) case POSIX_FADV_NOREUSE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) case POSIX_FADV_DONTNEED:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) if (!inode_write_congested(mapping->host))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113) __filemap_fdatawrite_range(mapping, offset, endbyte,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) WB_SYNC_NONE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117) * First and last FULL page! Partial pages are deliberately
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118) * preserved on the expectation that it is better to preserve
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) * needed memory than to discard unneeded memory.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121) start_index = (offset+(PAGE_SIZE-1)) >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122) end_index = (endbyte >> PAGE_SHIFT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124) * The page at end_index will be inclusively discarded according
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125) * by invalidate_mapping_pages(), so subtracting 1 from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) * end_index means we will skip the last page. But if endbyte
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127) * is page aligned or is at the end of file, we should not skip
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128) * that page - discarding the last page is safe enough.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130) if ((endbyte & ~PAGE_MASK) != ~PAGE_MASK &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131) endbyte != inode->i_size - 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) /* First page is tricky as 0 - 1 = -1, but pgoff_t
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133) * is unsigned, so the end_index >= start_index
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134) * check below would be true and we'll discard the whole
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135) * file cache which is not what was asked.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137) if (end_index == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140) end_index--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143) if (end_index >= start_index) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144) unsigned long nr_pagevec = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147) * It's common to FADV_DONTNEED right after
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148) * the read or write that instantiates the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149) * pages, in which case there will be some
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150) * sitting on the local LRU cache. Try to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151) * avoid the expensive remote drain and the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152) * second cache tree walk below by flushing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153) * them out right away.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155) lru_add_drain();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157) invalidate_mapping_pagevec(mapping,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158) start_index, end_index,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159) &nr_pagevec);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 160)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 161) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 162) * If fewer pages were invalidated than expected then
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 163) * it is possible that some of the pages were on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 164) * a per-cpu pagevec for a remote CPU. Drain all
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 165) * pagevecs and try again.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 166) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167) if (nr_pagevec) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 168) lru_add_drain_all();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169) invalidate_mapping_pages(mapping, start_index,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170) end_index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 173) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 174) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 175) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 176) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 177) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 178) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 179) EXPORT_SYMBOL(generic_fadvise);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 180)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 181) int vfs_fadvise(struct file *file, loff_t offset, loff_t len, int advice)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 182) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 183) if (file->f_op->fadvise)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 184) return file->f_op->fadvise(file, offset, len, advice);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 185)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 186) return generic_fadvise(file, offset, len, advice);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 187) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 188) EXPORT_SYMBOL(vfs_fadvise);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 189)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 190) #ifdef CONFIG_ADVISE_SYSCALLS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 191)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 192) int ksys_fadvise64_64(int fd, loff_t offset, loff_t len, int advice)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 193) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 194) struct fd f = fdget(fd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 195) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 196)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 197) if (!f.file)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 198) return -EBADF;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 199)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 200) ret = vfs_fadvise(f.file, offset, len, advice);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 201)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 202) fdput(f);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 203) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 204) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 205)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 206) SYSCALL_DEFINE4(fadvise64_64, int, fd, loff_t, offset, loff_t, len, int, advice)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 207) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 208) return ksys_fadvise64_64(fd, offset, len, advice);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 209) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 210)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 211) #ifdef __ARCH_WANT_SYS_FADVISE64
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 212)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 213) SYSCALL_DEFINE4(fadvise64, int, fd, loff_t, offset, size_t, len, int, advice)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 214) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 215) return ksys_fadvise64_64(fd, offset, len, advice);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 216) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 217)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 218) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 219) #endif