^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) // SPDX-License-Identifier: GPL-2.0-or-later
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) /* -*- mode: c; c-basic-offset: 8; -*-
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) * vim: noexpandtab sw=8 ts=8 sts=0:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) * mmap.c
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) * Code to deal with the mess that is clustered mmap.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) * Copyright (C) 2002, 2004 Oracle. All rights reserved.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12) #include <linux/fs.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) #include <linux/types.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) #include <linux/highmem.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15) #include <linux/pagemap.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) #include <linux/uio.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) #include <linux/signal.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18) #include <linux/rbtree.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20) #include <cluster/masklog.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22) #include "ocfs2.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24) #include "aops.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25) #include "dlmglue.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) #include "file.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27) #include "inode.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28) #include "mmap.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29) #include "super.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30) #include "ocfs2_trace.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33) static vm_fault_t ocfs2_fault(struct vm_fault *vmf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35) struct vm_area_struct *vma = vmf->vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36) sigset_t oldset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37) vm_fault_t ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39) ocfs2_block_signals(&oldset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40) ret = filemap_fault(vmf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41) ocfs2_unblock_signals(&oldset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) trace_ocfs2_fault(OCFS2_I(vma->vm_file->f_mapping->host)->ip_blkno,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44) vma, vmf->page, vmf->pgoff);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48) static vm_fault_t __ocfs2_page_mkwrite(struct file *file,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49) struct buffer_head *di_bh, struct page *page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51) int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52) vm_fault_t ret = VM_FAULT_NOPAGE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53) struct inode *inode = file_inode(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54) struct address_space *mapping = inode->i_mapping;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55) loff_t pos = page_offset(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56) unsigned int len = PAGE_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57) pgoff_t last_index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58) struct page *locked_page = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59) void *fsdata;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60) loff_t size = i_size_read(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62) last_index = (size - 1) >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65) * There are cases that lead to the page no longer belonging to the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66) * mapping.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67) * 1) pagecache truncates locally due to memory pressure.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68) * 2) pagecache truncates when another is taking EX lock against
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69) * inode lock. see ocfs2_data_convert_worker.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71) * The i_size check doesn't catch the case where nodes truncated and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72) * then re-extended the file. We'll re-check the page mapping after
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73) * taking the page lock inside of ocfs2_write_begin_nolock().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75) * Let VM retry with these cases.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77) if ((page->mapping != inode->i_mapping) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78) (!PageUptodate(page)) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79) (page_offset(page) >= size))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 83) * Call ocfs2_write_begin() and ocfs2_write_end() to take
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 84) * advantage of the allocation code there. We pass a write
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 85) * length of the whole page (chopped to i_size) to make sure
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 86) * the whole thing is allocated.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 87) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88) * Since we know the page is up to date, we don't have to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89) * worry about ocfs2_write_begin() skipping some buffer reads
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90) * because the "write" would invalidate their data.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92) if (page->index == last_index)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93) len = ((size - 1) & ~PAGE_MASK) + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95) err = ocfs2_write_begin_nolock(mapping, pos, len, OCFS2_WRITE_MMAP,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 96) &locked_page, &fsdata, di_bh, page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97) if (err) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 98) if (err != -ENOSPC)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 99) mlog_errno(err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100) ret = vmf_error(err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104) if (!locked_page) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105) ret = VM_FAULT_NOPAGE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108) err = ocfs2_write_end_nolock(mapping, pos, len, len, fsdata);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) BUG_ON(err != len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) ret = VM_FAULT_LOCKED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) static vm_fault_t ocfs2_page_mkwrite(struct vm_fault *vmf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117) struct page *page = vmf->page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118) struct inode *inode = file_inode(vmf->vma->vm_file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) struct buffer_head *di_bh = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120) sigset_t oldset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121) int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122) vm_fault_t ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124) sb_start_pagefault(inode->i_sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125) ocfs2_block_signals(&oldset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128) * The cluster locks taken will block a truncate from another
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129) * node. Taking the data lock will also ensure that we don't
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130) * attempt page truncation as part of a downconvert.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) err = ocfs2_inode_lock(inode, &di_bh, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133) if (err < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134) mlog_errno(err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135) ret = vmf_error(err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140) * The alloc sem should be enough to serialize with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141) * ocfs2_truncate_file() changing i_size as well as any thread
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142) * modifying the inode btree.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144) down_write(&OCFS2_I(inode)->ip_alloc_sem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146) ret = __ocfs2_page_mkwrite(vmf->vma->vm_file, di_bh, page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148) up_write(&OCFS2_I(inode)->ip_alloc_sem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150) brelse(di_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151) ocfs2_inode_unlock(inode, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154) ocfs2_unblock_signals(&oldset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155) sb_end_pagefault(inode->i_sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159) static const struct vm_operations_struct ocfs2_file_vm_ops = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 160) .fault = ocfs2_fault,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 161) .page_mkwrite = ocfs2_page_mkwrite,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 162) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 163)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 164) int ocfs2_mmap(struct file *file, struct vm_area_struct *vma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 165) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 166) int ret = 0, lock_level = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 168) ret = ocfs2_inode_lock_atime(file_inode(file),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169) file->f_path.mnt, &lock_level, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170) if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171) mlog_errno(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 173) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 174) ocfs2_inode_unlock(file_inode(file), lock_level);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 175) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 176) vma->vm_ops = &ocfs2_file_vm_ops;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 177) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 178) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 179)