^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) // SPDX-License-Identifier: GPL-2.0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) * (C) 2001 Clemson University and The University of Chicago
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) * Copyright 2018 Omnibond Systems, L.L.C.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) * See COPYING in top-level directory.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) * Linux VFS file operations.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) #include "protocol.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) #include "orangefs-kernel.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15) #include "orangefs-bufmap.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) #include <linux/fs.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) #include <linux/pagemap.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19) static int flush_racache(struct inode *inode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21) struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22) struct orangefs_kernel_op_s *new_op;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25) gossip_debug(GOSSIP_UTILS_DEBUG,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) "%s: %pU: Handle is %pU | fs_id %d\n", __func__,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27) get_khandle_from_ino(inode), &orangefs_inode->refn.khandle,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28) orangefs_inode->refn.fs_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30) new_op = op_alloc(ORANGEFS_VFS_OP_RA_FLUSH);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31) if (!new_op)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33) new_op->upcall.req.ra_cache_flush.refn = orangefs_inode->refn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35) ret = service_operation(new_op, "orangefs_flush_racache",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36) get_interruptible_flag(inode));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38) gossip_debug(GOSSIP_UTILS_DEBUG, "%s: got return value of %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39) __func__, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41) op_release(new_op);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46) * Post and wait for the I/O upcall to finish
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48) ssize_t wait_for_direct_io(enum ORANGEFS_io_type type, struct inode *inode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49) loff_t *offset, struct iov_iter *iter, size_t total_size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50) loff_t readahead_size, struct orangefs_write_range *wr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51) int *index_return, struct file *file)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53) struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54) struct orangefs_khandle *handle = &orangefs_inode->refn.khandle;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55) struct orangefs_kernel_op_s *new_op = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56) int buffer_index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57) ssize_t ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58) size_t copy_amount;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59) int open_for_read;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60) int open_for_write;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62) new_op = op_alloc(ORANGEFS_VFS_OP_FILE_IO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63) if (!new_op)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66) /* synchronous I/O */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67) new_op->upcall.req.io.readahead_size = readahead_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68) new_op->upcall.req.io.io_type = type;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69) new_op->upcall.req.io.refn = orangefs_inode->refn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71) populate_shared_memory:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72) /* get a shared buffer index */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73) buffer_index = orangefs_bufmap_get();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74) if (buffer_index < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75) ret = buffer_index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76) gossip_debug(GOSSIP_FILE_DEBUG,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77) "%s: orangefs_bufmap_get failure (%zd)\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78) __func__, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81) gossip_debug(GOSSIP_FILE_DEBUG,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82) "%s(%pU): GET op %p -> buffer_index %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 83) __func__,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 84) handle,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 85) new_op,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 86) buffer_index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 87)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88) new_op->uses_shared_memory = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89) new_op->upcall.req.io.buf_index = buffer_index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90) new_op->upcall.req.io.count = total_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91) new_op->upcall.req.io.offset = *offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92) if (type == ORANGEFS_IO_WRITE && wr) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93) new_op->upcall.uid = from_kuid(&init_user_ns, wr->uid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94) new_op->upcall.gid = from_kgid(&init_user_ns, wr->gid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 96) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97) * Orangefs has no open, and orangefs checks file permissions
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 98) * on each file access. Posix requires that file permissions
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 99) * be checked on open and nowhere else. Orangefs-through-the-kernel
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100) * needs to seem posix compliant.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) * The VFS opens files, even if the filesystem provides no
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) * method. We can see if a file was successfully opened for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104) * read and or for write by looking at file->f_mode.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) * When writes are flowing from the page cache, file is no
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) * longer available. We can trust the VFS to have checked
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108) * file->f_mode before writing to the page cache.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) * The mode of a file might change between when it is opened
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) * and IO commences, or it might be created with an arbitrary mode.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113) * We'll make sure we don't hit EACCES during the IO stage by
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) * using UID 0. Some of the time we have access without changing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) * to UID 0 - how to check?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117) if (file) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118) open_for_write = file->f_mode & FMODE_WRITE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) open_for_read = file->f_mode & FMODE_READ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121) open_for_write = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122) open_for_read = 0; /* not relevant? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124) if ((type == ORANGEFS_IO_WRITE) && open_for_write)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125) new_op->upcall.uid = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) if ((type == ORANGEFS_IO_READ) && open_for_read)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127) new_op->upcall.uid = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129) gossip_debug(GOSSIP_FILE_DEBUG,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130) "%s(%pU): offset: %llu total_size: %zd\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131) __func__,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) handle,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133) llu(*offset),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134) total_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136) * Stage 1: copy the buffers into client-core's address space
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138) if (type == ORANGEFS_IO_WRITE && total_size) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139) ret = orangefs_bufmap_copy_from_iovec(iter, buffer_index,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140) total_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141) if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142) gossip_err("%s: Failed to copy-in buffers. Please make sure that the pvfs2-client is running. %ld\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143) __func__, (long)ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148) gossip_debug(GOSSIP_FILE_DEBUG,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149) "%s(%pU): Calling post_io_request with tag (%llu)\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150) __func__,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151) handle,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152) llu(new_op->tag));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154) /* Stage 2: Service the I/O operation */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155) ret = service_operation(new_op,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156) type == ORANGEFS_IO_WRITE ?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157) "file_write" :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158) "file_read",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159) get_interruptible_flag(inode));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 160)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 161) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 162) * If service_operation() returns -EAGAIN #and# the operation was
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 163) * purged from orangefs_request_list or htable_ops_in_progress, then
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 164) * we know that the client was restarted, causing the shared memory
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 165) * area to be wiped clean. To restart a write operation in this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 166) * case, we must re-copy the data from the user's iovec to a NEW
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167) * shared memory location. To restart a read operation, we must get
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 168) * a new shared memory location.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170) if (ret == -EAGAIN && op_state_purged(new_op)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171) orangefs_bufmap_put(buffer_index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172) if (type == ORANGEFS_IO_WRITE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 173) iov_iter_revert(iter, total_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 174) gossip_debug(GOSSIP_FILE_DEBUG,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 175) "%s:going to repopulate_shared_memory.\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 176) __func__);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 177) goto populate_shared_memory;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 178) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 179)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 180) if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 181) if (ret == -EINTR) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 182) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 183) * We can't return EINTR if any data was written,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 184) * it's not POSIX. It is minimally acceptable
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 185) * to give a partial write, the way NFS does.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 186) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 187) * It would be optimal to return all or nothing,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 188) * but if a userspace write is bigger than
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 189) * an IO buffer, and the interrupt occurs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 190) * between buffer writes, that would not be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 191) * possible.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 192) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 193) switch (new_op->op_state - OP_VFS_STATE_GIVEN_UP) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 194) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 195) * If the op was waiting when the interrupt
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 196) * occurred, then the client-core did not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 197) * trigger the write.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 198) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 199) case OP_VFS_STATE_WAITING:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 200) if (*offset == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 201) ret = -EINTR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 202) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 203) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 204) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 205) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 206) * If the op was in progress when the interrupt
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 207) * occurred, then the client-core was able to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 208) * trigger the write.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 209) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 210) case OP_VFS_STATE_INPROGR:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 211) if (type == ORANGEFS_IO_READ)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 212) ret = -EINTR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 213) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 214) ret = total_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 215) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 216) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 217) gossip_err("%s: unexpected op state :%d:.\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 218) __func__,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 219) new_op->op_state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 220) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 221) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 222) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 223) gossip_debug(GOSSIP_FILE_DEBUG,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 224) "%s: got EINTR, state:%d: %p\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 225) __func__,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 226) new_op->op_state,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 227) new_op);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 228) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 229) gossip_err("%s: error in %s handle %pU, returning %zd\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 230) __func__,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 231) type == ORANGEFS_IO_READ ?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 232) "read from" : "write to",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 233) handle, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 234) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 235) if (orangefs_cancel_op_in_progress(new_op))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 236) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 237)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 238) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 239) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 240)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 241) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 242) * Stage 3: Post copy buffers from client-core's address space
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 243) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 244) if (type == ORANGEFS_IO_READ && new_op->downcall.resp.io.amt_complete) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 245) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 246) * NOTE: the iovector can either contain addresses which
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 247) * can futher be kernel-space or user-space addresses.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 248) * or it can pointers to struct page's
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 249) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 250)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 251) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 252) * When reading, readahead_size will only be zero when
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 253) * we're doing O_DIRECT, otherwise we got here from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 254) * orangefs_readpage.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 255) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 256) * If we got here from orangefs_readpage we want to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 257) * copy either a page or the whole file into the io
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 258) * vector, whichever is smaller.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 259) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 260) if (readahead_size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 261) copy_amount =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 262) min(new_op->downcall.resp.io.amt_complete,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 263) (__s64)PAGE_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 264) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 265) copy_amount = new_op->downcall.resp.io.amt_complete;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 266)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 267) ret = orangefs_bufmap_copy_to_iovec(iter, buffer_index,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 268) copy_amount);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 269) if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 270) gossip_err("%s: Failed to copy-out buffers. Please make sure that the pvfs2-client is running (%ld)\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 271) __func__, (long)ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 272) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 273) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 274) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 275) gossip_debug(GOSSIP_FILE_DEBUG,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 276) "%s(%pU): Amount %s, returned by the sys-io call:%d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 277) __func__,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 278) handle,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 279) type == ORANGEFS_IO_READ ? "read" : "written",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 280) (int)new_op->downcall.resp.io.amt_complete);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 281)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 282) ret = new_op->downcall.resp.io.amt_complete;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 283)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 284) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 285) if (buffer_index >= 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 286) if ((readahead_size) && (type == ORANGEFS_IO_READ)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 287) /* readpage */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 288) *index_return = buffer_index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 289) gossip_debug(GOSSIP_FILE_DEBUG,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 290) "%s: hold on to buffer_index :%d:\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 291) __func__, buffer_index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 292) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 293) /* O_DIRECT */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 294) orangefs_bufmap_put(buffer_index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 295) gossip_debug(GOSSIP_FILE_DEBUG,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 296) "%s(%pU): PUT buffer_index %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 297) __func__, handle, buffer_index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 298) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 299) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 300) op_release(new_op);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 301) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 302) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 303)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 304) int orangefs_revalidate_mapping(struct inode *inode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 305) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 306) struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 307) struct address_space *mapping = inode->i_mapping;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 308) unsigned long *bitlock = &orangefs_inode->bitlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 309) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 310)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 311) while (1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 312) ret = wait_on_bit(bitlock, 1, TASK_KILLABLE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 313) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 314) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 315) spin_lock(&inode->i_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 316) if (test_bit(1, bitlock)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 317) spin_unlock(&inode->i_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 318) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 319) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 320) if (!time_before(jiffies, orangefs_inode->mapping_time))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 321) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 322) spin_unlock(&inode->i_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 323) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 324) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 325)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 326) set_bit(1, bitlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 327) smp_wmb();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 328) spin_unlock(&inode->i_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 329)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 330) unmap_mapping_range(mapping, 0, 0, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 331) ret = filemap_write_and_wait(mapping);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 332) if (!ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 333) ret = invalidate_inode_pages2(mapping);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 334)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 335) orangefs_inode->mapping_time = jiffies +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 336) orangefs_cache_timeout_msecs*HZ/1000;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 337)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 338) clear_bit(1, bitlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 339) smp_mb__after_atomic();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 340) wake_up_bit(bitlock, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 341)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 342) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 343) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 344)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 345) static ssize_t orangefs_file_read_iter(struct kiocb *iocb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 346) struct iov_iter *iter)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 347) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 348) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 349) orangefs_stats.reads++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 350)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 351) down_read(&file_inode(iocb->ki_filp)->i_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 352) ret = orangefs_revalidate_mapping(file_inode(iocb->ki_filp));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 353) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 354) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 355)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 356) ret = generic_file_read_iter(iocb, iter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 357) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 358) up_read(&file_inode(iocb->ki_filp)->i_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 359) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 360) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 361)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 362) static ssize_t orangefs_file_write_iter(struct kiocb *iocb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 363) struct iov_iter *iter)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 364) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 365) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 366) orangefs_stats.writes++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 367)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 368) if (iocb->ki_pos > i_size_read(file_inode(iocb->ki_filp))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 369) ret = orangefs_revalidate_mapping(file_inode(iocb->ki_filp));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 370) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 371) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 372) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 373)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 374) ret = generic_file_write_iter(iocb, iter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 375) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 376) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 377)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 378) static int orangefs_getflags(struct inode *inode, unsigned long *uval)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 379) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 380) __u64 val = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 381) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 382)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 383) ret = orangefs_inode_getxattr(inode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 384) "user.pvfs2.meta_hint",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 385) &val, sizeof(val));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 386) if (ret < 0 && ret != -ENODATA)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 387) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 388) else if (ret == -ENODATA)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 389) val = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 390) *uval = val;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 391) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 392) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 393)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 394) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 395) * Perform a miscellaneous operation on a file.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 396) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 397) static long orangefs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 398) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 399) struct inode *inode = file_inode(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 400) int ret = -ENOTTY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 401) __u64 val = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 402) unsigned long uval;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 403)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 404) gossip_debug(GOSSIP_FILE_DEBUG,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 405) "orangefs_ioctl: called with cmd %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 406) cmd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 407)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 408) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 409) * we understand some general ioctls on files, such as the immutable
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 410) * and append flags
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 411) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 412) if (cmd == FS_IOC_GETFLAGS) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 413) ret = orangefs_getflags(inode, &uval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 414) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 415) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 416) gossip_debug(GOSSIP_FILE_DEBUG,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 417) "orangefs_ioctl: FS_IOC_GETFLAGS: %llu\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 418) (unsigned long long)uval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 419) return put_user(uval, (int __user *)arg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 420) } else if (cmd == FS_IOC_SETFLAGS) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 421) unsigned long old_uval;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 422)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 423) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 424) if (get_user(uval, (int __user *)arg))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 425) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 426) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 427) * ORANGEFS_MIRROR_FL is set internally when the mirroring mode
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 428) * is turned on for a file. The user is not allowed to turn
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 429) * on this bit, but the bit is present if the user first gets
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 430) * the flags and then updates the flags with some new
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 431) * settings. So, we ignore it in the following edit. bligon.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 432) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 433) if ((uval & ~ORANGEFS_MIRROR_FL) &
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 434) (~(FS_IMMUTABLE_FL | FS_APPEND_FL | FS_NOATIME_FL))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 435) gossip_err("orangefs_ioctl: the FS_IOC_SETFLAGS only supports setting one of FS_IMMUTABLE_FL|FS_APPEND_FL|FS_NOATIME_FL\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 436) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 437) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 438) ret = orangefs_getflags(inode, &old_uval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 439) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 440) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 441) ret = vfs_ioc_setflags_prepare(inode, old_uval, uval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 442) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 443) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 444) val = uval;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 445) gossip_debug(GOSSIP_FILE_DEBUG,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 446) "orangefs_ioctl: FS_IOC_SETFLAGS: %llu\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 447) (unsigned long long)val);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 448) ret = orangefs_inode_setxattr(inode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 449) "user.pvfs2.meta_hint",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 450) &val, sizeof(val), 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 451) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 452)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 453) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 454) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 455)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 456) static vm_fault_t orangefs_fault(struct vm_fault *vmf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 457) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 458) struct file *file = vmf->vma->vm_file;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 459) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 460) ret = orangefs_inode_getattr(file->f_mapping->host,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 461) ORANGEFS_GETATTR_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 462) if (ret == -ESTALE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 463) ret = -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 464) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 465) gossip_err("%s: orangefs_inode_getattr failed, "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 466) "ret:%d:.\n", __func__, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 467) return VM_FAULT_SIGBUS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 468) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 469) return filemap_fault(vmf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 470) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 471)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 472) static const struct vm_operations_struct orangefs_file_vm_ops = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 473) .fault = orangefs_fault,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 474) .map_pages = filemap_map_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 475) .page_mkwrite = orangefs_page_mkwrite,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 476) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 477)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 478) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 479) * Memory map a region of a file.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 480) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 481) static int orangefs_file_mmap(struct file *file, struct vm_area_struct *vma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 482) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 483) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 484)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 485) ret = orangefs_revalidate_mapping(file_inode(file));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 486) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 487) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 488)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 489) gossip_debug(GOSSIP_FILE_DEBUG,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 490) "orangefs_file_mmap: called on %s\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 491) (file ?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 492) (char *)file->f_path.dentry->d_name.name :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 493) (char *)"Unknown"));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 494)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 495) /* set the sequential readahead hint */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 496) vma->vm_flags |= VM_SEQ_READ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 497) vma->vm_flags &= ~VM_RAND_READ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 498)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 499) file_accessed(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 500) vma->vm_ops = &orangefs_file_vm_ops;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 501) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 502) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 503)
/* Read the nrpages field of the mapping that @mapping points to. */
#define mapping_nrpages(mapping) ((mapping)->nrpages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 505)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 506) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 507) * Called to notify the module that there are no more references to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 508) * this file (i.e. no processes have it open).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 509) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 510) * \note Not called when each file is closed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 511) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 512) static int orangefs_file_release(struct inode *inode, struct file *file)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 513) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 514) gossip_debug(GOSSIP_FILE_DEBUG,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 515) "orangefs_file_release: called on %pD\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 516) file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 517)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 518) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 519) * remove all associated inode pages from the page cache and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 520) * readahead cache (if any); this forces an expensive refresh of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 521) * data for the next caller of mmap (or 'get_block' accesses)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 522) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 523) if (file_inode(file) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 524) file_inode(file)->i_mapping &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 525) mapping_nrpages(&file_inode(file)->i_data)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 526) if (orangefs_features & ORANGEFS_FEATURE_READAHEAD) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 527) gossip_debug(GOSSIP_INODE_DEBUG,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 528) "calling flush_racache on %pU\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 529) get_khandle_from_ino(inode));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 530) flush_racache(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 531) gossip_debug(GOSSIP_INODE_DEBUG,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 532) "flush_racache finished\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 533) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 534)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 535) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 536) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 537) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 538)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 539) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 540) * Push all data for a specific file onto permanent storage.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 541) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 542) static int orangefs_fsync(struct file *file,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 543) loff_t start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 544) loff_t end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 545) int datasync)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 546) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 547) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 548) struct orangefs_inode_s *orangefs_inode =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 549) ORANGEFS_I(file_inode(file));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 550) struct orangefs_kernel_op_s *new_op = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 551)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 552) ret = filemap_write_and_wait_range(file_inode(file)->i_mapping,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 553) start, end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 554) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 555) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 556)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 557) new_op = op_alloc(ORANGEFS_VFS_OP_FSYNC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 558) if (!new_op)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 559) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 560) new_op->upcall.req.fsync.refn = orangefs_inode->refn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 561)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 562) ret = service_operation(new_op,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 563) "orangefs_fsync",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 564) get_interruptible_flag(file_inode(file)));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 565)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 566) gossip_debug(GOSSIP_FILE_DEBUG,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 567) "orangefs_fsync got return value of %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 568) ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 569)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 570) op_release(new_op);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 571) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 572) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 573)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 574) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 575) * Change the file pointer position for an instance of an open file.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 576) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 577) * \note If .llseek is overriden, we must acquire lock as described in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 578) * Documentation/filesystems/locking.rst.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 579) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 580) * Future upgrade could support SEEK_DATA and SEEK_HOLE but would
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 581) * require much changes to the FS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 582) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 583) static loff_t orangefs_file_llseek(struct file *file, loff_t offset, int origin)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 584) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 585) int ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 586) struct inode *inode = file_inode(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 587)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 588) if (origin == SEEK_END) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 589) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 590) * revalidate the inode's file size.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 591) * NOTE: We are only interested in file size here,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 592) * so we set mask accordingly.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 593) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 594) ret = orangefs_inode_getattr(file->f_mapping->host,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 595) ORANGEFS_GETATTR_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 596) if (ret == -ESTALE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 597) ret = -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 598) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 599) gossip_debug(GOSSIP_FILE_DEBUG,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 600) "%s:%s:%d calling make bad inode\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 601) __FILE__,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 602) __func__,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 603) __LINE__);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 604) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 605) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 606) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 607)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 608) gossip_debug(GOSSIP_FILE_DEBUG,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 609) "orangefs_file_llseek: offset is %ld | origin is %d"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 610) " | inode size is %lu\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 611) (long)offset,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 612) origin,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 613) (unsigned long)i_size_read(inode));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 614)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 615) return generic_file_llseek(file, offset, origin);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 616) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 617)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 618) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 619) * Support local locks (locks that only this kernel knows about)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 620) * if Orangefs was mounted -o local_lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 621) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 622) static int orangefs_lock(struct file *filp, int cmd, struct file_lock *fl)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 623) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 624) int rc = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 625)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 626) if (ORANGEFS_SB(file_inode(filp)->i_sb)->flags & ORANGEFS_OPT_LOCAL_LOCK) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 627) if (cmd == F_GETLK) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 628) rc = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 629) posix_test_lock(filp, fl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 630) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 631) rc = posix_lock_file(filp, fl, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 632) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 633) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 634)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 635) return rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 636) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 637)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 638) static int orangefs_flush(struct file *file, fl_owner_t id)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 639) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 640) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 641) * This is vfs_fsync_range(file, 0, LLONG_MAX, 0) without the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 642) * service_operation in orangefs_fsync.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 643) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 644) * Do not send fsync to OrangeFS server on a close. Do send fsync
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 645) * on an explicit fsync call. This duplicates historical OrangeFS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 646) * behavior.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 647) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 648) int r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 649)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 650) r = filemap_write_and_wait_range(file->f_mapping, 0, LLONG_MAX);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 651) if (r > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 652) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 653) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 654) return r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 655) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 656)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 657) /** ORANGEFS implementation of VFS file operations */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 658) const struct file_operations orangefs_file_operations = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 659) .llseek = orangefs_file_llseek,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 660) .read_iter = orangefs_file_read_iter,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 661) .write_iter = orangefs_file_write_iter,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 662) .lock = orangefs_lock,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 663) .unlocked_ioctl = orangefs_ioctl,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 664) .mmap = orangefs_file_mmap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 665) .open = generic_file_open,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 666) .flush = orangefs_flush,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 667) .release = orangefs_file_release,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 668) .fsync = orangefs_fsync,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 669) };