^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) /* SPDX-License-Identifier: GPL-2.0 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) * Copyright (C) 2007 Oracle. All rights reserved.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) #ifndef BTRFS_INODE_H
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) #define BTRFS_INODE_H
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) #include <linux/hash.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) #include <linux/refcount.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11) #include "extent_map.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12) #include "extent_io.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) #include "ordered-data.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) #include "delayed-inode.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15)
/*
 * Per-inode runtime flag identifiers.
 *
 * NOTE(review): presumably these are bit numbers used with
 * btrfs_inode::runtime_flags - confirm against the users.
 *
 * Historical note carried over from the previous comment: a flag referred to
 * as "ordered_data_close" is set by truncate when a file that used to have
 * good data has been truncated to zero.  While it is set, the btrfs file
 * release call adds the inode to the ordered operations list, so that any
 * new data the application may have written is flushed out before commit.
 * NOTE(review): no enumerator carries that name any more; presumably this
 * describes BTRFS_INODE_FLUSH_ON_CLOSE - confirm.
 */
enum {
	BTRFS_INODE_FLUSH_ON_CLOSE,
	BTRFS_INODE_DUMMY,
	BTRFS_INODE_IN_DEFRAG,
	BTRFS_INODE_HAS_ASYNC_EXTENT,

	/*
	 * Must always be set under the VFS' inode lock.  Setting it without
	 * that lock can race with fsync: we would start as a fast fsync and
	 * then end up in a full fsync racing with ordered extent completion.
	 */
	BTRFS_INODE_NEEDS_FULL_SYNC,
	BTRFS_INODE_COPY_EVERYTHING,
	BTRFS_INODE_IN_DELALLOC_LIST,
	BTRFS_INODE_HAS_PROPS,
	BTRFS_INODE_SNAPSHOT_FLUSH,

	/*
	 * Set and used while logging an inode to record that the inode has no
	 * xattrs, which lets subsequent fsyncs skip the search for xattrs to
	 * log.  It must be cleared whenever a xattr is added to the inode.
	 */
	BTRFS_INODE_NO_XATTRS,

	/*
	 * Set in contexts where a transaction must be started while dirty
	 * pages exist and the respective file range is locked.  When space is
	 * low and reserving space for the transaction needs to flush delalloc,
	 * this makes sure delalloc is not flushed for this inode, since that
	 * could deadlock (on the file range, inode's io_tree).
	 */
	BTRFS_INODE_NO_DELALLOC_FLUSH,
};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56) /* in memory btrfs inode */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57) struct btrfs_inode {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58) /* which subvolume this inode belongs to */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59) struct btrfs_root *root;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61) /* key used to find this inode on disk. This is used by the code
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62) * to read in roots of subvolumes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64) struct btrfs_key location;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67) * Lock for counters and all fields used to determine if the inode is in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68) * the log or not (last_trans, last_sub_trans, last_log_commit,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69) * logged_trans).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71) spinlock_t lock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73) /* the extent_tree has caches of all the extent mappings to disk */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74) struct extent_map_tree extent_tree;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76) /* the io_tree does range state (DIRTY, LOCKED etc) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77) struct extent_io_tree io_tree;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79) /* special utility tree used to record which mirrors have already been
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80) * tried when checksums fail for a given block
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82) struct extent_io_tree io_failure_tree;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 83)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 84) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 85) * Keep track of where the inode has extent items mapped in order to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 86) * make sure the i_size adjustments are accurate
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 87) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88) struct extent_io_tree file_extent_tree;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90) /* held while logging the inode in tree-log.c */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91) struct mutex log_mutex;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93) /* used to order data wrt metadata */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94) struct btrfs_ordered_inode_tree ordered_tree;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 96) /* list of all the delalloc inodes in the FS. There are times we need
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97) * to write all the delalloc pages to disk, and this list is used
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 98) * to walk them all.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 99) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100) struct list_head delalloc_inodes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) /* node for the red-black tree that links inodes in subvolume root */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) struct rb_node rb_node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105) unsigned long runtime_flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) /* Keep track of who's O_SYNC/fsyncing currently */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108) atomic_t sync_writers;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) /* full 64 bit generation number, struct vfs_inode doesn't have a big
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) * enough field for this.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113) u64 generation;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) * transid of the trans_handle that last modified this inode
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118) u64 last_trans;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121) * transid that last logged this inode
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) u64 logged_trans;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) * log transid when this inode was last modified
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128) int last_sub_trans;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130) /* a local copy of root's last_log_commit */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131) int last_log_commit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133) /* total number of bytes pending delalloc, used by stat to calc the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134) * real block usage of the file
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136) u64 delalloc_bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139) * Total number of bytes pending delalloc that fall within a file
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140) * range that is either a hole or beyond EOF (and no prealloc extent
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141) * exists in the range). This is always <= delalloc_bytes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143) u64 new_delalloc_bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146) * total number of bytes pending defrag, used by stat to check whether
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147) * it needs COW.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149) u64 defrag_bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152) * the size of the file stored in the metadata on disk. data=ordered
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153) * means the in-memory i_size might be larger than the size on disk
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154) * because not all the blocks are written yet.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156) u64 disk_i_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159) * if this is a directory then index_cnt is the counter for the index
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 160) * number for new files that are created
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 161) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 162) u64 index_cnt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 163)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 164) /* Cache the directory index number to speed the dir/file remove */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 165) u64 dir_index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 166)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167) /* the fsync log has some corner cases that mean we have to check
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 168) * directories to see if any unlinks have been done before
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169) * the directory was logged. See tree-log.c for all the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170) * details
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172) u64 last_unlink_trans;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 173)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 174) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 175) * The id/generation of the last transaction where this inode was
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 176) * either the source or the destination of a clone/dedupe operation.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 177) * Used when logging an inode to know if there are shared extents that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 178) * need special care when logging checksum items, to avoid duplicate
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 179) * checksum items in a log (which can lead to a corruption where we end
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 180) * up with missing checksum ranges after log replay).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 181) * Protected by the vfs inode lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 182) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 183) u64 last_reflink_trans;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 184)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 185) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 186) * Number of bytes outstanding that are going to need csums. This is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 187) * used in ENOSPC accounting.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 188) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 189) u64 csum_bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 190)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 191) /* flags field from the on disk inode */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 192) u32 flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 193)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 194) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 195) * Counters to keep track of the number of extent item's we may use due
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 196) * to delalloc and such. outstanding_extents is the number of extent
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 197) * items we think we'll end up using, and reserved_extents is the number
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 198) * of extent items we've reserved metadata for.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 199) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 200) unsigned outstanding_extents;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 201)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 202) struct btrfs_block_rsv block_rsv;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 203)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 204) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 205) * Cached values of inode properties
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 206) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 207) unsigned prop_compress; /* per-file compression algorithm */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 208) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 209) * Force compression on the file using the defrag ioctl, could be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 210) * different from prop_compress and takes precedence if set
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 211) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 212) unsigned defrag_compress;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 213)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 214) struct btrfs_delayed_node *delayed_node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 215)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 216) /* File creation time. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 217) struct timespec64 i_otime;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 218)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 219) /* Hook into fs_info->delayed_iputs */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 220) struct list_head delayed_iput;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 221)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 222) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 223) * To avoid races between lockless (i_mutex not held) direct IO writes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 224) * and concurrent fsync requests. Direct IO writes must acquire read
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 225) * access on this semaphore for creating an extent map and its
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 226) * corresponding ordered extent. The fast fsync path must acquire write
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 227) * access on this semaphore before it collects ordered extents and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 228) * extent maps.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 229) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 230) struct rw_semaphore dio_sem;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 231)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 232) struct inode vfs_inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 233) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 234)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 235) static inline u32 btrfs_inode_sectorsize(const struct btrfs_inode *inode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 236) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 237) return inode->root->fs_info->sectorsize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 238) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 239)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 240) static inline struct btrfs_inode *BTRFS_I(const struct inode *inode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 241) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 242) return container_of(inode, struct btrfs_inode, vfs_inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 243) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 244)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 245) static inline unsigned long btrfs_inode_hash(u64 objectid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 246) const struct btrfs_root *root)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 247) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 248) u64 h = objectid ^ (root->root_key.objectid * GOLDEN_RATIO_PRIME);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 249)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 250) #if BITS_PER_LONG == 32
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 251) h = (h >> 32) ^ (h & 0xffffffff);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 252) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 253)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 254) return (unsigned long)h;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 255) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 256)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 257) static inline void btrfs_insert_inode_hash(struct inode *inode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 258) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 259) unsigned long h = btrfs_inode_hash(inode->i_ino, BTRFS_I(inode)->root);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 260)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 261) __insert_inode_hash(inode, h);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 262) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 263)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 264) static inline u64 btrfs_ino(const struct btrfs_inode *inode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 265) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 266) u64 ino = inode->location.objectid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 267)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 268) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 269) * !ino: btree_inode
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 270) * type == BTRFS_ROOT_ITEM_KEY: subvol dir
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 271) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 272) if (!ino || inode->location.type == BTRFS_ROOT_ITEM_KEY)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 273) ino = inode->vfs_inode.i_ino;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 274) return ino;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 275) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 276)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 277) static inline void btrfs_i_size_write(struct btrfs_inode *inode, u64 size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 278) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 279) i_size_write(&inode->vfs_inode, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 280) inode->disk_i_size = size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 281) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 282)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 283) static inline bool btrfs_is_free_space_inode(struct btrfs_inode *inode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 284) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 285) struct btrfs_root *root = inode->root;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 286)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 287) if (root == root->fs_info->tree_root &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 288) btrfs_ino(inode) != BTRFS_BTREE_INODE_OBJECTID)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 289) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 290) if (inode->location.objectid == BTRFS_FREE_INO_OBJECTID)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 291) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 292) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 293) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 294)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 295) static inline bool is_data_inode(struct inode *inode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 296) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 297) return btrfs_ino(BTRFS_I(inode)) != BTRFS_BTREE_INODE_OBJECTID;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 298) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 299)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 300) static inline void btrfs_mod_outstanding_extents(struct btrfs_inode *inode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 301) int mod)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 302) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 303) lockdep_assert_held(&inode->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 304) inode->outstanding_extents += mod;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 305) if (btrfs_is_free_space_inode(inode))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 306) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 307) trace_btrfs_inode_mod_outstanding_extents(inode->root, btrfs_ino(inode),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 308) mod);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 309) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 310)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 311) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 312) * Called every time after doing a buffered, direct IO or memory mapped write.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 313) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 314) * This is to ensure that if we write to a file that was previously fsynced in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 315) * the current transaction, then try to fsync it again in the same transaction,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 316) * we will know that there were changes in the file and that it needs to be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 317) * logged.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 318) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 319) static inline void btrfs_set_inode_last_sub_trans(struct btrfs_inode *inode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 320) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 321) spin_lock(&inode->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 322) inode->last_sub_trans = inode->root->log_transid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 323) spin_unlock(&inode->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 324) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 325)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 326) static inline int btrfs_inode_in_log(struct btrfs_inode *inode, u64 generation)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 327) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 328) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 329)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 330) spin_lock(&inode->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 331) if (inode->logged_trans == generation &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 332) inode->last_sub_trans <= inode->last_log_commit &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 333) inode->last_sub_trans <= inode->root->last_log_commit) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 334) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 335) * After a ranged fsync we might have left some extent maps
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 336) * (that fall outside the fsync's range). So return false
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 337) * here if the list isn't empty, to make sure btrfs_log_inode()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 338) * will be called and process those extent maps.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 339) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 340) smp_mb();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 341) if (list_empty(&inode->extent_tree.modified_extents))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 342) ret = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 343) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 344) spin_unlock(&inode->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 345) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 346) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 347)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 348) struct btrfs_dio_private {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 349) struct inode *inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 350) u64 logical_offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 351) u64 disk_bytenr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 352) u64 bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 353)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 354) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 355) * References to this structure. There is one reference per in-flight
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 356) * bio plus one while we're still setting up.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 357) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 358) refcount_t refs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 359)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 360) /* dio_bio came from fs/direct-io.c */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 361) struct bio *dio_bio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 362)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 363) /* Array of checksums */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 364) u8 csums[];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 365) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 366)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 367) /* Array of bytes with variable length, hexadecimal format 0x1234 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 368) #define CSUM_FMT "0x%*phN"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 369) #define CSUM_FMT_VALUE(size, bytes) size, bytes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 370)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 371) static inline void btrfs_print_data_csum_error(struct btrfs_inode *inode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 372) u64 logical_start, u8 *csum, u8 *csum_expected, int mirror_num)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 373) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 374) struct btrfs_root *root = inode->root;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 375) struct btrfs_super_block *sb = root->fs_info->super_copy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 376) const u16 csum_size = btrfs_super_csum_size(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 377)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 378) /* Output minus objectid, which is more meaningful */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 379) if (root->root_key.objectid >= BTRFS_LAST_FREE_OBJECTID)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 380) btrfs_warn_rl(root->fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 381) "csum failed root %lld ino %lld off %llu csum " CSUM_FMT " expected csum " CSUM_FMT " mirror %d",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 382) root->root_key.objectid, btrfs_ino(inode),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 383) logical_start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 384) CSUM_FMT_VALUE(csum_size, csum),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 385) CSUM_FMT_VALUE(csum_size, csum_expected),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 386) mirror_num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 387) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 388) btrfs_warn_rl(root->fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 389) "csum failed root %llu ino %llu off %llu csum " CSUM_FMT " expected csum " CSUM_FMT " mirror %d",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 390) root->root_key.objectid, btrfs_ino(inode),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 391) logical_start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 392) CSUM_FMT_VALUE(csum_size, csum),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 393) CSUM_FMT_VALUE(csum_size, csum_expected),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 394) mirror_num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 395) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 396)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 397) #endif