// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2008 Oracle. All rights reserved.
 */

#include <linux/sched.h>
#include <linux/pagemap.h>
#include <linux/spinlock.h>
#include <linux/page-flags.h>
#include <asm/bug.h>
#include "misc.h"
#include "ctree.h"
#include "extent_io.h"
#include "locking.h"

/*
 * Extent buffer locking
 * =====================
 *
 * The locks use a custom scheme that allows doing more operations than are
 * available from current locking primitives. The building blocks are still
 * rwlock and wait queues.
 *
 * Required semantics:
 *
 * - reader/writer exclusion
 * - writer/writer exclusion
 * - reader/reader sharing
 * - spinning lock semantics
 * - blocking lock semantics
 * - try-lock semantics for readers and writers
 * - one level nesting, allowing read lock to be taken by the same thread that
 *   already has write lock
 *
 * The extent buffer locks (also called tree locks) manage access to eb data
 * related to the storage in the b-tree (keys, items, but not the individual
 * members of eb).
 * We want concurrency of many readers and safe updates. The underlying locking
 * is done by read-write spinlock and the blocking part is implemented using
 * counters and wait queues.
 *
 * spinning semantics - the low-level rwlock is held so all other threads that
 *                      want to take it are spinning on it.
 *
 * blocking semantics - the low-level rwlock is not held but the counter
 *                      denotes how many times the blocking lock was held;
 *                      sleeping is possible
 *
 * Write lock always allows only one thread to access the data.
 *
 *
 * Debugging
 * ---------
 *
 * There are additional state counters that are asserted in various contexts,
 * removed from non-debug build to reduce extent_buffer size and for
 * performance reasons.
 *
 *
 * Lock recursion
 * --------------
 *
 * A write operation on a tree might indirectly start a lookup on the same
 * tree. This can happen when btrfs_cow_block locks the tree and needs to
 * look up free extents.
 *
 * btrfs_cow_block
 *   ..
 *   alloc_tree_block_no_bg_flush
 *     btrfs_alloc_tree_block
 *       btrfs_reserve_extent
 *         ..
 *         load_free_space_cache
 *           ..
 *           btrfs_lookup_file_extent
 *             btrfs_search_slot
 *
 *
 * Locking pattern - spinning
 * --------------------------
 *
 * The simple locking scenario; the +--+ denotes the spinning section.
 *
 * +- btrfs_tree_lock
 * |  - extent_buffer::rwlock is held
 * |  - no heavy operations should happen, eg. IO, memory allocations, large
 * |    structure traversals
 * +- btrfs_tree_unlock
 *
 *
 * Locking pattern - blocking
 * --------------------------
 *
 * The blocking write uses the following scheme. The +--+ denotes the spinning
 * section.
 *
 * +- btrfs_tree_lock
 * |
 * +- btrfs_set_lock_blocking_write
 *
 *   - allowed: IO, memory allocations, etc.
 *
 * -- btrfs_tree_unlock - note, no explicit unblocking necessary
 *
 *
 * Blocking read is similar.
 *
 * +- btrfs_tree_read_lock
 * |
 * +- btrfs_set_lock_blocking_read
 *
 *   - heavy operations allowed
 *
 * +- btrfs_tree_read_unlock_blocking
 * |
 * +- btrfs_tree_read_unlock
 *
 */
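
/*
 * Illustrative sketch, not part of the build: how the spinning and blocking
 * write patterns above are typically combined by a caller. The names 'eb' and
 * 'heavy_work' are hypothetical; the btrfs_tree_* calls are the functions
 * defined below.
 *
 *   btrfs_tree_lock(eb);                 // rwlock held, keep this short
 *   // ... lightweight changes to the eb data ...
 *   btrfs_set_lock_blocking_write(eb);   // rwlock dropped, waiters may sleep
 *   heavy_work(eb);                      // IO, memory allocations, etc.
 *   btrfs_tree_unlock(eb);               // handles both modes, wakes waiters
 */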

#ifdef CONFIG_BTRFS_DEBUG
static inline void btrfs_assert_spinning_writers_get(struct extent_buffer *eb)
{
	WARN_ON(eb->spinning_writers);
	eb->spinning_writers++;
}

static inline void btrfs_assert_spinning_writers_put(struct extent_buffer *eb)
{
	WARN_ON(eb->spinning_writers != 1);
	eb->spinning_writers--;
}

static inline void btrfs_assert_no_spinning_writers(struct extent_buffer *eb)
{
	WARN_ON(eb->spinning_writers);
}

static inline void btrfs_assert_spinning_readers_get(struct extent_buffer *eb)
{
	atomic_inc(&eb->spinning_readers);
}

static inline void btrfs_assert_spinning_readers_put(struct extent_buffer *eb)
{
	WARN_ON(atomic_read(&eb->spinning_readers) == 0);
	atomic_dec(&eb->spinning_readers);
}

static inline void btrfs_assert_tree_read_locks_get(struct extent_buffer *eb)
{
	atomic_inc(&eb->read_locks);
}

static inline void btrfs_assert_tree_read_locks_put(struct extent_buffer *eb)
{
	atomic_dec(&eb->read_locks);
}

static inline void btrfs_assert_tree_read_locked(struct extent_buffer *eb)
{
	BUG_ON(!atomic_read(&eb->read_locks));
}

static inline void btrfs_assert_tree_write_locks_get(struct extent_buffer *eb)
{
	eb->write_locks++;
}

static inline void btrfs_assert_tree_write_locks_put(struct extent_buffer *eb)
{
	eb->write_locks--;
}

#else
static void btrfs_assert_spinning_writers_get(struct extent_buffer *eb) { }
static void btrfs_assert_spinning_writers_put(struct extent_buffer *eb) { }
static void btrfs_assert_no_spinning_writers(struct extent_buffer *eb) { }
static void btrfs_assert_spinning_readers_put(struct extent_buffer *eb) { }
static void btrfs_assert_spinning_readers_get(struct extent_buffer *eb) { }
static void btrfs_assert_tree_read_locked(struct extent_buffer *eb) { }
static void btrfs_assert_tree_read_locks_get(struct extent_buffer *eb) { }
static void btrfs_assert_tree_read_locks_put(struct extent_buffer *eb) { }
static void btrfs_assert_tree_write_locks_get(struct extent_buffer *eb) { }
static void btrfs_assert_tree_write_locks_put(struct extent_buffer *eb) { }
#endif

/*
 * Mark already held read lock as blocking. Can be nested in write lock by the
 * same thread.
 *
 * Use when there are potentially long operations ahead so other threads
 * waiting on the lock will not actively spin but sleep instead.
 *
 * The rwlock is released and the blocking reader counter is increased.
 */
void btrfs_set_lock_blocking_read(struct extent_buffer *eb)
{
	trace_btrfs_set_lock_blocking_read(eb);
	/*
	 * No lock is required. The lock owner may change if we have a read
	 * lock, but it won't change to or away from us. If we have the write
	 * lock, we are the owner and it'll never change.
	 */
	if (eb->lock_recursed && current->pid == eb->lock_owner)
		return;
	btrfs_assert_tree_read_locked(eb);
	atomic_inc(&eb->blocking_readers);
	btrfs_assert_spinning_readers_put(eb);
	read_unlock(&eb->lock);
}
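
/*
 * Illustrative sketch, not part of the build: the blocking-read pattern that
 * pairs with the function above, for a hypothetical already looked-up 'eb'.
 *
 *   btrfs_tree_read_lock(eb);            // spinning read lock
 *   btrfs_set_lock_blocking_read(eb);    // about to do something slow
 *   // ... read eb data, possibly sleeping (IO, allocations) ...
 *   btrfs_tree_read_unlock_blocking(eb);
 */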

/*
 * Mark already held write lock as blocking.
 *
 * Use when there are potentially long operations ahead so other threads
 * waiting on the lock will not actively spin but sleep instead.
 *
 * The rwlock is released and blocking writers is set.
 */
void btrfs_set_lock_blocking_write(struct extent_buffer *eb)
{
	trace_btrfs_set_lock_blocking_write(eb);
	/*
	 * No lock is required. The lock owner may change if we have a read
	 * lock, but it won't change to or away from us. If we have the write
	 * lock, we are the owner and it'll never change.
	 */
	if (eb->lock_recursed && current->pid == eb->lock_owner)
		return;
	if (eb->blocking_writers == 0) {
		btrfs_assert_spinning_writers_put(eb);
		btrfs_assert_tree_locked(eb);
		WRITE_ONCE(eb->blocking_writers, 1);
		write_unlock(&eb->lock);
	}
}

/*
 * Lock the extent buffer for read. Wait for any writers (spinning or blocking).
 * Can be nested in write lock by the same thread.
 *
 * Use when the locked section does only lightweight actions and busy waiting
 * would be cheaper than making other threads do the wait/wake loop.
 *
 * The rwlock is held upon exit.
 */
void __btrfs_tree_read_lock(struct extent_buffer *eb, enum btrfs_lock_nesting nest,
			    bool recurse)
{
	u64 start_ns = 0;

	if (trace_btrfs_tree_read_lock_enabled())
		start_ns = ktime_get_ns();
again:
	read_lock(&eb->lock);
	BUG_ON(eb->blocking_writers == 0 &&
	       current->pid == eb->lock_owner);
	if (eb->blocking_writers) {
		if (current->pid == eb->lock_owner) {
			/*
			 * This extent is already write-locked by our thread.
			 * We allow an additional read lock to be added because
			 * it's for the same thread. btrfs_find_all_roots()
			 * depends on this as it may be called on a partly
			 * (write-)locked tree.
			 */
			WARN_ON(!recurse);
			BUG_ON(eb->lock_recursed);
			eb->lock_recursed = true;
			read_unlock(&eb->lock);
			trace_btrfs_tree_read_lock(eb, start_ns);
			return;
		}
		read_unlock(&eb->lock);
		wait_event(eb->write_lock_wq,
			   READ_ONCE(eb->blocking_writers) == 0);
		goto again;
	}
	btrfs_assert_tree_read_locks_get(eb);
	btrfs_assert_spinning_readers_get(eb);
	trace_btrfs_tree_read_lock(eb, start_ns);
}

void btrfs_tree_read_lock(struct extent_buffer *eb)
{
	__btrfs_tree_read_lock(eb, BTRFS_NESTING_NORMAL, false);
}
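
/*
 * Illustrative sketch, not part of the build: the one-level nesting case that
 * the lock_recursed logic above supports. A thread that already holds the
 * (blocking) write lock on 'eb' may take a read lock on the same buffer, e.g.
 * via a nested btrfs_search_slot(). The nested read lock must be dropped
 * before the final write unlock.
 *
 *   btrfs_tree_lock(eb);                  // write lock (spinning)
 *   btrfs_set_lock_blocking_write(eb);    // blocking_writers = 1, rwlock dropped
 *   ...
 *   __btrfs_tree_read_lock(eb, BTRFS_NESTING_NORMAL, true);  // sets lock_recursed
 *   ...
 *   btrfs_tree_read_unlock(eb);           // only clears lock_recursed
 *   btrfs_tree_unlock(eb);                // releases the blocking write lock
 */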

/*
 * Lock extent buffer for read, optimistically expecting that there are no
 * contending blocking writers. If there are, don't wait.
 *
 * Return 1 if the rwlock has been taken, 0 otherwise
 */
int btrfs_tree_read_lock_atomic(struct extent_buffer *eb)
{
	if (READ_ONCE(eb->blocking_writers))
		return 0;

	read_lock(&eb->lock);
	/* Refetch value after lock */
	if (READ_ONCE(eb->blocking_writers)) {
		read_unlock(&eb->lock);
		return 0;
	}
	btrfs_assert_tree_read_locks_get(eb);
	btrfs_assert_spinning_readers_get(eb);
	trace_btrfs_tree_read_lock_atomic(eb);
	return 1;
}
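
/*
 * Illustrative sketch, not part of the build: a hypothetical caller that tries
 * the optimistic variant first and falls back to the waiting path only on
 * contention.
 *
 *   if (!btrfs_tree_read_lock_atomic(eb))
 *           btrfs_tree_read_lock(eb);     // waits for blocking writers
 *   // ... short spinning read section ...
 *   btrfs_tree_read_unlock(eb);
 */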

/*
 * Try-lock for read. Don't block or wait for contending writers.
 *
 * Return 1 if the rwlock has been taken, 0 otherwise
 */
int btrfs_try_tree_read_lock(struct extent_buffer *eb)
{
	if (READ_ONCE(eb->blocking_writers))
		return 0;

	if (!read_trylock(&eb->lock))
		return 0;

	/* Refetch value after lock */
	if (READ_ONCE(eb->blocking_writers)) {
		read_unlock(&eb->lock);
		return 0;
	}
	btrfs_assert_tree_read_locks_get(eb);
	btrfs_assert_spinning_readers_get(eb);
	trace_btrfs_try_tree_read_lock(eb);
	return 1;
}
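
/*
 * Illustrative sketch, not part of the build: typical try-lock use, where a
 * hypothetical caller skips the buffer instead of waiting.
 *
 *   if (btrfs_try_tree_read_lock(eb)) {
 *           // ... quick read-only peek at the eb data ...
 *           btrfs_tree_read_unlock(eb);
 *   } else {
 *           // contended, back off or retry later
 *   }
 */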

/*
 * Try-lock for write. May block until the lock is uncontended, but does not
 * wait until it is free.
 *
 * Return 1 if the rwlock has been taken, 0 otherwise
 */
int btrfs_try_tree_write_lock(struct extent_buffer *eb)
{
	if (READ_ONCE(eb->blocking_writers) || atomic_read(&eb->blocking_readers))
		return 0;

	write_lock(&eb->lock);
	/* Refetch value after lock */
	if (READ_ONCE(eb->blocking_writers) || atomic_read(&eb->blocking_readers)) {
		write_unlock(&eb->lock);
		return 0;
	}
	btrfs_assert_tree_write_locks_get(eb);
	btrfs_assert_spinning_writers_get(eb);
	eb->lock_owner = current->pid;
	trace_btrfs_try_tree_write_lock(eb);
	return 1;
}
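
/*
 * Illustrative sketch, not part of the build: the write-side counterpart, e.g.
 * for an opportunistic update that gives up under contention.
 *
 *   if (btrfs_try_tree_write_lock(eb)) {
 *           // ... short modification of the eb data ...
 *           btrfs_tree_unlock(eb);
 *   }
 */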

/*
 * Release read lock. Must be used only if the lock is in spinning mode. If
 * the read lock was taken nested inside a write lock, it must be released
 * before the write unlock.
 *
 * The rwlock is not held upon exit.
 */
void btrfs_tree_read_unlock(struct extent_buffer *eb)
{
	trace_btrfs_tree_read_unlock(eb);
	/*
	 * if we're nested, we have the write lock. No new locking
	 * is needed as long as we are the lock owner.
	 * The write unlock will do a barrier for us, and the lock_recursed
	 * field only matters to the lock owner.
	 */
	if (eb->lock_recursed && current->pid == eb->lock_owner) {
		eb->lock_recursed = false;
		return;
	}
	btrfs_assert_tree_read_locked(eb);
	btrfs_assert_spinning_readers_put(eb);
	btrfs_assert_tree_read_locks_put(eb);
	read_unlock(&eb->lock);
}

/*
 * Release read lock, previously set to blocking by a pairing call to
 * btrfs_set_lock_blocking_read(). Can be nested in write lock by the same
 * thread.
 *
 * State of rwlock is unchanged, last reader wakes waiting threads.
 */
void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb)
{
	trace_btrfs_tree_read_unlock_blocking(eb);
	/*
	 * if we're nested, we have the write lock. No new locking
	 * is needed as long as we are the lock owner.
	 * The write unlock will do a barrier for us, and the lock_recursed
	 * field only matters to the lock owner.
	 */
	if (eb->lock_recursed && current->pid == eb->lock_owner) {
		eb->lock_recursed = false;
		return;
	}
	btrfs_assert_tree_read_locked(eb);
	WARN_ON(atomic_read(&eb->blocking_readers) == 0);
	/* atomic_dec_and_test implies a barrier */
	if (atomic_dec_and_test(&eb->blocking_readers))
		cond_wake_up_nomb(&eb->read_lock_wq);
	btrfs_assert_tree_read_locks_put(eb);
}

/*
 * Lock for write. Wait for all blocking and spinning readers and writers. This
 * starts a context where a read lock can be nested by the same thread.
 *
 * The rwlock is held for write upon exit.
 */
void __btrfs_tree_lock(struct extent_buffer *eb, enum btrfs_lock_nesting nest)
	__acquires(&eb->lock)
{
	u64 start_ns = 0;

	if (trace_btrfs_tree_lock_enabled())
		start_ns = ktime_get_ns();

	WARN_ON(eb->lock_owner == current->pid);
again:
	wait_event(eb->read_lock_wq, atomic_read(&eb->blocking_readers) == 0);
	wait_event(eb->write_lock_wq, READ_ONCE(eb->blocking_writers) == 0);
	write_lock(&eb->lock);
	/* Refetch value after lock */
	if (atomic_read(&eb->blocking_readers) ||
	    READ_ONCE(eb->blocking_writers)) {
		write_unlock(&eb->lock);
		goto again;
	}
	btrfs_assert_spinning_writers_get(eb);
	btrfs_assert_tree_write_locks_get(eb);
	eb->lock_owner = current->pid;
	trace_btrfs_tree_lock(eb, start_ns);
}

void btrfs_tree_lock(struct extent_buffer *eb)
{
	__btrfs_tree_lock(eb, BTRFS_NESTING_NORMAL);
}

/*
 * Release the write lock, either blocking or spinning (ie. there's no need
 * for an explicit blocking unlock, like btrfs_tree_read_unlock_blocking).
 * This also ends the context for nesting, the read lock must have been
 * released already.
 *
 * Tasks blocked and waiting are woken, rwlock is not held upon exit.
 */
void btrfs_tree_unlock(struct extent_buffer *eb)
{
	/*
	 * This is read both locked and unlocked but always by the same thread
	 * that already owns the lock so we don't need to use READ_ONCE
	 */
	int blockers = eb->blocking_writers;

	BUG_ON(blockers > 1);

	btrfs_assert_tree_locked(eb);
	trace_btrfs_tree_unlock(eb);
	eb->lock_owner = 0;
	btrfs_assert_tree_write_locks_put(eb);

	if (blockers) {
		btrfs_assert_no_spinning_writers(eb);
		/* Unlocked write */
		WRITE_ONCE(eb->blocking_writers, 0);
		/*
		 * We need to order modifying blocking_writers above with
		 * actually waking up the sleepers to ensure they see the
		 * updated value of blocking_writers
		 */
		cond_wake_up(&eb->write_lock_wq);
	} else {
		btrfs_assert_spinning_writers_put(eb);
		write_unlock(&eb->lock);
	}
}

/*
 * Set all locked nodes in the path to blocking locks. This should be done
 * before scheduling
 */
void btrfs_set_path_blocking(struct btrfs_path *p)
{
	int i;

	for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
		if (!p->nodes[i] || !p->locks[i])
			continue;
		/*
		 * If we currently have a spinning reader or writer lock this
		 * will bump the count of blocking holders and drop the
		 * spinlock.
		 */
		if (p->locks[i] == BTRFS_READ_LOCK) {
			btrfs_set_lock_blocking_read(p->nodes[i]);
			p->locks[i] = BTRFS_READ_LOCK_BLOCKING;
		} else if (p->locks[i] == BTRFS_WRITE_LOCK) {
			btrfs_set_lock_blocking_write(p->nodes[i]);
			p->locks[i] = BTRFS_WRITE_LOCK_BLOCKING;
		}
	}
}
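
/*
 * Illustrative sketch, not part of the build: converting all locks held in a
 * hypothetical path 'p' to blocking mode before an operation that may sleep,
 * so contending threads wait on the queues instead of spinning.
 *
 *   btrfs_set_path_blocking(p);
 *   // ... memory allocation, IO, or anything else that can schedule ...
 *   // the individual locks are dropped later, e.g. via btrfs_release_path()
 */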

/*
 * This releases any locks held in the path starting at level and going all the
 * way up to the root.
 *
 * btrfs_search_slot will keep the lock held on higher nodes in a few corner
 * cases, such as COW of the block at slot zero in the node. This ignores
 * those rules, and it should only be called when there are no more updates to
 * be done higher up in the tree.
 */
void btrfs_unlock_up_safe(struct btrfs_path *path, int level)
{
	int i;

	if (path->keep_locks)
		return;

	for (i = level; i < BTRFS_MAX_LEVEL; i++) {
		if (!path->nodes[i])
			continue;
		if (!path->locks[i])
			continue;
		btrfs_tree_unlock_rw(path->nodes[i], path->locks[i]);
		path->locks[i] = 0;
	}
}
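
/*
 * Illustrative sketch, not part of the build: once all remaining modifications
 * are confined to the leaf, a hypothetical caller can drop the upper-level
 * locks early to reduce contention.
 *
 *   // done changing nodes above level 1, keep only the leaf (level 0) locked
 *   btrfs_unlock_up_safe(path, 1);
 *   // ... continue working on path->nodes[0] ...
 */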

/*
 * Loop around taking references on and locking the root node of the tree until
 * we end up with a lock on the root node.
 *
 * Return: root extent buffer with write lock held
 */
struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root)
{
	struct extent_buffer *eb;

	while (1) {
		eb = btrfs_root_node(root);
		btrfs_tree_lock(eb);
		if (eb == root->node)
			break;
		btrfs_tree_unlock(eb);
		free_extent_buffer(eb);
	}
	return eb;
}
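
/*
 * Illustrative sketch, not part of the build: the returned root buffer carries
 * a reference and the write lock, both of which the hypothetical caller must
 * drop when done.
 *
 *   eb = btrfs_lock_root_node(root);
 *   // ... modify the root node ...
 *   btrfs_tree_unlock(eb);
 *   free_extent_buffer(eb);
 */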

/*
 * Loop around taking references on and locking the root node of the tree until
 * we end up with a lock on the root node.
 *
 * Return: root extent buffer with read lock held
 */
struct extent_buffer *__btrfs_read_lock_root_node(struct btrfs_root *root,
						  bool recurse)
{
	struct extent_buffer *eb;

	while (1) {
		eb = btrfs_root_node(root);
		__btrfs_tree_read_lock(eb, BTRFS_NESTING_NORMAL, recurse);
		if (eb == root->node)
			break;
		btrfs_tree_read_unlock(eb);
		free_extent_buffer(eb);
	}
	return eb;
}

/*
 * DREW locks
 * ==========
 *
 * DREW stands for double-reader-writer-exclusion lock. It's used in situations
 * where you want to provide A-B exclusion but not AA or BB.
 *
 * The current implementation gives more priority to readers. If a reader and a
 * writer race to acquire their respective sides of the lock, the writer yields
 * its lock as soon as it detects a concurrent reader. Additionally, if there
 * are pending readers, no new writers are allowed to come in and acquire the
 * lock.
 */
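
/*
 * Illustrative sketch, not part of the build: the two sides of a DREW lock.
 * Any number of threads may hold the same side concurrently; only the A side
 * excludes the B side. 'lock' is a hypothetical, already initialized
 * struct btrfs_drew_lock.
 *
 *   // side A ("writer"):
 *   btrfs_drew_write_lock(&lock);
 *   // ... work that must not overlap with side B ...
 *   btrfs_drew_write_unlock(&lock);
 *
 *   // side B ("reader"), may run concurrently with other B-side users:
 *   btrfs_drew_read_lock(&lock);
 *   // ... work that must not overlap with side A ...
 *   btrfs_drew_read_unlock(&lock);
 */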

int btrfs_drew_lock_init(struct btrfs_drew_lock *lock)
{
	int ret;

	ret = percpu_counter_init(&lock->writers, 0, GFP_KERNEL);
	if (ret)
		return ret;

	atomic_set(&lock->readers, 0);
	init_waitqueue_head(&lock->pending_readers);
	init_waitqueue_head(&lock->pending_writers);

	return 0;
}

void btrfs_drew_lock_destroy(struct btrfs_drew_lock *lock)
{
	percpu_counter_destroy(&lock->writers);
}

/* Return true if acquisition is successful, false otherwise */
bool btrfs_drew_try_write_lock(struct btrfs_drew_lock *lock)
{
	if (atomic_read(&lock->readers))
		return false;

	percpu_counter_inc(&lock->writers);

	/* Ensure writers count is updated before we check for pending readers */
	smp_mb();
	if (atomic_read(&lock->readers)) {
		btrfs_drew_write_unlock(lock);
		return false;
	}

	return true;
}

void btrfs_drew_write_lock(struct btrfs_drew_lock *lock)
{
	while (true) {
		if (btrfs_drew_try_write_lock(lock))
			return;
		wait_event(lock->pending_writers, !atomic_read(&lock->readers));
	}
}

void btrfs_drew_write_unlock(struct btrfs_drew_lock *lock)
{
	percpu_counter_dec(&lock->writers);
	cond_wake_up(&lock->pending_readers);
}

void btrfs_drew_read_lock(struct btrfs_drew_lock *lock)
{
	atomic_inc(&lock->readers);

	/*
	 * Ensure the pending reader count is perceived BEFORE this reader
	 * goes to sleep in case of active writers. This guarantees new writers
	 * won't be allowed and that the current reader will be woken up when
	 * the last active writer finishes its job.
	 */
	smp_mb__after_atomic();

	wait_event(lock->pending_readers,
		   percpu_counter_sum(&lock->writers) == 0);
}

void btrfs_drew_read_unlock(struct btrfs_drew_lock *lock)
{
	/*
	 * atomic_dec_and_test implies a full barrier, so woken up writers
	 * are guaranteed to see the decrement
	 */
	if (atomic_dec_and_test(&lock->readers))
		wake_up(&lock->pending_writers);
}