^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) // SPDX-License-Identifier: GPL-2.0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) #include "misc.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) #include "ctree.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) #include "space-info.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) #include "sysfs.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) #include "volumes.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) #include "free-space-cache.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) #include "ordered-data.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) #include "transaction.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11) #include "block-group.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) * HOW DOES SPACE RESERVATION WORK
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) * If you want to know about delalloc specifically, there is a separate comment
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) * for that with the delalloc code. This comment is about how the whole system
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18) * works generally.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20) * BASIC CONCEPTS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22) * 1) space_info. This is the ultimate arbiter of how much space we can use.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23) * There's a description of the bytes_ fields with the struct declaration,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24) * refer to that for specifics on each field. Suffice it to say that for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25) * reservations we care about total_bytes - SUM(space_info->bytes_) when
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) * determining if there is space to make an allocation. There is a space_info
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27) * for METADATA, SYSTEM, and DATA areas.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29) * 2) block_rsv's. These are basically buckets for every different type of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30) * metadata reservation we have. You can see the comment in the block_rsv
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31) * code on the rules for each type, but generally block_rsv->reserved is how
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32) * much space is accounted for in space_info->bytes_may_use.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34) * 3) btrfs_calc*_size. These are the worst case calculations we used based
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35) * on the number of items we will want to modify. We have one for changing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36) * items, and one for inserting new items. Generally we use these helpers to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37) * determine the size of the block reserves, and then use the actual bytes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38) * values to adjust the space_info counters.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40) * MAKING RESERVATIONS, THE NORMAL CASE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42) * We call into either btrfs_reserve_data_bytes() or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) * btrfs_reserve_metadata_bytes(), depending on which we're looking for, with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44) * num_bytes we want to reserve.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45) *
 * ->reserve
 *   space_info->bytes_may_use += num_bytes
 *
 * ->extent allocation
 *   Call btrfs_add_reserved_bytes() which does
 *   space_info->bytes_may_use -= num_bytes
 *   space_info->bytes_reserved += extent_bytes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54) * ->insert reference
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55) * Call btrfs_update_block_group() which does
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56) * space_info->bytes_reserved -= extent_bytes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57) * space_info->bytes_used += extent_bytes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59) * MAKING RESERVATIONS, FLUSHING NORMALLY (non-priority)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61) * Assume we are unable to simply make the reservation because we do not have
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62) * enough space
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64) * -> __reserve_bytes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65) * create a reserve_ticket with ->bytes set to our reservation, add it to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66) * the tail of space_info->tickets, kick async flush thread
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68) * ->handle_reserve_ticket
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69) * wait on ticket->wait for ->bytes to be reduced to 0, or ->error to be set
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70) * on the ticket.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72) * -> btrfs_async_reclaim_metadata_space/btrfs_async_reclaim_data_space
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73) * Flushes various things attempting to free up space.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75) * -> btrfs_try_granting_tickets()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76) * This is called by anything that either subtracts space from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77) * space_info->bytes_may_use, ->bytes_pinned, etc, or adds to the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78) * space_info->total_bytes. This loops through the ->priority_tickets and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79) * then the ->tickets list checking to see if the reservation can be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80) * completed. If it can the space is added to space_info->bytes_may_use and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81) * the ticket is woken up.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82) *
 * -> ticket wakeup
 *   Check if ->bytes == 0, if it is we got our reservation and we can carry
 *   on, if not return the appropriate error (ENOSPC, but can be EINTR if we
 *   were interrupted.)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 87) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88) * MAKING RESERVATIONS, FLUSHING HIGH PRIORITY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90) * Same as the above, except we add ourselves to the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91) * space_info->priority_tickets, and we do not use ticket->wait, we simply
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92) * call flush_space() ourselves for the states that are safe for us to call
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93) * without deadlocking and hope for the best.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94) *
 * THE FLUSHING STATES
 *
 * Generally speaking we will have two cases for each state, a "nice" state
 * and an "ALL THE THINGS" state. In btrfs we delay a lot of work in order to
 * reduce the locking overhead on the various trees, and even to keep from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100) * doing any work at all in the case of delayed refs. Each of these delayed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) * things however hold reservations, and so letting them run allows us to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) * reclaim space so we can make new reservations.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104) * FLUSH_DELAYED_ITEMS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105) * Every inode has a delayed item to update the inode. Take a simple write
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) * for example, we would update the inode item at write time to update the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) * mtime, and then again at finish_ordered_io() time in order to update the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108) * isize or bytes. We keep these delayed items to coalesce these operations
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) * into a single operation done on demand. These are an easy way to reclaim
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) * metadata space.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) * FLUSH_DELALLOC
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113) * Look at the delalloc comment to get an idea of how much space is reserved
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) * for delayed allocation. We can reclaim some of this space simply by
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) * running delalloc, but usually we need to wait for ordered extents to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) * reclaim the bulk of this space.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118) * FLUSH_DELAYED_REFS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) * We have a block reserve for the outstanding delayed refs space, and every
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120) * delayed ref operation holds a reservation. Running these is a quick way
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121) * to reclaim space, but we want to hold this until the end because COW can
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122) * churn a lot and we can avoid making some extent tree modifications if we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) * are able to delay for as long as possible.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125) * ALLOC_CHUNK
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) * We will skip this the first time through space reservation, because of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127) * overcommit and we don't want to have a lot of useless metadata space when
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128) * our worst case reservations will likely never come true.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130) * RUN_DELAYED_IPUTS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131) * If we're freeing inodes we're likely freeing checksums, file extent
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) * items, and extent tree items. Loads of space could be freed up by these
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133) * operations, however they won't be usable until the transaction commits.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135) * COMMIT_TRANS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136) * may_commit_transaction() is the ultimate arbiter on whether we commit the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137) * transaction or not. In order to avoid constantly churning we do all the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138) * above flushing first and then commit the transaction as the last resort.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139) * However we need to take into account things like pinned space that would
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140) * be freed, plus any delayed work we may not have gotten rid of in the case
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141) * of metadata.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142) *
 * OVERCOMMIT
 *
 * Because we hold so many reservations for metadata we will allow you to
 * reserve more space than is currently free in the currently allocated
 * metadata space. This only happens with metadata, data does not allow
 * overcommitting.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150) * You can see the current logic for when we allow overcommit in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151) * btrfs_can_overcommit(), but it only applies to unallocated space. If there
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152) * is no unallocated space to be had, all reservations are kept within the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153) * free space in the allocated metadata chunks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155) * Because of overcommitting, you generally want to use the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156) * btrfs_can_overcommit() logic for metadata allocations, as it does the right
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157) * thing with or without extra unallocated space.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 160) u64 __pure btrfs_space_info_used(struct btrfs_space_info *s_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 161) bool may_use_included)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 162) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 163) ASSERT(s_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 164) return s_info->bytes_used + s_info->bytes_reserved +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 165) s_info->bytes_pinned + s_info->bytes_readonly +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 166) (may_use_included ? s_info->bytes_may_use : 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 168)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170) * after adding space to the filesystem, we need to clear the full flags
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171) * on all the space infos.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 173) void btrfs_clear_space_info_full(struct btrfs_fs_info *info)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 174) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 175) struct list_head *head = &info->space_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 176) struct btrfs_space_info *found;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 177)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 178) list_for_each_entry(found, head, list)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 179) found->full = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 180) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 181)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 182) static int create_space_info(struct btrfs_fs_info *info, u64 flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 183) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 184)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 185) struct btrfs_space_info *space_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 186) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 187) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 188)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 189) space_info = kzalloc(sizeof(*space_info), GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 190) if (!space_info)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 191) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 192)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 193) ret = percpu_counter_init(&space_info->total_bytes_pinned, 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 194) GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 195) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 196) kfree(space_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 197) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 198) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 199)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 200) for (i = 0; i < BTRFS_NR_RAID_TYPES; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 201) INIT_LIST_HEAD(&space_info->block_groups[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 202) init_rwsem(&space_info->groups_sem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 203) spin_lock_init(&space_info->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 204) space_info->flags = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 205) space_info->force_alloc = CHUNK_ALLOC_NO_FORCE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 206) INIT_LIST_HEAD(&space_info->ro_bgs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 207) INIT_LIST_HEAD(&space_info->tickets);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 208) INIT_LIST_HEAD(&space_info->priority_tickets);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 209)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 210) ret = btrfs_sysfs_add_space_info_type(info, space_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 211) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 212) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 213)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 214) list_add(&space_info->list, &info->space_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 215) if (flags & BTRFS_BLOCK_GROUP_DATA)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 216) info->data_sinfo = space_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 217)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 218) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 219) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 220)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 221) int btrfs_init_space_info(struct btrfs_fs_info *fs_info)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 222) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 223) struct btrfs_super_block *disk_super;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 224) u64 features;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 225) u64 flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 226) int mixed = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 227) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 228)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 229) disk_super = fs_info->super_copy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 230) if (!btrfs_super_root(disk_super))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 231) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 232)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 233) features = btrfs_super_incompat_flags(disk_super);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 234) if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 235) mixed = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 236)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 237) flags = BTRFS_BLOCK_GROUP_SYSTEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 238) ret = create_space_info(fs_info, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 239) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 240) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 241)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 242) if (mixed) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 243) flags = BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 244) ret = create_space_info(fs_info, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 245) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 246) flags = BTRFS_BLOCK_GROUP_METADATA;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 247) ret = create_space_info(fs_info, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 248) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 249) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 250)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 251) flags = BTRFS_BLOCK_GROUP_DATA;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 252) ret = create_space_info(fs_info, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 253) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 254) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 255) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 256) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 257)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 258) void btrfs_update_space_info(struct btrfs_fs_info *info, u64 flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 259) u64 total_bytes, u64 bytes_used,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 260) u64 bytes_readonly,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 261) struct btrfs_space_info **space_info)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 262) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 263) struct btrfs_space_info *found;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 264) int factor;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 265)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 266) factor = btrfs_bg_type_to_factor(flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 267)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 268) found = btrfs_find_space_info(info, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 269) ASSERT(found);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 270) spin_lock(&found->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 271) found->total_bytes += total_bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 272) found->disk_total += total_bytes * factor;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 273) found->bytes_used += bytes_used;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 274) found->disk_used += bytes_used * factor;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 275) found->bytes_readonly += bytes_readonly;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 276) if (total_bytes > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 277) found->full = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 278) btrfs_try_granting_tickets(info, found);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 279) spin_unlock(&found->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 280) *space_info = found;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 281) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 282)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 283) struct btrfs_space_info *btrfs_find_space_info(struct btrfs_fs_info *info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 284) u64 flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 285) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 286) struct list_head *head = &info->space_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 287) struct btrfs_space_info *found;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 288)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 289) flags &= BTRFS_BLOCK_GROUP_TYPE_MASK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 290)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 291) list_for_each_entry(found, head, list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 292) if (found->flags & flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 293) return found;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 294) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 295) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 296) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 297)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 298) static u64 calc_available_free_space(struct btrfs_fs_info *fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 299) struct btrfs_space_info *space_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 300) enum btrfs_reserve_flush_enum flush)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 301) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 302) u64 profile;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 303) u64 avail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 304) int factor;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 305)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 306) if (space_info->flags & BTRFS_BLOCK_GROUP_SYSTEM)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 307) profile = btrfs_system_alloc_profile(fs_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 308) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 309) profile = btrfs_metadata_alloc_profile(fs_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 310)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 311) avail = atomic64_read(&fs_info->free_chunk_space);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 312)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 313) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 314) * If we have dup, raid1 or raid10 then only half of the free
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 315) * space is actually usable. For raid56, the space info used
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 316) * doesn't include the parity drive, so we don't have to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 317) * change the math
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 318) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 319) factor = btrfs_bg_type_to_factor(profile);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 320) avail = div_u64(avail, factor);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 321)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 322) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 323) * If we aren't flushing all things, let us overcommit up to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 324) * 1/2th of the space. If we can flush, don't let us overcommit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 325) * too much, let it overcommit up to 1/8 of the space.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 326) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 327) if (flush == BTRFS_RESERVE_FLUSH_ALL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 328) avail >>= 3;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 329) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 330) avail >>= 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 331) return avail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 332) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 333)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 334) int btrfs_can_overcommit(struct btrfs_fs_info *fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 335) struct btrfs_space_info *space_info, u64 bytes,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 336) enum btrfs_reserve_flush_enum flush)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 337) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 338) u64 avail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 339) u64 used;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 340)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 341) /* Don't overcommit when in mixed mode */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 342) if (space_info->flags & BTRFS_BLOCK_GROUP_DATA)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 343) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 344)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 345) used = btrfs_space_info_used(space_info, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 346) avail = calc_available_free_space(fs_info, space_info, flush);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 347)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 348) if (used + bytes < space_info->total_bytes + avail)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 349) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 350) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 351) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 352)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 353) static void remove_ticket(struct btrfs_space_info *space_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 354) struct reserve_ticket *ticket)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 355) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 356) if (!list_empty(&ticket->list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 357) list_del_init(&ticket->list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 358) ASSERT(space_info->reclaim_size >= ticket->bytes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 359) space_info->reclaim_size -= ticket->bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 360) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 361) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 362)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 363) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 364) * This is for space we already have accounted in space_info->bytes_may_use, so
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 365) * basically when we're returning space from block_rsv's.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 366) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 367) void btrfs_try_granting_tickets(struct btrfs_fs_info *fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 368) struct btrfs_space_info *space_info)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 369) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 370) struct list_head *head;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 371) enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_NO_FLUSH;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 372)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 373) lockdep_assert_held(&space_info->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 374)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 375) head = &space_info->priority_tickets;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 376) again:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 377) while (!list_empty(head)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 378) struct reserve_ticket *ticket;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 379) u64 used = btrfs_space_info_used(space_info, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 380)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 381) ticket = list_first_entry(head, struct reserve_ticket, list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 382)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 383) /* Check and see if our ticket can be satisified now. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 384) if ((used + ticket->bytes <= space_info->total_bytes) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 385) btrfs_can_overcommit(fs_info, space_info, ticket->bytes,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 386) flush)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 387) btrfs_space_info_update_bytes_may_use(fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 388) space_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 389) ticket->bytes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 390) remove_ticket(space_info, ticket);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 391) ticket->bytes = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 392) space_info->tickets_id++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 393) wake_up(&ticket->wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 394) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 395) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 396) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 397) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 398)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 399) if (head == &space_info->priority_tickets) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 400) head = &space_info->tickets;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 401) flush = BTRFS_RESERVE_FLUSH_ALL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 402) goto again;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 403) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 404) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 405)
/*
 * Log the size/reserved counters of the named block reserve under its
 * lock. Multi-statement macro, hence the do/while(0) wrapper; rsv_name
 * is stringized into the log line.
 */
#define DUMP_BLOCK_RSV(fs_info, rsv_name)				\
do {									\
	struct btrfs_block_rsv *__rsv = &(fs_info)->rsv_name;		\
	spin_lock(&__rsv->lock);					\
	btrfs_info(fs_info, #rsv_name ": size %llu reserved %llu",	\
		   __rsv->size, __rsv->reserved);			\
	spin_unlock(&__rsv->lock);					\
} while (0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 414)
/*
 * Dump the counters of @info and all the global block reserves to the
 * kernel log. Caller must hold info->lock.
 */
static void __btrfs_dump_space_info(struct btrfs_fs_info *fs_info,
				    struct btrfs_space_info *info)
{
	lockdep_assert_held(&info->lock);

	/* The free space could be negative in case of overcommit */
	btrfs_info(fs_info, "space_info %llu has %lld free, is %sfull",
		   info->flags,
		   (s64)(info->total_bytes - btrfs_space_info_used(info, true)),
		   info->full ? "" : "not ");
	btrfs_info(fs_info,
		"space_info total=%llu, used=%llu, pinned=%llu, reserved=%llu, may_use=%llu, readonly=%llu",
		info->total_bytes, info->bytes_used, info->bytes_pinned,
		info->bytes_reserved, info->bytes_may_use,
		info->bytes_readonly);

	DUMP_BLOCK_RSV(fs_info, global_block_rsv);
	DUMP_BLOCK_RSV(fs_info, trans_block_rsv);
	DUMP_BLOCK_RSV(fs_info, chunk_block_rsv);
	DUMP_BLOCK_RSV(fs_info, delayed_block_rsv);
	DUMP_BLOCK_RSV(fs_info, delayed_refs_rsv);

}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 438)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 439) void btrfs_dump_space_info(struct btrfs_fs_info *fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 440) struct btrfs_space_info *info, u64 bytes,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 441) int dump_block_groups)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 442) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 443) struct btrfs_block_group *cache;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 444) int index = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 445)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 446) spin_lock(&info->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 447) __btrfs_dump_space_info(fs_info, info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 448) spin_unlock(&info->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 449)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 450) if (!dump_block_groups)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 451) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 452)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 453) down_read(&info->groups_sem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 454) again:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 455) list_for_each_entry(cache, &info->block_groups[index], list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 456) spin_lock(&cache->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 457) btrfs_info(fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 458) "block group %llu has %llu bytes, %llu used %llu pinned %llu reserved %s",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 459) cache->start, cache->length, cache->used, cache->pinned,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 460) cache->reserved, cache->ro ? "[readonly]" : "");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 461) spin_unlock(&cache->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 462) btrfs_dump_free_space(cache, bytes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 463) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 464) if (++index < BTRFS_NR_RAID_TYPES)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 465) goto again;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 466) up_read(&info->groups_sem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 467) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 468)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 469) static inline u64 calc_reclaim_items_nr(struct btrfs_fs_info *fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 470) u64 to_reclaim)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 471) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 472) u64 bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 473) u64 nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 474)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 475) bytes = btrfs_calc_insert_metadata_size(fs_info, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 476) nr = div64_u64(to_reclaim, bytes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 477) if (!nr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 478) nr = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 479) return nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 480) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 481)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 482) #define EXTENT_SIZE_PER_ITEM SZ_256K
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 483)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 484) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 485) * shrink metadata reservation for delalloc
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 486) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 487) static void shrink_delalloc(struct btrfs_fs_info *fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 488) struct btrfs_space_info *space_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 489) u64 to_reclaim, bool wait_ordered)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 490) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 491) struct btrfs_trans_handle *trans;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 492) u64 delalloc_bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 493) u64 dio_bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 494) u64 items;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 495) long time_left;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 496) int loops;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 497)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 498) /* Calc the number of the pages we need flush for space reservation */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 499) if (to_reclaim == U64_MAX) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 500) items = U64_MAX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 501) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 502) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 503) * to_reclaim is set to however much metadata we need to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 504) * reclaim, but reclaiming that much data doesn't really track
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 505) * exactly, so increase the amount to reclaim by 2x in order to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 506) * make sure we're flushing enough delalloc to hopefully reclaim
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 507) * some metadata reservations.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 508) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 509) items = calc_reclaim_items_nr(fs_info, to_reclaim) * 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 510) to_reclaim = items * EXTENT_SIZE_PER_ITEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 511) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 512)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 513) trans = (struct btrfs_trans_handle *)current->journal_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 514)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 515) delalloc_bytes = percpu_counter_sum_positive(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 516) &fs_info->delalloc_bytes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 517) dio_bytes = percpu_counter_sum_positive(&fs_info->dio_bytes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 518) if (delalloc_bytes == 0 && dio_bytes == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 519) if (trans)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 520) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 521) if (wait_ordered)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 522) btrfs_wait_ordered_roots(fs_info, items, 0, (u64)-1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 523) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 524) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 525)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 526) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 527) * If we are doing more ordered than delalloc we need to just wait on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 528) * ordered extents, otherwise we'll waste time trying to flush delalloc
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 529) * that likely won't give us the space back we need.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 530) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 531) if (dio_bytes > delalloc_bytes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 532) wait_ordered = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 533)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 534) loops = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 535) while ((delalloc_bytes || dio_bytes) && loops < 3) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 536) u64 nr_pages = min(delalloc_bytes, to_reclaim) >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 537)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 538) btrfs_start_delalloc_roots(fs_info, nr_pages, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 539)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 540) loops++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 541) if (wait_ordered && !trans) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 542) btrfs_wait_ordered_roots(fs_info, items, 0, (u64)-1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 543) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 544) time_left = schedule_timeout_killable(1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 545) if (time_left)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 546) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 547) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 548)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 549) spin_lock(&space_info->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 550) if (list_empty(&space_info->tickets) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 551) list_empty(&space_info->priority_tickets)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 552) spin_unlock(&space_info->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 553) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 554) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 555) spin_unlock(&space_info->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 556)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 557) delalloc_bytes = percpu_counter_sum_positive(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 558) &fs_info->delalloc_bytes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 559) dio_bytes = percpu_counter_sum_positive(&fs_info->dio_bytes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 560) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 561) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 562)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 563) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 564) * maybe_commit_transaction - possibly commit the transaction if its ok to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 565) * @root - the root we're allocating for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 566) * @bytes - the number of bytes we want to reserve
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 567) * @force - force the commit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 568) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 569) * This will check to make sure that committing the transaction will actually
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 570) * get us somewhere and then commit the transaction if it does. Otherwise it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 571) * will return -ENOSPC.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 572) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 573) static int may_commit_transaction(struct btrfs_fs_info *fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 574) struct btrfs_space_info *space_info)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 575) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 576) struct reserve_ticket *ticket = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 577) struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_block_rsv;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 578) struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 579) struct btrfs_block_rsv *trans_rsv = &fs_info->trans_block_rsv;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 580) struct btrfs_trans_handle *trans;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 581) u64 reclaim_bytes = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 582) u64 bytes_needed = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 583) u64 cur_free_bytes = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 584)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 585) trans = (struct btrfs_trans_handle *)current->journal_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 586) if (trans)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 587) return -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 588)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 589) spin_lock(&space_info->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 590) cur_free_bytes = btrfs_space_info_used(space_info, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 591) if (cur_free_bytes < space_info->total_bytes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 592) cur_free_bytes = space_info->total_bytes - cur_free_bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 593) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 594) cur_free_bytes = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 595)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 596) if (!list_empty(&space_info->priority_tickets))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 597) ticket = list_first_entry(&space_info->priority_tickets,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 598) struct reserve_ticket, list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 599) else if (!list_empty(&space_info->tickets))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 600) ticket = list_first_entry(&space_info->tickets,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 601) struct reserve_ticket, list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 602) if (ticket)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 603) bytes_needed = ticket->bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 604)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 605) if (bytes_needed > cur_free_bytes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 606) bytes_needed -= cur_free_bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 607) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 608) bytes_needed = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 609) spin_unlock(&space_info->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 610)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 611) if (!bytes_needed)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 612) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 613)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 614) trans = btrfs_join_transaction(fs_info->extent_root);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 615) if (IS_ERR(trans))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 616) return PTR_ERR(trans);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 617)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 618) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 619) * See if there is enough pinned space to make this reservation, or if
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 620) * we have block groups that are going to be freed, allowing us to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 621) * possibly do a chunk allocation the next loop through.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 622) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 623) if (test_bit(BTRFS_TRANS_HAVE_FREE_BGS, &trans->transaction->flags) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 624) __percpu_counter_compare(&space_info->total_bytes_pinned,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 625) bytes_needed,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 626) BTRFS_TOTAL_BYTES_PINNED_BATCH) >= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 627) goto commit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 628)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 629) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 630) * See if there is some space in the delayed insertion reserve for this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 631) * reservation. If the space_info's don't match (like for DATA or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 632) * SYSTEM) then just go enospc, reclaiming this space won't recover any
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 633) * space to satisfy those reservations.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 634) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 635) if (space_info != delayed_rsv->space_info)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 636) goto enospc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 637)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 638) spin_lock(&delayed_rsv->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 639) reclaim_bytes += delayed_rsv->reserved;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 640) spin_unlock(&delayed_rsv->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 641)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 642) spin_lock(&delayed_refs_rsv->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 643) reclaim_bytes += delayed_refs_rsv->reserved;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 644) spin_unlock(&delayed_refs_rsv->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 645)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 646) spin_lock(&trans_rsv->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 647) reclaim_bytes += trans_rsv->reserved;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 648) spin_unlock(&trans_rsv->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 649)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 650) if (reclaim_bytes >= bytes_needed)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 651) goto commit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 652) bytes_needed -= reclaim_bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 653)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 654) if (__percpu_counter_compare(&space_info->total_bytes_pinned,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 655) bytes_needed,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 656) BTRFS_TOTAL_BYTES_PINNED_BATCH) < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 657) goto enospc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 658)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 659) commit:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 660) return btrfs_commit_transaction(trans);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 661) enospc:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 662) btrfs_end_transaction(trans);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 663) return -ENOSPC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 664) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 665)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 666) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 667) * Try to flush some data based on policy set by @state. This is only advisory
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 668) * and may fail for various reasons. The caller is supposed to examine the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 669) * state of @space_info to detect the outcome.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 670) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 671) static void flush_space(struct btrfs_fs_info *fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 672) struct btrfs_space_info *space_info, u64 num_bytes,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 673) int state)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 674) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 675) struct btrfs_root *root = fs_info->extent_root;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 676) struct btrfs_trans_handle *trans;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 677) int nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 678) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 679)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 680) switch (state) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 681) case FLUSH_DELAYED_ITEMS_NR:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 682) case FLUSH_DELAYED_ITEMS:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 683) if (state == FLUSH_DELAYED_ITEMS_NR)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 684) nr = calc_reclaim_items_nr(fs_info, num_bytes) * 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 685) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 686) nr = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 687)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 688) trans = btrfs_join_transaction(root);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 689) if (IS_ERR(trans)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 690) ret = PTR_ERR(trans);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 691) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 692) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 693) ret = btrfs_run_delayed_items_nr(trans, nr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 694) btrfs_end_transaction(trans);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 695) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 696) case FLUSH_DELALLOC:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 697) case FLUSH_DELALLOC_WAIT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 698) shrink_delalloc(fs_info, space_info, num_bytes,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 699) state == FLUSH_DELALLOC_WAIT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 700) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 701) case FLUSH_DELAYED_REFS_NR:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 702) case FLUSH_DELAYED_REFS:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 703) trans = btrfs_join_transaction(root);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 704) if (IS_ERR(trans)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 705) ret = PTR_ERR(trans);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 706) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 707) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 708) if (state == FLUSH_DELAYED_REFS_NR)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 709) nr = calc_reclaim_items_nr(fs_info, num_bytes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 710) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 711) nr = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 712) btrfs_run_delayed_refs(trans, nr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 713) btrfs_end_transaction(trans);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 714) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 715) case ALLOC_CHUNK:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 716) case ALLOC_CHUNK_FORCE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 717) trans = btrfs_join_transaction(root);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 718) if (IS_ERR(trans)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 719) ret = PTR_ERR(trans);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 720) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 721) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 722) ret = btrfs_chunk_alloc(trans,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 723) btrfs_get_alloc_profile(fs_info, space_info->flags),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 724) (state == ALLOC_CHUNK) ? CHUNK_ALLOC_NO_FORCE :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 725) CHUNK_ALLOC_FORCE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 726) btrfs_end_transaction(trans);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 727) if (ret > 0 || ret == -ENOSPC)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 728) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 729) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 730) case RUN_DELAYED_IPUTS:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 731) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 732) * If we have pending delayed iputs then we could free up a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 733) * bunch of pinned space, so make sure we run the iputs before
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 734) * we do our pinned bytes check below.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 735) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 736) btrfs_run_delayed_iputs(fs_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 737) btrfs_wait_on_delayed_iputs(fs_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 738) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 739) case COMMIT_TRANS:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 740) ret = may_commit_transaction(fs_info, space_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 741) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 742) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 743) ret = -ENOSPC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 744) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 745) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 746)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 747) trace_btrfs_flush_space(fs_info, space_info->flags, num_bytes, state,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 748) ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 749) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 750) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 751)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 752) static inline u64
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 753) btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 754) struct btrfs_space_info *space_info)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 755) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 756) u64 used;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 757) u64 avail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 758) u64 expected;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 759) u64 to_reclaim = space_info->reclaim_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 760)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 761) lockdep_assert_held(&space_info->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 762)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 763) avail = calc_available_free_space(fs_info, space_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 764) BTRFS_RESERVE_FLUSH_ALL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 765) used = btrfs_space_info_used(space_info, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 766)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 767) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 768) * We may be flushing because suddenly we have less space than we had
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 769) * before, and now we're well over-committed based on our current free
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 770) * space. If that's the case add in our overage so we make sure to put
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 771) * appropriate pressure on the flushing state machine.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 772) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 773) if (space_info->total_bytes + avail < used)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 774) to_reclaim += used - (space_info->total_bytes + avail);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 775)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 776) if (to_reclaim)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 777) return to_reclaim;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 778)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 779) to_reclaim = min_t(u64, num_online_cpus() * SZ_1M, SZ_16M);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 780) if (btrfs_can_overcommit(fs_info, space_info, to_reclaim,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 781) BTRFS_RESERVE_FLUSH_ALL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 782) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 783)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 784) used = btrfs_space_info_used(space_info, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 785)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 786) if (btrfs_can_overcommit(fs_info, space_info, SZ_1M,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 787) BTRFS_RESERVE_FLUSH_ALL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 788) expected = div_factor_fine(space_info->total_bytes, 95);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 789) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 790) expected = div_factor_fine(space_info->total_bytes, 90);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 791)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 792) if (used > expected)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 793) to_reclaim = used - expected;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 794) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 795) to_reclaim = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 796) to_reclaim = min(to_reclaim, space_info->bytes_may_use +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 797) space_info->bytes_reserved);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 798) return to_reclaim;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 799) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 800)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 801) static inline int need_do_async_reclaim(struct btrfs_fs_info *fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 802) struct btrfs_space_info *space_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 803) u64 used)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 804) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 805) u64 thresh = div_factor_fine(space_info->total_bytes, 98);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 806)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 807) /* If we're just plain full then async reclaim just slows us down. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 808) if ((space_info->bytes_used + space_info->bytes_reserved) >= thresh)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 809) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 810)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 811) if (!btrfs_calc_reclaim_metadata_size(fs_info, space_info))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 812) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 813)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 814) return (used >= thresh && !btrfs_fs_closing(fs_info) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 815) !test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 816) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 817)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 818) static bool steal_from_global_rsv(struct btrfs_fs_info *fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 819) struct btrfs_space_info *space_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 820) struct reserve_ticket *ticket)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 821) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 822) struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 823) u64 min_bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 824)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 825) if (global_rsv->space_info != space_info)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 826) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 827)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 828) spin_lock(&global_rsv->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 829) min_bytes = div_factor(global_rsv->size, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 830) if (global_rsv->reserved < min_bytes + ticket->bytes) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 831) spin_unlock(&global_rsv->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 832) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 833) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 834) global_rsv->reserved -= ticket->bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 835) remove_ticket(space_info, ticket);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 836) ticket->bytes = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 837) wake_up(&ticket->wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 838) space_info->tickets_id++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 839) if (global_rsv->reserved < global_rsv->size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 840) global_rsv->full = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 841) spin_unlock(&global_rsv->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 842)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 843) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 844) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 845)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 846) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 847) * maybe_fail_all_tickets - we've exhausted our flushing, start failing tickets
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 848) * @fs_info - fs_info for this fs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 849) * @space_info - the space info we were flushing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 850) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 851) * We call this when we've exhausted our flushing ability and haven't made
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 852) * progress in satisfying tickets. The reservation code handles tickets in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 853) * order, so if there is a large ticket first and then smaller ones we could
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 854) * very well satisfy the smaller tickets. This will attempt to wake up any
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 855) * tickets in the list to catch this case.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 856) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 857) * This function returns true if it was able to make progress by clearing out
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 858) * other tickets, or if it stumbles across a ticket that was smaller than the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 859) * first ticket.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 860) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 861) static bool maybe_fail_all_tickets(struct btrfs_fs_info *fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 862) struct btrfs_space_info *space_info)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 863) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 864) struct reserve_ticket *ticket;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 865) u64 tickets_id = space_info->tickets_id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 866) u64 first_ticket_bytes = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 867)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 868) if (btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 869) btrfs_info(fs_info, "cannot satisfy tickets, dumping space info");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 870) __btrfs_dump_space_info(fs_info, space_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 871) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 872)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 873) while (!list_empty(&space_info->tickets) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 874) tickets_id == space_info->tickets_id) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 875) ticket = list_first_entry(&space_info->tickets,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 876) struct reserve_ticket, list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 877)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 878) if (ticket->steal &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 879) steal_from_global_rsv(fs_info, space_info, ticket))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 880) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 881)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 882) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 883) * may_commit_transaction will avoid committing the transaction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 884) * if it doesn't feel like the space reclaimed by the commit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 885) * would result in the ticket succeeding. However if we have a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 886) * smaller ticket in the queue it may be small enough to be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 887) * satisified by committing the transaction, so if any
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 888) * subsequent ticket is smaller than the first ticket go ahead
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 889) * and send us back for another loop through the enospc flushing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 890) * code.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 891) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 892) if (first_ticket_bytes == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 893) first_ticket_bytes = ticket->bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 894) else if (first_ticket_bytes > ticket->bytes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 895) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 896)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 897) if (btrfs_test_opt(fs_info, ENOSPC_DEBUG))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 898) btrfs_info(fs_info, "failing ticket with %llu bytes",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 899) ticket->bytes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 900)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 901) remove_ticket(space_info, ticket);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 902) ticket->error = -ENOSPC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 903) wake_up(&ticket->wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 904)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 905) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 906) * We're just throwing tickets away, so more flushing may not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 907) * trip over btrfs_try_granting_tickets, so we need to call it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 908) * here to see if we can make progress with the next ticket in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 909) * the list.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 910) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 911) btrfs_try_granting_tickets(fs_info, space_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 912) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 913) return (tickets_id != space_info->tickets_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 914) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 915)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 916) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 917) * This is for normal flushers, we can wait all goddamned day if we want to. We
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 918) * will loop and continuously try to flush as long as we are making progress.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 919) * We count progress as clearing off tickets each time we have to loop.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 920) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 921) static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 922) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 923) struct btrfs_fs_info *fs_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 924) struct btrfs_space_info *space_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 925) u64 to_reclaim;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 926) int flush_state;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 927) int commit_cycles = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 928) u64 last_tickets_id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 929)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 930) fs_info = container_of(work, struct btrfs_fs_info, async_reclaim_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 931) space_info = btrfs_find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 932)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 933) spin_lock(&space_info->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 934) to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info, space_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 935) if (!to_reclaim) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 936) space_info->flush = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 937) spin_unlock(&space_info->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 938) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 939) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 940) last_tickets_id = space_info->tickets_id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 941) spin_unlock(&space_info->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 942)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 943) flush_state = FLUSH_DELAYED_ITEMS_NR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 944) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 945) flush_space(fs_info, space_info, to_reclaim, flush_state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 946) spin_lock(&space_info->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 947) if (list_empty(&space_info->tickets)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 948) space_info->flush = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 949) spin_unlock(&space_info->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 950) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 951) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 952) to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 953) space_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 954) if (last_tickets_id == space_info->tickets_id) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 955) flush_state++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 956) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 957) last_tickets_id = space_info->tickets_id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 958) flush_state = FLUSH_DELAYED_ITEMS_NR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 959) if (commit_cycles)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 960) commit_cycles--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 961) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 962)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 963) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 964) * We don't want to force a chunk allocation until we've tried
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 965) * pretty hard to reclaim space. Think of the case where we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 966) * freed up a bunch of space and so have a lot of pinned space
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 967) * to reclaim. We would rather use that than possibly create a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 968) * underutilized metadata chunk. So if this is our first run
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 969) * through the flushing state machine skip ALLOC_CHUNK_FORCE and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 970) * commit the transaction. If nothing has changed the next go
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 971) * around then we can force a chunk allocation.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 972) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 973) if (flush_state == ALLOC_CHUNK_FORCE && !commit_cycles)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 974) flush_state++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 975)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 976) if (flush_state > COMMIT_TRANS) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 977) commit_cycles++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 978) if (commit_cycles > 2) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 979) if (maybe_fail_all_tickets(fs_info, space_info)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 980) flush_state = FLUSH_DELAYED_ITEMS_NR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 981) commit_cycles--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 982) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 983) space_info->flush = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 984) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 985) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 986) flush_state = FLUSH_DELAYED_ITEMS_NR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 987) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 988) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 989) spin_unlock(&space_info->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 990) } while (flush_state <= COMMIT_TRANS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 991) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 992)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 993) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 994) * FLUSH_DELALLOC_WAIT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 995) * Space is freed from flushing delalloc in one of two ways.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 996) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 997) * 1) compression is on and we allocate less space than we reserved
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 998) * 2) we are overwriting existing space
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 999) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000) * For #1 that extra space is reclaimed as soon as the delalloc pages are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) * COWed, by way of btrfs_add_reserved_bytes() which adds the actual extent
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002) * length to ->bytes_reserved, and subtracts the reserved space from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003) * ->bytes_may_use.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005) * For #2 this is trickier. Once the ordered extent runs we will drop the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006) * extent in the range we are overwriting, which creates a delayed ref for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007) * that freed extent. This however is not reclaimed until the transaction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008) * commits, thus the next stages.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010) * RUN_DELAYED_IPUTS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011) * If we are freeing inodes, we want to make sure all delayed iputs have
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012) * completed, because they could have been on an inode with i_nlink == 0, and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013) * thus have been truncated and freed up space. But again this space is not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014) * immediately re-usable, it comes in the form of a delayed ref, which must be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015) * run and then the transaction must be committed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017) * FLUSH_DELAYED_REFS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018) * The above two cases generate delayed refs that will affect
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019) * ->total_bytes_pinned. However this counter can be inconsistent with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020) * reality if there are outstanding delayed refs. This is because we adjust
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) * the counter based solely on the current set of delayed refs and disregard
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022) * any on-disk state which might include more refs. So for example, if we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023) * have an extent with 2 references, but we only drop 1, we'll see that there
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) * is a negative delayed ref count for the extent and assume that the space
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025) * will be freed, and thus increase ->total_bytes_pinned.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027) * Running the delayed refs gives us the actual real view of what will be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028) * freed at the transaction commit time. This stage will not actually free
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029) * space for us, it just makes sure that may_commit_transaction() has all of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030) * the information it needs to make the right decision.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) * COMMIT_TRANS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033) * This is where we reclaim all of the pinned space generated by the previous
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034) * two stages. We will not commit the transaction if we don't think we're
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035) * likely to satisfy our request, which means if our current free space +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036) * total_bytes_pinned < reservation we will not commit. This is why the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037) * previous states are actually important, to make sure we know for sure
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038) * whether committing the transaction will allow us to make progress.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040) * ALLOC_CHUNK_FORCE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041) * For data we start with alloc chunk force, however we could have been full
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042) * before, and then the transaction commit could have freed new block groups,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) * so if we now have space to allocate do the force chunk allocation.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045) static const enum btrfs_flush_state data_flush_states[] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046) FLUSH_DELALLOC_WAIT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047) RUN_DELAYED_IPUTS,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) FLUSH_DELAYED_REFS,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049) COMMIT_TRANS,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050) ALLOC_CHUNK_FORCE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053) static void btrfs_async_reclaim_data_space(struct work_struct *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055) struct btrfs_fs_info *fs_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056) struct btrfs_space_info *space_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057) u64 last_tickets_id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058) int flush_state = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060) fs_info = container_of(work, struct btrfs_fs_info, async_data_reclaim_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061) space_info = fs_info->data_sinfo;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063) spin_lock(&space_info->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064) if (list_empty(&space_info->tickets)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065) space_info->flush = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066) spin_unlock(&space_info->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069) last_tickets_id = space_info->tickets_id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070) spin_unlock(&space_info->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072) while (!space_info->full) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073) flush_space(fs_info, space_info, U64_MAX, ALLOC_CHUNK_FORCE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074) spin_lock(&space_info->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075) if (list_empty(&space_info->tickets)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076) space_info->flush = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077) spin_unlock(&space_info->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080) last_tickets_id = space_info->tickets_id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081) spin_unlock(&space_info->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084) while (flush_state < ARRAY_SIZE(data_flush_states)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085) flush_space(fs_info, space_info, U64_MAX,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086) data_flush_states[flush_state]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087) spin_lock(&space_info->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088) if (list_empty(&space_info->tickets)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089) space_info->flush = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090) spin_unlock(&space_info->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094) if (last_tickets_id == space_info->tickets_id) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095) flush_state++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097) last_tickets_id = space_info->tickets_id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098) flush_state = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101) if (flush_state >= ARRAY_SIZE(data_flush_states)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102) if (space_info->full) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103) if (maybe_fail_all_tickets(fs_info, space_info))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104) flush_state = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106) space_info->flush = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108) flush_state = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111) spin_unlock(&space_info->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1113) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115) void btrfs_init_async_reclaim_work(struct btrfs_fs_info *fs_info)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117) INIT_WORK(&fs_info->async_reclaim_work, btrfs_async_reclaim_metadata_space);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118) INIT_WORK(&fs_info->async_data_reclaim_work, btrfs_async_reclaim_data_space);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121) static const enum btrfs_flush_state priority_flush_states[] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122) FLUSH_DELAYED_ITEMS_NR,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123) FLUSH_DELAYED_ITEMS,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124) ALLOC_CHUNK,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127) static const enum btrfs_flush_state evict_flush_states[] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128) FLUSH_DELAYED_ITEMS_NR,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129) FLUSH_DELAYED_ITEMS,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130) FLUSH_DELAYED_REFS_NR,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131) FLUSH_DELAYED_REFS,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132) FLUSH_DELALLOC,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133) FLUSH_DELALLOC_WAIT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134) ALLOC_CHUNK,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135) COMMIT_TRANS,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138) static void priority_reclaim_metadata_space(struct btrfs_fs_info *fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139) struct btrfs_space_info *space_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140) struct reserve_ticket *ticket,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141) const enum btrfs_flush_state *states,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142) int states_nr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144) u64 to_reclaim;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145) int flush_state;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147) spin_lock(&space_info->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148) to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info, space_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149) if (!to_reclaim) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150) spin_unlock(&space_info->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153) spin_unlock(&space_info->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155) flush_state = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157) flush_space(fs_info, space_info, to_reclaim, states[flush_state]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158) flush_state++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159) spin_lock(&space_info->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160) if (ticket->bytes == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161) spin_unlock(&space_info->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164) spin_unlock(&space_info->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165) } while (flush_state < states_nr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168) static void priority_reclaim_data_space(struct btrfs_fs_info *fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169) struct btrfs_space_info *space_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170) struct reserve_ticket *ticket)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172) while (!space_info->full) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173) flush_space(fs_info, space_info, U64_MAX, ALLOC_CHUNK_FORCE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174) spin_lock(&space_info->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175) if (ticket->bytes == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176) spin_unlock(&space_info->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179) spin_unlock(&space_info->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183) static void wait_reserve_ticket(struct btrfs_fs_info *fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184) struct btrfs_space_info *space_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185) struct reserve_ticket *ticket)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188) DEFINE_WAIT(wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1189) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191) spin_lock(&space_info->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192) while (ticket->bytes > 0 && ticket->error == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193) ret = prepare_to_wait_event(&ticket->wait, &wait, TASK_KILLABLE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196) * Delete us from the list. After we unlock the space
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197) * info, we don't want the async reclaim job to reserve
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198) * space for this ticket. If that would happen, then the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199) * ticket's task would not known that space was reserved
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200) * despite getting an error, resulting in a space leak
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201) * (bytes_may_use counter of our space_info).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203) remove_ticket(space_info, ticket);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204) ticket->error = -EINTR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207) spin_unlock(&space_info->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1209) schedule();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211) finish_wait(&ticket->wait, &wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212) spin_lock(&space_info->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214) spin_unlock(&space_info->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218) * handle_reserve_ticket - do the appropriate flushing and waiting for a ticket
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219) * @fs_info - the fs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220) * @space_info - the space_info for the reservation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221) * @ticket - the ticket for the reservation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222) * @flush - how much we can flush
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224) * This does the work of figuring out how to flush for the ticket, waiting for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225) * the reservation, and returning the appropriate error if there is one.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227) static int handle_reserve_ticket(struct btrfs_fs_info *fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228) struct btrfs_space_info *space_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229) struct reserve_ticket *ticket,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230) enum btrfs_reserve_flush_enum flush)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234) switch (flush) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235) case BTRFS_RESERVE_FLUSH_DATA:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236) case BTRFS_RESERVE_FLUSH_ALL:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1237) case BTRFS_RESERVE_FLUSH_ALL_STEAL:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1238) wait_reserve_ticket(fs_info, space_info, ticket);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1239) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1240) case BTRFS_RESERVE_FLUSH_LIMIT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1241) priority_reclaim_metadata_space(fs_info, space_info, ticket,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1242) priority_flush_states,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1243) ARRAY_SIZE(priority_flush_states));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1244) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1245) case BTRFS_RESERVE_FLUSH_EVICT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1246) priority_reclaim_metadata_space(fs_info, space_info, ticket,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1247) evict_flush_states,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1248) ARRAY_SIZE(evict_flush_states));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1249) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1250) case BTRFS_RESERVE_FLUSH_FREE_SPACE_INODE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1251) priority_reclaim_data_space(fs_info, space_info, ticket);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1252) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1253) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1254) ASSERT(0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1255) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1256) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1257)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1258) spin_lock(&space_info->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1259) ret = ticket->error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1260) if (ticket->bytes || ticket->error) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1261) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1262) * We were a priority ticket, so we need to delete ourselves
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1263) * from the list. Because we could have other priority tickets
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1264) * behind us that require less space, run
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1265) * btrfs_try_granting_tickets() to see if their reservations can
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1266) * now be made.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1267) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1268) if (!list_empty(&ticket->list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1269) remove_ticket(space_info, ticket);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1270) btrfs_try_granting_tickets(fs_info, space_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1271) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1272)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1273) if (!ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1274) ret = -ENOSPC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1275) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1276) spin_unlock(&space_info->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1277) ASSERT(list_empty(&ticket->list));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1278) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1279) * Check that we can't have an error set if the reservation succeeded,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1280) * as that would confuse tasks and lead them to error out without
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1281) * releasing reserved space (if an error happens the expectation is that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1282) * space wasn't reserved at all).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1283) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1284) ASSERT(!(ticket->bytes == 0 && ticket->error));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1285) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1286) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1287)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1288) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1289) * This returns true if this flush state will go through the ordinary flushing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1290) * code.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1291) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1292) static inline bool is_normal_flushing(enum btrfs_reserve_flush_enum flush)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1293) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1294) return (flush == BTRFS_RESERVE_FLUSH_ALL) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1295) (flush == BTRFS_RESERVE_FLUSH_ALL_STEAL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1296) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1297)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1298) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1299) * reserve_metadata_bytes - try to reserve bytes from the block_rsv's space
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1300) * @root - the root we're allocating for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1301) * @space_info - the space info we want to allocate from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1302) * @orig_bytes - the number of bytes we want
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1303) * @flush - whether or not we can flush to make our reservation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1304) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1305) * This will reserve orig_bytes number of bytes from the space info associated
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1306) * with the block_rsv. If there is not enough space it will make an attempt to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1307) * flush out space to make room. It will do this by flushing delalloc if
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1308) * possible or committing the transaction. If flush is 0 then no attempts to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1309) * regain reservations will be made and this will fail if there is not enough
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1310) * space already.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1311) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1312) static int __reserve_bytes(struct btrfs_fs_info *fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1313) struct btrfs_space_info *space_info, u64 orig_bytes,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1314) enum btrfs_reserve_flush_enum flush)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1315) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1316) struct work_struct *async_work;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1317) struct reserve_ticket ticket;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1318) u64 used;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1319) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1320) bool pending_tickets;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1321)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1322) ASSERT(orig_bytes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1323) ASSERT(!current->journal_info || flush != BTRFS_RESERVE_FLUSH_ALL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1324)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1325) if (flush == BTRFS_RESERVE_FLUSH_DATA)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1326) async_work = &fs_info->async_data_reclaim_work;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1327) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1328) async_work = &fs_info->async_reclaim_work;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1329)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1330) spin_lock(&space_info->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1331) ret = -ENOSPC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1332) used = btrfs_space_info_used(space_info, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1333)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1334) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1335) * We don't want NO_FLUSH allocations to jump everybody, they can
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1336) * generally handle ENOSPC in a different way, so treat them the same as
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1337) * normal flushers when it comes to skipping pending tickets.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1338) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1339) if (is_normal_flushing(flush) || (flush == BTRFS_RESERVE_NO_FLUSH))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1340) pending_tickets = !list_empty(&space_info->tickets) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1341) !list_empty(&space_info->priority_tickets);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1342) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1343) pending_tickets = !list_empty(&space_info->priority_tickets);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1344)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1345) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1346) * Carry on if we have enough space (short-circuit) OR call
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1347) * can_overcommit() to ensure we can overcommit to continue.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1348) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1349) if (!pending_tickets &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1350) ((used + orig_bytes <= space_info->total_bytes) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1351) btrfs_can_overcommit(fs_info, space_info, orig_bytes, flush))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1352) btrfs_space_info_update_bytes_may_use(fs_info, space_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1353) orig_bytes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1354) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1355) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1356)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1357) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1358) * If we couldn't make a reservation then setup our reservation ticket
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1359) * and kick the async worker if it's not already running.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1360) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1361) * If we are a priority flusher then we just need to add our ticket to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1362) * the list and we will do our own flushing further down.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1363) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1364) if (ret && flush != BTRFS_RESERVE_NO_FLUSH) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1365) ticket.bytes = orig_bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1366) ticket.error = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1367) space_info->reclaim_size += ticket.bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1368) init_waitqueue_head(&ticket.wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1369) ticket.steal = (flush == BTRFS_RESERVE_FLUSH_ALL_STEAL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1370) if (flush == BTRFS_RESERVE_FLUSH_ALL ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1371) flush == BTRFS_RESERVE_FLUSH_ALL_STEAL ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1372) flush == BTRFS_RESERVE_FLUSH_DATA) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1373) list_add_tail(&ticket.list, &space_info->tickets);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1374) if (!space_info->flush) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1375) space_info->flush = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1376) trace_btrfs_trigger_flush(fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1377) space_info->flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1378) orig_bytes, flush,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1379) "enospc");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1380) queue_work(system_unbound_wq, async_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1381) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1382) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1383) list_add_tail(&ticket.list,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1384) &space_info->priority_tickets);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1385) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1386) } else if (!ret && space_info->flags & BTRFS_BLOCK_GROUP_METADATA) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1387) used += orig_bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1388) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1389) * We will do the space reservation dance during log replay,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1390) * which means we won't have fs_info->fs_root set, so don't do
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1391) * the async reclaim as we will panic.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1392) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1393) if (!test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1394) need_do_async_reclaim(fs_info, space_info, used) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1395) !work_busy(&fs_info->async_reclaim_work)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1396) trace_btrfs_trigger_flush(fs_info, space_info->flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1397) orig_bytes, flush, "preempt");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1398) queue_work(system_unbound_wq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1399) &fs_info->async_reclaim_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1400) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1401) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1402) spin_unlock(&space_info->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1403) if (!ret || flush == BTRFS_RESERVE_NO_FLUSH)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1404) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1405)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1406) return handle_reserve_ticket(fs_info, space_info, &ticket, flush);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1407) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1408)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1409) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1410) * reserve_metadata_bytes - try to reserve bytes from the block_rsv's space
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1411) * @root - the root we're allocating for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1412) * @block_rsv - the block_rsv we're allocating for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1413) * @orig_bytes - the number of bytes we want
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1414) * @flush - whether or not we can flush to make our reservation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1415) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1416) * This will reserve orig_bytes number of bytes from the space info associated
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1417) * with the block_rsv. If there is not enough space it will make an attempt to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1418) * flush out space to make room. It will do this by flushing delalloc if
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1419) * possible or committing the transaction. If flush is 0 then no attempts to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1420) * regain reservations will be made and this will fail if there is not enough
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1421) * space already.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1422) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1423) int btrfs_reserve_metadata_bytes(struct btrfs_root *root,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1424) struct btrfs_block_rsv *block_rsv,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1425) u64 orig_bytes,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1426) enum btrfs_reserve_flush_enum flush)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1427) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1428) struct btrfs_fs_info *fs_info = root->fs_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1429) struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1430) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1431)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1432) ret = __reserve_bytes(fs_info, block_rsv->space_info, orig_bytes, flush);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1433) if (ret == -ENOSPC &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1434) unlikely(root->orphan_cleanup_state == ORPHAN_CLEANUP_STARTED)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1435) if (block_rsv != global_rsv &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1436) !btrfs_block_rsv_use_bytes(global_rsv, orig_bytes))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1437) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1438) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1439) if (ret == -ENOSPC) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1440) trace_btrfs_space_reservation(fs_info, "space_info:enospc",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1441) block_rsv->space_info->flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1442) orig_bytes, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1443)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1444) if (btrfs_test_opt(fs_info, ENOSPC_DEBUG))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1445) btrfs_dump_space_info(fs_info, block_rsv->space_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1446) orig_bytes, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1447) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1448) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1449) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1450)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1451) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1452) * btrfs_reserve_data_bytes - try to reserve data bytes for an allocation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1453) * @fs_info - the filesystem
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1454) * @bytes - the number of bytes we need
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1455) * @flush - how we are allowed to flush
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1456) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1457) * This will reserve bytes from the data space info. If there is not enough
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1458) * space then we will attempt to flush space as specified by flush.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1459) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1460) int btrfs_reserve_data_bytes(struct btrfs_fs_info *fs_info, u64 bytes,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1461) enum btrfs_reserve_flush_enum flush)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1462) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1463) struct btrfs_space_info *data_sinfo = fs_info->data_sinfo;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1464) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1465)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1466) ASSERT(flush == BTRFS_RESERVE_FLUSH_DATA ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1467) flush == BTRFS_RESERVE_FLUSH_FREE_SPACE_INODE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1468) ASSERT(!current->journal_info || flush != BTRFS_RESERVE_FLUSH_DATA);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1469)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1470) ret = __reserve_bytes(fs_info, data_sinfo, bytes, flush);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1471) if (ret == -ENOSPC) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1472) trace_btrfs_space_reservation(fs_info, "space_info:enospc",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1473) data_sinfo->flags, bytes, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1474) if (btrfs_test_opt(fs_info, ENOSPC_DEBUG))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1475) btrfs_dump_space_info(fs_info, data_sinfo, bytes, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1476) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1477) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1478) }