// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2003-2006, Cluster File Systems, Inc, info@clusterfs.com
 * Written by Alex Tomas <alex@clusterfs.com>
 */


/*
 * mballoc.c contains the multiblocks allocation routines
 */

#include "ext4_jbd2.h"
#include "mballoc.h"
#include <linux/log2.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/nospec.h>
#include <linux/backing-dev.h>
#include <trace/events/ext4.h>

/*
 * MUSTDO:
 *  - test ext4_ext_search_left() and ext4_ext_search_right()
 *  - search for metadata in few groups
 *
 * TODO v4:
 *  - normalization should take into account whether file is still open
 *  - discard preallocations if no free space left (policy?)
 *  - don't normalize tails
 *  - quota
 *  - reservation for superuser
 *
 * TODO v3:
 *  - bitmap read-ahead (proposed by Oleg Drokin aka green)
 *  - track min/max extents in each group for better group selection
 *  - mb_mark_used() may allocate chunk right after splitting buddy
 *  - tree of groups sorted by number of free blocks
 *  - error handling
 */

/*
 * An allocation request involves a request for multiple blocks near
 * the specified goal block.
 *
 * During the initialization phase of the allocator we decide whether to use
 * group preallocation or inode preallocation depending on the size of
 * the file. The size of the file could be the resulting file size we
 * would have after allocation, or the current file size, whichever
 * is larger. If the size is less than sbi->s_mb_stream_request we
 * select group preallocation. The default value of
 * s_mb_stream_request is 16 blocks. This can also be tuned via
 * /sys/fs/ext4/<partition>/mb_stream_req. The value is represented in
 * terms of number of blocks.
 *
 * The main motivation for having small files use group preallocation is to
 * ensure that we have small files closer together on the disk.
 *
 * In the first stage the allocator looks at the inode prealloc list,
 * ext4_inode_info->i_prealloc_list, which contains the list of prealloc
 * spaces for this particular inode. The inode prealloc space is
 * represented as:
 *
 * pa_lstart -> the logical start block for this prealloc space
 * pa_pstart -> the physical start block for this prealloc space
 * pa_len    -> length for this prealloc space (in clusters)
 * pa_free   -> free space available in this prealloc space (in clusters)
 *
 * The inode preallocation space is used looking at the _logical_ start
 * block. Only if the logical file block falls within the range of a prealloc
 * space do we consume that particular prealloc space. This makes sure that
 * we have contiguous physical blocks representing the file blocks.
 *
 * The important thing to note about inode prealloc space is that we don't
 * modify the values associated with the inode prealloc space except
 * pa_free.
 *
 * If we are not able to find blocks in the inode prealloc space and if we
 * have the group allocation flag set then we look at the locality group
 * prealloc space. This is a per-CPU prealloc list, represented as
 *
 * ext4_sb_info.s_locality_groups[smp_processor_id()]
 *
 * The reason for having a per-cpu locality group is to reduce the contention
 * between CPUs. It is possible to get scheduled at this point.
 *
 * The locality group prealloc space is used by checking whether we have
 * enough free space (pa_free) within the prealloc space.
 *
 * If we can't allocate blocks via inode prealloc and/or locality group
 * prealloc then we look at the buddy cache. The buddy cache is represented
 * by ext4_sb_info.s_buddy_cache (struct inode), whose file offsets get
 * mapped to the buddy and bitmap information regarding the different
 * groups. The buddy information is attached to the buddy cache inode so that
 * we can access it through the page cache. The information regarding
 * each group is loaded via ext4_mb_load_buddy. The information involves the
 * block bitmap and the buddy information, which are stored in the
 * inode as:
 *
 * {                        page                        }
 * [ group 0 bitmap][ group 0 buddy] [group 1][ group 1]...
 *
 *
 * one block each for bitmap and buddy information. So for each group we
 * take up 2 blocks. A page can contain blocks_per_page (PAGE_SIZE /
 * blocksize) blocks. So it can have information regarding
 * groups_per_page groups, which is blocks_per_page/2.
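 * For example, with a 4k PAGE_SIZE and a 1k blocksize, blocks_per_page
 * is 4, so a single page holds the bitmap and buddy blocks of
 * groups_per_page = 2 groups.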
 *
 * The buddy cache inode is not stored on disk. The inode is thrown
 * away when the filesystem is unmounted.
 *
 * We look for the requested number of blocks in the buddy cache. If we were
 * able to locate that many free blocks we return with additional information
 * regarding the rest of the contiguous physical blocks available.
 *
 * Before allocating blocks via the buddy cache we normalize the request
 * blocks. This ensures we ask for more blocks than we need. The extra
 * blocks that we get after allocation are added to the respective prealloc
 * list. In case of inode preallocation we follow a set of heuristics
 * based on file size. This can be found in ext4_mb_normalize_request. If
 * we are doing a group prealloc we try to normalize the request to
 * sbi->s_mb_group_prealloc. The default value of s_mb_group_prealloc is
 * dependent on the cluster size; for non-bigalloc file systems, it is
 * 512 blocks. This can be tuned via
 * /sys/fs/ext4/<partition>/mb_group_prealloc. The value is represented in
 * terms of number of blocks. If we have mounted the file system with -O
 * stripe=<value> option the group prealloc request is normalized to the
 * smallest multiple of the stripe value (sbi->s_stripe) which is
 * greater than the default mb_group_prealloc.
 *
 * The regular allocator (using the buddy cache) supports a few tunables.
 *
 * /sys/fs/ext4/<partition>/mb_min_to_scan
 * /sys/fs/ext4/<partition>/mb_max_to_scan
 * /sys/fs/ext4/<partition>/mb_order2_req
 *
 * The regular allocator uses buddy scan only if the request length is a
 * power of 2 blocks and the order of allocation is >= sbi->s_mb_order2_reqs.
 * The value of s_mb_order2_reqs can be tuned via
 * /sys/fs/ext4/<partition>/mb_order2_req. If the request length is equal to
 * the stripe size (sbi->s_stripe), we try to search for contiguous blocks of
 * stripe size. This should result in better allocation on RAID setups. If
 * not, we search in the specific group using the bitmap for best extents. The
 * tunables min_to_scan and max_to_scan control the behaviour here.
 * min_to_scan indicates how long mballoc __must__ look for a best
 * extent and max_to_scan indicates how long mballoc __can__ look for a
 * best extent among the found extents. Searching for blocks starts with
 * the group specified as the goal value in the allocation context via
 * ac_g_ex. Each group is first checked to see whether it
 * can be used for allocation. ext4_mb_good_group explains how the groups are
 * checked.
 *
 * Both prealloc spaces are populated as described above. So for the first
 * request we will hit the buddy cache, which will result in this prealloc
 * space getting filled. The prealloc space is then later used for
 * subsequent requests.
 */

/*
 * mballoc operates on the following data:
 *  - on-disk bitmap
 *  - in-core buddy (actually includes buddy and bitmap)
 *  - preallocation descriptors (PAs)
 *
 * there are two types of preallocations:
 *  - inode
 *    assigned to a specific inode and can be used for this inode only.
 *    it describes part of the inode's space preallocated to specific
 *    physical blocks. any block from that preallocation can be used
 *    independently. the descriptor just tracks the number of blocks left
 *    unused. so, before taking some block from the descriptor, one must
 *    make sure the corresponding logical block isn't allocated yet. this
 *    also means that freeing any block within the descriptor's range
 *    must discard all preallocated blocks.
 *  - locality group
 *    assigned to a specific locality group, which does not translate to a
 *    permanent set of inodes: an inode can join and leave a group. space
 *    from this type of preallocation can be used for any inode. thus
 *    it's consumed from the beginning to the end.
 *
 * the relation between them can be expressed as:
 *    in-core buddy = on-disk bitmap + preallocation descriptors
 *
 * this means the blocks mballoc considers used are:
 *  - allocated blocks (persistent)
 *  - preallocated blocks (non-persistent)
 *
 * consistency in the mballoc world means that at any time a block is either
 * free or used in ALL structures. notice: "any time" should not be read
 * literally -- time is discrete and delimited by locks.
 *
 * to keep it simple, we don't use block numbers, instead we count the number
 * of blocks: how many blocks are marked used/free in the on-disk bitmap,
 * buddy and PA.
 *
 * all operations can be expressed as:
 *  - init buddy:			buddy = on-disk + PAs
 *  - new PA:				buddy += N; PA = N
 *  - use inode PA:			on-disk += N; PA -= N
 *  - discard inode PA:			buddy -= on-disk - PA; PA = 0
 *  - use locality group PA:		on-disk += N; PA -= N
 *  - discard locality group PA:	buddy -= PA; PA = 0
 * note: 'buddy -= on-disk - PA' is used to show that the on-disk bitmap
 *       is used in the real operation, because we can't know the actual
 *       used bits from the PA, only from the on-disk bitmap
 *
 * if we follow this strict logic, then all operations above should be atomic.
 * given that some of them can block, we'd have to use something like
 * semaphores, killing performance on high-end SMP hardware. let's try to
 * relax it using the following knowledge:
 *  1) if buddy is referenced, it's already initialized
 *  2) while a block is used in the buddy and the buddy is referenced,
 *     nobody can re-allocate that block
 *  3) we work on bitmaps and '+' actually means 'set bits'. if the on-disk
 *     bitmap has a bit set and a PA claims the same block, it's OK. IOW, one
 *     can set a bit in the on-disk bitmap if the buddy has the same bit set
 *     and/or a PA covers the corresponding block
 *
 * so, now we're building a concurrency table:
 *  - init buddy vs.
 *    - new PA
 *      blocks for the PA are allocated in the buddy, the buddy must be
 *      referenced until the PA is linked to the allocation group to avoid
 *      concurrent buddy init
 *    - use inode PA
 *      we need to make sure that either the on-disk bitmap or the PA has
 *      uptodate data. given (3) we care that the PA -= N operation doesn't
 *      interfere with init
 *    - discard inode PA
 *      the simplest way would be to have the buddy initialized by the discard
 *    - use locality group PA
 *      again, PA -= N must be serialized with init
 *    - discard locality group PA
 *      the simplest way would be to have the buddy initialized by the discard
 *  - new PA vs.
 *    - use inode PA
 *      i_data_sem serializes them
 *    - discard inode PA
 *      the discard process must wait until the PA isn't used by another
 *      process
 *    - use locality group PA
 *      some mutex should serialize them
 *    - discard locality group PA
 *      the discard process must wait until the PA isn't used by another
 *      process
 *  - use inode PA vs.
 *    - use inode PA
 *      i_data_sem or another mutex should serialize them
 *    - discard inode PA
 *      the discard process must wait until the PA isn't used by another
 *      process
 *    - use locality group PA
 *      nothing wrong here -- they're different PAs covering different blocks
 *    - discard locality group PA
 *      the discard process must wait until the PA isn't used by another
 *      process
 *
 * now we're ready to draw a few consequences:
 *  - while a PA is referenced, no discard of it is possible
 *  - a PA is referenced until the blocks are marked in the on-disk bitmap
 *  - a PA changes only after the on-disk bitmap does
 *  - discard must not compete with init. either init is done before
 *    any discard, or they're serialized somehow
 *  - buddy init as the sum of on-disk bitmap and PAs is done atomically
 *
 * a special case is when we've used a PA to emptiness. there is no need to
 * modify the buddy in this case, but we should care about concurrent init
 *
 */
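
/*
 * To make the accounting above concrete, a small worked example with
 * illustrative numbers:
 *
 *	init buddy:	on-disk has 100 clusters marked used and PAs cover
 *			20 more, so the buddy starts with 120 used
 *	new PA of 16:	buddy += 16; PA = 16
 *	use 4 from PA:	on-disk += 4; PA -= 4 (12 left, buddy unchanged)
 *	discard the PA:	buddy -= 12; PA = 0 (the 12 clusters never written
 *			to the on-disk bitmap become free in the buddy)
 */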

/*
 * Logic in a few words:
 *
 *  - allocation:
 *    load group
 *    find blocks
 *    mark bits in on-disk bitmap
 *    release group
 *
 *  - use preallocation:
 *    find proper PA (per-inode or group)
 *    load group
 *    mark bits in on-disk bitmap
 *    release group
 *    release PA
 *
 *  - free:
 *    load group
 *    mark bits in on-disk bitmap
 *    release group
 *
 *  - discard preallocations in group:
 *    mark PAs deleted
 *    move them onto local list
 *    load on-disk bitmap
 *    load group
 *    remove PA from object (inode or locality group)
 *    mark free blocks in-core
 *
 *  - discard inode's preallocations:
 */

/*
 * Locking rules
 *
 * Locks:
 *  - bitlock on a group	(group)
 *  - object (inode/locality)	(object)
 *  - per-pa lock		(pa)
 *
 * Paths:
 *  - new pa
 *    object
 *    group
 *
 *  - find and use pa:
 *    pa
 *
 *  - release consumed pa:
 *    pa
 *    group
 *    object
 *
 *  - generate in-core bitmap:
 *    group
 *    pa
 *
 *  - discard all for given object (inode, locality group):
 *    object
 *    pa
 *    group
 *
 *  - discard all for given group:
 *    group
 *    pa
 *    group
 *    object
 *
 */
static struct kmem_cache *ext4_pspace_cachep;
static struct kmem_cache *ext4_ac_cachep;
static struct kmem_cache *ext4_free_data_cachep;

/* We create slab caches for groupinfo data structures based on the
 * superblock block size. There will be one per mounted filesystem for
 * each unique s_blocksize_bits */
#define NR_GRPINFO_CACHES 8
static struct kmem_cache *ext4_groupinfo_caches[NR_GRPINFO_CACHES];

static const char * const ext4_groupinfo_slab_names[NR_GRPINFO_CACHES] = {
	"ext4_groupinfo_1k", "ext4_groupinfo_2k", "ext4_groupinfo_4k",
	"ext4_groupinfo_8k", "ext4_groupinfo_16k", "ext4_groupinfo_32k",
	"ext4_groupinfo_64k", "ext4_groupinfo_128k"
};

static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
					ext4_group_t group);
static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
						ext4_group_t group);
static void ext4_mb_new_preallocation(struct ext4_allocation_context *ac);

/*
 * The algorithm using this percpu seq counter goes as follows:
 * 1. We sample the percpu discard_pa_seq counter before trying for block
 *    allocation in ext4_mb_new_blocks().
 * 2. We increment this percpu discard_pa_seq counter when we either allocate
 *    or free these blocks, i.e. while marking those blocks as used/free in
 *    mb_mark_used()/mb_free_blocks().
 * 3. We also increment this percpu seq counter when we successfully identify
 *    that the bb_prealloc_list is not empty and hence proceed to discard
 *    those PAs inside ext4_mb_discard_group_preallocations().
 *
 * Now to make sure that the regular fast path of block allocation is not
 * affected, as a small optimization we only sample the percpu seq counter
 * on the current cpu. Only when block allocation fails and no freed blocks
 * were found do we sample the percpu seq counter for all cpus, using the
 * function ext4_get_discard_pa_seq_sum() below. This happens after making
 * sure that all the PAs on grp->bb_prealloc_list got freed or that the list
 * is empty.
 */
static DEFINE_PER_CPU(u64, discard_pa_seq);
static inline u64 ext4_get_discard_pa_seq_sum(void)
{
	int __cpu;
	u64 __seq = 0;

	for_each_possible_cpu(__cpu)
		__seq += per_cpu(discard_pa_seq, __cpu);
	return __seq;
}
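
/*
 * A rough sketch of the intended use (illustrative pseudo-code, not the
 * actual ext4_mb_new_blocks() body):
 *
 *	seq = this_cpu_read(discard_pa_seq);
 *	if (allocation failed && no blocks were freed &&
 *	    seq != ext4_get_discard_pa_seq_sum())
 *		retry the allocation, since a concurrent discard/free
 *		may have released space after we sampled seq;
 */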

static inline void *mb_correct_addr_and_bit(int *bit, void *addr)
{
#if BITS_PER_LONG == 64
	*bit += ((unsigned long) addr & 7UL) << 3;
	addr = (void *) ((unsigned long) addr & ~7UL);
#elif BITS_PER_LONG == 32
	*bit += ((unsigned long) addr & 3UL) << 3;
	addr = (void *) ((unsigned long) addr & ~3UL);
#else
#error "how many bits you are?!"
#endif
	return addr;
}
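
/*
 * Example (a worked illustration, assuming BITS_PER_LONG == 64): for
 * addr == 0x1003 and *bit == 2, the address is 3 bytes past an 8-byte
 * boundary, so *bit becomes 2 + (3 << 3) = 26 and the returned address
 * is 0x1000. The same bit is now addressed from an aligned base.
 */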

static inline int mb_test_bit(int bit, void *addr)
{
	/*
	 * ext4_test_bit on architectures like powerpc
	 * needs an unsigned long aligned address
	 */
	addr = mb_correct_addr_and_bit(&bit, addr);
	return ext4_test_bit(bit, addr);
}

static inline void mb_set_bit(int bit, void *addr)
{
	addr = mb_correct_addr_and_bit(&bit, addr);
	ext4_set_bit(bit, addr);
}

static inline void mb_clear_bit(int bit, void *addr)
{
	addr = mb_correct_addr_and_bit(&bit, addr);
	ext4_clear_bit(bit, addr);
}

static inline int mb_test_and_clear_bit(int bit, void *addr)
{
	addr = mb_correct_addr_and_bit(&bit, addr);
	return ext4_test_and_clear_bit(bit, addr);
}

static inline int mb_find_next_zero_bit(void *addr, int max, int start)
{
	int fix = 0, ret, tmpmax;
	addr = mb_correct_addr_and_bit(&fix, addr);
	tmpmax = max + fix;
	start += fix;

	ret = ext4_find_next_zero_bit(addr, tmpmax, start) - fix;
	if (ret > max)
		return max;
	return ret;
}

static inline int mb_find_next_bit(void *addr, int max, int start)
{
	int fix = 0, ret, tmpmax;
	addr = mb_correct_addr_and_bit(&fix, addr);
	tmpmax = max + fix;
	start += fix;

	ret = ext4_find_next_bit(addr, tmpmax, start) - fix;
	if (ret > max)
		return max;
	return ret;
}
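
/*
 * For example (illustrative values): if addr is 4 bytes past an 8-byte
 * boundary on a 64-bit machine, mb_correct_addr_and_bit() sets fix = 32.
 * A search over bits [start, max) of the unaligned buffer then becomes a
 * search over bits [start + 32, max + 32) of the aligned buffer, and
 * subtracting fix maps the result back to the caller's bit numbering.
 */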

static void *mb_find_buddy(struct ext4_buddy *e4b, int order, int *max)
{
	char *bb;

	BUG_ON(e4b->bd_bitmap == e4b->bd_buddy);
	BUG_ON(max == NULL);

	if (order > e4b->bd_blkbits + 1) {
		*max = 0;
		return NULL;
	}

	/* at order 0 we see each particular block */
	if (order == 0) {
		*max = 1 << (e4b->bd_blkbits + 3);
		return e4b->bd_bitmap;
	}

	bb = e4b->bd_buddy + EXT4_SB(e4b->bd_sb)->s_mb_offsets[order];
	*max = EXT4_SB(e4b->bd_sb)->s_mb_maxs[order];

	return bb;
}
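
/*
 * As a worked example (illustrative, for a 4k block size): bd_blkbits is
 * 12, so the maximum buddy order is bd_blkbits + 1 = 13. At order 0 the
 * bitmap covers 1 << (12 + 3) = 32768 clusters (one bit per cluster); at
 * order n the buddy bitmap has half as many bits as at order n - 1, each
 * cleared bit marking a free chunk of 2^n clusters.
 */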

#ifdef DOUBLE_CHECK
static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b,
					   int first, int count)
{
	int i;
	struct super_block *sb = e4b->bd_sb;

	if (unlikely(e4b->bd_info->bb_bitmap == NULL))
		return;
	assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
	for (i = 0; i < count; i++) {
		if (!mb_test_bit(first + i, e4b->bd_info->bb_bitmap)) {
			ext4_fsblk_t blocknr;

			blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
			blocknr += EXT4_C2B(EXT4_SB(sb), first + i);
			ext4_grp_locked_error(sb, e4b->bd_group,
					      inode ? inode->i_ino : 0,
					      blocknr,
					      "freeing block already freed "
					      "(bit %u)",
					      first + i);
			ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
					EXT4_GROUP_INFO_BBITMAP_CORRUPT);
		}
		mb_clear_bit(first + i, e4b->bd_info->bb_bitmap);
	}
}

static void mb_mark_used_double(struct ext4_buddy *e4b, int first, int count)
{
	int i;

	if (unlikely(e4b->bd_info->bb_bitmap == NULL))
		return;
	assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
	for (i = 0; i < count; i++) {
		BUG_ON(mb_test_bit(first + i, e4b->bd_info->bb_bitmap));
		mb_set_bit(first + i, e4b->bd_info->bb_bitmap);
	}
}

static void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap)
{
	if (unlikely(e4b->bd_info->bb_bitmap == NULL))
		return;
	if (memcmp(e4b->bd_info->bb_bitmap, bitmap, e4b->bd_sb->s_blocksize)) {
		unsigned char *b1, *b2;
		int i;
		b1 = (unsigned char *) e4b->bd_info->bb_bitmap;
		b2 = (unsigned char *) bitmap;
		for (i = 0; i < e4b->bd_sb->s_blocksize; i++) {
			if (b1[i] != b2[i]) {
				ext4_msg(e4b->bd_sb, KERN_ERR,
					 "corruption in group %u "
					 "at byte %u(%u): %x in copy != %x "
					 "on disk/prealloc",
					 e4b->bd_group, i, i * 8, b1[i], b2[i]);
				BUG();
			}
		}
	}
}

static void mb_group_bb_bitmap_alloc(struct super_block *sb,
			struct ext4_group_info *grp, ext4_group_t group)
{
	struct buffer_head *bh;

	grp->bb_bitmap = kmalloc(sb->s_blocksize, GFP_NOFS);
	if (!grp->bb_bitmap)
		return;

	bh = ext4_read_block_bitmap(sb, group);
	if (IS_ERR_OR_NULL(bh)) {
		kfree(grp->bb_bitmap);
		grp->bb_bitmap = NULL;
		return;
	}

	memcpy(grp->bb_bitmap, bh->b_data, sb->s_blocksize);
	put_bh(bh);
}

static void mb_group_bb_bitmap_free(struct ext4_group_info *grp)
{
	kfree(grp->bb_bitmap);
}

#else
static inline void mb_free_blocks_double(struct inode *inode,
				struct ext4_buddy *e4b, int first, int count)
{
	return;
}
static inline void mb_mark_used_double(struct ext4_buddy *e4b,
						int first, int count)
{
	return;
}
static inline void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap)
{
	return;
}

static inline void mb_group_bb_bitmap_alloc(struct super_block *sb,
			struct ext4_group_info *grp, ext4_group_t group)
{
	return;
}

static inline void mb_group_bb_bitmap_free(struct ext4_group_info *grp)
{
	return;
}
#endif

#ifdef AGGRESSIVE_CHECK

#define MB_CHECK_ASSERT(assert)						\
do {									\
	if (!(assert)) {						\
		printk(KERN_EMERG					\
			"Assertion failure in %s() at %s:%d: \"%s\"\n",	\
			function, file, line, # assert);		\
		BUG();							\
	}								\
} while (0)

static int __mb_check_buddy(struct ext4_buddy *e4b, char *file,
				const char *function, int line)
{
	struct super_block *sb = e4b->bd_sb;
	int order = e4b->bd_blkbits + 1;
	int max;
	int max2;
	int i;
	int j;
	int k;
	int count;
	struct ext4_group_info *grp;
	int fragments = 0;
	int fstart;
	struct list_head *cur;
	void *buddy;
	void *buddy2;

	if (e4b->bd_info->bb_check_counter++ % 10)
		return 0;

	while (order > 1) {
		buddy = mb_find_buddy(e4b, order, &max);
		MB_CHECK_ASSERT(buddy);
		buddy2 = mb_find_buddy(e4b, order - 1, &max2);
		MB_CHECK_ASSERT(buddy2);
		MB_CHECK_ASSERT(buddy != buddy2);
		MB_CHECK_ASSERT(max * 2 == max2);

		count = 0;
		for (i = 0; i < max; i++) {

			if (mb_test_bit(i, buddy)) {
				/* at most one bit in buddy2 may be cleared */
				if (!mb_test_bit(i << 1, buddy2)) {
					MB_CHECK_ASSERT(
						mb_test_bit((i<<1)+1, buddy2));
				} else if (!mb_test_bit((i << 1) + 1, buddy2)) {
					MB_CHECK_ASSERT(
						mb_test_bit(i << 1, buddy2));
				}
				continue;
			}

			/* both bits in buddy2 must be 1 */
			MB_CHECK_ASSERT(mb_test_bit(i << 1, buddy2));
			MB_CHECK_ASSERT(mb_test_bit((i << 1) + 1, buddy2));

			for (j = 0; j < (1 << order); j++) {
				k = (i * (1 << order)) + j;
				MB_CHECK_ASSERT(
					!mb_test_bit(k, e4b->bd_bitmap));
			}
			count++;
		}
		MB_CHECK_ASSERT(e4b->bd_info->bb_counters[order] == count);
		order--;
	}

	fstart = -1;
	buddy = mb_find_buddy(e4b, 0, &max);
	for (i = 0; i < max; i++) {
		if (!mb_test_bit(i, buddy)) {
			MB_CHECK_ASSERT(i >= e4b->bd_info->bb_first_free);
			if (fstart == -1) {
				fragments++;
				fstart = i;
			}
			continue;
		}
		fstart = -1;
		/* check used bits only */
		for (j = 0; j < e4b->bd_blkbits + 1; j++) {
			buddy2 = mb_find_buddy(e4b, j, &max2);
			k = i >> j;
			MB_CHECK_ASSERT(k < max2);
			MB_CHECK_ASSERT(mb_test_bit(k, buddy2));
		}
	}
	MB_CHECK_ASSERT(!EXT4_MB_GRP_NEED_INIT(e4b->bd_info));
	MB_CHECK_ASSERT(e4b->bd_info->bb_fragments == fragments);

	grp = ext4_get_group_info(sb, e4b->bd_group);
	list_for_each(cur, &grp->bb_prealloc_list) {
		ext4_group_t groupnr;
		struct ext4_prealloc_space *pa;
		pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
		ext4_get_group_no_and_offset(sb, pa->pa_pstart, &groupnr, &k);
		MB_CHECK_ASSERT(groupnr == e4b->bd_group);
		for (i = 0; i < pa->pa_len; i++)
			MB_CHECK_ASSERT(mb_test_bit(k + i, buddy));
	}
	return 0;
}
#undef MB_CHECK_ASSERT
#define mb_check_buddy(e4b) __mb_check_buddy(e4b,	\
					__FILE__, __func__, __LINE__)
#else
#define mb_check_buddy(e4b)
#endif

/*
 * Divide the blocks starting at @first with length @len into
 * smaller chunks with power-of-2 block counts.
 * Clear the bits in the buddy bitmap that the chunk(s) cover,
 * then increase bb_counters[] for the corresponding chunk size.
 */
static void ext4_mb_mark_free_simple(struct super_block *sb,
				void *buddy, ext4_grpblk_t first, ext4_grpblk_t len,
					struct ext4_group_info *grp)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	ext4_grpblk_t min;
	ext4_grpblk_t max;
	ext4_grpblk_t chunk;
	unsigned int border;

	BUG_ON(len > EXT4_CLUSTERS_PER_GROUP(sb));

	border = 2 << sb->s_blocksize_bits;

	while (len > 0) {
		/* find the largest chunk that alignment allows at this position */
		max = ffs(first | border) - 1;

		/* find the largest power-of-2 chunk that fits in len */
		min = fls(len) - 1;

		if (max < min)
			min = max;
		chunk = 1 << min;

		/* mark multiblock chunks only */
		grp->bb_counters[min]++;
		if (min > 0)
			mb_clear_bit(first >> min,
				     buddy + sbi->s_mb_offsets[min]);

		len -= chunk;
		first += chunk;
	}
}
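
/*
 * Worked example (illustrative values): for first = 5 and len = 7 the
 * loop splits the range into power-of-2, naturally aligned chunks:
 *
 *	pass 1: max = ffs(5) - 1 = 0, min = fls(7) - 1 = 2 -> chunk of 1 at 5
 *	pass 2: max = ffs(6) - 1 = 1, min = fls(6) - 1 = 2 -> chunk of 2 at 6
 *	pass 3: max = ffs(8) - 1 = 3, min = fls(4) - 1 = 2 -> chunk of 4 at 8
 *
 * so bb_counters[0], bb_counters[1] and bb_counters[2] each gain one free
 * chunk, and the corresponding order-1 and order-2 buddy bits are cleared.
 */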
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 746)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 747) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 748) * Cache the order of the largest free extent we have available in this block
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 749) * group.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 750) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 751) static void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 752) mb_set_largest_free_order(struct super_block *sb, struct ext4_group_info *grp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 753) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 754) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 755) int bits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 756)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 757) grp->bb_largest_free_order = -1; /* uninit */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 758)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 759) bits = sb->s_blocksize_bits + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 760) for (i = bits; i >= 0; i--) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 761) if (grp->bb_counters[i] > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 762) grp->bb_largest_free_order = i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 763) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 764) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 765) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 766) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 767)
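/*
 * Illustrative sketch of the scan below: if the first 8 bits of the
 * in-core bitmap are 1 1 1 1 0 0 1 0 (bit 0 first, 0 == free), the loop
 * finds two free runs, [4,5] and [7]. The aligned run of length 2 goes
 * through ext4_mb_mark_free_simple() (bb_counters[1]++), the single
 * block bumps bb_counters[0], and we end with free = 3, fragments = 2.
 */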
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 768) static noinline_for_stack
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 769) void ext4_mb_generate_buddy(struct super_block *sb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 770) void *buddy, void *bitmap, ext4_group_t group)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 771) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 772) struct ext4_group_info *grp = ext4_get_group_info(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 773) struct ext4_sb_info *sbi = EXT4_SB(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 774) ext4_grpblk_t max = EXT4_CLUSTERS_PER_GROUP(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 775) ext4_grpblk_t i = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 776) ext4_grpblk_t first;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 777) ext4_grpblk_t len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 778) unsigned free = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 779) unsigned fragments = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 780) unsigned long long period = get_cycles();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 781)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 782) /* initialize the buddy from the bitmap, which is the aggregation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 783) * of the on-disk bitmap and preallocations */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 784) i = mb_find_next_zero_bit(bitmap, max, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 785) grp->bb_first_free = i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 786) while (i < max) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 787) fragments++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 788) first = i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 789) i = mb_find_next_bit(bitmap, max, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 790) len = i - first;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 791) free += len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 792) if (len > 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 793) ext4_mb_mark_free_simple(sb, buddy, first, len, grp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 794) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 795) grp->bb_counters[0]++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 796) if (i < max)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 797) i = mb_find_next_zero_bit(bitmap, max, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 798) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 799) grp->bb_fragments = fragments;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 800)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 801) if (free != grp->bb_free) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 802) ext4_grp_locked_error(sb, group, 0, 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 803) "block bitmap and bg descriptor "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 804) "inconsistent: %u vs %u free clusters",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 805) free, grp->bb_free);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 806) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 807) * If we intend to continue, we consider the group descriptor
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 808) * corrupt and update bb_free using the bitmap value
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 809) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 810) grp->bb_free = free;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 811) ext4_mark_group_bitmap_corrupted(sb, group,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 812) EXT4_GROUP_INFO_BBITMAP_CORRUPT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 813) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 814) mb_set_largest_free_order(sb, grp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 815)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 816) clear_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 817)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 818) period = get_cycles() - period;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 819) spin_lock(&sbi->s_bal_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 820) sbi->s_mb_buddies_generated++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 821) sbi->s_mb_generation_time += period;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 822) spin_unlock(&sbi->s_bal_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 823) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 824)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 825) static void mb_regenerate_buddy(struct ext4_buddy *e4b)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 826) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 827) int count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 828) int order = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 829) void *buddy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 830)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 831) while ((buddy = mb_find_buddy(e4b, order++, &count)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 832) ext4_set_bits(buddy, 0, count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 834) e4b->bd_info->bb_fragments = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 835) memset(e4b->bd_info->bb_counters, 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 836) sizeof(*e4b->bd_info->bb_counters) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 837) (e4b->bd_sb->s_blocksize_bits + 2));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 838)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 839) ext4_mb_generate_buddy(e4b->bd_sb, e4b->bd_buddy,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 840) e4b->bd_bitmap, e4b->bd_group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 841) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 842)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 843) /* The buddy information is attached to the buddy cache inode
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 844) * for convenience. The information regarding each group
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 845) * is loaded via ext4_mb_load_buddy. It comprises the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 846) * block bitmap and the buddy information, which are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 847) * stored in the inode as
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 848) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 849) * { page }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 850) * [ group 0 bitmap][ group 0 buddy] [group 1][ group 1]...
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 851) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 853) * one block each for the bitmap and the buddy information.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 854) * So each group takes up 2 blocks. A page can
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 855) * contain blocks_per_page (PAGE_SIZE / blocksize) blocks,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 856) * so it can hold information regarding groups_per_page groups,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 857) * which is blocks_per_page/2.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 858) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 859) * Locking note: This routine takes the block group lock of all groups
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 860) * for this page; do not hold this lock when calling this routine!
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 861) */
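/*
 * Worked example (illustrative, assuming PAGE_SIZE = 4096 and
 * blocksize = 1024): blocks_per_page = 4, so groups_per_page = 2 and
 * page 0 holds [ g0 bitmap ][ g0 buddy ][ g1 bitmap ][ g1 buddy ].
 * With blocksize = 4096 instead, blocks_per_page = 1 and each bitmap
 * or buddy block gets a page of its own.
 */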
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 862)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 863) static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 864) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 865) ext4_group_t ngroups;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 866) int blocksize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 867) int blocks_per_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 868) int groups_per_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 869) int err = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 870) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 871) ext4_group_t first_group, group;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 872) int first_block;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 873) struct super_block *sb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 874) struct buffer_head *bhs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 875) struct buffer_head **bh = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 876) struct inode *inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 877) char *data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 878) char *bitmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 879) struct ext4_group_info *grinfo;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 880)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 881) inode = page->mapping->host;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 882) sb = inode->i_sb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 883) ngroups = ext4_get_groups_count(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 884) blocksize = i_blocksize(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 885) blocks_per_page = PAGE_SIZE / blocksize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 886)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 887) mb_debug(sb, "init page %lu\n", page->index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 888)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 889) groups_per_page = blocks_per_page >> 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 890) if (groups_per_page == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 891) groups_per_page = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 892)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 893) /* allocate buffer_heads to read bitmaps */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 894) if (groups_per_page > 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 895) i = sizeof(struct buffer_head *) * groups_per_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 896) bh = kzalloc(i, gfp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 897) if (bh == NULL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 898) err = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 899) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 900) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 901) } else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 902) bh = &bhs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 903)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 904) first_group = page->index * blocks_per_page / 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 905)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 906) /* read all groups the page covers into the cache */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 907) for (i = 0, group = first_group; i < groups_per_page; i++, group++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 908) if (group >= ngroups)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 909) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 910)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 911) grinfo = ext4_get_group_info(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 912) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 913) * If page is uptodate then we came here after online resize
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 914) * which added some new uninitialized group info structs, so
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 915) * we must skip all initialized uptodate buddies on the page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 916) * which may be currently in use by an allocating task.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 917) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 918) if (PageUptodate(page) && !EXT4_MB_GRP_NEED_INIT(grinfo)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 919) bh[i] = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 920) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 921) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 922) bh[i] = ext4_read_block_bitmap_nowait(sb, group, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 923) if (IS_ERR(bh[i])) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 924) err = PTR_ERR(bh[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 925) bh[i] = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 926) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 927) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 928) mb_debug(sb, "read bitmap for group %u\n", group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 929) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 930)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 931) /* wait for I/O completion */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 932) for (i = 0, group = first_group; i < groups_per_page; i++, group++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 933) int err2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 934)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 935) if (!bh[i])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 936) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 937) err2 = ext4_wait_block_bitmap(sb, group, bh[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 938) if (!err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 939) err = err2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 940) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 941)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 942) first_block = page->index * blocks_per_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 943) for (i = 0; i < blocks_per_page; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 944) group = (first_block + i) >> 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 945) if (group >= ngroups)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 946) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 947)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 948) if (!bh[group - first_group])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 949) /* skip initialized uptodate buddy */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 950) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 951)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 952) if (!buffer_verified(bh[group - first_group]))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 953) /* Skip faulty bitmaps */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 954) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 955) err = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 956)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 957) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 958) * data carries the information regarding this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 959) * particular group in the format specified
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 960) * above.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 962) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 963) data = page_address(page) + (i * blocksize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 964) bitmap = bh[group - first_group]->b_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 965)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 966) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 967) * We place the buddy block and bitmap block
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 968) * close together
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 969) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 970) if ((first_block + i) & 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 971) /* this is block of buddy */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 972) BUG_ON(incore == NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 973) mb_debug(sb, "put buddy for group %u in page %lu/%x\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 974) group, page->index, i * blocksize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 975) trace_ext4_mb_buddy_bitmap_load(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 976) grinfo = ext4_get_group_info(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 977) grinfo->bb_fragments = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 978) memset(grinfo->bb_counters, 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 979) sizeof(*grinfo->bb_counters) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 980) (sb->s_blocksize_bits+2));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 981) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 982) * incore got set to the group block bitmap below
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 983) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 984) ext4_lock_group(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 985) /* init the buddy */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 986) memset(data, 0xff, blocksize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 987) ext4_mb_generate_buddy(sb, data, incore, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 988) ext4_unlock_group(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 989) incore = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 990) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 991) /* this is block of bitmap */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 992) BUG_ON(incore != NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 993) mb_debug(sb, "put bitmap for group %u in page %lu/%x\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 994) group, page->index, i * blocksize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 995) trace_ext4_mb_bitmap_load(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 996)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 997) /* see comments in ext4_mb_put_pa() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 998) ext4_lock_group(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 999) memcpy(data, bitmap, blocksize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) /* mark all preallocated blocks as used in the in-core bitmap */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002) ext4_mb_generate_from_pa(sb, data, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003) ext4_mb_generate_from_freelist(sb, data, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004) ext4_unlock_group(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006) /* set incore so that the buddy information can be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007) * generated using this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009) incore = data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012) SetPageUptodate(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015) if (bh) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016) for (i = 0; i < groups_per_page; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017) brelse(bh[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018) if (bh != &bhs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019) kfree(bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025) * Lock the buddy and bitmap pages. This makes sure another parallel init_group
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026) * on the same buddy page cannot happen while we hold the buddy page lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027) * Return the locked buddy and bitmap pages in the e4b struct. If buddy and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028) * bitmap are on the same page, e4b->bd_buddy_page is NULL and 0 is returned.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029) */
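/*
 * Index math sketch (illustrative, assuming PAGE_SIZE = 4096 and
 * blocksize = 1024, i.e. blocks_per_page = 4): for group = 5 we get
 * block = 10, so the bitmap lives at pnum = 2, poff = 2, and the buddy
 * (block 11) sits on the same page, so the early return below is taken.
 * With blocksize = 4096, blocks_per_page = 1, the bitmap is at
 * pnum = 10 and the buddy at pnum = 11, so a second page must be
 * found and locked.
 */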
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030) static int ext4_mb_get_buddy_page_lock(struct super_block *sb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031) ext4_group_t group, struct ext4_buddy *e4b, gfp_t gfp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033) struct inode *inode = EXT4_SB(sb)->s_buddy_cache;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034) int block, pnum, poff;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035) int blocks_per_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038) e4b->bd_buddy_page = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039) e4b->bd_bitmap_page = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041) blocks_per_page = PAGE_SIZE / sb->s_blocksize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) * the buddy cache inode stores the block bitmap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044) * and buddy information in consecutive blocks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045) * So for each group we need two blocks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047) block = group * 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) pnum = block / blocks_per_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049) poff = block % blocks_per_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050) page = find_or_create_page(inode->i_mapping, pnum, gfp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051) if (!page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053) BUG_ON(page->mapping != inode->i_mapping);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) e4b->bd_bitmap_page = page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055) e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057) if (blocks_per_page >= 2) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058) /* buddy and bitmap are on the same page */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062) block++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063) pnum = block / blocks_per_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064) page = find_or_create_page(inode->i_mapping, pnum, gfp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065) if (!page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067) BUG_ON(page->mapping != inode->i_mapping);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068) e4b->bd_buddy_page = page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072) static void ext4_mb_put_buddy_page_lock(struct ext4_buddy *e4b)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074) if (e4b->bd_bitmap_page) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075) unlock_page(e4b->bd_bitmap_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076) put_page(e4b->bd_bitmap_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078) if (e4b->bd_buddy_page) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079) unlock_page(e4b->bd_buddy_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080) put_page(e4b->bd_buddy_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085) * Locking note: This routine calls ext4_mb_init_cache(), which takes the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086) * block group lock of all groups for this page; do not hold the BG lock when
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087) * calling this routine!
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089) static noinline_for_stack
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090) int ext4_mb_init_group(struct super_block *sb, ext4_group_t group, gfp_t gfp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093) struct ext4_group_info *this_grp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094) struct ext4_buddy e4b;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098) might_sleep();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099) mb_debug(sb, "init group %u\n", group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100) this_grp = ext4_get_group_info(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102) * This ensures that we don't reinit the buddy cache
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103) * page that maps to the group from which we are already
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104) * allocating. If we are looking at the buddy cache we would
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105) * have taken a reference using ext4_mb_load_buddy and that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106) * would have pinned buddy page to page cache.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107) * The call to ext4_mb_get_buddy_page_lock will mark the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108) * page accessed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110) ret = ext4_mb_get_buddy_page_lock(sb, group, &e4b, gfp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111) if (ret || !EXT4_MB_GRP_NEED_INIT(this_grp)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1113) * either we hit an error or somebody else already
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114) * initialized the group; return without doing anything
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119) page = e4b.bd_bitmap_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120) ret = ext4_mb_init_cache(page, NULL, gfp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123) if (!PageUptodate(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124) ret = -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128) if (e4b.bd_buddy_page == NULL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130) * If both the bitmap and buddy are in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131) * the same page we don't need to force
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132) * init the buddy
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137) /* init buddy cache */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138) page = e4b.bd_buddy_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139) ret = ext4_mb_init_cache(page, e4b.bd_bitmap, gfp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142) if (!PageUptodate(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143) ret = -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146) err:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147) ext4_mb_put_buddy_page_lock(&e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152) * Locking note: This routine calls ext4_mb_init_cache(), which takes the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153) * block group lock of all groups for this page; do not hold the BG lock when
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154) * calling this routine!
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156) static noinline_for_stack int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157) ext4_mb_load_buddy_gfp(struct super_block *sb, ext4_group_t group,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158) struct ext4_buddy *e4b, gfp_t gfp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160) int blocks_per_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161) int block;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162) int pnum;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163) int poff;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166) struct ext4_group_info *grp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167) struct ext4_sb_info *sbi = EXT4_SB(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168) struct inode *inode = sbi->s_buddy_cache;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170) might_sleep();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171) mb_debug(sb, "load group %u\n", group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173) blocks_per_page = PAGE_SIZE / sb->s_blocksize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174) grp = ext4_get_group_info(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176) e4b->bd_blkbits = sb->s_blocksize_bits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177) e4b->bd_info = grp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178) e4b->bd_sb = sb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179) e4b->bd_group = group;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180) e4b->bd_buddy_page = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181) e4b->bd_bitmap_page = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183) if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185) * we need full data about the group
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186) * to make a good selection
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188) ret = ext4_mb_init_group(sb, group, gfp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1189) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194) * the buddy cache inode stores the block bitmap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195) * and buddy information in consecutive blocks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196) * So for each group we need two blocks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198) block = group * 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199) pnum = block / blocks_per_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200) poff = block % blocks_per_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202) /* we could use find_or_create_page(), but it locks the page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203) * which we'd like to avoid in the fast path ... */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204) page = find_get_page_flags(inode->i_mapping, pnum, FGP_ACCESSED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205) if (page == NULL || !PageUptodate(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206) if (page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208) * drop the page reference and try
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1209) * to get the page with lock. If the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210) * page is not uptodate, that implies
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211) * somebody just created the page but
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212) * has yet to initialize it. So
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213) * wait for it to be initialized.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215) put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216) page = find_or_create_page(inode->i_mapping, pnum, gfp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217) if (page) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218) BUG_ON(page->mapping != inode->i_mapping);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219) if (!PageUptodate(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220) ret = ext4_mb_init_cache(page, NULL, gfp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222) unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225) mb_cmp_bitmaps(e4b, page_address(page) +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226) (poff * sb->s_blocksize));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228) unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231) if (page == NULL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232) ret = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235) if (!PageUptodate(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236) ret = -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1237) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1238) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1239)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1240) /* Pages marked accessed already */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1241) e4b->bd_bitmap_page = page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1242) e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1243)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1244) block++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1245) pnum = block / blocks_per_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1246) poff = block % blocks_per_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1247)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1248) page = find_get_page_flags(inode->i_mapping, pnum, FGP_ACCESSED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1249) if (page == NULL || !PageUptodate(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1250) if (page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1251) put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1252) page = find_or_create_page(inode->i_mapping, pnum, gfp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1253) if (page) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1254) BUG_ON(page->mapping != inode->i_mapping);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1255) if (!PageUptodate(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1256) ret = ext4_mb_init_cache(page, e4b->bd_bitmap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1257) gfp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1258) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1259) unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1260) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1261) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1262) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1263) unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1264) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1265) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1266) if (page == NULL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1267) ret = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1268) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1269) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1270) if (!PageUptodate(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1271) ret = -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1272) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1273) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1274)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1275) /* Pages marked accessed already */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1276) e4b->bd_buddy_page = page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1277) e4b->bd_buddy = page_address(page) + (poff * sb->s_blocksize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1278)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1279) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1280)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1281) err:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1282) if (page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1283) put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1284) if (e4b->bd_bitmap_page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1285) put_page(e4b->bd_bitmap_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1286) if (e4b->bd_buddy_page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1287) put_page(e4b->bd_buddy_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1288) e4b->bd_buddy = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1289) e4b->bd_bitmap = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1290) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1291) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1292)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1293) static int ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1294) struct ext4_buddy *e4b)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1295) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1296) return ext4_mb_load_buddy_gfp(sb, group, e4b, GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1297) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1298)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1299) static void ext4_mb_unload_buddy(struct ext4_buddy *e4b)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1300) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1301) if (e4b->bd_bitmap_page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1302) put_page(e4b->bd_bitmap_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1303) if (e4b->bd_buddy_page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1304) put_page(e4b->bd_buddy_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1305) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1306)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1307)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1308) static int mb_find_order_for_block(struct ext4_buddy *e4b, int block)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1309) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1310) int order = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1311) int bb_incr = 1 << (e4b->bd_blkbits - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1312) void *bb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1313)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1314) BUG_ON(e4b->bd_bitmap == e4b->bd_buddy);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1315) BUG_ON(block >= (1 << (e4b->bd_blkbits + 3)));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1316)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1317) bb = e4b->bd_buddy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1318) while (order <= e4b->bd_blkbits + 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1319) block = block >> 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1320) if (!mb_test_bit(block, bb)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1321) /* this block is part of buddy of order 'order' */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1322) return order;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1323) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1324) bb += bb_incr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1325) bb_incr >>= 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1326) order++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1327) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1328) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1329) }
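/*
 * Walkthrough (illustrative, blocksize = 4096, so bd_blkbits = 12):
 * the order-1 map sits at offset 0 of bd_buddy and occupies
 * 1 << 11 bytes, the order-2 map follows and is half that size, and
 * so on. For block = 20 the loop tests bit 10 of the order-1 map,
 * then bit 5 of the order-2 map, etc., and returns the first order
 * whose bit is clear (a clear bit means the enclosing 2^order chunk
 * is free).
 */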
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1330)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1331) static void mb_clear_bits(void *bm, int cur, int len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1332) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1333) __u32 *addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1334)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1335) len = cur + len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1336) while (cur < len) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1337) if ((cur & 31) == 0 && (len - cur) >= 32) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1338) /* fast path: clear whole word at once */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1339) addr = bm + (cur >> 3);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1340) *addr = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1341) cur += 32;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1342) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1343) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1344) mb_clear_bit(cur, bm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1345) cur++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1346) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1347) }
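/*
 * Example (illustrative): mb_clear_bits(bm, 30, 68) clears bits 30 and
 * 31 individually, bits 32..63 and 64..95 as whole 32-bit words, and
 * bits 96 and 97 individually again.
 */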
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1348)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1349) /* clear bits in the given range;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1350) * returns the first bit that was already zero, if any, -1 otherwise
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1351) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1352) static int mb_test_and_clear_bits(void *bm, int cur, int len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1353) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1354) __u32 *addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1355) int zero_bit = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1356)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1357) len = cur + len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1358) while (cur < len) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1359) if ((cur & 31) == 0 && (len - cur) >= 32) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1360) /* fast path: clear whole word at once */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1361) addr = bm + (cur >> 3);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1362) if (*addr != (__u32)(-1) && zero_bit == -1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1363) zero_bit = cur + mb_find_next_zero_bit(addr, 32, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1364) *addr = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1365) cur += 32;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1366) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1367) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1368) if (!mb_test_and_clear_bit(cur, bm) && zero_bit == -1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1369) zero_bit = cur;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1370) cur++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1371) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1372)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1373) return zero_bit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1374) }
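/*
 * Example (illustrative): if bits 4..7 hold 1,0,1,1 then
 * mb_test_and_clear_bits(bm, 4, 4) clears all four and returns 5, the
 * first bit that was already zero; had all four been set, it would
 * return -1.
 */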
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1375)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1376) void ext4_set_bits(void *bm, int cur, int len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1377) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1378) __u32 *addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1379)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1380) len = cur + len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1381) while (cur < len) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1382) if ((cur & 31) == 0 && (len - cur) >= 32) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1383) /* fast path: set whole word at once */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1384) addr = bm + (cur >> 3);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1385) *addr = 0xffffffff;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1386) cur += 32;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1387) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1388) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1389) mb_set_bit(cur, bm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1390) cur++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1391) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1392) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1393)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1394) static inline int mb_buddy_adjust_border(int *bit, void *bitmap, int side)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1395) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1396) if (mb_test_bit(*bit + side, bitmap)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1397) mb_clear_bit(*bit, bitmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1398) (*bit) -= side;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1399) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1400) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1402) (*bit) += side;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1403) mb_set_bit(*bit, bitmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1404) return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1405) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1406) }
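/*
 * In the buddy maps a clear bit means "this 2^order chunk is free",
 * so above: if the neighbour on @side is busy, the border chunk is
 * marked free at this order and the range shrinks (return +1: one more
 * free chunk at this order); if the neighbour is free, it is absorbed,
 * marked busy at this order so the pair can merge one level up, and
 * the range grows (return -1). The caller adds the result to
 * bb_counters[order].
 */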
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1407)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1408) static void mb_buddy_mark_free(struct ext4_buddy *e4b, int first, int last)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1409) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1410) int max;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1411) int order = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1412) void *buddy = mb_find_buddy(e4b, order, &max);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1413)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1414) while (buddy) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1415) void *buddy2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1416)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1417) /* Bits in range [first; last] are known to be set since
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1418) * the corresponding blocks were allocated. Bits in range
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1419) * (first; last) will stay set because they form buddies on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1420) * the upper layer. We just deal with the borders if they
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1421) * don't align with the upper layer, and then go up.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1422) * Releasing an entire group amounts to clearing a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1423) * single bit of the highest-order buddy.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1424) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1425)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1426) /* Example:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1427) * ---------------------------------
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1428) * | 1 | 1 | 1 | 1 |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1429) * ---------------------------------
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1430) * | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1431) * ---------------------------------
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1432) * 0 1 2 3 4 5 6 7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1433) * \_____________________/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1434) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1435) * Neither [1] nor [6] is aligned to the layer above.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1436) * Left neighbour [0] is free, so mark it busy,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1437) * decrease bb_counters and extend the range to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1438) * [0; 6].
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1439) * Right neighbour [7] is busy. It can't be coalesced with [6], so
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1440) * mark [6] free, increase bb_counters and shrink the range to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1441) * [0; 5].
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1442) * Then shift the range to [0; 2], go up and do the same.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1443) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1444)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1445)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1446) if (first & 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1447) e4b->bd_info->bb_counters[order] += mb_buddy_adjust_border(&first, buddy, -1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1448) if (!(last & 1))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1449) e4b->bd_info->bb_counters[order] += mb_buddy_adjust_border(&last, buddy, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1450) if (first > last)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1451) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1452) order++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1453)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1454) if (first == last || !(buddy2 = mb_find_buddy(e4b, order, &max))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1455) mb_clear_bits(buddy, first, last - first + 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1456) e4b->bd_info->bb_counters[order - 1] += last - first + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1457) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1458) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1459) first >>= 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1460) last >>= 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1461) buddy = buddy2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1462) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1463) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1464)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1465) static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1466) int first, int count)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1467) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1468) int left_is_free = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1469) int right_is_free = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1470) int block;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1471) int last = first + count - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1472) struct super_block *sb = e4b->bd_sb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1473)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1474) if (WARN_ON(count == 0))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1475) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1476) BUG_ON(last >= (sb->s_blocksize << 3));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1477) assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1478) /* Don't bother if the block group is corrupt. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1479) if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1480) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1481)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1482) mb_check_buddy(e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1483) mb_free_blocks_double(inode, e4b, first, count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1484)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1485) this_cpu_inc(discard_pa_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1486) e4b->bd_info->bb_free += count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1487) if (first < e4b->bd_info->bb_first_free)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1488) e4b->bd_info->bb_first_free = first;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1489)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1490) /* access memory sequentially: check left neighbour,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1491) * clear range and then check right neighbour
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1492) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1493) if (first != 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1494) left_is_free = !mb_test_bit(first - 1, e4b->bd_bitmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1495) block = mb_test_and_clear_bits(e4b->bd_bitmap, first, count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1496) if (last + 1 < EXT4_SB(sb)->s_mb_maxs[0])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1497) right_is_free = !mb_test_bit(last + 1, e4b->bd_bitmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1498)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1499) if (unlikely(block != -1)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1500) struct ext4_sb_info *sbi = EXT4_SB(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1501) ext4_fsblk_t blocknr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1502)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1503) blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1504) blocknr += EXT4_C2B(sbi, block);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1505) if (!(sbi->s_mount_state & EXT4_FC_REPLAY)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1506) ext4_grp_locked_error(sb, e4b->bd_group,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1507) inode ? inode->i_ino : 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1508) blocknr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1509) "freeing already freed block (bit %u); block bitmap corrupt.",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1510) block);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1511) ext4_mark_group_bitmap_corrupted(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1512) sb, e4b->bd_group,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1513) EXT4_GROUP_INFO_BBITMAP_CORRUPT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1514) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1515) mb_regenerate_buddy(e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1516) goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1517) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1518)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1519) /* let's maintain fragments counter */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1520) if (left_is_free && right_is_free)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1521) e4b->bd_info->bb_fragments--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1522) else if (!left_is_free && !right_is_free)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1523) e4b->bd_info->bb_fragments++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1524)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1525) /* buddy[0] == bd_bitmap is a special case, so handle
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1526) * it right away and let mb_buddy_mark_free stay free of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1527) * zero-order checks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1528) * Check if the neighbours are to be coalesced, and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1529) * adjust the bitmap, bb_counters and borders appropriately.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1530) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1531) if (first & 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1532) first += !left_is_free;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1533) e4b->bd_info->bb_counters[0] += left_is_free ? -1 : 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1534) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1535) if (!(last & 1)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1536) last -= !right_is_free;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1537) e4b->bd_info->bb_counters[0] += right_is_free ? -1 : 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1538) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1539)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1540) if (first <= last)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1541) mb_buddy_mark_free(e4b, first >> 1, last >> 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1542)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1543) done:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1544) mb_set_largest_free_order(sb, e4b->bd_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1545) mb_check_buddy(e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1546) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1547)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1548) static int mb_find_extent(struct ext4_buddy *e4b, int block,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1549) int needed, struct ext4_free_extent *ex)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1550) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1551) int next = block;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1552) int max, order;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1553) void *buddy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1554)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1555) assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1556) BUG_ON(ex == NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1557)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1558) buddy = mb_find_buddy(e4b, 0, &max);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1559) BUG_ON(buddy == NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1560) BUG_ON(block >= max);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1561) if (mb_test_bit(block, buddy)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1562) ex->fe_len = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1563) ex->fe_start = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1564) ex->fe_group = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1565) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1566) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1567)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1568) /* find actual order */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1569) order = mb_find_order_for_block(e4b, block);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1570) block = block >> order;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1571)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1572) ex->fe_len = 1 << order;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1573) ex->fe_start = block << order;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1574) ex->fe_group = e4b->bd_group;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1575)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1576) /* calc difference from given start */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1577) next = next - ex->fe_start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1578) ex->fe_len -= next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1579) ex->fe_start += next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1580)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1581) while (needed > ex->fe_len &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1582) mb_find_buddy(e4b, order, &max)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1583)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1584) if (block + 1 >= max)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1585) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1586)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1587) next = (block + 1) * (1 << order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1588) if (mb_test_bit(next, e4b->bd_bitmap))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1589) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1590)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1591) order = mb_find_order_for_block(e4b, next);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1592)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1593) block = next >> order;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1594) ex->fe_len += 1 << order;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1595) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1596)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1597) if (ex->fe_start + ex->fe_len > EXT4_CLUSTERS_PER_GROUP(e4b->bd_sb)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1598) /* Should never happen! (but apparently sometimes does?!?) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1599) WARN_ON(1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1600) ext4_grp_locked_error(e4b->bd_sb, e4b->bd_group, 0, 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1601) "corruption or bug in mb_find_extent "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1602) "block=%d, order=%d needed=%d ex=%u/%d/%d@%u",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1603) block, order, needed, ex->fe_group, ex->fe_start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1604) ex->fe_len, ex->fe_logical);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1605) ex->fe_len = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1606) ex->fe_start = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1607) ex->fe_group = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1608) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1609) return ex->fe_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1610) }
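
/*
 * Worked example for mb_find_extent() (illustrative numbers, not
 * kernel code).  Call mb_find_extent(e4b, 6, 16, &ex) where bits
 * 6..15 are free, bits 5 and 16 are busy, so the buddy records bits
 * 6..7 as an order-1 chunk and bits 8..15 as an order-3 chunk:
 *
 *	order = 1; block = 6 >> 1 = 3;
 *	ex->fe_start = 6, ex->fe_len = 2;
 *	next = (3 + 1) << 1 = 8;	// bit 8 free, grow
 *	order = 3; block = 8 >> 3 = 1; ex->fe_len = 2 + 8 = 10;
 *	next = (1 + 1) << 3 = 16;	// bit 16 busy, stop
 *
 * The function returns 10, with ex describing bits 6..15: smaller than
 * needed (16), but the best this free extent can offer.
 */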
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1611)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1612) static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1613) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1614) int ord;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1615) int mlen = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1616) int max = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1617) int cur;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1618) int start = ex->fe_start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1619) int len = ex->fe_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1620) unsigned ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1621) int len0 = len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1622) void *buddy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1623)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1624) BUG_ON(start + len > (e4b->bd_sb->s_blocksize << 3));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1625) BUG_ON(e4b->bd_group != ex->fe_group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1626) assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1627) mb_check_buddy(e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1628) mb_mark_used_double(e4b, start, len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1629)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1630) this_cpu_inc(discard_pa_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1631) e4b->bd_info->bb_free -= len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1632) if (e4b->bd_info->bb_first_free == start)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1633) e4b->bd_info->bb_first_free += len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1634)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1635) /* let's maintain fragments counter */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1636) if (start != 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1637) mlen = !mb_test_bit(start - 1, e4b->bd_bitmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1638) if (start + len < EXT4_SB(e4b->bd_sb)->s_mb_maxs[0])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1639) max = !mb_test_bit(start + len, e4b->bd_bitmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1640) if (mlen && max)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1641) e4b->bd_info->bb_fragments++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1642) else if (!mlen && !max)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1643) e4b->bd_info->bb_fragments--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1644)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1645) /* let's maintain buddy itself */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1646) while (len) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1647) ord = mb_find_order_for_block(e4b, start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1648)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1649) if (((start >> ord) << ord) == start && len >= (1 << ord)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1650) /* the whole chunk may be allocated at once! */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1651) mlen = 1 << ord;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1652) buddy = mb_find_buddy(e4b, ord, &max);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1653) BUG_ON((start >> ord) >= max);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1654) mb_set_bit(start >> ord, buddy);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1655) e4b->bd_info->bb_counters[ord]--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1656) start += mlen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1657) len -= mlen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1658) BUG_ON(len < 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1659) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1660) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1661)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1662) /* store for history */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1663) if (ret == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1664) ret = len | (ord << 16);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1665)
		/* we have to split a large buddy */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1667) BUG_ON(ord <= 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1668) buddy = mb_find_buddy(e4b, ord, &max);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1669) mb_set_bit(start >> ord, buddy);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1670) e4b->bd_info->bb_counters[ord]--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1671)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1672) ord--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1673) cur = (start >> ord) & ~1U;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1674) buddy = mb_find_buddy(e4b, ord, &max);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1675) mb_clear_bit(cur, buddy);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1676) mb_clear_bit(cur + 1, buddy);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1677) e4b->bd_info->bb_counters[ord]++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1678) e4b->bd_info->bb_counters[ord]++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1679) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1680) mb_set_largest_free_order(e4b->bd_sb, e4b->bd_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1681)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1682) ext4_set_bits(e4b->bd_bitmap, ex->fe_start, len0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1683) mb_check_buddy(e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1684)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1685) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1686) }
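
/*
 * Illustration of the split path in mb_mark_used() above (a sketch
 * with made-up numbers).  Suppose bits 0..3 form one free order-2
 * buddy and we mark bits 2..3 used (start = 2, len = 2):
 *
 *	ord = 2, but start is not order-2 aligned, so we split:
 *	  set bit 2 >> 2 = 0 in the order-2 map, bb_counters[2]--;
 *	  clear bits 0 and 1 in the order-1 map, bb_counters[1] += 2;
 *	next pass: ord = 1, start is aligned and len == 1 << 1, so
 *	  set bit 2 >> 1 = 1 in the order-1 map, bb_counters[1]--.
 *
 * Bits 0..1 survive as a free order-1 buddy; bits 2..3 are used.
 */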
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1687)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1688) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1689) * Must be called under group lock!
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1690) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1691) static void ext4_mb_use_best_found(struct ext4_allocation_context *ac,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1692) struct ext4_buddy *e4b)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1693) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1694) struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1695) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1696)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1697) BUG_ON(ac->ac_b_ex.fe_group != e4b->bd_group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1698) BUG_ON(ac->ac_status == AC_STATUS_FOUND);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1699)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1700) ac->ac_b_ex.fe_len = min(ac->ac_b_ex.fe_len, ac->ac_g_ex.fe_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1701) ac->ac_b_ex.fe_logical = ac->ac_g_ex.fe_logical;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1702) ret = mb_mark_used(e4b, &ac->ac_b_ex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1703)
	/* preallocation can change ac_b_ex, thus we store the actually
	 * allocated blocks for history */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1706) ac->ac_f_ex = ac->ac_b_ex;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1707)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1708) ac->ac_status = AC_STATUS_FOUND;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1709) ac->ac_tail = ret & 0xffff;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1710) ac->ac_buddy = ret >> 16;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1711)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1712) /*
	 * take the page reference. We want the page to be pinned
	 * so that we don't get an ext4_mb_init_cache() call for this
	 * group until we update the bitmap. Otherwise we could
	 * double-allocate blocks. The reference is dropped
	 * in ext4_mb_release_context().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1718) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1719) ac->ac_bitmap_page = e4b->bd_bitmap_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1720) get_page(ac->ac_bitmap_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1721) ac->ac_buddy_page = e4b->bd_buddy_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1722) get_page(ac->ac_buddy_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1723) /* store last allocated for subsequent stream allocation */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1724) if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1725) spin_lock(&sbi->s_md_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1726) sbi->s_mb_last_group = ac->ac_f_ex.fe_group;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1727) sbi->s_mb_last_start = ac->ac_f_ex.fe_start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1728) spin_unlock(&sbi->s_md_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1729) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1730) /*
	 * As we've just preallocated more space than the
	 * user originally requested, we store the allocated
	 * space in a special descriptor.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1734) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1735) if (ac->ac_o_ex.fe_len < ac->ac_b_ex.fe_len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1736) ext4_mb_new_preallocation(ac);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1737)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1738) }
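
/*
 * How ac_tail/ac_buddy relate to mb_mark_used()'s return value (an
 * illustrative sketch).  mb_mark_used() packs the state of its first
 * buddy split as len | (ord << 16); assuming that split happened with
 * 5 blocks still to mark at order 3:
 *
 *	unsigned ret = 5 | (3 << 16);	// 0x00030005
 *	ac->ac_tail  = ret & 0xffff;	// 5
 *	ac->ac_buddy = ret >> 16;	// 3
 *
 * A zero ret (no split was ever needed) leaves both fields zero.
 */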
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1739)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1740) static void ext4_mb_check_limits(struct ext4_allocation_context *ac,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1741) struct ext4_buddy *e4b,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1742) int finish_group)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1743) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1744) struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1745) struct ext4_free_extent *bex = &ac->ac_b_ex;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1746) struct ext4_free_extent *gex = &ac->ac_g_ex;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1747) struct ext4_free_extent ex;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1748) int max;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1749)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1750) if (ac->ac_status == AC_STATUS_FOUND)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1751) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1752) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1753) * We don't want to scan for a whole year
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1754) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1755) if (ac->ac_found > sbi->s_mb_max_to_scan &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1756) !(ac->ac_flags & EXT4_MB_HINT_FIRST)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1757) ac->ac_status = AC_STATUS_BREAK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1758) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1759) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1760)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1761) /*
	 * Haven't found a good chunk so far; let's continue
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1763) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1764) if (bex->fe_len < gex->fe_len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1765) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1766)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1767) if ((finish_group || ac->ac_found > sbi->s_mb_min_to_scan)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1768) && bex->fe_group == e4b->bd_group) {
		/* recheck the chunk's availability - we don't know
		 * whether it was found within this lock-unlock
		 * period or not */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1772) max = mb_find_extent(e4b, bex->fe_start, gex->fe_len, &ex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1773) if (max >= gex->fe_len) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1774) ext4_mb_use_best_found(ac, e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1775) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1776) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1777) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1778) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1779)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1780) /*
 * The routine checks whether the found extent is good enough. If it is,
 * the extent is marked used and a flag is set in the context to stop
 * scanning. Otherwise, the extent is compared with the previously found
 * extent and, if the new one is better, it is stored in the context.
 * Later, the best found extent will be used if mballoc can't find a
 * good enough extent.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1787) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1788) * FIXME: real allocation policy is to be designed yet!
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1789) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1790) static void ext4_mb_measure_extent(struct ext4_allocation_context *ac,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1791) struct ext4_free_extent *ex,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1792) struct ext4_buddy *e4b)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1793) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1794) struct ext4_free_extent *bex = &ac->ac_b_ex;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1795) struct ext4_free_extent *gex = &ac->ac_g_ex;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1796)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1797) BUG_ON(ex->fe_len <= 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1798) BUG_ON(ex->fe_len > EXT4_CLUSTERS_PER_GROUP(ac->ac_sb));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1799) BUG_ON(ex->fe_start >= EXT4_CLUSTERS_PER_GROUP(ac->ac_sb));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1800) BUG_ON(ac->ac_status != AC_STATUS_CONTINUE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1801)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1802) ac->ac_found++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1803)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1804) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1805) * The special case - take what you catch first
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1806) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1807) if (unlikely(ac->ac_flags & EXT4_MB_HINT_FIRST)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1808) *bex = *ex;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1809) ext4_mb_use_best_found(ac, e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1810) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1811) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1812)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1813) /*
	 * Let's check whether the chunk is good enough
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1815) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1816) if (ex->fe_len == gex->fe_len) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1817) *bex = *ex;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1818) ext4_mb_use_best_found(ac, e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1819) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1820) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1821)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1822) /*
	 * If this is the first found extent, just store it in the context
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1824) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1825) if (bex->fe_len == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1826) *bex = *ex;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1827) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1828) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1829)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1830) /*
	 * If the newly found extent is better, store it in the context
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1832) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1833) if (bex->fe_len < gex->fe_len) {
		/* if the request isn't satisfied, any found extent
		 * larger than the previous best one is better */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1836) if (ex->fe_len > bex->fe_len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1837) *bex = *ex;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1838) } else if (ex->fe_len > gex->fe_len) {
		/* if the request is satisfied, then we try to find
		 * an extent that still satisfies the request, but is
		 * smaller than the previous one */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1842) if (ex->fe_len < bex->fe_len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1843) *bex = *ex;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1844) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1845)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1846) ext4_mb_check_limits(ac, e4b, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1847) }
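
/*
 * Worked example of the comparison rules above (illustrative numbers).
 * With a goal length of 8, candidates arriving as 6, 12, 9, 8 behave
 * as follows:
 *
 *	6  -> bex is empty, store it
 *	12 -> bex (6) is below the goal and 12 > 6, store it
 *	9  -> bex (12) already satisfies the goal; 9 still satisfies
 *	      it and is smaller, store it
 *	8  -> exact fit: use it immediately via ext4_mb_use_best_found()
 */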
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1848)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1849) static noinline_for_stack
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1850) int ext4_mb_try_best_found(struct ext4_allocation_context *ac,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1851) struct ext4_buddy *e4b)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1852) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1853) struct ext4_free_extent ex = ac->ac_b_ex;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1854) ext4_group_t group = ex.fe_group;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1855) int max;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1856) int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1857)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1858) BUG_ON(ex.fe_len <= 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1859) err = ext4_mb_load_buddy(ac->ac_sb, group, e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1860) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1861) return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1862)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1863) ext4_lock_group(ac->ac_sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1864) max = mb_find_extent(e4b, ex.fe_start, ex.fe_len, &ex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1865)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1866) if (max > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1867) ac->ac_b_ex = ex;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1868) ext4_mb_use_best_found(ac, e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1869) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1870)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1871) ext4_unlock_group(ac->ac_sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1872) ext4_mb_unload_buddy(e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1873)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1874) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1875) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1876)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1877) static noinline_for_stack
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1878) int ext4_mb_find_by_goal(struct ext4_allocation_context *ac,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1879) struct ext4_buddy *e4b)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1880) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1881) ext4_group_t group = ac->ac_g_ex.fe_group;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1882) int max;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1883) int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1884) struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1885) struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1886) struct ext4_free_extent ex;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1887)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1888) if (!(ac->ac_flags & EXT4_MB_HINT_TRY_GOAL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1889) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1890) if (grp->bb_free == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1891) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1892)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1893) err = ext4_mb_load_buddy(ac->ac_sb, group, e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1894) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1895) return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1896)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1897) if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1898) ext4_mb_unload_buddy(e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1899) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1900) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1901)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1902) ext4_lock_group(ac->ac_sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1903) max = mb_find_extent(e4b, ac->ac_g_ex.fe_start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1904) ac->ac_g_ex.fe_len, &ex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1905) ex.fe_logical = 0xDEADFA11; /* debug value */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1906)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1907) if (max >= ac->ac_g_ex.fe_len && ac->ac_g_ex.fe_len == sbi->s_stripe) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1908) ext4_fsblk_t start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1909)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1910) start = ext4_group_first_block_no(ac->ac_sb, e4b->bd_group) +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1911) ex.fe_start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1912) /* use do_div to get remainder (would be 64-bit modulo) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1913) if (do_div(start, sbi->s_stripe) == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1914) ac->ac_found++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1915) ac->ac_b_ex = ex;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1916) ext4_mb_use_best_found(ac, e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1917) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1918) } else if (max >= ac->ac_g_ex.fe_len) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1919) BUG_ON(ex.fe_len <= 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1920) BUG_ON(ex.fe_group != ac->ac_g_ex.fe_group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1921) BUG_ON(ex.fe_start != ac->ac_g_ex.fe_start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1922) ac->ac_found++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1923) ac->ac_b_ex = ex;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1924) ext4_mb_use_best_found(ac, e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1925) } else if (max > 0 && (ac->ac_flags & EXT4_MB_HINT_MERGE)) {
		/* Sometimes, the caller may want to merge even a small
		 * number of blocks into an existing extent */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1928) BUG_ON(ex.fe_len <= 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1929) BUG_ON(ex.fe_group != ac->ac_g_ex.fe_group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1930) BUG_ON(ex.fe_start != ac->ac_g_ex.fe_start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1931) ac->ac_found++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1932) ac->ac_b_ex = ex;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1933) ext4_mb_use_best_found(ac, e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1934) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1935) ext4_unlock_group(ac->ac_sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1936) ext4_mb_unload_buddy(e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1937)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1938) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1939) }
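
/*
 * Note on the do_div() use above: do_div(n, base) divides the 64-bit
 * n in place and returns the remainder, so the stripe check reads
 * "does the physical start fall on a stripe boundary?".  For example
 * (illustrative values):
 *
 *	ext4_fsblk_t start = 262147;
 *	u32 rem = do_div(start, 16);	// start == 16384, rem == 3
 *
 * rem == 0 would mean block 262147 was stripe-aligned; here it is not.
 */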
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1940)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1941) /*
 * The routine scans buddy structures (not the bitmap!) from the given
 * order up to the max order and tries to find a big enough chunk to
 * satisfy the request
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1944) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1945) static noinline_for_stack
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1946) void ext4_mb_simple_scan_group(struct ext4_allocation_context *ac,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1947) struct ext4_buddy *e4b)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1948) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1949) struct super_block *sb = ac->ac_sb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1950) struct ext4_group_info *grp = e4b->bd_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1951) void *buddy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1952) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1953) int k;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1954) int max;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1955)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1956) BUG_ON(ac->ac_2order <= 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1957) for (i = ac->ac_2order; i <= sb->s_blocksize_bits + 1; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1958) if (grp->bb_counters[i] == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1959) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1960)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1961) buddy = mb_find_buddy(e4b, i, &max);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1962) BUG_ON(buddy == NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1963)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1964) k = mb_find_next_zero_bit(buddy, max, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1965) if (k >= max) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1966) ext4_grp_locked_error(ac->ac_sb, e4b->bd_group, 0, 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1967) "%d free clusters of order %d. But found 0",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1968) grp->bb_counters[i], i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1969) ext4_mark_group_bitmap_corrupted(ac->ac_sb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1970) e4b->bd_group,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1971) EXT4_GROUP_INFO_BBITMAP_CORRUPT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1972) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1973) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1974) ac->ac_found++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1975)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1976) ac->ac_b_ex.fe_len = 1 << i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1977) ac->ac_b_ex.fe_start = k << i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1978) ac->ac_b_ex.fe_group = e4b->bd_group;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1979)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1980) ext4_mb_use_best_found(ac, e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1981)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1982) BUG_ON(ac->ac_f_ex.fe_len != ac->ac_g_ex.fe_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1983)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1984) if (EXT4_SB(sb)->s_mb_stats)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1985) atomic_inc(&EXT4_SB(sb)->s_bal_2orders);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1986)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1987) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1988) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1989) }
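
/*
 * Worked example (illustrative): with ac_2order == 3, if
 * bb_counters[3] == 0 but bb_counters[4] == 2, the loop above skips
 * order 3 and picks a zero bit k in the order-4 buddy map, proposing
 * fe_start = k << 4 and fe_len = 1 << 4.  ext4_mb_use_best_found()
 * then trims fe_len down to the goal length and mb_mark_used() splits
 * the order-4 buddy accordingly.
 */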
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1990)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1991) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1992) * The routine scans the group and measures all found extents.
 * In order to optimize scanning, the caller must pass the number of
 * free blocks in the group, so the routine can know the upper limit.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1995) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1996) static noinline_for_stack
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1997) void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1998) struct ext4_buddy *e4b)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1999) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2000) struct super_block *sb = ac->ac_sb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2001) void *bitmap = e4b->bd_bitmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2002) struct ext4_free_extent ex;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2003) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2004) int free;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2005)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2006) free = e4b->bd_info->bb_free;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2007) if (WARN_ON(free <= 0))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2008) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2009)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2010) i = e4b->bd_info->bb_first_free;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2011)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2012) while (free && ac->ac_status == AC_STATUS_CONTINUE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2013) i = mb_find_next_zero_bit(bitmap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2014) EXT4_CLUSTERS_PER_GROUP(sb), i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2015) if (i >= EXT4_CLUSTERS_PER_GROUP(sb)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2016) /*
			 * If we have a corrupt bitmap, we won't find any
			 * free blocks even though the group info says we
			 * have free blocks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2020) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2021) ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2022) "%d free clusters as per "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2023) "group info. But bitmap says 0",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2024) free);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2025) ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2026) EXT4_GROUP_INFO_BBITMAP_CORRUPT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2027) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2028) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2029)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2030) mb_find_extent(e4b, i, ac->ac_g_ex.fe_len, &ex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2031) if (WARN_ON(ex.fe_len <= 0))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2032) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2033) if (free < ex.fe_len) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2034) ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2035) "%d free clusters as per "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2036) "group info. But got %d blocks",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2037) free, ex.fe_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2038) ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2039) EXT4_GROUP_INFO_BBITMAP_CORRUPT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2040) /*
			 * The number of free blocks differs. This mostly
			 * indicates that the bitmap is corrupt, so exit
			 * without claiming the space.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2044) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2045) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2046) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2047) ex.fe_logical = 0xDEADC0DE; /* debug value */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2048) ext4_mb_measure_extent(ac, &ex, e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2049)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2050) i += ex.fe_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2051) free -= ex.fe_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2052) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2053)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2054) ext4_mb_check_limits(ac, e4b, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2055) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2056)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2057) /*
 * This is a special case for storage like RAID5: we try to find
 * stripe-aligned chunks for stripe-size-multiple requests
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2060) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2061) static noinline_for_stack
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2062) void ext4_mb_scan_aligned(struct ext4_allocation_context *ac,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2063) struct ext4_buddy *e4b)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2064) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2065) struct super_block *sb = ac->ac_sb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2066) struct ext4_sb_info *sbi = EXT4_SB(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2067) void *bitmap = e4b->bd_bitmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2068) struct ext4_free_extent ex;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2069) ext4_fsblk_t first_group_block;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2070) ext4_fsblk_t a;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2071) ext4_grpblk_t i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2072) int max;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2073)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2074) BUG_ON(sbi->s_stripe == 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2075)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2076) /* find first stripe-aligned block in group */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2077) first_group_block = ext4_group_first_block_no(sb, e4b->bd_group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2078)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2079) a = first_group_block + sbi->s_stripe - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2080) do_div(a, sbi->s_stripe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2081) i = (a * sbi->s_stripe) - first_group_block;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2082)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2083) while (i < EXT4_CLUSTERS_PER_GROUP(sb)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2084) if (!mb_test_bit(i, bitmap)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2085) max = mb_find_extent(e4b, i, sbi->s_stripe, &ex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2086) if (max >= sbi->s_stripe) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2087) ac->ac_found++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2088) ex.fe_logical = 0xDEADF00D; /* debug value */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2089) ac->ac_b_ex = ex;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2090) ext4_mb_use_best_found(ac, e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2091) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2092) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2093) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2094) i += sbi->s_stripe;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2095) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2096) }
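
/*
 * Example of the alignment math above (illustrative numbers): with
 * s_stripe == 16 and first_group_block == 100,
 *
 *	a = 100 + 16 - 1 = 115;
 *	do_div(a, 16);			// a == 7 (quotient)
 *	i = 7 * 16 - 100 = 12;
 *
 * so the scan starts at group offset 12 (physical block 112, the
 * first stripe multiple at or after 100) and advances in steps of 16.
 */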
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2097)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2098) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2099) * This is also called BEFORE we load the buddy bitmap.
 * Returns true if the group is suitable for the allocation,
 * false otherwise.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2102) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2103) static bool ext4_mb_good_group(struct ext4_allocation_context *ac,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2104) ext4_group_t group, int cr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2105) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2106) ext4_grpblk_t free, fragments;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2107) int flex_size = ext4_flex_bg_size(EXT4_SB(ac->ac_sb));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2108) struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2109)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2110) BUG_ON(cr < 0 || cr >= 4);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2111)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2112) if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(grp)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2113) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2114)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2115) free = grp->bb_free;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2116) if (free == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2117) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2118)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2119) fragments = grp->bb_fragments;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2120) if (fragments == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2121) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2122)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2123) switch (cr) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2124) case 0:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2125) BUG_ON(ac->ac_2order == 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2126)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2127) /* Avoid using the first bg of a flexgroup for data files */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2128) if ((ac->ac_flags & EXT4_MB_HINT_DATA) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2129) (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2130) ((group % flex_size) == 0))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2131) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2132)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2133) if (free < ac->ac_g_ex.fe_len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2134) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2135)
		if (ac->ac_2order > ac->ac_sb->s_blocksize_bits + 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2137) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2138)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2139) if (grp->bb_largest_free_order < ac->ac_2order)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2140) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2141)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2142) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2143) case 1:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2144) if ((free / fragments) >= ac->ac_g_ex.fe_len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2145) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2146) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2147) case 2:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2148) if (free >= ac->ac_g_ex.fe_len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2149) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2150) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2151) case 3:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2152) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2153) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2154) BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2155) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2156)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2157) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2158) }
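
/*
 * Summary of the cr passes tested above, loosest last:
 *
 *	cr 0: power-of-2 request; trust bb_largest_free_order so the
 *	      buddy can hand out the request in one aligned piece
 *	cr 1: the average fragment (free / fragments) is at least the
 *	      goal length, so a fitting extent is likely
 *	cr 2: the group merely has enough free clusters in total
 *	cr 3: take whatever free space exists
 */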
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2159)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2160) /*
 * This could return a negative error code if something goes wrong
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2162) * during ext4_mb_init_group(). This should not be called with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2163) * ext4_lock_group() held.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2164) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2165) static int ext4_mb_good_group_nolock(struct ext4_allocation_context *ac,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2166) ext4_group_t group, int cr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2167) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2168) struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2169) struct super_block *sb = ac->ac_sb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2170) struct ext4_sb_info *sbi = EXT4_SB(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2171) bool should_lock = ac->ac_flags & EXT4_MB_STRICT_CHECK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2172) ext4_grpblk_t free;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2173) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2174)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2175) if (should_lock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2176) ext4_lock_group(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2177) free = grp->bb_free;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2178) if (free == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2179) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2180) if (cr <= 2 && free < ac->ac_g_ex.fe_len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2181) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2182) if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(grp)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2183) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2184) if (should_lock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2185) ext4_unlock_group(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2186)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2187) /* We only do this if the grp has never been initialized */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2188) if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2189) struct ext4_group_desc *gdp =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2190) ext4_get_group_desc(sb, group, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2191) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2192)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2193) /* cr=0/1 is a very optimistic search to find large
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2194) * good chunks almost for free. If buddy data is not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2195) * ready, then this optimization makes no sense. But
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2196) * we never skip the first block group in a flex_bg,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2197) * since this gets used for metadata block allocation,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2198) * and we want to make sure we locate metadata blocks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2199) * in the first block group in the flex_bg if possible.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2200) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2201) if (cr < 2 &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2202) (!sbi->s_log_groups_per_flex ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2203) ((group & ((1 << sbi->s_log_groups_per_flex) - 1)) != 0)) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2204) !(ext4_has_group_desc_csum(sb) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2205) (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2206) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2207) ret = ext4_mb_init_group(sb, group, GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2208) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2209) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2210) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2211)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2212) if (should_lock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2213) ext4_lock_group(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2214) ret = ext4_mb_good_group(ac, group, cr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2215) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2216) if (should_lock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2217) ext4_unlock_group(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2218) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2219) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2220)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2221) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2222) * Start prefetching @nr block bitmaps starting at @group.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2223) * Return the next group which needs to be prefetched.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2224) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2225) ext4_group_t ext4_mb_prefetch(struct super_block *sb, ext4_group_t group,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2226) unsigned int nr, int *cnt)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2227) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2228) ext4_group_t ngroups = ext4_get_groups_count(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2229) struct buffer_head *bh;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2230) struct blk_plug plug;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2231)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2232) blk_start_plug(&plug);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2233) while (nr-- > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2234) struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2235) NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2236) struct ext4_group_info *grp = ext4_get_group_info(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2237)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2238) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2239) * Prefetch block groups with free blocks; but don't
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2240) * bother if it is marked uninitialized on disk, since
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2241) * it won't require I/O to read. Also only try to
		 * prefetch once, so we avoid the getblk() call, which can
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2243) * be expensive.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2244) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2245) if (!EXT4_MB_GRP_TEST_AND_SET_READ(grp) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2246) EXT4_MB_GRP_NEED_INIT(grp) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2247) ext4_free_group_clusters(sb, gdp) > 0 &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2248) !(ext4_has_group_desc_csum(sb) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2249) (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2250) bh = ext4_read_block_bitmap_nowait(sb, group, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2251) if (bh && !IS_ERR(bh)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2252) if (!buffer_uptodate(bh) && cnt)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2253) (*cnt)++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2254) brelse(bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2255) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2256) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2257) if (++group >= ngroups)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2258) group = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2259) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2260) blk_finish_plug(&plug);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2261) return group;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2262) }
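
/*
 * Typical usage (a sketch based on ext4_mb_regular_allocator(), not a
 * verbatim caller): prefetch a window of bitmaps ahead of the scan,
 * then force buddy initialization for the window once scanning is done:
 *
 *	nr = sbi->s_mb_prefetch;
 *	prefetch_grp = ext4_mb_prefetch(sb, group, nr, &prefetch_ios);
 *	// ... scan the groups ...
 *	ext4_mb_prefetch_fini(sb, prefetch_grp, nr);
 */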
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2263)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2264) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2265) * Prefetching reads the block bitmap into the buffer cache; but we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2266) * need to make sure that the buddy bitmap in the page cache has been
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2267) * initialized. Note that ext4_mb_init_group() will block if the I/O
 * is not yet completed, or indeed if the I/O was never started because
 * ext4_mb_prefetch() did not initiate it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2270) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2271) * TODO: We should actually kick off the buddy bitmap setup in a work
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2272) * queue when the buffer I/O is completed, so that we don't block
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2273) * waiting for the block allocation bitmap read to finish when
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2274) * ext4_mb_prefetch_fini is called from ext4_mb_regular_allocator().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2275) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2276) void ext4_mb_prefetch_fini(struct super_block *sb, ext4_group_t group,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2277) unsigned int nr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2278) {
	while (nr-- > 0) {
		struct ext4_group_desc *gdp;
		struct ext4_group_info *grp;

		if (!group)
			group = ext4_get_groups_count(sb);
		group--;
		/* look up the descriptors only after stepping back to
		 * the group this iteration actually works on */
		gdp = ext4_get_group_desc(sb, group, NULL);
		grp = ext4_get_group_info(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2288)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2289) if (EXT4_MB_GRP_NEED_INIT(grp) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2290) ext4_free_group_clusters(sb, gdp) > 0 &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2291) !(ext4_has_group_desc_csum(sb) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2292) (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2293) if (ext4_mb_init_group(sb, group, GFP_NOFS))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2294) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2295) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2296) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2297) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2298)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2299) static noinline_for_stack int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2300) ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2301) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2302) ext4_group_t prefetch_grp = 0, ngroups, group, i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2303) int cr = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2304) int err = 0, first_err = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2305) unsigned int nr = 0, prefetch_ios = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2306) struct ext4_sb_info *sbi;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2307) struct super_block *sb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2308) struct ext4_buddy e4b;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2309) int lost;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2310)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2311) sb = ac->ac_sb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2312) sbi = EXT4_SB(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2313) ngroups = ext4_get_groups_count(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2314) /* non-extent files are limited to low blocks/groups */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2315) if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2316) ngroups = sbi->s_blockfile_groups;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2317)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2318) BUG_ON(ac->ac_status == AC_STATUS_FOUND);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2319)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2320) /* first, try the goal */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2321) err = ext4_mb_find_by_goal(ac, &e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2322) if (err || ac->ac_status == AC_STATUS_FOUND)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2323) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2324)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2325) if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2326) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2327)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2328) /*
	 * ac->ac_2order is set only if the fe_len is a power of 2;
	 * if ac->ac_2order is set, we also set the criteria to 0 so
	 * that we try exact allocation using the buddy.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2332) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2333) i = fls(ac->ac_g_ex.fe_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2334) ac->ac_2order = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2335) /*
	 * We search using buddy data only if the order of the request
	 * is greater than or equal to sbi->s_mb_order2_reqs.
	 * You can tune it via /sys/fs/ext4/<partition>/mb_order2_req.
	 * We also support searching for power-of-two requests only for
	 * requests up to the maximum buddy size we have constructed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2341) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2342) if (i >= sbi->s_mb_order2_reqs && i <= sb->s_blocksize_bits + 2) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2343) /*
	 * This should tell if fe_len is exactly a power of 2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2345) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2346) if ((ac->ac_g_ex.fe_len & (~(1 << (i - 1)))) == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2347) ac->ac_2order = array_index_nospec(i - 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2348) sb->s_blocksize_bits + 2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2349) }
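	/*
	 * Worked example (assuming 4k blocks, i.e. s_blocksize_bits == 12):
	 * a request of fe_len == 64 gives i = fls(64) = 7 and
	 * (64 & ~(1 << 6)) == 0, so ac_2order becomes 6 and the cr=0 pass
	 * can ask the buddy system directly for a free order-6 chunk,
	 * while fe_len == 65 fails the mask test and leaves ac_2order at 0.
	 */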
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2350)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2351) /* if stream allocation is enabled, use global goal */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2352) if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) {
		/* TBD: this may be a lock contention hot spot */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2354) spin_lock(&sbi->s_md_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2355) ac->ac_g_ex.fe_group = sbi->s_mb_last_group;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2356) ac->ac_g_ex.fe_start = sbi->s_mb_last_start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2357) spin_unlock(&sbi->s_md_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2358) }
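	/*
	 * Design note: the stream goal is shared filesystem-wide, so
	 * consecutive streaming allocations continue from wherever the
	 * last one ended, packing large sequential writes together
	 * instead of scattering them per inode.
	 */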
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2359)
	/* Let's just scan groups to find more or less suitable blocks */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2361) cr = ac->ac_2order ? 0 : 1;
	/*
	 * cr == 0 tries an exact power-of-two allocation via the buddy
	 * data, cr == 1 accepts groups whose average free fragment can
	 * satisfy the request, cr == 2 only requires enough free blocks,
	 * and cr == 3 takes anything.
	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2366) repeat:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2367) for (; cr < 4 && ac->ac_status == AC_STATUS_CONTINUE; cr++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2368) ac->ac_criteria = cr;
		/*
		 * Start the search for the right group from
		 * the goal value specified.
		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2373) group = ac->ac_g_ex.fe_group;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2374) prefetch_grp = group;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2375)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2376) for (i = 0; i < ngroups; group++, i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2377) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2378) cond_resched();
			/*
			 * The artificially restricted ngroups for non-extent
			 * files makes group >= ngroups possible on the first
			 * loop iteration.
			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2383) if (group >= ngroups)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2384) group = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2385)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2386) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2387) * Batch reads of the block allocation bitmaps
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2388) * to get multiple READs in flight; limit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2389) * prefetching at cr=0/1, otherwise mballoc can
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2390) * spend a lot of time loading imperfect groups
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2391) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2392) if ((prefetch_grp == group) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2393) (cr > 1 ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2394) prefetch_ios < sbi->s_mb_prefetch_limit)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2395) unsigned int curr_ios = prefetch_ios;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2396)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2397) nr = sbi->s_mb_prefetch;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2398) if (ext4_has_feature_flex_bg(sb)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2399) nr = 1 << sbi->s_log_groups_per_flex;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2400) nr -= group & (nr - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2401) nr = min(nr, sbi->s_mb_prefetch);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2402) }
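				/*
				 * e.g. with s_log_groups_per_flex == 4, nr
				 * starts at 16; for group 22, 22 & 15 == 6,
				 * so nr = 16 - 6 = 10 and the prefetch stops
				 * at the flex group boundary instead of
				 * crossing it.
				 */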
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2403) prefetch_grp = ext4_mb_prefetch(sb, group,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2404) nr, &prefetch_ios);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2405) if (prefetch_ios == curr_ios)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2406) nr = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2407) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2408)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2409) /* This now checks without needing the buddy page */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2410) ret = ext4_mb_good_group_nolock(ac, group, cr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2411) if (ret <= 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2412) if (!first_err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2413) first_err = ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2414) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2415) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2416)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2417) err = ext4_mb_load_buddy(sb, group, &e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2418) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2419) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2420)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2421) ext4_lock_group(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2422)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2423) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2424) * We need to check again after locking the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2425) * block group
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2426) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2427) ret = ext4_mb_good_group(ac, group, cr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2428) if (ret == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2429) ext4_unlock_group(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2430) ext4_mb_unload_buddy(&e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2431) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2432) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2433)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2434) ac->ac_groups_scanned++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2435) if (cr == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2436) ext4_mb_simple_scan_group(ac, &e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2437) else if (cr == 1 && sbi->s_stripe &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2438) !(ac->ac_g_ex.fe_len % sbi->s_stripe))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2439) ext4_mb_scan_aligned(ac, &e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2440) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2441) ext4_mb_complex_scan_group(ac, &e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2442)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2443) ext4_unlock_group(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2444) ext4_mb_unload_buddy(&e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2445)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2446) if (ac->ac_status != AC_STATUS_CONTINUE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2447) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2448) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2449) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2450)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2451) if (ac->ac_b_ex.fe_len > 0 && ac->ac_status != AC_STATUS_FOUND &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2452) !(ac->ac_flags & EXT4_MB_HINT_FIRST)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2453) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2454) * We've been searching too long. Let's try to allocate
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2455) * the best chunk we've found so far
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2456) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2457) ext4_mb_try_best_found(ac, &e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2458) if (ac->ac_status != AC_STATUS_FOUND) {
			/*
			 * Someone luckier has already allocated it.
			 * The only thing we can do is just take the
			 * first found block(s).
			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2464) lost = atomic_inc_return(&sbi->s_mb_lost_chunks);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2465) mb_debug(sb, "lost chunk, group: %u, start: %d, len: %d, lost: %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2466) ac->ac_b_ex.fe_group, ac->ac_b_ex.fe_start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2467) ac->ac_b_ex.fe_len, lost);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2468)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2469) ac->ac_b_ex.fe_group = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2470) ac->ac_b_ex.fe_start = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2471) ac->ac_b_ex.fe_len = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2472) ac->ac_status = AC_STATUS_CONTINUE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2473) ac->ac_flags |= EXT4_MB_HINT_FIRST;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2474) cr = 3;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2475) goto repeat;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2476) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2477) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2478) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2479) if (!err && ac->ac_status != AC_STATUS_FOUND && first_err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2480) err = first_err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2481)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2482) mb_debug(sb, "Best len %d, origin len %d, ac_status %u, ac_flags 0x%x, cr %d ret %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2483) ac->ac_b_ex.fe_len, ac->ac_o_ex.fe_len, ac->ac_status,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2484) ac->ac_flags, cr, err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2485)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2486) if (nr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2487) ext4_mb_prefetch_fini(sb, prefetch_grp, nr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2488)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2489) return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2490) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2491)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2492) static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2493) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2494) struct super_block *sb = PDE_DATA(file_inode(seq->file));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2495) ext4_group_t group;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2496)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2497) if (*pos < 0 || *pos >= ext4_get_groups_count(sb))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2498) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2499) group = *pos + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2500) return (void *) ((unsigned long) group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2501) }
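
/*
 * Note: the group number is encoded as *pos + 1 because seq_file treats
 * a NULL return from ->start/->next as end-of-sequence; group 0 would
 * otherwise be indistinguishable from NULL. ext4_mb_seq_groups_show()
 * undoes the offset with group--.
 */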
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2502)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2503) static void *ext4_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2504) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2505) struct super_block *sb = PDE_DATA(file_inode(seq->file));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2506) ext4_group_t group;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2507)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2508) ++*pos;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2509) if (*pos < 0 || *pos >= ext4_get_groups_count(sb))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2510) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2511) group = *pos + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2512) return (void *) ((unsigned long) group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2513) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2514)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2515) static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2516) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2517) struct super_block *sb = PDE_DATA(file_inode(seq->file));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2518) ext4_group_t group = (ext4_group_t) ((unsigned long) v);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2519) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2520) int err, buddy_loaded = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2521) struct ext4_buddy e4b;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2522) struct ext4_group_info *grinfo;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2523) unsigned char blocksize_bits = min_t(unsigned char,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2524) sb->s_blocksize_bits,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2525) EXT4_MAX_BLOCK_LOG_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2526) struct sg {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2527) struct ext4_group_info info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2528) ext4_grpblk_t counters[EXT4_MAX_BLOCK_LOG_SIZE + 2];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2529) } sg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2530)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2531) group--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2532) if (group == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2533) seq_puts(seq, "#group: free frags first ["
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2534) " 2^0 2^1 2^2 2^3 2^4 2^5 2^6 "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2535) " 2^7 2^8 2^9 2^10 2^11 2^12 2^13 ]\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2536)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2537) i = (blocksize_bits + 2) * sizeof(sg.info.bb_counters[0]) +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2538) sizeof(struct ext4_group_info);
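	/*
	 * e.g. with 4k blocks (blocksize_bits == 12) this copies the
	 * ext4_group_info header plus 14 bb_counters entries (orders 0
	 * through blocksize_bits + 1), which always fits in the
	 * counters[EXT4_MAX_BLOCK_LOG_SIZE + 2] scratch area of sg above.
	 */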
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2539)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2540) grinfo = ext4_get_group_info(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2541) /* Load the group info in memory only if not already loaded. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2542) if (unlikely(EXT4_MB_GRP_NEED_INIT(grinfo))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2543) err = ext4_mb_load_buddy(sb, group, &e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2544) if (err) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2545) seq_printf(seq, "#%-5u: I/O error\n", group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2546) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2547) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2548) buddy_loaded = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2549) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2550)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2551) memcpy(&sg, ext4_get_group_info(sb, group), i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2552)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2553) if (buddy_loaded)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2554) ext4_mb_unload_buddy(&e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2555)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2556) seq_printf(seq, "#%-5u: %-5u %-5u %-5u [", group, sg.info.bb_free,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2557) sg.info.bb_fragments, sg.info.bb_first_free);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2558) for (i = 0; i <= 13; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2559) seq_printf(seq, " %-5u", i <= blocksize_bits + 1 ?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2560) sg.info.bb_counters[i] : 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2561) seq_puts(seq, " ]\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2562)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2563) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2564) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2565)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2566) static void ext4_mb_seq_groups_stop(struct seq_file *seq, void *v)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2567) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2568) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2569)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2570) const struct seq_operations ext4_mb_seq_groups_ops = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2571) .start = ext4_mb_seq_groups_start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2572) .next = ext4_mb_seq_groups_next,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2573) .stop = ext4_mb_seq_groups_stop,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2574) .show = ext4_mb_seq_groups_show,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2575) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2576)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2577) static struct kmem_cache *get_groupinfo_cache(int blocksize_bits)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2578) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2579) int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2580) struct kmem_cache *cachep = ext4_groupinfo_caches[cache_index];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2581)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2582) BUG_ON(!cachep);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2583) return cachep;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2584) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2585)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2586) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2587) * Allocate the top-level s_group_info array for the specified number
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2588) * of groups
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2589) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2590) int ext4_mb_alloc_groupinfo(struct super_block *sb, ext4_group_t ngroups)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2591) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2592) struct ext4_sb_info *sbi = EXT4_SB(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2593) unsigned size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2594) struct ext4_group_info ***old_groupinfo, ***new_groupinfo;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2595)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2596) size = (ngroups + EXT4_DESC_PER_BLOCK(sb) - 1) >>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2597) EXT4_DESC_PER_BLOCK_BITS(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2598) if (size <= sbi->s_group_info_size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2599) return 0;
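	/*
	 * e.g. assuming 4k blocks and 32-byte group descriptors,
	 * EXT4_DESC_PER_BLOCK(sb) == 128, so a filesystem with 1000
	 * groups needs (1000 + 127) >> 7 == 8 second-level table slots.
	 */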
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2600)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2601) size = roundup_pow_of_two(sizeof(*sbi->s_group_info) * size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2602) new_groupinfo = kvzalloc(size, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2603) if (!new_groupinfo) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2604) ext4_msg(sb, KERN_ERR, "can't allocate buddy meta group");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2605) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2606) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2607) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2608) old_groupinfo = rcu_dereference(sbi->s_group_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2609) if (old_groupinfo)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2610) memcpy(new_groupinfo, old_groupinfo,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2611) sbi->s_group_info_size * sizeof(*sbi->s_group_info));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2612) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2613) rcu_assign_pointer(sbi->s_group_info, new_groupinfo);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2614) sbi->s_group_info_size = size / sizeof(*sbi->s_group_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2615) if (old_groupinfo)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2616) ext4_kvfree_array_rcu(old_groupinfo);
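	/*
	 * This is the classic RCU replace pattern: publish the new array
	 * with rcu_assign_pointer() first, then hand the old one to
	 * ext4_kvfree_array_rcu(), which defers the actual kvfree() until
	 * a grace period has elapsed so that lockless readers can finish.
	 */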
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2617) ext4_debug("allocated s_groupinfo array for %d meta_bg's\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2618) sbi->s_group_info_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2619) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2620) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2621)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2622) /* Create and initialize ext4_group_info data for the given group. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2623) int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2624) struct ext4_group_desc *desc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2625) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2626) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2627) int metalen = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2628) int idx = group >> EXT4_DESC_PER_BLOCK_BITS(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2629) struct ext4_sb_info *sbi = EXT4_SB(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2630) struct ext4_group_info **meta_group_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2631) struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2632)
	/*
	 * First check if this group is the first group of a descriptor
	 * block. If it is, we have to allocate a new table of pointers
	 * to ext4_group_info structures.
	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2638) if (group % EXT4_DESC_PER_BLOCK(sb) == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2639) metalen = sizeof(*meta_group_info) <<
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2640) EXT4_DESC_PER_BLOCK_BITS(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2641) meta_group_info = kmalloc(metalen, GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2642) if (meta_group_info == NULL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2643) ext4_msg(sb, KERN_ERR, "can't allocate mem "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2644) "for a buddy group");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2645) goto exit_meta_group_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2646) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2647) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2648) rcu_dereference(sbi->s_group_info)[idx] = meta_group_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2649) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2650) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2651)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2652) meta_group_info = sbi_array_rcu_deref(sbi, s_group_info, idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2653) i = group & (EXT4_DESC_PER_BLOCK(sb) - 1);
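	/*
	 * Two-level lookup: e.g. with 128 descriptors per block, group 300
	 * lands in table idx = 300 >> 7 = 2, slot i = 300 & 127 = 44.
	 */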
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2654)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2655) meta_group_info[i] = kmem_cache_zalloc(cachep, GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2656) if (meta_group_info[i] == NULL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2657) ext4_msg(sb, KERN_ERR, "can't allocate buddy mem");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2658) goto exit_group_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2659) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2660) set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2661) &(meta_group_info[i]->bb_state));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2662)
	/*
	 * Initialize bb_free so that we can skip empty groups
	 * without initializing them.
	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2667) if (ext4_has_group_desc_csum(sb) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2668) (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2669) meta_group_info[i]->bb_free =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2670) ext4_free_clusters_after_init(sb, group, desc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2671) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2672) meta_group_info[i]->bb_free =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2673) ext4_free_group_clusters(sb, desc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2674) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2675)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2676) INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2677) init_rwsem(&meta_group_info[i]->alloc_sem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2678) meta_group_info[i]->bb_free_root = RB_ROOT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2679) meta_group_info[i]->bb_largest_free_order = -1; /* uninit */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2680)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2681) mb_group_bb_bitmap_alloc(sb, meta_group_info[i], group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2682) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2683)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2684) exit_group_info:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2685) /* If a meta_group_info table has been allocated, release it now */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2686) if (group % EXT4_DESC_PER_BLOCK(sb) == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2687) struct ext4_group_info ***group_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2688)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2689) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2690) group_info = rcu_dereference(sbi->s_group_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2691) kfree(group_info[idx]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2692) group_info[idx] = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2693) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2694) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2695) exit_meta_group_info:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2696) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2697) } /* ext4_mb_add_groupinfo */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2698)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2699) static int ext4_mb_init_backend(struct super_block *sb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2700) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2701) ext4_group_t ngroups = ext4_get_groups_count(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2702) ext4_group_t i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2703) struct ext4_sb_info *sbi = EXT4_SB(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2704) int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2705) struct ext4_group_desc *desc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2706) struct ext4_group_info ***group_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2707) struct kmem_cache *cachep;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2708)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2709) err = ext4_mb_alloc_groupinfo(sb, ngroups);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2710) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2711) return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2712)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2713) sbi->s_buddy_cache = new_inode(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2714) if (sbi->s_buddy_cache == NULL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2715) ext4_msg(sb, KERN_ERR, "can't get new inode");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2716) goto err_freesgi;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2717) }
	/* To avoid potentially colliding with a valid on-disk inode number,
	 * use EXT4_BAD_INO for the buddy cache inode number. This inode is
	 * not in the inode hash, so it should never be found by iget(), but
	 * this will avoid confusion if it ever shows up during debugging. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2722) sbi->s_buddy_cache->i_ino = EXT4_BAD_INO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2723) EXT4_I(sbi->s_buddy_cache)->i_disksize = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2724) for (i = 0; i < ngroups; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2725) cond_resched();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2726) desc = ext4_get_group_desc(sb, i, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2727) if (desc == NULL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2728) ext4_msg(sb, KERN_ERR, "can't read descriptor %u", i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2729) goto err_freebuddy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2730) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2731) if (ext4_mb_add_groupinfo(sb, i, desc) != 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2732) goto err_freebuddy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2733) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2734)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2735) if (ext4_has_feature_flex_bg(sb)) {
		/* A single flex group is supposed to be read by a single IO.
		 * 1 << s_log_groups_per_flex must not overflow s_mb_prefetch,
		 * which is an unsigned int, so the shift must be less than 32.
		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2740) if (sbi->s_es->s_log_groups_per_flex >= 32) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2741) ext4_msg(sb, KERN_ERR, "too many log groups per flexible block group");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2742) goto err_freebuddy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2743) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2744) sbi->s_mb_prefetch = min_t(uint, 1 << sbi->s_es->s_log_groups_per_flex,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2745) BLK_MAX_SEGMENT_SIZE >> (sb->s_blocksize_bits - 9));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2746) sbi->s_mb_prefetch *= 8; /* 8 prefetch IOs in flight at most */
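		/*
		 * e.g. with 4k blocks and s_log_groups_per_flex == 4:
		 * min(16, BLK_MAX_SEGMENT_SIZE >> 3) == 16 groups per IO,
		 * times 8 IOs in flight gives s_mb_prefetch == 128.
		 */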
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2747) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2748) sbi->s_mb_prefetch = 32;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2749) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2750) if (sbi->s_mb_prefetch > ext4_get_groups_count(sb))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2751) sbi->s_mb_prefetch = ext4_get_groups_count(sb);
	/* How many real IOs to prefetch within a single allocation at cr=0.
	 * Given that cr=0 is a CPU-related optimization we shouldn't try to
	 * load too many groups; at some point we should start to use what
	 * we've got in memory.
	 * With an average random access time of 5ms, it'd take a second to
	 * get 200 groups (* N with flex_bg), so let's make this limit 4.
	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2759) sbi->s_mb_prefetch_limit = sbi->s_mb_prefetch * 4;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2760) if (sbi->s_mb_prefetch_limit > ext4_get_groups_count(sb))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2761) sbi->s_mb_prefetch_limit = ext4_get_groups_count(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2762)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2763) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2764)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2765) err_freebuddy:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2766) cachep = get_groupinfo_cache(sb->s_blocksize_bits);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2767) while (i-- > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2768) kmem_cache_free(cachep, ext4_get_group_info(sb, i));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2769) i = sbi->s_group_info_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2770) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2771) group_info = rcu_dereference(sbi->s_group_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2772) while (i-- > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2773) kfree(group_info[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2774) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2775) iput(sbi->s_buddy_cache);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2776) err_freesgi:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2777) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2778) kvfree(rcu_dereference(sbi->s_group_info));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2779) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2780) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2781) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2782)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2783) static void ext4_groupinfo_destroy_slabs(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2784) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2785) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2786)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2787) for (i = 0; i < NR_GRPINFO_CACHES; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2788) kmem_cache_destroy(ext4_groupinfo_caches[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2789) ext4_groupinfo_caches[i] = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2790) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2791) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2792)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2793) static int ext4_groupinfo_create_slab(size_t size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2794) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2795) static DEFINE_MUTEX(ext4_grpinfo_slab_create_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2796) int slab_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2797) int blocksize_bits = order_base_2(size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2798) int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2799) struct kmem_cache *cachep;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2800)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2801) if (cache_index >= NR_GRPINFO_CACHES)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2802) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2803)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2804) if (unlikely(cache_index < 0))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2805) cache_index = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2806)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2807) mutex_lock(&ext4_grpinfo_slab_create_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2808) if (ext4_groupinfo_caches[cache_index]) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2809) mutex_unlock(&ext4_grpinfo_slab_create_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2810) return 0; /* Already created */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2811) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2812)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2813) slab_size = offsetof(struct ext4_group_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2814) bb_counters[blocksize_bits + 2]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2815)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2816) cachep = kmem_cache_create(ext4_groupinfo_slab_names[cache_index],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2817) slab_size, 0, SLAB_RECLAIM_ACCOUNT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2818) NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2819)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2820) ext4_groupinfo_caches[cache_index] = cachep;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2821)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2822) mutex_unlock(&ext4_grpinfo_slab_create_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2823) if (!cachep) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2824) printk(KERN_EMERG
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2825) "EXT4-fs: no memory for groupinfo slab cache\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2826) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2827) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2828)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2829) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2830) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2831)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2832) int ext4_mb_init(struct super_block *sb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2833) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2834) struct ext4_sb_info *sbi = EXT4_SB(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2835) unsigned i, j;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2836) unsigned offset, offset_incr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2837) unsigned max;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2838) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2839)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2840) i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_offsets);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2841)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2842) sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2843) if (sbi->s_mb_offsets == NULL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2844) ret = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2845) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2846) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2847)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2848) i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_maxs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2849) sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2850) if (sbi->s_mb_maxs == NULL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2851) ret = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2852) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2853) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2854)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2855) ret = ext4_groupinfo_create_slab(sb->s_blocksize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2856) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2857) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2858)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2859) /* order 0 is regular bitmap */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2860) sbi->s_mb_maxs[0] = sb->s_blocksize << 3;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2861) sbi->s_mb_offsets[0] = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2862)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2863) i = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2864) offset = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2865) offset_incr = 1 << (sb->s_blocksize_bits - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2866) max = sb->s_blocksize << 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2867) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2868) sbi->s_mb_offsets[i] = offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2869) sbi->s_mb_maxs[i] = max;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2870) offset += offset_incr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2871) offset_incr = offset_incr >> 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2872) max = max >> 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2873) i++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2874) } while (i <= sb->s_blocksize_bits + 1);
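	/*
	 * Layout sketch for 4k blocks (blocksize_bits == 12): the order-1
	 * buddy bitmap starts at byte offset 0 and holds at most
	 * blocksize << 2 == 16384 bits, order 2 starts at byte 2048 with
	 * at most 8192 bits, and so on, each order halving both the
	 * maximum count and the offset increment.
	 */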
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2875)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2876) spin_lock_init(&sbi->s_md_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2877) spin_lock_init(&sbi->s_bal_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2878) sbi->s_mb_free_pending = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2879) INIT_LIST_HEAD(&sbi->s_freed_data_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2880)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2881) sbi->s_mb_max_to_scan = MB_DEFAULT_MAX_TO_SCAN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2882) sbi->s_mb_min_to_scan = MB_DEFAULT_MIN_TO_SCAN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2883) sbi->s_mb_stats = MB_DEFAULT_STATS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2884) sbi->s_mb_stream_request = MB_DEFAULT_STREAM_THRESHOLD;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2885) sbi->s_mb_order2_reqs = MB_DEFAULT_ORDER2_REQS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2886) sbi->s_mb_max_inode_prealloc = MB_DEFAULT_MAX_INODE_PREALLOC;
	/*
	 * The default group preallocation is 512, which for 4k block
	 * sizes translates to 2 megabytes. However for bigalloc file
	 * systems, this is probably too big (i.e., if the cluster size
	 * is 1 megabyte, then group preallocation size becomes half a
	 * gigabyte!). As a default, we will keep a two megabyte
	 * group prealloc size for cluster sizes up to 64k, and after
	 * that, we will force a minimum group preallocation size of
	 * 32 clusters. This translates to 8 megs when the cluster
	 * size is 256k, and 32 megs when the cluster size is 1 meg,
	 * which seems reasonable as a default.
	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2899) sbi->s_mb_group_prealloc = max(MB_DEFAULT_GROUP_PREALLOC >>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2900) sbi->s_cluster_bits, 32);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2901) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2902) * If there is a s_stripe > 1, then we set the s_mb_group_prealloc
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2903) * to the lowest multiple of s_stripe which is bigger than
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2904) * the s_mb_group_prealloc as determined above. We want
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2905) * the preallocation size to be an exact multiple of the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2906) * RAID stripe size so that preallocations don't fragment
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2907) * the stripes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2908) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2909) if (sbi->s_stripe > 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2910) sbi->s_mb_group_prealloc = roundup(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2911) sbi->s_mb_group_prealloc, sbi->s_stripe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2912) }
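	/*
	 * e.g. a prealloc of 32 clusters with s_stripe == 24 becomes
	 * roundup(32, 24) == 48, so preallocations cover whole stripes.
	 */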
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2913)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2914) sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2915) if (sbi->s_locality_groups == NULL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2916) ret = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2917) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2918) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2919) for_each_possible_cpu(i) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2920) struct ext4_locality_group *lg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2921) lg = per_cpu_ptr(sbi->s_locality_groups, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2922) mutex_init(&lg->lg_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2923) for (j = 0; j < PREALLOC_TB_SIZE; j++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2924) INIT_LIST_HEAD(&lg->lg_prealloc_list[j]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2925) spin_lock_init(&lg->lg_prealloc_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2926) }
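	/*
	 * Each possible CPU gets its own locality group, so small
	 * allocations issued from different CPUs are batched into
	 * separate group preallocations and don't contend on lg_mutex.
	 */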
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2927)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2928) /* init file for buddy data */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2929) ret = ext4_mb_init_backend(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2930) if (ret != 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2931) goto out_free_locality_groups;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2932)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2933) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2934)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2935) out_free_locality_groups:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2936) free_percpu(sbi->s_locality_groups);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2937) sbi->s_locality_groups = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2938) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2939) kfree(sbi->s_mb_offsets);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2940) sbi->s_mb_offsets = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2941) kfree(sbi->s_mb_maxs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2942) sbi->s_mb_maxs = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2943) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2944) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2945)
/* needs to be called with the ext4 group lock held */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2947) static int ext4_mb_cleanup_pa(struct ext4_group_info *grp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2948) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2949) struct ext4_prealloc_space *pa;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2950) struct list_head *cur, *tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2951) int count = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2952)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2953) list_for_each_safe(cur, tmp, &grp->bb_prealloc_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2954) pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2955) list_del(&pa->pa_group_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2956) count++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2957) kmem_cache_free(ext4_pspace_cachep, pa);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2958) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2959) return count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2960) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2961)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2962) int ext4_mb_release(struct super_block *sb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2963) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2964) ext4_group_t ngroups = ext4_get_groups_count(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2965) ext4_group_t i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2966) int num_meta_group_infos;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2967) struct ext4_group_info *grinfo, ***group_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2968) struct ext4_sb_info *sbi = EXT4_SB(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2969) struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2970) int count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2971)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2972) if (sbi->s_group_info) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2973) for (i = 0; i < ngroups; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2974) cond_resched();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2975) grinfo = ext4_get_group_info(sb, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2976) mb_group_bb_bitmap_free(grinfo);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2977) ext4_lock_group(sb, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2978) count = ext4_mb_cleanup_pa(grinfo);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2979) if (count)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2980) mb_debug(sb, "mballoc: %d PAs left\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2981) count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2982) ext4_unlock_group(sb, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2983) kmem_cache_free(cachep, grinfo);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2984) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2985) num_meta_group_infos = (ngroups +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2986) EXT4_DESC_PER_BLOCK(sb) - 1) >>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2987) EXT4_DESC_PER_BLOCK_BITS(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2988) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2989) group_info = rcu_dereference(sbi->s_group_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2990) for (i = 0; i < num_meta_group_infos; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2991) kfree(group_info[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2992) kvfree(group_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2993) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2994) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2995) kfree(sbi->s_mb_offsets);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2996) kfree(sbi->s_mb_maxs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2997) iput(sbi->s_buddy_cache);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2998) if (sbi->s_mb_stats) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2999) ext4_msg(sb, KERN_INFO,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3000) "mballoc: %u blocks %u reqs (%u success)",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3001) atomic_read(&sbi->s_bal_allocated),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3002) atomic_read(&sbi->s_bal_reqs),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3003) atomic_read(&sbi->s_bal_success));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3004) ext4_msg(sb, KERN_INFO,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3005) "mballoc: %u extents scanned, %u goal hits, "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3006) "%u 2^N hits, %u breaks, %u lost",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3007) atomic_read(&sbi->s_bal_ex_scanned),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3008) atomic_read(&sbi->s_bal_goals),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3009) atomic_read(&sbi->s_bal_2orders),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3010) atomic_read(&sbi->s_bal_breaks),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3011) atomic_read(&sbi->s_mb_lost_chunks));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3012) ext4_msg(sb, KERN_INFO,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3013) "mballoc: %lu generated and it took %Lu",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3014) sbi->s_mb_buddies_generated,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3015) sbi->s_mb_generation_time);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3016) ext4_msg(sb, KERN_INFO,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3017) "mballoc: %u preallocated, %u discarded",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3018) atomic_read(&sbi->s_mb_preallocated),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3019) atomic_read(&sbi->s_mb_discarded));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3020) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3021)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3022) free_percpu(sbi->s_locality_groups);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3023)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3024) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3025) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3026)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3027) static inline int ext4_issue_discard(struct super_block *sb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3028) ext4_group_t block_group, ext4_grpblk_t cluster, int count,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3029) struct bio **biop)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3030) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3031) ext4_fsblk_t discard_block;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3032)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3033) discard_block = (EXT4_C2B(EXT4_SB(sb), cluster) +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3034) ext4_group_first_block_no(sb, block_group));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3035) count = EXT4_C2B(EXT4_SB(sb), count);
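	/*
	 * EXT4_C2B() converts clusters to filesystem blocks; the
	 * (s_blocksize_bits - 9) shifts below then convert blocks into
	 * 512-byte sectors, e.g. 4k blocks shift by 3 (8 sectors/block).
	 */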
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3036) trace_ext4_discard_blocks(sb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3037) (unsigned long long) discard_block, count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3038) if (biop) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3039) return __blkdev_issue_discard(sb->s_bdev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3040) (sector_t)discard_block << (sb->s_blocksize_bits - 9),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3041) (sector_t)count << (sb->s_blocksize_bits - 9),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3042) GFP_NOFS, 0, biop);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3043) } else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3044) return sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3045) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3046)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3047) static void ext4_free_data_in_buddy(struct super_block *sb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3048) struct ext4_free_data *entry)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3049) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3050) struct ext4_buddy e4b;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3051) struct ext4_group_info *db;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3052) int err, count = 0, count2 = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3053)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3054) mb_debug(sb, "gonna free %u blocks in group %u (0x%p):",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3055) entry->efd_count, entry->efd_group, entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3056)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3057) err = ext4_mb_load_buddy(sb, entry->efd_group, &e4b);
	/* we expect to find an existing buddy because it's pinned */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3059) BUG_ON(err != 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3060)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3061) spin_lock(&EXT4_SB(sb)->s_md_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3062) EXT4_SB(sb)->s_mb_free_pending -= entry->efd_count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3063) spin_unlock(&EXT4_SB(sb)->s_md_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3064)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3065) db = e4b.bd_info;
	/* there are blocks to put in the buddy to make them really free */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3067) count += entry->efd_count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3068) count2++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3069) ext4_lock_group(sb, entry->efd_group);
	/* Take it out of the per-group rb tree */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3071) rb_erase(&entry->efd_node, &(db->bb_free_root));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3072) mb_free_blocks(NULL, &e4b, entry->efd_start_cluster, entry->efd_count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3073)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3074) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3075) * Clear the trimmed flag for the group so that the next
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3076) * ext4_trim_fs can trim it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3077) * If the volume is mounted with -o discard, online discard
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3078) * is supported and the free blocks will be trimmed online.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3079) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3080) if (!test_opt(sb, DISCARD))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3081) EXT4_MB_GRP_CLEAR_TRIMMED(db);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3082)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3083) if (!db->bb_free_root.rb_node) {
		/*
		 * No more items in the per-group rb tree;
		 * balance the refcounts from ext4_mb_free_metadata().
		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3087) put_page(e4b.bd_buddy_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3088) put_page(e4b.bd_bitmap_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3089) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3090) ext4_unlock_group(sb, entry->efd_group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3091) kmem_cache_free(ext4_free_data_cachep, entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3092) ext4_mb_unload_buddy(&e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3093)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3094) mb_debug(sb, "freed %d blocks in %d structures\n", count,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3095) count2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3096) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3097)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3098) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3099) * This function is called by the jbd2 layer once the commit has finished,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3100) * so we know we can free the blocks that were released with that commit.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3101) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3102) void ext4_process_freed_data(struct super_block *sb, tid_t commit_tid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3103) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3104) struct ext4_sb_info *sbi = EXT4_SB(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3105) struct ext4_free_data *entry, *tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3106) struct bio *discard_bio = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3107) struct list_head freed_data_list;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3108) struct list_head *cut_pos = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3109) int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3110)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3111) INIT_LIST_HEAD(&freed_data_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3112)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3113) spin_lock(&sbi->s_md_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3114) list_for_each_entry(entry, &sbi->s_freed_data_list, efd_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3115) if (entry->efd_tid != commit_tid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3116) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3117) cut_pos = &entry->efd_list;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3118) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3119) if (cut_pos)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3120) list_cut_position(&freed_data_list, &sbi->s_freed_data_list,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3121) cut_pos);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3122) spin_unlock(&sbi->s_md_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3123)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3124) if (test_opt(sb, DISCARD)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3125) list_for_each_entry(entry, &freed_data_list, efd_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3126) err = ext4_issue_discard(sb, entry->efd_group,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3127) entry->efd_start_cluster,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3128) entry->efd_count,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3129) &discard_bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3130) if (err && err != -EOPNOTSUPP) {
				ext4_msg(sb, KERN_WARNING,
					 "discard request in group:%d block:%d count:%d failed with %d",
					 entry->efd_group,
					 entry->efd_start_cluster,
					 entry->efd_count, err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3136) } else if (err == -EOPNOTSUPP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3137) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3138) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3139)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3140) if (discard_bio) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3141) submit_bio_wait(discard_bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3142) bio_put(discard_bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3143) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3144) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3145)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3146) list_for_each_entry_safe(entry, tmp, &freed_data_list, efd_list)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3147) ext4_free_data_in_buddy(sb, entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3148) }
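
/*
 * Illustrative sketch of the cut above (hypothetical tids, not real
 * data): s_freed_data_list is kept ordered by efd_tid, so scanning
 * until the tid changes and cutting at the last matching entry splices
 * off exactly the blocks freed by this commit:
 *
 *   s_freed_data_list: [tid 5] -> [tid 5] -> [tid 7] -> [tid 7]
 *   commit_tid == 5:   cut_pos = second tid-5 entry, and
 *   list_cut_position() moves both tid-5 entries onto freed_data_list,
 *   leaving the tid-7 entries queued for the next commit.
 */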
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3149)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3150) int __init ext4_init_mballoc(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3151) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3152) ext4_pspace_cachep = KMEM_CACHE(ext4_prealloc_space,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3153) SLAB_RECLAIM_ACCOUNT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3154) if (ext4_pspace_cachep == NULL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3155) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3156)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3157) ext4_ac_cachep = KMEM_CACHE(ext4_allocation_context,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3158) SLAB_RECLAIM_ACCOUNT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3159) if (ext4_ac_cachep == NULL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3160) goto out_pa_free;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3161)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3162) ext4_free_data_cachep = KMEM_CACHE(ext4_free_data,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3163) SLAB_RECLAIM_ACCOUNT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3164) if (ext4_free_data_cachep == NULL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3165) goto out_ac_free;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3166)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3167) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3168)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3169) out_ac_free:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3170) kmem_cache_destroy(ext4_ac_cachep);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3171) out_pa_free:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3172) kmem_cache_destroy(ext4_pspace_cachep);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3173) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3174) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3175) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3176)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3177) void ext4_exit_mballoc(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3178) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3179) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3180) * Wait for completion of call_rcu()'s on ext4_pspace_cachep
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3181) * before destroying the slab cache.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3182) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3183) rcu_barrier();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3184) kmem_cache_destroy(ext4_pspace_cachep);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3185) kmem_cache_destroy(ext4_ac_cachep);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3186) kmem_cache_destroy(ext4_free_data_cachep);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3187) ext4_groupinfo_destroy_slabs();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3188) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3189)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3190)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3191) /*
 * Check quota and mark the chosen space (ac->ac_b_ex) non-free in bitmaps.
 * Returns 0 on success, or an error code on failure.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3194) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3195) static noinline_for_stack int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3196) ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3197) handle_t *handle, unsigned int reserv_clstrs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3198) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3199) struct buffer_head *bitmap_bh = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3200) struct ext4_group_desc *gdp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3201) struct buffer_head *gdp_bh;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3202) struct ext4_sb_info *sbi;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3203) struct super_block *sb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3204) ext4_fsblk_t block;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3205) int err, len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3206)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3207) BUG_ON(ac->ac_status != AC_STATUS_FOUND);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3208) BUG_ON(ac->ac_b_ex.fe_len <= 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3209)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3210) sb = ac->ac_sb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3211) sbi = EXT4_SB(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3212)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3213) bitmap_bh = ext4_read_block_bitmap(sb, ac->ac_b_ex.fe_group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3214) if (IS_ERR(bitmap_bh)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3215) err = PTR_ERR(bitmap_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3216) bitmap_bh = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3217) goto out_err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3218) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3219)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3220) BUFFER_TRACE(bitmap_bh, "getting write access");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3221) err = ext4_journal_get_write_access(handle, bitmap_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3222) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3223) goto out_err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3224)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3225) err = -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3226) gdp = ext4_get_group_desc(sb, ac->ac_b_ex.fe_group, &gdp_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3227) if (!gdp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3228) goto out_err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3229)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3230) ext4_debug("using block group %u(%d)\n", ac->ac_b_ex.fe_group,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3231) ext4_free_group_clusters(sb, gdp));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3232)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3233) BUFFER_TRACE(gdp_bh, "get_write_access");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3234) err = ext4_journal_get_write_access(handle, gdp_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3235) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3236) goto out_err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3237)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3238) block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3239)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3240) len = EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3241) if (!ext4_inode_block_valid(ac->ac_inode, block, len)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3242) ext4_error(sb, "Allocating blocks %llu-%llu which overlap "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3243) "fs metadata", block, block+len);
		/*
		 * The file system was mounted not to panic on error, so fix
		 * the bitmap and return EFSCORRUPTED.
		 * We leak some of the blocks here.
		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3248) ext4_lock_group(sb, ac->ac_b_ex.fe_group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3249) ext4_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3250) ac->ac_b_ex.fe_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3251) ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3252) err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3253) if (!err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3254) err = -EFSCORRUPTED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3255) goto out_err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3256) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3257)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3258) ext4_lock_group(sb, ac->ac_b_ex.fe_group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3259) #ifdef AGGRESSIVE_CHECK
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3260) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3261) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3262) for (i = 0; i < ac->ac_b_ex.fe_len; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3263) BUG_ON(mb_test_bit(ac->ac_b_ex.fe_start + i,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3264) bitmap_bh->b_data));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3265) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3266) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3267) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3268) ext4_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3269) ac->ac_b_ex.fe_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3270) if (ext4_has_group_desc_csum(sb) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3271) (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3272) gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3273) ext4_free_group_clusters_set(sb, gdp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3274) ext4_free_clusters_after_init(sb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3275) ac->ac_b_ex.fe_group, gdp));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3276) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3277) len = ext4_free_group_clusters(sb, gdp) - ac->ac_b_ex.fe_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3278) ext4_free_group_clusters_set(sb, gdp, len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3279) ext4_block_bitmap_csum_set(sb, ac->ac_b_ex.fe_group, gdp, bitmap_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3280) ext4_group_desc_csum_set(sb, ac->ac_b_ex.fe_group, gdp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3281)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3282) ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3283) percpu_counter_sub(&sbi->s_freeclusters_counter, ac->ac_b_ex.fe_len);
	/*
	 * Now also reduce the dirty cluster count; it should not go negative.
	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3287) if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3288) /* release all the reserved blocks if non delalloc */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3289) percpu_counter_sub(&sbi->s_dirtyclusters_counter,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3290) reserv_clstrs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3291)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3292) if (sbi->s_log_groups_per_flex) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3293) ext4_group_t flex_group = ext4_flex_group(sbi,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3294) ac->ac_b_ex.fe_group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3295) atomic64_sub(ac->ac_b_ex.fe_len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3296) &sbi_array_rcu_deref(sbi, s_flex_groups,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3297) flex_group)->free_clusters);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3298) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3299)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3300) err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3301) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3302) goto out_err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3303) err = ext4_handle_dirty_metadata(handle, NULL, gdp_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3304)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3305) out_err:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3306) brelse(bitmap_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3307) return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3308) }
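
/*
 * Worked example of the cluster accounting above (a sketch assuming a
 * bigalloc file system with a cluster ratio of 16, i.e. 4k blocks and
 * 64k clusters):
 *
 *   ac_b_ex.fe_len   = 3 (clusters)
 *   EXT4_C2B(sbi, 3) = 3 << 4 = 48 (file system blocks)
 *
 * ext4_inode_block_valid() is therefore asked about a 48-block range,
 * while the group descriptor, s_freeclusters_counter and the flex group
 * counter are all debited by 3 clusters, not by 48 blocks.
 */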
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3309)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3310) /*
 * Idempotent helper for the ext4 fast commit replay path to set the
 * state of blocks in bitmaps and update counters.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3313) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3314) void ext4_mb_mark_bb(struct super_block *sb, ext4_fsblk_t block,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3315) int len, int state)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3316) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3317) struct buffer_head *bitmap_bh = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3318) struct ext4_group_desc *gdp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3319) struct buffer_head *gdp_bh;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3320) struct ext4_sb_info *sbi = EXT4_SB(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3321) ext4_group_t group;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3322) ext4_grpblk_t blkoff;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3323) int i, err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3324) int already;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3325) unsigned int clen, clen_changed, thisgrp_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3326)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3327) while (len > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3328) ext4_get_group_no_and_offset(sb, block, &group, &blkoff);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3329)
		/*
		 * Check to see if we are freeing blocks across a group
		 * boundary.
		 * With flex_bg, (block, len) may span more than one group;
		 * in that case we need to get the corresponding group
		 * metadata to work with, so this while loop handles one
		 * group per iteration.
		 */
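		/*
		 * Worked example (assuming 32768 blocks per group and no
		 * bigalloc, so one cluster == one block): for block 40000
		 * and len 30000, ext4_get_group_no_and_offset() yields
		 * group 1, blkoff 7232, so thisgrp_len =
		 * min(30000, 32768 - 7232) = 25536 blocks are handled in
		 * this pass; the remaining 4464 blocks start at block
		 * 65536, i.e. group 2, and are picked up next time around.
		 */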
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3338) thisgrp_len = min_t(unsigned int, (unsigned int)len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3339) EXT4_BLOCKS_PER_GROUP(sb) - EXT4_C2B(sbi, blkoff));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3340) clen = EXT4_NUM_B2C(sbi, thisgrp_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3341)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3342) bitmap_bh = ext4_read_block_bitmap(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3343) if (IS_ERR(bitmap_bh)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3344) err = PTR_ERR(bitmap_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3345) bitmap_bh = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3346) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3347) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3348)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3349) err = -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3350) gdp = ext4_get_group_desc(sb, group, &gdp_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3351) if (!gdp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3352) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3353)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3354) ext4_lock_group(sb, group);
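		/*
		 * Count how many clusters are already in the desired
		 * state: "!bit == !state" is true exactly when the bitmap
		 * bit matches the requested state.
		 */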
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3355) already = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3356) for (i = 0; i < clen; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3357) if (!mb_test_bit(blkoff + i, bitmap_bh->b_data) ==
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3358) !state)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3359) already++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3360)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3361) clen_changed = clen - already;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3362) if (state)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3363) ext4_set_bits(bitmap_bh->b_data, blkoff, clen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3364) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3365) mb_test_and_clear_bits(bitmap_bh->b_data, blkoff, clen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3366) if (ext4_has_group_desc_csum(sb) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3367) (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3368) gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3369) ext4_free_group_clusters_set(sb, gdp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3370) ext4_free_clusters_after_init(sb, group, gdp));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3371) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3372) if (state)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3373) clen = ext4_free_group_clusters(sb, gdp) - clen_changed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3374) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3375) clen = ext4_free_group_clusters(sb, gdp) + clen_changed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3376)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3377) ext4_free_group_clusters_set(sb, gdp, clen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3378) ext4_block_bitmap_csum_set(sb, group, gdp, bitmap_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3379) ext4_group_desc_csum_set(sb, group, gdp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3380)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3381) ext4_unlock_group(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3382)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3383) if (sbi->s_log_groups_per_flex) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3384) ext4_group_t flex_group = ext4_flex_group(sbi, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3385) struct flex_groups *fg = sbi_array_rcu_deref(sbi,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3386) s_flex_groups, flex_group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3387)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3388) if (state)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3389) atomic64_sub(clen_changed, &fg->free_clusters);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3390) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3391) atomic64_add(clen_changed, &fg->free_clusters);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3392)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3393) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3394)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3395) err = ext4_handle_dirty_metadata(NULL, NULL, bitmap_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3396) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3397) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3398) sync_dirty_buffer(bitmap_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3399) err = ext4_handle_dirty_metadata(NULL, NULL, gdp_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3400) sync_dirty_buffer(gdp_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3401) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3402) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3403)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3404) block += thisgrp_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3405) len -= thisgrp_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3406) brelse(bitmap_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3407) BUG_ON(len < 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3408) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3409)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3410) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3411) brelse(bitmap_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3412) }
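
/*
 * Usage sketch (illustrative only, with hypothetical block numbers):
 * during fast commit replay the caller just states the final on-disk
 * state of a range and lets ext4_mb_mark_bb() split it on group
 * boundaries:
 */
#if 0
	/* replay an allocation: 8 blocks at physical block 40960 */
	ext4_mb_mark_bb(sb, 40960, 8, 1);
	/* replay the inverse (e.g. a punched hole): mark them free */
	ext4_mb_mark_bb(sb, 40960, 8, 0);
#endif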
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3413)
/*
 * Here we normalize the request for a locality group.
 * Group requests are normalized to s_mb_group_prealloc, which follows
 * s_stripe if a stripe size was set via the mount option.
 * s_mb_group_prealloc can be configured via
 * /sys/fs/ext4/<partition>/mb_group_prealloc
 *
 * XXX: should we try to preallocate more than the group has now?
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3423) static void ext4_mb_normalize_group_request(struct ext4_allocation_context *ac)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3424) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3425) struct super_block *sb = ac->ac_sb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3426) struct ext4_locality_group *lg = ac->ac_lg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3427)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3428) BUG_ON(lg == NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3429) ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3430) mb_debug(sb, "goal %u blocks for locality group\n", ac->ac_g_ex.fe_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3431) }
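
/*
 * Tuning sketch (illustrative): writing 512 to
 * /sys/fs/ext4/<partition>/mb_group_prealloc makes
 * ext4_mb_normalize_group_request() above set a goal of 512 clusters
 * for every locality group request, regardless of the original
 * request size.
 */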
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3432)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3433) /*
 * Normalization means making the request better in terms of
 * size and alignment.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3436) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3437) static noinline_for_stack void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3438) ext4_mb_normalize_request(struct ext4_allocation_context *ac,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3439) struct ext4_allocation_request *ar)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3440) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3441) struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3442) int bsbits, max;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3443) ext4_lblk_t end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3444) loff_t size, start_off;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3445) loff_t orig_size __maybe_unused;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3446) ext4_lblk_t start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3447) struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3448) struct ext4_prealloc_space *pa;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3449)
	/* only normalize data requests; metadata requests
	 * do not need preallocation */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3452) if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3453) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3454)
	/* sometimes the caller may want exact blocks */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3456) if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3457) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3458)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3459) /* caller may indicate that preallocation isn't
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3460) * required (it's a tail, for example) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3461) if (ac->ac_flags & EXT4_MB_HINT_NOPREALLOC)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3462) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3463)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3464) if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3465) ext4_mb_normalize_group_request(ac);
		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3467) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3468)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3469) bsbits = ac->ac_sb->s_blocksize_bits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3470)
	/* first, let's learn the actual file size
	 * assuming the current request is allocated */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3473) size = ac->ac_o_ex.fe_logical + EXT4_C2B(sbi, ac->ac_o_ex.fe_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3474) size = size << bsbits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3475) if (size < i_size_read(ac->ac_inode))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3476) size = i_size_read(ac->ac_inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3477) orig_size = size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3478)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3479) /* max size of free chunks */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3480) max = 2 << bsbits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3481)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3482) #define NRL_CHECK_SIZE(req, size, max, chunk_size) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3483) (req <= (size) || max <= (chunk_size))
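	/*
	 * NRL_CHECK_SIZE reads as: the request belongs in this bucket if
	 * the predicted size fits under the bucket's ceiling, or if the
	 * buddy cannot hand out more than chunk_size in one piece anyway,
	 * so a larger bucket would buy nothing.
	 */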
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3484)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3485) /* first, try to predict filesize */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3486) /* XXX: should this table be tunable? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3487) start_off = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3488) if (size <= 16 * 1024) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3489) size = 16 * 1024;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3490) } else if (size <= 32 * 1024) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3491) size = 32 * 1024;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3492) } else if (size <= 64 * 1024) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3493) size = 64 * 1024;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3494) } else if (size <= 128 * 1024) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3495) size = 128 * 1024;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3496) } else if (size <= 256 * 1024) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3497) size = 256 * 1024;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3498) } else if (size <= 512 * 1024) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3499) size = 512 * 1024;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3500) } else if (size <= 1024 * 1024) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3501) size = 1024 * 1024;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3502) } else if (NRL_CHECK_SIZE(size, 4 * 1024 * 1024, max, 2 * 1024)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3503) start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3504) (21 - bsbits)) << 21;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3505) size = 2 * 1024 * 1024;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3506) } else if (NRL_CHECK_SIZE(size, 8 * 1024 * 1024, max, 4 * 1024)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3507) start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3508) (22 - bsbits)) << 22;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3509) size = 4 * 1024 * 1024;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3510) } else if (NRL_CHECK_SIZE(ac->ac_o_ex.fe_len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3511) (8<<20)>>bsbits, max, 8 * 1024)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3512) start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3513) (23 - bsbits)) << 23;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3514) size = 8 * 1024 * 1024;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3515) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3516) start_off = (loff_t) ac->ac_o_ex.fe_logical << bsbits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3517) size = (loff_t) EXT4_C2B(EXT4_SB(ac->ac_sb),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3518) ac->ac_o_ex.fe_len) << bsbits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3519) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3520) size = size >> bsbits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3521) start = start_off >> bsbits;
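
	/*
	 * Worked example of the bucket arithmetic (assuming 4k blocks,
	 * so bsbits == 12): a request at logical block 1000 whose
	 * predicted size lands in the 2 MB bucket gives
	 *
	 *   start_off = (1000 >> (21 - 12)) << 21 = 1 << 21 = 2 MB
	 *   start     = 2 MB >> 12 = block 512
	 *   size      = 2 MB >> 12 = 512 blocks
	 *
	 * i.e. a 2 MB-aligned window [512, 1024) that still covers the
	 * original request.
	 */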
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3522)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3523) /* don't cover already allocated blocks in selected range */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3524) if (ar->pleft && start <= ar->lleft) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3525) size -= ar->lleft + 1 - start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3526) start = ar->lleft + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3527) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3528) if (ar->pright && start + size - 1 >= ar->lright)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3529) size -= start + size - ar->lright;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3530)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3531) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3532) * Trim allocation request for filesystems with artificially small
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3533) * groups.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3534) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3535) if (size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3536) size = EXT4_BLOCKS_PER_GROUP(ac->ac_sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3537)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3538) end = start + size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3539)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3540) /* check we don't cross already preallocated blocks */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3541) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3542) list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3543) ext4_lblk_t pa_end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3544)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3545) if (pa->pa_deleted)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3546) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3547) spin_lock(&pa->pa_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3548) if (pa->pa_deleted) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3549) spin_unlock(&pa->pa_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3550) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3551) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3552)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3553) pa_end = pa->pa_lstart + EXT4_C2B(EXT4_SB(ac->ac_sb),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3554) pa->pa_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3555)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3556) /* PA must not overlap original request */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3557) BUG_ON(!(ac->ac_o_ex.fe_logical >= pa_end ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3558) ac->ac_o_ex.fe_logical < pa->pa_lstart));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3559)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3560) /* skip PAs this normalized request doesn't overlap with */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3561) if (pa->pa_lstart >= end || pa_end <= start) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3562) spin_unlock(&pa->pa_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3563) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3564) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3565) BUG_ON(pa->pa_lstart <= start && pa_end >= end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3566)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3567) /* adjust start or end to be adjacent to this pa */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3568) if (pa_end <= ac->ac_o_ex.fe_logical) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3569) BUG_ON(pa_end < start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3570) start = pa_end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3571) } else if (pa->pa_lstart > ac->ac_o_ex.fe_logical) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3572) BUG_ON(pa->pa_lstart > end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3573) end = pa->pa_lstart;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3574) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3575) spin_unlock(&pa->pa_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3576) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3577) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3578) size = end - start;
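
	/*
	 * Worked example of the trimming above (hypothetical numbers):
	 * with a normalized window [0, 512), an existing PA covering
	 * logical [0, 64), and the original request at logical 100,
	 * pa_end == 64 <= 100, so start is moved up to 64 and the window
	 * shrinks to [64, 512) -- smaller, but still containing the
	 * original request.
	 */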
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3579)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3580) /* XXX: extra loop to check we really don't overlap preallocations */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3581) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3582) list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3583) ext4_lblk_t pa_end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3584)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3585) spin_lock(&pa->pa_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3586) if (pa->pa_deleted == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3587) pa_end = pa->pa_lstart + EXT4_C2B(EXT4_SB(ac->ac_sb),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3588) pa->pa_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3589) BUG_ON(!(start >= pa_end || end <= pa->pa_lstart));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3590) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3591) spin_unlock(&pa->pa_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3592) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3593) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3594)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3595) if (start + size <= ac->ac_o_ex.fe_logical &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3596) start > ac->ac_o_ex.fe_logical) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3597) ext4_msg(ac->ac_sb, KERN_ERR,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3598) "start %lu, size %lu, fe_logical %lu",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3599) (unsigned long) start, (unsigned long) size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3600) (unsigned long) ac->ac_o_ex.fe_logical);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3601) BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3602) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3603) BUG_ON(size <= 0 || size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3604)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3605) /* now prepare goal request */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3606)
	/* XXX: is it better to align blocks with respect to logical
	 * placement, or to satisfy a big request as is? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3609) ac->ac_g_ex.fe_logical = start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3610) ac->ac_g_ex.fe_len = EXT4_NUM_B2C(sbi, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3611)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3612) /* define goal start in order to merge */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3613) if (ar->pright && (ar->lright == (start + size))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3614) /* merge to the right */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3615) ext4_get_group_no_and_offset(ac->ac_sb, ar->pright - size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3616) &ac->ac_f_ex.fe_group,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3617) &ac->ac_f_ex.fe_start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3618) ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3619) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3620) if (ar->pleft && (ar->lleft + 1 == start)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3621) /* merge to the left */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3622) ext4_get_group_no_and_offset(ac->ac_sb, ar->pleft + 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3623) &ac->ac_f_ex.fe_group,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3624) &ac->ac_f_ex.fe_start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3625) ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3626) }
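
	/*
	 * Example of the merge hints (hypothetical mapping): if logical
	 * block ar->lleft == 99 is mapped at physical ar->pleft == 5000
	 * and the normalized start is logical 100, the goal is placed at
	 * physical 5001 so that the new extent can merge with its left
	 * neighbour.
	 */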
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3627)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3628) mb_debug(ac->ac_sb, "goal: %lld(was %lld) blocks at %u\n", size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3629) orig_size, start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3630) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3631)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3632) static void ext4_mb_collect_stats(struct ext4_allocation_context *ac)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3633) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3634) struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3635)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3636) if (sbi->s_mb_stats && ac->ac_g_ex.fe_len > 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3637) atomic_inc(&sbi->s_bal_reqs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3638) atomic_add(ac->ac_b_ex.fe_len, &sbi->s_bal_allocated);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3639) if (ac->ac_b_ex.fe_len >= ac->ac_o_ex.fe_len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3640) atomic_inc(&sbi->s_bal_success);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3641) atomic_add(ac->ac_found, &sbi->s_bal_ex_scanned);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3642) if (ac->ac_g_ex.fe_start == ac->ac_b_ex.fe_start &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3643) ac->ac_g_ex.fe_group == ac->ac_b_ex.fe_group)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3644) atomic_inc(&sbi->s_bal_goals);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3645) if (ac->ac_found > sbi->s_mb_max_to_scan)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3646) atomic_inc(&sbi->s_bal_breaks);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3647) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3648)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3649) if (ac->ac_op == EXT4_MB_HISTORY_ALLOC)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3650) trace_ext4_mballoc_alloc(ac);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3651) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3652) trace_ext4_mballoc_prealloc(ac);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3653) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3654)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3655) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3656) * Called on failure; free up any blocks from the inode PA for this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3657) * context. We don't need this for MB_GROUP_PA because we only change
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3658) * pa_free in ext4_mb_release_context(), but on failure, we've already
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3659) * zeroed out ac->ac_b_ex.fe_len, so group_pa->pa_free is not changed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3660) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3661) static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3662) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3663) struct ext4_prealloc_space *pa = ac->ac_pa;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3664) struct ext4_buddy e4b;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3665) int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3666)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3667) if (pa == NULL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3668) if (ac->ac_f_ex.fe_len == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3669) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3670) err = ext4_mb_load_buddy(ac->ac_sb, ac->ac_f_ex.fe_group, &e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3671) if (err) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3672) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3673) * This should never happen since we pin the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3674) * pages in the ext4_allocation_context so
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3675) * ext4_mb_load_buddy() should never fail.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3676) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3677) WARN(1, "mb_load_buddy failed (%d)", err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3678) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3679) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3680) ext4_lock_group(ac->ac_sb, ac->ac_f_ex.fe_group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3681) mb_free_blocks(ac->ac_inode, &e4b, ac->ac_f_ex.fe_start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3682) ac->ac_f_ex.fe_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3683) ext4_unlock_group(ac->ac_sb, ac->ac_f_ex.fe_group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3684) ext4_mb_unload_buddy(&e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3685) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3686) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3687) if (pa->pa_type == MB_INODE_PA)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3688) pa->pa_free += ac->ac_b_ex.fe_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3689) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3690)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3691) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3692) * use blocks preallocated to inode
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3693) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3694) static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3695) struct ext4_prealloc_space *pa)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3696) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3697) struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3698) ext4_fsblk_t start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3699) ext4_fsblk_t end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3700) int len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3701)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3702) /* found preallocated blocks, use them */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3703) start = pa->pa_pstart + (ac->ac_o_ex.fe_logical - pa->pa_lstart);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3704) end = min(pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3705) start + EXT4_C2B(sbi, ac->ac_o_ex.fe_len));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3706) len = EXT4_NUM_B2C(sbi, end - start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3707) ext4_get_group_no_and_offset(ac->ac_sb, start, &ac->ac_b_ex.fe_group,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3708) &ac->ac_b_ex.fe_start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3709) ac->ac_b_ex.fe_len = len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3710) ac->ac_status = AC_STATUS_FOUND;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3711) ac->ac_pa = pa;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3712)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3713) BUG_ON(start < pa->pa_pstart);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3714) BUG_ON(end > pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3715) BUG_ON(pa->pa_free < len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3716) pa->pa_free -= len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3717)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3718) mb_debug(ac->ac_sb, "use %llu/%d from inode pa %p\n", start, len, pa);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3719) }
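
/*
 * Worked example for the window arithmetic above (no bigalloc, so
 * EXT4_C2B() is the identity): with pa_lstart == 100, pa_pstart == 5100,
 * pa_len == 16 and a request for 8 blocks at logical 110,
 *
 *   start = 5100 + (110 - 100)       = 5110
 *   end   = min(5100 + 16, 5110 + 8) = 5116
 *   len   = 5116 - 5110              = 6
 *
 * i.e. the allocation is clipped to the tail of the PA and pa_free
 * drops by 6.
 */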
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3720)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3721) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3722) * use blocks preallocated to locality group
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3723) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3724) static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3725) struct ext4_prealloc_space *pa)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3726) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3727) unsigned int len = ac->ac_o_ex.fe_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3728)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3729) ext4_get_group_no_and_offset(ac->ac_sb, pa->pa_pstart,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3730) &ac->ac_b_ex.fe_group,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3731) &ac->ac_b_ex.fe_start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3732) ac->ac_b_ex.fe_len = len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3733) ac->ac_status = AC_STATUS_FOUND;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3734) ac->ac_pa = pa;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3735)
	/*
	 * We don't correct pa_pstart or pa_len here to avoid a possible
	 * race when the group is being loaded concurrently; instead we
	 * correct the pa later, after blocks are marked in the on-disk
	 * bitmap -- see ext4_mb_release_context().
	 * Other CPUs are prevented from allocating from this pa by lg_mutex.
	 */
	mb_debug(ac->ac_sb, "use %u/%u from group pa %p\n",
		 pa->pa_lstart, len, pa);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3744) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3745)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3746) /*
 * Return the prealloc space with the minimal distance from the goal
 * block. @cpa is the prealloc space with the currently known minimal
 * distance from the goal block.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3751) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3752) static struct ext4_prealloc_space *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3753) ext4_mb_check_group_pa(ext4_fsblk_t goal_block,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3754) struct ext4_prealloc_space *pa,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3755) struct ext4_prealloc_space *cpa)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3756) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3757) ext4_fsblk_t cur_distance, new_distance;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3758)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3759) if (cpa == NULL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3760) atomic_inc(&pa->pa_count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3761) return pa;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3762) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3763) cur_distance = abs(goal_block - cpa->pa_pstart);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3764) new_distance = abs(goal_block - pa->pa_pstart);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3765)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3766) if (cur_distance <= new_distance)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3767) return cpa;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3768)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3769) /* drop the previous reference */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3770) atomic_dec(&cpa->pa_count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3771) atomic_inc(&pa->pa_count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3772) return pa;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3773) }
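
/*
 * Worked example (hypothetical block numbers): with goal_block == 1000,
 * a current best @cpa at pa_pstart == 900 (distance 100) loses to a @pa
 * at pa_pstart == 1050 (distance 50); the reference counts move with
 * the choice, so whichever PA is returned stays pinned.
 */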
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3774)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3775) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3776) * search goal blocks in preallocated space
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3777) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3778) static noinline_for_stack bool
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3779) ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3780) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3781) struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3782) int order, i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3783) struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3784) struct ext4_locality_group *lg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3785) struct ext4_prealloc_space *pa, *cpa = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3786) ext4_fsblk_t goal_block;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3787)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3788) /* only data can be preallocated */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3789) if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3790) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3791)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3792) /* first, try per-file preallocation */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3793) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3794) list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3795)
		/* none of the fields in this condition change,
		 * so we can skip locking them */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3798) if (ac->ac_o_ex.fe_logical < pa->pa_lstart ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3799) ac->ac_o_ex.fe_logical >= (pa->pa_lstart +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3800) EXT4_C2B(sbi, pa->pa_len)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3801) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3802)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3803) /* non-extent files can't have physical blocks past 2^32 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3804) if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3805) (pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len) >
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3806) EXT4_MAX_BLOCK_FILE_PHYS))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3807) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3808)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3809) /* found preallocated blocks, use them */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3810) spin_lock(&pa->pa_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3811) if (pa->pa_deleted == 0 && pa->pa_free) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3812) atomic_inc(&pa->pa_count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3813) ext4_mb_use_inode_pa(ac, pa);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3814) spin_unlock(&pa->pa_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3815) ac->ac_criteria = 10;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3816) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3817) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3818) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3819) spin_unlock(&pa->pa_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3820) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3821) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3822)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3823) /* can we use group allocation? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3824) if (!(ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3825) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3826)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3827) /* inode may have no locality group for some reason */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3828) lg = ac->ac_lg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3829) if (lg == NULL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3830) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3831) order = fls(ac->ac_o_ex.fe_len) - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3832) if (order > PREALLOC_TB_SIZE - 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3833) /* The max size of hash table is PREALLOC_TB_SIZE */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3834) order = PREALLOC_TB_SIZE - 1;
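
	/*
	 * Example: a request for 12 clusters gives fls(12) - 1 == 3, so
	 * the scan below starts at lg_prealloc_list[3]; lower-order lists
	 * hold PAs too small to satisfy the request anyway.
	 */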
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3835)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3836) goal_block = ext4_grp_offs_to_block(ac->ac_sb, &ac->ac_g_ex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3837) /*
	 * search for the prealloc space with the minimal distance from
	 * the goal block.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3840) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3841) for (i = order; i < PREALLOC_TB_SIZE; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3842) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3843) list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[i],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3844) pa_inode_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3845) spin_lock(&pa->pa_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3846) if (pa->pa_deleted == 0 &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3847) pa->pa_free >= ac->ac_o_ex.fe_len) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3848)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3849) cpa = ext4_mb_check_group_pa(goal_block,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3850) pa, cpa);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3851) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3852) spin_unlock(&pa->pa_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3853) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3854) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3855) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3856) if (cpa) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3857) ext4_mb_use_group_pa(ac, cpa);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3858) ac->ac_criteria = 20;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3859) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3860) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3861) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3862) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3863)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3864) /*
 * This function goes through all blocks freed in the group but not yet
 * committed and marks them used in the in-core bitmap, so that a buddy
 * generated from this bitmap cannot hand out blocks whose freeing has
 * not been committed yet.
 * Must be called with the ext4 group lock held.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3869) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3870) static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3871) ext4_group_t group)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3872) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3873) struct rb_node *n;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3874) struct ext4_group_info *grp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3875) struct ext4_free_data *entry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3876)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3877) grp = ext4_get_group_info(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3878) n = rb_first(&(grp->bb_free_root));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3879)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3880) while (n) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3881) entry = rb_entry(n, struct ext4_free_data, efd_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3882) ext4_set_bits(bitmap, entry->efd_start_cluster, entry->efd_count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3883) n = rb_next(n);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3884) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3886) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3887)
/*
 * The function goes through all preallocations in this group and marks
 * them used in the in-core bitmap. The buddy must be generated from
 * this bitmap.
 * Needs to be called with the ext4 group lock held.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3893) static noinline_for_stack
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3894) void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3895) ext4_group_t group)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3896) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3897) struct ext4_group_info *grp = ext4_get_group_info(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3898) struct ext4_prealloc_space *pa;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3899) struct list_head *cur;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3900) ext4_group_t groupnr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3901) ext4_grpblk_t start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3902) int preallocated = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3903) int len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3904)
	/*
	 * All forms of preallocation discard load the group first, so the
	 * only code competing with us is preallocation use: we don't need
	 * any locking here. Note that we do NOT skip preallocations with
	 * pa_deleted set; otherwise we could leave used blocks available
	 * for allocation in the buddy while a concurrent ext4_mb_put_pa()
	 * is dropping the preallocation.
	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3913) list_for_each(cur, &grp->bb_prealloc_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3914) pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3915) spin_lock(&pa->pa_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3916) ext4_get_group_no_and_offset(sb, pa->pa_pstart,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3917) &groupnr, &start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3918) len = pa->pa_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3919) spin_unlock(&pa->pa_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3920) if (unlikely(len == 0))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3921) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3922) BUG_ON(groupnr != group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3923) ext4_set_bits(bitmap, start, len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3924) preallocated += len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3925) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3926) mb_debug(sb, "preallocated %d for group %u\n", preallocated, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3927) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3928)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3929) static void ext4_mb_mark_pa_deleted(struct super_block *sb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3930) struct ext4_prealloc_space *pa)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3931) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3932) struct ext4_inode_info *ei;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3933)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3934) if (pa->pa_deleted) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3935) ext4_warning(sb, "deleted pa, type:%d, pblk:%llu, lblk:%u, len:%d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3936) pa->pa_type, pa->pa_pstart, pa->pa_lstart,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3937) pa->pa_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3938) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3939) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3940)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3941) pa->pa_deleted = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3942)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3943) if (pa->pa_type == MB_INODE_PA) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3944) ei = EXT4_I(pa->pa_inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3945) atomic_dec(&ei->i_prealloc_active);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3946) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3947) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3948)
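/*
 * RCU callback that finally frees a pa. Freeing is deferred through
 * call_rcu() so that lockless walkers of the pa lists (e.g. the
 * list_for_each_entry_rcu() scan under rcu_read_lock() above) can
 * never step on freed memory.
 */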
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3949) static void ext4_mb_pa_callback(struct rcu_head *head)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3950) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3951) struct ext4_prealloc_space *pa;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3952) pa = container_of(head, struct ext4_prealloc_space, u.pa_rcu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3953)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3954) BUG_ON(atomic_read(&pa->pa_count));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3955) BUG_ON(pa->pa_deleted == 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3956) kmem_cache_free(ext4_pspace_cachep, pa);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3957) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3958)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3959) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3960) * drops a reference to preallocated space descriptor
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3961) * if this was the last reference and the space is consumed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3962) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3963) static void ext4_mb_put_pa(struct ext4_allocation_context *ac,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3964) struct super_block *sb, struct ext4_prealloc_space *pa)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3965) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3966) ext4_group_t grp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3967) ext4_fsblk_t grp_blk;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3968)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3969) /* in this short window concurrent discard can set pa_deleted */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3970) spin_lock(&pa->pa_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3971) if (!atomic_dec_and_test(&pa->pa_count) || pa->pa_free != 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3972) spin_unlock(&pa->pa_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3973) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3974) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3975)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3976) if (pa->pa_deleted == 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3977) spin_unlock(&pa->pa_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3978) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3979) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3980)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3981) ext4_mb_mark_pa_deleted(sb, pa);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3982) spin_unlock(&pa->pa_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3983)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3984) grp_blk = pa->pa_pstart;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3985) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3986) * If doing group-based preallocation, pa_pstart may be in the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3987) * next group when pa is used up
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3988) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3989) if (pa->pa_type == MB_GROUP_PA)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3990) grp_blk--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3991)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3992) grp = ext4_get_group_number(sb, grp_blk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3993)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3994) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3995) * possible race:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3996) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3997) * P1 (buddy init) P2 (regular allocation)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3998) * find block B in PA
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3999) * copy on-disk bitmap to buddy
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4000) * mark B in on-disk bitmap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4001) * drop PA from group
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4002) * mark all PAs in buddy
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4003) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4004) * thus, P1 initializes buddy with B available. to prevent this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4005) * we make "copy" and "mark all PAs" atomic and serialize "drop PA"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4006) * against that pair
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4007) */
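	/*
	 * In other words, once the group lock is taken the interleaving
	 * collapses to either
	 *   P1: lock, copy bitmap + mark PAs, unlock; P2: lock, drop PA, unlock
	 * or the reverse, so a freshly built buddy can never show B as free.
	 */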
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4008) ext4_lock_group(sb, grp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4009) list_del(&pa->pa_group_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4010) ext4_unlock_group(sb, grp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4011)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4012) spin_lock(pa->pa_obj_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4013) list_del_rcu(&pa->pa_inode_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4014) spin_unlock(pa->pa_obj_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4015)
	call_rcu(&pa->u.pa_rcu, ext4_mb_pa_callback);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4017) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4018)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4019) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4020) * creates new preallocated space for given inode
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4021) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4022) static noinline_for_stack void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4023) ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4024) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4025) struct super_block *sb = ac->ac_sb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4026) struct ext4_sb_info *sbi = EXT4_SB(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4027) struct ext4_prealloc_space *pa;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4028) struct ext4_group_info *grp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4029) struct ext4_inode_info *ei;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4030)
	/* preallocate only when found space is larger than requested */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4032) BUG_ON(ac->ac_o_ex.fe_len >= ac->ac_b_ex.fe_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4033) BUG_ON(ac->ac_status != AC_STATUS_FOUND);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4034) BUG_ON(!S_ISREG(ac->ac_inode->i_mode));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4035) BUG_ON(ac->ac_pa == NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4036)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4037) pa = ac->ac_pa;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4038)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4039) if (ac->ac_b_ex.fe_len < ac->ac_g_ex.fe_len) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4040) int winl;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4041) int wins;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4042) int win;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4043) int offs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4044)
		/* we can't allocate as much as the normalizer wants, so
		 * the found space must get a proper lstart to cover the
		 * original request */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4048) BUG_ON(ac->ac_g_ex.fe_logical > ac->ac_o_ex.fe_logical);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4049) BUG_ON(ac->ac_g_ex.fe_len < ac->ac_o_ex.fe_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4050)
		/* we're limited by the original request in that the
		 * logical block must be covered anyway;
		 * winl is the window we can move our chunk within */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4054) winl = ac->ac_o_ex.fe_logical - ac->ac_g_ex.fe_logical;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4055)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4056) /* also, we should cover whole original request */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4057) wins = EXT4_C2B(sbi, ac->ac_b_ex.fe_len - ac->ac_o_ex.fe_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4058)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4059) /* the smallest one defines real window */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4060) win = min(winl, wins);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4061)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4062) offs = ac->ac_o_ex.fe_logical %
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4063) EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4064) if (offs && offs < win)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4065) win = offs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4066)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4067) ac->ac_b_ex.fe_logical = ac->ac_o_ex.fe_logical -
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4068) EXT4_NUM_B2C(sbi, win);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4069) BUG_ON(ac->ac_o_ex.fe_logical < ac->ac_b_ex.fe_logical);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4070) BUG_ON(ac->ac_o_ex.fe_len > ac->ac_b_ex.fe_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4071) }
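	/*
	 * Worked example (hypothetical numbers, clusters == blocks):
	 * original request [10, 12), normalized goal [8, 16), but only
	 * 6 blocks found. Then winl = 10 - 8 = 2, wins = 6 - 2 = 4,
	 * win = min(2, 4) = 2; offs = 10 % 6 = 4 is not below win, so
	 * win stays 2 and fe_logical becomes 10 - 2 = 8: the chunk
	 * [8, 14) still covers the original [10, 12).
	 */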
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4072)
	/* preallocation can change ac_b_ex, thus we store the actually
	 * allocated blocks for history */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4075) ac->ac_f_ex = ac->ac_b_ex;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4076)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4077) pa->pa_lstart = ac->ac_b_ex.fe_logical;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4078) pa->pa_pstart = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4079) pa->pa_len = ac->ac_b_ex.fe_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4080) pa->pa_free = pa->pa_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4081) spin_lock_init(&pa->pa_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4082) INIT_LIST_HEAD(&pa->pa_inode_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4083) INIT_LIST_HEAD(&pa->pa_group_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4084) pa->pa_deleted = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4085) pa->pa_type = MB_INODE_PA;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4086)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4087) mb_debug(sb, "new inode pa %p: %llu/%d for %u\n", pa, pa->pa_pstart,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4088) pa->pa_len, pa->pa_lstart);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4089) trace_ext4_mb_new_inode_pa(ac, pa);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4090)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4091) ext4_mb_use_inode_pa(ac, pa);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4092) atomic_add(pa->pa_free, &sbi->s_mb_preallocated);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4093)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4094) ei = EXT4_I(ac->ac_inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4095) grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4096)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4097) pa->pa_obj_lock = &ei->i_prealloc_lock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4098) pa->pa_inode = ac->ac_inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4099)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4100) list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4101)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4102) spin_lock(pa->pa_obj_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4103) list_add_rcu(&pa->pa_inode_list, &ei->i_prealloc_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4104) spin_unlock(pa->pa_obj_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4105) atomic_inc(&ei->i_prealloc_active);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4106) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4107)
/*
 * creates new preallocated space for the locality group this inode
 * belongs to
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4111) static noinline_for_stack void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4112) ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4113) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4114) struct super_block *sb = ac->ac_sb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4115) struct ext4_locality_group *lg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4116) struct ext4_prealloc_space *pa;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4117) struct ext4_group_info *grp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4118)
	/* preallocate only when found space is larger than requested */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4120) BUG_ON(ac->ac_o_ex.fe_len >= ac->ac_b_ex.fe_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4121) BUG_ON(ac->ac_status != AC_STATUS_FOUND);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4122) BUG_ON(!S_ISREG(ac->ac_inode->i_mode));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4123) BUG_ON(ac->ac_pa == NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4124)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4125) pa = ac->ac_pa;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4126)
	/* preallocation can change ac_b_ex, thus we store the actually
	 * allocated blocks for history */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4129) ac->ac_f_ex = ac->ac_b_ex;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4130)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4131) pa->pa_pstart = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4132) pa->pa_lstart = pa->pa_pstart;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4133) pa->pa_len = ac->ac_b_ex.fe_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4134) pa->pa_free = pa->pa_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4135) spin_lock_init(&pa->pa_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4136) INIT_LIST_HEAD(&pa->pa_inode_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4137) INIT_LIST_HEAD(&pa->pa_group_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4138) pa->pa_deleted = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4139) pa->pa_type = MB_GROUP_PA;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4140)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4141) mb_debug(sb, "new group pa %p: %llu/%d for %u\n", pa, pa->pa_pstart,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4142) pa->pa_len, pa->pa_lstart);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4143) trace_ext4_mb_new_group_pa(ac, pa);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4144)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4145) ext4_mb_use_group_pa(ac, pa);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4146) atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4147)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4148) grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4149) lg = ac->ac_lg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4150) BUG_ON(lg == NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4151)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4152) pa->pa_obj_lock = &lg->lg_prealloc_lock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4153) pa->pa_inode = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4154)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4155) list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4156)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4157) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4158) * We will later add the new pa to the right bucket
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4159) * after updating the pa_free in ext4_mb_release_context
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4160) */
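	/*
	 * (The bucket is chosen there from the order of the remaining
	 * pa_free -- roughly fls(pa_free); see ext4_mb_add_n_trim() for
	 * the exact rule.)
	 */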
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4161) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4162)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4163) static void ext4_mb_new_preallocation(struct ext4_allocation_context *ac)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4164) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4165) if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4166) ext4_mb_new_group_pa(ac);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4167) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4168) ext4_mb_new_inode_pa(ac);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4169) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4170)
/*
 * finds all unused blocks in the on-disk bitmap and frees them in
 * the in-core bitmap and buddy.
 * @pa must be unlinked from the inode and group lists, so that
 * nobody else can find/use it.
 * The caller MUST hold the group/inode locks.
 * TODO: optimize the case when there are no in-core structures yet
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4179) static noinline_for_stack int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4180) ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4181) struct ext4_prealloc_space *pa)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4182) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4183) struct super_block *sb = e4b->bd_sb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4184) struct ext4_sb_info *sbi = EXT4_SB(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4185) unsigned int end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4186) unsigned int next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4187) ext4_group_t group;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4188) ext4_grpblk_t bit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4189) unsigned long long grp_blk_start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4190) int free = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4191)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4192) BUG_ON(pa->pa_deleted == 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4193) ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4194) grp_blk_start = pa->pa_pstart - EXT4_C2B(sbi, bit);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4195) BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4196) end = bit + pa->pa_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4197)
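	/*
	 * Walk the on-disk bitmap inside the pa range and return every
	 * run of zero (unused) bits to the buddy. Illustrative pass over
	 * bits [8, 16) with in-use pattern 1 1 0 0 1 1 1 0: the first
	 * run found is [10, 12), the second is [15, 16), so free ends
	 * up 3.
	 */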
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4198) while (bit < end) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4199) bit = mb_find_next_zero_bit(bitmap_bh->b_data, end, bit);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4200) if (bit >= end)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4201) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4202) next = mb_find_next_bit(bitmap_bh->b_data, end, bit);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4203) mb_debug(sb, "free preallocated %u/%u in group %u\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4204) (unsigned) ext4_group_first_block_no(sb, group) + bit,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4205) (unsigned) next - bit, (unsigned) group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4206) free += next - bit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4207)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4208) trace_ext4_mballoc_discard(sb, NULL, group, bit, next - bit);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4209) trace_ext4_mb_release_inode_pa(pa, (grp_blk_start +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4210) EXT4_C2B(sbi, bit)),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4211) next - bit);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4212) mb_free_blocks(pa->pa_inode, e4b, bit, next - bit);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4213) bit = next + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4214) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4215) if (free != pa->pa_free) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4216) ext4_msg(e4b->bd_sb, KERN_CRIT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4217) "pa %p: logic %lu, phys. %lu, len %d",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4218) pa, (unsigned long) pa->pa_lstart,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4219) (unsigned long) pa->pa_pstart,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4220) pa->pa_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4221) ext4_grp_locked_error(sb, group, 0, 0, "free %u, pa_free %u",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4222) free, pa->pa_free);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4223) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4224) * pa is already deleted so we use the value obtained
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4225) * from the bitmap and continue.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4226) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4227) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4228) atomic_add(free, &sbi->s_mb_discarded);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4229)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4230) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4231) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4232)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4233) static noinline_for_stack int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4234) ext4_mb_release_group_pa(struct ext4_buddy *e4b,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4235) struct ext4_prealloc_space *pa)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4236) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4237) struct super_block *sb = e4b->bd_sb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4238) ext4_group_t group;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4239) ext4_grpblk_t bit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4240)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4241) trace_ext4_mb_release_group_pa(sb, pa);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4242) BUG_ON(pa->pa_deleted == 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4243) ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4244) BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4245) mb_free_blocks(pa->pa_inode, e4b, bit, pa->pa_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4246) atomic_add(pa->pa_len, &EXT4_SB(sb)->s_mb_discarded);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4247) trace_ext4_mballoc_discard(sb, NULL, group, bit, pa->pa_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4248)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4249) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4250) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4251)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4252) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4253) * releases all preallocations in given group
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4254) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4255) * first, we need to decide discard policy:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4256) * - when do we discard
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4257) * 1) ENOSPC
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4258) * - how many do we discard
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4259) * 1) how many requested
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4260) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4261) static noinline_for_stack int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4262) ext4_mb_discard_group_preallocations(struct super_block *sb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4263) ext4_group_t group, int *busy)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4264) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4265) struct ext4_group_info *grp = ext4_get_group_info(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4266) struct buffer_head *bitmap_bh = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4267) struct ext4_prealloc_space *pa, *tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4268) struct list_head list;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4269) struct ext4_buddy e4b;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4270) int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4271) int free = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4272)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4273) mb_debug(sb, "discard preallocation for group %u\n", group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4274) if (list_empty(&grp->bb_prealloc_list))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4275) goto out_dbg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4276)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4277) bitmap_bh = ext4_read_block_bitmap(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4278) if (IS_ERR(bitmap_bh)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4279) err = PTR_ERR(bitmap_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4280) ext4_error_err(sb, -err,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4281) "Error %d reading block bitmap for %u",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4282) err, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4283) goto out_dbg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4284) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4285)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4286) err = ext4_mb_load_buddy(sb, group, &e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4287) if (err) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4288) ext4_warning(sb, "Error %d loading buddy information for %u",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4289) err, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4290) put_bh(bitmap_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4291) goto out_dbg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4292) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4293)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4294) INIT_LIST_HEAD(&list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4295) ext4_lock_group(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4296) list_for_each_entry_safe(pa, tmp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4297) &grp->bb_prealloc_list, pa_group_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4298) spin_lock(&pa->pa_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4299) if (atomic_read(&pa->pa_count)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4300) spin_unlock(&pa->pa_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4301) *busy = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4302) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4303) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4304) if (pa->pa_deleted) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4305) spin_unlock(&pa->pa_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4306) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4307) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4308)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4309) /* seems this one can be freed ... */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4310) ext4_mb_mark_pa_deleted(sb, pa);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4311)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4312) if (!free)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4313) this_cpu_inc(discard_pa_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4314)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4315) /* we can trust pa_free ... */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4316) free += pa->pa_free;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4317)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4318) spin_unlock(&pa->pa_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4319)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4320) list_del(&pa->pa_group_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4321) list_add(&pa->u.pa_tmp_list, &list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4322) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4323)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4324) /* now free all selected PAs */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4325) list_for_each_entry_safe(pa, tmp, &list, u.pa_tmp_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4326)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4327) /* remove from object (inode or locality group) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4328) spin_lock(pa->pa_obj_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4329) list_del_rcu(&pa->pa_inode_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4330) spin_unlock(pa->pa_obj_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4331)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4332) if (pa->pa_type == MB_GROUP_PA)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4333) ext4_mb_release_group_pa(&e4b, pa);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4334) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4335) ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4336)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4337) list_del(&pa->u.pa_tmp_list);
		call_rcu(&pa->u.pa_rcu, ext4_mb_pa_callback);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4339) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4340)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4341) ext4_unlock_group(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4342) ext4_mb_unload_buddy(&e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4343) put_bh(bitmap_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4344) out_dbg:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4345) mb_debug(sb, "discarded (%d) blocks preallocated for group %u bb_free (%d)\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4346) free, group, grp->bb_free);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4347) return free;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4348) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4349)
/*
 * releases all unused preallocated blocks for a given inode
 *
 * It's important to discard preallocations under i_data_sem:
 * we don't want another block to be served from the prealloc
 * space while we are discarding the inode prealloc space.
 *
 * FIXME!! Make sure it is valid at all the call sites
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4359) void ext4_discard_preallocations(struct inode *inode, unsigned int needed)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4360) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4361) struct ext4_inode_info *ei = EXT4_I(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4362) struct super_block *sb = inode->i_sb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4363) struct buffer_head *bitmap_bh = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4364) struct ext4_prealloc_space *pa, *tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4365) ext4_group_t group = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4366) struct list_head list;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4367) struct ext4_buddy e4b;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4368) int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4369)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4370) if (!S_ISREG(inode->i_mode)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4371) /*BUG_ON(!list_empty(&ei->i_prealloc_list));*/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4372) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4373) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4374)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4375) if (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4376) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4377)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4378) mb_debug(sb, "discard preallocation for inode %lu\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4379) inode->i_ino);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4380) trace_ext4_discard_preallocations(inode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4381) atomic_read(&ei->i_prealloc_active), needed);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4382)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4383) INIT_LIST_HEAD(&list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4384)
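	/* needed == 0 means "discard everything" */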
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4385) if (needed == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4386) needed = UINT_MAX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4387)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4388) repeat:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4389) /* first, collect all pa's in the inode */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4390) spin_lock(&ei->i_prealloc_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4391) while (!list_empty(&ei->i_prealloc_list) && needed) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4392) pa = list_entry(ei->i_prealloc_list.prev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4393) struct ext4_prealloc_space, pa_inode_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4394) BUG_ON(pa->pa_obj_lock != &ei->i_prealloc_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4395) spin_lock(&pa->pa_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4396) if (atomic_read(&pa->pa_count)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4397) /* this shouldn't happen often - nobody should
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4398) * use preallocation while we're discarding it */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4399) spin_unlock(&pa->pa_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4400) spin_unlock(&ei->i_prealloc_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4401) ext4_msg(sb, KERN_ERR,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4402) "uh-oh! used pa while discarding");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4403) WARN_ON(1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4404) schedule_timeout_uninterruptible(HZ);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4405) goto repeat;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4406)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4407) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4408) if (pa->pa_deleted == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4409) ext4_mb_mark_pa_deleted(sb, pa);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4410) spin_unlock(&pa->pa_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4411) list_del_rcu(&pa->pa_inode_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4412) list_add(&pa->u.pa_tmp_list, &list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4413) needed--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4414) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4415) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4416)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4417) /* someone is deleting pa right now */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4418) spin_unlock(&pa->pa_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4419) spin_unlock(&ei->i_prealloc_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4420)
		/* we have to wait here because pa_deleted
		 * doesn't mean pa is already unlinked from
		 * the list. As we might be called from
		 * ->clear_inode(), the inode will get freed,
		 * and a concurrent thread unlinking pa from
		 * the inode's list may then access already
		 * freed memory: bad, bad, bad */

		/* XXX: if this happens too often, we could
		 * add a flag to force waiting only in case
		 * of ->clear_inode(), but not in case of a
		 * regular truncate */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4433) schedule_timeout_uninterruptible(HZ);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4434) goto repeat;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4435) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4436) spin_unlock(&ei->i_prealloc_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4437)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4438) list_for_each_entry_safe(pa, tmp, &list, u.pa_tmp_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4439) BUG_ON(pa->pa_type != MB_INODE_PA);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4440) group = ext4_get_group_number(sb, pa->pa_pstart);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4441)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4442) err = ext4_mb_load_buddy_gfp(sb, group, &e4b,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4443) GFP_NOFS|__GFP_NOFAIL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4444) if (err) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4445) ext4_error_err(sb, -err, "Error %d loading buddy information for %u",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4446) err, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4447) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4448) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4449)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4450) bitmap_bh = ext4_read_block_bitmap(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4451) if (IS_ERR(bitmap_bh)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4452) err = PTR_ERR(bitmap_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4453) ext4_error_err(sb, -err, "Error %d reading block bitmap for %u",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4454) err, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4455) ext4_mb_unload_buddy(&e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4456) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4457) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4458)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4459) ext4_lock_group(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4460) list_del(&pa->pa_group_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4461) ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4462) ext4_unlock_group(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4463)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4464) ext4_mb_unload_buddy(&e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4465) put_bh(bitmap_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4466)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4467) list_del(&pa->u.pa_tmp_list);
		call_rcu(&pa->u.pa_rcu, ext4_mb_pa_callback);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4469) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4470) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4471)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4472) static int ext4_mb_pa_alloc(struct ext4_allocation_context *ac)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4473) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4474) struct ext4_prealloc_space *pa;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4475)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4476) BUG_ON(ext4_pspace_cachep == NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4477) pa = kmem_cache_zalloc(ext4_pspace_cachep, GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4478) if (!pa)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4479) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4480) atomic_set(&pa->pa_count, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4481) ac->ac_pa = pa;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4482) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4483) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4484)
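/*
 * Releases a pa that presumably still holds only the initial reference
 * taken in ext4_mb_pa_alloc() and was never published on any pa list;
 * the WARN_ON below fires if somebody else grabbed a reference.
 */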
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4485) static void ext4_mb_pa_free(struct ext4_allocation_context *ac)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4486) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4487) struct ext4_prealloc_space *pa = ac->ac_pa;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4488)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4489) BUG_ON(!pa);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4490) ac->ac_pa = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4491) WARN_ON(!atomic_dec_and_test(&pa->pa_count));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4492) kmem_cache_free(ext4_pspace_cachep, pa);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4493) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4494)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4495) #ifdef CONFIG_EXT4_DEBUG
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4496) static inline void ext4_mb_show_pa(struct super_block *sb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4497) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4498) ext4_group_t i, ngroups;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4499)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4500) if (ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4501) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4502)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4503) ngroups = ext4_get_groups_count(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4504) mb_debug(sb, "groups: ");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4505) for (i = 0; i < ngroups; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4506) struct ext4_group_info *grp = ext4_get_group_info(sb, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4507) struct ext4_prealloc_space *pa;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4508) ext4_grpblk_t start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4509) struct list_head *cur;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4510) ext4_lock_group(sb, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4511) list_for_each(cur, &grp->bb_prealloc_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4512) pa = list_entry(cur, struct ext4_prealloc_space,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4513) pa_group_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4514) spin_lock(&pa->pa_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4515) ext4_get_group_no_and_offset(sb, pa->pa_pstart,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4516) NULL, &start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4517) spin_unlock(&pa->pa_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4518) mb_debug(sb, "PA:%u:%d:%d\n", i, start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4519) pa->pa_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4520) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4521) ext4_unlock_group(sb, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4522) mb_debug(sb, "%u: %d/%d\n", i, grp->bb_free,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4523) grp->bb_fragments);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4524) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4525) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4526)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4527) static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4528) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4529) struct super_block *sb = ac->ac_sb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4530)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4531) if (ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4532) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4533)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4534) mb_debug(sb, "Can't allocate:"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4535) " Allocation context details:");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4536) mb_debug(sb, "status %u flags 0x%x",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4537) ac->ac_status, ac->ac_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4538) mb_debug(sb, "orig %lu/%lu/%lu@%lu, "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4539) "goal %lu/%lu/%lu@%lu, "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4540) "best %lu/%lu/%lu@%lu cr %d",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4541) (unsigned long)ac->ac_o_ex.fe_group,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4542) (unsigned long)ac->ac_o_ex.fe_start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4543) (unsigned long)ac->ac_o_ex.fe_len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4544) (unsigned long)ac->ac_o_ex.fe_logical,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4545) (unsigned long)ac->ac_g_ex.fe_group,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4546) (unsigned long)ac->ac_g_ex.fe_start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4547) (unsigned long)ac->ac_g_ex.fe_len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4548) (unsigned long)ac->ac_g_ex.fe_logical,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4549) (unsigned long)ac->ac_b_ex.fe_group,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4550) (unsigned long)ac->ac_b_ex.fe_start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4551) (unsigned long)ac->ac_b_ex.fe_len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4552) (unsigned long)ac->ac_b_ex.fe_logical,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4553) (int)ac->ac_criteria);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4554) mb_debug(sb, "%u found", ac->ac_found);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4555) ext4_mb_show_pa(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4556) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4557) #else
static inline void ext4_mb_show_pa(struct super_block *sb)
{
}
static inline void ext4_mb_show_ac(struct ext4_allocation_context *ac)
{
	ext4_mb_show_pa(ac->ac_sb);
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4567) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4568)
/*
 * We use locality group preallocation for small files. The size of the
 * file is determined by the current size or the resulting size after
 * allocation, whichever is larger.
 *
 * One can tune this size via /sys/fs/ext4/<partition>/mb_stream_req
 */
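/*
 * Illustrative tuning (the default threshold is 16 blocks at the time
 * of writing):
 *   echo 32 > /sys/fs/ext4/<partition>/mb_stream_req
 * makes files up to 32 blocks long keep using the locality group.
 */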
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4576) static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4577) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4578) struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4579) int bsbits = ac->ac_sb->s_blocksize_bits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4580) loff_t size, isize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4581)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4582) if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4583) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4584)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4585) if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4586) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4587)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4588) size = ac->ac_o_ex.fe_logical + EXT4_C2B(sbi, ac->ac_o_ex.fe_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4589) isize = (i_size_read(ac->ac_inode) + ac->ac_sb->s_blocksize - 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4590) >> bsbits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4591)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4592) if ((size == isize) && !ext4_fs_is_busy(sbi) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4593) !inode_is_open_for_write(ac->ac_inode)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4594) ac->ac_flags |= EXT4_MB_HINT_NOPREALLOC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4595) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4596) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4597)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4598) if (sbi->s_mb_group_prealloc <= 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4599) ac->ac_flags |= EXT4_MB_STREAM_ALLOC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4600) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4601) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4602)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4603) /* don't use group allocation for large files */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4604) size = max(size, isize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4605) if (size > sbi->s_mb_stream_request) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4606) ac->ac_flags |= EXT4_MB_STREAM_ALLOC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4607) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4608) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4609)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4610) BUG_ON(ac->ac_lg != NULL);
	/*
	 * locality group prealloc space is per cpu. The reason for having
	 * per-cpu locality groups is to reduce the contention between block
	 * requests from multiple CPUs.
	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4616) ac->ac_lg = raw_cpu_ptr(sbi->s_locality_groups);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4617)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4618) /* we're going to use group allocation */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4619) ac->ac_flags |= EXT4_MB_HINT_GROUP_ALLOC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4620)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4621) /* serialize all allocations in the group */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4622) mutex_lock(&ac->ac_lg->lg_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4623) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4624)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4625) static noinline_for_stack int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4626) ext4_mb_initialize_context(struct ext4_allocation_context *ac,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4627) struct ext4_allocation_request *ar)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4628) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4629) struct super_block *sb = ar->inode->i_sb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4630) struct ext4_sb_info *sbi = EXT4_SB(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4631) struct ext4_super_block *es = sbi->s_es;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4632) ext4_group_t group;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4633) unsigned int len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4634) ext4_fsblk_t goal;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4635) ext4_grpblk_t block;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4636)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4637) /* we can't allocate > group size */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4638) len = ar->len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4639)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4640) /* just a dirty hack to filter too big requests */
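	/* e.g. with 4k blocks and bigalloc off, EXT4_CLUSTERS_PER_GROUP()
	 * is 32768, so a (hypothetical) 100000-cluster request gets
	 * clamped to 32768 */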
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4641) if (len >= EXT4_CLUSTERS_PER_GROUP(sb))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4642) len = EXT4_CLUSTERS_PER_GROUP(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4643)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4644) /* start searching from the goal */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4645) goal = ar->goal;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4646) if (goal < le32_to_cpu(es->s_first_data_block) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4647) goal >= ext4_blocks_count(es))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4648) goal = le32_to_cpu(es->s_first_data_block);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4649) ext4_get_group_no_and_offset(sb, goal, &group, &block);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4650)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4651) /* set up allocation goals */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4652) ac->ac_b_ex.fe_logical = EXT4_LBLK_CMASK(sbi, ar->logical);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4653) ac->ac_status = AC_STATUS_CONTINUE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4654) ac->ac_sb = sb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4655) ac->ac_inode = ar->inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4656) ac->ac_o_ex.fe_logical = ac->ac_b_ex.fe_logical;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4657) ac->ac_o_ex.fe_group = group;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4658) ac->ac_o_ex.fe_start = block;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4659) ac->ac_o_ex.fe_len = len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4660) ac->ac_g_ex = ac->ac_o_ex;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4661) ac->ac_flags = ar->flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4662)
	/* we have to define the context: we'll work with a file or a
	 * locality group. this is a policy, actually */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4665) ext4_mb_group_or_file(ac);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4666)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4667) mb_debug(sb, "init ac: %u blocks @ %u, goal %u, flags 0x%x, 2^%d, "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4668) "left: %u/%u, right %u/%u to %swritable\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4669) (unsigned) ar->len, (unsigned) ar->logical,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4670) (unsigned) ar->goal, ac->ac_flags, ac->ac_2order,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4671) (unsigned) ar->lleft, (unsigned) ar->pleft,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4672) (unsigned) ar->lright, (unsigned) ar->pright,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4673) inode_is_open_for_write(ar->inode) ? "" : "non-");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4674) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4675)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4676) }
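
/*
 * Illustrative sketch (hypothetical helper, not part of the original
 * code): roughly how a caller might fill in a struct
 * ext4_allocation_request before it reaches this function via
 * ext4_mb_new_blocks(). Real callers derive the goal and the adjacent
 * extent hints from the on-disk layout instead of the simple values
 * used here.
 */
static inline ext4_fsblk_t example_request_blocks(handle_t *handle,
						  struct inode *inode,
						  ext4_lblk_t lblk,
						  unsigned int len, int *errp)
{
	struct ext4_allocation_request ar = {
		.inode = inode,
		.logical = lblk,	/* logical block in the file */
		.goal = ext4_inode_to_goal_block(inode), /* physical hint */
		.len = len,		/* number of blocks wanted */
		.flags = EXT4_MB_HINT_DATA,	/* allocating file data */
	};

	return ext4_mb_new_blocks(handle, &ar, errp);
}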
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4677)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4678) static noinline_for_stack void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4679) ext4_mb_discard_lg_preallocations(struct super_block *sb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4680) struct ext4_locality_group *lg,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4681) int order, int total_entries)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4682) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4683) ext4_group_t group = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4684) struct ext4_buddy e4b;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4685) struct list_head discard_list;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4686) struct ext4_prealloc_space *pa, *tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4687)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4688) mb_debug(sb, "discard locality group preallocation\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4689)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4690) INIT_LIST_HEAD(&discard_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4691)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4692) spin_lock(&lg->lg_prealloc_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4693) list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[order],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4694) pa_inode_list,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4695) lockdep_is_held(&lg->lg_prealloc_lock)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4696) spin_lock(&pa->pa_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4697) if (atomic_read(&pa->pa_count)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4698) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4699) * This is the pa that we just used
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4700) * for block allocation, so don't
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4701) * free it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4702) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4703) spin_unlock(&pa->pa_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4704) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4705) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4706) if (pa->pa_deleted) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4707) spin_unlock(&pa->pa_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4708) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4709) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4710) /* only lg prealloc space */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4711) BUG_ON(pa->pa_type != MB_GROUP_PA);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4712)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4713) /* seems this one can be freed ... */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4714) ext4_mb_mark_pa_deleted(sb, pa);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4715) spin_unlock(&pa->pa_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4716)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4717) list_del_rcu(&pa->pa_inode_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4718) list_add(&pa->u.pa_tmp_list, &discard_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4719)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4720) total_entries--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4721) if (total_entries <= 5) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4722) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4723) * we want to keep only 5 entries,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4724) * allowing the list to grow to 8. This
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4725) * makes sure we don't call discard
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4726) * again too soon for this list.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4727) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4728) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4729) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4730) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4731) spin_unlock(&lg->lg_prealloc_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4732)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4733) list_for_each_entry_safe(pa, tmp, &discard_list, u.pa_tmp_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4734) int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4735)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4736) group = ext4_get_group_number(sb, pa->pa_pstart);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4737) err = ext4_mb_load_buddy_gfp(sb, group, &e4b,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4738) GFP_NOFS|__GFP_NOFAIL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4739) if (err) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4740) ext4_error_err(sb, -err, "Error %d loading buddy information for %u",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4741) err, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4742) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4743) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4744) ext4_lock_group(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4745) list_del(&pa->pa_group_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4746) ext4_mb_release_group_pa(&e4b, pa);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4747) ext4_unlock_group(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4748)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4749) ext4_mb_unload_buddy(&e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4750) list_del(&pa->u.pa_tmp_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4751) call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4752) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4753) }
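
/*
 * Note on the pattern above: candidates are moved onto a private
 * discard_list under lg_prealloc_lock and only released afterwards,
 * because releasing a PA loads the group's buddy (which may sleep)
 * and takes the group lock -- neither may happen under the spinlock.
 */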
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4754)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4755) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4756) * We have incremented pa_count. So it cannot be freed at this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4757) * point. Also we hold lg_mutex. So no parallel allocation is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4758) * possible from this lg. That means pa_free cannot be updated.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4759) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4760) * A parallel ext4_mb_discard_group_preallocations is possible,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4761) * which can cause the lg_prealloc_list to be updated.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4762) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4763)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4764) static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4765) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4766) int order, added = 0, lg_prealloc_count = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4767) struct super_block *sb = ac->ac_sb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4768) struct ext4_locality_group *lg = ac->ac_lg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4769) struct ext4_prealloc_space *tmp_pa, *pa = ac->ac_pa;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4770)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4771) order = fls(pa->pa_free) - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4772) if (order > PREALLOC_TB_SIZE - 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4773) /* The max size of hash table is PREALLOC_TB_SIZE */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4774) order = PREALLOC_TB_SIZE - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4775) /* Add the prealloc space to lg */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4776) spin_lock(&lg->lg_prealloc_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4777) list_for_each_entry_rcu(tmp_pa, &lg->lg_prealloc_list[order],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4778) pa_inode_list,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4779) lockdep_is_held(&lg->lg_prealloc_lock)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4780) spin_lock(&tmp_pa->pa_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4781) if (tmp_pa->pa_deleted) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4782) spin_unlock(&tmp_pa->pa_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4783) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4784) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4785) if (!added && pa->pa_free < tmp_pa->pa_free) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4786) /* Add to the tail of the previous entry */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4787) list_add_tail_rcu(&pa->pa_inode_list,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4788) &tmp_pa->pa_inode_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4789) added = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4790) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4791) * we want to count the total
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4792) * number of entries in the list
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4793) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4794) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4795) spin_unlock(&tmp_pa->pa_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4796) lg_prealloc_count++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4797) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4798) if (!added)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4799) list_add_tail_rcu(&pa->pa_inode_list,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4800) &lg->lg_prealloc_list[order]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4801) spin_unlock(&lg->lg_prealloc_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4802)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4803) /* Now trim the list to no more than 8 elements */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4804) if (lg_prealloc_count > 8)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4805) ext4_mb_discard_lg_preallocations(sb, lg,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4806) order, lg_prealloc_count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4810) }
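
/*
 * Small illustrative helper (assumed, not used above): the bucket index
 * chosen by ext4_mb_add_n_trim() is the floor of log2 of the free
 * cluster count, clamped to the table size. E.g. pa_free == 200 gives
 * fls(200) - 1 == 7, so such a PA lands in bucket 7.
 */
static inline int example_lg_prealloc_order(ext4_grpblk_t pa_free)
{
	int order = fls(pa_free) - 1;

	return min(order, PREALLOC_TB_SIZE - 1);
}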
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4811)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4812) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4813) * if the per-inode prealloc list is too long, trim some PAs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4814) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4815) static void ext4_mb_trim_inode_pa(struct inode *inode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4816) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4817) struct ext4_inode_info *ei = EXT4_I(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4818) struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4819) int count, delta;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4820)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4821) count = atomic_read(&ei->i_prealloc_active);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4822) delta = (sbi->s_mb_max_inode_prealloc >> 2) + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4823) if (count > sbi->s_mb_max_inode_prealloc + delta) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4824) count -= sbi->s_mb_max_inode_prealloc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4825) ext4_discard_preallocations(inode, count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4826) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4827) }
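
/*
 * Worked example (s_mb_max_inode_prealloc value assumed): with a limit
 * of 512, delta is 512/4 + 1 == 129, so trimming only starts once
 * i_prealloc_active exceeds 641; it then discards count - 512 PAs,
 * bringing the list back down to the limit.
 */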
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4828)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4829) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4830) * release all resources we used in the allocation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4831) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4832) static int ext4_mb_release_context(struct ext4_allocation_context *ac)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4833) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4834) struct inode *inode = ac->ac_inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4835) struct ext4_inode_info *ei = EXT4_I(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4836) struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4837) struct ext4_prealloc_space *pa = ac->ac_pa;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4838) if (pa) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4839) if (pa->pa_type == MB_GROUP_PA) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4840) /* see comment in ext4_mb_use_group_pa() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4841) spin_lock(&pa->pa_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4842) pa->pa_pstart += EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4843) pa->pa_lstart += EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4844) pa->pa_free -= ac->ac_b_ex.fe_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4845) pa->pa_len -= ac->ac_b_ex.fe_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4846) spin_unlock(&pa->pa_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4847)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4848) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4849) * We want to add the pa to the right bucket.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4850) * Remove it from the list and, while adding,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4851) * make sure the list to which we are adding
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4852) * doesn't grow too big.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4853) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4854) if (likely(pa->pa_free)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4855) spin_lock(pa->pa_obj_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4856) list_del_rcu(&pa->pa_inode_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4857) spin_unlock(pa->pa_obj_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4858) ext4_mb_add_n_trim(ac);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4859) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4860) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4861)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4862) if (pa->pa_type == MB_INODE_PA) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4863) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4864) * treat the per-inode prealloc list as an LRU list, then try
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4865) * to trim the least recently used PA.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4866) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4867) spin_lock(pa->pa_obj_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4868) list_move(&pa->pa_inode_list, &ei->i_prealloc_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4869) spin_unlock(pa->pa_obj_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4870) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4871)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4872) ext4_mb_put_pa(ac, ac->ac_sb, pa);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4873) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4874) if (ac->ac_bitmap_page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4875) put_page(ac->ac_bitmap_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4876) if (ac->ac_buddy_page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4877) put_page(ac->ac_buddy_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4878) if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4879) mutex_unlock(&ac->ac_lg->lg_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4880) ext4_mb_collect_stats(ac);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4881) ext4_mb_trim_inode_pa(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4882) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4883) }
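
/*
 * Example of the group PA adjustment above (numbers assumed): if a
 * group PA starts at physical block 1000 with pa_free == 32 and the
 * allocation consumed fe_len == 8 clusters (cluster ratio 1), the PA
 * window advances to pa_pstart == 1008 with pa_free == 24 before the
 * PA is re-bucketed by ext4_mb_add_n_trim().
 */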
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4884)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4885) static int ext4_mb_discard_preallocations(struct super_block *sb, int needed)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4886) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4887) ext4_group_t i, ngroups = ext4_get_groups_count(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4888) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4889) int freed = 0, busy = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4890) int retry = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4891)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4892) trace_ext4_mb_discard_preallocations(sb, needed);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4893)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4894) if (needed == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4895) needed = EXT4_CLUSTERS_PER_GROUP(sb) + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4896) repeat:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4897) for (i = 0; i < ngroups && needed > 0; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4898) ret = ext4_mb_discard_group_preallocations(sb, i, &busy);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4899) freed += ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4900) needed -= ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4901) cond_resched();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4902) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4903)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4904) if (needed > 0 && busy && ++retry < 3) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4905) busy = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4906) goto repeat;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4907) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4908)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4909) return freed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4910) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4911)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4912) static bool ext4_mb_discard_preallocations_should_retry(struct super_block *sb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4913) struct ext4_allocation_context *ac, u64 *seq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4914) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4915) int freed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4916) u64 seq_retry = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4917) bool ret = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4918)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4919) freed = ext4_mb_discard_preallocations(sb, ac->ac_o_ex.fe_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4920) if (freed) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4921) ret = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4922) goto out_dbg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4923) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4924) seq_retry = ext4_get_discard_pa_seq_sum();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4925) if (!(ac->ac_flags & EXT4_MB_STRICT_CHECK) || seq_retry != *seq) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4926) ac->ac_flags |= EXT4_MB_STRICT_CHECK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4927) *seq = seq_retry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4928) ret = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4929) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4930)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4931) out_dbg:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4932) mb_debug(sb, "freed %d, retry ? %s\n", freed, ret ? "yes" : "no");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4933) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4934) }
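
/*
 * Example scenario for the sequence check above (assumed): the caller
 * samples *seq before its first allocation attempt. If another CPU
 * runs a discard pass in the meantime, ext4_get_discard_pa_seq_sum()
 * returns a different sum, so even when nothing was freed here we
 * report "retry", since the concurrent discard may have created free
 * space.
 */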
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4935)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4936) static ext4_fsblk_t ext4_mb_new_blocks_simple(handle_t *handle,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4937) struct ext4_allocation_request *ar, int *errp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4938)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4939) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4940) * Main entry point into mballoc to allocate blocks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4941) * It tries to use preallocation first, then falls back
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4942) * to the regular allocator.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4943) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4944) ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4945) struct ext4_allocation_request *ar, int *errp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4946) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4947) struct ext4_allocation_context *ac = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4948) struct ext4_sb_info *sbi;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4949) struct super_block *sb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4950) ext4_fsblk_t block = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4951) unsigned int inquota = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4952) unsigned int reserv_clstrs = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4953) u64 seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4954)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4955) might_sleep();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4956) sb = ar->inode->i_sb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4957) sbi = EXT4_SB(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4958)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4959) trace_ext4_request_blocks(ar);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4960) if (sbi->s_mount_state & EXT4_FC_REPLAY)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4961) return ext4_mb_new_blocks_simple(handle, ar, errp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4962)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4963) /* Allow to use superuser reservation for quota file */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4964) if (ext4_is_quota_file(ar->inode))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4965) ar->flags |= EXT4_MB_USE_ROOT_BLOCKS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4966)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4967) if ((ar->flags & EXT4_MB_DELALLOC_RESERVED) == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4968) /* Without delayed allocation we need to verify
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4969) * there are enough free blocks to do the block allocation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4970) * and that the allocation doesn't exceed the quota limits.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4971) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4972) while (ar->len &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4973) ext4_claim_free_clusters(sbi, ar->len, ar->flags)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4974)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4975) /* let others free the space */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4976) cond_resched();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4977) ar->len = ar->len >> 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4978) }
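/*
 * Worked example for the loop above (free-space numbers assumed): a
 * request for 64 clusters on a filesystem with only ~10 free clusters
 * shrinks 64 -> 32 -> 16 -> 8 until ext4_claim_free_clusters()
 * succeeds, or reaches 0 and we return -ENOSPC below.
 */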
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4979) if (!ar->len) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4980) ext4_mb_show_pa(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4981) *errp = -ENOSPC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4982) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4983) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4984) reserv_clstrs = ar->len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4985) if (ar->flags & EXT4_MB_USE_ROOT_BLOCKS) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4986) dquot_alloc_block_nofail(ar->inode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4987) EXT4_C2B(sbi, ar->len));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4988) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4989) while (ar->len &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4990) dquot_alloc_block(ar->inode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4991) EXT4_C2B(sbi, ar->len))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4992)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4993) ar->flags |= EXT4_MB_HINT_NOPREALLOC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4994) ar->len--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4995) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4996) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4997) inquota = ar->len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4998) if (ar->len == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4999) *errp = -EDQUOT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5000) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5001) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5002) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5003)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5004) ac = kmem_cache_zalloc(ext4_ac_cachep, GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5005) if (!ac) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5006) ar->len = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5007) *errp = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5008) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5009) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5010)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5011) *errp = ext4_mb_initialize_context(ac, ar);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5012) if (*errp) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5013) ar->len = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5014) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5015) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5016)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5017) ac->ac_op = EXT4_MB_HISTORY_PREALLOC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5018) seq = this_cpu_read(discard_pa_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5019) if (!ext4_mb_use_preallocated(ac)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5020) ac->ac_op = EXT4_MB_HISTORY_ALLOC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5021) ext4_mb_normalize_request(ac, ar);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5022)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5023) *errp = ext4_mb_pa_alloc(ac);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5024) if (*errp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5025) goto errout;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5026) repeat:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5027) /* allocate space in core */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5028) *errp = ext4_mb_regular_allocator(ac);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5029) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5030) * The pa allocated above is added to grp->bb_prealloc_list only
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5031) * when we were able to allocate some blocks, i.e. when
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5032) * ac->ac_status == AC_STATUS_FOUND.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5033) * An error from the call above means ac->ac_status != AC_STATUS_FOUND,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5034) * so we have to free the pa here ourselves.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5035) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5036) if (*errp) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5037) ext4_mb_pa_free(ac);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5038) ext4_discard_allocated_blocks(ac);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5039) goto errout;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5040) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5041) if (ac->ac_status == AC_STATUS_FOUND &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5042) ac->ac_o_ex.fe_len >= ac->ac_f_ex.fe_len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5043) ext4_mb_pa_free(ac);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5044) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5045) if (likely(ac->ac_status == AC_STATUS_FOUND)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5046) *errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_clstrs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5047) if (*errp) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5048) ext4_discard_allocated_blocks(ac);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5049) goto errout;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5050) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5051) block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5052) ar->len = ac->ac_b_ex.fe_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5053) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5054) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5055) if (ext4_mb_discard_preallocations_should_retry(sb, ac, &seq))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5056) goto repeat;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5057) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5058) * If block allocation fails then the pa allocated above
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5059) * needs to be freed here as well.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5060) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5061) ext4_mb_pa_free(ac);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5062) *errp = -ENOSPC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5063) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5064)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5065) errout:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5066) if (*errp) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5067) ac->ac_b_ex.fe_len = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5068) ar->len = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5069) ext4_mb_show_ac(ac);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5070) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5071) ext4_mb_release_context(ac);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5072) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5073) if (ac)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5074) kmem_cache_free(ext4_ac_cachep, ac);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5075) if (inquota && ar->len < inquota)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5076) dquot_free_block(ar->inode, EXT4_C2B(sbi, inquota - ar->len));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5077) if (!ar->len) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5078) if ((ar->flags & EXT4_MB_DELALLOC_RESERVED) == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5079) /* release all the reserved blocks if non delalloc */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5080) percpu_counter_sub(&sbi->s_dirtyclusters_counter,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5081) reserv_clstrs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5082) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5083)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5084) trace_ext4_allocate_blocks(ar, (unsigned long long)block);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5085)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5086) return block;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5087) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5088)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5089) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5090) * We can merge two free data extents only if the physical blocks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5091) * are contiguous, AND the extents were freed by the same transaction,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5092) * AND the blocks are associated with the same group.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5093) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5094) static void ext4_try_merge_freed_extent(struct ext4_sb_info *sbi,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5095) struct ext4_free_data *entry,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5096) struct ext4_free_data *new_entry,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5097) struct rb_root *entry_rb_root)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5098) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5099) if ((entry->efd_tid != new_entry->efd_tid) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5100) (entry->efd_group != new_entry->efd_group))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5101) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5102) if (entry->efd_start_cluster + entry->efd_count ==
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5103) new_entry->efd_start_cluster) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5104) new_entry->efd_start_cluster = entry->efd_start_cluster;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5105) new_entry->efd_count += entry->efd_count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5106) } else if (new_entry->efd_start_cluster + new_entry->efd_count ==
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5107) entry->efd_start_cluster) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5108) new_entry->efd_count += entry->efd_count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5109) } else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5110) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5111) spin_lock(&sbi->s_md_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5112) list_del(&entry->efd_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5113) spin_unlock(&sbi->s_md_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5114) rb_erase(&entry->efd_node, entry_rb_root);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5115) kmem_cache_free(ext4_free_data_cachep, entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5116) }
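
/*
 * Worked example for the merge above (cluster numbers assumed): with
 * entry covering clusters [100, 149] (efd_start_cluster 100, efd_count
 * 50) and new_entry covering [150, 179], freed in the same transaction
 * and group, 100 + 50 == 150 holds, so new_entry absorbs entry and
 * becomes [100, 179] with efd_count 80; entry is then unlinked and
 * freed.
 */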
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5117)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5118) static noinline_for_stack int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5119) ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5120) struct ext4_free_data *new_entry)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5121) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5122) ext4_group_t group = e4b->bd_group;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5123) ext4_grpblk_t cluster;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5124) ext4_grpblk_t clusters = new_entry->efd_count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5125) struct ext4_free_data *entry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5126) struct ext4_group_info *db = e4b->bd_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5127) struct super_block *sb = e4b->bd_sb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5128) struct ext4_sb_info *sbi = EXT4_SB(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5129) struct rb_node **n = &db->bb_free_root.rb_node, *node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5130) struct rb_node *parent = NULL, *new_node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5131)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5132) BUG_ON(!ext4_handle_valid(handle));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5133) BUG_ON(e4b->bd_bitmap_page == NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5134) BUG_ON(e4b->bd_buddy_page == NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5135)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5136) new_node = &new_entry->efd_node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5137) cluster = new_entry->efd_start_cluster;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5138)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5139) if (!*n) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5140) /* first free block extent. We need to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5141) * protect the buddy cache from being freed,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5142) * otherwise we'll refresh it from the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5143) * on-disk bitmap and lose not-yet-available
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5144) * blocks */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5145) get_page(e4b->bd_buddy_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5146) get_page(e4b->bd_bitmap_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5147) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5148) while (*n) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5149) parent = *n;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5150) entry = rb_entry(parent, struct ext4_free_data, efd_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5151) if (cluster < entry->efd_start_cluster)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5152) n = &(*n)->rb_left;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5153) else if (cluster >= (entry->efd_start_cluster + entry->efd_count))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5154) n = &(*n)->rb_right;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5155) else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5156) ext4_grp_locked_error(sb, group, 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5157) ext4_group_first_block_no(sb, group) +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5158) EXT4_C2B(sbi, cluster),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5159) "Block already on to-be-freed list");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5160) kmem_cache_free(ext4_free_data_cachep, new_entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5161) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5162) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5163) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5164)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5165) rb_link_node(new_node, parent, n);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5166) rb_insert_color(new_node, &db->bb_free_root);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5167)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5168) /* Now try to see whether the extent can be merged to the left and right */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5169) node = rb_prev(new_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5170) if (node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5171) entry = rb_entry(node, struct ext4_free_data, efd_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5172) ext4_try_merge_freed_extent(sbi, entry, new_entry,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5173) &(db->bb_free_root));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5174) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5175)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5176) node = rb_next(new_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5177) if (node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5178) entry = rb_entry(node, struct ext4_free_data, efd_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5179) ext4_try_merge_freed_extent(sbi, entry, new_entry,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5180) &(db->bb_free_root));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5181) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5182)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5183) spin_lock(&sbi->s_md_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5184) list_add_tail(&new_entry->efd_list, &sbi->s_freed_data_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5185) sbi->s_mb_free_pending += clusters;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5186) spin_unlock(&sbi->s_md_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5187) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5188) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5189)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5190) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5191) * Simple allocator for Ext4 fast commit replay path. It searches for blocks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5192) * linearly starting at the goal block and also excludes the blocks which
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5193) * are going to be in use after fast commit replay.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5194) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5195) static ext4_fsblk_t ext4_mb_new_blocks_simple(handle_t *handle,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5196) struct ext4_allocation_request *ar, int *errp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5197) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5198) struct buffer_head *bitmap_bh;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5199) struct super_block *sb = ar->inode->i_sb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5200) ext4_group_t group;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5201) ext4_grpblk_t blkoff;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5202) ext4_grpblk_t max = EXT4_CLUSTERS_PER_GROUP(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5203) ext4_grpblk_t i = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5204) ext4_fsblk_t goal, block;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5205) struct ext4_super_block *es = EXT4_SB(sb)->s_es;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5206)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5207) goal = ar->goal;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5208) if (goal < le32_to_cpu(es->s_first_data_block) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5209) goal >= ext4_blocks_count(es))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5210) goal = le32_to_cpu(es->s_first_data_block);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5211)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5212) ar->len = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5213) ext4_get_group_no_and_offset(sb, goal, &group, &blkoff);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5214) for (; group < ext4_get_groups_count(sb); group++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5215) bitmap_bh = ext4_read_block_bitmap(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5216) if (IS_ERR(bitmap_bh)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5217) *errp = PTR_ERR(bitmap_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5218) pr_warn("Failed to read block bitmap\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5219) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5220) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5221)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5222) ext4_get_group_no_and_offset(sb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5223) max(ext4_group_first_block_no(sb, group), goal),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5224) NULL, &blkoff);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5225) while (1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5226) i = mb_find_next_zero_bit(bitmap_bh->b_data, max,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5227) blkoff);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5228) if (i >= max)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5229) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5230) if (ext4_fc_replay_check_excluded(sb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5231) ext4_group_first_block_no(sb, group) + i)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5232) blkoff = i + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5233) } else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5234) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5235) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5236) brelse(bitmap_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5237) if (i < max)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5238) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5239) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5240)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5241) if (group >= ext4_get_groups_count(sb) || i >= max) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5242) *errp = -ENOSPC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5243) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5244) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5245)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5246) block = ext4_group_first_block_no(sb, group) + i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5247) ext4_mb_mark_bb(sb, block, 1, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5248) ar->len = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5249)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5250) return block;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5251) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5252)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5253) static void ext4_free_blocks_simple(struct inode *inode, ext4_fsblk_t block,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5254) unsigned long count)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5255) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5256) struct buffer_head *bitmap_bh;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5257) struct super_block *sb = inode->i_sb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5258) struct ext4_group_desc *gdp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5259) struct buffer_head *gdp_bh;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5260) ext4_group_t group;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5261) ext4_grpblk_t blkoff;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5262) int already_freed = 0, err, i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5263)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5264) ext4_get_group_no_and_offset(sb, block, &group, &blkoff);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5265) bitmap_bh = ext4_read_block_bitmap(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5266) if (IS_ERR(bitmap_bh)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5267) err = PTR_ERR(bitmap_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5268) pr_warn("Failed to read block bitmap\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5269) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5270) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5271) gdp = ext4_get_group_desc(sb, group, &gdp_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5272) if (!gdp) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5272) brelse(bitmap_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5273) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5273) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5274)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5275) for (i = 0; i < count; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5276) if (!mb_test_bit(blkoff + i, bitmap_bh->b_data))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5277) already_freed++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5278) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5279) mb_clear_bits(bitmap_bh->b_data, blkoff, count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5280) err = ext4_handle_dirty_metadata(NULL, NULL, bitmap_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5281) if (err) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5281) brelse(bitmap_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5282) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5282) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5283) ext4_free_group_clusters_set(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5284) sb, gdp, ext4_free_group_clusters(sb, gdp) +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5285) count - already_freed);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5286) ext4_block_bitmap_csum_set(sb, group, gdp, bitmap_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5287) ext4_group_desc_csum_set(sb, group, gdp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5288) ext4_handle_dirty_metadata(NULL, NULL, gdp_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5289) sync_dirty_buffer(bitmap_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5290) sync_dirty_buffer(gdp_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5291) brelse(bitmap_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5292) }
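
/*
 * Illustrative caller sketch (hypothetical helper): freeing one
 * metadata block and asking ext4_free_blocks() below to also forget
 * any buffer_head for it. The flag values are the real ones from
 * ext4.h; only the helper itself is made up.
 */
static inline void example_free_meta_block(handle_t *handle,
					   struct inode *inode,
					   ext4_fsblk_t pblk)
{
	ext4_free_blocks(handle, inode, NULL, pblk, 1,
			 EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
}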
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5293)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5294) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5295) * ext4_free_blocks() -- Free given blocks and update quota
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5296) * @handle: handle for this transaction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5297) * @inode: inode
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5298) * @bh: optional buffer of the block to be freed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5299) * @block: starting physical block to be freed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5300) * @count: number of blocks to be freed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5301) * @flags: flags used by ext4_free_blocks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5302) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5303) void ext4_free_blocks(handle_t *handle, struct inode *inode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5304) struct buffer_head *bh, ext4_fsblk_t block,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5305) unsigned long count, int flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5306) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5307) struct buffer_head *bitmap_bh = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5308) struct super_block *sb = inode->i_sb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5309) struct ext4_group_desc *gdp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5310) unsigned int overflow;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5311) ext4_grpblk_t bit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5312) struct buffer_head *gd_bh;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5313) ext4_group_t block_group;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5314) struct ext4_sb_info *sbi;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5315) struct ext4_buddy e4b;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5316) unsigned int count_clusters;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5317) int err = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5318) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5319)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5320) sbi = EXT4_SB(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5321)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5322) if (sbi->s_mount_state & EXT4_FC_REPLAY) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5323) ext4_free_blocks_simple(inode, block, count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5324) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5325) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5326)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5327) might_sleep();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5328) if (bh) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5329) if (block)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5330) BUG_ON(block != bh->b_blocknr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5331) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5332) block = bh->b_blocknr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5333) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5334)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5335) if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5336) !ext4_inode_block_valid(inode, block, count)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5337) ext4_error(sb, "Freeing blocks not in datazone - "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5338) "block = %llu, count = %lu", block, count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5339) goto error_return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5340) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5341)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5342) ext4_debug("freeing block %llu\n", block);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5343) trace_ext4_free_blocks(inode, block, count, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5344)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5345) if (bh && (flags & EXT4_FREE_BLOCKS_FORGET)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5346) BUG_ON(count > 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5347)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5348) ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5349) inode, bh, block);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5350) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5351)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5352) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5353) * If the extent to be freed does not begin on a cluster
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5354) * boundary, we need to deal with partial clusters at the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5355) * beginning and end of the extent. Normally we will free
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5356) * blocks at the beginning or the end unless we are explicitly
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5357) * requested to avoid doing so.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5358) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5359) overflow = EXT4_PBLK_COFF(sbi, block);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5360) if (overflow) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5361) if (flags & EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5362) overflow = sbi->s_cluster_ratio - overflow;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5363) block += overflow;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5364) if (count > overflow)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5365) count -= overflow;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5366) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5367) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5368) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5369) block -= overflow;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5370) count += overflow;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5371) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5372) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5373) overflow = EXT4_LBLK_COFF(sbi, count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5374) if (overflow) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5375) if (flags & EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5376) if (count > overflow)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5377) count -= overflow;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5378) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5379) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5380) } else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5381) count += sbi->s_cluster_ratio - overflow;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5382) }
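/*
 * Worked example for the rounding above (geometry assumed, neither
 * NOFREE_* flag set): with a cluster ratio of 16, freeing block 1000,
 * count 20 first rounds the start down by EXT4_PBLK_COFF() == 8 to
 * block 992, count 28, then rounds the length up by
 * 16 - EXT4_LBLK_COFF() == 4 to count 32, i.e. exactly two whole
 * clusters.
 */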
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5383)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5384) if (!bh && (flags & EXT4_FREE_BLOCKS_FORGET)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5385) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5386) int is_metadata = flags & EXT4_FREE_BLOCKS_METADATA;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5387)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5388) for (i = 0; i < count; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5389) cond_resched();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5390) if (is_metadata)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5391) bh = sb_find_get_block(inode->i_sb, block + i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5392) ext4_forget(handle, is_metadata, inode, bh, block + i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5393) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5394) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5395)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5396) do_more:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5397) overflow = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5398) ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5399)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5400) if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5401) ext4_get_group_info(sb, block_group))))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5402) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5403)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5404) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5405) * Check to see if we are freeing blocks across a group
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5406) * boundary.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5407) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5408) if (EXT4_C2B(sbi, bit) + count > EXT4_BLOCKS_PER_GROUP(sb)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5409) overflow = EXT4_C2B(sbi, bit) + count -
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5410) EXT4_BLOCKS_PER_GROUP(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5411) count -= overflow;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5412) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5413) count_clusters = EXT4_NUM_B2C(sbi, count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5414) bitmap_bh = ext4_read_block_bitmap(sb, block_group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5415) if (IS_ERR(bitmap_bh)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5416) err = PTR_ERR(bitmap_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5417) bitmap_bh = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5418) goto error_return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5419) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5420) gdp = ext4_get_group_desc(sb, block_group, &gd_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5421) if (!gdp) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5422) err = -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5423) goto error_return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5424) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5425)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5426) if (in_range(ext4_block_bitmap(sb, gdp), block, count) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5427) in_range(ext4_inode_bitmap(sb, gdp), block, count) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5428) in_range(block, ext4_inode_table(sb, gdp),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5429) sbi->s_itb_per_group) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5430) in_range(block + count - 1, ext4_inode_table(sb, gdp),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5431) sbi->s_itb_per_group)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5432)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5433) ext4_error(sb, "Freeing blocks in system zone - "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5434) "Block = %llu, count = %lu", block, count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5435) /* err = 0. ext4_std_error should be a no op */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5436) goto error_return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5437) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5438)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5439) BUFFER_TRACE(bitmap_bh, "getting write access");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5440) err = ext4_journal_get_write_access(handle, bitmap_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5441) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5442) goto error_return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5443)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5444) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5445) * We are about to modify some metadata. Call the journal APIs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5446) * to unshare ->b_data if a currently-committing transaction is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5447) * using it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5448) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5449) BUFFER_TRACE(gd_bh, "get_write_access");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5450) err = ext4_journal_get_write_access(handle, gd_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5451) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5452) goto error_return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5453) #ifdef AGGRESSIVE_CHECK
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5454) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5455) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5456) for (i = 0; i < count_clusters; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5457) BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5458) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5459) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5460) trace_ext4_mballoc_free(sb, inode, block_group, bit, count_clusters);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5461)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5462) /* __GFP_NOFAIL: retry infinitely, ignore TIF_MEMDIE and memcg limit. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5463) err = ext4_mb_load_buddy_gfp(sb, block_group, &e4b,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5464) GFP_NOFS|__GFP_NOFAIL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5465) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5466) goto error_return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5467)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5468) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5469) * We need to make sure we don't reuse the freed block until after the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5470) * transaction is committed. We make an exception if the inode is to be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5471) * written in writeback mode since writeback mode has weak data
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5472) * consistency guarantees.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5473) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5474) if (ext4_handle_valid(handle) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5475) ((flags & EXT4_FREE_BLOCKS_METADATA) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5476) !ext4_should_writeback_data(inode))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5477) struct ext4_free_data *new_entry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5478) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5479) * We use __GFP_NOFAIL because ext4_free_blocks() is not allowed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5480) * to fail.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5481) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5482) new_entry = kmem_cache_alloc(ext4_free_data_cachep,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5483) GFP_NOFS|__GFP_NOFAIL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5484) new_entry->efd_start_cluster = bit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5485) new_entry->efd_group = block_group;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5486) new_entry->efd_count = count_clusters;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5487) new_entry->efd_tid = handle->h_transaction->t_tid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5488)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5489) ext4_lock_group(sb, block_group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5490) mb_clear_bits(bitmap_bh->b_data, bit, count_clusters);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5491) ext4_mb_free_metadata(handle, &e4b, new_entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5492) } else {
		/*
		 * We need to update group_info->bb_free and the bitmap
		 * with the group lock held; ext4_mb_generate_buddy() looks
		 * at them under the same lock.
		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5497) if (test_opt(sb, DISCARD)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5498) err = ext4_issue_discard(sb, block_group, bit, count,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5499) NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5500) if (err && err != -EOPNOTSUPP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5501) ext4_msg(sb, KERN_WARNING, "discard request in"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5502) " group:%d block:%d count:%lu failed"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5503) " with %d", block_group, bit, count,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5504) err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5505) } else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5506) EXT4_MB_GRP_CLEAR_TRIMMED(e4b.bd_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5507)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5508) ext4_lock_group(sb, block_group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5509) mb_clear_bits(bitmap_bh->b_data, bit, count_clusters);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5510) mb_free_blocks(inode, &e4b, bit, count_clusters);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5511) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5512)
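	/*
	 * Still under the group lock taken in either branch above: fold the
	 * freed clusters into the group descriptor's free count and refresh
	 * the bitmap and descriptor checksums.
	 */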
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5513) ret = ext4_free_group_clusters(sb, gdp) + count_clusters;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5514) ext4_free_group_clusters_set(sb, gdp, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5515) ext4_block_bitmap_csum_set(sb, block_group, gdp, bitmap_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5516) ext4_group_desc_csum_set(sb, block_group, gdp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5517) ext4_unlock_group(sb, block_group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5518)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5519) if (sbi->s_log_groups_per_flex) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5520) ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5521) atomic64_add(count_clusters,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5522) &sbi_array_rcu_deref(sbi, s_flex_groups,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5523) flex_group)->free_clusters);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5524) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5525)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5526) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5527) * on a bigalloc file system, defer the s_freeclusters_counter
	 * update to the caller (ext4_ext_remove_space and friends) so they
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5529) * can determine if a cluster freed here should be rereserved
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5530) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5531) if (!(flags & EXT4_FREE_BLOCKS_RERESERVE_CLUSTER)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5532) if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5533) dquot_free_block(inode, EXT4_C2B(sbi, count_clusters));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5534) percpu_counter_add(&sbi->s_freeclusters_counter,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5535) count_clusters);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5536) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5537)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5538) ext4_mb_unload_buddy(&e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5539)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5540) /* We dirtied the bitmap block */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5541) BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5542) err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5543)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5544) /* And the group descriptor block */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5545) BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5546) ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5547) if (!err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5548) err = ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5549)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5550) if (overflow && !err) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5551) block += count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5552) count = overflow;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5553) put_bh(bitmap_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5554) goto do_more;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5555) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5556) error_return:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5557) brelse(bitmap_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5558) ext4_std_error(sb, err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5559) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5560) }
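/*
 * Illustrative sketch, not part of the original file: a typical caller of
 * the block-freeing path above goes through ext4_free_blocks() and passes
 * EXT4_FREE_BLOCKS_METADATA so the blocks are not reused before the
 * transaction commits (the deferred-free branch above). The helper name is
 * hypothetical; the flags and signature are the real ext4 API.
 */
static void __maybe_unused ext4_free_range_example(handle_t *handle,
		struct inode *inode, ext4_fsblk_t block, unsigned long count)
{
	/* EXT4_FREE_BLOCKS_METADATA selects the "defer until commit" path. */
	ext4_free_blocks(handle, inode, NULL, block, count,
			 EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
}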
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5561)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5562) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5563) * ext4_group_add_blocks() -- Add given blocks to an existing group
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5564) * @handle: handle to this transaction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5565) * @sb: super block
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5566) * @block: start physical block to add to the block group
 * @count: number of blocks to add
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5568) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5569) * This marks the blocks as free in the bitmap and buddy.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5570) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5571) int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5572) ext4_fsblk_t block, unsigned long count)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5573) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5574) struct buffer_head *bitmap_bh = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5575) struct buffer_head *gd_bh;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5576) ext4_group_t block_group;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5577) ext4_grpblk_t bit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5578) unsigned int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5579) struct ext4_group_desc *desc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5580) struct ext4_sb_info *sbi = EXT4_SB(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5581) struct ext4_buddy e4b;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5582) int err = 0, ret, free_clusters_count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5583) ext4_grpblk_t clusters_freed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5584) ext4_fsblk_t first_cluster = EXT4_B2C(sbi, block);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5585) ext4_fsblk_t last_cluster = EXT4_B2C(sbi, block + count - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5586) unsigned long cluster_count = last_cluster - first_cluster + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5587)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5588) ext4_debug("Adding block(s) %llu-%llu\n", block, block + count - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5589)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5590) if (count == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5591) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5592)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5593) ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5594) /*
	 * Check to see if we are adding blocks across a group
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5596) * boundary.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5597) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5598) if (bit + cluster_count > EXT4_CLUSTERS_PER_GROUP(sb)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5599) ext4_warning(sb, "too many blocks added to group %u",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5600) block_group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5601) err = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5602) goto error_return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5603) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5604)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5605) bitmap_bh = ext4_read_block_bitmap(sb, block_group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5606) if (IS_ERR(bitmap_bh)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5607) err = PTR_ERR(bitmap_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5608) bitmap_bh = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5609) goto error_return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5610) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5611)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5612) desc = ext4_get_group_desc(sb, block_group, &gd_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5613) if (!desc) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5614) err = -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5615) goto error_return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5616) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5617)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5618) if (in_range(ext4_block_bitmap(sb, desc), block, count) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5619) in_range(ext4_inode_bitmap(sb, desc), block, count) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5620) in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5621) in_range(block + count - 1, ext4_inode_table(sb, desc),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5622) sbi->s_itb_per_group)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5623) ext4_error(sb, "Adding blocks in system zones - "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5624) "Block = %llu, count = %lu",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5625) block, count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5626) err = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5627) goto error_return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5628) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5629)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5630) BUFFER_TRACE(bitmap_bh, "getting write access");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5631) err = ext4_journal_get_write_access(handle, bitmap_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5632) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5633) goto error_return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5634)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5635) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5636) * We are about to modify some metadata. Call the journal APIs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5637) * to unshare ->b_data if a currently-committing transaction is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5638) * using it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5639) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5640) BUFFER_TRACE(gd_bh, "get_write_access");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5641) err = ext4_journal_get_write_access(handle, gd_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5642) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5643) goto error_return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5644)
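	/*
	 * Count how many of the clusters being added are not already marked
	 * free in the on-disk bitmap; only those contribute to the
	 * free-space accounting below.
	 */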
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5645) for (i = 0, clusters_freed = 0; i < cluster_count; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5646) BUFFER_TRACE(bitmap_bh, "clear bit");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5647) if (!mb_test_bit(bit + i, bitmap_bh->b_data)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5648) ext4_error(sb, "bit already cleared for block %llu",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5649) (ext4_fsblk_t)(block + i));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5650) BUFFER_TRACE(bitmap_bh, "bit already cleared");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5651) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5652) clusters_freed++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5653) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5654) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5655)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5656) err = ext4_mb_load_buddy(sb, block_group, &e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5657) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5658) goto error_return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5659)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5660) /*
	 * We need to update group_info->bb_free and the bitmap
	 * with the group lock held; ext4_mb_generate_buddy() looks
	 * at them under the same lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5664) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5665) ext4_lock_group(sb, block_group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5666) mb_clear_bits(bitmap_bh->b_data, bit, cluster_count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5667) mb_free_blocks(NULL, &e4b, bit, cluster_count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5668) free_clusters_count = clusters_freed +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5669) ext4_free_group_clusters(sb, desc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5670) ext4_free_group_clusters_set(sb, desc, free_clusters_count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5671) ext4_block_bitmap_csum_set(sb, block_group, desc, bitmap_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5672) ext4_group_desc_csum_set(sb, block_group, desc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5673) ext4_unlock_group(sb, block_group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5674) percpu_counter_add(&sbi->s_freeclusters_counter,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5675) clusters_freed);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5676)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5677) if (sbi->s_log_groups_per_flex) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5678) ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5679) atomic64_add(clusters_freed,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5680) &sbi_array_rcu_deref(sbi, s_flex_groups,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5681) flex_group)->free_clusters);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5682) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5683)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5684) ext4_mb_unload_buddy(&e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5685)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5686) /* We dirtied the bitmap block */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5687) BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5688) err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5689)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5690) /* And the group descriptor block */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5691) BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5692) ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5693) if (!err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5694) err = ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5695)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5696) error_return:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5697) brelse(bitmap_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5698) ext4_std_error(sb, err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5699) return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5700) }
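/*
 * Illustrative sketch, modeled on the online-resize path in resize.c (an
 * assumption for illustration): after the last group is extended, the new
 * blocks are published to the allocator via ext4_group_add_blocks(). The
 * helper and variable names are hypothetical.
 */
static int __maybe_unused ext4_publish_new_blocks_example(handle_t *handle,
		struct super_block *sb, ext4_fsblk_t o_blocks_count,
		ext4_grpblk_t add)
{
	/* Mark 'add' blocks starting at o_blocks_count as free. */
	return ext4_group_add_blocks(handle, sb, o_blocks_count, add);
}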
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5701)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5702) /**
 * ext4_trim_extent -- function to TRIM one free extent in the group
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5704) * @sb: super block for the file system
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5705) * @start: starting block of the free extent in the alloc. group
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5706) * @count: number of blocks to TRIM
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5707) * @group: alloc. group we are working with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5708) * @e4b: ext4 buddy for the group
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5709) *
 * Trim "count" blocks starting at "start" in the "group". To ensure that no
 * one will allocate those blocks, mark them as used in the buddy bitmap. This
 * must be called under the group lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5713) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5714) static int ext4_trim_extent(struct super_block *sb, int start, int count,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5715) ext4_group_t group, struct ext4_buddy *e4b)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5716) __releases(bitlock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5717) __acquires(bitlock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5718) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5719) struct ext4_free_extent ex;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5720) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5721)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5722) trace_ext4_trim_extent(sb, group, start, count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5723)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5724) assert_spin_locked(ext4_group_lock_ptr(sb, group));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5725)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5726) ex.fe_start = start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5727) ex.fe_group = group;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5728) ex.fe_len = count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5729)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5730) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5731) * Mark blocks used, so no one can reuse them while
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5732) * being trimmed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5733) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5734) mb_mark_used(e4b, &ex);
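	/*
	 * ext4_issue_discard() may sleep, so drop the group lock around it;
	 * the extent stays marked used in the buddy bitmap meanwhile, which
	 * keeps allocators away from it.
	 */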
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5735) ext4_unlock_group(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5736) ret = ext4_issue_discard(sb, group, start, count, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5737) ext4_lock_group(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5738) mb_free_blocks(NULL, e4b, start, ex.fe_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5739) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5740) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5741)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5742) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5743) * ext4_trim_all_free -- function to trim all free space in alloc. group
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5744) * @sb: super block for file system
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5745) * @group: group to be trimmed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5746) * @start: first group block to examine
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5747) * @max: last group block to examine
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5748) * @minblocks: minimum extent block count
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5749) *
 * ext4_trim_all_free walks through the group's block bitmap searching for
 * free extents. When a free extent is found, ext4_trim_extent is called to
 * mark it as used in the group buddy bitmap, issue a TRIM command for it,
 * and free it again in the group buddy bitmap. This is repeated until the
 * whole group has been scanned.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5759) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5760) static ext4_grpblk_t
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5761) ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5762) ext4_grpblk_t start, ext4_grpblk_t max,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5763) ext4_grpblk_t minblocks)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5764) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5765) void *bitmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5766) ext4_grpblk_t next, count = 0, free_count = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5767) struct ext4_buddy e4b;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5768) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5769)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5770) trace_ext4_trim_all_free(sb, group, start, max);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5771)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5772) ret = ext4_mb_load_buddy(sb, group, &e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5773) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5774) ext4_warning(sb, "Error %d loading buddy information for %u",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5775) ret, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5776) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5777) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5778) bitmap = e4b.bd_bitmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5779)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5780) ext4_lock_group(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5781) if (EXT4_MB_GRP_WAS_TRIMMED(e4b.bd_info) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5782) minblocks >= atomic_read(&EXT4_SB(sb)->s_last_trim_minblks))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5783) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5784)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5785) start = (e4b.bd_info->bb_first_free > start) ?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5786) e4b.bd_info->bb_first_free : start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5787)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5788) while (start <= max) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5789) start = mb_find_next_zero_bit(bitmap, max + 1, start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5790) if (start > max)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5791) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5792) next = mb_find_next_bit(bitmap, max + 1, start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5793)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5794) if ((next - start) >= minblocks) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5795) ret = ext4_trim_extent(sb, start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5796) next - start, group, &e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5797) if (ret && ret != -EOPNOTSUPP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5798) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5799) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5800) count += next - start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5801) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5802) free_count += next - start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5803) start = next + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5804)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5805) if (fatal_signal_pending(current)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5806) count = -ERESTARTSYS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5807) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5808) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5809)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5810) if (need_resched()) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5811) ext4_unlock_group(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5812) cond_resched();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5813) ext4_lock_group(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5814) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5815)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5816) if ((e4b.bd_info->bb_free - free_count) < minblocks)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5817) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5818) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5819)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5820) if (!ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5821) ret = count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5822) EXT4_MB_GRP_SET_TRIMMED(e4b.bd_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5823) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5824) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5825) ext4_unlock_group(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5826) ext4_mb_unload_buddy(&e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5827)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5828) ext4_debug("trimmed %d blocks in the group %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5829) count, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5830)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5831) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5832) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5833)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5834) /**
 * ext4_trim_fs() -- trim ioctl handler
 * @sb: superblock for filesystem
 * @range: fstrim_range structure, where
 *	start: first byte to trim
 *	len: number of bytes to trim from start
 *	minlen: minimum extent length in bytes
 *
 * ext4_trim_fs goes through all allocation groups containing bytes from
 * start to start+len. For each such group the ext4_trim_all_free function
 * is invoked to trim all free space.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5845) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5846) int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5847) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5848) struct request_queue *q = bdev_get_queue(sb->s_bdev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5849) struct ext4_group_info *grp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5850) ext4_group_t group, first_group, last_group;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5851) ext4_grpblk_t cnt = 0, first_cluster, last_cluster;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5852) uint64_t start, end, minlen, trimmed = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5853) ext4_fsblk_t first_data_blk =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5854) le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5855) ext4_fsblk_t max_blks = ext4_blocks_count(EXT4_SB(sb)->s_es);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5856) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5857)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5858) start = range->start >> sb->s_blocksize_bits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5859) end = start + (range->len >> sb->s_blocksize_bits) - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5860) minlen = EXT4_NUM_B2C(EXT4_SB(sb),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5861) range->minlen >> sb->s_blocksize_bits);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5862)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5863) if (minlen > EXT4_CLUSTERS_PER_GROUP(sb) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5864) start >= max_blks ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5865) range->len < sb->s_blocksize)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5866) return -EINVAL;
	/* No point in trying to trim less than the discard granularity */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5868) if (range->minlen < q->limits.discard_granularity) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5869) minlen = EXT4_NUM_B2C(EXT4_SB(sb),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5870) q->limits.discard_granularity >> sb->s_blocksize_bits);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5871) if (minlen > EXT4_CLUSTERS_PER_GROUP(sb))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5872) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5873) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5874) if (end >= max_blks)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5875) end = max_blks - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5876) if (end <= first_data_blk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5877) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5878) if (start < first_data_blk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5879) start = first_data_blk;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5880)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5881) /* Determine first and last group to examine based on start and end */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5882) ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5883) &first_group, &first_cluster);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5884) ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5885) &last_group, &last_cluster);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5886)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5887) /* end now represents the last cluster to discard in this group */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5888) end = EXT4_CLUSTERS_PER_GROUP(sb) - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5889)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5890) for (group = first_group; group <= last_group; group++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5891) grp = ext4_get_group_info(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5892) /* We only do this if the grp has never been initialized */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5893) if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5894) ret = ext4_mb_init_group(sb, group, GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5895) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5896) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5897) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5898)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5899) /*
		 * For every group except the last one, the last cluster is
		 * always EXT4_CLUSTERS_PER_GROUP(sb) - 1, so we only need to
		 * change it for the last group; note that last_cluster was
		 * already computed above by ext4_get_group_no_and_offset().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5904) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5905) if (group == last_group)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5906) end = last_cluster;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5907)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5908) if (grp->bb_free >= minlen) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5909) cnt = ext4_trim_all_free(sb, group, first_cluster,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5910) end, minlen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5911) if (cnt < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5912) ret = cnt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5913) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5914) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5915) trimmed += cnt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5916) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5917)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5918) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5919) * For every group except the first one, we are sure
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5920) * that the first cluster to discard will be cluster #0.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5921) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5922) first_cluster = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5923) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5924)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5925) if (!ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5926) atomic_set(&EXT4_SB(sb)->s_last_trim_minblks, minlen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5927)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5928) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5929) range->len = EXT4_C2B(EXT4_SB(sb), trimmed) << sb->s_blocksize_bits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5930) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5931) }
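/*
 * Illustrative sketch mirroring the FITRIM handling in ext4_ioctl() (an
 * assumption for illustration; the uaccess plumbing is simplified): user
 * space passes a struct fstrim_range, ext4_trim_fs() walks the affected
 * groups, and range.len is copied back so the caller learns how many bytes
 * were actually trimmed.
 */
static int __maybe_unused ext4_fitrim_example(struct super_block *sb,
		struct fstrim_range __user *urange)
{
	struct fstrim_range range;
	int ret;

	if (copy_from_user(&range, urange, sizeof(range)))
		return -EFAULT;

	ret = ext4_trim_fs(sb, &range);
	if (ret < 0)
		return ret;

	if (copy_to_user(urange, &range, sizeof(range)))
		return -EFAULT;
	return 0;
}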
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5932)
/* Iterate over all the free extents in the group. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5934) int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5935) ext4_mballoc_query_range(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5936) struct super_block *sb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5937) ext4_group_t group,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5938) ext4_grpblk_t start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5939) ext4_grpblk_t end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5940) ext4_mballoc_query_range_fn formatter,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5941) void *priv)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5942) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5943) void *bitmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5944) ext4_grpblk_t next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5945) struct ext4_buddy e4b;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5946) int error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5947)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5948) error = ext4_mb_load_buddy(sb, group, &e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5949) if (error)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5950) return error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5951) bitmap = e4b.bd_bitmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5952)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5953) ext4_lock_group(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5954)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5955) start = (e4b.bd_info->bb_first_free > start) ?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5956) e4b.bd_info->bb_first_free : start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5957) if (end >= EXT4_CLUSTERS_PER_GROUP(sb))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5958) end = EXT4_CLUSTERS_PER_GROUP(sb) - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5959)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5960) while (start <= end) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5961) start = mb_find_next_zero_bit(bitmap, end + 1, start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5962) if (start > end)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5963) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5964) next = mb_find_next_bit(bitmap, end + 1, start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5965)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5966) ext4_unlock_group(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5967) error = formatter(sb, group, start, next - start, priv);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5968) if (error)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5969) goto out_unload;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5970) ext4_lock_group(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5971)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5972) start = next + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5973) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5974)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5975) ext4_unlock_group(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5976) out_unload:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5977) ext4_mb_unload_buddy(&e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5978)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5979) return error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5980) }
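/*
 * Illustrative formatter sketch (hypothetical, for demonstration only):
 * the callback is invoked with the group lock dropped, so it may sleep.
 * This example just counts the free extents reported for the group via a
 * counter passed through @priv. A caller might invoke it as:
 *
 *	ext4_mballoc_query_range(sb, group, 0,
 *			EXT4_CLUSTERS_PER_GROUP(sb) - 1,
 *			ext4_count_free_extents_fn, &nr);
 */
static int __maybe_unused ext4_count_free_extents_fn(struct super_block *sb,
		ext4_group_t group, ext4_grpblk_t start, ext4_grpblk_t len,
		void *priv)
{
	unsigned int *nr_extents = priv;

	(*nr_extents)++;
	return 0;
}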