Orange Pi 5 kernel

Deprecated Linux kernel 5.10.110 for OrangePi 5/5B/5+ boards

// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2003-2006, Cluster File Systems, Inc, info@clusterfs.com
 * Written by Alex Tomas <alex@clusterfs.com>
 */


/*
 * mballoc.c contains the multiblocks allocation routines
 */

#include "ext4_jbd2.h"
#include "mballoc.h"
#include <linux/log2.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/nospec.h>
#include <linux/backing-dev.h>
#include <trace/events/ext4.h>

/*
 * MUSTDO:
 *   - test ext4_ext_search_left() and ext4_ext_search_right()
 *   - search for metadata in a few groups
 *
 * TODO v4:
 *   - normalization should take into account whether the file is still open
 *   - discard preallocations if no free space is left (policy?)
 *   - don't normalize tails
 *   - quota
 *   - reservation for superuser
 *
 * TODO v3:
 *   - bitmap read-ahead (proposed by Oleg Drokin aka green)
 *   - track min/max extents in each group for better group selection
 *   - mb_mark_used() may allocate a chunk right after splitting a buddy
 *   - tree of groups sorted by number of free blocks
 *   - error handling
 */

/*
 * An allocation request asks for multiple blocks near the specified
 * goal block.
 *
 * During the initialization phase of the allocator we decide to use
 * group preallocation or inode preallocation depending on the size of
 * the file. The size of the file is either the resulting file size we
 * would have after allocation or the current file size, whichever is
 * larger. If the size is less than sbi->s_mb_stream_request we select
 * group preallocation. The default value of s_mb_stream_request is 16
 * blocks. This can also be tuned via
 * /sys/fs/ext4/<partition>/mb_stream_req. The value is specified in
 * blocks.
 *
 * The main motivation for having small files use group preallocation is
 * to keep small files closer together on the disk.
 *
 * In the first stage the allocator looks at the inode prealloc list,
 * ext4_inode_info->i_prealloc_list, which contains the prealloc
 * spaces for this particular inode. An inode prealloc space is
 * represented as:
 *
 * pa_lstart -> the logical start block of this prealloc space
 * pa_pstart -> the physical start block of this prealloc space
 * pa_len    -> length of this prealloc space (in clusters)
 * pa_free   -> free space available in this prealloc space (in clusters)
 *
 * The inode preallocation space is selected by looking at the _logical_
 * start block. Only if the logical file block falls within the range of
 * a prealloc space do we consume that prealloc space. This makes sure
 * that we have contiguous physical blocks representing the file blocks.
 *
 * The important thing to note about inode prealloc space is that we
 * don't modify any of the values associated with it except pa_free.
 *
 * If we are not able to find blocks in the inode prealloc space, and we
 * have the group allocation flag set, then we look at the locality group
 * prealloc space. These are per-CPU prealloc lists represented as
 *
 * ext4_sb_info.s_locality_groups[smp_processor_id()]
 *
 * The reason for having a per-cpu locality group is to reduce contention
 * between CPUs. It is possible to get scheduled at this point.
 *
 * The locality group prealloc space is selected by checking whether we
 * have enough free space (pa_free) within the prealloc space.
 *
 * If we can't allocate blocks via inode prealloc or/and locality group
 * prealloc then we look at the buddy cache. The buddy cache is represented
 * by ext4_sb_info.s_buddy_cache (struct inode), whose file offsets get
 * mapped to the buddy and bitmap information for the different
 * groups. The buddy information is attached to the buddy cache inode so
 * that we can access it through the page cache. The information for each
 * group is loaded via ext4_mb_load_buddy and consists of the block bitmap
 * and the buddy information, stored in the inode as:
 *
 *  {                        page                        }
 *  [ group 0 bitmap][ group 0 buddy] [group 1][ group 1]...
 *
 *
 * one block each for the bitmap and the buddy information. So for each
 * group we take up 2 blocks. A page can contain blocks_per_page
 * (PAGE_SIZE / blocksize) blocks, so it can hold information for
 * groups_per_page groups, which is blocks_per_page/2.
 *
 * The buddy cache inode is not stored on disk. The inode is thrown
 * away when the filesystem is unmounted.
 *
 * We look for the requested number of blocks in the buddy cache. If we
 * locate that many free blocks, we return with additional information
 * about the rest of the contiguous physical blocks available.
 *
 * Before allocating blocks via the buddy cache we normalize the request.
 * This ensures we ask for more blocks than we need. The extra blocks we
 * get after allocation are added to the respective prealloc list. For
 * inode preallocation we follow a set of heuristics based on file size;
 * see ext4_mb_normalize_request. If we are doing group preallocation we
 * try to normalize the request to sbi->s_mb_group_prealloc. The default
 * value of s_mb_group_prealloc depends on the cluster size; for
 * non-bigalloc file systems it is 512 blocks. This can be tuned via
 * /sys/fs/ext4/<partition>/mb_group_prealloc. The value is specified in
 * blocks. If the file system was mounted with the -o stripe=<value>
 * option, the group prealloc request is normalized to the smallest
 * multiple of the stripe value (sbi->s_stripe) which is greater than the
 * default mb_group_prealloc.
 *
 * The regular allocator (using the buddy cache) supports a few tunables.
 *
 * /sys/fs/ext4/<partition>/mb_min_to_scan
 * /sys/fs/ext4/<partition>/mb_max_to_scan
 * /sys/fs/ext4/<partition>/mb_order2_req
 *
 * The regular allocator uses the buddy scan only if the request length
 * is a power of 2 blocks and the order of the allocation is >=
 * sbi->s_mb_order2_reqs. The value of s_mb_order2_reqs can be tuned via
 * /sys/fs/ext4/<partition>/mb_order2_req. If the request length equals
 * the stripe size (sbi->s_stripe), we search for contiguous blocks in
 * stripe-size units. This should result in better allocation on RAID
 * setups. If not, we search the specific group using the bitmap for the
 * best extents. The tunables min_to_scan and max_to_scan control the
 * behaviour here: min_to_scan indicates how long mballoc __must__ look
 * for the best extent, and max_to_scan indicates how long mballoc __can__
 * look for the best extent among the found extents. Searching for blocks
 * starts with the group specified as the goal value in the allocation
 * context via ac_g_ex. Each group is first checked against the criteria
 * for whether it can be used for allocation; ext4_mb_good_group explains
 * how groups are checked.
 *
 * Both prealloc spaces are populated as described above. So the first
 * request will hit the buddy cache, which results in the prealloc space
 * getting filled. The prealloc space is then used for subsequent
 * requests.
 */
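
/*
 * A minimal illustrative sketch (not used anywhere in mballoc; the
 * helper name and parameter are hypothetical) of the size-based policy
 * described above: files whose size stays below
 * sbi->s_mb_stream_request take the locality-group preallocation path,
 * larger files use per-inode preallocation.
 */
static inline bool mb_sketch_would_use_group_pa(struct ext4_sb_info *sbi,
						unsigned long size_in_blocks)
{
	/* "size" is max(current size, size after allocation), in blocks */
	return size_in_blocks < sbi->s_mb_stream_request;
}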

/*
 * mballoc operates on the following data:
 *  - on-disk bitmap
 *  - in-core buddy (actually includes buddy and bitmap)
 *  - preallocation descriptors (PAs)
 *
 * there are two types of preallocations:
 *  - inode
 *    assigned to a specific inode and can be used for this inode only.
 *    it describes part of the inode's space preallocated to specific
 *    physical blocks. any block from that preallocation can be used
 *    independently. the descriptor just tracks the number of blocks
 *    left unused. so, before taking some block from a descriptor, one
 *    must make sure the corresponding logical block isn't allocated
 *    yet. this also means that freeing any block within a descriptor's
 *    range must discard all preallocated blocks.
 *  - locality group
 *    assigned to a specific locality group, which does not translate to
 *    a permanent set of inodes: an inode can join and leave a group.
 *    space from this type of preallocation can be used for any inode.
 *    thus it's consumed from the beginning to the end.
 *
 * the relation between them can be expressed as:
 *    in-core buddy = on-disk bitmap + preallocation descriptors
 *
 * this means the blocks mballoc considers used are:
 *  - allocated blocks (persistent)
 *  - preallocated blocks (non-persistent)
 *
 * consistency in the mballoc world means that at any time a block is
 * either free or used in ALL structures. notice: "any time" should not
 * be read literally -- time is discrete and delimited by locks.
 *
 * to keep it simple, we don't use block numbers, instead we count the
 * number of blocks: how many blocks are marked used/free in the on-disk
 * bitmap, buddy and PA.
 *
 * all operations can be expressed as:
 *  - init buddy:			buddy = on-disk + PAs
 *  - new PA:				buddy += N; PA = N
 *  - use inode PA:			on-disk += N; PA -= N
 *  - discard inode PA:			buddy -= on-disk - PA; PA = 0
 *  - use locality group PA:		on-disk += N; PA -= N
 *  - discard locality group PA:	buddy -= PA; PA = 0
 *  note: 'buddy -= on-disk - PA' is used to show that the on-disk bitmap
 *        is used in the real operation, because we can't know the actual
 *        used bits from the PA, only from the on-disk bitmap
 *
 * if we follow this strict logic, then all operations above should be
 * atomic. given that some of them can block, we'd have to use something
 * like semaphores, killing performance on high-end SMP hardware. let's
 * try to relax it using the following knowledge:
 *  1) if a buddy is referenced, it's already initialized
 *  2) while a block is used in a buddy and the buddy is referenced,
 *     nobody can re-allocate that block
 *  3) we work on bitmaps and '+' actually means 'set bits'. if on-disk
 *     has a bit set and a PA claims the same block, it's OK. IOW, one
 *     can set a bit in the on-disk bitmap if the buddy has the same bit
 *     set or/and a PA covers the corresponding block
 *
 * so, now we're building a concurrency table:
 *  - init buddy vs.
 *    - new PA
 *      blocks for a PA are allocated in the buddy, the buddy must be
 *      referenced until the PA is linked to the allocation group, to
 *      avoid concurrent buddy init
 *    - use inode PA
 *      we need to make sure that either the on-disk bitmap or the PA has
 *      uptodate data. given (3) we care that the PA-=N operation doesn't
 *      interfere with init
 *    - discard inode PA
 *      the simplest way would be to have the buddy initialized by the
 *      discard
 *    - use locality group PA
 *      again, PA-=N must be serialized with init
 *    - discard locality group PA
 *      the simplest way would be to have the buddy initialized by the
 *      discard
 *  - new PA vs.
 *    - use inode PA
 *      i_data_sem serializes them
 *    - discard inode PA
 *      the discard process must wait until the PA isn't used by another
 *      process
 *    - use locality group PA
 *      some mutex should serialize them
 *    - discard locality group PA
 *      the discard process must wait until the PA isn't used by another
 *      process
 *  - use inode PA
 *    - use inode PA
 *      i_data_sem or another mutex should serialize them
 *    - discard inode PA
 *      the discard process must wait until the PA isn't used by another
 *      process
 *    - use locality group PA
 *      nothing wrong here -- they're different PAs covering different
 *      blocks
 *    - discard locality group PA
 *      the discard process must wait until the PA isn't used by another
 *      process
 *
 * a few consequences follow:
 *  - while a PA is referenced, no discard of it is possible
 *  - a PA is referenced until its blocks are marked in the on-disk bitmap
 *  - a PA changes only after the on-disk bitmap does
 *  - a discard must not compete with an init. either the init is done
 *    before any discard, or they're serialized somehow
 *  - buddy init as the sum of the on-disk bitmap and PAs is done
 *    atomically
 *
 * a special case is when we've used a PA to emptiness. no need to modify
 * the buddy in this case, but we should care about concurrent init
 *
 */
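
/*
 * Worked example of the bookkeeping above (illustrative numbers): a
 * group has 100 blocks set in the on-disk bitmap and one PA of 16
 * blocks.  After "init buddy" the buddy shows 116 blocks used.  "use
 * inode PA" with N = 4 marks 4 blocks in the on-disk bitmap (104 set)
 * and leaves PA = 12; the buddy stays at 116.  "discard inode PA" then
 * clears the 12 never-used blocks in the buddy: 116 - 12 = 104,
 * matching the on-disk bitmap again.
 */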

/*
 * The logic in a few words:
 *
 *  - allocation:
 *    load group
 *    find blocks
 *    mark bits in on-disk bitmap
 *    release group
 *
 *  - use preallocation:
 *    find proper PA (per-inode or group)
 *    load group
 *    mark bits in on-disk bitmap
 *    release group
 *    release PA
 *
 *  - free:
 *    load group
 *    mark bits in on-disk bitmap
 *    release group
 *
 *  - discard preallocations in group:
 *    mark PAs deleted
 *    move them onto a local list
 *    load on-disk bitmap
 *    load group
 *    remove PA from object (inode or locality group)
 *    mark free blocks in-core
 *
 *  - discard inode's preallocations:
 */

/*
 * Locking rules
 *
 * Locks:
 *  - bitlock on a group	(group)
 *  - object (inode/locality)	(object)
 *  - per-pa lock		(pa)
 *
 * Paths:
 *  - new pa
 *    object
 *    group
 *
 *  - find and use pa:
 *    pa
 *
 *  - release consumed pa:
 *    pa
 *    group
 *    object
 *
 *  - generate in-core bitmap:
 *    group
 *        pa
 *
 *  - discard all for given object (inode, locality group):
 *    object
 *        pa
 *    group
 *
 *  - discard all for given group:
 *    group
 *        pa
 *    group
 *        object
 *
 */
static struct kmem_cache *ext4_pspace_cachep;
static struct kmem_cache *ext4_ac_cachep;
static struct kmem_cache *ext4_free_data_cachep;

/* We create slab caches for groupinfo data structures based on the
 * superblock block size.  There will be one per mounted filesystem for
 * each unique s_blocksize_bits */
#define NR_GRPINFO_CACHES 8
static struct kmem_cache *ext4_groupinfo_caches[NR_GRPINFO_CACHES];

static const char * const ext4_groupinfo_slab_names[NR_GRPINFO_CACHES] = {
	"ext4_groupinfo_1k", "ext4_groupinfo_2k", "ext4_groupinfo_4k",
	"ext4_groupinfo_8k", "ext4_groupinfo_16k", "ext4_groupinfo_32k",
	"ext4_groupinfo_64k", "ext4_groupinfo_128k"
};
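
/*
 * A minimal sketch (the helper name is hypothetical; this file does not
 * define or use it) of how a block size maps onto the cache array
 * above: the names cover block sizes 1k through 128k, i.e. one cache
 * per s_blocksize_bits value starting at log2(1024) = 10.
 */
static inline struct kmem_cache *mb_sketch_groupinfo_cache(unsigned int blocksize_bits)
{
	/* e.g. 4 KiB blocks: blocksize_bits == 12 -> index 2, "ext4_groupinfo_4k" */
	return ext4_groupinfo_caches[blocksize_bits - 10];
}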

static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
					ext4_group_t group);
static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
						ext4_group_t group);
static void ext4_mb_new_preallocation(struct ext4_allocation_context *ac);

/*
 * The algorithm using this percpu seq counter is as follows:
 * 1. We sample the percpu discard_pa_seq counter before trying for block
 *    allocation in ext4_mb_new_blocks().
 * 2. We increment this percpu discard_pa_seq counter when we either allocate
 *    or free these blocks, i.e. while marking those blocks as used/free in
 *    mb_mark_used()/mb_free_blocks().
 * 3. We also increment this percpu seq counter when we successfully identify
 *    that the bb_prealloc_list is not empty and hence proceed with discarding
 *    those PAs inside ext4_mb_discard_group_preallocations().
 *
 * Now, to make sure that the regular fast path of block allocation is not
 * affected, as a small optimization we only sample the percpu seq counter
 * on that cpu. Only when the block allocation fails and no freed blocks
 * were found do we sample the percpu seq counter for all cpus, using the
 * function ext4_get_discard_pa_seq_sum() below. This happens after making
 * sure that all the PAs on grp->bb_prealloc_list got freed or that the
 * list is empty.
 */
static DEFINE_PER_CPU(u64, discard_pa_seq);
static inline u64 ext4_get_discard_pa_seq_sum(void)
{
	int __cpu;
	u64 __seq = 0;

	for_each_possible_cpu(__cpu)
		__seq += per_cpu(discard_pa_seq, __cpu);
	return __seq;
}

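/*
 * Worked example for mb_correct_addr_and_bit() (64-bit): for
 * addr = base + 5 and bit = 2, the low three address bits (5) are
 * folded into the bit index as 5 * 8 = 40 extra bits, giving bit = 42,
 * and addr is rounded down to base.  The (addr, bit) pair addresses
 * the same bit, but addr is now aligned for the unsigned-long-based
 * ext4 bitops.
 */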
static inline void *mb_correct_addr_and_bit(int *bit, void *addr)
{
#if BITS_PER_LONG == 64
	*bit += ((unsigned long) addr & 7UL) << 3;
	addr = (void *) ((unsigned long) addr & ~7UL);
#elif BITS_PER_LONG == 32
	*bit += ((unsigned long) addr & 3UL) << 3;
	addr = (void *) ((unsigned long) addr & ~3UL);
#else
#error "how many bits you are?!"
#endif
	return addr;
}

static inline int mb_test_bit(int bit, void *addr)
{
	/*
	 * ext4_test_bit on architectures like powerpc
	 * needs an unsigned long aligned address
	 */
	addr = mb_correct_addr_and_bit(&bit, addr);
	return ext4_test_bit(bit, addr);
}

static inline void mb_set_bit(int bit, void *addr)
{
	addr = mb_correct_addr_and_bit(&bit, addr);
	ext4_set_bit(bit, addr);
}

static inline void mb_clear_bit(int bit, void *addr)
{
	addr = mb_correct_addr_and_bit(&bit, addr);
	ext4_clear_bit(bit, addr);
}

static inline int mb_test_and_clear_bit(int bit, void *addr)
{
	addr = mb_correct_addr_and_bit(&bit, addr);
	return ext4_test_and_clear_bit(bit, addr);
}

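/*
 * The two search wrappers below apply the same alignment correction:
 * @fix is the bit offset introduced by aligning @addr down, so both the
 * search bound and the start position are shifted up by @fix before the
 * search, the result is shifted back, and the return value is clamped
 * to @max.
 */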
static inline int mb_find_next_zero_bit(void *addr, int max, int start)
{
	int fix = 0, ret, tmpmax;
	addr = mb_correct_addr_and_bit(&fix, addr);
	tmpmax = max + fix;
	start += fix;

	ret = ext4_find_next_zero_bit(addr, tmpmax, start) - fix;
	if (ret > max)
		return max;
	return ret;
}

static inline int mb_find_next_bit(void *addr, int max, int start)
{
	int fix = 0, ret, tmpmax;
	addr = mb_correct_addr_and_bit(&fix, addr);
	tmpmax = max + fix;
	start += fix;

	ret = ext4_find_next_bit(addr, tmpmax, start) - fix;
	if (ret > max)
		return max;
	return ret;
}

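/*
 * Buddy layout note: order 0 is the block bitmap itself, with one bit
 * per cluster -- 1 << (bd_blkbits + 3) bits, e.g. 32768 bits for a
 * 4 KiB block (the "+ 3" converts bytes to bits).  Higher orders live
 * inside bd_buddy at precomputed offsets (s_mb_offsets[order]), with
 * their valid-bit counts in s_mb_maxs[order].
 */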
static void *mb_find_buddy(struct ext4_buddy *e4b, int order, int *max)
{
	char *bb;

	BUG_ON(e4b->bd_bitmap == e4b->bd_buddy);
	BUG_ON(max == NULL);

	if (order > e4b->bd_blkbits + 1) {
		*max = 0;
		return NULL;
	}

	/* at order 0 we see each particular block */
	if (order == 0) {
		*max = 1 << (e4b->bd_blkbits + 3);
		return e4b->bd_bitmap;
	}

	bb = e4b->bd_buddy + EXT4_SB(e4b->bd_sb)->s_mb_offsets[order];
	*max = EXT4_SB(e4b->bd_sb)->s_mb_maxs[order];

	return bb;
}

#ifdef DOUBLE_CHECK
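/*
 * With DOUBLE_CHECK enabled, each group keeps bb_bitmap, an in-memory
 * shadow copy of its on-disk block bitmap.  The helpers below mirror
 * every free/used transition into the shadow and compare it against
 * the real bitmap, so divergence (a double free, a lost update) is
 * caught at the point it happens.
 */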
static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b,
			   int first, int count)
{
	int i;
	struct super_block *sb = e4b->bd_sb;

	if (unlikely(e4b->bd_info->bb_bitmap == NULL))
		return;
	assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
	for (i = 0; i < count; i++) {
		if (!mb_test_bit(first + i, e4b->bd_info->bb_bitmap)) {
			ext4_fsblk_t blocknr;

			blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
			blocknr += EXT4_C2B(EXT4_SB(sb), first + i);
			ext4_grp_locked_error(sb, e4b->bd_group,
					      inode ? inode->i_ino : 0,
					      blocknr,
					      "freeing block already freed "
					      "(bit %u)",
					      first + i);
			ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
					EXT4_GROUP_INFO_BBITMAP_CORRUPT);
		}
		mb_clear_bit(first + i, e4b->bd_info->bb_bitmap);
	}
}

static void mb_mark_used_double(struct ext4_buddy *e4b, int first, int count)
{
	int i;

	if (unlikely(e4b->bd_info->bb_bitmap == NULL))
		return;
	assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
	for (i = 0; i < count; i++) {
		BUG_ON(mb_test_bit(first + i, e4b->bd_info->bb_bitmap));
		mb_set_bit(first + i, e4b->bd_info->bb_bitmap);
	}
}

static void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap)
{
	if (unlikely(e4b->bd_info->bb_bitmap == NULL))
		return;
	if (memcmp(e4b->bd_info->bb_bitmap, bitmap, e4b->bd_sb->s_blocksize)) {
		unsigned char *b1, *b2;
		int i;
		b1 = (unsigned char *) e4b->bd_info->bb_bitmap;
		b2 = (unsigned char *) bitmap;
		for (i = 0; i < e4b->bd_sb->s_blocksize; i++) {
			if (b1[i] != b2[i]) {
				ext4_msg(e4b->bd_sb, KERN_ERR,
					 "corruption in group %u "
					 "at byte %u(%u): %x in copy != %x "
					 "on disk/prealloc",
					 e4b->bd_group, i, i * 8, b1[i], b2[i]);
				BUG();
			}
		}
	}
}

static void mb_group_bb_bitmap_alloc(struct super_block *sb,
			struct ext4_group_info *grp, ext4_group_t group)
{
	struct buffer_head *bh;

	grp->bb_bitmap = kmalloc(sb->s_blocksize, GFP_NOFS);
	if (!grp->bb_bitmap)
		return;

	bh = ext4_read_block_bitmap(sb, group);
	if (IS_ERR_OR_NULL(bh)) {
		kfree(grp->bb_bitmap);
		grp->bb_bitmap = NULL;
		return;
	}

	memcpy(grp->bb_bitmap, bh->b_data, sb->s_blocksize);
	put_bh(bh);
}

static void mb_group_bb_bitmap_free(struct ext4_group_info *grp)
{
	kfree(grp->bb_bitmap);
}

#else
static inline void mb_free_blocks_double(struct inode *inode,
				struct ext4_buddy *e4b, int first, int count)
{
	return;
}
static inline void mb_mark_used_double(struct ext4_buddy *e4b,
						int first, int count)
{
	return;
}
static inline void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap)
{
	return;
}

static inline void mb_group_bb_bitmap_alloc(struct super_block *sb,
			struct ext4_group_info *grp, ext4_group_t group)
{
	return;
}

static inline void mb_group_bb_bitmap_free(struct ext4_group_info *grp)
{
	return;
}
#endif

#ifdef AGGRESSIVE_CHECK

#define MB_CHECK_ASSERT(assert)						\
do {									\
	if (!(assert)) {						\
		printk(KERN_EMERG					\
			"Assertion failure in %s() at %s:%d: \"%s\"\n",	\
			function, file, line, # assert);		\
		BUG();							\
	}								\
} while (0)

static int __mb_check_buddy(struct ext4_buddy *e4b, char *file,
				const char *function, int line)
{
	struct super_block *sb = e4b->bd_sb;
	int order = e4b->bd_blkbits + 1;
	int max;
	int max2;
	int i;
	int j;
	int k;
	int count;
	struct ext4_group_info *grp;
	int fragments = 0;
	int fstart;
	struct list_head *cur;
	void *buddy;
	void *buddy2;

	if (e4b->bd_info->bb_check_counter++ % 10)
		return 0;

	while (order > 1) {
		buddy = mb_find_buddy(e4b, order, &max);
		MB_CHECK_ASSERT(buddy);
		buddy2 = mb_find_buddy(e4b, order - 1, &max2);
		MB_CHECK_ASSERT(buddy2);
		MB_CHECK_ASSERT(buddy != buddy2);
		MB_CHECK_ASSERT(max * 2 == max2);

		count = 0;
		for (i = 0; i < max; i++) {

			if (mb_test_bit(i, buddy)) {
				/* only single bit in buddy2 may be 1 */
				if (!mb_test_bit(i << 1, buddy2)) {
					MB_CHECK_ASSERT(
						mb_test_bit((i<<1)+1, buddy2));
				} else if (!mb_test_bit((i << 1) + 1, buddy2)) {
					MB_CHECK_ASSERT(
						mb_test_bit(i << 1, buddy2));
				}
				continue;
			}

			/* both bits in buddy2 must be 1 */
			MB_CHECK_ASSERT(mb_test_bit(i << 1, buddy2));
			MB_CHECK_ASSERT(mb_test_bit((i << 1) + 1, buddy2));

			for (j = 0; j < (1 << order); j++) {
				k = (i * (1 << order)) + j;
				MB_CHECK_ASSERT(
					!mb_test_bit(k, e4b->bd_bitmap));
			}
			count++;
		}
		MB_CHECK_ASSERT(e4b->bd_info->bb_counters[order] == count);
		order--;
	}

	fstart = -1;
	buddy = mb_find_buddy(e4b, 0, &max);
	for (i = 0; i < max; i++) {
		if (!mb_test_bit(i, buddy)) {
			MB_CHECK_ASSERT(i >= e4b->bd_info->bb_first_free);
			if (fstart == -1) {
				fragments++;
				fstart = i;
			}
			continue;
		}
		fstart = -1;
		/* check used bits only */
		for (j = 0; j < e4b->bd_blkbits + 1; j++) {
			buddy2 = mb_find_buddy(e4b, j, &max2);
			k = i >> j;
			MB_CHECK_ASSERT(k < max2);
			MB_CHECK_ASSERT(mb_test_bit(k, buddy2));
		}
	}
	MB_CHECK_ASSERT(!EXT4_MB_GRP_NEED_INIT(e4b->bd_info));
	MB_CHECK_ASSERT(e4b->bd_info->bb_fragments == fragments);

	grp = ext4_get_group_info(sb, e4b->bd_group);
	list_for_each(cur, &grp->bb_prealloc_list) {
		ext4_group_t groupnr;
		struct ext4_prealloc_space *pa;
		pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
		ext4_get_group_no_and_offset(sb, pa->pa_pstart, &groupnr, &k);
		MB_CHECK_ASSERT(groupnr == e4b->bd_group);
		for (i = 0; i < pa->pa_len; i++)
			MB_CHECK_ASSERT(mb_test_bit(k + i, buddy));
	}
	return 0;
}
#undef MB_CHECK_ASSERT
#define mb_check_buddy(e4b) __mb_check_buddy(e4b,	\
					__FILE__, __func__, __LINE__)
#else
#define mb_check_buddy(e4b)
#endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  704) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  705) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  706)  * Divide blocks started from @first with length @len into
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  707)  * smaller chunks with power of 2 blocks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  708)  * Clear the bits in bitmap which the blocks of the chunk(s) covered,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  709)  * then increase bb_counters[] for corresponded chunk size.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  710)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  711) static void ext4_mb_mark_free_simple(struct super_block *sb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  712) 				void *buddy, ext4_grpblk_t first, ext4_grpblk_t len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  713) 					struct ext4_group_info *grp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  714) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  715) 	struct ext4_sb_info *sbi = EXT4_SB(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  716) 	ext4_grpblk_t min;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  717) 	ext4_grpblk_t max;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  718) 	ext4_grpblk_t chunk;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  719) 	unsigned int border;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  720) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  721) 	BUG_ON(len > EXT4_CLUSTERS_PER_GROUP(sb));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  722) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  723) 	border = 2 << sb->s_blocksize_bits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  724) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  725) 	while (len > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  726) 		/* find how many blocks can be covered from this position */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  727) 		max = ffs(first | border) - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  728) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  729) 		/* find the order of the largest power-of-2 chunk that fits in len */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  730) 		min = fls(len) - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  731) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  732) 		if (max < min)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  733) 			min = max;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  734) 		chunk = 1 << min;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  735) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  736) 		/* mark multiblock chunks only */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  737) 		grp->bb_counters[min]++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  738) 		if (min > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  739) 			mb_clear_bit(first >> min,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  740) 				     buddy + sbi->s_mb_offsets[min]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  741) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  742) 		len -= chunk;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  743) 		first += chunk;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  744) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  745) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  746) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  747) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  748)  * Cache the order of the largest free extent we have available in this block
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  749)  * group.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  750)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  751) static void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  752) mb_set_largest_free_order(struct super_block *sb, struct ext4_group_info *grp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  753) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  754) 	int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  755) 	int bits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  756) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  757) 	grp->bb_largest_free_order = -1; /* uninit */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  758) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  759) 	bits = sb->s_blocksize_bits + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  760) 	for (i = bits; i >= 0; i--) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  761) 		if (grp->bb_counters[i] > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  762) 			grp->bb_largest_free_order = i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  763) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  764) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  765) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  766) }
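
/*
 * For example (hypothetical counters): with bb_counters[] = {3, 1, 0, 2, 0, ...}
 * the scan above starts at the highest order, stops at the first non-zero
 * counter and sets bb_largest_free_order = 3; if every counter is zero it
 * stays at -1.
 */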
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  767) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  768) static noinline_for_stack
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  769) void ext4_mb_generate_buddy(struct super_block *sb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  770) 				void *buddy, void *bitmap, ext4_group_t group)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  771) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  772) 	struct ext4_group_info *grp = ext4_get_group_info(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  773) 	struct ext4_sb_info *sbi = EXT4_SB(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  774) 	ext4_grpblk_t max = EXT4_CLUSTERS_PER_GROUP(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  775) 	ext4_grpblk_t i = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  776) 	ext4_grpblk_t first;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  777) 	ext4_grpblk_t len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  778) 	unsigned free = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  779) 	unsigned fragments = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  780) 	unsigned long long period = get_cycles();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  781) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  782) 	/* initialize the buddy from the bitmap, which is an aggregation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  783) 	 * of the on-disk bitmap and preallocations */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  784) 	i = mb_find_next_zero_bit(bitmap, max, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  785) 	grp->bb_first_free = i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  786) 	while (i < max) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  787) 		fragments++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  788) 		first = i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  789) 		i = mb_find_next_bit(bitmap, max, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  790) 		len = i - first;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  791) 		free += len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  792) 		if (len > 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  793) 			ext4_mb_mark_free_simple(sb, buddy, first, len, grp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  794) 		else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  795) 			grp->bb_counters[0]++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  796) 		if (i < max)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  797) 			i = mb_find_next_zero_bit(bitmap, max, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  798) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  799) 	grp->bb_fragments = fragments;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  800) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  801) 	if (free != grp->bb_free) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  802) 		ext4_grp_locked_error(sb, group, 0, 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  803) 				      "block bitmap and bg descriptor "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  804) 				      "inconsistent: %u vs %u free clusters",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  805) 				      free, grp->bb_free);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  806) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  807) 		 * If we intend to continue, we consider group descriptor
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  808) 		 * corrupt and update bb_free using bitmap value
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  809) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  810) 		grp->bb_free = free;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  811) 		ext4_mark_group_bitmap_corrupted(sb, group,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  812) 					EXT4_GROUP_INFO_BBITMAP_CORRUPT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  813) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  814) 	mb_set_largest_free_order(sb, grp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  815) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  816) 	clear_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  817) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  818) 	period = get_cycles() - period;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  819) 	spin_lock(&sbi->s_bal_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  820) 	sbi->s_mb_buddies_generated++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  821) 	sbi->s_mb_generation_time += period;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  822) 	spin_unlock(&sbi->s_bal_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  823) }
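
/*
 * A small walk-through of the scan above, with an assumed 8-cluster group
 * whose clusters 0-2 and 4 are free (0 = free, 1 = used):
 *
 *	bitmap:  0 0 0 1 0 1 1 1
 *
 * The first free run [0..2] (len 3) is split by ext4_mb_mark_free_simple
 * into an order-1 chunk [0,1] plus an order-0 chunk [2]; the second run [4]
 * (len 1) just bumps bb_counters[0]. The result is bb_first_free = 0,
 * bb_free = 4 and bb_fragments = 2.
 */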
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  824) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  825) static void mb_regenerate_buddy(struct ext4_buddy *e4b)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  826) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  827) 	int count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  828) 	int order = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  829) 	void *buddy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  830) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  831) 	while ((buddy = mb_find_buddy(e4b, order++, &count))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  832) 		ext4_set_bits(buddy, 0, count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  833) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  834) 	e4b->bd_info->bb_fragments = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  835) 	memset(e4b->bd_info->bb_counters, 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  836) 		sizeof(*e4b->bd_info->bb_counters) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  837) 		(e4b->bd_sb->s_blocksize_bits + 2));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  838) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  839) 	ext4_mb_generate_buddy(e4b->bd_sb, e4b->bd_buddy,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  840) 		e4b->bd_bitmap, e4b->bd_group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  841) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  842) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  843) /* The buddy information is attached to the buddy cache inode
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  844)  * for convenience. The information regarding each group
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  845)  * is loaded via ext4_mb_load_buddy. The information comprises
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  846)  * the block bitmap and the buddy information, and it is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  847)  * stored in the inode as
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  848)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  849)  * {                        page                        }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  850)  * [ group 0 bitmap][ group 0 buddy] [group 1][ group 1]...
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  851)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  852)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  853)  * one block each for the bitmap and the buddy information.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  854)  * So for each group we take up 2 blocks. A page can
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  855)  * contain blocks_per_page (PAGE_SIZE / blocksize) blocks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  856)  * So it can hold information for groups_per_page groups,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  857)  * which is blocks_per_page/2.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  858)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  859)  * Locking note:  This routine takes the block group lock of all groups
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  860)  * for this page; do not hold this lock when calling this routine!
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  861)  */
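/*
 * A concrete layout example (geometry assumed for illustration): with
 * PAGE_SIZE = 4096 and blocksize = 1024, blocks_per_page = 4 and
 * groups_per_page = 2, so the buddy cache pages look like
 *
 *	page 0: [ g0 bitmap ][ g0 buddy ][ g1 bitmap ][ g1 buddy ]
 *	page 1: [ g2 bitmap ][ g2 buddy ][ g3 bitmap ][ g3 buddy ]
 *
 * Group g's bitmap lives in logical block 2*g and its buddy in block
 * 2*g + 1. With blocksize == PAGE_SIZE the bitmap and the buddy of a group
 * land on two consecutive pages instead.
 */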
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  862) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  863) static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  864) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  865) 	ext4_group_t ngroups;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  866) 	int blocksize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  867) 	int blocks_per_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  868) 	int groups_per_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  869) 	int err = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  870) 	int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  871) 	ext4_group_t first_group, group;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  872) 	int first_block;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  873) 	struct super_block *sb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  874) 	struct buffer_head *bhs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  875) 	struct buffer_head **bh = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  876) 	struct inode *inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  877) 	char *data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  878) 	char *bitmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  879) 	struct ext4_group_info *grinfo;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  880) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  881) 	inode = page->mapping->host;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  882) 	sb = inode->i_sb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  883) 	ngroups = ext4_get_groups_count(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  884) 	blocksize = i_blocksize(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  885) 	blocks_per_page = PAGE_SIZE / blocksize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  886) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  887) 	mb_debug(sb, "init page %lu\n", page->index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  888) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  889) 	groups_per_page = blocks_per_page >> 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  890) 	if (groups_per_page == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  891) 		groups_per_page = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  892) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  893) 	/* allocate buffer_heads to read bitmaps */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  894) 	if (groups_per_page > 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  895) 		i = sizeof(struct buffer_head *) * groups_per_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  896) 		bh = kzalloc(i, gfp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  897) 		if (bh == NULL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  898) 			err = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  899) 			goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  900) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  901) 	} else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  902) 		bh = &bhs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  903) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  904) 	first_group = page->index * blocks_per_page / 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  905) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  906) 	/* read all groups the page covers into the cache */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  907) 	for (i = 0, group = first_group; i < groups_per_page; i++, group++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  908) 		if (group >= ngroups)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  909) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  910) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  911) 		grinfo = ext4_get_group_info(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  912) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  913) 		 * If page is uptodate then we came here after online resize
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  914) 		 * which added some new uninitialized group info structs, so
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  915) 		 * we must skip all initialized uptodate buddies on the page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  916) 		 * which may be currently in use by an allocating task.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  917) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  918) 		if (PageUptodate(page) && !EXT4_MB_GRP_NEED_INIT(grinfo)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  919) 			bh[i] = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  920) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  921) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  922) 		bh[i] = ext4_read_block_bitmap_nowait(sb, group, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  923) 		if (IS_ERR(bh[i])) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  924) 			err = PTR_ERR(bh[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  925) 			bh[i] = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  926) 			goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  927) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  928) 		mb_debug(sb, "read bitmap for group %u\n", group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  929) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  930) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  931) 	/* wait for I/O completion */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  932) 	for (i = 0, group = first_group; i < groups_per_page; i++, group++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  933) 		int err2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  934) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  935) 		if (!bh[i])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  936) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  937) 		err2 = ext4_wait_block_bitmap(sb, group, bh[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  938) 		if (!err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  939) 			err = err2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  940) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  941) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  942) 	first_block = page->index * blocks_per_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  943) 	for (i = 0; i < blocks_per_page; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  944) 		group = (first_block + i) >> 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  945) 		if (group >= ngroups)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  946) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  947) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  948) 		if (!bh[group - first_group])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  949) 			/* skip initialized uptodate buddy */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  950) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  951) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  952) 		if (!buffer_verified(bh[group - first_group]))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  953) 			/* Skip faulty bitmaps */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  954) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  955) 		err = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  956) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  957) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  958) 		 * data carries information regarding this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  959) 		 * particular group in the format specified
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  960) 		 * above
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  961) 		 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  962) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  963) 		data = page_address(page) + (i * blocksize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  964) 		bitmap = bh[group - first_group]->b_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  965) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  966) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  967) 		 * We place the buddy block and bitmap block
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  968) 		 * close together
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  969) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  970) 		if ((first_block + i) & 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  971) 			/* this is block of buddy */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  972) 			BUG_ON(incore == NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  973) 			mb_debug(sb, "put buddy for group %u in page %lu/%x\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  974) 				group, page->index, i * blocksize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  975) 			trace_ext4_mb_buddy_bitmap_load(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  976) 			grinfo = ext4_get_group_info(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  977) 			grinfo->bb_fragments = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  978) 			memset(grinfo->bb_counters, 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  979) 			       sizeof(*grinfo->bb_counters) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  980) 				(sb->s_blocksize_bits+2));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  981) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  982) 			 * incore got set to the group block bitmap below
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  983) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  984) 			ext4_lock_group(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  985) 			/* init the buddy */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  986) 			memset(data, 0xff, blocksize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  987) 			ext4_mb_generate_buddy(sb, data, incore, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  988) 			ext4_unlock_group(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  989) 			incore = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  990) 		} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  991) 			/* this is block of bitmap */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  992) 			BUG_ON(incore != NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  993) 			mb_debug(sb, "put bitmap for group %u in page %lu/%x\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  994) 				group, page->index, i * blocksize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  995) 			trace_ext4_mb_bitmap_load(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  996) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  997) 			/* see comments in ext4_mb_put_pa() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  998) 			ext4_lock_group(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  999) 			memcpy(data, bitmap, blocksize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) 			/* mark all preallocated blks used in in-core bitmap */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002) 			ext4_mb_generate_from_pa(sb, data, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003) 			ext4_mb_generate_from_freelist(sb, data, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004) 			ext4_unlock_group(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006) 			/* set incore so that the buddy information can be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007) 			 * generated using this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009) 			incore = data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012) 	SetPageUptodate(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015) 	if (bh) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016) 		for (i = 0; i < groups_per_page; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017) 			brelse(bh[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018) 		if (bh != &bhs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019) 			kfree(bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) 	return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022) }
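
/*
 * The incore hand-off in the loop above, sketched for an assumed
 * blocks_per_page = 4: block 0 of the page is g0's bitmap, so the "else"
 * branch copies it into the page and sets incore; block 1 is g0's buddy,
 * so the "if" branch consumes incore to generate the buddy and resets it
 * to NULL; blocks 2 and 3 repeat both steps for g1. A buddy block is thus
 * always generated from the bitmap block preceding it, except when the
 * caller passes the bitmap in via @incore (the one-block-per-page case).
 */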
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025)  * Lock the buddy and bitmap pages. This makes sure that a parallel init_group
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026)  * on the same buddy page cannot happen while we hold the buddy page lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027)  * Return the locked buddy and bitmap pages in the e4b struct. If the buddy and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028)  * bitmap are on the same page, e4b->bd_buddy_page is NULL and 0 is returned.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030) static int ext4_mb_get_buddy_page_lock(struct super_block *sb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031) 		ext4_group_t group, struct ext4_buddy *e4b, gfp_t gfp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033) 	struct inode *inode = EXT4_SB(sb)->s_buddy_cache;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034) 	int block, pnum, poff;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035) 	int blocks_per_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036) 	struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038) 	e4b->bd_buddy_page = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039) 	e4b->bd_bitmap_page = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041) 	blocks_per_page = PAGE_SIZE / sb->s_blocksize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) 	 * the buddy cache inode stores the block bitmap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044) 	 * and buddy information in consecutive blocks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045) 	 * So for each group we need two blocks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047) 	block = group * 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) 	pnum = block / blocks_per_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049) 	poff = block % blocks_per_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050) 	page = find_or_create_page(inode->i_mapping, pnum, gfp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051) 	if (!page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052) 		return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053) 	BUG_ON(page->mapping != inode->i_mapping);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) 	e4b->bd_bitmap_page = page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055) 	e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057) 	if (blocks_per_page >= 2) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058) 		/* buddy and bitmap are on the same page */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062) 	block++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063) 	pnum = block / blocks_per_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064) 	page = find_or_create_page(inode->i_mapping, pnum, gfp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065) 	if (!page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066) 		return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067) 	BUG_ON(page->mapping != inode->i_mapping);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068) 	e4b->bd_buddy_page = page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070) }
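
/*
 * Example of the mapping above, with assumed numbers: for group 5,
 * block = 10. With blocksize = 1024 and PAGE_SIZE = 4096,
 * blocks_per_page = 4, so pnum = 2 and poff = 2, and the early return
 * applies: the buddy (block 11) shares the page, so bd_buddy_page stays
 * NULL. With blocksize = 4096, blocks_per_page = 1, so the bitmap lives on
 * page 10, the buddy on page 11, and both pages are locked separately.
 */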
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072) static void ext4_mb_put_buddy_page_lock(struct ext4_buddy *e4b)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074) 	if (e4b->bd_bitmap_page) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075) 		unlock_page(e4b->bd_bitmap_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076) 		put_page(e4b->bd_bitmap_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078) 	if (e4b->bd_buddy_page) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079) 		unlock_page(e4b->bd_buddy_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080) 		put_page(e4b->bd_buddy_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085)  * Locking note:  This routine calls ext4_mb_init_cache(), which takes the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086)  * block group lock of all groups for this page; do not hold the BG lock when
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087)  * calling this routine!
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089) static noinline_for_stack
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090) int ext4_mb_init_group(struct super_block *sb, ext4_group_t group, gfp_t gfp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093) 	struct ext4_group_info *this_grp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094) 	struct ext4_buddy e4b;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095) 	struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096) 	int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098) 	might_sleep();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099) 	mb_debug(sb, "init group %u\n", group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100) 	this_grp = ext4_get_group_info(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102) 	 * This ensures that we don't reinit the buddy cache
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103) 	 * page which maps to the group from which we are already
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104) 	 * allocating. If we are looking at the buddy cache we would
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105) 	 * have taken a reference using ext4_mb_load_buddy and that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106) 	 * would have pinned the buddy page in the page cache.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107) 	 * The call to ext4_mb_get_buddy_page_lock will mark the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108) 	 * page accessed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110) 	ret = ext4_mb_get_buddy_page_lock(sb, group, &e4b, gfp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111) 	if (ret || !EXT4_MB_GRP_NEED_INIT(this_grp)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1113) 		 * somebody already initialized the group;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114) 		 * return without doing anything
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116) 		goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119) 	page = e4b.bd_bitmap_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120) 	ret = ext4_mb_init_cache(page, NULL, gfp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121) 	if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122) 		goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123) 	if (!PageUptodate(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124) 		ret = -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125) 		goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128) 	if (e4b.bd_buddy_page == NULL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130) 		 * If both the bitmap and buddy are in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131) 		 * the same page we don't need to force
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132) 		 * init the buddy
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134) 		ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135) 		goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137) 	/* init buddy cache */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138) 	page = e4b.bd_buddy_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139) 	ret = ext4_mb_init_cache(page, e4b.bd_bitmap, gfp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140) 	if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141) 		goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142) 	if (!PageUptodate(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143) 		ret = -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144) 		goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146) err:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147) 	ext4_mb_put_buddy_page_lock(&e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152)  * Locking note:  This routine calls ext4_mb_init_cache(), which takes the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153)  * block group lock of all groups for this page; do not hold the BG lock when
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154)  * calling this routine!
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156) static noinline_for_stack int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157) ext4_mb_load_buddy_gfp(struct super_block *sb, ext4_group_t group,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158) 		       struct ext4_buddy *e4b, gfp_t gfp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160) 	int blocks_per_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161) 	int block;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162) 	int pnum;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163) 	int poff;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164) 	struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166) 	struct ext4_group_info *grp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167) 	struct ext4_sb_info *sbi = EXT4_SB(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168) 	struct inode *inode = sbi->s_buddy_cache;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170) 	might_sleep();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171) 	mb_debug(sb, "load group %u\n", group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173) 	blocks_per_page = PAGE_SIZE / sb->s_blocksize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174) 	grp = ext4_get_group_info(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176) 	e4b->bd_blkbits = sb->s_blocksize_bits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177) 	e4b->bd_info = grp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178) 	e4b->bd_sb = sb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179) 	e4b->bd_group = group;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180) 	e4b->bd_buddy_page = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181) 	e4b->bd_bitmap_page = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183) 	if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185) 		 * we need full data about the group
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186) 		 * to make a good selection
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188) 		ret = ext4_mb_init_group(sb, group, gfp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1189) 		if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190) 			return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194) 	 * the buddy cache inode stores the block bitmap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195) 	 * and buddy information in consecutive blocks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196) 	 * So for each group we need two blocks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198) 	block = group * 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199) 	pnum = block / blocks_per_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200) 	poff = block % blocks_per_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202) 	/* we could use find_or_create_page(), but it locks the page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203) 	 * which we'd like to avoid in the fast path ... */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204) 	page = find_get_page_flags(inode->i_mapping, pnum, FGP_ACCESSED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205) 	if (page == NULL || !PageUptodate(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206) 		if (page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208) 			 * drop the page reference and try to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1209) 			 * get the page with the lock held. If
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210) 			 * the page is not uptodate, that means
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211) 			 * somebody just created the page but
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212) 			 * has not initialized it yet, so wait
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213) 			 * for the initialization to finish.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215) 			put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216) 		page = find_or_create_page(inode->i_mapping, pnum, gfp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217) 		if (page) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218) 			BUG_ON(page->mapping != inode->i_mapping);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219) 			if (!PageUptodate(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220) 				ret = ext4_mb_init_cache(page, NULL, gfp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221) 				if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222) 					unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223) 					goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224) 				}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225) 				mb_cmp_bitmaps(e4b, page_address(page) +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226) 					       (poff * sb->s_blocksize));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228) 			unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231) 	if (page == NULL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232) 		ret = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233) 		goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235) 	if (!PageUptodate(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236) 		ret = -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1237) 		goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1238) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1239) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1240) 	/* Pages marked accessed already */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1241) 	e4b->bd_bitmap_page = page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1242) 	e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1243) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1244) 	block++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1245) 	pnum = block / blocks_per_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1246) 	poff = block % blocks_per_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1247) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1248) 	page = find_get_page_flags(inode->i_mapping, pnum, FGP_ACCESSED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1249) 	if (page == NULL || !PageUptodate(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1250) 		if (page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1251) 			put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1252) 		page = find_or_create_page(inode->i_mapping, pnum, gfp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1253) 		if (page) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1254) 			BUG_ON(page->mapping != inode->i_mapping);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1255) 			if (!PageUptodate(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1256) 				ret = ext4_mb_init_cache(page, e4b->bd_bitmap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1257) 							 gfp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1258) 				if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1259) 					unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1260) 					goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1261) 				}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1262) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1263) 			unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1264) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1265) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1266) 	if (page == NULL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1267) 		ret = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1268) 		goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1269) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1270) 	if (!PageUptodate(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1271) 		ret = -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1272) 		goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1273) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1274) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1275) 	/* Pages marked accessed already */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1276) 	e4b->bd_buddy_page = page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1277) 	e4b->bd_buddy = page_address(page) + (poff * sb->s_blocksize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1278) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1279) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1280) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1281) err:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1282) 	if (page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1283) 		put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1284) 	if (e4b->bd_bitmap_page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1285) 		put_page(e4b->bd_bitmap_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1286) 	if (e4b->bd_buddy_page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1287) 		put_page(e4b->bd_buddy_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1288) 	e4b->bd_buddy = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1289) 	e4b->bd_bitmap = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1290) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1291) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1292) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1293) static int ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1294) 			      struct ext4_buddy *e4b)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1295) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1296) 	return ext4_mb_load_buddy_gfp(sb, group, e4b, GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1297) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1298) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1299) static void ext4_mb_unload_buddy(struct ext4_buddy *e4b)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1300) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1301) 	if (e4b->bd_bitmap_page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1302) 		put_page(e4b->bd_bitmap_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1303) 	if (e4b->bd_buddy_page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1304) 		put_page(e4b->bd_buddy_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1305) }
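
/*
 * Typical usage of the load/unload pair above, as a sketch (error handling
 * elided; the same pattern recurs throughout this file):
 *
 *	struct ext4_buddy e4b;
 *
 *	if (ext4_mb_load_buddy(sb, group, &e4b) == 0) {
 *		ext4_lock_group(sb, group);
 *		... operate on e4b.bd_bitmap and e4b.bd_buddy ...
 *		ext4_unlock_group(sb, group);
 *		ext4_mb_unload_buddy(&e4b);
 *	}
 */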
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1306) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1307) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1308) static int mb_find_order_for_block(struct ext4_buddy *e4b, int block)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1309) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1310) 	int order = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1311) 	int bb_incr = 1 << (e4b->bd_blkbits - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1312) 	void *bb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1313) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1314) 	BUG_ON(e4b->bd_bitmap == e4b->bd_buddy);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1315) 	BUG_ON(block >= (1 << (e4b->bd_blkbits + 3)));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1316) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1317) 	bb = e4b->bd_buddy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1318) 	while (order <= e4b->bd_blkbits + 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1319) 		block = block >> 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1320) 		if (!mb_test_bit(block, bb)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1321) 			/* this block is part of buddy of order 'order' */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1322) 			return order;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1323) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1324) 		bb += bb_incr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1325) 		bb_incr >>= 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1326) 		order++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1327) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1328) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1329) }
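
/*
 * A walk-through with assumed state: suppose clusters 16..23 form one free
 * chunk of order 3, so the order-3 buddy has bit 2 clear while the order-1
 * and order-2 buddies keep their covering bits set. For block = 20 the loop
 * above tests bit 10 at order 1 (set), bit 5 at order 2 (set), then bit 2
 * at order 3 (clear) and returns 3. A return value of 0 means no free buddy
 * of any order covers the block.
 */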
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1330) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1331) static void mb_clear_bits(void *bm, int cur, int len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1332) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1333) 	__u32 *addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1334) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1335) 	len = cur + len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1336) 	while (cur < len) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1337) 		if ((cur & 31) == 0 && (len - cur) >= 32) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1338) 			/* fast path: clear whole word at once */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1339) 			addr = bm + (cur >> 3);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1340) 			*addr = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1341) 			cur += 32;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1342) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1343) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1344) 		mb_clear_bit(cur, bm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1345) 		cur++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1346) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1347) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1348) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1349) /* clear bits in the given range;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1350)  * returns the first bit that was already zero, if any; -1 otherwise
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1351)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1352) static int mb_test_and_clear_bits(void *bm, int cur, int len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1353) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1354) 	__u32 *addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1355) 	int zero_bit = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1356) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1357) 	len = cur + len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1358) 	while (cur < len) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1359) 		if ((cur & 31) == 0 && (len - cur) >= 32) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1360) 			/* fast path: clear whole word at once */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1361) 			addr = bm + (cur >> 3);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1362) 			if (*addr != (__u32)(-1) && zero_bit == -1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1363) 				zero_bit = cur + mb_find_next_zero_bit(addr, 32, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1364) 			*addr = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1365) 			cur += 32;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1366) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1367) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1368) 		if (!mb_test_and_clear_bit(cur, bm) && zero_bit == -1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1369) 			zero_bit = cur;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1370) 		cur++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1371) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1372) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1373) 	return zero_bit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1374) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1375) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1376) void ext4_set_bits(void *bm, int cur, int len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1377) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1378) 	__u32 *addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1379) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1380) 	len = cur + len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1381) 	while (cur < len) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1382) 		if ((cur & 31) == 0 && (len - cur) >= 32) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1383) 			/* fast path: set whole word at once */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1384) 			addr = bm + (cur >> 3);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1385) 			*addr = 0xffffffff;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1386) 			cur += 32;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1387) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1388) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1389) 		mb_set_bit(cur, bm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1390) 		cur++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1391) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1392) }
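
/*
 * All three helpers above share the same word-at-a-time fast path. An
 * assumed example: cur = 30, len = 70 touches bits 30..99 as
 *
 *	bits 30-31: one at a time (cur not 32-bit aligned)
 *	bits 32-63: one whole __u32 store
 *	bits 64-95: one whole __u32 store
 *	bits 96-99: one at a time (fewer than 32 bits left)
 *
 * The whole-word stores are plain, non-atomic writes; callers are expected
 * to serialize access to the bitmap, e.g. by holding the group lock.
 */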
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1393) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1394) static inline int mb_buddy_adjust_border(int* bit, void* bitmap, int side)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1395) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1396) 	if (mb_test_bit(*bit + side, bitmap)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1397) 		mb_clear_bit(*bit, bitmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1398) 		(*bit) -= side;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1399) 		return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1400) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1401) 	else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1402) 		(*bit) += side;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1403) 		mb_set_bit(*bit, bitmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1404) 		return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1405) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1406) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1407) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1408) static void mb_buddy_mark_free(struct ext4_buddy *e4b, int first, int last)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1409) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1410) 	int max;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1411) 	int order = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1412) 	void *buddy = mb_find_buddy(e4b, order, &max);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1413) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1414) 	while (buddy) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1415) 		void *buddy2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1416) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1417) 		/* Bits in the range [first; last] are known to be set since
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1418) 		 * the corresponding blocks were allocated. Bits in the range
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1419) 		 * (first; last) will stay set because they form buddies on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1420) 		 * the upper layer. We just deal with the borders if they don't
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1421) 		 * align with the upper layer and then go up.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1422) 		 * Releasing an entire group comes down to clearing a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1423) 		 * single bit of the highest-order buddy.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1424) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1425) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1426) 		/* Example:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1427) 		 * ---------------------------------
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1428) 		 * |   1   |   1   |   1   |   1   |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1429) 		 * ---------------------------------
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1430) 		 * | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1431) 		 * ---------------------------------
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1432) 		 *   0   1   2   3   4   5   6   7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1433) 		 *      \_____________________/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1434) 		 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1435) 		 * Neither [1] nor [6] is aligned to the layer above.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1436) 		 * Left neighbour [0] is free, so mark it busy,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1437) 		 * decrease bb_counters and extend the range to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1438) 		 * [0; 6].
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1439) 		 * Right neighbour [7] is busy. It can't be coalesced with [6], so
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1440) 		 * mark [6] free, increase bb_counters and shrink the range to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1441) 		 * [0; 5].
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1442) 		 * Then shift the range to [0; 2], go up and do the same.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1443) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1444) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1445) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1446) 		if (first & 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1447) 			e4b->bd_info->bb_counters[order] += mb_buddy_adjust_border(&first, buddy, -1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1448) 		if (!(last & 1))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1449) 			e4b->bd_info->bb_counters[order] += mb_buddy_adjust_border(&last, buddy, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1450) 		if (first > last)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1451) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1452) 		order++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1453) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1454) 		if (first == last || !(buddy2 = mb_find_buddy(e4b, order, &max))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1455) 			mb_clear_bits(buddy, first, last - first + 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1456) 			e4b->bd_info->bb_counters[order - 1] += last - first + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1457) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1458) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1459) 		first >>= 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1460) 		last >>= 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1461) 		buddy = buddy2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1462) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1463) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1464) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1465) static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1466) 			   int first, int count)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1467) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1468) 	int left_is_free = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1469) 	int right_is_free = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1470) 	int block;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1471) 	int last = first + count - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1472) 	struct super_block *sb = e4b->bd_sb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1473) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1474) 	if (WARN_ON(count == 0))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1475) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1476) 	BUG_ON(last >= (sb->s_blocksize << 3));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1477) 	assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1478) 	/* Don't bother if the block group is corrupt. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1479) 	if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1480) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1481) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1482) 	mb_check_buddy(e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1483) 	mb_free_blocks_double(inode, e4b, first, count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1484) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1485) 	this_cpu_inc(discard_pa_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1486) 	e4b->bd_info->bb_free += count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1487) 	if (first < e4b->bd_info->bb_first_free)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1488) 		e4b->bd_info->bb_first_free = first;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1489) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1490) 	/* access memory sequentially: check left neighbour,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1491) 	 * clear range and then check right neighbour
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1492) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1493) 	if (first != 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1494) 		left_is_free = !mb_test_bit(first - 1, e4b->bd_bitmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1495) 	block = mb_test_and_clear_bits(e4b->bd_bitmap, first, count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1496) 	if (last + 1 < EXT4_SB(sb)->s_mb_maxs[0])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1497) 		right_is_free = !mb_test_bit(last + 1, e4b->bd_bitmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1498) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1499) 	if (unlikely(block != -1)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1500) 		struct ext4_sb_info *sbi = EXT4_SB(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1501) 		ext4_fsblk_t blocknr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1502) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1503) 		blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1504) 		blocknr += EXT4_C2B(sbi, block);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1505) 		if (!(sbi->s_mount_state & EXT4_FC_REPLAY)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1506) 			ext4_grp_locked_error(sb, e4b->bd_group,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1507) 					      inode ? inode->i_ino : 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1508) 					      blocknr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1509) 					      "freeing already freed block (bit %u); block bitmap corrupt.",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1510) 					      block);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1511) 			ext4_mark_group_bitmap_corrupted(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1512) 				sb, e4b->bd_group,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1513) 				EXT4_GROUP_INFO_BBITMAP_CORRUPT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1514) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1515) 		mb_regenerate_buddy(e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1516) 		goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1517) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1518) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1519) 	/* let's maintain fragments counter */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1520) 	if (left_is_free && right_is_free)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1521) 		e4b->bd_info->bb_fragments--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1522) 	else if (!left_is_free && !right_is_free)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1523) 		e4b->bd_info->bb_fragments++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1524) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1525) 	/* buddy[0] == bd_bitmap is a special case, so handle
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1526) 	 * it right away and let mb_buddy_mark_free stay free of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1527) 	 * zero order checks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1528) 	 * Check if neighbours are to be coalesced,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1529) 	 * adjust bitmap bb_counters and borders appropriately.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1530) 	 */
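^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300     ) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300     ) 	 * Illustrative example (not in the original source): freeing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300     ) 	 * clusters 5..8 with bit 4 in use and bit 9 free leaves cluster
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300     ) 	 * 5 as a standalone order-0 chunk (bb_counters[0]++, first
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300     ) 	 * becomes 6), while cluster 8 merges with the already-free
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300     ) 	 * cluster 9 into an order-1 buddy (bb_counters[0]--, last stays
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300     ) 	 * 8); the remaining even-aligned range then goes to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300     ) 	 * mb_buddy_mark_free() at order 1.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300     ) 	 */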
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1531) 	if (first & 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1532) 		first += !left_is_free;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1533) 		e4b->bd_info->bb_counters[0] += left_is_free ? -1 : 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1534) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1535) 	if (!(last & 1)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1536) 		last -= !right_is_free;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1537) 		e4b->bd_info->bb_counters[0] += right_is_free ? -1 : 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1538) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1539) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1540) 	if (first <= last)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1541) 		mb_buddy_mark_free(e4b, first >> 1, last >> 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1542) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1543) done:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1544) 	mb_set_largest_free_order(sb, e4b->bd_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1545) 	mb_check_buddy(e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1546) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1547) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1548) static int mb_find_extent(struct ext4_buddy *e4b, int block,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1549) 				int needed, struct ext4_free_extent *ex)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1550) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1551) 	int next = block;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1552) 	int max, order;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1553) 	void *buddy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1554) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1555) 	assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1556) 	BUG_ON(ex == NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1557) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1558) 	buddy = mb_find_buddy(e4b, 0, &max);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1559) 	BUG_ON(buddy == NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1560) 	BUG_ON(block >= max);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1561) 	if (mb_test_bit(block, buddy)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1562) 		ex->fe_len = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1563) 		ex->fe_start = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1564) 		ex->fe_group = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1565) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1566) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1567) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1568) 	/* find actual order */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1569) 	order = mb_find_order_for_block(e4b, block);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1570) 	block = block >> order;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1571) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1572) 	ex->fe_len = 1 << order;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1573) 	ex->fe_start = block << order;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1574) 	ex->fe_group = e4b->bd_group;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1575) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1576) 	/* calc difference from given start */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1577) 	next = next - ex->fe_start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1578) 	ex->fe_len -= next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1579) 	ex->fe_start += next;
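^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300     ) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300     ) 	 * Illustrative example (not in the original source): for block 37
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300     ) 	 * inside a free order-2 chunk spanning clusters 36..39, the code
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300     ) 	 * above yields fe_start = 37 and fe_len = 3; the loop below then
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300     ) 	 * appends adjacent free buddies to the right while more clusters
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300     ) 	 * are needed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300     ) 	 */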
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1580) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1581) 	while (needed > ex->fe_len &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1582) 	       mb_find_buddy(e4b, order, &max)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1583) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1584) 		if (block + 1 >= max)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1585) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1586) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1587) 		next = (block + 1) * (1 << order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1588) 		if (mb_test_bit(next, e4b->bd_bitmap))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1589) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1590) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1591) 		order = mb_find_order_for_block(e4b, next);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1592) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1593) 		block = next >> order;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1594) 		ex->fe_len += 1 << order;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1595) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1596) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1597) 	if (ex->fe_start + ex->fe_len > EXT4_CLUSTERS_PER_GROUP(e4b->bd_sb)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1598) 		/* Should never happen! (but apparently sometimes does?!?) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1599) 		WARN_ON(1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1600) 		ext4_grp_locked_error(e4b->bd_sb, e4b->bd_group, 0, 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1601) 			"corruption or bug in mb_find_extent "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1602) 			"block=%d, order=%d needed=%d ex=%u/%d/%d@%u",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1603) 			block, order, needed, ex->fe_group, ex->fe_start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1604) 			ex->fe_len, ex->fe_logical);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1605) 		ex->fe_len = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1606) 		ex->fe_start = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1607) 		ex->fe_group = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1608) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1609) 	return ex->fe_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1610) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1611) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1612) static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1613) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1614) 	int ord;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1615) 	int mlen = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1616) 	int max = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1617) 	int cur;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1618) 	int start = ex->fe_start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1619) 	int len = ex->fe_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1620) 	unsigned ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1621) 	int len0 = len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1622) 	void *buddy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1623) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1624) 	BUG_ON(start + len > (e4b->bd_sb->s_blocksize << 3));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1625) 	BUG_ON(e4b->bd_group != ex->fe_group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1626) 	assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1627) 	mb_check_buddy(e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1628) 	mb_mark_used_double(e4b, start, len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1629) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1630) 	this_cpu_inc(discard_pa_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1631) 	e4b->bd_info->bb_free -= len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1632) 	if (e4b->bd_info->bb_first_free == start)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1633) 		e4b->bd_info->bb_first_free += len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1634) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1635) 	/* let's maintain fragments counter */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1636) 	if (start != 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1637) 		mlen = !mb_test_bit(start - 1, e4b->bd_bitmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1638) 	if (start + len < EXT4_SB(e4b->bd_sb)->s_mb_maxs[0])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1639) 		max = !mb_test_bit(start + len, e4b->bd_bitmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1640) 	if (mlen && max)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1641) 		e4b->bd_info->bb_fragments++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1642) 	else if (!mlen && !max)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1643) 		e4b->bd_info->bb_fragments--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1644) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1645) 	/* let's maintain buddy itself */
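^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300     ) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300     ) 	 * Illustrative walk (not in the original source): marking
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300     ) 	 * start = 6, len = 4 first takes the aligned, fully covered
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300     ) 	 * order-1 chunk 6..7; then, for 8..9, it splits a free order-2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300     ) 	 * chunk 8..11 into two order-1 halves, using 8..9 and returning
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300     ) 	 * 10..11 to bb_counters[1].
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300     ) 	 */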
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1646) 	while (len) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1647) 		ord = mb_find_order_for_block(e4b, start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1648) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1649) 		if (((start >> ord) << ord) == start && len >= (1 << ord)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1650) 			/* the whole chunk may be allocated at once! */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1651) 			mlen = 1 << ord;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1652) 			buddy = mb_find_buddy(e4b, ord, &max);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1653) 			BUG_ON((start >> ord) >= max);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1654) 			mb_set_bit(start >> ord, buddy);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1655) 			e4b->bd_info->bb_counters[ord]--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1656) 			start += mlen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1657) 			len -= mlen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1658) 			BUG_ON(len < 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1659) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1660) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1661) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1662) 		/* store the first buddy split for history: the low 16 bits
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300     ) 		 * of ret hold the tail length remaining at the split, the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300     ) 		 * high bits the order being split (see ac_tail/ac_buddy) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1663) 		if (ret == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1664) 			ret = len | (ord << 16);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1665) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1666) 		/* we have to split large buddy */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1667) 		BUG_ON(ord <= 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1668) 		buddy = mb_find_buddy(e4b, ord, &max);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1669) 		mb_set_bit(start >> ord, buddy);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1670) 		e4b->bd_info->bb_counters[ord]--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1671) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1672) 		ord--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1673) 		cur = (start >> ord) & ~1U;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1674) 		buddy = mb_find_buddy(e4b, ord, &max);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1675) 		mb_clear_bit(cur, buddy);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1676) 		mb_clear_bit(cur + 1, buddy);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1677) 		e4b->bd_info->bb_counters[ord]++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1678) 		e4b->bd_info->bb_counters[ord]++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1679) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1680) 	mb_set_largest_free_order(e4b->bd_sb, e4b->bd_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1681) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1682) 	ext4_set_bits(e4b->bd_bitmap, ex->fe_start, len0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1683) 	mb_check_buddy(e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1684) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1685) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1686) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1687) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1688) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1689)  * Must be called under group lock!
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1690)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1691) static void ext4_mb_use_best_found(struct ext4_allocation_context *ac,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1692) 					struct ext4_buddy *e4b)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1693) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1694) 	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1695) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1696) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1697) 	BUG_ON(ac->ac_b_ex.fe_group != e4b->bd_group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1698) 	BUG_ON(ac->ac_status == AC_STATUS_FOUND);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1699) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1700) 	ac->ac_b_ex.fe_len = min(ac->ac_b_ex.fe_len, ac->ac_g_ex.fe_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1701) 	ac->ac_b_ex.fe_logical = ac->ac_g_ex.fe_logical;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1702) 	ret = mb_mark_used(e4b, &ac->ac_b_ex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1703) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1704) 	/* preallocation can change ac_b_ex, thus we store the actually
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1705) 	 * allocated blocks for history */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1706) 	ac->ac_f_ex = ac->ac_b_ex;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1707) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1708) 	ac->ac_status = AC_STATUS_FOUND;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1709) 	ac->ac_tail = ret & 0xffff;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1710) 	ac->ac_buddy = ret >> 16;
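^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300     ) 	/* ac_tail/ac_buddy decode the split record packed by mb_mark_used() */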
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1711) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1712) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1713) 	 * take the page reference. We want the page to be pinned
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1714) 	 * so that we don't get an ext4_mb_init_cache() call for this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1715) 	 * group until we update the bitmap. That would mean we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1716) 	 * double allocate blocks. The reference is dropped
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1717) 	 * in ext4_mb_release_context
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1718) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1719) 	ac->ac_bitmap_page = e4b->bd_bitmap_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1720) 	get_page(ac->ac_bitmap_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1721) 	ac->ac_buddy_page = e4b->bd_buddy_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1722) 	get_page(ac->ac_buddy_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1723) 	/* store last allocated for subsequent stream allocation */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1724) 	if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1725) 		spin_lock(&sbi->s_md_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1726) 		sbi->s_mb_last_group = ac->ac_f_ex.fe_group;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1727) 		sbi->s_mb_last_start = ac->ac_f_ex.fe_start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1728) 		spin_unlock(&sbi->s_md_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1729) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1730) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1731) 	 * As we've just preallocated more space than the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1732) 	 * user originally requested, we store the allocated
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1733) 	 * space in a special descriptor.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1734) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1735) 	if (ac->ac_o_ex.fe_len < ac->ac_b_ex.fe_len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1736) 		ext4_mb_new_preallocation(ac);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1737) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1738) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1739) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1740) static void ext4_mb_check_limits(struct ext4_allocation_context *ac,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1741) 					struct ext4_buddy *e4b,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1742) 					int finish_group)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1743) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1744) 	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1745) 	struct ext4_free_extent *bex = &ac->ac_b_ex;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1746) 	struct ext4_free_extent *gex = &ac->ac_g_ex;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1747) 	struct ext4_free_extent ex;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1748) 	int max;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1749) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1750) 	if (ac->ac_status == AC_STATUS_FOUND)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1751) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1752) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1753) 	 * We don't want to scan for a whole year
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1754) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1755) 	if (ac->ac_found > sbi->s_mb_max_to_scan &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1756) 			!(ac->ac_flags & EXT4_MB_HINT_FIRST)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1757) 		ac->ac_status = AC_STATUS_BREAK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1758) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1759) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1760) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1761) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1762) 	 * Haven't found a good chunk so far; let's continue
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1763) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1764) 	if (bex->fe_len < gex->fe_len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1765) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1766) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1767) 	if ((finish_group || ac->ac_found > sbi->s_mb_min_to_scan)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1768) 			&& bex->fe_group == e4b->bd_group) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1769) 		/* recheck the chunk's availability - we don't know
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1770) 		 * whether it was found within this lock-unlock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1771) 		 * period or before it */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1772) 		max = mb_find_extent(e4b, bex->fe_start, gex->fe_len, &ex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1773) 		if (max >= gex->fe_len) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1774) 			ext4_mb_use_best_found(ac, e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1775) 			return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1776) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1777) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1778) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1779) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1780) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1781)  * The routine checks whether the found extent is good enough. If it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1782)  * is, the extent gets marked used and a flag is set in the context
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1783)  * to stop scanning. Otherwise, the extent is compared with the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1784)  * previously found extent and, if the new one is better, it is stored
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1785)  * in the context. Later, the best found extent will be used if
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1786)  * mballoc can't find a good enough extent.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1787)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1788)  * FIXME: the real allocation policy is yet to be designed!
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1789)  */
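^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300     ) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300     )  * Illustrative policy example (not in the original source): with a goal
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300     )  * of 8 clusters, a 7-cluster extent replaces a 5-cluster best-found
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300     )  * (bigger while still short of the goal); once a 12-cluster extent has
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300     )  * been found, a 9-cluster extent replaces it (still satisfies the goal
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300     )  * but wastes less), and an exact 8-cluster match is taken immediately.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300     )  */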
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1790) static void ext4_mb_measure_extent(struct ext4_allocation_context *ac,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1791) 					struct ext4_free_extent *ex,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1792) 					struct ext4_buddy *e4b)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1793) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1794) 	struct ext4_free_extent *bex = &ac->ac_b_ex;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1795) 	struct ext4_free_extent *gex = &ac->ac_g_ex;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1796) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1797) 	BUG_ON(ex->fe_len <= 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1798) 	BUG_ON(ex->fe_len > EXT4_CLUSTERS_PER_GROUP(ac->ac_sb));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1799) 	BUG_ON(ex->fe_start >= EXT4_CLUSTERS_PER_GROUP(ac->ac_sb));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1800) 	BUG_ON(ac->ac_status != AC_STATUS_CONTINUE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1801) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1802) 	ac->ac_found++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1803) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1804) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1805) 	 * The special case - take what you catch first
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1806) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1807) 	if (unlikely(ac->ac_flags & EXT4_MB_HINT_FIRST)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1808) 		*bex = *ex;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1809) 		ext4_mb_use_best_found(ac, e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1810) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1811) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1812) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1813) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1814) 	 * Let's check whether the chunk is good enough
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1815) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1816) 	if (ex->fe_len == gex->fe_len) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1817) 		*bex = *ex;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1818) 		ext4_mb_use_best_found(ac, e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1819) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1820) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1821) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1822) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1823) 	 * If this is the first found extent, just store it in the context
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1824) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1825) 	if (bex->fe_len == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1826) 		*bex = *ex;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1827) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1828) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1829) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1830) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1831) 	 * If the newly found extent is better, store it in the context
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1832) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1833) 	if (bex->fe_len < gex->fe_len) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1834) 		/* if the request isn't satisfied, any found extent
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1835) 		 * larger than the previous best one is better */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1836) 		if (ex->fe_len > bex->fe_len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1837) 			*bex = *ex;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1838) 	} else if (ex->fe_len > gex->fe_len) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1839) 		/* if the request is satisfied, then we try to find
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1840) 		 * an extent that still satisfies the request, but is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1841) 		 * smaller than the previous one */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1842) 		if (ex->fe_len < bex->fe_len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1843) 			*bex = *ex;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1844) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1845) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1846) 	ext4_mb_check_limits(ac, e4b, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1847) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1848) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1849) static noinline_for_stack
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1850) int ext4_mb_try_best_found(struct ext4_allocation_context *ac,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1851) 					struct ext4_buddy *e4b)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1852) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1853) 	struct ext4_free_extent ex = ac->ac_b_ex;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1854) 	ext4_group_t group = ex.fe_group;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1855) 	int max;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1856) 	int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1857) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1858) 	BUG_ON(ex.fe_len <= 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1859) 	err = ext4_mb_load_buddy(ac->ac_sb, group, e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1860) 	if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1861) 		return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1862) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1863) 	ext4_lock_group(ac->ac_sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1864) 	max = mb_find_extent(e4b, ex.fe_start, ex.fe_len, &ex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1865) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1866) 	if (max > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1867) 		ac->ac_b_ex = ex;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1868) 		ext4_mb_use_best_found(ac, e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1869) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1870) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1871) 	ext4_unlock_group(ac->ac_sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1872) 	ext4_mb_unload_buddy(e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1873) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1874) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1875) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1876) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1877) static noinline_for_stack
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1878) int ext4_mb_find_by_goal(struct ext4_allocation_context *ac,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1879) 				struct ext4_buddy *e4b)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1880) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1881) 	ext4_group_t group = ac->ac_g_ex.fe_group;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1882) 	int max;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1883) 	int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1884) 	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1885) 	struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1886) 	struct ext4_free_extent ex;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1887) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1888) 	if (!(ac->ac_flags & EXT4_MB_HINT_TRY_GOAL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1889) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1890) 	if (grp->bb_free == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1891) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1892) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1893) 	err = ext4_mb_load_buddy(ac->ac_sb, group, e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1894) 	if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1895) 		return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1896) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1897) 	if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1898) 		ext4_mb_unload_buddy(e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1899) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1900) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1901) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1902) 	ext4_lock_group(ac->ac_sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1903) 	max = mb_find_extent(e4b, ac->ac_g_ex.fe_start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1904) 			     ac->ac_g_ex.fe_len, &ex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1905) 	ex.fe_logical = 0xDEADFA11; /* debug value */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1906) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1907) 	if (max >= ac->ac_g_ex.fe_len && ac->ac_g_ex.fe_len == sbi->s_stripe) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1908) 		ext4_fsblk_t start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1909) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1910) 		start = ext4_group_first_block_no(ac->ac_sb, e4b->bd_group) +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1911) 			ex.fe_start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1912) 		/* use do_div to get remainder (would be 64-bit modulo) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1913) 		if (do_div(start, sbi->s_stripe) == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1914) 			ac->ac_found++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1915) 			ac->ac_b_ex = ex;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1916) 			ext4_mb_use_best_found(ac, e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1917) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1918) 	} else if (max >= ac->ac_g_ex.fe_len) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1919) 		BUG_ON(ex.fe_len <= 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1920) 		BUG_ON(ex.fe_group != ac->ac_g_ex.fe_group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1921) 		BUG_ON(ex.fe_start != ac->ac_g_ex.fe_start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1922) 		ac->ac_found++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1923) 		ac->ac_b_ex = ex;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1924) 		ext4_mb_use_best_found(ac, e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1925) 	} else if (max > 0 && (ac->ac_flags & EXT4_MB_HINT_MERGE)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1926) 		/* Sometimes, the caller may want to merge even a small
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1927) 		 * number of blocks into an existing extent */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1928) 		BUG_ON(ex.fe_len <= 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1929) 		BUG_ON(ex.fe_group != ac->ac_g_ex.fe_group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1930) 		BUG_ON(ex.fe_start != ac->ac_g_ex.fe_start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1931) 		ac->ac_found++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1932) 		ac->ac_b_ex = ex;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1933) 		ext4_mb_use_best_found(ac, e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1934) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1935) 	ext4_unlock_group(ac->ac_sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1936) 	ext4_mb_unload_buddy(e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1937) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1938) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1939) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1940) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1941) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1942)  * The routine scans the buddy structures (not the bitmap!) from the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1943)  * given order up to the max order and tries to find a chunk big
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300     )  * enough to satisfy the request
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1944)  */
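^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300     ) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300     )  * Illustrative example (not in the original source): for a request of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300     )  * 2^ac_2order = 16 clusters, orders 4, 5, ... are probed via
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300     )  * bb_counters[]; the first order i with a free chunk yields, through
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300     )  * mb_find_next_zero_bit() on that buddy bitmap, an extent of 1 << i
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300     )  * clusters starting at cluster k << i.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300     )  */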
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1945) static noinline_for_stack
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1946) void ext4_mb_simple_scan_group(struct ext4_allocation_context *ac,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1947) 					struct ext4_buddy *e4b)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1948) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1949) 	struct super_block *sb = ac->ac_sb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1950) 	struct ext4_group_info *grp = e4b->bd_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1951) 	void *buddy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1952) 	int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1953) 	int k;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1954) 	int max;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1955) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1956) 	BUG_ON(ac->ac_2order <= 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1957) 	for (i = ac->ac_2order; i <= sb->s_blocksize_bits + 1; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1958) 		if (grp->bb_counters[i] == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1959) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1960) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1961) 		buddy = mb_find_buddy(e4b, i, &max);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1962) 		BUG_ON(buddy == NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1963) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1964) 		k = mb_find_next_zero_bit(buddy, max, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1965) 		if (k >= max) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1966) 			ext4_grp_locked_error(ac->ac_sb, e4b->bd_group, 0, 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1967) 				"%d free clusters of order %d. But found 0",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1968) 				grp->bb_counters[i], i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1969) 			ext4_mark_group_bitmap_corrupted(ac->ac_sb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1970) 					 e4b->bd_group,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1971) 					EXT4_GROUP_INFO_BBITMAP_CORRUPT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1972) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1973) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1974) 		ac->ac_found++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1975) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1976) 		ac->ac_b_ex.fe_len = 1 << i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1977) 		ac->ac_b_ex.fe_start = k << i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1978) 		ac->ac_b_ex.fe_group = e4b->bd_group;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1979) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1980) 		ext4_mb_use_best_found(ac, e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1981) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1982) 		BUG_ON(ac->ac_f_ex.fe_len != ac->ac_g_ex.fe_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1983) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1984) 		if (EXT4_SB(sb)->s_mb_stats)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1985) 			atomic_inc(&EXT4_SB(sb)->s_bal_2orders);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1986) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1987) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1988) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1989) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1990) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1991) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1992)  * The routine scans the group and measures all found extents.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1993)  * In order to optimize scanning, the caller must pass the number of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1994)  * free blocks in the group, so the routine can know the upper limit.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1995)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1996) static noinline_for_stack
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1997) void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1998) 					struct ext4_buddy *e4b)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1999) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2000) 	struct super_block *sb = ac->ac_sb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2001) 	void *bitmap = e4b->bd_bitmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2002) 	struct ext4_free_extent ex;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2003) 	int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2004) 	int free;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2005) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2006) 	free = e4b->bd_info->bb_free;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2007) 	if (WARN_ON(free <= 0))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2008) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2009) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2010) 	i = e4b->bd_info->bb_first_free;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2011) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2012) 	while (free && ac->ac_status == AC_STATUS_CONTINUE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2013) 		i = mb_find_next_zero_bit(bitmap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2014) 						EXT4_CLUSTERS_PER_GROUP(sb), i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2015) 		if (i >= EXT4_CLUSTERS_PER_GROUP(sb)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2016) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2017) 			 * If we have a corrupt bitmap, we won't find any
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2018) 			 * free blocks even though the group info says we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2019) 			 * have free blocks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2020) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2021) 			ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2022) 					"%d free clusters as per "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2023) 					"group info. But bitmap says 0",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2024) 					free);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2025) 			ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2026) 					EXT4_GROUP_INFO_BBITMAP_CORRUPT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2027) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2028) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2029) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2030) 		mb_find_extent(e4b, i, ac->ac_g_ex.fe_len, &ex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2031) 		if (WARN_ON(ex.fe_len <= 0))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2032) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2033) 		if (free < ex.fe_len) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2034) 			ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2035) 					"%d free clusters as per "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2036) 					"group info. But got %d blocks",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2037) 					free, ex.fe_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2038) 			ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2039) 					EXT4_GROUP_INFO_BBITMAP_CORRUPT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2040) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2041) 			 * The number of free blocks differs. This mostly
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2042) 			 * indicates that the bitmap is corrupt. So exit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2043) 			 * without claiming the space.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2044) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2045) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2046) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2047) 		ex.fe_logical = 0xDEADC0DE; /* debug value */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2048) 		ext4_mb_measure_extent(ac, &ex, e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2049) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2050) 		i += ex.fe_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2051) 		free -= ex.fe_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2052) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2053) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2054) 	ext4_mb_check_limits(ac, e4b, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2055) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2056) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2057) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2058)  * This is a special case for storage devices like RAID5:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2059)  * we try to find stripe-aligned chunks for stripe-size-multiple requests
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2060)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2061) static noinline_for_stack
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2062) void ext4_mb_scan_aligned(struct ext4_allocation_context *ac,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2063) 				 struct ext4_buddy *e4b)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2064) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2065) 	struct super_block *sb = ac->ac_sb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2066) 	struct ext4_sb_info *sbi = EXT4_SB(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2067) 	void *bitmap = e4b->bd_bitmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2068) 	struct ext4_free_extent ex;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2069) 	ext4_fsblk_t first_group_block;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2070) 	ext4_fsblk_t a;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2071) 	ext4_grpblk_t i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2072) 	int max;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2073) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2074) 	BUG_ON(sbi->s_stripe == 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2075) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2076) 	/* find first stripe-aligned block in group */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2077) 	first_group_block = ext4_group_first_block_no(sb, e4b->bd_group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2078) 
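^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300     ) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300     ) 	 * Round first_group_block up to the next multiple of s_stripe.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300     ) 	 * Illustrative arithmetic (not in the original source): with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300     ) 	 * first_group_block = 32770 and s_stripe = 16, a becomes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300     ) 	 * (32770 + 15) / 16 = 2049, so i = 2049 * 16 - 32770 = 14, the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300     ) 	 * offset of the first stripe-aligned block in this group.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300     ) 	 */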
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2079) 	a = first_group_block + sbi->s_stripe - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2080) 	do_div(a, sbi->s_stripe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2081) 	i = (a * sbi->s_stripe) - first_group_block;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2082) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2083) 	while (i < EXT4_CLUSTERS_PER_GROUP(sb)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2084) 		if (!mb_test_bit(i, bitmap)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2085) 			max = mb_find_extent(e4b, i, sbi->s_stripe, &ex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2086) 			if (max >= sbi->s_stripe) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2087) 				ac->ac_found++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2088) 				ex.fe_logical = 0xDEADF00D; /* debug value */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2089) 				ac->ac_b_ex = ex;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2090) 				ext4_mb_use_best_found(ac, e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2091) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2092) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2093) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2094) 		i += sbi->s_stripe;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2095) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2096) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2097) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2098) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2099)  * This is also called BEFORE we load the buddy bitmap.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2100)  * Returns true if the group is suitable for the allocation,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2101)  * false otherwise.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2102)  */
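^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300     ) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300     )  * Criteria summary (derived from the switch below): cr 0 needs a free
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300     )  * buddy chunk of order ac_2order or larger, cr 1 needs the average
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300     )  * fragment size (free / fragments) to reach the goal length, cr 2 only
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300     )  * needs enough free clusters in total, and cr 3 accepts any group with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300     )  * free space.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300     )  */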
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2103) static bool ext4_mb_good_group(struct ext4_allocation_context *ac,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2104) 				ext4_group_t group, int cr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2105) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2106) 	ext4_grpblk_t free, fragments;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2107) 	int flex_size = ext4_flex_bg_size(EXT4_SB(ac->ac_sb));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2108) 	struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2109) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2110) 	BUG_ON(cr < 0 || cr >= 4);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2111) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2112) 	if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(grp)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2113) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2114) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2115) 	free = grp->bb_free;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2116) 	if (free == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2117) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2118) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2119) 	fragments = grp->bb_fragments;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2120) 	if (fragments == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2121) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2122) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2123) 	switch (cr) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2124) 	case 0:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2125) 		BUG_ON(ac->ac_2order == 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2126) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2127) 		/* Avoid using the first bg of a flexgroup for data files */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2128) 		if ((ac->ac_flags & EXT4_MB_HINT_DATA) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2129) 		    (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2130) 		    ((group % flex_size) == 0))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2131) 			return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2132) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2133) 		if (free < ac->ac_g_ex.fe_len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2134) 			return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2135) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2136) 		if (ac->ac_2order > ac->ac_sb->s_blocksize_bits+1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2137) 			return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2138) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2139) 		if (grp->bb_largest_free_order < ac->ac_2order)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2140) 			return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2141) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2142) 		return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2143) 	case 1:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2144) 		if ((free / fragments) >= ac->ac_g_ex.fe_len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2145) 			return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2146) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2147) 	case 2:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2148) 		if (free >= ac->ac_g_ex.fe_len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2149) 			return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2150) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2151) 	case 3:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2152) 		return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2153) 	default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2154) 		BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2155) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2156) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2157) 	return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2158) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2159) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2160) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2161)  * This could return a negative error code if something goes wrong
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2162)  * during ext4_mb_init_group(). This should not be called with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2163)  * ext4_lock_group() held.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2164)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2165) static int ext4_mb_good_group_nolock(struct ext4_allocation_context *ac,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2166) 				     ext4_group_t group, int cr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2167) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2168) 	struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2169) 	struct super_block *sb = ac->ac_sb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2170) 	struct ext4_sb_info *sbi = EXT4_SB(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2171) 	bool should_lock = ac->ac_flags & EXT4_MB_STRICT_CHECK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2172) 	ext4_grpblk_t free;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2173) 	int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2174) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2175) 	if (should_lock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2176) 		ext4_lock_group(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2177) 	free = grp->bb_free;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2178) 	if (free == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2179) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2180) 	if (cr <= 2 && free < ac->ac_g_ex.fe_len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2181) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2182) 	if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(grp)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2183) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2184) 	if (should_lock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2185) 		ext4_unlock_group(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2186) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2187) 	/* We only do this if the grp has never been initialized */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2188) 	if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2189) 		struct ext4_group_desc *gdp =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2190) 			ext4_get_group_desc(sb, group, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2191) 		int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2192) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2193) 		/* cr=0/1 is a very optimistic search to find large
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2194) 		 * good chunks almost for free.  If buddy data is not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2195) 		 * ready, then this optimization makes no sense.  But
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2196) 		 * we never skip the first block group in a flex_bg,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2197) 		 * since this gets used for metadata block allocation,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2198) 		 * and we want to make sure we locate metadata blocks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2199) 		 * in the first block group in the flex_bg if possible.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2200) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2201) 		if (cr < 2 &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2202) 		    (!sbi->s_log_groups_per_flex ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2203) 		     ((group & ((1 << sbi->s_log_groups_per_flex) - 1)) != 0)) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2204) 		    !(ext4_has_group_desc_csum(sb) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2205) 		      (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2206) 			return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2207) 		ret = ext4_mb_init_group(sb, group, GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2208) 		if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2209) 			return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2210) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2211) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2212) 	if (should_lock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2213) 		ext4_lock_group(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2214) 	ret = ext4_mb_good_group(ac, group, cr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2215) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2216) 	if (should_lock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2217) 		ext4_unlock_group(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2218) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2219) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2220) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2221) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2222)  * Start prefetching @nr block bitmaps starting at @group.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2223)  * Return the next group which needs to be prefetched.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2224)  */
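^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300     ) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300     )  * Typical usage (see ext4_mb_regular_allocator() below): the allocator
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300     )  * prefetches bitmaps ahead of its scan, remembers the returned group in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300     )  * prefetch_grp, and later hands it to ext4_mb_prefetch_fini() together
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300     )  * with the same nr.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300     )  */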
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2225) ext4_group_t ext4_mb_prefetch(struct super_block *sb, ext4_group_t group,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2226) 			      unsigned int nr, int *cnt)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2227) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2228) 	ext4_group_t ngroups = ext4_get_groups_count(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2229) 	struct buffer_head *bh;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2230) 	struct blk_plug plug;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2231) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2232) 	blk_start_plug(&plug);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2233) 	while (nr-- > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2234) 		struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2235) 								  NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2236) 		struct ext4_group_info *grp = ext4_get_group_info(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2237) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2238) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2239) 		 * Prefetch block groups with free blocks; but don't
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2240) 		 * bother if the group is marked uninitialized on disk,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2241) 		 * since it won't require I/O to read.  Also, only try to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2242) 		 * prefetch once, so that we avoid the getblk() call,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2243) 		 * which can be expensive.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2244) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2245) 		if (!EXT4_MB_GRP_TEST_AND_SET_READ(grp) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2246) 		    EXT4_MB_GRP_NEED_INIT(grp) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2247) 		    ext4_free_group_clusters(sb, gdp) > 0 &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2248) 		    !(ext4_has_group_desc_csum(sb) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2249) 		      (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2250) 			bh = ext4_read_block_bitmap_nowait(sb, group, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2251) 			if (bh && !IS_ERR(bh)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2252) 				if (!buffer_uptodate(bh) && cnt)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2253) 					(*cnt)++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2254) 				brelse(bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2255) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2256) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2257) 		if (++group >= ngroups)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2258) 			group = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2259) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2260) 	blk_finish_plug(&plug);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2261) 	return group;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2262) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2263) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2264) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2265)  * Prefetching reads the block bitmap into the buffer cache; but we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2266)  * need to make sure that the buddy bitmap in the page cache has been
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2267)  * initialized.  Note that ext4_mb_init_group() will block if the I/O
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2268)  * is not yet completed, or indeed if the I/O was never
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2269)  * initiated by ext4_mb_prefetch().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2270)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2271)  * TODO: We should actually kick off the buddy bitmap setup in a work
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2272)  * queue when the buffer I/O is completed, so that we don't block
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2273)  * waiting for the block allocation bitmap read to finish when
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2274)  * ext4_mb_prefetch_fini is called from ext4_mb_regular_allocator().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2275)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2276) void ext4_mb_prefetch_fini(struct super_block *sb, ext4_group_t group,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2277) 			   unsigned int nr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2278) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2279) 	struct ext4_group_desc *gdp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2280) 	struct ext4_group_info *grp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2281) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2282) 	while (nr-- > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2283) 		if (!group)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2284) 			group = ext4_get_groups_count(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2285) 		group--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2286) 		gdp = ext4_get_group_desc(sb, group, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2287) 		grp = ext4_get_group_info(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2288) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2289) 		if (EXT4_MB_GRP_NEED_INIT(grp) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2290) 		    ext4_free_group_clusters(sb, gdp) > 0 &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2291) 		    !(ext4_has_group_desc_csum(sb) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2292) 		      (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2293) 			if (ext4_mb_init_group(sb, group, GFP_NOFS))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2294) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2295) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2296) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2297) }
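/*
 * Illustrative pairing of the two helpers above (a sketch only; the
 * real caller is ext4_mb_regular_allocator() below, and the starting
 * group and batch size here are made-up values):
 *
 *	unsigned int nr = 32, ios = 0;
 *	ext4_group_t next;
 *
 *	next = ext4_mb_prefetch(sb, 100, nr, &ios);
 *	... scan groups, possibly succeeding early ...
 *	if (nr)
 *		ext4_mb_prefetch_fini(sb, next, nr);
 *
 * ext4_mb_prefetch() returns the group just past the batch it started,
 * and ext4_mb_prefetch_fini() walks backwards over that same batch to
 * make sure the buddy data for those groups gets initialized.
 */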
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2298) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2299) static noinline_for_stack int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2300) ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2301) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2302) 	ext4_group_t prefetch_grp = 0, ngroups, group, i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2303) 	int cr = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2304) 	int err = 0, first_err = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2305) 	unsigned int nr = 0, prefetch_ios = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2306) 	struct ext4_sb_info *sbi;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2307) 	struct super_block *sb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2308) 	struct ext4_buddy e4b;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2309) 	int lost;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2310) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2311) 	sb = ac->ac_sb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2312) 	sbi = EXT4_SB(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2313) 	ngroups = ext4_get_groups_count(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2314) 	/* non-extent files are limited to low blocks/groups */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2315) 	if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2316) 		ngroups = sbi->s_blockfile_groups;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2317) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2318) 	BUG_ON(ac->ac_status == AC_STATUS_FOUND);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2319) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2320) 	/* first, try the goal */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2321) 	err = ext4_mb_find_by_goal(ac, &e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2322) 	if (err || ac->ac_status == AC_STATUS_FOUND)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2323) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2324) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2325) 	if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2326) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2327) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2328) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2329) 	 * ac->ac_2order is set only if the fe_len is a power of 2.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2330) 	 * If ac->ac_2order is set, we also set the criteria to 0 so
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2331) 	 * that we try an exact allocation using the buddy cache.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2332) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2333) 	i = fls(ac->ac_g_ex.fe_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2334) 	ac->ac_2order = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2335) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2336) 	 * We search using buddy data only if the order of the request
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2337) 	 * is greater than or equal to sbi->s_mb_order2_reqs; you can
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2338) 	 * tune this via /sys/fs/ext4/<partition>/mb_order2_req.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2339) 	 * We also support searching for power-of-two requests only for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2340) 	 * requests up to the maximum buddy size we have constructed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2341) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2342) 	if (i >= sbi->s_mb_order2_reqs && i <= sb->s_blocksize_bits + 2) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2343) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2344) 		 * This should tell us whether fe_len is exactly a power of 2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2345) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2346) 		if ((ac->ac_g_ex.fe_len & (~(1 << (i - 1)))) == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2347) 			ac->ac_2order = array_index_nospec(i - 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2348) 							   sb->s_blocksize_bits + 2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2349) 	}
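	/*
	 * Worked example of the check above: for fe_len == 16, fls()
	 * returns 5 and (16 & ~(1 << 4)) == 0, so the request is an
	 * exact power of two and ac_2order becomes 4.  For fe_len == 24,
	 * fls() also returns 5 but (24 & ~(1 << 4)) == 8, so ac_2order
	 * stays 0 and the cr=0 buddy shortcut is skipped.  Note that
	 * array_index_nospec() only clamps the index against speculative
	 * out-of-bounds use; it does not change the architectural value.
	 */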
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2350) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2351) 	/* if stream allocation is enabled, use global goal */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2352) 	if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2353) 		/* TBD: this may be a contention hot spot */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2354) 		spin_lock(&sbi->s_md_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2355) 		ac->ac_g_ex.fe_group = sbi->s_mb_last_group;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2356) 		ac->ac_g_ex.fe_start = sbi->s_mb_last_start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2357) 		spin_unlock(&sbi->s_md_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2358) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2359) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2360) 	/* Let's just scan groups to find more or less suitable blocks */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2361) 	cr = ac->ac_2order ? 0 : 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2362) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2363) 	 * cr == 0 try to get exact allocation,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2364) 	 * cr == 3  try to get anything
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2365) 	 */
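	/*
	 * For reference, the four passes roughly mean the following
	 * (summarizing the checks in ext4_mb_good_group() in this
	 * kernel; treat this as a summary, not a specification):
	 *   cr == 0: only groups whose buddy metadata shows a free
	 *            chunk of the requested order (exact power of 2)
	 *   cr == 1: groups whose average free fragment, i.e.
	 *            bb_free / bb_fragments, covers the request
	 *   cr == 2: any group with bb_free >= the requested length
	 *   cr == 3: any group at all, taking whatever we can get
	 */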
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2366) repeat:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2367) 	for (; cr < 4 && ac->ac_status == AC_STATUS_CONTINUE; cr++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2368) 		ac->ac_criteria = cr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2369) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2370) 		 * searching for the right group start
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2371) 		 * from the goal value specified
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2372) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2373) 		group = ac->ac_g_ex.fe_group;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2374) 		prefetch_grp = group;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2375) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2376) 		for (i = 0; i < ngroups; group++, i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2377) 			int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2378) 			cond_resched();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2379) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2380) 			 * The artificially restricted ngroups for non-extent
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2381) 			 * files can make group exceed ngroups on the first pass.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2382) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2383) 			if (group >= ngroups)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2384) 				group = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2385) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2386) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2387) 			 * Batch reads of the block allocation bitmaps
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2388) 			 * to get multiple READs in flight; limit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2389) 			 * prefetching at cr=0/1, otherwise mballoc can
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2390) 			 * spend a lot of time loading imperfect groups
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2391) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2392) 			if ((prefetch_grp == group) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2393) 			    (cr > 1 ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2394) 			     prefetch_ios < sbi->s_mb_prefetch_limit)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2395) 				unsigned int curr_ios = prefetch_ios;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2396) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2397) 				nr = sbi->s_mb_prefetch;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2398) 				if (ext4_has_feature_flex_bg(sb)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2399) 					nr = 1 << sbi->s_log_groups_per_flex;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2400) 					nr -= group & (nr - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2401) 					nr = min(nr, sbi->s_mb_prefetch);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2402) 				}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2403) 				prefetch_grp = ext4_mb_prefetch(sb, group,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2404) 							nr, &prefetch_ios);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2405) 				if (prefetch_ios == curr_ios)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2406) 					nr = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2407) 			}
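			/*
			 * Example of the "nr" computation above with a
			 * flex_bg factor of 16 (s_log_groups_per_flex
			 * == 4): for group == 35, nr starts at 16, and
			 * nr -= (35 & 15) leaves nr == 13, so the
			 * batch covers groups 35..47 and stops at the
			 * next flex-group boundary (group 48).  The
			 * min() then caps the batch at s_mb_prefetch.
			 */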
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2408) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2409) 			/* This now checks without needing the buddy page */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2410) 			ret = ext4_mb_good_group_nolock(ac, group, cr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2411) 			if (ret <= 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2412) 				if (!first_err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2413) 					first_err = ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2414) 				continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2415) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2416) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2417) 			err = ext4_mb_load_buddy(sb, group, &e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2418) 			if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2419) 				goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2420) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2421) 			ext4_lock_group(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2422) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2423) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2424) 			 * We need to check again after locking the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2425) 			 * block group
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2426) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2427) 			ret = ext4_mb_good_group(ac, group, cr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2428) 			if (ret == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2429) 				ext4_unlock_group(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2430) 				ext4_mb_unload_buddy(&e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2431) 				continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2432) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2433) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2434) 			ac->ac_groups_scanned++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2435) 			if (cr == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2436) 				ext4_mb_simple_scan_group(ac, &e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2437) 			else if (cr == 1 && sbi->s_stripe &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2438) 					!(ac->ac_g_ex.fe_len % sbi->s_stripe))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2439) 				ext4_mb_scan_aligned(ac, &e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2440) 			else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2441) 				ext4_mb_complex_scan_group(ac, &e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2442) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2443) 			ext4_unlock_group(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2444) 			ext4_mb_unload_buddy(&e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2445) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2446) 			if (ac->ac_status != AC_STATUS_CONTINUE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2447) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2448) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2449) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2450) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2451) 	if (ac->ac_b_ex.fe_len > 0 && ac->ac_status != AC_STATUS_FOUND &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2452) 	    !(ac->ac_flags & EXT4_MB_HINT_FIRST)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2453) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2454) 		 * We've been searching too long. Let's try to allocate
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2455) 		 * the best chunk we've found so far
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2456) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2457) 		ext4_mb_try_best_found(ac, &e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2458) 		if (ac->ac_status != AC_STATUS_FOUND) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2459) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2460) 			 * Someone luckier has already allocated it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2461) 			 * The only thing we can do now is take the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2462) 			 * first block(s) we find.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2463) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2464) 			lost = atomic_inc_return(&sbi->s_mb_lost_chunks);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2465) 			mb_debug(sb, "lost chunk, group: %u, start: %d, len: %d, lost: %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2466) 				 ac->ac_b_ex.fe_group, ac->ac_b_ex.fe_start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2467) 				 ac->ac_b_ex.fe_len, lost);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2468) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2469) 			ac->ac_b_ex.fe_group = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2470) 			ac->ac_b_ex.fe_start = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2471) 			ac->ac_b_ex.fe_len = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2472) 			ac->ac_status = AC_STATUS_CONTINUE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2473) 			ac->ac_flags |= EXT4_MB_HINT_FIRST;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2474) 			cr = 3;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2475) 			goto repeat;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2476) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2477) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2478) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2479) 	if (!err && ac->ac_status != AC_STATUS_FOUND && first_err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2480) 		err = first_err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2481) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2482) 	mb_debug(sb, "Best len %d, origin len %d, ac_status %u, ac_flags 0x%x, cr %d ret %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2483) 		 ac->ac_b_ex.fe_len, ac->ac_o_ex.fe_len, ac->ac_status,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2484) 		 ac->ac_flags, cr, err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2485) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2486) 	if (nr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2487) 		ext4_mb_prefetch_fini(sb, prefetch_grp, nr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2488) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2489) 	return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2490) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2491) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2492) static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2493) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2494) 	struct super_block *sb = PDE_DATA(file_inode(seq->file));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2495) 	ext4_group_t group;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2496) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2497) 	if (*pos < 0 || *pos >= ext4_get_groups_count(sb))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2498) 		return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2499) 	group = *pos + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2500) 	return (void *) ((unsigned long) group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2501) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2502) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2503) static void *ext4_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2504) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2505) 	struct super_block *sb = PDE_DATA(file_inode(seq->file));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2506) 	ext4_group_t group;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2507) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2508) 	++*pos;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2509) 	if (*pos < 0 || *pos >= ext4_get_groups_count(sb))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2510) 		return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2511) 	group = *pos + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2512) 	return (void *) ((unsigned long) group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2513) }
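/*
 * Both iterators above hand back the group number as (*pos + 1)
 * because seq_file treats a NULL return from ->start/->next as end of
 * iteration, so a raw value of 0 could not represent group 0.  The
 * matching decode is the first thing ext4_mb_seq_groups_show() does:
 *
 *	ext4_group_t group = (ext4_group_t) ((unsigned long) v);
 *	group--;
 */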
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2514) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2515) static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2516) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2517) 	struct super_block *sb = PDE_DATA(file_inode(seq->file));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2518) 	ext4_group_t group = (ext4_group_t) ((unsigned long) v);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2519) 	int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2520) 	int err, buddy_loaded = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2521) 	struct ext4_buddy e4b;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2522) 	struct ext4_group_info *grinfo;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2523) 	unsigned char blocksize_bits = min_t(unsigned char,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2524) 					     sb->s_blocksize_bits,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2525) 					     EXT4_MAX_BLOCK_LOG_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2526) 	struct sg {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2527) 		struct ext4_group_info info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2528) 		ext4_grpblk_t counters[EXT4_MAX_BLOCK_LOG_SIZE + 2];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2529) 	} sg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2530) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2531) 	group--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2532) 	if (group == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2533) 		seq_puts(seq, "#group: free  frags first ["
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2534) 			      " 2^0   2^1   2^2   2^3   2^4   2^5   2^6  "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2535) 			      " 2^7   2^8   2^9   2^10  2^11  2^12  2^13  ]\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2536) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2537) 	i = (blocksize_bits + 2) * sizeof(sg.info.bb_counters[0]) +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2538) 		sizeof(struct ext4_group_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2539) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2540) 	grinfo = ext4_get_group_info(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2541) 	/* Load the group info in memory only if not already loaded. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2542) 	if (unlikely(EXT4_MB_GRP_NEED_INIT(grinfo))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2543) 		err = ext4_mb_load_buddy(sb, group, &e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2544) 		if (err) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2545) 			seq_printf(seq, "#%-5u: I/O error\n", group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2546) 			return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2547) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2548) 		buddy_loaded = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2549) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2550) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2551) 	memcpy(&sg, ext4_get_group_info(sb, group), i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2552) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2553) 	if (buddy_loaded)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2554) 		ext4_mb_unload_buddy(&e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2555) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2556) 	seq_printf(seq, "#%-5u: %-5u %-5u %-5u [", group, sg.info.bb_free,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2557) 			sg.info.bb_fragments, sg.info.bb_first_free);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2558) 	for (i = 0; i <= 13; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2559) 		seq_printf(seq, " %-5u", i <= blocksize_bits + 1 ?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2560) 				sg.info.bb_counters[i] : 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2561) 	seq_puts(seq, " ]\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2562) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2563) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2564) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2565) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2566) static void ext4_mb_seq_groups_stop(struct seq_file *seq, void *v)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2567) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2568) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2569) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2570) const struct seq_operations ext4_mb_seq_groups_ops = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2571) 	.start  = ext4_mb_seq_groups_start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2572) 	.next   = ext4_mb_seq_groups_next,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2573) 	.stop   = ext4_mb_seq_groups_stop,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2574) 	.show   = ext4_mb_seq_groups_show,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2575) };
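/*
 * These seq_ops back the per-filesystem "mb_groups" proc file.  The
 * wiring lives in ext4's sysfs/proc setup code, not here; it looks
 * roughly like the line below (the mode value is an assumption), with
 * the superblock recovered via PDE_DATA() in the handlers above:
 *
 *	proc_create_seq_data("mb_groups", 0444, sbi->s_proc,
 *			     &ext4_mb_seq_groups_ops, sb);
 *
 * A data line of the output then looks like (illustrative numbers):
 *
 *	#0    : 23000 5     120   [ 0     2     1     4     0    ... ]
 */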
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2576) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2577) static struct kmem_cache *get_groupinfo_cache(int blocksize_bits)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2578) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2579) 	int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2580) 	struct kmem_cache *cachep = ext4_groupinfo_caches[cache_index];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2581) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2582) 	BUG_ON(!cachep);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2583) 	return cachep;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2584) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2585) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2586) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2587)  * Allocate the top-level s_group_info array for the specified number
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2588)  * of groups
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2589)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2590) int ext4_mb_alloc_groupinfo(struct super_block *sb, ext4_group_t ngroups)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2591) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2592) 	struct ext4_sb_info *sbi = EXT4_SB(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2593) 	unsigned size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2594) 	struct ext4_group_info ***old_groupinfo, ***new_groupinfo;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2595) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2596) 	size = (ngroups + EXT4_DESC_PER_BLOCK(sb) - 1) >>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2597) 		EXT4_DESC_PER_BLOCK_BITS(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2598) 	if (size <= sbi->s_group_info_size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2599) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2600) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2601) 	size = roundup_pow_of_two(sizeof(*sbi->s_group_info) * size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2602) 	new_groupinfo = kvzalloc(size, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2603) 	if (!new_groupinfo) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2604) 		ext4_msg(sb, KERN_ERR, "can't allocate buddy meta group");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2605) 		return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2606) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2607) 	rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2608) 	old_groupinfo = rcu_dereference(sbi->s_group_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2609) 	if (old_groupinfo)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2610) 		memcpy(new_groupinfo, old_groupinfo,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2611) 		       sbi->s_group_info_size * sizeof(*sbi->s_group_info));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2612) 	rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2613) 	rcu_assign_pointer(sbi->s_group_info, new_groupinfo);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2614) 	sbi->s_group_info_size = size / sizeof(*sbi->s_group_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2615) 	if (old_groupinfo)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2616) 		ext4_kvfree_array_rcu(old_groupinfo);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2617) 	ext4_debug("allocated s_groupinfo array for %d meta_bg's\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2618) 		   sbi->s_group_info_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2619) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2620) }
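/*
 * Sizing example for the above, assuming 4 KiB blocks with 32-byte
 * group descriptors (EXT4_DESC_PER_BLOCK == 128) and ngroups == 1200:
 * the first "size" is DIV_ROUND_UP(1200, 128) == 10 table pointers,
 * and roundup_pow_of_two(10 * sizeof(void *)) == 128 bytes, so
 * s_group_info_size ends up as 16.  The power-of-two padding leaves
 * headroom so online resize can often grow without a reallocation.
 */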
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2621) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2622) /* Create and initialize ext4_group_info data for the given group. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2623) int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2624) 			  struct ext4_group_desc *desc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2625) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2626) 	int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2627) 	int metalen = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2628) 	int idx = group >> EXT4_DESC_PER_BLOCK_BITS(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2629) 	struct ext4_sb_info *sbi = EXT4_SB(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2630) 	struct ext4_group_info **meta_group_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2631) 	struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2632) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2633) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2634) 	 * First check if this group is the first of a descriptor block.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2635) 	 * If so, we have to allocate a new table of pointers to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2636) 	 * ext4_group_info structures.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2637) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2638) 	if (group % EXT4_DESC_PER_BLOCK(sb) == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2639) 		metalen = sizeof(*meta_group_info) <<
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2640) 			EXT4_DESC_PER_BLOCK_BITS(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2641) 		meta_group_info = kmalloc(metalen, GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2642) 		if (meta_group_info == NULL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2643) 			ext4_msg(sb, KERN_ERR, "can't allocate mem "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2644) 				 "for a buddy group");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2645) 			goto exit_meta_group_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2646) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2647) 		rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2648) 		rcu_dereference(sbi->s_group_info)[idx] = meta_group_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2649) 		rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2650) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2651) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2652) 	meta_group_info = sbi_array_rcu_deref(sbi, s_group_info, idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2653) 	i = group & (EXT4_DESC_PER_BLOCK(sb) - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2654) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2655) 	meta_group_info[i] = kmem_cache_zalloc(cachep, GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2656) 	if (meta_group_info[i] == NULL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2657) 		ext4_msg(sb, KERN_ERR, "can't allocate buddy mem");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2658) 		goto exit_group_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2659) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2660) 	set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2661) 		&(meta_group_info[i]->bb_state));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2662) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2663) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2664) 	 * initialize bb_free to be able to skip
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2665) 	 * empty groups without initialization
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2666) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2667) 	if (ext4_has_group_desc_csum(sb) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2668) 	    (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2669) 		meta_group_info[i]->bb_free =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2670) 			ext4_free_clusters_after_init(sb, group, desc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2671) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2672) 		meta_group_info[i]->bb_free =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2673) 			ext4_free_group_clusters(sb, desc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2674) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2675) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2676) 	INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2677) 	init_rwsem(&meta_group_info[i]->alloc_sem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2678) 	meta_group_info[i]->bb_free_root = RB_ROOT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2679) 	meta_group_info[i]->bb_largest_free_order = -1;  /* uninit */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2680) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2681) 	mb_group_bb_bitmap_alloc(sb, meta_group_info[i], group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2682) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2683) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2684) exit_group_info:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2685) 	/* If a meta_group_info table has been allocated, release it now */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2686) 	if (group % EXT4_DESC_PER_BLOCK(sb) == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2687) 		struct ext4_group_info ***group_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2688) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2689) 		rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2690) 		group_info = rcu_dereference(sbi->s_group_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2691) 		kfree(group_info[idx]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2692) 		group_info[idx] = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2693) 		rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2694) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2695) exit_meta_group_info:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2696) 	return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2697) } /* ext4_mb_add_groupinfo */
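/*
 * Indexing example for the two-level s_group_info scheme used above,
 * again assuming EXT4_DESC_PER_BLOCK == 128 (BITS == 7): for
 * group == 1000, idx == 1000 >> 7 == 7 and i == 1000 & 127 == 104,
 * i.e. this group's ext4_group_info sits in slot 104 of meta table 7.
 */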
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2698) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2699) static int ext4_mb_init_backend(struct super_block *sb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2700) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2701) 	ext4_group_t ngroups = ext4_get_groups_count(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2702) 	ext4_group_t i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2703) 	struct ext4_sb_info *sbi = EXT4_SB(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2704) 	int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2705) 	struct ext4_group_desc *desc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2706) 	struct ext4_group_info ***group_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2707) 	struct kmem_cache *cachep;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2708) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2709) 	err = ext4_mb_alloc_groupinfo(sb, ngroups);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2710) 	if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2711) 		return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2712) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2713) 	sbi->s_buddy_cache = new_inode(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2714) 	if (sbi->s_buddy_cache == NULL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2715) 		ext4_msg(sb, KERN_ERR, "can't get new inode");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2716) 		goto err_freesgi;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2717) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2718) 	/* To avoid potentially colliding with a valid on-disk inode number,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2719) 	 * use EXT4_BAD_INO for the buddy cache inode number.  This inode is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2720) 	 * not in the inode hash, so it should never be found by iget(), but
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2721) 	 * this will avoid confusion if it ever shows up during debugging. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2722) 	sbi->s_buddy_cache->i_ino = EXT4_BAD_INO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2723) 	EXT4_I(sbi->s_buddy_cache)->i_disksize = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2724) 	for (i = 0; i < ngroups; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2725) 		cond_resched();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2726) 		desc = ext4_get_group_desc(sb, i, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2727) 		if (desc == NULL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2728) 			ext4_msg(sb, KERN_ERR, "can't read descriptor %u", i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2729) 			goto err_freebuddy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2730) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2731) 		if (ext4_mb_add_groupinfo(sb, i, desc) != 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2732) 			goto err_freebuddy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2733) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2734) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2735) 	if (ext4_has_feature_flex_bg(sb)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2736) 		/* A single flex group is supposed to be read by a single I/O.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2737) 		 * s_mb_prefetch is an unsigned int, so 1 << s_log_groups_per_flex
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2738) 		 * must fit in 32 bits; shifts of 32 or more are rejected below.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2739) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2740) 		if (sbi->s_es->s_log_groups_per_flex >= 32) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2741) 			ext4_msg(sb, KERN_ERR, "too many log groups per flexible block group");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2742) 			goto err_freebuddy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2743) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2744) 		sbi->s_mb_prefetch = min_t(uint, 1 << sbi->s_es->s_log_groups_per_flex,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2745) 			BLK_MAX_SEGMENT_SIZE >> (sb->s_blocksize_bits - 9));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2746) 		sbi->s_mb_prefetch *= 8; /* 8 prefetch IOs in flight at most */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2747) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2748) 		sbi->s_mb_prefetch = 32;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2749) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2750) 	if (sbi->s_mb_prefetch > ext4_get_groups_count(sb))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2751) 		sbi->s_mb_prefetch = ext4_get_groups_count(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2752) 	/* How many real I/Os to prefetch within a single allocation at cr=0.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2753) 	 * Given that cr=0 is a CPU-bound optimization we shouldn't try to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2754) 	 * load too many groups; at some point we should start to use what
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2755) 	 * we've got in memory.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2756) 	 * With an average random access time of 5ms, it'd take a second to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2757) 	 * read 200 groups (* N with flex_bg), so let's make this limit 4.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2758) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2759) 	sbi->s_mb_prefetch_limit = sbi->s_mb_prefetch * 4;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2760) 	if (sbi->s_mb_prefetch_limit > ext4_get_groups_count(sb))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2761) 		sbi->s_mb_prefetch_limit = ext4_get_groups_count(sb);
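	/*
	 * Concrete defaults, assuming 4 KiB blocks, s_log_groups_per_flex
	 * == 4 and BLK_MAX_SEGMENT_SIZE == 65536 (the usual value in this
	 * tree): the computation above yields s_mb_prefetch =
	 * min(16, 65536 >> 3) * 8 == 128 groups per batch and
	 * s_mb_prefetch_limit == 512 prefetch I/Os per allocation, both
	 * further clamped to the total group count.
	 */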
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2762) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2763) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2764) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2765) err_freebuddy:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2766) 	cachep = get_groupinfo_cache(sb->s_blocksize_bits);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2767) 	while (i-- > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2768) 		kmem_cache_free(cachep, ext4_get_group_info(sb, i));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2769) 	i = sbi->s_group_info_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2770) 	rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2771) 	group_info = rcu_dereference(sbi->s_group_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2772) 	while (i-- > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2773) 		kfree(group_info[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2774) 	rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2775) 	iput(sbi->s_buddy_cache);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2776) err_freesgi:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2777) 	rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2778) 	kvfree(rcu_dereference(sbi->s_group_info));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2779) 	rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2780) 	return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2781) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2782) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2783) static void ext4_groupinfo_destroy_slabs(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2784) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2785) 	int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2786) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2787) 	for (i = 0; i < NR_GRPINFO_CACHES; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2788) 		kmem_cache_destroy(ext4_groupinfo_caches[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2789) 		ext4_groupinfo_caches[i] = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2790) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2791) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2792) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2793) static int ext4_groupinfo_create_slab(size_t size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2794) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2795) 	static DEFINE_MUTEX(ext4_grpinfo_slab_create_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2796) 	int slab_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2797) 	int blocksize_bits = order_base_2(size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2798) 	int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2799) 	struct kmem_cache *cachep;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2800) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2801) 	if (cache_index >= NR_GRPINFO_CACHES)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2802) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2803) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2804) 	if (unlikely(cache_index < 0))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2805) 		cache_index = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2806) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2807) 	mutex_lock(&ext4_grpinfo_slab_create_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2808) 	if (ext4_groupinfo_caches[cache_index]) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2809) 		mutex_unlock(&ext4_grpinfo_slab_create_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2810) 		return 0;	/* Already created */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2811) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2812) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2813) 	slab_size = offsetof(struct ext4_group_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2814) 				bb_counters[blocksize_bits + 2]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2815) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2816) 	cachep = kmem_cache_create(ext4_groupinfo_slab_names[cache_index],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2817) 					slab_size, 0, SLAB_RECLAIM_ACCOUNT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2818) 					NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2819) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2820) 	ext4_groupinfo_caches[cache_index] = cachep;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2821) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2822) 	mutex_unlock(&ext4_grpinfo_slab_create_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2823) 	if (!cachep) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2824) 		printk(KERN_EMERG
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2825) 		       "EXT4-fs: no memory for groupinfo slab cache\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2826) 		return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2827) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2828) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2829) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2830) }
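/*
 * Slab-size example, assuming 4 KiB blocks: order_base_2(4096) == 12,
 * so slab_size == offsetof(struct ext4_group_info, bb_counters[14]),
 * i.e. the base struct plus one free-extent counter per order 0..13,
 * matching the s_mb_offsets/s_mb_maxs tables built in ext4_mb_init()
 * below.
 */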
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2831) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2832) int ext4_mb_init(struct super_block *sb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2833) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2834) 	struct ext4_sb_info *sbi = EXT4_SB(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2835) 	unsigned i, j;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2836) 	unsigned offset, offset_incr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2837) 	unsigned max;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2838) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2839) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2840) 	i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_offsets);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2841) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2842) 	sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2843) 	if (sbi->s_mb_offsets == NULL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2844) 		ret = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2845) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2846) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2847) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2848) 	i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_maxs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2849) 	sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2850) 	if (sbi->s_mb_maxs == NULL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2851) 		ret = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2852) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2853) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2854) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2855) 	ret = ext4_groupinfo_create_slab(sb->s_blocksize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2856) 	if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2857) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2858) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2859) 	/* order 0 is regular bitmap */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2860) 	sbi->s_mb_maxs[0] = sb->s_blocksize << 3;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2861) 	sbi->s_mb_offsets[0] = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2862) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2863) 	i = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2864) 	offset = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2865) 	offset_incr = 1 << (sb->s_blocksize_bits - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2866) 	max = sb->s_blocksize << 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2867) 	do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2868) 		sbi->s_mb_offsets[i] = offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2869) 		sbi->s_mb_maxs[i] = max;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2870) 		offset += offset_incr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2871) 		offset_incr = offset_incr >> 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2872) 		max = max >> 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2873) 		i++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2874) 	} while (i <= sb->s_blocksize_bits + 1);
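	/*
	 * Resulting tables for 4 KiB blocks (s_blocksize_bits == 12);
	 * offsets are byte offsets into the buddy block, maxs are bits:
	 *
	 *	order:    0      1      2     3     4    ...  13
	 *	offset:   -      0      2048  3072  3584 ...  4095
	 *	max:      32768  16384  8192  4096  2048 ...  4
	 *
	 * Order 0 has no buddy offset because it is served directly by
	 * the on-disk block bitmap; all higher orders pack into one
	 * buddy block.
	 */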
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2875) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2876) 	spin_lock_init(&sbi->s_md_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2877) 	spin_lock_init(&sbi->s_bal_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2878) 	sbi->s_mb_free_pending = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2879) 	INIT_LIST_HEAD(&sbi->s_freed_data_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2880) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2881) 	sbi->s_mb_max_to_scan = MB_DEFAULT_MAX_TO_SCAN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2882) 	sbi->s_mb_min_to_scan = MB_DEFAULT_MIN_TO_SCAN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2883) 	sbi->s_mb_stats = MB_DEFAULT_STATS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2884) 	sbi->s_mb_stream_request = MB_DEFAULT_STREAM_THRESHOLD;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2885) 	sbi->s_mb_order2_reqs = MB_DEFAULT_ORDER2_REQS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2886) 	sbi->s_mb_max_inode_prealloc = MB_DEFAULT_MAX_INODE_PREALLOC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2887) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2888) 	 * The default group preallocation is 512, which for 4k block
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2889) 	 * sizes translates to 2 megabytes.  However for bigalloc file
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2890) 	 * systems, this is probably too big (i.e., if the cluster size
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2891) 	 * is 1 megabyte, then group preallocation size becomes half a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2892) 	 * gigabyte!).  As a default, we will keep a two megabyte
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2893) 	 * group prealloc size for cluster sizes up to 64k, and after
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2894) 	 * that, we will force a minimum group preallocation size of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2895) 	 * 32 clusters.  This translates to 8 megs when the cluster
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2896) 	 * size is 256k, and 32 megs when the cluster size is 1 meg,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2897) 	 * which seems reasonable as a default.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2898) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2899) 	sbi->s_mb_group_prealloc = max(MB_DEFAULT_GROUP_PREALLOC >>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2900) 				       sbi->s_cluster_bits, 32);
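	/*
	 * Arithmetic check (MB_DEFAULT_GROUP_PREALLOC is 512 in this
	 * tree): without bigalloc s_cluster_bits == 0, so 512 >> 0 ==
	 * 512 blocks, i.e. 2 MiB at a 4 KiB block size.  With bigalloc
	 * and 1 MiB clusters on 4 KiB blocks, s_cluster_bits == 8, so
	 * 512 >> 8 == 2 and the max() forces 32 clusters instead,
	 * i.e. the 32 MiB quoted above.
	 */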
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2901) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2902) 	 * If there is a s_stripe > 1, then we set the s_mb_group_prealloc
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2903) 	 * to the lowest multiple of s_stripe which is bigger than
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2904) 	 * the s_mb_group_prealloc as determined above. We want
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2905) 	 * the preallocation size to be an exact multiple of the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2906) 	 * RAID stripe size so that preallocations don't fragment
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2907) 	 * the stripes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2908) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2909) 	if (sbi->s_stripe > 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2910) 		sbi->s_mb_group_prealloc = roundup(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2911) 			sbi->s_mb_group_prealloc, sbi->s_stripe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2912) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2913) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2914) 	sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2915) 	if (sbi->s_locality_groups == NULL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2916) 		ret = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2917) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2918) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2919) 	for_each_possible_cpu(i) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2920) 		struct ext4_locality_group *lg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2921) 		lg = per_cpu_ptr(sbi->s_locality_groups, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2922) 		mutex_init(&lg->lg_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2923) 		for (j = 0; j < PREALLOC_TB_SIZE; j++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2924) 			INIT_LIST_HEAD(&lg->lg_prealloc_list[j]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2925) 		spin_lock_init(&lg->lg_prealloc_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2926) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2927) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2928) 	/* init file for buddy data */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2929) 	ret = ext4_mb_init_backend(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2930) 	if (ret != 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2931) 		goto out_free_locality_groups;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2932) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2933) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2934) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2935) out_free_locality_groups:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2936) 	free_percpu(sbi->s_locality_groups);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2937) 	sbi->s_locality_groups = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2938) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2939) 	kfree(sbi->s_mb_offsets);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2940) 	sbi->s_mb_offsets = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2941) 	kfree(sbi->s_mb_maxs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2942) 	sbi->s_mb_maxs = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2943) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2944) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2945) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2946) /* needs to be called with the ext4 group lock held */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2947) static int ext4_mb_cleanup_pa(struct ext4_group_info *grp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2948) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2949) 	struct ext4_prealloc_space *pa;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2950) 	struct list_head *cur, *tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2951) 	int count = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2952) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2953) 	list_for_each_safe(cur, tmp, &grp->bb_prealloc_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2954) 		pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2955) 		list_del(&pa->pa_group_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2956) 		count++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2957) 		kmem_cache_free(ext4_pspace_cachep, pa);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2958) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2959) 	return count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2960) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2961) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2962) int ext4_mb_release(struct super_block *sb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2963) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2964) 	ext4_group_t ngroups = ext4_get_groups_count(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2965) 	ext4_group_t i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2966) 	int num_meta_group_infos;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2967) 	struct ext4_group_info *grinfo, ***group_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2968) 	struct ext4_sb_info *sbi = EXT4_SB(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2969) 	struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2970) 	int count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2971) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2972) 	if (sbi->s_group_info) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2973) 		for (i = 0; i < ngroups; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2974) 			cond_resched();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2975) 			grinfo = ext4_get_group_info(sb, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2976) 			mb_group_bb_bitmap_free(grinfo);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2977) 			ext4_lock_group(sb, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2978) 			count = ext4_mb_cleanup_pa(grinfo);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2979) 			if (count)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2980) 				mb_debug(sb, "mballoc: %d PAs left\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2981) 					 count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2982) 			ext4_unlock_group(sb, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2983) 			kmem_cache_free(cachep, grinfo);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2984) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2985) 		num_meta_group_infos = (ngroups +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2986) 				EXT4_DESC_PER_BLOCK(sb) - 1) >>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2987) 			EXT4_DESC_PER_BLOCK_BITS(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2988) 		rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2989) 		group_info = rcu_dereference(sbi->s_group_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2990) 		for (i = 0; i < num_meta_group_infos; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2991) 			kfree(group_info[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2992) 		kvfree(group_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2993) 		rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2994) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2995) 	kfree(sbi->s_mb_offsets);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2996) 	kfree(sbi->s_mb_maxs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2997) 	iput(sbi->s_buddy_cache);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2998) 	if (sbi->s_mb_stats) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2999) 		ext4_msg(sb, KERN_INFO,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3000) 		       "mballoc: %u blocks %u reqs (%u success)",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3001) 				atomic_read(&sbi->s_bal_allocated),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3002) 				atomic_read(&sbi->s_bal_reqs),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3003) 				atomic_read(&sbi->s_bal_success));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3004) 		ext4_msg(sb, KERN_INFO,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3005) 		      "mballoc: %u extents scanned, %u goal hits, "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3006) 				"%u 2^N hits, %u breaks, %u lost",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3007) 				atomic_read(&sbi->s_bal_ex_scanned),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3008) 				atomic_read(&sbi->s_bal_goals),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3009) 				atomic_read(&sbi->s_bal_2orders),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3010) 				atomic_read(&sbi->s_bal_breaks),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3011) 				atomic_read(&sbi->s_mb_lost_chunks));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3012) 		ext4_msg(sb, KERN_INFO,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3013) 		       "mballoc: %lu generated and it took %Lu",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3014) 				sbi->s_mb_buddies_generated,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3015) 				sbi->s_mb_generation_time);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3016) 		ext4_msg(sb, KERN_INFO,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3017) 		       "mballoc: %u preallocated, %u discarded",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3018) 				atomic_read(&sbi->s_mb_preallocated),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3019) 				atomic_read(&sbi->s_mb_discarded));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3020) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3021) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3022) 	free_percpu(sbi->s_locality_groups);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3023) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3024) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3025) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3026) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3027) static inline int ext4_issue_discard(struct super_block *sb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3028) 		ext4_group_t block_group, ext4_grpblk_t cluster, int count,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3029) 		struct bio **biop)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3030) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3031) 	ext4_fsblk_t discard_block;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3032) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3033) 	discard_block = (EXT4_C2B(EXT4_SB(sb), cluster) +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3034) 			 ext4_group_first_block_no(sb, block_group));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3035) 	count = EXT4_C2B(EXT4_SB(sb), count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3036) 	trace_ext4_discard_blocks(sb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3037) 			(unsigned long long) discard_block, count);
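	/*
	 * The block layer expects 512-byte sector units, hence the
	 * (s_blocksize_bits - 9) shifts below.  For example, with a
	 * 4 KiB block size (s_blocksize_bits == 12) the shift is 3,
	 * i.e. every filesystem block covers 8 sectors.
	 */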
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3038) 	if (biop) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3039) 		return __blkdev_issue_discard(sb->s_bdev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3040) 			(sector_t)discard_block << (sb->s_blocksize_bits - 9),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3041) 			(sector_t)count << (sb->s_blocksize_bits - 9),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3042) 			GFP_NOFS, 0, biop);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3043) 	} else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3044) 		return sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3045) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3046) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3047) static void ext4_free_data_in_buddy(struct super_block *sb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3048) 				    struct ext4_free_data *entry)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3049) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3050) 	struct ext4_buddy e4b;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3051) 	struct ext4_group_info *db;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3052) 	int err, count = 0, count2 = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3053) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3054) 	mb_debug(sb, "gonna free %u blocks in group %u (0x%p):",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3055) 		 entry->efd_count, entry->efd_group, entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3056) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3057) 	err = ext4_mb_load_buddy(sb, entry->efd_group, &e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3058) 	/* we expect to find existing buddy because it's pinned */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3059) 	BUG_ON(err != 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3060) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3061) 	spin_lock(&EXT4_SB(sb)->s_md_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3062) 	EXT4_SB(sb)->s_mb_free_pending -= entry->efd_count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3063) 	spin_unlock(&EXT4_SB(sb)->s_md_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3064) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3065) 	db = e4b.bd_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3066) 	/* there are blocks to put into the buddy to make them really free */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3067) 	count += entry->efd_count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3068) 	count2++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3069) 	ext4_lock_group(sb, entry->efd_group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3070) 	/* Take it out of per group rb tree */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3071) 	rb_erase(&entry->efd_node, &(db->bb_free_root));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3072) 	mb_free_blocks(NULL, &e4b, entry->efd_start_cluster, entry->efd_count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3073) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3074) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3075) 	 * Clear the trimmed flag for the group so that the next
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3076) 	 * ext4_trim_fs can trim it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3077) 	 * If the volume is mounted with -o discard, online discard
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3078) 	 * is supported and the free blocks will be trimmed online.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3079) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3080) 	if (!test_opt(sb, DISCARD))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3081) 		EXT4_MB_GRP_CLEAR_TRIMMED(db);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3082) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3083) 	if (!db->bb_free_root.rb_node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3084) 		/* No more items in the per group rb tree;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3085) 		 * balance the refcounts from ext4_mb_free_metadata().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3086) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3087) 		put_page(e4b.bd_buddy_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3088) 		put_page(e4b.bd_bitmap_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3089) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3090) 	ext4_unlock_group(sb, entry->efd_group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3091) 	kmem_cache_free(ext4_free_data_cachep, entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3092) 	ext4_mb_unload_buddy(&e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3093) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3094) 	mb_debug(sb, "freed %d blocks in %d structures\n", count,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3095) 		 count2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3096) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3097) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3098) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3099)  * This function is called by the jbd2 layer once the commit has finished,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3100)  * so we know we can free the blocks that were released with that commit.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3101)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3102) void ext4_process_freed_data(struct super_block *sb, tid_t commit_tid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3103) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3104) 	struct ext4_sb_info *sbi = EXT4_SB(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3105) 	struct ext4_free_data *entry, *tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3106) 	struct bio *discard_bio = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3107) 	struct list_head freed_data_list;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3108) 	struct list_head *cut_pos = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3109) 	int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3110) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3111) 	INIT_LIST_HEAD(&freed_data_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3112) 
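	/*
	 * s_freed_data_list is kept in commit order, so the entries
	 * belonging to this commit form a prefix of the list.  Walk
	 * forward while efd_tid matches, remember the last matching
	 * entry, and splice that prefix onto our private list; e.g.
	 * for tids [5, 5, 6] and commit_tid == 5, the first two
	 * entries are moved.
	 */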
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3113) 	spin_lock(&sbi->s_md_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3114) 	list_for_each_entry(entry, &sbi->s_freed_data_list, efd_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3115) 		if (entry->efd_tid != commit_tid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3116) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3117) 		cut_pos = &entry->efd_list;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3118) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3119) 	if (cut_pos)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3120) 		list_cut_position(&freed_data_list, &sbi->s_freed_data_list,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3121) 				  cut_pos);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3122) 	spin_unlock(&sbi->s_md_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3123) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3124) 	if (test_opt(sb, DISCARD)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3125) 		list_for_each_entry(entry, &freed_data_list, efd_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3126) 			err = ext4_issue_discard(sb, entry->efd_group,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3127) 						 entry->efd_start_cluster,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3128) 						 entry->efd_count,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3129) 						 &discard_bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3130) 			if (err && err != -EOPNOTSUPP) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3131) 				ext4_msg(sb, KERN_WARNING, "discard request in"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3132) 					 " group:%d block:%d count:%d failed"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3133) 					 " with %d", entry->efd_group,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3134) 					 entry->efd_start_cluster,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3135) 					 entry->efd_count, err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3136) 			} else if (err == -EOPNOTSUPP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3137) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3138) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3139) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3140) 		if (discard_bio) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3141) 			submit_bio_wait(discard_bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3142) 			bio_put(discard_bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3143) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3144) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3145) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3146) 	list_for_each_entry_safe(entry, tmp, &freed_data_list, efd_list)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3147) 		ext4_free_data_in_buddy(sb, entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3148) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3149) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3150) int __init ext4_init_mballoc(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3151) {
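	/*
	 * SLAB_RECLAIM_ACCOUNT marks these caches as reclaimable for
	 * memory accounting, which fits their use: preallocations and
	 * free-extent records are dropped again in normal operation.
	 */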
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3152) 	ext4_pspace_cachep = KMEM_CACHE(ext4_prealloc_space,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3153) 					SLAB_RECLAIM_ACCOUNT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3154) 	if (ext4_pspace_cachep == NULL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3155) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3156) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3157) 	ext4_ac_cachep = KMEM_CACHE(ext4_allocation_context,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3158) 				    SLAB_RECLAIM_ACCOUNT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3159) 	if (ext4_ac_cachep == NULL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3160) 		goto out_pa_free;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3161) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3162) 	ext4_free_data_cachep = KMEM_CACHE(ext4_free_data,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3163) 					   SLAB_RECLAIM_ACCOUNT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3164) 	if (ext4_free_data_cachep == NULL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3165) 		goto out_ac_free;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3166) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3167) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3168) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3169) out_ac_free:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3170) 	kmem_cache_destroy(ext4_ac_cachep);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3171) out_pa_free:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3172) 	kmem_cache_destroy(ext4_pspace_cachep);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3173) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3174) 	return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3175) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3176) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3177) void ext4_exit_mballoc(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3178) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3179) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3180) 	 * Wait for completion of call_rcu()'s on ext4_pspace_cachep
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3181) 	 * before destroying the slab cache.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3182) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3183) 	rcu_barrier();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3184) 	kmem_cache_destroy(ext4_pspace_cachep);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3185) 	kmem_cache_destroy(ext4_ac_cachep);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3186) 	kmem_cache_destroy(ext4_free_data_cachep);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3187) 	ext4_groupinfo_destroy_slabs();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3188) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3189) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3190) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3191) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3192)  * Check quota and mark the chosen space (ac->ac_b_ex) non-free in the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3193)  * bitmaps.  Returns 0 on success or an error code.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3194)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3195) static noinline_for_stack int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3196) ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3197) 				handle_t *handle, unsigned int reserv_clstrs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3198) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3199) 	struct buffer_head *bitmap_bh = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3200) 	struct ext4_group_desc *gdp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3201) 	struct buffer_head *gdp_bh;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3202) 	struct ext4_sb_info *sbi;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3203) 	struct super_block *sb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3204) 	ext4_fsblk_t block;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3205) 	int err, len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3206) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3207) 	BUG_ON(ac->ac_status != AC_STATUS_FOUND);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3208) 	BUG_ON(ac->ac_b_ex.fe_len <= 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3209) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3210) 	sb = ac->ac_sb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3211) 	sbi = EXT4_SB(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3212) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3213) 	bitmap_bh = ext4_read_block_bitmap(sb, ac->ac_b_ex.fe_group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3214) 	if (IS_ERR(bitmap_bh)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3215) 		err = PTR_ERR(bitmap_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3216) 		bitmap_bh = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3217) 		goto out_err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3218) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3219) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3220) 	BUFFER_TRACE(bitmap_bh, "getting write access");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3221) 	err = ext4_journal_get_write_access(handle, bitmap_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3222) 	if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3223) 		goto out_err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3224) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3225) 	err = -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3226) 	gdp = ext4_get_group_desc(sb, ac->ac_b_ex.fe_group, &gdp_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3227) 	if (!gdp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3228) 		goto out_err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3229) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3230) 	ext4_debug("using block group %u(%d)\n", ac->ac_b_ex.fe_group,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3231) 			ext4_free_group_clusters(sb, gdp));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3232) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3233) 	BUFFER_TRACE(gdp_bh, "get_write_access");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3234) 	err = ext4_journal_get_write_access(handle, gdp_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3235) 	if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3236) 		goto out_err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3237) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3238) 	block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3239) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3240) 	len = EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3241) 	if (!ext4_inode_block_valid(ac->ac_inode, block, len)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3242) 		ext4_error(sb, "Allocating blocks %llu-%llu which overlap "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3243) 			   "fs metadata", block, block+len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3244) 		/* The filesystem was mounted not to panic on error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3245) 		 * fix the bitmap and return EFSCORRUPTED.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3246) 		 * We leak some of the blocks here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3247) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3248) 		ext4_lock_group(sb, ac->ac_b_ex.fe_group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3249) 		ext4_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3250) 			      ac->ac_b_ex.fe_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3251) 		ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3252) 		err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3253) 		if (!err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3254) 			err = -EFSCORRUPTED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3255) 		goto out_err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3256) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3257) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3258) 	ext4_lock_group(sb, ac->ac_b_ex.fe_group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3259) #ifdef AGGRESSIVE_CHECK
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3260) 	{
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3261) 		int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3262) 		for (i = 0; i < ac->ac_b_ex.fe_len; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3263) 			BUG_ON(mb_test_bit(ac->ac_b_ex.fe_start + i,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3264) 						bitmap_bh->b_data));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3265) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3266) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3267) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3268) 	ext4_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3269) 		      ac->ac_b_ex.fe_len);
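	/*
	 * First allocation from a BLOCK_UNINIT group: the on-disk
	 * bitmap was never written, so drop the flag and initialize
	 * the descriptor's free count as "all clusters free except
	 * the group's own metadata" via ext4_free_clusters_after_init().
	 */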
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3270) 	if (ext4_has_group_desc_csum(sb) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3271) 	    (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3272) 		gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3273) 		ext4_free_group_clusters_set(sb, gdp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3274) 					     ext4_free_clusters_after_init(sb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3275) 						ac->ac_b_ex.fe_group, gdp));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3276) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3277) 	len = ext4_free_group_clusters(sb, gdp) - ac->ac_b_ex.fe_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3278) 	ext4_free_group_clusters_set(sb, gdp, len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3279) 	ext4_block_bitmap_csum_set(sb, ac->ac_b_ex.fe_group, gdp, bitmap_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3280) 	ext4_group_desc_csum_set(sb, ac->ac_b_ex.fe_group, gdp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3281) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3282) 	ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3283) 	percpu_counter_sub(&sbi->s_freeclusters_counter, ac->ac_b_ex.fe_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3284) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3285) 	 * Now reduce the dirty cluster count as well; it should not go negative.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3286) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3287) 	if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3288) 		/* release all the reserved blocks if non delalloc */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3289) 		percpu_counter_sub(&sbi->s_dirtyclusters_counter,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3290) 				   reserv_clstrs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3291) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3292) 	if (sbi->s_log_groups_per_flex) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3293) 		ext4_group_t flex_group = ext4_flex_group(sbi,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3294) 							  ac->ac_b_ex.fe_group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3295) 		atomic64_sub(ac->ac_b_ex.fe_len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3296) 			     &sbi_array_rcu_deref(sbi, s_flex_groups,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3297) 						  flex_group)->free_clusters);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3298) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3299) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3300) 	err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3301) 	if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3302) 		goto out_err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3303) 	err = ext4_handle_dirty_metadata(handle, NULL, gdp_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3304) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3305) out_err:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3306) 	brelse(bitmap_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3307) 	return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3308) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3309) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3310) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3311)  * Idempotent helper for Ext4 fast commit replay path to set the state of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3312)  * blocks in bitmaps and update counters.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3313)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3314) void ext4_mb_mark_bb(struct super_block *sb, ext4_fsblk_t block,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3315) 			int len, int state)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3316) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3317) 	struct buffer_head *bitmap_bh = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3318) 	struct ext4_group_desc *gdp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3319) 	struct buffer_head *gdp_bh;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3320) 	struct ext4_sb_info *sbi = EXT4_SB(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3321) 	ext4_group_t group;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3322) 	ext4_grpblk_t blkoff;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3323) 	int i, err = 0;	/* initialize: err is tested after the loop */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3324) 	int already;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3325) 	unsigned int clen, clen_changed, thisgrp_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3326) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3327) 	while (len > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3328) 		ext4_get_group_no_and_offset(sb, block, &group, &blkoff);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3329) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3330) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3331) 		 * Check to see if we are freeing blocks across a group
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3332) 		 * boundary.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3333) 		 * In case of flex_bg, (block, len) may span more than one
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3334) 		 * group. In that case we need to get the corresponding group
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3335) 		 * metadata to work with, which is what each iteration of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3336) 		 * this loop does.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3337) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3338) 		thisgrp_len = min_t(unsigned int, (unsigned int)len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3339) 			EXT4_BLOCKS_PER_GROUP(sb) - EXT4_C2B(sbi, blkoff));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3340) 		clen = EXT4_NUM_B2C(sbi, thisgrp_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3341) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3342) 		bitmap_bh = ext4_read_block_bitmap(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3343) 		if (IS_ERR(bitmap_bh)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3344) 			err = PTR_ERR(bitmap_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3345) 			bitmap_bh = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3346) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3347) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3348) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3349) 		err = -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3350) 		gdp = ext4_get_group_desc(sb, group, &gdp_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3351) 		if (!gdp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3352) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3353) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3354) 		ext4_lock_group(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3355) 		already = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3356) 		for (i = 0; i < clen; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3357) 			if (!mb_test_bit(blkoff + i, bitmap_bh->b_data) ==
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3358) 					 !state)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3359) 				already++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3360) 
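		/*
		 * Only the delta matters: bits already in the desired
		 * state were counted in "already" above, so replaying
		 * the same region twice adjusts the free-cluster
		 * counters by clen_changed == 0 the second time, which
		 * is what makes this helper idempotent.
		 */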
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3361) 		clen_changed = clen - already;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3362) 		if (state)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3363) 			ext4_set_bits(bitmap_bh->b_data, blkoff, clen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3364) 		else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3365) 			mb_test_and_clear_bits(bitmap_bh->b_data, blkoff, clen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3366) 		if (ext4_has_group_desc_csum(sb) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3367) 		    (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3368) 			gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3369) 			ext4_free_group_clusters_set(sb, gdp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3370) 			     ext4_free_clusters_after_init(sb, group, gdp));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3371) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3372) 		if (state)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3373) 			clen = ext4_free_group_clusters(sb, gdp) - clen_changed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3374) 		else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3375) 			clen = ext4_free_group_clusters(sb, gdp) + clen_changed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3376) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3377) 		ext4_free_group_clusters_set(sb, gdp, clen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3378) 		ext4_block_bitmap_csum_set(sb, group, gdp, bitmap_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3379) 		ext4_group_desc_csum_set(sb, group, gdp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3380) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3381) 		ext4_unlock_group(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3382) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3383) 		if (sbi->s_log_groups_per_flex) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3384) 			ext4_group_t flex_group = ext4_flex_group(sbi, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3385) 			struct flex_groups *fg = sbi_array_rcu_deref(sbi,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3386) 						   s_flex_groups, flex_group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3387) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3388) 			if (state)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3389) 				atomic64_sub(clen_changed, &fg->free_clusters);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3390) 			else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3391) 				atomic64_add(clen_changed, &fg->free_clusters);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3392) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3393) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3394) 
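		/*
		 * Note the NULL journal handle: during fast-commit
		 * replay there is no running transaction, so the bitmap
		 * and descriptor buffers are pushed out synchronously
		 * with sync_dirty_buffer() rather than through jbd2.
		 */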
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3395) 		err = ext4_handle_dirty_metadata(NULL, NULL, bitmap_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3396) 		if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3397) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3398) 		sync_dirty_buffer(bitmap_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3399) 		err = ext4_handle_dirty_metadata(NULL, NULL, gdp_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3400) 		sync_dirty_buffer(gdp_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3401) 		if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3402) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3403) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3404) 		block += thisgrp_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3405) 		len -= thisgrp_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3406) 		brelse(bitmap_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3407) 		BUG_ON(len < 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3408) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3409) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3410) 	if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3411) 		brelse(bitmap_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3412) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3413) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3414) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3415)  * Here we normalize the request for a locality group.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3416)  * Group requests are normalized to s_mb_group_prealloc, which matches
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3417)  * s_stripe if a stripe size was set via mount option.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3418)  * s_mb_group_prealloc can be configured via
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3419)  * /sys/fs/ext4/<partition>/mb_group_prealloc
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3420)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3421)  * XXX: should we try to preallocate more than the group has now?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3422)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3423) static void ext4_mb_normalize_group_request(struct ext4_allocation_context *ac)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3424) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3425) 	struct super_block *sb = ac->ac_sb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3426) 	struct ext4_locality_group *lg = ac->ac_lg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3427) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3428) 	BUG_ON(lg == NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3429) 	ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc;
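	/*
	 * s_mb_group_prealloc defaults to MB_DEFAULT_GROUP_PREALLOC
	 * (512 clusters) unless tuned via sysfs; with 4 KiB clusters,
	 * for example, the locality-group goal becomes a 2 MiB request.
	 */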
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3430) 	mb_debug(sb, "goal %u blocks for locality group\n", ac->ac_g_ex.fe_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3431) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3432) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3433) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3434)  * Normalization means making the request better in terms of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3435)  * size and alignment.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3436)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3437) static noinline_for_stack void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3438) ext4_mb_normalize_request(struct ext4_allocation_context *ac,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3439) 				struct ext4_allocation_request *ar)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3440) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3441) 	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3442) 	int bsbits, max;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3443) 	ext4_lblk_t end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3444) 	loff_t size, start_off;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3445) 	loff_t orig_size __maybe_unused;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3446) 	ext4_lblk_t start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3447) 	struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3448) 	struct ext4_prealloc_space *pa;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3449) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3450) 	/* only normalize data requests; metadata requests
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3451) 	   do not need preallocation */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3452) 	if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3453) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3454) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3455) 	/* sometimes the caller may want exactly the requested blocks */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3456) 	if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3457) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3458) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3459) 	/* caller may indicate that preallocation isn't
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3460) 	 * required (it's a tail, for example) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3461) 	if (ac->ac_flags & EXT4_MB_HINT_NOPREALLOC)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3462) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3463) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3464) 	if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3465) 		ext4_mb_normalize_group_request(ac);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3466) 		return ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3467) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3468) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3469) 	bsbits = ac->ac_sb->s_blocksize_bits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3470) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3471) 	/* first, let's learn the actual file size
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3472) 	 * assuming the current request is allocated */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3473) 	size = ac->ac_o_ex.fe_logical + EXT4_C2B(sbi, ac->ac_o_ex.fe_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3474) 	size = size << bsbits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3475) 	if (size < i_size_read(ac->ac_inode))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3476) 		size = i_size_read(ac->ac_inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3477) 	orig_size = size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3478) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3479) 	/* max size of free chunks */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3480) 	max = 2 << bsbits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3481) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3482) #define NRL_CHECK_SIZE(req, size, max, chunk_size)	\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3483) 		(req <= (size) || max <= (chunk_size))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3484) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3485) 	/* first, try to predict filesize */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3486) 	/* XXX: should this table be tunable? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3487) 	start_off = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3488) 	if (size <= 16 * 1024) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3489) 		size = 16 * 1024;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3490) 	} else if (size <= 32 * 1024) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3491) 		size = 32 * 1024;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3492) 	} else if (size <= 64 * 1024) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3493) 		size = 64 * 1024;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3494) 	} else if (size <= 128 * 1024) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3495) 		size = 128 * 1024;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3496) 	} else if (size <= 256 * 1024) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3497) 		size = 256 * 1024;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3498) 	} else if (size <= 512 * 1024) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3499) 		size = 512 * 1024;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3500) 	} else if (size <= 1024 * 1024) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3501) 		size = 1024 * 1024;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3502) 	} else if (NRL_CHECK_SIZE(size, 4 * 1024 * 1024, max, 2 * 1024)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3503) 		start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3504) 						(21 - bsbits)) << 21;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3505) 		size = 2 * 1024 * 1024;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3506) 	} else if (NRL_CHECK_SIZE(size, 8 * 1024 * 1024, max, 4 * 1024)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3507) 		start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3508) 							(22 - bsbits)) << 22;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3509) 		size = 4 * 1024 * 1024;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3510) 	} else if (NRL_CHECK_SIZE(ac->ac_o_ex.fe_len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3511) 					(8<<20)>>bsbits, max, 8 * 1024)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3512) 		start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3513) 							(23 - bsbits)) << 23;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3514) 		size = 8 * 1024 * 1024;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3515) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3516) 		start_off = (loff_t) ac->ac_o_ex.fe_logical << bsbits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3517) 		size	  = (loff_t) EXT4_C2B(EXT4_SB(ac->ac_sb),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3518) 					      ac->ac_o_ex.fe_len) << bsbits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3519) 	}
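	/*
	 * size/start_off are in bytes here; convert back to blocks.
	 * E.g. with 4 KiB blocks (bsbits == 12), a file that would be
	 * 100 KiB after this allocation falls into the "<= 128 * 1024"
	 * bucket above, giving a 32-block goal window at offset 0.
	 */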
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3520) 	size = size >> bsbits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3521) 	start = start_off >> bsbits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3522) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3523) 	/* don't cover already allocated blocks in selected range */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3524) 	if (ar->pleft && start <= ar->lleft) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3525) 		size -= ar->lleft + 1 - start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3526) 		start = ar->lleft + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3527) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3528) 	if (ar->pright && start + size - 1 >= ar->lright)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3529) 		size -= start + size - ar->lright;
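	/*
	 * E.g. if the window was [0, 32) and the block just left of the
	 * request is logical block 3 (ar->lleft == 3), the window shrinks
	 * to [4, 32); a neighbour on the right clips the end similarly.
	 */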
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3530) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3531) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3532) 	 * Trim allocation request for filesystems with artificially small
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3533) 	 * groups.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3534) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3535) 	if (size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3536) 		size = EXT4_BLOCKS_PER_GROUP(ac->ac_sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3537) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3538) 	end = start + size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3539) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3540) 	/* check we don't cross already preallocated blocks */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3541) 	rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3542) 	list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3543) 		ext4_lblk_t pa_end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3544) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3545) 		if (pa->pa_deleted)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3546) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3547) 		spin_lock(&pa->pa_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3548) 		if (pa->pa_deleted) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3549) 			spin_unlock(&pa->pa_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3550) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3551) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3552) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3553) 		pa_end = pa->pa_lstart + EXT4_C2B(EXT4_SB(ac->ac_sb),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3554) 						  pa->pa_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3555) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3556) 		/* PA must not overlap original request */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3557) 		BUG_ON(!(ac->ac_o_ex.fe_logical >= pa_end ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3558) 			ac->ac_o_ex.fe_logical < pa->pa_lstart));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3559) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3560) 		/* skip PAs this normalized request doesn't overlap with */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3561) 		if (pa->pa_lstart >= end || pa_end <= start) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3562) 			spin_unlock(&pa->pa_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3563) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3564) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3565) 		BUG_ON(pa->pa_lstart <= start && pa_end >= end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3566) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3567) 		/* adjust start or end to be adjacent to this pa */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3568) 		if (pa_end <= ac->ac_o_ex.fe_logical) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3569) 			BUG_ON(pa_end < start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3570) 			start = pa_end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3571) 		} else if (pa->pa_lstart > ac->ac_o_ex.fe_logical) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3572) 			BUG_ON(pa->pa_lstart > end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3573) 			end = pa->pa_lstart;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3574) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3575) 		spin_unlock(&pa->pa_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3576) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3577) 	rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3578) 	size = end - start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3579) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3580) 	/* XXX: extra loop to check we really don't overlap preallocations */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3581) 	rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3582) 	list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3583) 		ext4_lblk_t pa_end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3584) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3585) 		spin_lock(&pa->pa_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3586) 		if (pa->pa_deleted == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3587) 			pa_end = pa->pa_lstart + EXT4_C2B(EXT4_SB(ac->ac_sb),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3588) 							  pa->pa_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3589) 			BUG_ON(!(start >= pa_end || end <= pa->pa_lstart));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3590) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3591) 		spin_unlock(&pa->pa_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3592) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3593) 	rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3594) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3595) 	if (start + size <= ac->ac_o_ex.fe_logical &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3596) 			start > ac->ac_o_ex.fe_logical) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3597) 		ext4_msg(ac->ac_sb, KERN_ERR,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3598) 			 "start %lu, size %lu, fe_logical %lu",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3599) 			 (unsigned long) start, (unsigned long) size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3600) 			 (unsigned long) ac->ac_o_ex.fe_logical);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3601) 		BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3602) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3603) 	BUG_ON(size <= 0 || size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3604) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3605) 	/* now prepare goal request */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3606) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3607) 	/* XXX: is it better to align blocks with respect to logical
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3608) 	 * placement, or to satisfy a big request as is? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3609) 	ac->ac_g_ex.fe_logical = start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3610) 	ac->ac_g_ex.fe_len = EXT4_NUM_B2C(sbi, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3611) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3612) 	/* define goal start in order to merge */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3613) 	if (ar->pright && (ar->lright == (start + size))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3614) 		/* merge to the right */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3615) 		ext4_get_group_no_and_offset(ac->ac_sb, ar->pright - size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3616) 						&ac->ac_f_ex.fe_group,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3617) 						&ac->ac_f_ex.fe_start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3618) 		ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3619) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3620) 	if (ar->pleft && (ar->lleft + 1 == start)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3621) 		/* merge to the left */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3622) 		ext4_get_group_no_and_offset(ac->ac_sb, ar->pleft + 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3623) 						&ac->ac_f_ex.fe_group,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3624) 						&ac->ac_f_ex.fe_start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3625) 		ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3626) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3627) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3628) 	mb_debug(ac->ac_sb, "goal: %lld(was %lld) blocks at %u\n", size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3629) 		 orig_size, start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3630) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3631) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3632) static void ext4_mb_collect_stats(struct ext4_allocation_context *ac)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3633) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3634) 	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3635) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3636) 	if (sbi->s_mb_stats && ac->ac_g_ex.fe_len > 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3637) 		atomic_inc(&sbi->s_bal_reqs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3638) 		atomic_add(ac->ac_b_ex.fe_len, &sbi->s_bal_allocated);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3639) 		if (ac->ac_b_ex.fe_len >= ac->ac_o_ex.fe_len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3640) 			atomic_inc(&sbi->s_bal_success);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3641) 		atomic_add(ac->ac_found, &sbi->s_bal_ex_scanned);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3642) 		if (ac->ac_g_ex.fe_start == ac->ac_b_ex.fe_start &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3643) 				ac->ac_g_ex.fe_group == ac->ac_b_ex.fe_group)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3644) 			atomic_inc(&sbi->s_bal_goals);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3645) 		if (ac->ac_found > sbi->s_mb_max_to_scan)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3646) 			atomic_inc(&sbi->s_bal_breaks);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3647) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3648) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3649) 	if (ac->ac_op == EXT4_MB_HISTORY_ALLOC)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3650) 		trace_ext4_mballoc_alloc(ac);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3651) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3652) 		trace_ext4_mballoc_prealloc(ac);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3653) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3654) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3655) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3656)  * Called on failure; free up any blocks from the inode PA for this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3657)  * context.  We don't need this for MB_GROUP_PA because we only change
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3658)  * pa_free in ext4_mb_release_context(), but on failure, we've already
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3659)  * zeroed out ac->ac_b_ex.fe_len, so group_pa->pa_free is not changed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3660)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3661) static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3662) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3663) 	struct ext4_prealloc_space *pa = ac->ac_pa;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3664) 	struct ext4_buddy e4b;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3665) 	int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3666) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3667) 	if (pa == NULL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3668) 		if (ac->ac_f_ex.fe_len == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3669) 			return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3670) 		err = ext4_mb_load_buddy(ac->ac_sb, ac->ac_f_ex.fe_group, &e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3671) 		if (err) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3672) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3673) 			 * This should never happen since we pin the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3674) 			 * pages in the ext4_allocation_context so
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3675) 			 * ext4_mb_load_buddy() should never fail.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3676) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3677) 			WARN(1, "mb_load_buddy failed (%d)", err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3678) 			return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3679) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3680) 		ext4_lock_group(ac->ac_sb, ac->ac_f_ex.fe_group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3681) 		mb_free_blocks(ac->ac_inode, &e4b, ac->ac_f_ex.fe_start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3682) 			       ac->ac_f_ex.fe_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3683) 		ext4_unlock_group(ac->ac_sb, ac->ac_f_ex.fe_group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3684) 		ext4_mb_unload_buddy(&e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3685) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3686) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3687) 	if (pa->pa_type == MB_INODE_PA)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3688) 		pa->pa_free += ac->ac_b_ex.fe_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3689) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3690) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3691) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3692)  * use blocks preallocated to inode
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3693)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3694) static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3695) 				struct ext4_prealloc_space *pa)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3696) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3697) 	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3698) 	ext4_fsblk_t start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3699) 	ext4_fsblk_t end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3700) 	int len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3701) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3702) 	/* found preallocated blocks, use them */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3703) 	start = pa->pa_pstart + (ac->ac_o_ex.fe_logical - pa->pa_lstart);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3704) 	end = min(pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3705) 		  start + EXT4_C2B(sbi, ac->ac_o_ex.fe_len));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3706) 	len = EXT4_NUM_B2C(sbi, end - start);
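	/*
	 * The physical start mirrors the logical offset into the pa,
	 * and the extent is clipped to what remains of the pa.  E.g.
	 * (with one block per cluster) a pa with pa_lstart 100,
	 * pa_pstart 5000 and pa_len 16 serving a request at logical
	 * block 104 yields start == 5004.
	 */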
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3707) 	ext4_get_group_no_and_offset(ac->ac_sb, start, &ac->ac_b_ex.fe_group,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3708) 					&ac->ac_b_ex.fe_start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3709) 	ac->ac_b_ex.fe_len = len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3710) 	ac->ac_status = AC_STATUS_FOUND;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3711) 	ac->ac_pa = pa;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3712) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3713) 	BUG_ON(start < pa->pa_pstart);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3714) 	BUG_ON(end > pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3715) 	BUG_ON(pa->pa_free < len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3716) 	pa->pa_free -= len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3717) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3718) 	mb_debug(ac->ac_sb, "use %llu/%d from inode pa %p\n", start, len, pa);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3719) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3720) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3721) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3722)  * use blocks preallocated to locality group
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3723)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3724) static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3725) 				struct ext4_prealloc_space *pa)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3726) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3727) 	unsigned int len = ac->ac_o_ex.fe_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3728) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3729) 	ext4_get_group_no_and_offset(ac->ac_sb, pa->pa_pstart,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3730) 					&ac->ac_b_ex.fe_group,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3731) 					&ac->ac_b_ex.fe_start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3732) 	ac->ac_b_ex.fe_len = len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3733) 	ac->ac_status = AC_STATUS_FOUND;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3734) 	ac->ac_pa = pa;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3735) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3736) 	/* We don't correct pa_pstart or pa_plen here to avoid a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3737) 	 * possible race when the group is being loaded concurrently;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3738) 	 * instead we correct the pa later, after the blocks are marked
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3739) 	 * in the on-disk bitmap -- see ext4_mb_release_context().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3740) 	 * Other CPUs are prevented from allocating from this pa by lg_mutex.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3741) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3742) 	mb_debug(ac->ac_sb, "use %u/%u from group pa %p\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3743) 		 pa->pa_lstart-len, len, pa);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3744) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3745) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3746) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3747)  * Return the prealloc space that has the minimal distance
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3748)  * from the goal block. @cpa is the prealloc
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3749)  * space with the currently known minimal distance
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3750)  * from the goal block.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3751)  */
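/*
 * Whichever pa is returned carries an elevated pa_count reference
 * owned by the caller; when we switch from @cpa to @pa the old
 * reference is dropped here.
 */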
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3752) static struct ext4_prealloc_space *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3753) ext4_mb_check_group_pa(ext4_fsblk_t goal_block,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3754) 			struct ext4_prealloc_space *pa,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3755) 			struct ext4_prealloc_space *cpa)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3756) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3757) 	ext4_fsblk_t cur_distance, new_distance;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3758) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3759) 	if (cpa == NULL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3760) 		atomic_inc(&pa->pa_count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3761) 		return pa;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3762) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3763) 	cur_distance = abs(goal_block - cpa->pa_pstart);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3764) 	new_distance = abs(goal_block - pa->pa_pstart);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3765) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3766) 	if (cur_distance <= new_distance)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3767) 		return cpa;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3768) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3769) 	/* drop the previous reference */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3770) 	atomic_dec(&cpa->pa_count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3771) 	atomic_inc(&pa->pa_count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3772) 	return pa;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3773) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3774) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3775) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3776)  * search goal blocks in preallocated space
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3777)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3778) static noinline_for_stack bool
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3779) ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3780) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3781) 	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3782) 	int order, i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3783) 	struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3784) 	struct ext4_locality_group *lg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3785) 	struct ext4_prealloc_space *pa, *cpa = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3786) 	ext4_fsblk_t goal_block;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3787) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3788) 	/* only data can be preallocated */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3789) 	if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3790) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3791) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3792) 	/* first, try per-file preallocation */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3793) 	rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3794) 	list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3795) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3796) 		/* none of the fields in this condition change,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3797) 		 * so we can skip locking for them */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3798) 		if (ac->ac_o_ex.fe_logical < pa->pa_lstart ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3799) 		    ac->ac_o_ex.fe_logical >= (pa->pa_lstart +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3800) 					       EXT4_C2B(sbi, pa->pa_len)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3801) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3802) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3803) 		/* non-extent files can't have physical blocks past 2^32 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3804) 		if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3805) 		    (pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len) >
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3806) 		     EXT4_MAX_BLOCK_FILE_PHYS))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3807) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3808) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3809) 		/* found preallocated blocks, use them */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3810) 		spin_lock(&pa->pa_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3811) 		if (pa->pa_deleted == 0 && pa->pa_free) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3812) 			atomic_inc(&pa->pa_count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3813) 			ext4_mb_use_inode_pa(ac, pa);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3814) 			spin_unlock(&pa->pa_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3815) 			ac->ac_criteria = 10;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3816) 			rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3817) 			return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3818) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3819) 		spin_unlock(&pa->pa_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3820) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3821) 	rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3822) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3823) 	/* can we use group allocation? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3824) 	if (!(ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3825) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3826) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3827) 	/* inode may have no locality group for some reason */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3828) 	lg = ac->ac_lg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3829) 	if (lg == NULL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3830) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3831) 	order  = fls(ac->ac_o_ex.fe_len) - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3832) 	if (order > PREALLOC_TB_SIZE - 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3833) 		/* The max size of hash table is PREALLOC_TB_SIZE */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3834) 		order = PREALLOC_TB_SIZE - 1;
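	/*
	 * A worked example with a hypothetical request: fe_len = 24 gives
	 * fls(24) = 5, hence order = 4, so the loop below scans the
	 * lg_prealloc_list buckets 4 .. PREALLOC_TB_SIZE - 1, considering
	 * only PAs with pa_free >= 24 clusters.
	 */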
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3835) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3836) 	goal_block = ext4_grp_offs_to_block(ac->ac_sb, &ac->ac_g_ex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3837) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3838) 	 * search for the prealloc space with the minimal
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3839) 	 * distance from the goal block.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3840) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3841) 	for (i = order; i < PREALLOC_TB_SIZE; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3842) 		rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3843) 		list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[i],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3844) 					pa_inode_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3845) 			spin_lock(&pa->pa_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3846) 			if (pa->pa_deleted == 0 &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3847) 					pa->pa_free >= ac->ac_o_ex.fe_len) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3848) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3849) 				cpa = ext4_mb_check_group_pa(goal_block,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3850) 								pa, cpa);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3851) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3852) 			spin_unlock(&pa->pa_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3853) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3854) 		rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3855) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3856) 	if (cpa) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3857) 		ext4_mb_use_group_pa(ac, cpa);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3858) 		ac->ac_criteria = 20;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3859) 		return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3860) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3861) 	return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3862) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3863) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3864) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3865)  * The function goes through all blocks freed in the group
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3866)  * but not yet committed and marks them used in the in-core bitmap.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3867)  * The buddy must be generated from this bitmap.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3868)  * Needs to be called with the ext4 group lock held.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3869)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3870) static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3871) 						ext4_group_t group)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3872) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3873) 	struct rb_node *n;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3874) 	struct ext4_group_info *grp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3875) 	struct ext4_free_data *entry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3876) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3877) 	grp = ext4_get_group_info(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3878) 	n = rb_first(&(grp->bb_free_root));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3879) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3880) 	while (n) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3881) 		entry = rb_entry(n, struct ext4_free_data, efd_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3882) 		ext4_set_bits(bitmap, entry->efd_start_cluster, entry->efd_count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3883) 		n = rb_next(n);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3884) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3886) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3887) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3888) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3889)  * The function goes through all preallocations in this group and marks them
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3890)  * used in the in-core bitmap.  The buddy must be generated from this bitmap.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3891)  * Needs to be called with the ext4 group lock held.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3892)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3893) static noinline_for_stack
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3894) void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3895) 					ext4_group_t group)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3896) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3897) 	struct ext4_group_info *grp = ext4_get_group_info(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3898) 	struct ext4_prealloc_space *pa;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3899) 	struct list_head *cur;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3900) 	ext4_group_t groupnr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3901) 	ext4_grpblk_t start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3902) 	int preallocated = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3903) 	int len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3904) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3905) 	/* every form of preallocation discard loads the group first,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3906) 	 * so the only competing code is preallocation use;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3907) 	 * we don't need any locking here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3908) 	 * Note that we do NOT ignore preallocations with pa_deleted set;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3909) 	 * otherwise we could leave used blocks available for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3910) 	 * allocation in the buddy when a concurrent ext4_mb_put_pa()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3911) 	 * is dropping the preallocation.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3912) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3913) 	list_for_each(cur, &grp->bb_prealloc_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3914) 		pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3915) 		spin_lock(&pa->pa_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3916) 		ext4_get_group_no_and_offset(sb, pa->pa_pstart,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3917) 					     &groupnr, &start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3918) 		len = pa->pa_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3919) 		spin_unlock(&pa->pa_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3920) 		if (unlikely(len == 0))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3921) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3922) 		BUG_ON(groupnr != group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3923) 		ext4_set_bits(bitmap, start, len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3924) 		preallocated += len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3925) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3926) 	mb_debug(sb, "preallocated %d for group %u\n", preallocated, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3927) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3928) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3929) static void ext4_mb_mark_pa_deleted(struct super_block *sb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3930) 				    struct ext4_prealloc_space *pa)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3931) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3932) 	struct ext4_inode_info *ei;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3933) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3934) 	if (pa->pa_deleted) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3935) 		ext4_warning(sb, "deleted pa, type:%d, pblk:%llu, lblk:%u, len:%d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3936) 			     pa->pa_type, pa->pa_pstart, pa->pa_lstart,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3937) 			     pa->pa_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3938) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3939) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3940) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3941) 	pa->pa_deleted = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3942) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3943) 	if (pa->pa_type == MB_INODE_PA) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3944) 		ei = EXT4_I(pa->pa_inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3945) 		atomic_dec(&ei->i_prealloc_active);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3946) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3947) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3948) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3949) static void ext4_mb_pa_callback(struct rcu_head *head)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3950) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3951) 	struct ext4_prealloc_space *pa;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3952) 	pa = container_of(head, struct ext4_prealloc_space, u.pa_rcu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3953) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3954) 	BUG_ON(atomic_read(&pa->pa_count));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3955) 	BUG_ON(pa->pa_deleted == 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3956) 	kmem_cache_free(ext4_pspace_cachep, pa);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3957) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3958) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3959) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3960)  * drops a reference to preallocated space descriptor
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3961)  * if this was the last reference and the space is consumed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3962)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3963) static void ext4_mb_put_pa(struct ext4_allocation_context *ac,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3964) 			struct super_block *sb, struct ext4_prealloc_space *pa)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3965) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3966) 	ext4_group_t grp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3967) 	ext4_fsblk_t grp_blk;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3968) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3969) 	/* in this short window concurrent discard can set pa_deleted */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3970) 	spin_lock(&pa->pa_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3971) 	if (!atomic_dec_and_test(&pa->pa_count) || pa->pa_free != 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3972) 		spin_unlock(&pa->pa_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3973) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3974) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3975) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3976) 	if (pa->pa_deleted == 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3977) 		spin_unlock(&pa->pa_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3978) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3979) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3980) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3981) 	ext4_mb_mark_pa_deleted(sb, pa);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3982) 	spin_unlock(&pa->pa_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3983) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3984) 	grp_blk = pa->pa_pstart;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3985) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3986) 	 * If doing group-based preallocation, pa_pstart may be in the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3987) 	 * next group when pa is used up
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3988) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3989) 	if (pa->pa_type == MB_GROUP_PA)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3990) 		grp_blk--;
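	/*
	 * Illustration with a hypothetical geometry (32768 blocks per
	 * group, first data block 0): a fully consumed group pa ending
	 * at block 32767 has pa_pstart advanced to 32768, which maps to
	 * group 1; grp_blk-- brings it back to 32767, i.e. group 0,
	 * where the pa actually lives.
	 */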
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3991) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3992) 	grp = ext4_get_group_number(sb, grp_blk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3993) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3994) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3995) 	 * possible race:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3996) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3997) 	 *  P1 (buddy init)			P2 (regular allocation)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3998) 	 *					find block B in PA
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3999) 	 *  copy on-disk bitmap to buddy
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4000) 	 *  					mark B in on-disk bitmap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4001) 	 *					drop PA from group
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4002) 	 *  mark all PAs in buddy
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4003) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4004) 	 * thus, P1 initializes buddy with B available. to prevent this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4005) 	 * we make "copy" and "mark all PAs" atomic and serialize "drop PA"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4006) 	 * against that pair
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4007) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4008) 	ext4_lock_group(sb, grp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4009) 	list_del(&pa->pa_group_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4010) 	ext4_unlock_group(sb, grp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4011) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4012) 	spin_lock(pa->pa_obj_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4013) 	list_del_rcu(&pa->pa_inode_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4014) 	spin_unlock(pa->pa_obj_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4015) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4016) 	call_rcu(&pa->u.pa_rcu, ext4_mb_pa_callback);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4017) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4018) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4019) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4020)  * creates new preallocated space for the given inode
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4021)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4022) static noinline_for_stack void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4023) ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4024) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4025) 	struct super_block *sb = ac->ac_sb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4026) 	struct ext4_sb_info *sbi = EXT4_SB(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4027) 	struct ext4_prealloc_space *pa;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4028) 	struct ext4_group_info *grp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4029) 	struct ext4_inode_info *ei;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4030) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4031) 	/* preallocate only when the found space is larger than requested */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4032) 	BUG_ON(ac->ac_o_ex.fe_len >= ac->ac_b_ex.fe_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4033) 	BUG_ON(ac->ac_status != AC_STATUS_FOUND);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4034) 	BUG_ON(!S_ISREG(ac->ac_inode->i_mode));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4035) 	BUG_ON(ac->ac_pa == NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4036) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4037) 	pa = ac->ac_pa;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4038) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4039) 	if (ac->ac_b_ex.fe_len < ac->ac_g_ex.fe_len) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4040) 		int winl;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4041) 		int wins;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4042) 		int win;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4043) 		int offs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4044) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4045) 		/* we can't allocate as much as the normalizer wants,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4046) 		 * so the found space must get a proper lstart
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4047) 		 * to cover the original request */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4048) 		BUG_ON(ac->ac_g_ex.fe_logical > ac->ac_o_ex.fe_logical);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4049) 		BUG_ON(ac->ac_g_ex.fe_len < ac->ac_o_ex.fe_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4050) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4051) 		/* we're limited by the original request in that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4052) 		 * its logical block must be covered anyway;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4053) 		 * winl is the window we can move our chunk within */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4054) 		winl = ac->ac_o_ex.fe_logical - ac->ac_g_ex.fe_logical;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4055) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4056) 		/* also, we should cover the whole original request */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4057) 		wins = EXT4_C2B(sbi, ac->ac_b_ex.fe_len - ac->ac_o_ex.fe_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4058) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4059) 		/* the smallest one defines the real window */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4060) 		win = min(winl, wins);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4061) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4062) 		offs = ac->ac_o_ex.fe_logical %
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4063) 			EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4064) 		if (offs && offs < win)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4065) 			win = offs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4066) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4067) 		ac->ac_b_ex.fe_logical = ac->ac_o_ex.fe_logical -
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4068) 			EXT4_NUM_B2C(sbi, win);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4069) 		BUG_ON(ac->ac_o_ex.fe_logical < ac->ac_b_ex.fe_logical);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4070) 		BUG_ON(ac->ac_o_ex.fe_len > ac->ac_b_ex.fe_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4071) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4072) 
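	/*
	 * A worked example of the window math above, assuming one block
	 * per cluster (EXT4_C2B() is then the identity) and hypothetical
	 * numbers: o_ex is 2 blocks at logical 100, g_ex is 16 blocks at
	 * logical 96, and the allocator found b_ex.fe_len = 8.  Then
	 * winl = 100 - 96 = 4, wins = 8 - 2 = 6, win = min(4, 6) = 4,
	 * offs = 100 % 8 = 4 (not less than win, so win stays 4), and
	 * fe_logical becomes 100 - 4 = 96: the chunk [96, 104) still
	 * covers the original request [100, 102).
	 */
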
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4073) 	/* preallocation can change ac_b_ex, thus we store the actually
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4074) 	 * allocated blocks for history */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4075) 	ac->ac_f_ex = ac->ac_b_ex;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4076) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4077) 	pa->pa_lstart = ac->ac_b_ex.fe_logical;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4078) 	pa->pa_pstart = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4079) 	pa->pa_len = ac->ac_b_ex.fe_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4080) 	pa->pa_free = pa->pa_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4081) 	spin_lock_init(&pa->pa_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4082) 	INIT_LIST_HEAD(&pa->pa_inode_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4083) 	INIT_LIST_HEAD(&pa->pa_group_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4084) 	pa->pa_deleted = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4085) 	pa->pa_type = MB_INODE_PA;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4086) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4087) 	mb_debug(sb, "new inode pa %p: %llu/%d for %u\n", pa, pa->pa_pstart,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4088) 		 pa->pa_len, pa->pa_lstart);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4089) 	trace_ext4_mb_new_inode_pa(ac, pa);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4090) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4091) 	ext4_mb_use_inode_pa(ac, pa);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4092) 	atomic_add(pa->pa_free, &sbi->s_mb_preallocated);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4093) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4094) 	ei = EXT4_I(ac->ac_inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4095) 	grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4096) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4097) 	pa->pa_obj_lock = &ei->i_prealloc_lock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4098) 	pa->pa_inode = ac->ac_inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4099) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4100) 	list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4101) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4102) 	spin_lock(pa->pa_obj_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4103) 	list_add_rcu(&pa->pa_inode_list, &ei->i_prealloc_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4104) 	spin_unlock(pa->pa_obj_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4105) 	atomic_inc(&ei->i_prealloc_active);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4106) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4107) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4108) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4109)  * creates new preallocated space for the locality group the inode belongs to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4110)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4111) static noinline_for_stack void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4112) ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4113) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4114) 	struct super_block *sb = ac->ac_sb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4115) 	struct ext4_locality_group *lg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4116) 	struct ext4_prealloc_space *pa;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4117) 	struct ext4_group_info *grp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4118) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4119) 	/* preallocate only when the found space is larger than requested */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4120) 	BUG_ON(ac->ac_o_ex.fe_len >= ac->ac_b_ex.fe_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4121) 	BUG_ON(ac->ac_status != AC_STATUS_FOUND);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4122) 	BUG_ON(!S_ISREG(ac->ac_inode->i_mode));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4123) 	BUG_ON(ac->ac_pa == NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4124) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4125) 	pa = ac->ac_pa;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4126) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4127) 	/* preallocation can change ac_b_ex, thus we store the actually
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4128) 	 * allocated blocks for history */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4129) 	ac->ac_f_ex = ac->ac_b_ex;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4130) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4131) 	pa->pa_pstart = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4132) 	pa->pa_lstart = pa->pa_pstart;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4133) 	pa->pa_len = ac->ac_b_ex.fe_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4134) 	pa->pa_free = pa->pa_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4135) 	spin_lock_init(&pa->pa_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4136) 	INIT_LIST_HEAD(&pa->pa_inode_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4137) 	INIT_LIST_HEAD(&pa->pa_group_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4138) 	pa->pa_deleted = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4139) 	pa->pa_type = MB_GROUP_PA;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4140) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4141) 	mb_debug(sb, "new group pa %p: %llu/%d for %u\n", pa, pa->pa_pstart,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4142) 		 pa->pa_len, pa->pa_lstart);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4143) 	trace_ext4_mb_new_group_pa(ac, pa);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4144) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4145) 	ext4_mb_use_group_pa(ac, pa);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4146) 	atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4147) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4148) 	grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4149) 	lg = ac->ac_lg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4150) 	BUG_ON(lg == NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4151) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4152) 	pa->pa_obj_lock = &lg->lg_prealloc_lock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4153) 	pa->pa_inode = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4154) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4155) 	list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4156) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4157) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4158) 	 * We will later add the new pa to the right bucket
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4159) 	 * after updating the pa_free in ext4_mb_release_context
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4160) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4161) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4162) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4163) static void ext4_mb_new_preallocation(struct ext4_allocation_context *ac)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4164) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4165) 	if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4166) 		ext4_mb_new_group_pa(ac);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4167) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4168) 		ext4_mb_new_inode_pa(ac);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4169) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4170) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4171) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4172)  * finds all unused blocks in the on-disk bitmap, frees them in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4173)  * the in-core bitmap and buddy.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4174)  * @pa must be unlinked from the inode and group lists, so that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4175)  * nobody else can find/use it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4176)  * The caller MUST hold the group/inode locks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4177)  * TODO: optimize the case when there are no in-core structures yet
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4178)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4179) static noinline_for_stack int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4180) ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4181) 			struct ext4_prealloc_space *pa)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4182) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4183) 	struct super_block *sb = e4b->bd_sb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4184) 	struct ext4_sb_info *sbi = EXT4_SB(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4185) 	unsigned int end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4186) 	unsigned int next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4187) 	ext4_group_t group;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4188) 	ext4_grpblk_t bit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4189) 	unsigned long long grp_blk_start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4190) 	int free = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4191) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4192) 	BUG_ON(pa->pa_deleted == 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4193) 	ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4194) 	grp_blk_start = pa->pa_pstart - EXT4_C2B(sbi, bit);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4195) 	BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4196) 	end = bit + pa->pa_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4197) 
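	/*
	 * Free every run of zero bits inside the pa.  A worked example
	 * with a hypothetical bitmap: for bit = 100 and pa_len = 8
	 * (end = 108), where only bits 102-103 are set, the loop frees
	 * [100, 102) and [104, 108), so free = 6, which must then match
	 * pa->pa_free.
	 */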
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4198) 	while (bit < end) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4199) 		bit = mb_find_next_zero_bit(bitmap_bh->b_data, end, bit);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4200) 		if (bit >= end)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4201) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4202) 		next = mb_find_next_bit(bitmap_bh->b_data, end, bit);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4203) 		mb_debug(sb, "free preallocated %u/%u in group %u\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4204) 			 (unsigned) ext4_group_first_block_no(sb, group) + bit,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4205) 			 (unsigned) next - bit, (unsigned) group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4206) 		free += next - bit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4207) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4208) 		trace_ext4_mballoc_discard(sb, NULL, group, bit, next - bit);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4209) 		trace_ext4_mb_release_inode_pa(pa, (grp_blk_start +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4210) 						    EXT4_C2B(sbi, bit)),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4211) 					       next - bit);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4212) 		mb_free_blocks(pa->pa_inode, e4b, bit, next - bit);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4213) 		bit = next + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4214) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4215) 	if (free != pa->pa_free) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4216) 		ext4_msg(e4b->bd_sb, KERN_CRIT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4217) 			 "pa %p: logic %lu, phys. %lu, len %d",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4218) 			 pa, (unsigned long) pa->pa_lstart,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4219) 			 (unsigned long) pa->pa_pstart,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4220) 			 pa->pa_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4221) 		ext4_grp_locked_error(sb, group, 0, 0, "free %u, pa_free %u",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4222) 					free, pa->pa_free);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4223) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4224) 		 * pa is already deleted so we use the value obtained
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4225) 		 * from the bitmap and continue.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4226) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4227) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4228) 	atomic_add(free, &sbi->s_mb_discarded);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4229) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4230) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4231) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4232) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4233) static noinline_for_stack int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4234) ext4_mb_release_group_pa(struct ext4_buddy *e4b,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4235) 				struct ext4_prealloc_space *pa)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4236) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4237) 	struct super_block *sb = e4b->bd_sb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4238) 	ext4_group_t group;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4239) 	ext4_grpblk_t bit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4240) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4241) 	trace_ext4_mb_release_group_pa(sb, pa);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4242) 	BUG_ON(pa->pa_deleted == 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4243) 	ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4244) 	BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4245) 	mb_free_blocks(pa->pa_inode, e4b, bit, pa->pa_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4246) 	atomic_add(pa->pa_len, &EXT4_SB(sb)->s_mb_discarded);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4247) 	trace_ext4_mballoc_discard(sb, NULL, group, bit, pa->pa_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4248) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4249) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4250) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4251) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4252) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4253)  * releases all preallocations in given group
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4254)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4255)  * first, we need to decide discard policy:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4256)  * - when do we discard
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4257)  *   1) ENOSPC
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4258)  * - how many do we discard
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4259)  *   1) how many requested
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4260)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4261) static noinline_for_stack int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4262) ext4_mb_discard_group_preallocations(struct super_block *sb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4263) 				     ext4_group_t group, int *busy)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4264) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4265) 	struct ext4_group_info *grp = ext4_get_group_info(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4266) 	struct buffer_head *bitmap_bh = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4267) 	struct ext4_prealloc_space *pa, *tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4268) 	struct list_head list;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4269) 	struct ext4_buddy e4b;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4270) 	int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4271) 	int free = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4272) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4273) 	mb_debug(sb, "discard preallocation for group %u\n", group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4274) 	if (list_empty(&grp->bb_prealloc_list))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4275) 		goto out_dbg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4276) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4277) 	bitmap_bh = ext4_read_block_bitmap(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4278) 	if (IS_ERR(bitmap_bh)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4279) 		err = PTR_ERR(bitmap_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4280) 		ext4_error_err(sb, -err,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4281) 			       "Error %d reading block bitmap for %u",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4282) 			       err, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4283) 		goto out_dbg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4284) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4285) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4286) 	err = ext4_mb_load_buddy(sb, group, &e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4287) 	if (err) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4288) 		ext4_warning(sb, "Error %d loading buddy information for %u",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4289) 			     err, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4290) 		put_bh(bitmap_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4291) 		goto out_dbg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4292) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4293) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4294) 	INIT_LIST_HEAD(&list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4295) 	ext4_lock_group(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4296) 	list_for_each_entry_safe(pa, tmp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4297) 				&grp->bb_prealloc_list, pa_group_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4298) 		spin_lock(&pa->pa_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4299) 		if (atomic_read(&pa->pa_count)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4300) 			spin_unlock(&pa->pa_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4301) 			*busy = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4302) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4303) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4304) 		if (pa->pa_deleted) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4305) 			spin_unlock(&pa->pa_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4306) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4307) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4308) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4309) 		/* seems this one can be freed ... */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4310) 		ext4_mb_mark_pa_deleted(sb, pa);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4311) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4312) 		if (!free)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4313) 			this_cpu_inc(discard_pa_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4314) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4315) 		/* we can trust pa_free ... */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4316) 		free += pa->pa_free;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4317) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4318) 		spin_unlock(&pa->pa_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4319) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4320) 		list_del(&pa->pa_group_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4321) 		list_add(&pa->u.pa_tmp_list, &list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4322) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4323) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4324) 	/* now free all selected PAs */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4325) 	list_for_each_entry_safe(pa, tmp, &list, u.pa_tmp_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4326) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4327) 		/* remove from object (inode or locality group) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4328) 		spin_lock(pa->pa_obj_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4329) 		list_del_rcu(&pa->pa_inode_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4330) 		spin_unlock(pa->pa_obj_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4331) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4332) 		if (pa->pa_type == MB_GROUP_PA)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4333) 			ext4_mb_release_group_pa(&e4b, pa);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4334) 		else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4335) 			ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4336) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4337) 		list_del(&pa->u.pa_tmp_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4338) 		call_rcu(&pa->u.pa_rcu, ext4_mb_pa_callback);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4339) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4340) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4341) 	ext4_unlock_group(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4342) 	ext4_mb_unload_buddy(&e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4343) 	put_bh(bitmap_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4344) out_dbg:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4345) 	mb_debug(sb, "discarded (%d) blocks preallocated for group %u bb_free (%d)\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4346) 		 free, group, grp->bb_free);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4347) 	return free;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4348) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4349) 
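/*
 * Note: a pa that still has references (pa_count != 0) above only sets
 * *busy and is skipped; the caller is expected to retry such groups
 * (in this tree ext4_mb_discard_preallocations() is assumed to re-run
 * the scan a bounded number of times while *busy keeps being set).
 */
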
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4350) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4351)  * releases all unused preallocated blocks for the given inode
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4352)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4353)  * It's important to discard preallocations under i_data_sem:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4354)  * we don't want another block to be served from the prealloc
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4355)  * space while we are discarding the inode's prealloc space.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4356)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4357)  * FIXME!! Make sure it is valid at all the call sites
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4358)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4359) void ext4_discard_preallocations(struct inode *inode, unsigned int needed)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4360) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4361) 	struct ext4_inode_info *ei = EXT4_I(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4362) 	struct super_block *sb = inode->i_sb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4363) 	struct buffer_head *bitmap_bh = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4364) 	struct ext4_prealloc_space *pa, *tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4365) 	ext4_group_t group = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4366) 	struct list_head list;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4367) 	struct ext4_buddy e4b;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4368) 	int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4369) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4370) 	if (!S_ISREG(inode->i_mode)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4371) 		/*BUG_ON(!list_empty(&ei->i_prealloc_list));*/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4372) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4373) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4374) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4375) 	if (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4376) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4377) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4378) 	mb_debug(sb, "discard preallocation for inode %lu\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4379) 		 inode->i_ino);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4380) 	trace_ext4_discard_preallocations(inode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4381) 			atomic_read(&ei->i_prealloc_active), needed);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4382) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4383) 	INIT_LIST_HEAD(&list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4384) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4385) 	if (needed == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4386) 		needed = UINT_MAX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4387) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4388) repeat:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4389) 	/* first, collect all pa's in the inode */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4390) 	spin_lock(&ei->i_prealloc_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4391) 	while (!list_empty(&ei->i_prealloc_list) && needed) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4392) 		pa = list_entry(ei->i_prealloc_list.prev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4393) 				struct ext4_prealloc_space, pa_inode_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4394) 		BUG_ON(pa->pa_obj_lock != &ei->i_prealloc_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4395) 		spin_lock(&pa->pa_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4396) 		if (atomic_read(&pa->pa_count)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4397) 			/* this shouldn't happen often - nobody should
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4398) 			 * use preallocation while we're discarding it */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4399) 			spin_unlock(&pa->pa_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4400) 			spin_unlock(&ei->i_prealloc_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4401) 			ext4_msg(sb, KERN_ERR,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4402) 				 "uh-oh! used pa while discarding");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4403) 			WARN_ON(1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4404) 			schedule_timeout_uninterruptible(HZ);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4405) 			goto repeat;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4407) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4408) 		if (pa->pa_deleted == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4409) 			ext4_mb_mark_pa_deleted(sb, pa);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4410) 			spin_unlock(&pa->pa_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4411) 			list_del_rcu(&pa->pa_inode_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4412) 			list_add(&pa->u.pa_tmp_list, &list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4413) 			needed--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4414) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4415) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4416) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4417) 		/* someone is deleting pa right now */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4418) 		spin_unlock(&pa->pa_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4419) 		spin_unlock(&ei->i_prealloc_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4420) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4421) 		/* we have to wait here because pa_deleted
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4422) 		 * doesn't mean the pa is already unlinked
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4423) 		 * from the list.  As we might be called from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4424) 		 * ->clear_inode(), the inode would get freed,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4425) 		 * and a concurrent thread unlinking the pa
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4426) 		 * from the inode's list could access already
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4427) 		 * freed memory, bad-bad-bad */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4428) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4429) 		/* XXX: if this happens too often, we can
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4430) 		 * add a flag to force wait only in case
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4431) 		 * of ->clear_inode(), but not in case of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4432) 		 * regular truncate */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4433) 		schedule_timeout_uninterruptible(HZ);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4434) 		goto repeat;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4435) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4436) 	spin_unlock(&ei->i_prealloc_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4437) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4438) 	list_for_each_entry_safe(pa, tmp, &list, u.pa_tmp_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4439) 		BUG_ON(pa->pa_type != MB_INODE_PA);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4440) 		group = ext4_get_group_number(sb, pa->pa_pstart);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4441) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4442) 		err = ext4_mb_load_buddy_gfp(sb, group, &e4b,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4443) 					     GFP_NOFS|__GFP_NOFAIL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4444) 		if (err) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4445) 			ext4_error_err(sb, -err, "Error %d loading buddy information for %u",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4446) 				       err, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4447) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4448) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4449) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4450) 		bitmap_bh = ext4_read_block_bitmap(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4451) 		if (IS_ERR(bitmap_bh)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4452) 			err = PTR_ERR(bitmap_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4453) 			ext4_error_err(sb, -err, "Error %d reading block bitmap for %u",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4454) 				       err, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4455) 			ext4_mb_unload_buddy(&e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4456) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4457) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4458) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4459) 		ext4_lock_group(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4460) 		list_del(&pa->pa_group_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4461) 		ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4462) 		ext4_unlock_group(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4463) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4464) 		ext4_mb_unload_buddy(&e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4465) 		put_bh(bitmap_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4466) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4467) 		list_del(&pa->u.pa_tmp_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4468) 		call_rcu(&pa->u.pa_rcu, ext4_mb_pa_callback);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4469) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4470) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4471) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4472) static int ext4_mb_pa_alloc(struct ext4_allocation_context *ac)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4473) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4474) 	struct ext4_prealloc_space *pa;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4475) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4476) 	BUG_ON(ext4_pspace_cachep == NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4477) 	pa = kmem_cache_zalloc(ext4_pspace_cachep, GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4478) 	if (!pa)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4479) 		return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4480) 	atomic_set(&pa->pa_count, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4481) 	ac->ac_pa = pa;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4482) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4483) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4484) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4485) static void ext4_mb_pa_free(struct ext4_allocation_context *ac)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4486) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4487) 	struct ext4_prealloc_space *pa = ac->ac_pa;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4488) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4489) 	BUG_ON(!pa);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4490) 	ac->ac_pa = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4491) 	WARN_ON(!atomic_dec_and_test(&pa->pa_count));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4492) 	kmem_cache_free(ext4_pspace_cachep, pa);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4493) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4494) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4495) #ifdef CONFIG_EXT4_DEBUG
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4496) static inline void ext4_mb_show_pa(struct super_block *sb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4497) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4498) 	ext4_group_t i, ngroups;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4499) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4500) 	if (ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4501) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4502) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4503) 	ngroups = ext4_get_groups_count(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4504) 	mb_debug(sb, "groups: ");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4505) 	for (i = 0; i < ngroups; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4506) 		struct ext4_group_info *grp = ext4_get_group_info(sb, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4507) 		struct ext4_prealloc_space *pa;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4508) 		ext4_grpblk_t start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4509) 		struct list_head *cur;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4510) 		ext4_lock_group(sb, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4511) 		list_for_each(cur, &grp->bb_prealloc_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4512) 			pa = list_entry(cur, struct ext4_prealloc_space,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4513) 					pa_group_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4514) 			spin_lock(&pa->pa_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4515) 			ext4_get_group_no_and_offset(sb, pa->pa_pstart,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4516) 						     NULL, &start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4517) 			spin_unlock(&pa->pa_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4518) 			mb_debug(sb, "PA:%u:%d:%d\n", i, start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4519) 				 pa->pa_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4520) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4521) 		ext4_unlock_group(sb, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4522) 		mb_debug(sb, "%u: %d/%d\n", i, grp->bb_free,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4523) 			 grp->bb_fragments);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4524) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4525) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4526) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4527) static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4528) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4529) 	struct super_block *sb = ac->ac_sb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4530) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4531) 	if (ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4532) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4533) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4534) 	mb_debug(sb, "Can't allocate:"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4535) 			" Allocation context details:");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4536) 	mb_debug(sb, "status %u flags 0x%x",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4537) 			ac->ac_status, ac->ac_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4538) 	mb_debug(sb, "orig %lu/%lu/%lu@%lu, "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4539) 			"goal %lu/%lu/%lu@%lu, "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4540) 			"best %lu/%lu/%lu@%lu cr %d",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4541) 			(unsigned long)ac->ac_o_ex.fe_group,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4542) 			(unsigned long)ac->ac_o_ex.fe_start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4543) 			(unsigned long)ac->ac_o_ex.fe_len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4544) 			(unsigned long)ac->ac_o_ex.fe_logical,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4545) 			(unsigned long)ac->ac_g_ex.fe_group,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4546) 			(unsigned long)ac->ac_g_ex.fe_start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4547) 			(unsigned long)ac->ac_g_ex.fe_len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4548) 			(unsigned long)ac->ac_g_ex.fe_logical,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4549) 			(unsigned long)ac->ac_b_ex.fe_group,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4550) 			(unsigned long)ac->ac_b_ex.fe_start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4551) 			(unsigned long)ac->ac_b_ex.fe_len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4552) 			(unsigned long)ac->ac_b_ex.fe_logical,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4553) 			(int)ac->ac_criteria);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4554) 	mb_debug(sb, "%u found", ac->ac_found);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4555) 	ext4_mb_show_pa(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4556) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4557) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4558) static inline void ext4_mb_show_pa(struct super_block *sb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4559) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4561) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4562) static inline void ext4_mb_show_ac(struct ext4_allocation_context *ac)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4563) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4564) 	ext4_mb_show_pa(ac->ac_sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4566) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4567) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4568) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4569) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4570)  * We use locality group preallocation for small files. The size of the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4571)  * file is determined by the current size or the resulting size after
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4572)  * allocation, whichever is larger.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4573)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4574)  * One can tune this size via /sys/fs/ext4/<partition>/mb_stream_req
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4575)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4576) static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4577) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4578) 	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4579) 	int bsbits = ac->ac_sb->s_blocksize_bits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4580) 	loff_t size, isize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4581) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4582) 	if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4583) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4584) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4585) 	if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4586) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4587) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4588) 	size = ac->ac_o_ex.fe_logical + EXT4_C2B(sbi, ac->ac_o_ex.fe_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4589) 	isize = (i_size_read(ac->ac_inode) + ac->ac_sb->s_blocksize - 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4590) 		>> bsbits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4591) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4592) 	if ((size == isize) && !ext4_fs_is_busy(sbi) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4593) 	    !inode_is_open_for_write(ac->ac_inode)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4594) 		ac->ac_flags |= EXT4_MB_HINT_NOPREALLOC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4595) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4596) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4597) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4598) 	if (sbi->s_mb_group_prealloc <= 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4599) 		ac->ac_flags |= EXT4_MB_STREAM_ALLOC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4600) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4601) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4602) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4603) 	/* don't use group allocation for large files */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4604) 	size = max(size, isize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4605) 	if (size > sbi->s_mb_stream_request) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4606) 		ac->ac_flags |= EXT4_MB_STREAM_ALLOC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4607) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4608) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4609) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4610) 	BUG_ON(ac->ac_lg != NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4611) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4612) 	 * locality group prealloc space is per cpu. The reason for having
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4613) 	 * per-cpu locality groups is to reduce contention between block
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4614) 	 * requests from multiple CPUs.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4615) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4616) 	ac->ac_lg = raw_cpu_ptr(sbi->s_locality_groups);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4617) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4618) 	/* we're going to use group allocation */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4619) 	ac->ac_flags |= EXT4_MB_HINT_GROUP_ALLOC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4620) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4621) 	/* serialize all allocations in the group */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4622) 	mutex_lock(&ac->ac_lg->lg_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4623) }
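
/*
 * A minimal userspace sketch (not kernel code) of the policy implemented
 * by ext4_mb_group_or_file() above: the request is sized by the larger of
 * the request end and the on-disk size, then compared against the stream
 * threshold. STREAM_REQUEST_BLKS and all helper names here are made up
 * for illustration; the real tunable is sbi->s_mb_stream_request.
 */
#if 0 /* documentation sketch -- not part of the build */
#include <stdio.h>

#define STREAM_REQUEST_BLKS 16	/* assumed stand-in for s_mb_stream_request */

enum alloc_policy { POLICY_NOPREALLOC, POLICY_STREAM, POLICY_GROUP };

static enum alloc_policy pick_policy(unsigned long req_end_blk,
				     unsigned long isize_blks,
				     int fs_busy, int open_for_write)
{
	unsigned long size;

	/* closed, quiescent file being finished off: skip preallocation */
	if (req_end_blk == isize_blks && !fs_busy && !open_for_write)
		return POLICY_NOPREALLOC;

	/* large files take the per-inode (stream) path */
	size = req_end_blk > isize_blks ? req_end_blk : isize_blks;
	if (size > STREAM_REQUEST_BLKS)
		return POLICY_STREAM;

	/* small files share a per-cpu locality group */
	return POLICY_GROUP;
}

int main(void)
{
	printf("small open file -> %d (expect %d)\n",
	       pick_policy(8, 4, 0, 1), POLICY_GROUP);
	printf("large file      -> %d (expect %d)\n",
	       pick_policy(64, 64, 0, 1), POLICY_STREAM);
	return 0;
}
#endif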
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4624) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4625) static noinline_for_stack int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4626) ext4_mb_initialize_context(struct ext4_allocation_context *ac,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4627) 				struct ext4_allocation_request *ar)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4628) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4629) 	struct super_block *sb = ar->inode->i_sb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4630) 	struct ext4_sb_info *sbi = EXT4_SB(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4631) 	struct ext4_super_block *es = sbi->s_es;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4632) 	ext4_group_t group;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4633) 	unsigned int len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4634) 	ext4_fsblk_t goal;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4635) 	ext4_grpblk_t block;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4636) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4637) 	/* we can't allocate > group size */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4638) 	len = ar->len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4639) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4640) 	/* just a dirty hack to filter out overly large requests */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4641) 	if (len >= EXT4_CLUSTERS_PER_GROUP(sb))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4642) 		len = EXT4_CLUSTERS_PER_GROUP(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4643) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4644) 	/* start searching from the goal */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4645) 	goal = ar->goal;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4646) 	if (goal < le32_to_cpu(es->s_first_data_block) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4647) 			goal >= ext4_blocks_count(es))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4648) 		goal = le32_to_cpu(es->s_first_data_block);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4649) 	ext4_get_group_no_and_offset(sb, goal, &group, &block);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4650) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4651) 	/* set up allocation goals */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4652) 	ac->ac_b_ex.fe_logical = EXT4_LBLK_CMASK(sbi, ar->logical);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4653) 	ac->ac_status = AC_STATUS_CONTINUE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4654) 	ac->ac_sb = sb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4655) 	ac->ac_inode = ar->inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4656) 	ac->ac_o_ex.fe_logical = ac->ac_b_ex.fe_logical;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4657) 	ac->ac_o_ex.fe_group = group;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4658) 	ac->ac_o_ex.fe_start = block;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4659) 	ac->ac_o_ex.fe_len = len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4660) 	ac->ac_g_ex = ac->ac_o_ex;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4661) 	ac->ac_flags = ar->flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4662) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4663) 	/* we have to define the context: we'll work with a file or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4664) 	 * a locality group. This is a policy decision, actually */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4665) 	ext4_mb_group_or_file(ac);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4666) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4667) 	mb_debug(sb, "init ac: %u blocks @ %u, goal %u, flags 0x%x, 2^%d, "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4668) 			"left: %u/%u, right %u/%u to %swritable\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4669) 			(unsigned) ar->len, (unsigned) ar->logical,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4670) 			(unsigned) ar->goal, ac->ac_flags, ac->ac_2order,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4671) 			(unsigned) ar->lleft, (unsigned) ar->pleft,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4672) 			(unsigned) ar->lright, (unsigned) ar->pright,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4673) 			inode_is_open_for_write(ar->inode) ? "" : "non-");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4674) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4675) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4676) }
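
/*
 * The goal-to-(group, offset) split that ext4_get_group_no_and_offset()
 * performs above, reduced to plain integer math in a userspace sketch.
 * FIRST_DATA_BLOCK and BLOCKS_PER_GROUP are illustrative constants; on a
 * real filesystem both come from the superblock.
 */
#if 0 /* documentation sketch -- not part of the build */
#include <stdio.h>

#define FIRST_DATA_BLOCK 1ULL	/* 1 for 1K-block filesystems, else 0 */
#define BLOCKS_PER_GROUP 32768ULL

static void goal_to_group_offset(unsigned long long goal,
				 unsigned long long *group,
				 unsigned long long *offset)
{
	unsigned long long rel = goal - FIRST_DATA_BLOCK;

	*group  = rel / BLOCKS_PER_GROUP;
	*offset = rel % BLOCKS_PER_GROUP;
}

int main(void)
{
	unsigned long long grp, off;

	goal_to_group_offset(100000ULL, &grp, &off);
	/* 99999 / 32768 = 3, remainder 1695 */
	printf("block 100000 -> group %llu, offset %llu\n", grp, off);
	return 0;
}
#endif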
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4677) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4678) static noinline_for_stack void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4679) ext4_mb_discard_lg_preallocations(struct super_block *sb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4680) 					struct ext4_locality_group *lg,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4681) 					int order, int total_entries)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4682) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4683) 	ext4_group_t group = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4684) 	struct ext4_buddy e4b;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4685) 	struct list_head discard_list;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4686) 	struct ext4_prealloc_space *pa, *tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4687) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4688) 	mb_debug(sb, "discard locality group preallocation\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4689) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4690) 	INIT_LIST_HEAD(&discard_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4691) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4692) 	spin_lock(&lg->lg_prealloc_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4693) 	list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[order],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4694) 				pa_inode_list,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4695) 				lockdep_is_held(&lg->lg_prealloc_lock)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4696) 		spin_lock(&pa->pa_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4697) 		if (atomic_read(&pa->pa_count)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4698) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4699) 			 * This is the pa that we just used
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4700) 			 * for block allocation, so don't
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4701) 			 * free it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4702) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4703) 			spin_unlock(&pa->pa_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4704) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4705) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4706) 		if (pa->pa_deleted) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4707) 			spin_unlock(&pa->pa_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4708) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4709) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4710) 		/* only lg prealloc space */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4711) 		BUG_ON(pa->pa_type != MB_GROUP_PA);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4712) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4713) 		/* seems this one can be freed ... */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4714) 		ext4_mb_mark_pa_deleted(sb, pa);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4715) 		spin_unlock(&pa->pa_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4716) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4717) 		list_del_rcu(&pa->pa_inode_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4718) 		list_add(&pa->u.pa_tmp_list, &discard_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4719) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4720) 		total_entries--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4721) 		if (total_entries <= 5) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4722) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4723) 			 * we want to keep only 5 entries
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4724) 			 * allowing it to grow to 8. This
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4725) 			 * makes sure we don't call discard
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4726) 			 * soon for this list.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4727) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4728) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4729) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4730) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4731) 	spin_unlock(&lg->lg_prealloc_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4732) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4733) 	list_for_each_entry_safe(pa, tmp, &discard_list, u.pa_tmp_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4734) 		int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4735) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4736) 		group = ext4_get_group_number(sb, pa->pa_pstart);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4737) 		err = ext4_mb_load_buddy_gfp(sb, group, &e4b,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4738) 					     GFP_NOFS|__GFP_NOFAIL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4739) 		if (err) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4740) 			ext4_error_err(sb, -err, "Error %d loading buddy information for %u",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4741) 				       err, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4742) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4743) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4744) 		ext4_lock_group(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4745) 		list_del(&pa->pa_group_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4746) 		ext4_mb_release_group_pa(&e4b, pa);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4747) 		ext4_unlock_group(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4748) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4749) 		ext4_mb_unload_buddy(&e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4750) 		list_del(&pa->u.pa_tmp_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4751) 		call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4752) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4753) }
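
/*
 * The function above uses a common two-phase pattern: victims are
 * unlinked onto a private discard list while the list lock is held, and
 * the expensive release work happens after the lock is dropped. A
 * userspace sketch of that pattern, with made-up names and a plain
 * mutex standing in for lg_prealloc_lock:
 */
#if 0 /* documentation sketch -- not part of the build */
#include <pthread.h>
#include <stdio.h>

struct pa {
	struct pa *next;
	int busy;	/* stands in for the pa_count/pa_deleted checks */
	int id;
};

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;

/* phase 1: unlink idle entries under the lock, keep busy ones */
static struct pa *collect_victims(struct pa **head)
{
	struct pa *victims = NULL, **pp = head, *p;

	pthread_mutex_lock(&list_lock);
	while ((p = *pp) != NULL) {
		if (p->busy) {
			pp = &p->next;
			continue;
		}
		*pp = p->next;		/* unlink under the lock */
		p->next = victims;
		victims = p;
	}
	pthread_mutex_unlock(&list_lock);
	return victims;
}

int main(void)
{
	struct pa a = { NULL, 0, 1 }, b = { &a, 1, 2 };
	struct pa *head = &b, *v;

	/* phase 2: the release work runs with no lock held */
	for (v = collect_victims(&head); v; v = v->next)
		printf("released pa %d\n", v->id);
	return 0;
}
#endif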
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4754) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4755) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4756)  * We have incremented pa_count. So it cannot be freed at this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4757)  * point. Also we hold lg_mutex. So no parallel allocation is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4758)  * possible from this lg. That means pa_free cannot be updated.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4759)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4760)  * A parallel ext4_mb_discard_group_preallocations is possible,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4761)  * which can cause the lg_prealloc_list to be updated.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4762)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4763) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4764) static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4765) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4766) 	int order, added = 0, lg_prealloc_count = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4767) 	struct super_block *sb = ac->ac_sb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4768) 	struct ext4_locality_group *lg = ac->ac_lg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4769) 	struct ext4_prealloc_space *tmp_pa, *pa = ac->ac_pa;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4770) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4771) 	order = fls(pa->pa_free) - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4772) 	if (order > PREALLOC_TB_SIZE - 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4773) 		/* The max size of hash table is PREALLOC_TB_SIZE */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4774) 		order = PREALLOC_TB_SIZE - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4775) 	/* Add the prealloc space to lg */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4776) 	spin_lock(&lg->lg_prealloc_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4777) 	list_for_each_entry_rcu(tmp_pa, &lg->lg_prealloc_list[order],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4778) 				pa_inode_list,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4779) 				lockdep_is_held(&lg->lg_prealloc_lock)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4780) 		spin_lock(&tmp_pa->pa_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4781) 		if (tmp_pa->pa_deleted) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4782) 			spin_unlock(&tmp_pa->pa_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4783) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4784) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4785) 		if (!added && pa->pa_free < tmp_pa->pa_free) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4786) 			/* Add to the tail of the previous entry */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4787) 			list_add_tail_rcu(&pa->pa_inode_list,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4788) 						&tmp_pa->pa_inode_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4789) 			added = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4790) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4791) 			 * we want to count the total
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4792) 			 * number of entries in the list
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4793) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4794) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4795) 		spin_unlock(&tmp_pa->pa_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4796) 		lg_prealloc_count++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4797) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4798) 	if (!added)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4799) 		list_add_tail_rcu(&pa->pa_inode_list,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4800) 					&lg->lg_prealloc_list[order]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4801) 	spin_unlock(&lg->lg_prealloc_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4802) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4803) 	/* Now trim the list to no more than 8 elements */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4804) 	if (lg_prealloc_count > 8)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4805) 		ext4_mb_discard_lg_preallocations(sb, lg,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4806) 						  order, lg_prealloc_count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4810) }
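
/*
 * A userspace sketch of the bucket selection used above: a PA with
 * pa_free free clusters lands in lg_prealloc_list[order] with
 * order = fls(pa_free) - 1, clamped to the table size. fls_portable()
 * is a stand-in for the kernel's fls().
 */
#if 0 /* documentation sketch -- not part of the build */
#include <stdio.h>

#define PREALLOC_TB_SIZE 10	/* same value as in mballoc.h */

/* index of the highest set bit, 1-based; 0 when x == 0 */
static int fls_portable(unsigned int x)
{
	int r = 0;

	while (x) {
		r++;
		x >>= 1;
	}
	return r;
}

static int pa_order(unsigned int pa_free)
{
	int order = fls_portable(pa_free) - 1;

	if (order > PREALLOC_TB_SIZE - 1)
		order = PREALLOC_TB_SIZE - 1;
	return order;
}

int main(void)
{
	printf("pa_free=1     -> order %d\n", pa_order(1));	   /* 0 */
	printf("pa_free=100   -> order %d\n", pa_order(100));	   /* 6 */
	printf("pa_free=1<<20 -> order %d\n", pa_order(1u << 20)); /* clamped to 9 */
	return 0;
}
#endif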
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4811) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4812) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4813)  * If the per-inode prealloc list is too long, trim some PAs.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4814)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4815) static void ext4_mb_trim_inode_pa(struct inode *inode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4816) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4817) 	struct ext4_inode_info *ei = EXT4_I(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4818) 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4819) 	int count, delta;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4820) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4821) 	count = atomic_read(&ei->i_prealloc_active);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4822) 	delta = (sbi->s_mb_max_inode_prealloc >> 2) + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4823) 	if (count > sbi->s_mb_max_inode_prealloc + delta) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4824) 		count -= sbi->s_mb_max_inode_prealloc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4825) 		ext4_discard_preallocations(inode, count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4826) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4827) }
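
/*
 * The trim above has hysteresis: trimming starts only once the list is
 * a quarter (plus one) past s_mb_max_inode_prealloc, and then trims back
 * down to the maximum, so back-to-back allocations don't discard on
 * every call. A userspace sketch of the arithmetic, assuming the default
 * maximum of 512 from mballoc.h:
 */
#if 0 /* documentation sketch -- not part of the build */
#include <stdio.h>

static int pas_to_trim(int count, int max_inode_prealloc)
{
	int delta = (max_inode_prealloc >> 2) + 1;

	if (count > max_inode_prealloc + delta)
		return count - max_inode_prealloc;
	return 0;
}

int main(void)
{
	/* with max = 512 the threshold is 512 + 129 = 641 */
	printf("count=640 -> trim %d\n", pas_to_trim(640, 512)); /* 0 */
	printf("count=700 -> trim %d\n", pas_to_trim(700, 512)); /* 188 */
	return 0;
}
#endif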
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4828) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4829) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4830)  * release all resources used in allocation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4831)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4832) static int ext4_mb_release_context(struct ext4_allocation_context *ac)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4833) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4834) 	struct inode *inode = ac->ac_inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4835) 	struct ext4_inode_info *ei = EXT4_I(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4836) 	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4837) 	struct ext4_prealloc_space *pa = ac->ac_pa;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4838) 	if (pa) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4839) 		if (pa->pa_type == MB_GROUP_PA) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4840) 			/* see comment in ext4_mb_use_group_pa() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4841) 			spin_lock(&pa->pa_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4842) 			pa->pa_pstart += EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4843) 			pa->pa_lstart += EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4844) 			pa->pa_free -= ac->ac_b_ex.fe_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4845) 			pa->pa_len -= ac->ac_b_ex.fe_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4846) 			spin_unlock(&pa->pa_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4847) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4848) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4849) 			 * We want to add the pa to the right bucket.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4850) 			 * Remove it from the list and while adding
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4851) 			 * make sure the list to which we are adding
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4852) 			 * doesn't grow too big.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4853) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4854) 			if (likely(pa->pa_free)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4855) 				spin_lock(pa->pa_obj_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4856) 				list_del_rcu(&pa->pa_inode_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4857) 				spin_unlock(pa->pa_obj_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4858) 				ext4_mb_add_n_trim(ac);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4859) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4860) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4861) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4862) 		if (pa->pa_type == MB_INODE_PA) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4863) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4864) 			 * treat the per-inode prealloc list as an LRU list,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4865) 			 * then try to trim the least recently used PA.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4866) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4867) 			spin_lock(pa->pa_obj_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4868) 			list_move(&pa->pa_inode_list, &ei->i_prealloc_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4869) 			spin_unlock(pa->pa_obj_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4870) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4871) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4872) 		ext4_mb_put_pa(ac, ac->ac_sb, pa);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4873) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4874) 	if (ac->ac_bitmap_page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4875) 		put_page(ac->ac_bitmap_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4876) 	if (ac->ac_buddy_page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4877) 		put_page(ac->ac_buddy_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4878) 	if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4879) 		mutex_unlock(&ac->ac_lg->lg_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4880) 	ext4_mb_collect_stats(ac);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4881) 	ext4_mb_trim_inode_pa(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4882) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4883) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4884) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4885) static int ext4_mb_discard_preallocations(struct super_block *sb, int needed)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4886) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4887) 	ext4_group_t i, ngroups = ext4_get_groups_count(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4888) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4889) 	int freed = 0, busy = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4890) 	int retry = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4891) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4892) 	trace_ext4_mb_discard_preallocations(sb, needed);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4893) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4894) 	if (needed == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4895) 		needed = EXT4_CLUSTERS_PER_GROUP(sb) + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4896)  repeat:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4897) 	for (i = 0; i < ngroups && needed > 0; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4898) 		ret = ext4_mb_discard_group_preallocations(sb, i, &busy);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4899) 		freed += ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4900) 		needed -= ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4901) 		cond_resched();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4902) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4903) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4904) 	if (needed > 0 && busy && ++retry < 3) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4905) 		busy = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4906) 		goto repeat;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4907) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4908) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4909) 	return freed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4910) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4911) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4912) static bool ext4_mb_discard_preallocations_should_retry(struct super_block *sb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4913) 			struct ext4_allocation_context *ac, u64 *seq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4914) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4915) 	int freed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4916) 	u64 seq_retry = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4917) 	bool ret = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4918) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4919) 	freed = ext4_mb_discard_preallocations(sb, ac->ac_o_ex.fe_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4920) 	if (freed) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4921) 		ret = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4922) 		goto out_dbg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4923) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4924) 	seq_retry = ext4_get_discard_pa_seq_sum();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4925) 	if (!(ac->ac_flags & EXT4_MB_STRICT_CHECK) || seq_retry != *seq) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4926) 		ac->ac_flags |= EXT4_MB_STRICT_CHECK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4927) 		*seq = seq_retry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4928) 		ret = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4929) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4930) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4931) out_dbg:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4932) 	mb_debug(sb, "freed %d, retry ? %s\n", freed, ret ? "yes" : "no");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4933) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4934) }
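
/*
 * A single-threaded userspace sketch of the retry decision above: retry
 * if we actually freed something, or if the global discard sequence has
 * moved (someone else discarded PAs between our scan and now). The
 * plain counter here stands in for the per-cpu discard_pa_seq sum.
 */
#if 0 /* documentation sketch -- not part of the build */
#include <stdio.h>

static unsigned long long discard_seq;	/* bumped on every discard */

static int should_retry(int freed, int strict, unsigned long long *seq)
{
	unsigned long long now;

	if (freed)
		return 1;
	now = discard_seq;
	if (!strict || now != *seq) {
		*seq = now;	/* remember the generation we retried at */
		return 1;	/* caller switches to strict mode, like
				 * EXT4_MB_STRICT_CHECK */
	}
	return 0;
}

int main(void)
{
	unsigned long long seq = discard_seq;

	printf("nothing freed, first pass:  retry=%d\n",
	       should_retry(0, 0, &seq));	/* 1: go strict */
	printf("strict pass, seq unchanged: retry=%d\n",
	       should_retry(0, 1, &seq));	/* 0: give up, ENOSPC */
	return 0;
}
#endif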
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4935) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4936) static ext4_fsblk_t ext4_mb_new_blocks_simple(handle_t *handle,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4937) 				struct ext4_allocation_request *ar, int *errp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4938) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4939) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4940)  * Main entry point into mballoc to allocate blocks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4941)  * It tries to use preallocation first, then falls back
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4942)  * to the usual allocation path.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4943)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4944) ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4945) 				struct ext4_allocation_request *ar, int *errp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4946) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4947) 	struct ext4_allocation_context *ac = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4948) 	struct ext4_sb_info *sbi;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4949) 	struct super_block *sb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4950) 	ext4_fsblk_t block = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4951) 	unsigned int inquota = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4952) 	unsigned int reserv_clstrs = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4953) 	u64 seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4954) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4955) 	might_sleep();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4956) 	sb = ar->inode->i_sb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4957) 	sbi = EXT4_SB(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4958) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4959) 	trace_ext4_request_blocks(ar);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4960) 	if (sbi->s_mount_state & EXT4_FC_REPLAY)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4961) 		return ext4_mb_new_blocks_simple(handle, ar, errp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4962) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4963) 	/* Allow use of the superuser reservation for the quota file */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4964) 	if (ext4_is_quota_file(ar->inode))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4965) 		ar->flags |= EXT4_MB_USE_ROOT_BLOCKS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4966) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4967) 	if ((ar->flags & EXT4_MB_DELALLOC_RESERVED) == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4968) 		/* Without delayed allocation we need to verify
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4969) 		 * there are enough free blocks to do the block allocation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4970) 		 * and that the allocation doesn't exceed the quota limits.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4971) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4972) 		while (ar->len &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4973) 			ext4_claim_free_clusters(sbi, ar->len, ar->flags)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4974) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4975) 			/* let others free the space */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4976) 			cond_resched();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4977) 			ar->len = ar->len >> 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4978) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4979) 		if (!ar->len) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4980) 			ext4_mb_show_pa(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4981) 			*errp = -ENOSPC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4982) 			return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4983) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4984) 		reserv_clstrs = ar->len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4985) 		if (ar->flags & EXT4_MB_USE_ROOT_BLOCKS) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4986) 			dquot_alloc_block_nofail(ar->inode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4987) 						 EXT4_C2B(sbi, ar->len));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4988) 		} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4989) 			while (ar->len &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4990) 				dquot_alloc_block(ar->inode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4991) 						  EXT4_C2B(sbi, ar->len))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4992) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4993) 				ar->flags |= EXT4_MB_HINT_NOPREALLOC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4994) 				ar->len--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4995) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4996) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4997) 		inquota = ar->len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4998) 		if (ar->len == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4999) 			*errp = -EDQUOT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5000) 			goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5001) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5002) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5003) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5004) 	ac = kmem_cache_zalloc(ext4_ac_cachep, GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5005) 	if (!ac) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5006) 		ar->len = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5007) 		*errp = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5008) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5009) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5010) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5011) 	*errp = ext4_mb_initialize_context(ac, ar);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5012) 	if (*errp) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5013) 		ar->len = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5014) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5015) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5016) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5017) 	ac->ac_op = EXT4_MB_HISTORY_PREALLOC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5018) 	seq = this_cpu_read(discard_pa_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5019) 	if (!ext4_mb_use_preallocated(ac)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5020) 		ac->ac_op = EXT4_MB_HISTORY_ALLOC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5021) 		ext4_mb_normalize_request(ac, ar);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5022) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5023) 		*errp = ext4_mb_pa_alloc(ac);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5024) 		if (*errp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5025) 			goto errout;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5026) repeat:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5027) 		/* allocate space in core */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5028) 		*errp = ext4_mb_regular_allocator(ac);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5029) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5030) 		 * pa allocated above is added to grp->bb_prealloc_list only
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5031) 		 * when we were able to allocate some blocks, i.e. when
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5032) 		 * ac->ac_status == AC_STATUS_FOUND.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5033) 		 * An error from above means ac->ac_status != AC_STATUS_FOUND,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5034) 		 * so we have to free this pa here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5035) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5036) 		if (*errp) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5037) 			ext4_mb_pa_free(ac);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5038) 			ext4_discard_allocated_blocks(ac);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5039) 			goto errout;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5040) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5041) 		if (ac->ac_status == AC_STATUS_FOUND &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5042) 			ac->ac_o_ex.fe_len >= ac->ac_f_ex.fe_len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5043) 			ext4_mb_pa_free(ac);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5044) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5045) 	if (likely(ac->ac_status == AC_STATUS_FOUND)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5046) 		*errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_clstrs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5047) 		if (*errp) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5048) 			ext4_discard_allocated_blocks(ac);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5049) 			goto errout;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5050) 		} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5051) 			block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5052) 			ar->len = ac->ac_b_ex.fe_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5053) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5054) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5055) 		if (ext4_mb_discard_preallocations_should_retry(sb, ac, &seq))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5056) 			goto repeat;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5057) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5058) 		 * If block allocation fails then the pa allocated above
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5059) 		 * needs to be freed here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5060) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5061) 		ext4_mb_pa_free(ac);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5062) 		*errp = -ENOSPC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5063) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5064) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5065) errout:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5066) 	if (*errp) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5067) 		ac->ac_b_ex.fe_len = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5068) 		ar->len = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5069) 		ext4_mb_show_ac(ac);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5070) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5071) 	ext4_mb_release_context(ac);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5072) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5073) 	if (ac)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5074) 		kmem_cache_free(ext4_ac_cachep, ac);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5075) 	if (inquota && ar->len < inquota)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5076) 		dquot_free_block(ar->inode, EXT4_C2B(sbi, inquota - ar->len));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5077) 	if (!ar->len) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5078) 		if ((ar->flags & EXT4_MB_DELALLOC_RESERVED) == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5079) 			/* release all the reserved blocks if not using delalloc */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5080) 			percpu_counter_sub(&sbi->s_dirtyclusters_counter,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5081) 						reserv_clstrs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5082) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5083) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5084) 	trace_ext4_allocate_blocks(ar, (unsigned long long)block);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5085) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5086) 	return block;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5087) }
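
/*
 * Shape of a typical call into ext4_mb_new_blocks(), loosely modeled on
 * what callers such as ext4_ext_map_blocks() do. This is a simplified
 * sketch, not a verbatim caller: only a subset of the request fields is
 * shown and error handling is reduced to the essentials.
 */
#if 0 /* documentation sketch -- not part of the build */
static ext4_fsblk_t alloc_one_extent(handle_t *handle, struct inode *inode,
				     ext4_lblk_t lblk, unsigned int len)
{
	struct ext4_allocation_request ar = {0};
	ext4_fsblk_t newblock;
	int err = 0;

	ar.inode = inode;
	ar.logical = lblk;		/* logical block the data maps to */
	ar.len = len;			/* how many blocks we would like */
	ar.goal = ext4_inode_to_goal_block(inode);	/* physical hint */
	ar.flags = EXT4_MB_HINT_DATA;	/* file data, not metadata */

	newblock = ext4_mb_new_blocks(handle, &ar, &err);
	if (err)
		return 0;
	/* on success ar.len holds how many blocks we actually got */
	return newblock;
}
#endif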
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5088) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5089) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5090)  * We can merge two free data extents only if the physical blocks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5091)  * are contiguous, AND the extents were freed by the same transaction,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5092)  * AND the blocks are associated with the same group.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5093)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5094) static void ext4_try_merge_freed_extent(struct ext4_sb_info *sbi,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5095) 					struct ext4_free_data *entry,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5096) 					struct ext4_free_data *new_entry,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5097) 					struct rb_root *entry_rb_root)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5098) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5099) 	if ((entry->efd_tid != new_entry->efd_tid) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5100) 	    (entry->efd_group != new_entry->efd_group))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5101) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5102) 	if (entry->efd_start_cluster + entry->efd_count ==
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5103) 	    new_entry->efd_start_cluster) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5104) 		new_entry->efd_start_cluster = entry->efd_start_cluster;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5105) 		new_entry->efd_count += entry->efd_count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5106) 	} else if (new_entry->efd_start_cluster + new_entry->efd_count ==
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5107) 		   entry->efd_start_cluster) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5108) 		new_entry->efd_count += entry->efd_count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5109) 	} else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5110) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5111) 	spin_lock(&sbi->s_md_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5112) 	list_del(&entry->efd_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5113) 	spin_unlock(&sbi->s_md_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5114) 	rb_erase(&entry->efd_node, entry_rb_root);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5115) 	kmem_cache_free(ext4_free_data_cachep, entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5116) }
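
/*
 * The merge rule above, isolated in a userspace sketch: two freed
 * ranges coalesce only if they were freed in the same transaction,
 * belong to the same group, and are physically adjacent. The struct
 * here is a made-up stand-in for ext4_free_data.
 */
#if 0 /* documentation sketch -- not part of the build */
#include <stdio.h>

struct freed {
	unsigned long tid, group;	/* like efd_tid / efd_group */
	unsigned long start, count;	/* like efd_start_cluster / efd_count */
};

static int try_merge(const struct freed *old, struct freed *new)
{
	if (old->tid != new->tid || old->group != new->group)
		return 0;
	if (old->start + old->count == new->start) {
		new->start = old->start;	/* absorb neighbor on the left */
		new->count += old->count;
		return 1;
	}
	if (new->start + new->count == old->start) {
		new->count += old->count;	/* absorb neighbor on the right */
		return 1;
	}
	return 0;
}

int main(void)
{
	struct freed a = { 7, 0, 100, 50 }, b = { 7, 0, 150, 10 };

	if (try_merge(&a, &b))
		printf("merged: start %lu, count %lu\n", b.start, b.count);
	return 0;	/* prints: merged: start 100, count 60 */
}
#endif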
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5117) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5118) static noinline_for_stack int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5119) ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5120) 		      struct ext4_free_data *new_entry)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5121) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5122) 	ext4_group_t group = e4b->bd_group;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5123) 	ext4_grpblk_t cluster;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5124) 	ext4_grpblk_t clusters = new_entry->efd_count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5125) 	struct ext4_free_data *entry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5126) 	struct ext4_group_info *db = e4b->bd_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5127) 	struct super_block *sb = e4b->bd_sb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5128) 	struct ext4_sb_info *sbi = EXT4_SB(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5129) 	struct rb_node **n = &db->bb_free_root.rb_node, *node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5130) 	struct rb_node *parent = NULL, *new_node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5131) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5132) 	BUG_ON(!ext4_handle_valid(handle));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5133) 	BUG_ON(e4b->bd_bitmap_page == NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5134) 	BUG_ON(e4b->bd_buddy_page == NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5135) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5136) 	new_node = &new_entry->efd_node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5137) 	cluster = new_entry->efd_start_cluster;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5138) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5139) 	if (!*n) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5140) 		/* First free block extent. We need to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5141) 		 * protect the buddy cache from being freed,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5142) 		 * otherwise we'll refresh it from the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5143) 		 * on-disk bitmap and lose not-yet-available
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5144) 		 * blocks */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5145) 		get_page(e4b->bd_buddy_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5146) 		get_page(e4b->bd_bitmap_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5147) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5148) 	while (*n) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5149) 		parent = *n;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5150) 		entry = rb_entry(parent, struct ext4_free_data, efd_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5151) 		if (cluster < entry->efd_start_cluster)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5152) 			n = &(*n)->rb_left;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5153) 		else if (cluster >= (entry->efd_start_cluster + entry->efd_count))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5154) 			n = &(*n)->rb_right;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5155) 		else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5156) 			ext4_grp_locked_error(sb, group, 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5157) 				ext4_group_first_block_no(sb, group) +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5158) 				EXT4_C2B(sbi, cluster),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5159) 				"Block already on to-be-freed list");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5160) 			kmem_cache_free(ext4_free_data_cachep, new_entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5161) 			return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5162) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5163) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5164) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5165) 	rb_link_node(new_node, parent, n);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5166) 	rb_insert_color(new_node, &db->bb_free_root);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5167) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5168) 	/* Now see if the extent can be merged to the left and right */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5169) 	node = rb_prev(new_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5170) 	if (node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5171) 		entry = rb_entry(node, struct ext4_free_data, efd_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5172) 		ext4_try_merge_freed_extent(sbi, entry, new_entry,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5173) 					    &(db->bb_free_root));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5174) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5175) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5176) 	node = rb_next(new_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5177) 	if (node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5178) 		entry = rb_entry(node, struct ext4_free_data, efd_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5179) 		ext4_try_merge_freed_extent(sbi, entry, new_entry,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5180) 					    &(db->bb_free_root));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5181) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5182) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5183) 	spin_lock(&sbi->s_md_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5184) 	list_add_tail(&new_entry->efd_list, &sbi->s_freed_data_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5185) 	sbi->s_mb_free_pending += clusters;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5186) 	spin_unlock(&sbi->s_md_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5187) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5188) }
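
/*
 * The rb-tree descent above, reduced to its comparison: entries are
 * keyed by start cluster, and a new entry whose start falls inside an
 * existing range means the same blocks were freed twice within one
 * transaction. A userspace sketch of the three-way classification:
 */
#if 0 /* documentation sketch -- not part of the build */
#include <stdio.h>

enum where { GO_LEFT, GO_RIGHT, OVERLAP };

static enum where classify(unsigned long cluster,
			    unsigned long start, unsigned long count)
{
	if (cluster < start)
		return GO_LEFT;
	if (cluster >= start + count)
		return GO_RIGHT;
	return OVERLAP;		/* double free: the new entry is dropped */
}

int main(void)
{
	/* against an existing range [10, 14) */
	printf("cluster 5  -> %d (GO_LEFT)\n",  classify(5, 10, 4));
	printf("cluster 14 -> %d (GO_RIGHT)\n", classify(14, 10, 4));
	printf("cluster 12 -> %d (OVERLAP)\n",  classify(12, 10, 4));
	return 0;
}
#endif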
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5189) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5190) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5191)  * Simple allocator for Ext4 fast commit replay path. It searches for blocks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5192)  * linearly starting at the goal block and also excludes the blocks which
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5193)  * are going to be in use after fast commit replay.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5194)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5195) static ext4_fsblk_t ext4_mb_new_blocks_simple(handle_t *handle,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5196) 				struct ext4_allocation_request *ar, int *errp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5197) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5198) 	struct buffer_head *bitmap_bh;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5199) 	struct super_block *sb = ar->inode->i_sb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5200) 	ext4_group_t group;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5201) 	ext4_grpblk_t blkoff;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5202) 	ext4_grpblk_t max = EXT4_CLUSTERS_PER_GROUP(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5203) 	ext4_grpblk_t i = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5204) 	ext4_fsblk_t goal, block;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5205) 	struct ext4_super_block *es = EXT4_SB(sb)->s_es;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5206) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5207) 	goal = ar->goal;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5208) 	if (goal < le32_to_cpu(es->s_first_data_block) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5209) 			goal >= ext4_blocks_count(es))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5210) 		goal = le32_to_cpu(es->s_first_data_block);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5211) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5212) 	ar->len = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5213) 	ext4_get_group_no_and_offset(sb, goal, &group, &blkoff);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5214) 	for (; group < ext4_get_groups_count(sb); group++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5215) 		bitmap_bh = ext4_read_block_bitmap(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5216) 		if (IS_ERR(bitmap_bh)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5217) 			*errp = PTR_ERR(bitmap_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5218) 			pr_warn("Failed to read block bitmap\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5219) 			return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5220) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5221) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5222) 		ext4_get_group_no_and_offset(sb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5223) 			max(ext4_group_first_block_no(sb, group), goal),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5224) 			NULL, &blkoff);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5225) 		while (1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5226) 			i = mb_find_next_zero_bit(bitmap_bh->b_data, max,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5227) 						blkoff);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5228) 			if (i >= max)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5229) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5230) 			if (ext4_fc_replay_check_excluded(sb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5231) 				ext4_group_first_block_no(sb, group) + i)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5232) 				blkoff = i + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5233) 			} else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5234) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5235) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5236) 		brelse(bitmap_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5237) 		if (i < max)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5238) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5239) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5240) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5241) 	if (group >= ext4_get_groups_count(sb) || i >= max) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5242) 		*errp = -ENOSPC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5243) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5244) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5245) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5246) 	block = ext4_group_first_block_no(sb, group) + i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5247) 	ext4_mb_mark_bb(sb, block, 1, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5248) 	ar->len = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5249) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5250) 	return block;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5251) }
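
/*
 * The scan above in miniature: advance past bits that are set on disk
 * and past blocks the replay exclusion check rejects. The bitmap and
 * the excluded() predicate below are illustrative stand-ins for
 * mb_find_next_zero_bit() and ext4_fc_replay_check_excluded().
 */
#if 0 /* documentation sketch -- not part of the build */
#include <stdio.h>

static int excluded(int bit)
{
	return bit == 3;	/* pretend block 3 is taken after replay */
}

static int find_usable_bit(const unsigned char *map, int max)
{
	int i;

	for (i = 0; i < max; i++) {
		if (map[i / 8] & (1 << (i % 8)))
			continue;	/* in use on disk */
		if (excluded(i))
			continue;	/* will be in use after replay */
		return i;
	}
	return max;	/* nothing usable in this group */
}

int main(void)
{
	unsigned char map[1] = { 0x07 };	/* bits 0..2 set */

	printf("first usable bit: %d\n", find_usable_bit(map, 8)); /* 4 */
	return 0;
}
#endif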
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5252) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5253) static void ext4_free_blocks_simple(struct inode *inode, ext4_fsblk_t block,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5254) 					unsigned long count)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5255) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5256) 	struct buffer_head *bitmap_bh;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5257) 	struct super_block *sb = inode->i_sb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5258) 	struct ext4_group_desc *gdp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5259) 	struct buffer_head *gdp_bh;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5260) 	ext4_group_t group;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5261) 	ext4_grpblk_t blkoff;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5262) 	int already_freed = 0, err, i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5263) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5264) 	ext4_get_group_no_and_offset(sb, block, &group, &blkoff);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5265) 	bitmap_bh = ext4_read_block_bitmap(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5266) 	if (IS_ERR(bitmap_bh)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5267) 		err = PTR_ERR(bitmap_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5268) 		pr_warn("Failed to read block bitmap\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5269) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5270) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5271) 	gdp = ext4_get_group_desc(sb, group, &gdp_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5272) 	if (!gdp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5273) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5274) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5275) 	for (i = 0; i < count; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5276) 		if (!mb_test_bit(blkoff + i, bitmap_bh->b_data))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5277) 			already_freed++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5278) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5279) 	mb_clear_bits(bitmap_bh->b_data, blkoff, count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5280) 	err = ext4_handle_dirty_metadata(NULL, NULL, bitmap_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5281) 	if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5282) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5283) 	ext4_free_group_clusters_set(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5284) 		sb, gdp, ext4_free_group_clusters(sb, gdp) +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5285) 		count - already_freed);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5286) 	ext4_block_bitmap_csum_set(sb, group, gdp, bitmap_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5287) 	ext4_group_desc_csum_set(sb, group, gdp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5288) 	ext4_handle_dirty_metadata(NULL, NULL, gdp_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5289) 	sync_dirty_buffer(bitmap_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5290) 	sync_dirty_buffer(gdp_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5291) 	brelse(bitmap_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5292) }
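
/*
 * Why the loop above counts already-clear bits before clearing: the
 * group's free cluster count may only grow by the bits that actually
 * flip, otherwise a repeated free during replay would inflate it. A
 * userspace sketch of that accounting:
 */
#if 0 /* documentation sketch -- not part of the build */
#include <stdio.h>

static int clear_and_count(unsigned char *map, int off, int count)
{
	int i, newly_freed = 0;

	for (i = 0; i < count; i++) {
		int byte = (off + i) / 8, bit = (off + i) % 8;

		if (map[byte] & (1 << bit)) {
			map[byte] &= ~(1 << bit);
			newly_freed++;	/* this bit really flipped */
		}
	}
	return newly_freed;	/* add this, not 'count', to the counter */
}

int main(void)
{
	unsigned char map[1] = { 0x0e };	/* bits 1..3 set */

	/* freeing bits 0..3: bit 0 was already clear */
	printf("newly freed: %d\n", clear_and_count(map, 0, 4)); /* 3 */
	return 0;
}
#endif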
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5293) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5294) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5295)  * ext4_free_blocks() -- Free given blocks and update quota
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5296)  * @handle:		handle for this transaction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5297)  * @inode:		inode
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5298)  * @bh:			optional buffer of the block to be freed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5299)  * @block:		starting physical block to be freed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5300)  * @count:		number of blocks to be freed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5301)  * @flags:		flags used by ext4_free_blocks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5302)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5303) void ext4_free_blocks(handle_t *handle, struct inode *inode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5304) 		      struct buffer_head *bh, ext4_fsblk_t block,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5305) 		      unsigned long count, int flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5306) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5307) 	struct buffer_head *bitmap_bh = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5308) 	struct super_block *sb = inode->i_sb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5309) 	struct ext4_group_desc *gdp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5310) 	unsigned int overflow;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5311) 	ext4_grpblk_t bit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5312) 	struct buffer_head *gd_bh;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5313) 	ext4_group_t block_group;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5314) 	struct ext4_sb_info *sbi;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5315) 	struct ext4_buddy e4b;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5316) 	unsigned int count_clusters;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5317) 	int err = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5318) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5319) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5320) 	sbi = EXT4_SB(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5321) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5322) 	if (sbi->s_mount_state & EXT4_FC_REPLAY) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5323) 		ext4_free_blocks_simple(inode, block, count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5324) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5325) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5326) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5327) 	might_sleep();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5328) 	if (bh) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5329) 		if (block)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5330) 			BUG_ON(block != bh->b_blocknr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5331) 		else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5332) 			block = bh->b_blocknr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5333) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5334) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5335) 	if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5336) 	    !ext4_inode_block_valid(inode, block, count)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5337) 		ext4_error(sb, "Freeing blocks not in datazone - "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5338) 			   "block = %llu, count = %lu", block, count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5339) 		goto error_return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5340) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5341) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5342) 	ext4_debug("freeing block %llu\n", block);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5343) 	trace_ext4_free_blocks(inode, block, count, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5344) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5345) 	if (bh && (flags & EXT4_FREE_BLOCKS_FORGET)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5346) 		BUG_ON(count > 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5347) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5348) 		ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5349) 			    inode, bh, block);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5350) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5351) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5352) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5353) 	 * If the extent to be freed does not begin on a cluster
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5354) 	 * boundary, we need to deal with partial clusters at the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5355) 	 * beginning and end of the extent.  Normally we will free
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5356) 	 * blocks at the beginning or the end unless we are explicitly
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5357) 	 * requested to avoid doing so.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5358) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5359) 	overflow = EXT4_PBLK_COFF(sbi, block);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5360) 	if (overflow) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5361) 		if (flags & EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5362) 			overflow = sbi->s_cluster_ratio - overflow;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5363) 			block += overflow;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5364) 			if (count > overflow)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5365) 				count -= overflow;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5366) 			else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5367) 				return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5368) 		} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5369) 			block -= overflow;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5370) 			count += overflow;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5371) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5372) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5373) 	overflow = EXT4_LBLK_COFF(sbi, count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5374) 	if (overflow) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5375) 		if (flags & EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5376) 			if (count > overflow)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5377) 				count -= overflow;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5378) 			else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5379) 				return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5380) 		} else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5381) 			count += sbi->s_cluster_ratio - overflow;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5382) 	}
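	/*
	 * Worked example (editor's illustration): on a bigalloc file
	 * system with s_cluster_ratio == 4, freeing block 10, count 6
	 * gives EXT4_PBLK_COFF() == 2.  Without NOFREE_FIRST_CLUSTER
	 * the range is widened to block 8, count 8, so both partially
	 * covered clusters (2 and 3) are freed whole; with the flag,
	 * the head is clipped instead: block becomes 12, count 4, and
	 * only the fully covered cluster 3 is freed.
	 */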
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5383) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5384) 	if (!bh && (flags & EXT4_FREE_BLOCKS_FORGET)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5385) 		int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5386) 		int is_metadata = flags & EXT4_FREE_BLOCKS_METADATA;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5387) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5388) 		for (i = 0; i < count; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5389) 			cond_resched();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5390) 			if (is_metadata)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5391) 				bh = sb_find_get_block(inode->i_sb, block + i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5392) 			ext4_forget(handle, is_metadata, inode, bh, block + i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5393) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5394) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5395) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5396) do_more:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5397) 	overflow = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5398) 	ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5399) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5400) 	if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5401) 			ext4_get_group_info(sb, block_group))))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5402) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5403) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5404) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5405) 	 * Check to see if we are freeing blocks across a group
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5406) 	 * boundary.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5407) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5408) 	if (EXT4_C2B(sbi, bit) + count > EXT4_BLOCKS_PER_GROUP(sb)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5409) 		overflow = EXT4_C2B(sbi, bit) + count -
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5410) 			EXT4_BLOCKS_PER_GROUP(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5411) 		count -= overflow;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5412) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5413) 	count_clusters = EXT4_NUM_B2C(sbi, count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5414) 	bitmap_bh = ext4_read_block_bitmap(sb, block_group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5415) 	if (IS_ERR(bitmap_bh)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5416) 		err = PTR_ERR(bitmap_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5417) 		bitmap_bh = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5418) 		goto error_return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5419) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5420) 	gdp = ext4_get_group_desc(sb, block_group, &gd_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5421) 	if (!gdp) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5422) 		err = -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5423) 		goto error_return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5424) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5425) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5426) 	if (in_range(ext4_block_bitmap(sb, gdp), block, count) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5427) 	    in_range(ext4_inode_bitmap(sb, gdp), block, count) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5428) 	    in_range(block, ext4_inode_table(sb, gdp),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5429) 		     sbi->s_itb_per_group) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5430) 	    in_range(block + count - 1, ext4_inode_table(sb, gdp),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5431) 		     sbi->s_itb_per_group)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5432) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5433) 		ext4_error(sb, "Freeing blocks in system zone - "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5434) 			   "Block = %llu, count = %lu", block, count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5435) 		/* err = 0. ext4_std_error should be a no op */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5436) 		goto error_return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5437) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5438) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5439) 	BUFFER_TRACE(bitmap_bh, "getting write access");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5440) 	err = ext4_journal_get_write_access(handle, bitmap_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5441) 	if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5442) 		goto error_return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5443) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5444) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5445) 	 * We are about to modify some metadata.  Call the journal APIs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5446) 	 * to unshare ->b_data if a currently-committing transaction is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5447) 	 * using it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5448) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5449) 	BUFFER_TRACE(gd_bh, "get_write_access");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5450) 	err = ext4_journal_get_write_access(handle, gd_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5451) 	if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5452) 		goto error_return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5453) #ifdef AGGRESSIVE_CHECK
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5454) 	{
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5455) 		int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5456) 		for (i = 0; i < count_clusters; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5457) 			BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5458) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5459) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5460) 	trace_ext4_mballoc_free(sb, inode, block_group, bit, count_clusters);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5461) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5462) 	/* __GFP_NOFAIL: retry infinitely, ignore TIF_MEMDIE and memcg limit. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5463) 	err = ext4_mb_load_buddy_gfp(sb, block_group, &e4b,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5464) 				     GFP_NOFS|__GFP_NOFAIL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5465) 	if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5466) 		goto error_return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5467) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5468) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5469) 	 * We need to make sure we don't reuse the freed block until after the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5470) 	 * transaction is committed. We make an exception if the inode is to be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5471) 	 * written in writeback mode since writeback mode has weak data
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5472) 	 * consistency guarantees.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5473) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5474) 	if (ext4_handle_valid(handle) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5475) 	    ((flags & EXT4_FREE_BLOCKS_METADATA) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5476) 	     !ext4_should_writeback_data(inode))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5477) 		struct ext4_free_data *new_entry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5478) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5479) 		 * We use __GFP_NOFAIL because ext4_free_blocks() is not allowed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5480) 		 * to fail.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5481) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5482) 		new_entry = kmem_cache_alloc(ext4_free_data_cachep,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5483) 				GFP_NOFS|__GFP_NOFAIL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5484) 		new_entry->efd_start_cluster = bit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5485) 		new_entry->efd_group = block_group;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5486) 		new_entry->efd_count = count_clusters;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5487) 		new_entry->efd_tid = handle->h_transaction->t_tid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5488) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5489) 		ext4_lock_group(sb, block_group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5490) 		mb_clear_bits(bitmap_bh->b_data, bit, count_clusters);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5491) 		ext4_mb_free_metadata(handle, &e4b, new_entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5492) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5493) 		/* need to update group_info->bb_free and bitmap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5494) 		 * with the group lock held; generate_buddy looks at
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5495) 		 * them with the group lock held
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5496) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5497) 		if (test_opt(sb, DISCARD)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5498) 			err = ext4_issue_discard(sb, block_group, bit, count,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5499) 						 NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5500) 			if (err && err != -EOPNOTSUPP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5501) 				ext4_msg(sb, KERN_WARNING, "discard request in"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5502) 					 " group:%d block:%d count:%lu failed"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5503) 					 " with %d", block_group, bit, count,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5504) 					 err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5505) 		} else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5506) 			EXT4_MB_GRP_CLEAR_TRIMMED(e4b.bd_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5507) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5508) 		ext4_lock_group(sb, block_group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5509) 		mb_clear_bits(bitmap_bh->b_data, bit, count_clusters);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5510) 		mb_free_blocks(inode, &e4b, bit, count_clusters);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5511) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5512) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5513) 	ret = ext4_free_group_clusters(sb, gdp) + count_clusters;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5514) 	ext4_free_group_clusters_set(sb, gdp, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5515) 	ext4_block_bitmap_csum_set(sb, block_group, gdp, bitmap_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5516) 	ext4_group_desc_csum_set(sb, block_group, gdp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5517) 	ext4_unlock_group(sb, block_group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5518) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5519) 	if (sbi->s_log_groups_per_flex) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5520) 		ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5521) 		atomic64_add(count_clusters,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5522) 			     &sbi_array_rcu_deref(sbi, s_flex_groups,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5523) 						  flex_group)->free_clusters);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5524) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5525) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5526) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5527) 	 * on a bigalloc file system, defer the s_freeclusters_counter
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5528) 	 * update to the caller (ext4_remove_space and friends) so they
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5529) 	 * can determine if a cluster freed here should be rereserved
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5530) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5531) 	if (!(flags & EXT4_FREE_BLOCKS_RERESERVE_CLUSTER)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5532) 		if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5533) 			dquot_free_block(inode, EXT4_C2B(sbi, count_clusters));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5534) 		percpu_counter_add(&sbi->s_freeclusters_counter,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5535) 				   count_clusters);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5536) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5537) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5538) 	ext4_mb_unload_buddy(&e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5539) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5540) 	/* We dirtied the bitmap block */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5541) 	BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5542) 	err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5543) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5544) 	/* And the group descriptor block */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5545) 	BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5546) 	ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5547) 	if (!err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5548) 		err = ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5549) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5550) 	if (overflow && !err) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5551) 		block += count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5552) 		count = overflow;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5553) 		put_bh(bitmap_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5554) 		goto do_more;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5555) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5556) error_return:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5557) 	brelse(bitmap_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5558) 	ext4_std_error(sb, err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5559) 	return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5560) }
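/*
 * Illustrative caller (editor's example, not from this file): the
 * extent-tree code frees a single metadata block along these lines,
 * asking the journal to revoke and forget the buffer as well:
 *
 *	ext4_free_blocks(handle, inode, bh, bh->b_blocknr, 1,
 *			 EXT4_FREE_BLOCKS_METADATA |
 *			 EXT4_FREE_BLOCKS_FORGET);
 */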
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5561) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5562) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5563)  * ext4_group_add_blocks() -- Add given blocks to an existing group
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5564)  * @handle:			handle for this transaction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5565)  * @sb:				super block
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5566)  * @block:			start physical block to add to the block group
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5567)  * @count:			number of blocks to add (they are marked free)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5568)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5569)  * This marks the blocks as free in the bitmap and buddy.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5570)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5571) int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5572) 			 ext4_fsblk_t block, unsigned long count)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5573) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5574) 	struct buffer_head *bitmap_bh = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5575) 	struct buffer_head *gd_bh;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5576) 	ext4_group_t block_group;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5577) 	ext4_grpblk_t bit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5578) 	unsigned int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5579) 	struct ext4_group_desc *desc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5580) 	struct ext4_sb_info *sbi = EXT4_SB(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5581) 	struct ext4_buddy e4b;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5582) 	int err = 0, ret, free_clusters_count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5583) 	ext4_grpblk_t clusters_freed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5584) 	ext4_fsblk_t first_cluster = EXT4_B2C(sbi, block);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5585) 	ext4_fsblk_t last_cluster = EXT4_B2C(sbi, block + count - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5586) 	unsigned long cluster_count = last_cluster - first_cluster + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5587) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5588) 	ext4_debug("Adding block(s) %llu-%llu\n", block, block + count - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5589) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5590) 	if (count == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5591) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5592) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5593) 	ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5594) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5595) 	 * Check to see if we are freeing blocks across a group
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5596) 	 * boundary.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5597) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5598) 	if (bit + cluster_count > EXT4_CLUSTERS_PER_GROUP(sb)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5599) 		ext4_warning(sb, "too many blocks added to group %u",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5600) 			     block_group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5601) 		err = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5602) 		goto error_return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5603) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5604) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5605) 	bitmap_bh = ext4_read_block_bitmap(sb, block_group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5606) 	if (IS_ERR(bitmap_bh)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5607) 		err = PTR_ERR(bitmap_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5608) 		bitmap_bh = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5609) 		goto error_return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5610) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5611) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5612) 	desc = ext4_get_group_desc(sb, block_group, &gd_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5613) 	if (!desc) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5614) 		err = -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5615) 		goto error_return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5616) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5617) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5618) 	if (in_range(ext4_block_bitmap(sb, desc), block, count) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5619) 	    in_range(ext4_inode_bitmap(sb, desc), block, count) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5620) 	    in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5621) 	    in_range(block + count - 1, ext4_inode_table(sb, desc),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5622) 		     sbi->s_itb_per_group)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5623) 		ext4_error(sb, "Adding blocks in system zones - "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5624) 			   "Block = %llu, count = %lu",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5625) 			   block, count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5626) 		err = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5627) 		goto error_return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5628) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5629) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5630) 	BUFFER_TRACE(bitmap_bh, "getting write access");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5631) 	err = ext4_journal_get_write_access(handle, bitmap_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5632) 	if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5633) 		goto error_return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5634) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5635) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5636) 	 * We are about to modify some metadata.  Call the journal APIs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5637) 	 * to unshare ->b_data if a currently-committing transaction is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5638) 	 * using it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5639) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5640) 	BUFFER_TRACE(gd_bh, "get_write_access");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5641) 	err = ext4_journal_get_write_access(handle, gd_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5642) 	if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5643) 		goto error_return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5644) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5645) 	for (i = 0, clusters_freed = 0; i < cluster_count; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5646) 		BUFFER_TRACE(bitmap_bh, "clear bit");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5647) 		if (!mb_test_bit(bit + i, bitmap_bh->b_data)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5648) 			ext4_error(sb, "bit already cleared for block %llu",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5649) 				   (ext4_fsblk_t)(block + i));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5650) 			BUFFER_TRACE(bitmap_bh, "bit already cleared");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5651) 		} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5652) 			clusters_freed++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5653) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5654) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5655) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5656) 	err = ext4_mb_load_buddy(sb, block_group, &e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5657) 	if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5658) 		goto error_return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5659) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5660) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5661) 	 * need to update group_info->bb_free and bitmap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5662) 	 * with the group lock held; generate_buddy looks at
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5663) 	 * them with the group lock held
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5664) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5665) 	ext4_lock_group(sb, block_group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5666) 	mb_clear_bits(bitmap_bh->b_data, bit, cluster_count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5667) 	mb_free_blocks(NULL, &e4b, bit, cluster_count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5668) 	free_clusters_count = clusters_freed +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5669) 		ext4_free_group_clusters(sb, desc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5670) 	ext4_free_group_clusters_set(sb, desc, free_clusters_count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5671) 	ext4_block_bitmap_csum_set(sb, block_group, desc, bitmap_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5672) 	ext4_group_desc_csum_set(sb, block_group, desc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5673) 	ext4_unlock_group(sb, block_group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5674) 	percpu_counter_add(&sbi->s_freeclusters_counter,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5675) 			   clusters_freed);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5676) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5677) 	if (sbi->s_log_groups_per_flex) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5678) 		ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5679) 		atomic64_add(clusters_freed,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5680) 			     &sbi_array_rcu_deref(sbi, s_flex_groups,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5681) 						  flex_group)->free_clusters);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5682) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5683) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5684) 	ext4_mb_unload_buddy(&e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5685) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5686) 	/* We dirtied the bitmap block */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5687) 	BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5688) 	err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5689) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5690) 	/* And the group descriptor block */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5691) 	BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5692) 	ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5693) 	if (!err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5694) 		err = ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5695) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5696) error_return:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5697) 	brelse(bitmap_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5698) 	ext4_std_error(sb, err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5699) 	return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5700) }
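/*
 * Illustrative caller (editor's example): the online-resize path is
 * the expected user of this helper, roughly:
 *
 *	err = ext4_group_add_blocks(handle, sb, o_blocks_count, add);
 *
 * where o_blocks_count is the old file-system size in blocks and add
 * is the number of blocks being appended to the last group.
 */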
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5701) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5702) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5703)  * ext4_trim_extent -- function to TRIM one single free extent in the group
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5704)  * @sb:		super block for the file system
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5705)  * @start:	starting block of the free extent in the alloc. group
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5706)  * @count:	number of blocks to TRIM
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5707)  * @group:	alloc. group we are working with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5708)  * @e4b:	ext4 buddy for the group
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5709)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5710)  * Trim "count" blocks starting at "start" in the "group". To ensure that no
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5711)  * one will allocate those blocks, mark them as used in the buddy bitmap. This
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5712)  * must be called under the group lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5713)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5714) static int ext4_trim_extent(struct super_block *sb, int start, int count,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5715) 			     ext4_group_t group, struct ext4_buddy *e4b)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5716) __releases(bitlock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5717) __acquires(bitlock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5718) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5719) 	struct ext4_free_extent ex;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5720) 	int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5721) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5722) 	trace_ext4_trim_extent(sb, group, start, count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5723) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5724) 	assert_spin_locked(ext4_group_lock_ptr(sb, group));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5725) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5726) 	ex.fe_start = start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5727) 	ex.fe_group = group;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5728) 	ex.fe_len = count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5729) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5730) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5731) 	 * Mark blocks used, so no one can reuse them while
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5732) 	 * being trimmed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5733) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5734) 	mb_mark_used(e4b, &ex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5735) 	ext4_unlock_group(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5736) 	ret = ext4_issue_discard(sb, group, start, count, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5737) 	ext4_lock_group(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5738) 	mb_free_blocks(NULL, e4b, start, ex.fe_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5739) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5740) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5741) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5742) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5743)  * ext4_trim_all_free -- function to trim all free space in alloc. group
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5744)  * @sb:			super block for file system
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5745)  * @group:		group to be trimmed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5746)  * @start:		first group block to examine
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5747)  * @max:		last group block to examine
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5748)  * @minblocks:		minimum extent block count
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5749)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5750)  * ext4_trim_all_free walks through the group's buddy bitmap searching for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5751)  * free extents. When a free extent is found, it is marked as used in the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5752)  * group buddy bitmap, a TRIM command is issued for it, and the extent is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5753)  * then freed back into the buddy bitmap. This is repeated until the whole
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5754)  * group has been scanned.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5759)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5760) static ext4_grpblk_t
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5761) ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5762) 		   ext4_grpblk_t start, ext4_grpblk_t max,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5763) 		   ext4_grpblk_t minblocks)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5764) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5765) 	void *bitmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5766) 	ext4_grpblk_t next, count = 0, free_count = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5767) 	struct ext4_buddy e4b;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5768) 	int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5769) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5770) 	trace_ext4_trim_all_free(sb, group, start, max);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5771) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5772) 	ret = ext4_mb_load_buddy(sb, group, &e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5773) 	if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5774) 		ext4_warning(sb, "Error %d loading buddy information for %u",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5775) 			     ret, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5776) 		return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5777) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5778) 	bitmap = e4b.bd_bitmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5779) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5780) 	ext4_lock_group(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5781) 	if (EXT4_MB_GRP_WAS_TRIMMED(e4b.bd_info) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5782) 	    minblocks >= atomic_read(&EXT4_SB(sb)->s_last_trim_minblks))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5783) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5784) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5785) 	start = (e4b.bd_info->bb_first_free > start) ?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5786) 		e4b.bd_info->bb_first_free : start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5787) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5788) 	while (start <= max) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5789) 		start = mb_find_next_zero_bit(bitmap, max + 1, start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5790) 		if (start > max)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5791) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5792) 		next = mb_find_next_bit(bitmap, max + 1, start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5793) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5794) 		if ((next - start) >= minblocks) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5795) 			ret = ext4_trim_extent(sb, start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5796) 					       next - start, group, &e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5797) 			if (ret && ret != -EOPNOTSUPP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5798) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5799) 			ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5800) 			count += next - start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5801) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5802) 		free_count += next - start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5803) 		start = next + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5804) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5805) 		if (fatal_signal_pending(current)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5806) 			count = -ERESTARTSYS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5807) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5808) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5809) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5810) 		if (need_resched()) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5811) 			ext4_unlock_group(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5812) 			cond_resched();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5813) 			ext4_lock_group(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5814) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5815) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5816) 		if ((e4b.bd_info->bb_free - free_count) < minblocks)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5817) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5818) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5819) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5820) 	if (!ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5821) 		ret = count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5822) 		EXT4_MB_GRP_SET_TRIMMED(e4b.bd_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5823) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5824) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5825) 	ext4_unlock_group(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5826) 	ext4_mb_unload_buddy(&e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5827) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5828) 	ext4_debug("trimmed %d blocks in the group %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5829) 		count, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5830) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5831) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5832) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5833) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5834) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5835)  * ext4_trim_fs() -- trim ioctl handler function
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5836)  * @sb:			superblock for filesystem
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5837)  * @range:		fstrim_range structure
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5838)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5839)  * start:	first byte to trim
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5840)  * len:		number of bytes to trim from start
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5841)  * minlen:	minimum extent length in bytes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5842)  * ext4_trim_fs goes through all allocation groups containing bytes from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5843)  * start to start+len. For each such group the ext4_trim_all_free function
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5844)  * is invoked to trim all free space.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5845)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5846) int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5847) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5848) 	struct request_queue *q = bdev_get_queue(sb->s_bdev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5849) 	struct ext4_group_info *grp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5850) 	ext4_group_t group, first_group, last_group;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5851) 	ext4_grpblk_t cnt = 0, first_cluster, last_cluster;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5852) 	uint64_t start, end, minlen, trimmed = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5853) 	ext4_fsblk_t first_data_blk =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5854) 			le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5855) 	ext4_fsblk_t max_blks = ext4_blocks_count(EXT4_SB(sb)->s_es);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5856) 	int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5857) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5858) 	start = range->start >> sb->s_blocksize_bits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5859) 	end = start + (range->len >> sb->s_blocksize_bits) - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5860) 	minlen = EXT4_NUM_B2C(EXT4_SB(sb),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5861) 			      range->minlen >> sb->s_blocksize_bits);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5862) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5863) 	if (minlen > EXT4_CLUSTERS_PER_GROUP(sb) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5864) 	    start >= max_blks ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5865) 	    range->len < sb->s_blocksize)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5866) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5867) 	/* No point in trying to trim less than the discard granularity */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5868) 	if (range->minlen < q->limits.discard_granularity) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5869) 		minlen = EXT4_NUM_B2C(EXT4_SB(sb),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5870) 			q->limits.discard_granularity >> sb->s_blocksize_bits);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5871) 		if (minlen > EXT4_CLUSTERS_PER_GROUP(sb))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5872) 			goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5873) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5874) 	if (end >= max_blks)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5875) 		end = max_blks - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5876) 	if (end <= first_data_blk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5877) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5878) 	if (start < first_data_blk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5879) 		start = first_data_blk;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5880) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5881) 	/* Determine first and last group to examine based on start and end */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5882) 	ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5883) 				     &first_group, &first_cluster);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5884) 	ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5885) 				     &last_group, &last_cluster);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5886) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5887) 	/* end now represents the last cluster to discard in this group */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5888) 	end = EXT4_CLUSTERS_PER_GROUP(sb) - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5889) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5890) 	for (group = first_group; group <= last_group; group++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5891) 		grp = ext4_get_group_info(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5892) 		/* We only do this if the grp has never been initialized */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5893) 		if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5894) 			ret = ext4_mb_init_group(sb, group, GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5895) 			if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5896) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5897) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5898) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5899) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5900) 		 * For all the groups except the last one, last cluster will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5901) 		 * always be EXT4_CLUSTERS_PER_GROUP(sb)-1, so we only need to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5902) 		 * change it for the last group, note that last_cluster is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5903) 		 * already computed earlier by ext4_get_group_no_and_offset()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5904) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5905) 		if (group == last_group)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5906) 			end = last_cluster;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5907) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5908) 		if (grp->bb_free >= minlen) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5909) 			cnt = ext4_trim_all_free(sb, group, first_cluster,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5910) 						end, minlen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5911) 			if (cnt < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5912) 				ret = cnt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5913) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5914) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5915) 			trimmed += cnt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5916) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5917) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5918) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5919) 		 * For every group except the first one, we are sure
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5920) 		 * that the first cluster to discard will be cluster #0.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5921) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5922) 		first_cluster = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5923) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5924) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5925) 	if (!ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5926) 		atomic_set(&EXT4_SB(sb)->s_last_trim_minblks, minlen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5927) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5928) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5929) 	range->len = EXT4_C2B(EXT4_SB(sb), trimmed) << sb->s_blocksize_bits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5930) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5931) }
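/*
 * Illustrative userspace counterpart (editor's example): this function
 * services the FITRIM ioctl, which fstrim(8) issues roughly as:
 *
 *	struct fstrim_range r = {
 *		.start = 0,
 *		.len = ULLONG_MAX,	   (whole file system)
 *		.minlen = 0,
 *	};
 *	ioctl(fd, FITRIM, &r);	   (on return, r.len is bytes trimmed)
 */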
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5932) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5933) /* Iterate all the free extents in the group. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5934) int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5935) ext4_mballoc_query_range(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5936) 	struct super_block		*sb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5937) 	ext4_group_t			group,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5938) 	ext4_grpblk_t			start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5939) 	ext4_grpblk_t			end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5940) 	ext4_mballoc_query_range_fn	formatter,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5941) 	void				*priv)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5942) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5943) 	void				*bitmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5944) 	ext4_grpblk_t			next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5945) 	struct ext4_buddy		e4b;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5946) 	int				error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5947) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5948) 	error = ext4_mb_load_buddy(sb, group, &e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5949) 	if (error)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5950) 		return error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5951) 	bitmap = e4b.bd_bitmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5952) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5953) 	ext4_lock_group(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5954) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5955) 	start = (e4b.bd_info->bb_first_free > start) ?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5956) 		e4b.bd_info->bb_first_free : start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5957) 	if (end >= EXT4_CLUSTERS_PER_GROUP(sb))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5958) 		end = EXT4_CLUSTERS_PER_GROUP(sb) - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5959) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5960) 	while (start <= end) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5961) 		start = mb_find_next_zero_bit(bitmap, end + 1, start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5962) 		if (start > end)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5963) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5964) 		next = mb_find_next_bit(bitmap, end + 1, start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5965) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5966) 		ext4_unlock_group(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5967) 		error = formatter(sb, group, start, next - start, priv);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5968) 		if (error)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5969) 			goto out_unload;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5970) 		ext4_lock_group(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5971) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5972) 		start = next + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5973) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5974) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5975) 	ext4_unlock_group(sb, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5976) out_unload:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5977) 	ext4_mb_unload_buddy(&e4b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5978) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5979) 	return error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5980) }
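/*
 * Illustrative formatter (editor's example; hypothetical name): a
 * callback matching ext4_mballoc_query_range_fn that just sums the
 * free extents it is handed.  Passing it to ext4_mballoc_query_range()
 * with a zeroed total would leave the group's free cluster count in
 * *priv.
 */
static int __maybe_unused example_count_free(struct super_block *sb,
					     ext4_group_t group,
					     ext4_grpblk_t start,
					     ext4_grpblk_t len, void *priv)
{
	*(ext4_grpblk_t *)priv += len;	/* accumulate extent length */
	return 0;			/* non-zero would stop the walk */
}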