^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) // SPDX-License-Identifier: GPL-2.0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) * fs/ext4/mballoc.h
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) * Written by: Alex Tomas <alex@clusterfs.com>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) #ifndef _EXT4_MBALLOC_H
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) #define _EXT4_MBALLOC_H
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11) #include <linux/time.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12) #include <linux/fs.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) #include <linux/namei.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) #include <linux/quotaops.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15) #include <linux/buffer_head.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) #include <linux/module.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) #include <linux/swap.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18) #include <linux/proc_fs.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19) #include <linux/pagemap.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20) #include <linux/seq_file.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21) #include <linux/blkdev.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22) #include <linux/mutex.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23) #include "ext4_jbd2.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24) #include "ext4.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27) * mb_debug() dynamic printk msgs could be used to debug mballoc code.
 *
 * mb_debug(sb, fmt, ...):
 *   sb  - super block pointer; only sb->s_id is read, and only in the
 *         CONFIG_EXT4_DEBUG variant
 *   fmt - printk-style format; it must be a string literal because it is
 *         pasted after the prefix string
 *
 * With CONFIG_EXT4_DEBUG the message goes through pr_debug() and is
 * prefixed with the current task's comm and pid, the filesystem id
 * (sb->s_id) and the source file, line and function of the call site.
 * Without it the macro expands to no_printk() and sb is not evaluated.
 *
 * NOTE(review): sb is expanded unparenthesized (sb->s_id), so callers
 * should pass a plain identifier or other primary expression.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29) #ifdef CONFIG_EXT4_DEBUG
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30) #define mb_debug(sb, fmt, ...) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31) pr_debug("[%s/%d] EXT4-fs (%s): (%s, %d): %s: " fmt, \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32) current->comm, task_pid_nr(current), sb->s_id, \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33) __FILE__, __LINE__, __func__, ##__VA_ARGS__)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35) #define mb_debug(sb, fmt, ...) no_printk(fmt, ##__VA_ARGS__)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37)
/*
 * Operation codes kept for allocation history.
 * NOTE(review): presumably the values stored in the ac_op field of
 * struct ext4_allocation_context - confirm against mballoc.c.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38) #define EXT4_MB_HISTORY_ALLOC 1 /* allocation */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39) #define EXT4_MB_HISTORY_PREALLOC 2 /* preallocated blocks used */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42) * Upper limit on how long mballoc can look for a best extent, counted in
 * found extents.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44) #define MB_DEFAULT_MAX_TO_SCAN 200
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47) * Lower limit on how long mballoc must look for a best extent.
 * NOTE(review): presumably counted in found extents as well - confirm.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49) #define MB_DEFAULT_MIN_TO_SCAN 10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52) * With 'ext4_mb_stats' the allocator collects statistics that are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53) * shown at umount. Collecting them has a cost, though, so the default is 0.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55) #define MB_DEFAULT_STATS 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58) * Files smaller than MB_DEFAULT_STREAM_THRESHOLD are served
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59) * by the stream allocator, whose purpose is to pack requests
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60) * as close to each other as possible to produce smooth I/O traffic.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61) * We use locality group prealloc space for stream requests.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62) * We can tune the same via /proc/fs/ext4/<partition>/stream_req
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64) #define MB_DEFAULT_STREAM_THRESHOLD 16 /* 64K; NOTE(review): presumably 16 blocks of 4K - confirm units */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67) * Selects which requests use the 2^N search using buddies.
 * NOTE(review): presumably a minimum order N for 2^N-sized requests -
 * confirm against mballoc.c.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69) #define MB_DEFAULT_ORDER2_REQS 2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72) * Default group preallocation size: 512 blocks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74) #define MB_DEFAULT_GROUP_PREALLOC 512
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77) * Default maximum length of the inode prealloc list.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79) #define MB_DEFAULT_MAX_INODE_PREALLOC 512
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80)
/*
 * Describes one extent of freed blocks: the group it lies in, its start
 * cluster and count, and the id of the transaction that freed it.  A record
 * carries both a list link (efd_list) and an rb-tree node (efd_node).
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81) struct ext4_free_data {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82) /* list link: ties the free block information to sb_info */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 83) struct list_head efd_list;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 84)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 85) /* rb-tree node: ties the free block information to group_info */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 86) struct rb_node efd_node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 87)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88) /* group to which the free block extent belongs */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89) ext4_group_t efd_group;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91) /* free block extent: start cluster and count (count presumably in clusters too - confirm) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92) ext4_grpblk_t efd_start_cluster;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93) ext4_grpblk_t efd_count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95) /* transaction which freed this extent */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 96) tid_t efd_tid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 98)
/*
 * One preallocated chunk of blocks.  It records where the chunk starts
 * physically (pa_pstart) and logically (pa_lstart), its length, how many of
 * its blocks are still free, and whether it is an inode or a group
 * preallocation (pa_type).
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 99) struct ext4_prealloc_space {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100) struct list_head pa_inode_list; /* NOTE(review): presumably link in a per-inode PA list - confirm */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) struct list_head pa_group_list; /* NOTE(review): presumably link in a per-group PA list - confirm */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) union {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) struct list_head pa_tmp_list; /* shares storage with pa_rcu */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104) struct rcu_head pa_rcu; /* shares storage with pa_tmp_list */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105) } u;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) spinlock_t pa_lock; /* NOTE(review): presumably guards this PA's fields - confirm */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) atomic_t pa_count; /* NOTE(review): presumably a reference count - confirm */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108) unsigned pa_deleted; /* NOTE(review): presumably non-zero once the PA is deleted - confirm */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) ext4_fsblk_t pa_pstart; /* phys. block */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) ext4_lblk_t pa_lstart; /* log. block */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) ext4_grpblk_t pa_len; /* len of preallocated chunk */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) ext4_grpblk_t pa_free; /* how many blocks are free */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113) unsigned short pa_type; /* pa type: inode or group */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) spinlock_t *pa_obj_lock; /* NOTE(review): presumably lock of the owning object - confirm */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) struct inode *pa_inode; /* hack, for history only */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117)
/*
 * Preallocation kinds: per-inode (0) or per-group (1).
 * NOTE(review): presumably the values held in the pa_type field of
 * struct ext4_prealloc_space - confirm.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118) enum {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) MB_INODE_PA = 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120) MB_GROUP_PA = 1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122)
/*
 * An extent described relative to a block group: the group number, the
 * start offset inside that group and the length (both in cluster units),
 * plus a logical block number.  ext4_grp_offs_to_block() turns
 * fe_group/fe_start into a filesystem block number.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) struct ext4_free_extent {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124) ext4_lblk_t fe_logical; /* logical block; NOTE(review): presumably file-relative - confirm */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125) ext4_grpblk_t fe_start; /* In cluster units */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) ext4_group_t fe_group; /* group the extent lies in */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127) ext4_grpblk_t fe_len; /* In cluster units */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131) * Locality group:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) * we try to group all related changes together
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133) * so that writeback can flush/allocate them together as well.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134) * Size of lg_prealloc_list hash is determined by MB_DEFAULT_GROUP_PREALLOC
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135) * (512). We store prealloc space into the hash based on the pa_free blocks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136) * order value, i.e. fls(pa_free) - 1. For 512 that is 9, hence the
 * 10 buckets (indices 0..9) given by PREALLOC_TB_SIZE.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138) #define PREALLOC_TB_SIZE 10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139) struct ext4_locality_group {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140) /* for allocator */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141) /* to serialize allocations */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142) struct mutex lg_mutex;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143) /* lists of preallocations, one per order bucket */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144) struct list_head lg_prealloc_list[PREALLOC_TB_SIZE];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145) spinlock_t lg_prealloc_lock; /* NOTE(review): presumably guards lg_prealloc_list - confirm */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147)
/*
 * State of one block allocation request: the inode and super block it is
 * for, the original, goal, best and saved-best extents, a set of small
 * counters/flags, and pointers to the pages, preallocation and locality
 * group associated with the request.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148) struct ext4_allocation_context {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149) struct inode *ac_inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150) struct super_block *ac_sb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152) /* original request */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153) struct ext4_free_extent ac_o_ex;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155) /* goal request (normalized ac_o_ex) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156) struct ext4_free_extent ac_g_ex;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158) /* the best found extent */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159) struct ext4_free_extent ac_b_ex;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 160)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 161) /* copy of the best found extent taken before preallocation efforts */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 162) struct ext4_free_extent ac_f_ex;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 163)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 164) __u16 ac_groups_scanned; /* NOTE(review): presumably number of groups scanned so far - confirm */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 165) __u16 ac_found; /* NOTE(review): presumably number of extents found so far - confirm */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 166) __u16 ac_tail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167) __u16 ac_buddy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 168) __u16 ac_flags; /* allocation hints */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169) __u8 ac_status; /* NOTE(review): presumably one of AC_STATUS_* - confirm */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170) __u8 ac_criteria;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171) __u8 ac_2order; /* if request is to allocate 2^N blocks and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172) * N > 0, the field stores N, otherwise 0 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 173) __u8 ac_op; /* operation, for history only (presumably EXT4_MB_HISTORY_*) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 174) struct page *ac_bitmap_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 175) struct page *ac_buddy_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 176) struct ext4_prealloc_space *ac_pa;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 177) struct ext4_locality_group *ac_lg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 178) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 179)
/*
 * Allocation status codes.
 * NOTE(review): presumably the values stored in the ac_status field of
 * struct ext4_allocation_context (keep searching / extent found / stop
 * searching, going by the names) - confirm against mballoc.c.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 180) #define AC_STATUS_CONTINUE 1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 181) #define AC_STATUS_FOUND 2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 182) #define AC_STATUS_BREAK 3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 183)
/*
 * Handle bundling two page/pointer pairs (buddy and bitmap) with the group
 * info, super block, a block-bits value and a group number.
 * NOTE(review): the per-field notes below are inferred from names and
 * types only - confirm against mballoc.c.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 184) struct ext4_buddy {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 185) struct page *bd_buddy_page; /* presumably page holding the buddy data */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 186) void *bd_buddy; /* presumably address of the buddy data */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 187) struct page *bd_bitmap_page; /* presumably page holding the block bitmap */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 188) void *bd_bitmap; /* presumably address of the block bitmap */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 189) struct ext4_group_info *bd_info; /* presumably info of group bd_group */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 190) struct super_block *bd_sb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 191) __u16 bd_blkbits; /* presumably log2 of the block size */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 192) ext4_group_t bd_group;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 193) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 194)
/*
 * Convert the start of a free extent into a filesystem block number.
 *
 * @sb:  super block; passed to ext4_group_first_block_no() and used to
 *       read s_cluster_bits
 * @fex: extent whose fe_group and fe_start are read (fe_start is in
 *       cluster units)
 *
 * Returns ext4_group_first_block_no() of group fex->fe_group plus fe_start
 * shifted left by s_cluster_bits, i.e. the cluster offset scaled to blocks.
 *
 * NOTE(review): the shift is evaluated in the type of fe_start
 * (ext4_grpblk_t) before the sum is widened to ext4_fsblk_t. If
 * ext4_grpblk_t is narrower than ext4_fsblk_t, confirm that the shifted
 * value cannot overflow for the supported cluster sizes.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 195) static inline ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 196) struct ext4_free_extent *fex)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 197) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 198) return ext4_group_first_block_no(sb, fex->fe_group) +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 199) (fex->fe_start << EXT4_SB(sb)->s_cluster_bits);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 200) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 201)
/*
 * Callback type accepted by ext4_mballoc_query_range() as its "formatter"
 * argument.  It receives the super block, a group number, a start/len pair
 * and the caller's opaque priv pointer, and returns an int.
 * NOTE(review): presumably start/len describe one free range inside group
 * agno and a non-zero return aborts the query - confirm in mballoc.c.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 202) typedef int (*ext4_mballoc_query_range_fn)(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 203) struct super_block *sb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 204) ext4_group_t agno,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 205) ext4_grpblk_t start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 206) ext4_grpblk_t len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 207) void *priv);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 208)
/*
 * Declaration only; the definition is not in this header.
 *
 * Takes a super block, a group number (agno), a start/end pair, a callback
 * of type ext4_mballoc_query_range_fn and an opaque priv pointer, and
 * returns an int.  Note that this function takes a start/end pair whereas
 * the callback receives start/len.
 * NOTE(review): presumably invokes formatter, passing priv through, for
 * ranges found between start and end in group agno - confirm in mballoc.c,
 * including whether end is inclusive.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 209) int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 210) ext4_mballoc_query_range(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 211) struct super_block *sb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 212) ext4_group_t agno,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 213) ext4_grpblk_t start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 214) ext4_grpblk_t end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 215) ext4_mballoc_query_range_fn formatter,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 216) void *priv);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 217)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 218) #endif