// SPDX-License-Identifier: GPL-2.0
/*
 * Zoned block device handling
 *
 * Copyright (c) 2015, Hannes Reinecke
 * Copyright (c) 2015, SUSE Linux GmbH
 *
 * Copyright (c) 2016, Damien Le Moal
 * Copyright (c) 2016, Western Digital
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/rbtree.h>
#include <linux/blkdev.h>
#include <linux/blk-mq.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>
#include <linux/sched/mm.h>

#include "blk.h"

#define ZONE_COND_NAME(name) [BLK_ZONE_COND_##name] = #name
static const char *const zone_cond_name[] = {
	ZONE_COND_NAME(NOT_WP),
	ZONE_COND_NAME(EMPTY),
	ZONE_COND_NAME(IMP_OPEN),
	ZONE_COND_NAME(EXP_OPEN),
	ZONE_COND_NAME(CLOSED),
	ZONE_COND_NAME(READONLY),
	ZONE_COND_NAME(FULL),
	ZONE_COND_NAME(OFFLINE),
};
#undef ZONE_COND_NAME
/**
 * blk_zone_cond_str - Return string XXX in BLK_ZONE_COND_XXX.
 * @zone_cond: BLK_ZONE_COND_XXX.
 *
 * Description: Centralized block layer function to convert a BLK_ZONE_COND_XXX
 * value into its string representation. Useful for debugging and tracing zone
 * conditions. For an invalid BLK_ZONE_COND_XXX, the string "UNKNOWN" is
 * returned.
 */
const char *blk_zone_cond_str(enum blk_zone_cond zone_cond)
{
	static const char *zone_cond_str = "UNKNOWN";

	if (zone_cond < ARRAY_SIZE(zone_cond_name) && zone_cond_name[zone_cond])
		zone_cond_str = zone_cond_name[zone_cond];

	return zone_cond_str;
}
EXPORT_SYMBOL_GPL(blk_zone_cond_str);
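
/*
 * Example (illustration only, not part of the original source): a minimal
 * sketch of how a driver might use blk_zone_cond_str() when tracing zone
 * state. The surrounding context and the "zone" variable are hypothetical.
 *
 *	pr_debug("%s: zone at %llu is %s\n", disk->disk_name,
 *		 zone->start, blk_zone_cond_str(zone->cond));
 */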

static inline sector_t blk_zone_start(struct request_queue *q,
				      sector_t sector)
{
	sector_t zone_mask = blk_queue_zone_sectors(q) - 1;

	return sector & ~zone_mask;
}
/*
 * Return true if a request is a write request that needs zone write locking.
 */
bool blk_req_needs_zone_write_lock(struct request *rq)
{
	if (!rq->q->seq_zones_wlock)
		return false;

	if (blk_rq_is_passthrough(rq))
		return false;

	switch (req_op(rq)) {
	case REQ_OP_WRITE_ZEROES:
	case REQ_OP_WRITE_SAME:
	case REQ_OP_WRITE:
		return blk_rq_zone_is_seq(rq);
	default:
		return false;
	}
}
EXPORT_SYMBOL_GPL(blk_req_needs_zone_write_lock);

bool blk_req_zone_write_trylock(struct request *rq)
{
	unsigned int zno = blk_rq_zone_no(rq);

	if (test_and_set_bit(zno, rq->q->seq_zones_wlock))
		return false;

	WARN_ON_ONCE(rq->rq_flags & RQF_ZONE_WRITE_LOCKED);
	rq->rq_flags |= RQF_ZONE_WRITE_LOCKED;

	return true;
}
EXPORT_SYMBOL_GPL(blk_req_zone_write_trylock);

void __blk_req_zone_write_lock(struct request *rq)
{
	if (WARN_ON_ONCE(test_and_set_bit(blk_rq_zone_no(rq),
					  rq->q->seq_zones_wlock)))
		return;

	WARN_ON_ONCE(rq->rq_flags & RQF_ZONE_WRITE_LOCKED);
	rq->rq_flags |= RQF_ZONE_WRITE_LOCKED;
}
EXPORT_SYMBOL_GPL(__blk_req_zone_write_lock);

void __blk_req_zone_write_unlock(struct request *rq)
{
	rq->rq_flags &= ~RQF_ZONE_WRITE_LOCKED;
	if (rq->q->seq_zones_wlock)
		WARN_ON_ONCE(!test_and_clear_bit(blk_rq_zone_no(rq),
						 rq->q->seq_zones_wlock));
}
EXPORT_SYMBOL_GPL(__blk_req_zone_write_unlock);
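
/*
 * Example (illustration only, not part of the original source): the write
 * lock pattern a blk-mq driver might follow when dispatching a request,
 * assuming the blk_req_zone_write_lock()/blk_req_zone_write_unlock()
 * wrappers around the helpers above; the dispatch helper shown is
 * hypothetical.
 *
 *	if (blk_req_needs_zone_write_lock(rq))
 *		blk_req_zone_write_lock(rq);
 *
 *	ret = my_issue_rq_to_hw(rq);		// hypothetical helper
 *	if (ret)
 *		blk_req_zone_write_unlock(rq);	// unlock on failure/requeue
 */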

/**
 * blkdev_nr_zones - Get number of zones
 * @disk: Target gendisk
 *
 * Return the total number of zones of a zoned block device. For a block
 * device without zone capabilities, the number of zones is always 0.
 */
unsigned int blkdev_nr_zones(struct gendisk *disk)
{
	sector_t zone_sectors = blk_queue_zone_sectors(disk->queue);

	if (!blk_queue_is_zoned(disk->queue))
		return 0;
	return (get_capacity(disk) + zone_sectors - 1) >> ilog2(zone_sectors);
}
EXPORT_SYMBOL_GPL(blkdev_nr_zones);
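
/*
 * Example (illustration only, not part of the original source): since the
 * zone size is always a power of two, the shift above is equivalent to
 * rounding the capacity up by whole zones. A hypothetical caller sizing a
 * per-zone array might do:
 *
 *	unsigned int nr_zones = blkdev_nr_zones(disk);
 *	struct my_zone_info *zi;		// hypothetical type
 *
 *	zi = kcalloc(nr_zones, sizeof(*zi), GFP_KERNEL);
 */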

/**
 * blkdev_report_zones - Get zones information
 * @bdev: Target block device
 * @sector: Sector from which to report zones
 * @nr_zones: Maximum number of zones to report
 * @cb: Callback function called for each reported zone
 * @data: Private data for the callback
 *
 * Description:
 *    Get zone information starting from the zone containing @sector for at
 *    most @nr_zones, and call @cb for each zone reported by the device.
 *    To report all zones in a device starting from @sector, the BLK_ALL_ZONES
 *    constant can be passed to @nr_zones.
 *    Returns the number of zones reported by the device, or a negative errno
 *    value in case of failure.
 *
 *    Note: The caller must use memalloc_noXX_save/restore() calls to control
 *    memory allocations done within this function.
 */
int blkdev_report_zones(struct block_device *bdev, sector_t sector,
			unsigned int nr_zones, report_zones_cb cb, void *data)
{
	struct gendisk *disk = bdev->bd_disk;
	sector_t capacity = get_capacity(disk);

	if (!blk_queue_is_zoned(bdev_get_queue(bdev)) ||
	    WARN_ON_ONCE(!disk->fops->report_zones))
		return -EOPNOTSUPP;

	if (!nr_zones || sector >= capacity)
		return 0;

	return disk->fops->report_zones(disk, sector, nr_zones, cb, data);
}
EXPORT_SYMBOL_GPL(blkdev_report_zones);
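
/*
 * Example (illustration only, not part of the original source): a minimal
 * sketch of a blkdev_report_zones() caller that counts open zones. The
 * callback name and counter are hypothetical.
 *
 *	static int count_open_zones_cb(struct blk_zone *zone, unsigned int idx,
 *				       void *data)
 *	{
 *		unsigned int *nr_open = data;
 *
 *		if (zone->cond == BLK_ZONE_COND_IMP_OPEN ||
 *		    zone->cond == BLK_ZONE_COND_EXP_OPEN)
 *			(*nr_open)++;
 *		return 0;
 *	}
 *
 *	unsigned int nr_open = 0;
 *	int ret = blkdev_report_zones(bdev, 0, BLK_ALL_ZONES,
 *				      count_open_zones_cb, &nr_open);
 */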

static inline bool blkdev_allow_reset_all_zones(struct block_device *bdev,
						sector_t sector,
						sector_t nr_sectors)
{
	if (!blk_queue_zone_resetall(bdev_get_queue(bdev)))
		return false;

	/*
	 * REQ_OP_ZONE_RESET_ALL can be executed only if the zone range to
	 * operate on covers the entire disk.
	 */
	return !sector && nr_sectors == get_capacity(bdev->bd_disk);
}

/**
 * blkdev_zone_mgmt - Execute a zone management operation on a range of zones
 * @bdev: Target block device
 * @op: Operation to be performed on the zones
 * @sector: Start sector of the first zone to operate on
 * @nr_sectors: Number of sectors, should be at least the length of one zone
 *		and must be zone size aligned.
 * @gfp_mask: Memory allocation flags (for bio_alloc)
 *
 * Description:
 *    Perform the specified operation on the range of zones specified by
 *    @sector..@sector+@nr_sectors. Specifying the entire disk sector range
 *    is valid, but the specified range should not contain conventional zones.
 *    The operation to execute on each zone can be a zone reset, open, close
 *    or finish request.
 */
int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op,
		     sector_t sector, sector_t nr_sectors,
		     gfp_t gfp_mask)
{
	struct request_queue *q = bdev_get_queue(bdev);
	sector_t zone_sectors = blk_queue_zone_sectors(q);
	sector_t capacity = get_capacity(bdev->bd_disk);
	sector_t end_sector = sector + nr_sectors;
	struct bio *bio = NULL;
	int ret;

	if (!blk_queue_is_zoned(q))
		return -EOPNOTSUPP;

	if (bdev_read_only(bdev))
		return -EPERM;

	if (!op_is_zone_mgmt(op))
		return -EOPNOTSUPP;

	if (end_sector <= sector || end_sector > capacity)
		/* Out of range */
		return -EINVAL;

	/* Check alignment (handle a possibly smaller last zone) */
	if (sector & (zone_sectors - 1))
		return -EINVAL;

	if ((nr_sectors & (zone_sectors - 1)) && end_sector != capacity)
		return -EINVAL;

	while (sector < end_sector) {
		bio = blk_next_bio(bio, 0, gfp_mask);
		bio_set_dev(bio, bdev);

		/*
		 * Special case for the zone reset operation that resets all
		 * zones, which is useful for applications like mkfs.
		 */
		if (op == REQ_OP_ZONE_RESET &&
		    blkdev_allow_reset_all_zones(bdev, sector, nr_sectors)) {
			bio->bi_opf = REQ_OP_ZONE_RESET_ALL | REQ_SYNC;
			break;
		}

		bio->bi_opf = op | REQ_SYNC;
		bio->bi_iter.bi_sector = sector;
		sector += zone_sectors;

		/* This may take a while, so be nice to others */
		cond_resched();
	}

	ret = submit_bio_wait(bio);
	bio_put(bio);

	return ret;
}
EXPORT_SYMBOL_GPL(blkdev_zone_mgmt);
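
/*
 * Example (illustration only, not part of the original source): resetting
 * every zone of a zoned block device, e.g. from an mkfs-like context.
 * Passing the full device range lets the REQ_OP_ZONE_RESET_ALL special case
 * above kick in when the device supports it.
 *
 *	ret = blkdev_zone_mgmt(bdev, REQ_OP_ZONE_RESET, 0,
 *			       get_capacity(bdev->bd_disk), GFP_KERNEL);
 */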

struct zone_report_args {
	struct blk_zone __user *zones;
};

static int blkdev_copy_zone_to_user(struct blk_zone *zone, unsigned int idx,
				    void *data)
{
	struct zone_report_args *args = data;

	if (copy_to_user(&args->zones[idx], zone, sizeof(struct blk_zone)))
		return -EFAULT;
	return 0;
}

/*
 * BLKREPORTZONE ioctl processing.
 * Called from blkdev_ioctl.
 */
int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
			      unsigned int cmd, unsigned long arg)
{
	void __user *argp = (void __user *)arg;
	struct zone_report_args args;
	struct request_queue *q;
	struct blk_zone_report rep;
	int ret;

	if (!argp)
		return -EINVAL;

	q = bdev_get_queue(bdev);
	if (!q)
		return -ENXIO;

	if (!blk_queue_is_zoned(q))
		return -ENOTTY;

	if (copy_from_user(&rep, argp, sizeof(struct blk_zone_report)))
		return -EFAULT;

	if (!rep.nr_zones)
		return -EINVAL;

	args.zones = argp + sizeof(struct blk_zone_report);
	ret = blkdev_report_zones(bdev, rep.sector, rep.nr_zones,
				  blkdev_copy_zone_to_user, &args);
	if (ret < 0)
		return ret;

	rep.nr_zones = ret;
	rep.flags = BLK_ZONE_REP_CAPACITY;
	if (copy_to_user(argp, &rep, sizeof(struct blk_zone_report)))
		return -EFAULT;
	return 0;
}

static int blkdev_truncate_zone_range(struct block_device *bdev, fmode_t mode,
				      const struct blk_zone_range *zrange)
{
	loff_t start, end;

	if (zrange->sector + zrange->nr_sectors <= zrange->sector ||
	    zrange->sector + zrange->nr_sectors > get_capacity(bdev->bd_disk))
		/* Out of range */
		return -EINVAL;

	start = zrange->sector << SECTOR_SHIFT;
	end = ((zrange->sector + zrange->nr_sectors) << SECTOR_SHIFT) - 1;

	return truncate_bdev_range(bdev, mode, start, end);
}

/*
 * BLKRESETZONE, BLKOPENZONE, BLKCLOSEZONE and BLKFINISHZONE ioctl processing.
 * Called from blkdev_ioctl.
 */
int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode,
			   unsigned int cmd, unsigned long arg)
{
	void __user *argp = (void __user *)arg;
	struct request_queue *q;
	struct blk_zone_range zrange;
	enum req_opf op;
	int ret;

	if (!argp)
		return -EINVAL;

	q = bdev_get_queue(bdev);
	if (!q)
		return -ENXIO;

	if (!blk_queue_is_zoned(q))
		return -ENOTTY;

	if (!(mode & FMODE_WRITE))
		return -EBADF;

	if (copy_from_user(&zrange, argp, sizeof(struct blk_zone_range)))
		return -EFAULT;

	switch (cmd) {
	case BLKRESETZONE:
		op = REQ_OP_ZONE_RESET;

		/* Invalidate the page cache, including dirty pages. */
		ret = blkdev_truncate_zone_range(bdev, mode, &zrange);
		if (ret)
			return ret;
		break;
	case BLKOPENZONE:
		op = REQ_OP_ZONE_OPEN;
		break;
	case BLKCLOSEZONE:
		op = REQ_OP_ZONE_CLOSE;
		break;
	case BLKFINISHZONE:
		op = REQ_OP_ZONE_FINISH;
		break;
	default:
		return -ENOTTY;
	}

	ret = blkdev_zone_mgmt(bdev, op, zrange.sector, zrange.nr_sectors,
			       GFP_KERNEL);

	/*
	 * Invalidate the page cache again for zone reset: writes can only be
	 * direct for zoned devices, so concurrent writes would not add any
	 * page to the page cache after/during reset. The page cache may be
	 * filled again due to concurrent reads, though, and dropping the
	 * pages for these is fine.
	 */
	if (!ret && cmd == BLKRESETZONE)
		ret = blkdev_truncate_zone_range(bdev, mode, &zrange);

	return ret;
}
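
/*
 * Example (illustration only, not part of the original source): how a
 * userspace program would drive the handler above to reset the zone starting
 * at sector zone_start of zone_len sectors; both variables are hypothetical.
 *
 *	struct blk_zone_range zrange = {
 *		.sector = zone_start,
 *		.nr_sectors = zone_len,
 *	};
 *
 *	if (ioctl(fd, BLKRESETZONE, &zrange) < 0)
 *		err(1, "BLKRESETZONE");
 */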

static inline unsigned long *blk_alloc_zone_bitmap(int node,
						   unsigned int nr_zones)
{
	return kcalloc_node(BITS_TO_LONGS(nr_zones), sizeof(unsigned long),
			    GFP_NOIO, node);
}

void blk_queue_free_zone_bitmaps(struct request_queue *q)
{
	kfree(q->conv_zones_bitmap);
	q->conv_zones_bitmap = NULL;
	kfree(q->seq_zones_wlock);
	q->seq_zones_wlock = NULL;
}

struct blk_revalidate_zone_args {
	struct gendisk *disk;
	unsigned long *conv_zones_bitmap;
	unsigned long *seq_zones_wlock;
	unsigned int nr_zones;
	sector_t zone_sectors;
	sector_t sector;
};

/*
 * Helper function to check the validity of zones of a zoned block device.
 */
static int blk_revalidate_zone_cb(struct blk_zone *zone, unsigned int idx,
				  void *data)
{
	struct blk_revalidate_zone_args *args = data;
	struct gendisk *disk = args->disk;
	struct request_queue *q = disk->queue;
	sector_t capacity = get_capacity(disk);

	/*
	 * All zones must have the same size, with the possible exception of
	 * a smaller last zone.
	 */
	if (zone->start == 0) {
		if (zone->len == 0 || !is_power_of_2(zone->len)) {
			pr_warn("%s: Invalid zoned device with non power of two zone size (%llu)\n",
				disk->disk_name, zone->len);
			return -ENODEV;
		}

		args->zone_sectors = zone->len;
		args->nr_zones = (capacity + zone->len - 1) >> ilog2(zone->len);
	} else if (zone->start + args->zone_sectors < capacity) {
		if (zone->len != args->zone_sectors) {
			pr_warn("%s: Invalid zoned device with non constant zone size\n",
				disk->disk_name);
			return -ENODEV;
		}
	} else {
		if (zone->len > args->zone_sectors) {
			pr_warn("%s: Invalid zoned device with larger last zone size\n",
				disk->disk_name);
			return -ENODEV;
		}
	}

	/* Check for holes in the zone report */
	if (zone->start != args->sector) {
		pr_warn("%s: Zone gap at sectors %llu..%llu\n",
			disk->disk_name, args->sector, zone->start);
		return -ENODEV;
	}

	/* Check zone type */
	switch (zone->type) {
	case BLK_ZONE_TYPE_CONVENTIONAL:
		if (!args->conv_zones_bitmap) {
			args->conv_zones_bitmap =
				blk_alloc_zone_bitmap(q->node, args->nr_zones);
			if (!args->conv_zones_bitmap)
				return -ENOMEM;
		}
		set_bit(idx, args->conv_zones_bitmap);
		break;
	case BLK_ZONE_TYPE_SEQWRITE_REQ:
	case BLK_ZONE_TYPE_SEQWRITE_PREF:
		if (!args->seq_zones_wlock) {
			args->seq_zones_wlock =
				blk_alloc_zone_bitmap(q->node, args->nr_zones);
			if (!args->seq_zones_wlock)
				return -ENOMEM;
		}
		break;
	default:
		pr_warn("%s: Invalid zone type 0x%x at sectors %llu\n",
			disk->disk_name, (int)zone->type, zone->start);
		return -ENODEV;
	}

	args->sector += zone->len;
	return 0;
}

/**
 * blk_revalidate_disk_zones - (re)allocate and initialize zone bitmaps
 * @disk: Target disk
 * @update_driver_data: Callback to update driver data on the frozen disk
 *
 * Helper function for low-level device drivers to (re)allocate and initialize
 * a disk request queue's zone bitmaps. This function should normally be called
 * within the disk ->revalidate method for blk-mq based drivers. For BIO based
 * drivers only q->nr_zones needs to be updated so that the sysfs exposed value
 * is correct.
 * If the @update_driver_data callback function is not NULL, the callback is
 * executed with the device request queue frozen after all zones have been
 * checked.
 */
int blk_revalidate_disk_zones(struct gendisk *disk,
			      void (*update_driver_data)(struct gendisk *disk))
{
	struct request_queue *q = disk->queue;
	struct blk_revalidate_zone_args args = {
		.disk = disk,
	};
	unsigned int noio_flag;
	int ret;

	if (WARN_ON_ONCE(!blk_queue_is_zoned(q)))
		return -EIO;
	if (WARN_ON_ONCE(!queue_is_mq(q)))
		return -EIO;

	if (!get_capacity(disk))
		return -EIO;

	/*
	 * Ensure that all memory allocations in this context are done as if
	 * GFP_NOIO was specified.
	 */
	noio_flag = memalloc_noio_save();
	ret = disk->fops->report_zones(disk, 0, UINT_MAX,
				       blk_revalidate_zone_cb, &args);
	memalloc_noio_restore(noio_flag);

	/*
	 * Install the new bitmaps and update nr_zones only once the queue is
	 * stopped and all I/Os are completed (i.e. a scheduler is not
	 * referencing the bitmaps).
	 */
	blk_mq_freeze_queue(q);
	if (ret >= 0) {
		blk_queue_chunk_sectors(q, args.zone_sectors);
		q->nr_zones = args.nr_zones;
		swap(q->seq_zones_wlock, args.seq_zones_wlock);
		swap(q->conv_zones_bitmap, args.conv_zones_bitmap);
		if (update_driver_data)
			update_driver_data(disk);
		ret = 0;
	} else {
		pr_warn("%s: failed to revalidate zones\n", disk->disk_name);
		blk_queue_free_zone_bitmaps(q);
	}
	blk_mq_unfreeze_queue(q);

	kfree(args.seq_zones_wlock);
	kfree(args.conv_zones_bitmap);
	return ret;
}
EXPORT_SYMBOL_GPL(blk_revalidate_disk_zones);
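
/*
 * Example (illustration only, not part of the original source): a driver
 * calling the helper above after (re)configuring a zoned disk, e.g. from its
 * revalidate path. The driver callback shown is hypothetical.
 *
 *	static void my_driver_update_zone_info(struct gendisk *disk)
 *	{
 *		// refresh driver-private per-zone state while frozen
 *	}
 *
 *	ret = blk_revalidate_disk_zones(disk, my_driver_update_zone_info);
 *	if (ret)
 *		pr_warn("%s: zone revalidation failed %d\n",
 *			disk->disk_name, ret);
 */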