/*
 * Copyright (C) 2014 Facebook. All rights reserved.
 *
 * This file is released under the GPL.
 */

#include <linux/device-mapper.h>

#include <linux/module.h>
#include <linux/init.h>
#include <linux/blkdev.h>
#include <linux/bio.h>
#include <linux/dax.h>
#include <linux/slab.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
#include <linux/uio.h>

#define DM_MSG_PREFIX "log-writes"
/*
 * This target will sequentially log all writes to the target device onto the
 * log device. This is helpful for replaying writes to check for fs consistency
 * at all points in time. This target provides a mechanism to mark specific
 * events to check data at a later time. So for example you would:
 *
 * write data
 * fsync
 * dmsetup message /dev/whatever mark mymark
 * unmount /mnt/test
 *
 * Then replay the log up to mymark and check the contents of the replay to
 * verify it matches what was written.
 *
 * We only log writes after they have been flushed; this makes the log closely
 * describe the order in which the data hits the actual disk, not its cache. So
 * for example the following sequence (W means write, C means complete)
 *
 * Wa,Wb,Wc,Cc,Ca,FLUSH,FUAd,Cb,CFLUSH,CFUAd
 *
 * would result in the log looking like this:
 *
 * c,a,b,flush,fuad,<other writes>,<next flush>
 *
 * This is meant to help expose problems where file systems do not properly wait
 * on data being written before invoking a FLUSH. FUA bypasses the cache, so
 * once a FUA write completes it is added to the log, since at that point it is
 * on disk.
 *
 * We treat DISCARDs as if they don't bypass the cache so that they are logged
 * in order of completion along with the normal writes. If we didn't do it this
 * way we would process all the discards first and then write all the data, when
 * in fact we want to do the data and the discard in the order in which they
 * completed.
 */
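
/*
 * An illustrative mark-and-replay flow (device names and the replay tool
 * invocation are placeholders; the replay tooling lives in a separate
 * userspace project):
 *
 *   ... write data, fsync ...
 *   dmsetup message log 0 mark mymark
 *   umount /mnt/test
 *   replay-log --log /dev/sdc --replay /dev/sdb --end-mark mymark
 */
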
#define LOG_FLUSH_FLAG (1 << 0)
#define LOG_FUA_FLAG (1 << 1)
#define LOG_DISCARD_FLAG (1 << 2)
#define LOG_MARK_FLAG (1 << 3)
#define LOG_METADATA_FLAG (1 << 4)

#define WRITE_LOG_VERSION 1ULL
#define WRITE_LOG_MAGIC 0x6a736677736872ULL
#define WRITE_LOG_SUPER_SECTOR 0
/*
 * The disk format for this is braindead simple.
 *
 * At byte 0 we have our super, followed by nr_entries of the following
 * sequence:
 *
 * [   1 sector    ][  entry->nr_sectors  ]
 * [log_write_entry][    data written     ]
 *
 * The log_write_entry takes up a full sector so we can have arbitrary length
 * marks and it leaves us room for extra content in the future.
 */
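
/*
 * A worked example (illustrative numbers): with a 4096 byte sectorsize the
 * super occupies log sector 0 and entry 0's log_write_entry occupies log
 * sector 1; if that entry covers nr_sectors = 2 (8k of data), the data
 * fills log sectors 2-3 and entry 1's metadata sector is log sector 4.
 */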

/*
 * Basic info about the log for userspace.
 */
struct log_write_super {
	__le64 magic;
	__le64 version;
	__le64 nr_entries;
	__le32 sectorsize;
};

/*
 * sector - the sector we wrote.
 * nr_sectors - the number of sectors we wrote.
 * flags - flags for this log entry.
 * data_len - the size of the data in this log entry; this is for private log
 *            entry data, e.g. the MARK data provided by userspace.
 */
struct log_write_entry {
	__le64 sector;
	__le64 nr_sectors;
	__le64 flags;
	__le64 data_len;
};

struct log_writes_c {
	struct dm_dev *dev;
	struct dm_dev *logdev;
	u64 logged_entries;
	u32 sectorsize;
	u32 sectorshift;
	atomic_t io_blocks;
	atomic_t pending_blocks;
	sector_t next_sector;
	sector_t end_sector;
	bool logging_enabled;
	bool device_supports_discard;
	spinlock_t blocks_lock;
	struct list_head unflushed_blocks;
	struct list_head logging_blocks;
	wait_queue_head_t wait;
	struct task_struct *log_kthread;
	struct completion super_done;
};

struct pending_block {
	int vec_cnt;
	u64 flags;
	sector_t sector;
	sector_t nr_sectors;
	char *data;
	u32 datalen;
	struct list_head list;
	struct bio_vec vecs[];
};
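
/*
 * Lifecycle of a pending_block, as implemented below: allocated in
 * log_writes_map() when a write is remapped, parked on unflushed_blocks in
 * normal_end_io() once the write completes on the real device, spliced onto
 * logging_blocks when a FLUSH completes (FUA and MARK blocks go straight
 * there), and finally written to the log device and freed by the kthread.
 */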

struct per_bio_data {
	struct pending_block *block;
};

static inline sector_t bio_to_dev_sectors(struct log_writes_c *lc,
					  sector_t sectors)
{
	return sectors >> (lc->sectorshift - SECTOR_SHIFT);
}

static inline sector_t dev_to_bio_sectors(struct log_writes_c *lc,
					  sector_t sectors)
{
	return sectors << (lc->sectorshift - SECTOR_SHIFT);
}
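
/*
 * For example (illustrative numbers): with a 4096 byte logical block size,
 * sectorshift is 12 and these helpers shift by 12 - SECTOR_SHIFT = 3, so
 * 8 bio (512b) sectors correspond to 1 dev (4k) sector.
 */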

static void put_pending_block(struct log_writes_c *lc)
{
	if (atomic_dec_and_test(&lc->pending_blocks)) {
		smp_mb__after_atomic();
		if (waitqueue_active(&lc->wait))
			wake_up(&lc->wait);
	}
}

static void put_io_block(struct log_writes_c *lc)
{
	if (atomic_dec_and_test(&lc->io_blocks)) {
		smp_mb__after_atomic();
		if (waitqueue_active(&lc->wait))
			wake_up(&lc->wait);
	}
}
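
/*
 * io_blocks counts log bios in flight to the log device, and pending_blocks
 * counts blocks that have not yet been logged or freed; the destructor
 * waits for both to drain before tearing the target down.
 */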

static void log_end_io(struct bio *bio)
{
	struct log_writes_c *lc = bio->bi_private;

	if (bio->bi_status) {
		unsigned long flags;

		DMERR("Error writing log block, error=%d", bio->bi_status);
		spin_lock_irqsave(&lc->blocks_lock, flags);
		lc->logging_enabled = false;
		spin_unlock_irqrestore(&lc->blocks_lock, flags);
	}

	bio_free_pages(bio);
	put_io_block(lc);
	bio_put(bio);
}

static void log_end_super(struct bio *bio)
{
	struct log_writes_c *lc = bio->bi_private;

	complete(&lc->super_done);
	log_end_io(bio);
}

/*
 * Meant to be called if there is an error; it will free all the pages
 * associated with the block.
 */
static void free_pending_block(struct log_writes_c *lc,
			       struct pending_block *block)
{
	int i;

	for (i = 0; i < block->vec_cnt; i++) {
		if (block->vecs[i].bv_page)
			__free_page(block->vecs[i].bv_page);
	}
	kfree(block->data);
	kfree(block);
	put_pending_block(lc);
}

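/*
 * Write a single metadata sector (a log_write_entry, or the super when
 * sector == WRITE_LOG_SUPER_SECTOR) plus an optional payload, zero padded
 * out to the log sector size.  The caller holds an io_blocks reference for
 * this bio; the error paths drop it via put_io_block().
 */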
static int write_metadata(struct log_writes_c *lc, void *entry,
			  size_t entrylen, void *data, size_t datalen,
			  sector_t sector)
{
	struct bio *bio;
	struct page *page;
	void *ptr;
	size_t ret;

	bio = bio_alloc(GFP_KERNEL, 1);
	if (!bio) {
		DMERR("Couldn't alloc log bio");
		goto error;
	}
	bio->bi_iter.bi_size = 0;
	bio->bi_iter.bi_sector = sector;
	bio_set_dev(bio, lc->logdev->bdev);
	bio->bi_end_io = (sector == WRITE_LOG_SUPER_SECTOR) ?
			  log_end_super : log_end_io;
	bio->bi_private = lc;
	bio_set_op_attrs(bio, REQ_OP_WRITE, 0);

	page = alloc_page(GFP_KERNEL);
	if (!page) {
		DMERR("Couldn't alloc log page");
		bio_put(bio);
		goto error;
	}

	ptr = kmap_atomic(page);
	memcpy(ptr, entry, entrylen);
	if (datalen)
		memcpy(ptr + entrylen, data, datalen);
	memset(ptr + entrylen + datalen, 0,
	       lc->sectorsize - entrylen - datalen);
	kunmap_atomic(ptr);

	ret = bio_add_page(bio, page, lc->sectorsize, 0);
	if (ret != lc->sectorsize) {
		DMERR("Couldn't add page to the log block");
		goto error_bio;
	}
	submit_bio(bio);
	return 0;
error_bio:
	bio_put(bio);
	__free_page(page);
error:
	put_io_block(lc);
	return -1;
}

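/*
 * Write a payload carried in block->data (rather than in bio pages) to the
 * log, splitting it across as many pages and bios as needed; each page is
 * zero padded up to the log sector size before it is submitted.
 */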
static int write_inline_data(struct log_writes_c *lc, void *entry,
			     size_t entrylen, void *data, size_t datalen,
			     sector_t sector)
{
	int num_pages, bio_pages, pg_datalen, pg_sectorlen, i;
	struct page *page;
	struct bio *bio;
	size_t ret;
	void *ptr;

	while (datalen) {
		num_pages = ALIGN(datalen, PAGE_SIZE) >> PAGE_SHIFT;
		bio_pages = min(num_pages, BIO_MAX_PAGES);

		atomic_inc(&lc->io_blocks);

		bio = bio_alloc(GFP_KERNEL, bio_pages);
		if (!bio) {
			DMERR("Couldn't alloc inline data bio");
			goto error;
		}

		bio->bi_iter.bi_size = 0;
		bio->bi_iter.bi_sector = sector;
		bio_set_dev(bio, lc->logdev->bdev);
		bio->bi_end_io = log_end_io;
		bio->bi_private = lc;
		bio_set_op_attrs(bio, REQ_OP_WRITE, 0);

		for (i = 0; i < bio_pages; i++) {
			pg_datalen = min_t(int, datalen, PAGE_SIZE);
			pg_sectorlen = ALIGN(pg_datalen, lc->sectorsize);

			page = alloc_page(GFP_KERNEL);
			if (!page) {
				DMERR("Couldn't alloc inline data page");
				goto error_bio;
			}

			ptr = kmap_atomic(page);
			memcpy(ptr, data, pg_datalen);
			if (pg_sectorlen > pg_datalen)
				memset(ptr + pg_datalen, 0, pg_sectorlen - pg_datalen);
			kunmap_atomic(ptr);

			ret = bio_add_page(bio, page, pg_sectorlen, 0);
			if (ret != pg_sectorlen) {
				DMERR("Couldn't add page of inline data");
				__free_page(page);
				goto error_bio;
			}

			datalen -= pg_datalen;
			data += pg_datalen;
		}
		submit_bio(bio);

		sector += bio_pages * PAGE_SECTORS;
	}
	return 0;
error_bio:
	bio_free_pages(bio);
	bio_put(bio);
error:
	put_io_block(lc);
	return -1;
}

static int log_one_block(struct log_writes_c *lc,
			 struct pending_block *block, sector_t sector)
{
	struct bio *bio;
	struct log_write_entry entry;
	size_t metadatalen, ret;
	int i;

	entry.sector = cpu_to_le64(block->sector);
	entry.nr_sectors = cpu_to_le64(block->nr_sectors);
	entry.flags = cpu_to_le64(block->flags);
	entry.data_len = cpu_to_le64(block->datalen);

	metadatalen = (block->flags & LOG_MARK_FLAG) ? block->datalen : 0;
	if (write_metadata(lc, &entry, sizeof(entry), block->data,
			   metadatalen, sector)) {
		free_pending_block(lc, block);
		return -1;
	}

	sector += dev_to_bio_sectors(lc, 1);

	if (block->datalen && metadatalen == 0) {
		if (write_inline_data(lc, &entry, sizeof(entry), block->data,
				      block->datalen, sector)) {
			free_pending_block(lc, block);
			return -1;
		}
		/* we don't support both inline data & bio data */
		goto out;
	}

	if (!block->vec_cnt)
		goto out;

	atomic_inc(&lc->io_blocks);
	bio = bio_alloc(GFP_KERNEL, min(block->vec_cnt, BIO_MAX_PAGES));
	if (!bio) {
		DMERR("Couldn't alloc log bio");
		goto error;
	}
	bio->bi_iter.bi_size = 0;
	bio->bi_iter.bi_sector = sector;
	bio_set_dev(bio, lc->logdev->bdev);
	bio->bi_end_io = log_end_io;
	bio->bi_private = lc;
	bio_set_op_attrs(bio, REQ_OP_WRITE, 0);

	for (i = 0; i < block->vec_cnt; i++) {
		/*
		 * The page offset is always 0 because we allocate a new page
		 * for every bvec in the original bio for simplicity's sake.
		 */
		ret = bio_add_page(bio, block->vecs[i].bv_page,
				   block->vecs[i].bv_len, 0);
		if (ret != block->vecs[i].bv_len) {
			atomic_inc(&lc->io_blocks);
			submit_bio(bio);
			bio = bio_alloc(GFP_KERNEL, min(block->vec_cnt - i, BIO_MAX_PAGES));
			if (!bio) {
				DMERR("Couldn't alloc log bio");
				goto error;
			}
			bio->bi_iter.bi_size = 0;
			bio->bi_iter.bi_sector = sector;
			bio_set_dev(bio, lc->logdev->bdev);
			bio->bi_end_io = log_end_io;
			bio->bi_private = lc;
			bio_set_op_attrs(bio, REQ_OP_WRITE, 0);

			ret = bio_add_page(bio, block->vecs[i].bv_page,
					   block->vecs[i].bv_len, 0);
			if (ret != block->vecs[i].bv_len) {
				DMERR("Couldn't add page on new bio?");
				bio_put(bio);
				goto error;
			}
		}
		sector += block->vecs[i].bv_len >> SECTOR_SHIFT;
	}
	submit_bio(bio);
out:
	kfree(block->data);
	kfree(block);
	put_pending_block(lc);
	return 0;
error:
	free_pending_block(lc, block);
	put_io_block(lc);
	return -1;
}

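/*
 * Rewrite the super with the current nr_entries so userspace can tell how
 * much of the log is valid.  This runs after FUA and MARK entries; see the
 * wait below for why the write must be strictly ordered.
 */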
static int log_super(struct log_writes_c *lc)
{
	struct log_write_super super;

	super.magic = cpu_to_le64(WRITE_LOG_MAGIC);
	super.version = cpu_to_le64(WRITE_LOG_VERSION);
	super.nr_entries = cpu_to_le64(lc->logged_entries);
	super.sectorsize = cpu_to_le32(lc->sectorsize);

	if (write_metadata(lc, &super, sizeof(super), NULL, 0,
			   WRITE_LOG_SUPER_SECTOR)) {
		DMERR("Couldn't write super");
		return -1;
	}

	/*
	 * The super sector should be written in-order, otherwise the
	 * nr_entries could be rewritten incorrectly by an old bio.
	 */
	wait_for_completion_io(&lc->super_done);

	return 0;
}

static inline sector_t logdev_last_sector(struct log_writes_c *lc)
{
	return i_size_read(lc->logdev->bdev->bd_inode) >> SECTOR_SHIFT;
}

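/*
 * The log kthread: pop blocks off logging_blocks, assign each one space in
 * the log starting at next_sector, write the entry and its data via
 * log_one_block(), and update the super after FUA and MARK entries.  Sleeps
 * when nothing is queued.
 */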
static int log_writes_kthread(void *arg)
{
	struct log_writes_c *lc = (struct log_writes_c *)arg;
	sector_t sector = 0;

	while (!kthread_should_stop()) {
		bool super = false;
		bool logging_enabled;
		struct pending_block *block = NULL;
		int ret;

		spin_lock_irq(&lc->blocks_lock);
		if (!list_empty(&lc->logging_blocks)) {
			block = list_first_entry(&lc->logging_blocks,
						 struct pending_block, list);
			list_del_init(&block->list);
			if (!lc->logging_enabled)
				goto next;

			sector = lc->next_sector;
			if (!(block->flags & LOG_DISCARD_FLAG))
				lc->next_sector += dev_to_bio_sectors(lc, block->nr_sectors);
			lc->next_sector += dev_to_bio_sectors(lc, 1);

			/*
			 * Apparently the size of the device may not be known
			 * right away, so handle this properly.
			 */
			if (!lc->end_sector)
				lc->end_sector = logdev_last_sector(lc);
			if (lc->end_sector &&
			    lc->next_sector >= lc->end_sector) {
				DMERR("Ran out of space on the logdev");
				lc->logging_enabled = false;
				goto next;
			}
			lc->logged_entries++;
			atomic_inc(&lc->io_blocks);

			super = (block->flags & (LOG_FUA_FLAG | LOG_MARK_FLAG));
			if (super)
				atomic_inc(&lc->io_blocks);
		}
next:
		logging_enabled = lc->logging_enabled;
		spin_unlock_irq(&lc->blocks_lock);
		if (block) {
			if (logging_enabled) {
				ret = log_one_block(lc, block, sector);
				if (!ret && super)
					ret = log_super(lc);
				if (ret) {
					spin_lock_irq(&lc->blocks_lock);
					lc->logging_enabled = false;
					spin_unlock_irq(&lc->blocks_lock);
				}
			} else
				free_pending_block(lc, block);
			continue;
		}

		if (!try_to_freeze()) {
			set_current_state(TASK_INTERRUPTIBLE);
			if (!kthread_should_stop() &&
			    list_empty(&lc->logging_blocks))
				schedule();
			__set_current_state(TASK_RUNNING);
		}
	}
	return 0;
}

/*
 * Construct a log-writes mapping:
 * log-writes <dev_path> <log_dev_path>
 */
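/*
 * For example (device paths are placeholders):
 *
 *   dmsetup create log --table \
 *     "0 $(blockdev --getsz /dev/sdb) log-writes /dev/sdb /dev/sdc"
 */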
static int log_writes_ctr(struct dm_target *ti, unsigned int argc, char **argv)
{
	struct log_writes_c *lc;
	struct dm_arg_set as;
	const char *devname, *logdevname;
	int ret;

	as.argc = argc;
	as.argv = argv;

	if (argc < 2) {
		ti->error = "Invalid argument count";
		return -EINVAL;
	}

	lc = kzalloc(sizeof(struct log_writes_c), GFP_KERNEL);
	if (!lc) {
		ti->error = "Cannot allocate context";
		return -ENOMEM;
	}
	spin_lock_init(&lc->blocks_lock);
	INIT_LIST_HEAD(&lc->unflushed_blocks);
	INIT_LIST_HEAD(&lc->logging_blocks);
	init_waitqueue_head(&lc->wait);
	init_completion(&lc->super_done);
	atomic_set(&lc->io_blocks, 0);
	atomic_set(&lc->pending_blocks, 0);

	devname = dm_shift_arg(&as);
	ret = dm_get_device(ti, devname, dm_table_get_mode(ti->table), &lc->dev);
	if (ret) {
		ti->error = "Device lookup failed";
		goto bad;
	}

	logdevname = dm_shift_arg(&as);
	ret = dm_get_device(ti, logdevname, dm_table_get_mode(ti->table),
			    &lc->logdev);
	if (ret) {
		ti->error = "Log device lookup failed";
		dm_put_device(ti, lc->dev);
		goto bad;
	}

	lc->sectorsize = bdev_logical_block_size(lc->dev->bdev);
	lc->sectorshift = ilog2(lc->sectorsize);
	lc->log_kthread = kthread_run(log_writes_kthread, lc, "log-write");
	if (IS_ERR(lc->log_kthread)) {
		ret = PTR_ERR(lc->log_kthread);
		ti->error = "Couldn't alloc kthread";
		dm_put_device(ti, lc->dev);
		dm_put_device(ti, lc->logdev);
		goto bad;
	}

	/*
	 * next_sector is in 512b sectors to correspond to what bi_sector expects.
	 * The super starts at sector 0, and the next_sector is the next logical
	 * one based on the sectorsize of the device.
	 */
	lc->next_sector = lc->sectorsize >> SECTOR_SHIFT;
	lc->logging_enabled = true;
	lc->end_sector = logdev_last_sector(lc);
	lc->device_supports_discard = true;

	ti->num_flush_bios = 1;
	ti->flush_supported = true;
	ti->num_discard_bios = 1;
	ti->discards_supported = true;
	ti->per_io_data_size = sizeof(struct per_bio_data);
	ti->private = lc;
	return 0;

bad:
	kfree(lc);
	return ret;
}

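/*
 * Queue a MARK entry carrying a userspace supplied string.  The string is
 * truncated so that it fits in the entry sector alongside the
 * log_write_entry header (hence maxsize - 1, leaving room for the
 * terminating NUL).
 */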
static int log_mark(struct log_writes_c *lc, char *data)
{
	struct pending_block *block;
	size_t maxsize = lc->sectorsize - sizeof(struct log_write_entry);

	block = kzalloc(sizeof(struct pending_block), GFP_KERNEL);
	if (!block) {
		DMERR("Error allocating pending block");
		return -ENOMEM;
	}

	block->data = kstrndup(data, maxsize - 1, GFP_KERNEL);
	if (!block->data) {
		DMERR("Error copying mark data");
		kfree(block);
		return -ENOMEM;
	}
	atomic_inc(&lc->pending_blocks);
	block->datalen = strlen(block->data);
	block->flags |= LOG_MARK_FLAG;
	spin_lock_irq(&lc->blocks_lock);
	list_add_tail(&block->list, &lc->logging_blocks);
	spin_unlock_irq(&lc->blocks_lock);
	wake_up_process(lc->log_kthread);
	return 0;
}

static void log_writes_dtr(struct dm_target *ti)
{
	struct log_writes_c *lc = ti->private;

	spin_lock_irq(&lc->blocks_lock);
	list_splice_init(&lc->unflushed_blocks, &lc->logging_blocks);
	spin_unlock_irq(&lc->blocks_lock);

	/*
	 * This is just nice to have since it'll update the super to include the
	 * unflushed blocks; if it fails we don't really care.
	 */
	log_mark(lc, "dm-log-writes-end");
	wake_up_process(lc->log_kthread);
	wait_event(lc->wait, !atomic_read(&lc->io_blocks) &&
			     !atomic_read(&lc->pending_blocks));
	kthread_stop(lc->log_kthread);

	WARN_ON(!list_empty(&lc->logging_blocks));
	WARN_ON(!list_empty(&lc->unflushed_blocks));
	dm_put_device(ti, lc->dev);
	dm_put_device(ti, lc->logdev);
	kfree(lc);
}

static void normal_map_bio(struct dm_target *ti, struct bio *bio)
{
	struct log_writes_c *lc = ti->private;

	bio_set_dev(bio, lc->dev->bdev);
}

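/*
 * Remap a bio to the real device and, for writes, queue a matching
 * pending_block: reads and zero-length non-flushes pass straight through,
 * empty flushes splice all unflushed blocks onto themselves, discards are
 * logged without data, and ordinary write data is copied into fresh pages
 * so it survives until the kthread gets to it.
 */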
static int log_writes_map(struct dm_target *ti, struct bio *bio)
{
	struct log_writes_c *lc = ti->private;
	struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data));
	struct pending_block *block;
	struct bvec_iter iter;
	struct bio_vec bv;
	size_t alloc_size;
	int i = 0;
	bool flush_bio = (bio->bi_opf & REQ_PREFLUSH);
	bool fua_bio = (bio->bi_opf & REQ_FUA);
	bool discard_bio = (bio_op(bio) == REQ_OP_DISCARD);
	bool meta_bio = (bio->bi_opf & REQ_META);

	pb->block = NULL;

	/* Don't bother doing anything if logging has been disabled */
	if (!lc->logging_enabled)
		goto map_bio;

	/*
	 * Map reads as normal.
	 */
	if (bio_data_dir(bio) == READ)
		goto map_bio;

	/* No sectors and not a flush? Don't care */
	if (!bio_sectors(bio) && !flush_bio)
		goto map_bio;

	/*
	 * Discards will have bi_size set but there's no actual data, so just
	 * allocate the size of the pending block.
	 */
	if (discard_bio)
		alloc_size = sizeof(struct pending_block);
	else
		alloc_size = struct_size(block, vecs, bio_segments(bio));

	block = kzalloc(alloc_size, GFP_NOIO);
	if (!block) {
		DMERR("Error allocating pending block");
		spin_lock_irq(&lc->blocks_lock);
		lc->logging_enabled = false;
		spin_unlock_irq(&lc->blocks_lock);
		return DM_MAPIO_KILL;
	}
	INIT_LIST_HEAD(&block->list);
	pb->block = block;
	atomic_inc(&lc->pending_blocks);

	if (flush_bio)
		block->flags |= LOG_FLUSH_FLAG;
	if (fua_bio)
		block->flags |= LOG_FUA_FLAG;
	if (discard_bio)
		block->flags |= LOG_DISCARD_FLAG;
	if (meta_bio)
		block->flags |= LOG_METADATA_FLAG;

	block->sector = bio_to_dev_sectors(lc, bio->bi_iter.bi_sector);
	block->nr_sectors = bio_to_dev_sectors(lc, bio_sectors(bio));

	/* We don't need the data, just submit */
	if (discard_bio) {
		WARN_ON(flush_bio || fua_bio);
		if (lc->device_supports_discard)
			goto map_bio;
		bio_endio(bio);
		return DM_MAPIO_SUBMITTED;
	}

	/* Flush bio, splice the unflushed blocks onto this list and submit */
	if (flush_bio && !bio_sectors(bio)) {
		spin_lock_irq(&lc->blocks_lock);
		list_splice_init(&lc->unflushed_blocks, &block->list);
		spin_unlock_irq(&lc->blocks_lock);
		goto map_bio;
	}

	/*
	 * We will write this bio somewhere else way later, so we need to copy
	 * the actual contents into new pages so that we know the data will
	 * always be there.
	 *
	 * We do this because this could be a bio from O_DIRECT, in which case
	 * we can't just hold onto the page until some later point; we have to
	 * manually copy the contents.
	 */
	bio_for_each_segment(bv, bio, iter) {
		struct page *page;
		void *src, *dst;

		page = alloc_page(GFP_NOIO);
		if (!page) {
			DMERR("Error allocating page");
			free_pending_block(lc, block);
			spin_lock_irq(&lc->blocks_lock);
			lc->logging_enabled = false;
			spin_unlock_irq(&lc->blocks_lock);
			return DM_MAPIO_KILL;
		}

		src = kmap_atomic(bv.bv_page);
		dst = kmap_atomic(page);
		memcpy(dst, src + bv.bv_offset, bv.bv_len);
		kunmap_atomic(dst);
		kunmap_atomic(src);
		block->vecs[i].bv_page = page;
		block->vecs[i].bv_len = bv.bv_len;
		block->vec_cnt++;
		i++;
	}

	/* Had a flush with data in it, weird */
	if (flush_bio) {
		spin_lock_irq(&lc->blocks_lock);
		list_splice_init(&lc->unflushed_blocks, &block->list);
		spin_unlock_irq(&lc->blocks_lock);
	}
map_bio:
	normal_map_bio(ti, bio);
	return DM_MAPIO_REMAPPED;
}

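/*
 * Completion on the real device: now that the write is actually down (or,
 * for a flush, the cache has been flushed), decide where its block goes.
 * Flushes drag every unflushed block onto logging_blocks with them, FUA
 * blocks go there directly, and everything else waits on unflushed_blocks
 * for the next flush.
 */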
static int normal_end_io(struct dm_target *ti, struct bio *bio,
		blk_status_t *error)
{
	struct log_writes_c *lc = ti->private;
	struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data));

	if (bio_data_dir(bio) == WRITE && pb->block) {
		struct pending_block *block = pb->block;
		unsigned long flags;

		spin_lock_irqsave(&lc->blocks_lock, flags);
		if (block->flags & LOG_FLUSH_FLAG) {
			/*
			 * The flush completed, so every block it gathered is
			 * now stable: queue them for logging, then the flush
			 * record itself.
			 */
			list_splice_tail_init(&block->list, &lc->logging_blocks);
			list_add_tail(&block->list, &lc->logging_blocks);
			wake_up_process(lc->log_kthread);
		} else if (block->flags & LOG_FUA_FLAG) {
			list_add_tail(&block->list, &lc->logging_blocks);
			wake_up_process(lc->log_kthread);
		} else
			list_add_tail(&block->list, &lc->unflushed_blocks);
		spin_unlock_irqrestore(&lc->blocks_lock, flags);
	}

	return DM_ENDIO_DONE;
}

/*
 * INFO format: <logged entries> <highest allocated sector>
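 *
 * For example, "dmsetup status" on this target might report (values
 * illustrative):
 *   0 8388608 log-writes 14 1024
 * i.e. 14 entries logged, with sector 1024 the highest allocated on the
 * log device.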
 */
static void log_writes_status(struct dm_target *ti, status_type_t type,
			      unsigned status_flags, char *result,
			      unsigned maxlen)
{
	unsigned sz = 0;
	struct log_writes_c *lc = ti->private;

	switch (type) {
	case STATUSTYPE_INFO:
		DMEMIT("%llu %llu", lc->logged_entries,
		       (unsigned long long)lc->next_sector - 1);
		if (!lc->logging_enabled)
			DMEMIT(" logging_disabled");
		break;

	case STATUSTYPE_TABLE:
		DMEMIT("%s %s", lc->dev->name, lc->logdev->name);
		break;
	}
}

static int log_writes_prepare_ioctl(struct dm_target *ti,
				    struct block_device **bdev)
{
	struct log_writes_c *lc = ti->private;
	struct dm_dev *dev = lc->dev;

	*bdev = dev->bdev;
	/*
	 * Only pass ioctls through if the device sizes match exactly.
	 */
	if (ti->len != i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT)
		return 1;
	return 0;
}

static int log_writes_iterate_devices(struct dm_target *ti,
				      iterate_devices_callout_fn fn,
				      void *data)
{
	struct log_writes_c *lc = ti->private;

	return fn(ti, lc->dev, 0, ti->len, data);
}

/*
 * Messages supported:
 *   mark <mark data> - specify the marked data.
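 *
 * For example (device name illustrative):
 *   dmsetup message /dev/mapper/log 0 mark mymark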
 */
static int log_writes_message(struct dm_target *ti, unsigned argc, char **argv,
			      char *result, unsigned maxlen)
{
	int r = -EINVAL;
	struct log_writes_c *lc = ti->private;

	if (argc != 2) {
		DMWARN("Invalid log-writes message arguments, expected 2 arguments, got %d", argc);
		return r;
	}

	if (!strcasecmp(argv[0], "mark"))
		r = log_mark(lc, argv[1]);
	else
		DMWARN("Unrecognised log writes target message received: %s", argv[0]);

	return r;
}

static void log_writes_io_hints(struct dm_target *ti, struct queue_limits *limits)
{
	struct log_writes_c *lc = ti->private;
	struct request_queue *q = bdev_get_queue(lc->dev->bdev);

	if (!q || !blk_queue_discard(q)) {
		/*
		 * The underlying device can't discard; advertise discard
		 * limits ourselves so discards still reach this target and
		 * get logged.
		 */
		lc->device_supports_discard = false;
		limits->discard_granularity = lc->sectorsize;
		limits->max_discard_sectors = (UINT_MAX >> SECTOR_SHIFT);
	}
	limits->logical_block_size = bdev_logical_block_size(lc->dev->bdev);
	limits->physical_block_size = bdev_physical_block_size(lc->dev->bdev);
	limits->io_min = limits->physical_block_size;
}

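/*
 * DAX writes bypass the bio path entirely, so the copy_from_iter hook
 * below snapshots the data straight out of the iov_iter and queues it
 * for the log kthread before letting the actual copy proceed.
 */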
#if IS_ENABLED(CONFIG_DAX_DRIVER)
static int log_dax(struct log_writes_c *lc, sector_t sector, size_t bytes,
		   struct iov_iter *i)
{
	struct pending_block *block;

	if (!bytes)
		return 0;

	block = kzalloc(sizeof(struct pending_block), GFP_KERNEL);
	if (!block) {
		DMERR("Error allocating dax pending block");
		return -ENOMEM;
	}

	block->data = kzalloc(bytes, GFP_KERNEL);
	if (!block->data) {
		DMERR("Error allocating dax data space");
		kfree(block);
		return -ENOMEM;
	}

	/* Snapshot the data provided via the iterator for later logging. */
	if (!copy_from_iter(block->data, bytes, i)) {
		DMERR("Error copying dax data");
		kfree(block->data);
		kfree(block);
		return -EIO;
	}

	/* Rewind the iterator so that the block driver can use it. */
	iov_iter_revert(i, bytes);

	block->datalen = bytes;
	block->sector = bio_to_dev_sectors(lc, sector);
	block->nr_sectors = ALIGN(bytes, lc->sectorsize) >> lc->sectorshift;

	atomic_inc(&lc->pending_blocks);
	spin_lock_irq(&lc->blocks_lock);
	list_add_tail(&block->list, &lc->unflushed_blocks);
	spin_unlock_irq(&lc->blocks_lock);
	wake_up_process(lc->log_kthread);

	return 0;
}

static long log_writes_dax_direct_access(struct dm_target *ti, pgoff_t pgoff,
					 long nr_pages, void **kaddr, pfn_t *pfn)
{
	struct log_writes_c *lc = ti->private;
	sector_t sector = pgoff * PAGE_SECTORS;
	int ret;

	ret = bdev_dax_pgoff(lc->dev->bdev, sector, nr_pages * PAGE_SIZE, &pgoff);
	if (ret)
		return ret;
	return dax_direct_access(lc->dev->dax_dev, pgoff, nr_pages, kaddr, pfn);
}

static size_t log_writes_dax_copy_from_iter(struct dm_target *ti,
					    pgoff_t pgoff, void *addr, size_t bytes,
					    struct iov_iter *i)
{
	struct log_writes_c *lc = ti->private;
	sector_t sector = pgoff * PAGE_SECTORS;
	int err;

	if (bdev_dax_pgoff(lc->dev->bdev, sector, ALIGN(bytes, PAGE_SIZE), &pgoff))
		return 0;

	/* Don't bother doing anything if logging has been disabled */
	if (!lc->logging_enabled)
		goto dax_copy;

	err = log_dax(lc, sector, bytes, i);
	if (err) {
		DMWARN("Error %d logging DAX write", err);
		return 0;
	}
dax_copy:
	return dax_copy_from_iter(lc->dev->dax_dev, pgoff, addr, bytes, i);
}

static size_t log_writes_dax_copy_to_iter(struct dm_target *ti,
					  pgoff_t pgoff, void *addr, size_t bytes,
					  struct iov_iter *i)
{
	struct log_writes_c *lc = ti->private;
	sector_t sector = pgoff * PAGE_SECTORS;

	if (bdev_dax_pgoff(lc->dev->bdev, sector, ALIGN(bytes, PAGE_SIZE), &pgoff))
		return 0;
	return dax_copy_to_iter(lc->dev->dax_dev, pgoff, addr, bytes, i);
}

static int log_writes_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff,
					  size_t nr_pages)
{
	int ret;
	struct log_writes_c *lc = ti->private;
	sector_t sector = pgoff * PAGE_SECTORS;

	ret = bdev_dax_pgoff(lc->dev->bdev, sector, nr_pages << PAGE_SHIFT,
			     &pgoff);
	if (ret)
		return ret;
	return dax_zero_page_range(lc->dev->dax_dev, pgoff,
				   nr_pages << PAGE_SHIFT);
}

#else
#define log_writes_dax_direct_access NULL
#define log_writes_dax_copy_from_iter NULL
#define log_writes_dax_copy_to_iter NULL
#define log_writes_dax_zero_page_range NULL
#endif

static struct target_type log_writes_target = {
	.name = "log-writes",
	.version = {1, 1, 0},
	.module = THIS_MODULE,
	.ctr = log_writes_ctr,
	.dtr = log_writes_dtr,
	.map = log_writes_map,
	.end_io = normal_end_io,
	.status = log_writes_status,
	.prepare_ioctl = log_writes_prepare_ioctl,
	.message = log_writes_message,
	.iterate_devices = log_writes_iterate_devices,
	.io_hints = log_writes_io_hints,
	.direct_access = log_writes_dax_direct_access,
	.dax_copy_from_iter = log_writes_dax_copy_from_iter,
	.dax_copy_to_iter = log_writes_dax_copy_to_iter,
	.dax_zero_page_range = log_writes_dax_zero_page_range,
};
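
/*
 * Example table line for this target (device paths illustrative):
 *   dmsetup create log --table \
 *     "0 $(blockdev --getsz /dev/sdb) log-writes /dev/sdb /dev/sdc"
 * where /dev/sdb is the origin device and /dev/sdc receives the log.
 */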

static int __init dm_log_writes_init(void)
{
	int r = dm_register_target(&log_writes_target);

	if (r < 0)
		DMERR("register failed %d", r);

	return r;
}

static void __exit dm_log_writes_exit(void)
{
	dm_unregister_target(&log_writes_target);
}

module_init(dm_log_writes_init);
module_exit(dm_log_writes_exit);

MODULE_DESCRIPTION(DM_NAME " log writes target");
MODULE_AUTHOR("Josef Bacik <jbacik@fb.com>");
MODULE_LICENSE("GPL");