^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) // SPDX-License-Identifier: GPL-2.0-or-later
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) drbd_actlog.c
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) Copyright (C) 2003-2008, LINBIT Information Technologies GmbH.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) Copyright (C) 2003-2008, Philipp Reisner <philipp.reisner@linbit.com>.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) Copyright (C) 2003-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) #include <linux/slab.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15) #include <linux/crc32c.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) #include <linux/drbd.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) #include <linux/drbd_limits.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18) #include "drbd_int.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20)
/*
 * Values for the transaction_type field of an on-disk activity log
 * transaction block (see struct al_transaction_on_disk).
 */
enum al_transaction_types {
	/* Regular update transaction; also what a zero-filled header encodes. */
	AL_TR_UPDATE		= 0,
	/* NOTE(review): not referenced in the code visible here; presumably
	 * tags blocks of a freshly initialized activity log -- confirm. */
	AL_TR_INITIALIZED	= 0xffff
};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25) /* all fields on disc in big endian */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) struct __packed al_transaction_on_disk {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27) /* don't we all like magic */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28) __be32 magic;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30) /* to identify the most recent transaction block
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31) * in the on disk ring buffer */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32) __be32 tr_number;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34) /* checksum on the full 4k block, with this field set to 0. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35) __be32 crc32c;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37) /* type of transaction, special transaction types like:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38) * purge-all, set-all-idle, set-all-active, ... to-be-defined
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39) * see also enum al_transaction_types */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40) __be16 transaction_type;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42) /* we currently allow only a few thousand extents,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) * so 16bit will be enough for the slot number. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45) /* how many updates in this transaction */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46) __be16 n_updates;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48) /* maximum slot number, "al-extents" in drbd.conf speak.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49) * Having this in each transaction should make reconfiguration
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50) * of that parameter easier. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51) __be16 context_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53) /* slot number the context starts with */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54) __be16 context_start_slot_nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56) /* Some reserved bytes. Expected usage is a 64bit counter of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57) * sectors-written since device creation, and other data generation tag
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58) * supporting usage */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59) __be32 __reserved[4];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61) /* --- 36 byte used --- */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63) /* Reserve space for up to AL_UPDATES_PER_TRANSACTION changes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64) * in one transaction, then use the remaining byte in the 4k block for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65) * context information. "Flexible" number of updates per transaction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66) * does not help, as we have to account for the case when all update
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67) * slots are used anyways, so it would only complicate code without
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68) * additional benefit.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70) __be16 update_slot_nr[AL_UPDATES_PER_TRANSACTION];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72) /* but the extent number is 32bit, which at an extent size of 4 MiB
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73) * allows to cover device sizes of up to 2**54 Byte (16 PiB) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74) __be32 update_extent_nr[AL_UPDATES_PER_TRANSACTION];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76) /* --- 420 bytes used (36 + 64*6) --- */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78) /* 4096 - 420 = 3676 = 919 * 4 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79) __be32 context[AL_CONTEXT_PER_TRANSACTION];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82) void *drbd_md_get_buffer(struct drbd_device *device, const char *intent)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 83) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 84) int r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 85)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 86) wait_event(device->misc_wait,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 87) (r = atomic_cmpxchg(&device->md_io.in_use, 0, 1)) == 0 ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88) device->state.disk <= D_FAILED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90) if (r)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93) device->md_io.current_use = intent;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94) device->md_io.start_jif = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95) device->md_io.submit_jif = device->md_io.start_jif - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 96) return page_address(device->md_io.page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 98)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 99) void drbd_md_put_buffer(struct drbd_device *device)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) if (atomic_dec_and_test(&device->md_io.in_use))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) wake_up(&device->misc_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105) void wait_until_done_or_force_detached(struct drbd_device *device, struct drbd_backing_dev *bdev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) unsigned int *done)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108) long dt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) dt = rcu_dereference(bdev->disk_conf)->disk_timeout;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113) dt = dt * HZ / 10;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) if (dt == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) dt = MAX_SCHEDULE_TIMEOUT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117) dt = wait_event_timeout(device->misc_wait,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118) *done || test_bit(FORCE_DETACH, &device->flags), dt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) if (dt == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120) drbd_err(device, "meta-data IO operation timed out\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121) drbd_chk_io_error(device, 1, DRBD_FORCE_DETACH);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125) static int _drbd_md_sync_page_io(struct drbd_device *device,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) struct drbd_backing_dev *bdev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127) sector_t sector, int op)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129) struct bio *bio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130) /* we do all our meta data IO in aligned 4k blocks. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131) const int size = 4096;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) int err, op_flags = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134) device->md_io.done = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135) device->md_io.error = -ENODEV;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137) if ((op == REQ_OP_WRITE) && !test_bit(MD_NO_FUA, &device->flags))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138) op_flags |= REQ_FUA | REQ_PREFLUSH;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139) op_flags |= REQ_SYNC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141) bio = bio_alloc_drbd(GFP_NOIO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142) bio_set_dev(bio, bdev->md_bdev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143) bio->bi_iter.bi_sector = sector;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144) err = -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145) if (bio_add_page(bio, device->md_io.page, size, 0) != size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147) bio->bi_private = device;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148) bio->bi_end_io = drbd_md_endio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149) bio_set_op_attrs(bio, op, op_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151) if (op != REQ_OP_WRITE && device->state.disk == D_DISKLESS && device->ldev == NULL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152) /* special case, drbd_md_read() during drbd_adm_attach(): no get_ldev */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153) ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154) else if (!get_ldev_if_state(device, D_ATTACHING)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155) /* Corresponding put_ldev in drbd_md_endio() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156) drbd_err(device, "ASSERT FAILED: get_ldev_if_state() == 1 in _drbd_md_sync_page_io()\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157) err = -ENODEV;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 160)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 161) bio_get(bio); /* one bio_put() is in the completion handler */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 162) atomic_inc(&device->md_io.in_use); /* drbd_md_put_buffer() is in the completion handler */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 163) device->md_io.submit_jif = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 164) if (drbd_insert_fault(device, (op == REQ_OP_WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 165) bio_io_error(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 166) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167) submit_bio(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 168) wait_until_done_or_force_detached(device, bdev, &device->md_io.done);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169) if (!bio->bi_status)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170) err = device->md_io.error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 173) bio_put(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 174) return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 175) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 176)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 177) int drbd_md_sync_page_io(struct drbd_device *device, struct drbd_backing_dev *bdev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 178) sector_t sector, int op)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 179) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 180) int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 181) D_ASSERT(device, atomic_read(&device->md_io.in_use) == 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 182)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 183) BUG_ON(!bdev->md_bdev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 184)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 185) dynamic_drbd_dbg(device, "meta_data io: %s [%d]:%s(,%llus,%s) %pS\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 186) current->comm, current->pid, __func__,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 187) (unsigned long long)sector, (op == REQ_OP_WRITE) ? "WRITE" : "READ",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 188) (void*)_RET_IP_ );
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 189)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 190) if (sector < drbd_md_first_sector(bdev) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 191) sector + 7 > drbd_md_last_sector(bdev))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 192) drbd_alert(device, "%s [%d]:%s(,%llus,%s) out of range md access!\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 193) current->comm, current->pid, __func__,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 194) (unsigned long long)sector,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 195) (op == REQ_OP_WRITE) ? "WRITE" : "READ");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 196)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 197) err = _drbd_md_sync_page_io(device, bdev, sector, op);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 198) if (err) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 199) drbd_err(device, "drbd_md_sync_page_io(,%llus,%s) failed with error %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 200) (unsigned long long)sector,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 201) (op == REQ_OP_WRITE) ? "WRITE" : "READ", err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 202) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 203) return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 204) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 205)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 206) static struct bm_extent *find_active_resync_extent(struct drbd_device *device, unsigned int enr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 207) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 208) struct lc_element *tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 209) tmp = lc_find(device->resync, enr/AL_EXT_PER_BM_SECT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 210) if (unlikely(tmp != NULL)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 211) struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 212) if (test_bit(BME_NO_WRITES, &bm_ext->flags))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 213) return bm_ext;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 214) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 215) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 216) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 217)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 218) static struct lc_element *_al_get(struct drbd_device *device, unsigned int enr, bool nonblock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 219) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 220) struct lc_element *al_ext;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 221) struct bm_extent *bm_ext;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 222) int wake;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 223)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 224) spin_lock_irq(&device->al_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 225) bm_ext = find_active_resync_extent(device, enr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 226) if (bm_ext) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 227) wake = !test_and_set_bit(BME_PRIORITY, &bm_ext->flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 228) spin_unlock_irq(&device->al_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 229) if (wake)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 230) wake_up(&device->al_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 231) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 232) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 233) if (nonblock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 234) al_ext = lc_try_get(device->act_log, enr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 235) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 236) al_ext = lc_get(device->act_log, enr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 237) spin_unlock_irq(&device->al_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 238) return al_ext;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 239) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 240)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 241) bool drbd_al_begin_io_fastpath(struct drbd_device *device, struct drbd_interval *i)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 242) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 243) /* for bios crossing activity log extent boundaries,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 244) * we may need to activate two extents in one go */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 245) unsigned first = i->sector >> (AL_EXTENT_SHIFT-9);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 246) unsigned last = i->size == 0 ? first : (i->sector + (i->size >> 9) - 1) >> (AL_EXTENT_SHIFT-9);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 247)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 248) D_ASSERT(device, first <= last);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 249) D_ASSERT(device, atomic_read(&device->local_cnt) > 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 250)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 251) /* FIXME figure out a fast path for bios crossing AL extent boundaries */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 252) if (first != last)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 253) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 254)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 255) return _al_get(device, first, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 256) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 257)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 258) bool drbd_al_begin_io_prepare(struct drbd_device *device, struct drbd_interval *i)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 259) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 260) /* for bios crossing activity log extent boundaries,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 261) * we may need to activate two extents in one go */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 262) unsigned first = i->sector >> (AL_EXTENT_SHIFT-9);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 263) unsigned last = i->size == 0 ? first : (i->sector + (i->size >> 9) - 1) >> (AL_EXTENT_SHIFT-9);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 264) unsigned enr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 265) bool need_transaction = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 266)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 267) D_ASSERT(device, first <= last);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 268) D_ASSERT(device, atomic_read(&device->local_cnt) > 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 269)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 270) for (enr = first; enr <= last; enr++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 271) struct lc_element *al_ext;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 272) wait_event(device->al_wait,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 273) (al_ext = _al_get(device, enr, false)) != NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 274) if (al_ext->lc_number != enr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 275) need_transaction = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 276) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 277) return need_transaction;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 278) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 279)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 280) #if (PAGE_SHIFT + 3) < (AL_EXTENT_SHIFT - BM_BLOCK_SHIFT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 281) /* Currently BM_BLOCK_SHIFT, BM_EXT_SHIFT and AL_EXTENT_SHIFT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 282) * are still coupled, or assume too much about their relation.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 283) * Code below will not work if this is violated.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 284) * Will be cleaned up with some followup patch.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 285) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 286) # error FIXME
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 287) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 288)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 289) static unsigned int al_extent_to_bm_page(unsigned int al_enr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 290) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 291) return al_enr >>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 292) /* bit to page */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 293) ((PAGE_SHIFT + 3) -
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 294) /* al extent number to bit */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 295) (AL_EXTENT_SHIFT - BM_BLOCK_SHIFT));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 296) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 297)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 298) static sector_t al_tr_number_to_on_disk_sector(struct drbd_device *device)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 299) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 300) const unsigned int stripes = device->ldev->md.al_stripes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 301) const unsigned int stripe_size_4kB = device->ldev->md.al_stripe_size_4k;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 302)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 303) /* transaction number, modulo on-disk ring buffer wrap around */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 304) unsigned int t = device->al_tr_number % (device->ldev->md.al_size_4k);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 305)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 306) /* ... to aligned 4k on disk block */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 307) t = ((t % stripes) * stripe_size_4kB) + t/stripes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 308)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 309) /* ... to 512 byte sector in activity log */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 310) t *= 8;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 311)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 312) /* ... plus offset to the on disk position */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 313) return device->ldev->md.md_offset + device->ldev->md.al_offset + t;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 314) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 315)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 316) static int __al_write_transaction(struct drbd_device *device, struct al_transaction_on_disk *buffer)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 317) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 318) struct lc_element *e;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 319) sector_t sector;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 320) int i, mx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 321) unsigned extent_nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 322) unsigned crc = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 323) int err = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 324)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 325) memset(buffer, 0, sizeof(*buffer));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 326) buffer->magic = cpu_to_be32(DRBD_AL_MAGIC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 327) buffer->tr_number = cpu_to_be32(device->al_tr_number);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 328)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 329) i = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 330)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 331) drbd_bm_reset_al_hints(device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 332)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 333) /* Even though no one can start to change this list
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 334) * once we set the LC_LOCKED -- from drbd_al_begin_io(),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 335) * lc_try_lock_for_transaction() --, someone may still
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 336) * be in the process of changing it. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 337) spin_lock_irq(&device->al_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 338) list_for_each_entry(e, &device->act_log->to_be_changed, list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 339) if (i == AL_UPDATES_PER_TRANSACTION) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 340) i++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 341) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 342) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 343) buffer->update_slot_nr[i] = cpu_to_be16(e->lc_index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 344) buffer->update_extent_nr[i] = cpu_to_be32(e->lc_new_number);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 345) if (e->lc_number != LC_FREE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 346) drbd_bm_mark_for_writeout(device,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 347) al_extent_to_bm_page(e->lc_number));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 348) i++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 349) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 350) spin_unlock_irq(&device->al_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 351) BUG_ON(i > AL_UPDATES_PER_TRANSACTION);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 352)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 353) buffer->n_updates = cpu_to_be16(i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 354) for ( ; i < AL_UPDATES_PER_TRANSACTION; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 355) buffer->update_slot_nr[i] = cpu_to_be16(-1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 356) buffer->update_extent_nr[i] = cpu_to_be32(LC_FREE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 357) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 358)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 359) buffer->context_size = cpu_to_be16(device->act_log->nr_elements);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 360) buffer->context_start_slot_nr = cpu_to_be16(device->al_tr_cycle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 361)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 362) mx = min_t(int, AL_CONTEXT_PER_TRANSACTION,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 363) device->act_log->nr_elements - device->al_tr_cycle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 364) for (i = 0; i < mx; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 365) unsigned idx = device->al_tr_cycle + i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 366) extent_nr = lc_element_by_index(device->act_log, idx)->lc_number;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 367) buffer->context[i] = cpu_to_be32(extent_nr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 368) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 369) for (; i < AL_CONTEXT_PER_TRANSACTION; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 370) buffer->context[i] = cpu_to_be32(LC_FREE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 371)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 372) device->al_tr_cycle += AL_CONTEXT_PER_TRANSACTION;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 373) if (device->al_tr_cycle >= device->act_log->nr_elements)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 374) device->al_tr_cycle = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 375)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 376) sector = al_tr_number_to_on_disk_sector(device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 377)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 378) crc = crc32c(0, buffer, 4096);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 379) buffer->crc32c = cpu_to_be32(crc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 380)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 381) if (drbd_bm_write_hinted(device))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 382) err = -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 383) else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 384) bool write_al_updates;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 385) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 386) write_al_updates = rcu_dereference(device->ldev->disk_conf)->al_updates;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 387) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 388) if (write_al_updates) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 389) if (drbd_md_sync_page_io(device, device->ldev, sector, WRITE)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 390) err = -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 391) drbd_chk_io_error(device, 1, DRBD_META_IO_ERROR);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 392) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 393) device->al_tr_number++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 394) device->al_writ_cnt++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 395) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 396) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 397) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 398)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 399) return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 400) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 401)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 402) static int al_write_transaction(struct drbd_device *device)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 403) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 404) struct al_transaction_on_disk *buffer;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 405) int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 406)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 407) if (!get_ldev(device)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 408) drbd_err(device, "disk is %s, cannot start al transaction\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 409) drbd_disk_str(device->state.disk));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 410) return -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 411) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 412)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 413) /* The bitmap write may have failed, causing a state change. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 414) if (device->state.disk < D_INCONSISTENT) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 415) drbd_err(device,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 416) "disk is %s, cannot write al transaction\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 417) drbd_disk_str(device->state.disk));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 418) put_ldev(device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 419) return -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 420) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 421)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 422) /* protects md_io_buffer, al_tr_cycle, ... */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 423) buffer = drbd_md_get_buffer(device, __func__);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 424) if (!buffer) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 425) drbd_err(device, "disk failed while waiting for md_io buffer\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 426) put_ldev(device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 427) return -ENODEV;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 428) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 429)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 430) err = __al_write_transaction(device, buffer);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 431)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 432) drbd_md_put_buffer(device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 433) put_ldev(device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 434)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 435) return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 436) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 437)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 438)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 439) void drbd_al_begin_io_commit(struct drbd_device *device)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 440) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 441) bool locked = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 442)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 443) /* Serialize multiple transactions.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 444) * This uses test_and_set_bit, memory barrier is implicit.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 445) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 446) wait_event(device->al_wait,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 447) device->act_log->pending_changes == 0 ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 448) (locked = lc_try_lock_for_transaction(device->act_log)));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 449)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 450) if (locked) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 451) /* Double check: it may have been committed by someone else,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 452) * while we have been waiting for the lock. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 453) if (device->act_log->pending_changes) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 454) bool write_al_updates;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 455)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 456) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 457) write_al_updates = rcu_dereference(device->ldev->disk_conf)->al_updates;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 458) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 459)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 460) if (write_al_updates)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 461) al_write_transaction(device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 462) spin_lock_irq(&device->al_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 463) /* FIXME
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 464) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 465) we need an "lc_cancel" here;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 466) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 467) lc_committed(device->act_log);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 468) spin_unlock_irq(&device->al_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 469) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 470) lc_unlock(device->act_log);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 471) wake_up(&device->al_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 472) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 473) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 474)
/*
 * Two-step activation of the activity log extents covered by interval @i:
 * run the prepare step, and run the commit step only if
 * drbd_al_begin_io_prepare() returned non-zero.
 */
void drbd_al_begin_io(struct drbd_device *device, struct drbd_interval *i)
{
	if (!drbd_al_begin_io_prepare(device, i))
		return;

	drbd_al_begin_io_commit(device);
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 483)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 484) int drbd_al_begin_io_nonblock(struct drbd_device *device, struct drbd_interval *i)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 485) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 486) struct lru_cache *al = device->act_log;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 487) /* for bios crossing activity log extent boundaries,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 488) * we may need to activate two extents in one go */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 489) unsigned first = i->sector >> (AL_EXTENT_SHIFT-9);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 490) unsigned last = i->size == 0 ? first : (i->sector + (i->size >> 9) - 1) >> (AL_EXTENT_SHIFT-9);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 491) unsigned nr_al_extents;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 492) unsigned available_update_slots;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 493) unsigned enr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 494)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 495) D_ASSERT(device, first <= last);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 496)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 497) nr_al_extents = 1 + last - first; /* worst case: all touched extends are cold. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 498) available_update_slots = min(al->nr_elements - al->used,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 499) al->max_pending_changes - al->pending_changes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 500)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 501) /* We want all necessary updates for a given request within the same transaction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 502) * We could first check how many updates are *actually* needed,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 503) * and use that instead of the worst-case nr_al_extents */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 504) if (available_update_slots < nr_al_extents) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 505) /* Too many activity log extents are currently "hot".
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 506) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 507) * If we have accumulated pending changes already,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 508) * we made progress.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 509) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 510) * If we cannot get even a single pending change through,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 511) * stop the fast path until we made some progress,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 512) * or requests to "cold" extents could be starved. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 513) if (!al->pending_changes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 514) __set_bit(__LC_STARVING, &device->act_log->flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 515) return -ENOBUFS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 516) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 517)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 518) /* Is resync active in this area? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 519) for (enr = first; enr <= last; enr++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 520) struct lc_element *tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 521) tmp = lc_find(device->resync, enr/AL_EXT_PER_BM_SECT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 522) if (unlikely(tmp != NULL)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 523) struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 524) if (test_bit(BME_NO_WRITES, &bm_ext->flags)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 525) if (!test_and_set_bit(BME_PRIORITY, &bm_ext->flags))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 526) return -EBUSY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 527) return -EWOULDBLOCK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 528) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 529) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 530) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 531)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 532) /* Checkout the refcounts.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 533) * Given that we checked for available elements and update slots above,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 534) * this has to be successful. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 535) for (enr = first; enr <= last; enr++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 536) struct lc_element *al_ext;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 537) al_ext = lc_get_cumulative(device->act_log, enr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 538) if (!al_ext)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 539) drbd_info(device, "LOGIC BUG for enr=%u\n", enr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 540) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 541) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 542) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 543)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 544) void drbd_al_complete_io(struct drbd_device *device, struct drbd_interval *i)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 545) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 546) /* for bios crossing activity log extent boundaries,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 547) * we may need to activate two extents in one go */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 548) unsigned first = i->sector >> (AL_EXTENT_SHIFT-9);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 549) unsigned last = i->size == 0 ? first : (i->sector + (i->size >> 9) - 1) >> (AL_EXTENT_SHIFT-9);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 550) unsigned enr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 551) struct lc_element *extent;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 552) unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 553)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 554) D_ASSERT(device, first <= last);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 555) spin_lock_irqsave(&device->al_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 556)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 557) for (enr = first; enr <= last; enr++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 558) extent = lc_find(device->act_log, enr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 559) if (!extent) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 560) drbd_err(device, "al_complete_io() called on inactive extent %u\n", enr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 561) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 562) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 563) lc_put(device->act_log, extent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 564) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 565) spin_unlock_irqrestore(&device->al_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 566) wake_up(&device->al_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 567) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 568)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 569) static int _try_lc_del(struct drbd_device *device, struct lc_element *al_ext)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 570) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 571) int rv;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 572)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 573) spin_lock_irq(&device->al_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 574) rv = (al_ext->refcnt == 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 575) if (likely(rv))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 576) lc_del(device->act_log, al_ext);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 577) spin_unlock_irq(&device->al_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 578)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 579) return rv;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 580) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 581)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 582) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 583) * drbd_al_shrink() - Removes all active extents form the activity log
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 584) * @device: DRBD device.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 585) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 586) * Removes all active extents form the activity log, waiting until
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 587) * the reference count of each entry dropped to 0 first, of course.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 588) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 589) * You need to lock device->act_log with lc_try_lock() / lc_unlock()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 590) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 591) void drbd_al_shrink(struct drbd_device *device)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 592) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 593) struct lc_element *al_ext;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 594) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 595)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 596) D_ASSERT(device, test_bit(__LC_LOCKED, &device->act_log->flags));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 597)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 598) for (i = 0; i < device->act_log->nr_elements; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 599) al_ext = lc_element_by_index(device->act_log, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 600) if (al_ext->lc_number == LC_FREE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 601) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 602) wait_event(device->al_wait, _try_lc_del(device, al_ext));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 603) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 604)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 605) wake_up(&device->al_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 606) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 607)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 608) int drbd_al_initialize(struct drbd_device *device, void *buffer)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 609) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 610) struct al_transaction_on_disk *al = buffer;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 611) struct drbd_md *md = &device->ldev->md;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 612) int al_size_4k = md->al_stripes * md->al_stripe_size_4k;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 613) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 614)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 615) __al_write_transaction(device, al);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 616) /* There may or may not have been a pending transaction. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 617) spin_lock_irq(&device->al_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 618) lc_committed(device->act_log);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 619) spin_unlock_irq(&device->al_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 620)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 621) /* The rest of the transactions will have an empty "updates" list, and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 622) * are written out only to provide the context, and to initialize the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 623) * on-disk ring buffer. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 624) for (i = 1; i < al_size_4k; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 625) int err = __al_write_transaction(device, al);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 626) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 627) return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 628) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 629) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 630) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 631)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 632) static const char *drbd_change_sync_fname[] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 633) [RECORD_RS_FAILED] = "drbd_rs_failed_io",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 634) [SET_IN_SYNC] = "drbd_set_in_sync",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 635) [SET_OUT_OF_SYNC] = "drbd_set_out_of_sync"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 636) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 637)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 638) /* ATTENTION. The AL's extents are 4MB each, while the extents in the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 639) * resync LRU-cache are 16MB each.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 640) * The caller of this function has to hold an get_ldev() reference.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 641) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 642) * Adjusts the caching members ->rs_left (success) or ->rs_failed (!success),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 643) * potentially pulling in (and recounting the corresponding bits)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 644) * this resync extent into the resync extent lru cache.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 645) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 646) * Returns whether all bits have been cleared for this resync extent,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 647) * precisely: (rs_left <= rs_failed)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 648) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 649) * TODO will be obsoleted once we have a caching lru of the on disk bitmap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 650) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 651) static bool update_rs_extent(struct drbd_device *device,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 652) unsigned int enr, int count,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 653) enum update_sync_bits_mode mode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 654) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 655) struct lc_element *e;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 656)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 657) D_ASSERT(device, atomic_read(&device->local_cnt));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 658)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 659) /* When setting out-of-sync bits,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 660) * we don't need it cached (lc_find).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 661) * But if it is present in the cache,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 662) * we should update the cached bit count.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 663) * Otherwise, that extent should be in the resync extent lru cache
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 664) * already -- or we want to pull it in if necessary -- (lc_get),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 665) * then update and check rs_left and rs_failed. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 666) if (mode == SET_OUT_OF_SYNC)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 667) e = lc_find(device->resync, enr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 668) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 669) e = lc_get(device->resync, enr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 670) if (e) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 671) struct bm_extent *ext = lc_entry(e, struct bm_extent, lce);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 672) if (ext->lce.lc_number == enr) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 673) if (mode == SET_IN_SYNC)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 674) ext->rs_left -= count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 675) else if (mode == SET_OUT_OF_SYNC)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 676) ext->rs_left += count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 677) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 678) ext->rs_failed += count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 679) if (ext->rs_left < ext->rs_failed) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 680) drbd_warn(device, "BAD! enr=%u rs_left=%d "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 681) "rs_failed=%d count=%d cstate=%s\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 682) ext->lce.lc_number, ext->rs_left,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 683) ext->rs_failed, count,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 684) drbd_conn_str(device->state.conn));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 685)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 686) /* We don't expect to be able to clear more bits
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 687) * than have been set when we originally counted
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 688) * the set bits to cache that value in ext->rs_left.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 689) * Whatever the reason (disconnect during resync,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 690) * delayed local completion of an application write),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 691) * try to fix it up by recounting here. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 692) ext->rs_left = drbd_bm_e_weight(device, enr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 693) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 694) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 695) /* Normally this element should be in the cache,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 696) * since drbd_rs_begin_io() pulled it already in.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 697) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 698) * But maybe an application write finished, and we set
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 699) * something outside the resync lru_cache in sync.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 700) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 701) int rs_left = drbd_bm_e_weight(device, enr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 702) if (ext->flags != 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 703) drbd_warn(device, "changing resync lce: %d[%u;%02lx]"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 704) " -> %d[%u;00]\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 705) ext->lce.lc_number, ext->rs_left,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 706) ext->flags, enr, rs_left);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 707) ext->flags = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 708) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 709) if (ext->rs_failed) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 710) drbd_warn(device, "Kicking resync_lru element enr=%u "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 711) "out with rs_failed=%d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 712) ext->lce.lc_number, ext->rs_failed);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 713) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 714) ext->rs_left = rs_left;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 715) ext->rs_failed = (mode == RECORD_RS_FAILED) ? count : 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 716) /* we don't keep a persistent log of the resync lru,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 717) * we can commit any change right away. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 718) lc_committed(device->resync);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 719) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 720) if (mode != SET_OUT_OF_SYNC)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 721) lc_put(device->resync, &ext->lce);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 722) /* no race, we are within the al_lock! */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 723)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 724) if (ext->rs_left <= ext->rs_failed) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 725) ext->rs_failed = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 726) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 727) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 728) } else if (mode != SET_OUT_OF_SYNC) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 729) /* be quiet if lc_find() did not find it. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 730) drbd_err(device, "lc_get() failed! locked=%d/%d flags=%lu\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 731) device->resync_locked,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 732) device->resync->nr_elements,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 733) device->resync->flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 734) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 735) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 736) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 737)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 738) void drbd_advance_rs_marks(struct drbd_device *device, unsigned long still_to_go)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 739) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 740) unsigned long now = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 741) unsigned long last = device->rs_mark_time[device->rs_last_mark];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 742) int next = (device->rs_last_mark + 1) % DRBD_SYNC_MARKS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 743) if (time_after_eq(now, last + DRBD_SYNC_MARK_STEP)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 744) if (device->rs_mark_left[device->rs_last_mark] != still_to_go &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 745) device->state.conn != C_PAUSED_SYNC_T &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 746) device->state.conn != C_PAUSED_SYNC_S) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 747) device->rs_mark_time[next] = now;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 748) device->rs_mark_left[next] = still_to_go;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 749) device->rs_last_mark = next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 750) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 751) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 752) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 753)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 754) /* It is called lazy update, so don't do write-out too often. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 755) static bool lazy_bitmap_update_due(struct drbd_device *device)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 756) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 757) return time_after(jiffies, device->rs_last_bcast + 2*HZ);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 758) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 759)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 760) static void maybe_schedule_on_disk_bitmap_update(struct drbd_device *device, bool rs_done)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 761) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 762) if (rs_done) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 763) struct drbd_connection *connection = first_peer_device(device)->connection;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 764) if (connection->agreed_pro_version <= 95 ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 765) is_sync_target_state(device->state.conn))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 766) set_bit(RS_DONE, &device->flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 767) /* and also set RS_PROGRESS below */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 768)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 769) /* Else: rather wait for explicit notification via receive_state,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 770) * to avoid uuids-rotated-too-fast causing full resync
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 771) * in next handshake, in case the replication link breaks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 772) * at the most unfortunate time... */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 773) } else if (!lazy_bitmap_update_due(device))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 774) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 775)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 776) drbd_device_post_work(device, RS_PROGRESS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 777) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 778)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 779) static int update_sync_bits(struct drbd_device *device,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 780) unsigned long sbnr, unsigned long ebnr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 781) enum update_sync_bits_mode mode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 782) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 783) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 784) * We keep a count of set bits per resync-extent in the ->rs_left
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 785) * caching member, so we need to loop and work within the resync extent
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 786) * alignment. Typically this loop will execute exactly once.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 787) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 788) unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 789) unsigned long count = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 790) unsigned int cleared = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 791) while (sbnr <= ebnr) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 792) /* set temporary boundary bit number to last bit number within
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 793) * the resync extent of the current start bit number,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 794) * but cap at provided end bit number */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 795) unsigned long tbnr = min(ebnr, sbnr | BM_BLOCKS_PER_BM_EXT_MASK);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 796) unsigned long c;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 797)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 798) if (mode == RECORD_RS_FAILED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 799) /* Only called from drbd_rs_failed_io(), bits
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 800) * supposedly still set. Recount, maybe some
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 801) * of the bits have been successfully cleared
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 802) * by application IO meanwhile.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 803) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 804) c = drbd_bm_count_bits(device, sbnr, tbnr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 805) else if (mode == SET_IN_SYNC)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 806) c = drbd_bm_clear_bits(device, sbnr, tbnr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 807) else /* if (mode == SET_OUT_OF_SYNC) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 808) c = drbd_bm_set_bits(device, sbnr, tbnr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 809)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 810) if (c) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 811) spin_lock_irqsave(&device->al_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 812) cleared += update_rs_extent(device, BM_BIT_TO_EXT(sbnr), c, mode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 813) spin_unlock_irqrestore(&device->al_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 814) count += c;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 815) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 816) sbnr = tbnr + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 817) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 818) if (count) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 819) if (mode == SET_IN_SYNC) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 820) unsigned long still_to_go = drbd_bm_total_weight(device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 821) bool rs_is_done = (still_to_go <= device->rs_failed);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 822) drbd_advance_rs_marks(device, still_to_go);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 823) if (cleared || rs_is_done)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 824) maybe_schedule_on_disk_bitmap_update(device, rs_is_done);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 825) } else if (mode == RECORD_RS_FAILED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 826) device->rs_failed += count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 827) wake_up(&device->al_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 828) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 829) return count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 830) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 831)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 832) static bool plausible_request_size(int size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 833) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 834) return size > 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 835) && size <= DRBD_MAX_BATCH_BIO_SIZE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 836) && IS_ALIGNED(size, 512);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 837) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 838)
/* clear the bits corresponding to the piece of storage in question:
 * size bytes of data starting from sector. Only clear the bits of the affected
 * one or more _aligned_ BM_BLOCK_SIZE blocks.
 *
 * called by worker on C_SYNC_TARGET and receiver on SyncSource.
 *
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 846) int __drbd_change_sync(struct drbd_device *device, sector_t sector, int size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 847) enum update_sync_bits_mode mode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 848) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 849) /* Is called from worker and receiver context _only_ */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 850) unsigned long sbnr, ebnr, lbnr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 851) unsigned long count = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 852) sector_t esector, nr_sectors;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 853)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 854) /* This would be an empty REQ_PREFLUSH, be silent. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 855) if ((mode == SET_OUT_OF_SYNC) && size == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 856) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 857)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 858) if (!plausible_request_size(size)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 859) drbd_err(device, "%s: sector=%llus size=%d nonsense!\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 860) drbd_change_sync_fname[mode],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 861) (unsigned long long)sector, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 862) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 863) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 864)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 865) if (!get_ldev(device))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 866) return 0; /* no disk, no metadata, no bitmap to manipulate bits in */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 867)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 868) nr_sectors = get_capacity(device->vdisk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 869) esector = sector + (size >> 9) - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 870)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 871) if (!expect(sector < nr_sectors))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 872) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 873) if (!expect(esector < nr_sectors))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 874) esector = nr_sectors - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 875)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 876) lbnr = BM_SECT_TO_BIT(nr_sectors-1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 877)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 878) if (mode == SET_IN_SYNC) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 879) /* Round up start sector, round down end sector. We make sure
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 880) * we only clear full, aligned, BM_BLOCK_SIZE blocks. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 881) if (unlikely(esector < BM_SECT_PER_BIT-1))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 882) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 883) if (unlikely(esector == (nr_sectors-1)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 884) ebnr = lbnr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 885) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 886) ebnr = BM_SECT_TO_BIT(esector - (BM_SECT_PER_BIT-1));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 887) sbnr = BM_SECT_TO_BIT(sector + BM_SECT_PER_BIT-1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 888) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 889) /* We set it out of sync, or record resync failure.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 890) * Should not round anything here. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 891) sbnr = BM_SECT_TO_BIT(sector);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 892) ebnr = BM_SECT_TO_BIT(esector);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 893) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 894)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 895) count = update_sync_bits(device, sbnr, ebnr, mode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 896) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 897) put_ldev(device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 898) return count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 899) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 900)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 901) static
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 902) struct bm_extent *_bme_get(struct drbd_device *device, unsigned int enr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 903) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 904) struct lc_element *e;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 905) struct bm_extent *bm_ext;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 906) int wakeup = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 907) unsigned long rs_flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 908)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 909) spin_lock_irq(&device->al_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 910) if (device->resync_locked > device->resync->nr_elements/2) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 911) spin_unlock_irq(&device->al_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 912) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 913) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 914) e = lc_get(device->resync, enr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 915) bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 916) if (bm_ext) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 917) if (bm_ext->lce.lc_number != enr) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 918) bm_ext->rs_left = drbd_bm_e_weight(device, enr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 919) bm_ext->rs_failed = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 920) lc_committed(device->resync);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 921) wakeup = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 922) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 923) if (bm_ext->lce.refcnt == 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 924) device->resync_locked++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 925) set_bit(BME_NO_WRITES, &bm_ext->flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 926) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 927) rs_flags = device->resync->flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 928) spin_unlock_irq(&device->al_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 929) if (wakeup)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 930) wake_up(&device->al_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 931)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 932) if (!bm_ext) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 933) if (rs_flags & LC_STARVING)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 934) drbd_warn(device, "Have to wait for element"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 935) " (resync LRU too small?)\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 936) BUG_ON(rs_flags & LC_LOCKED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 937) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 938)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 939) return bm_ext;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 940) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 941)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 942) static int _is_in_al(struct drbd_device *device, unsigned int enr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 943) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 944) int rv;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 945)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 946) spin_lock_irq(&device->al_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 947) rv = lc_is_used(device->act_log, enr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 948) spin_unlock_irq(&device->al_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 949)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 950) return rv;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 951) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 952)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 953) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 954) * drbd_rs_begin_io() - Gets an extent in the resync LRU cache and sets it to BME_LOCKED
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 955) * @device: DRBD device.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 956) * @sector: The sector number.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 957) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 958) * This functions sleeps on al_wait. Returns 0 on success, -EINTR if interrupted.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 959) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 960) int drbd_rs_begin_io(struct drbd_device *device, sector_t sector)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 961) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 962) unsigned int enr = BM_SECT_TO_EXT(sector);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 963) struct bm_extent *bm_ext;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 964) int i, sig;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 965) bool sa;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 966)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 967) retry:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 968) sig = wait_event_interruptible(device->al_wait,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 969) (bm_ext = _bme_get(device, enr)));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 970) if (sig)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 971) return -EINTR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 972)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 973) if (test_bit(BME_LOCKED, &bm_ext->flags))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 974) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 975)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 976) /* step aside only while we are above c-min-rate; unless disabled. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 977) sa = drbd_rs_c_min_rate_throttle(device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 978)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 979) for (i = 0; i < AL_EXT_PER_BM_SECT; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 980) sig = wait_event_interruptible(device->al_wait,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 981) !_is_in_al(device, enr * AL_EXT_PER_BM_SECT + i) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 982) (sa && test_bit(BME_PRIORITY, &bm_ext->flags)));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 983)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 984) if (sig || (sa && test_bit(BME_PRIORITY, &bm_ext->flags))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 985) spin_lock_irq(&device->al_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 986) if (lc_put(device->resync, &bm_ext->lce) == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 987) bm_ext->flags = 0; /* clears BME_NO_WRITES and eventually BME_PRIORITY */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 988) device->resync_locked--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 989) wake_up(&device->al_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 990) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 991) spin_unlock_irq(&device->al_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 992) if (sig)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 993) return -EINTR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 994) if (schedule_timeout_interruptible(HZ/10))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 995) return -EINTR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 996) goto retry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 997) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 998) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 999) set_bit(BME_LOCKED, &bm_ext->flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002)
/**
 * drbd_try_rs_begin_io() - Gets an extent in the resync LRU cache, does not sleep
 * @device:	DRBD device.
 * @sector:	The sector number.
 *
 * Gets an extent in the resync LRU cache, sets it to BME_NO_WRITES, then
 * tries to set it to BME_LOCKED.
 *
 * An extent that got BME_NO_WRITES but could not be BME_LOCKED yet is
 * remembered in device->resync_wenr, with its reference kept, so that the
 * next call for the same extent can continue where this one stopped.
 *
 * Return: 0 upon success, -EAGAIN if there is still application IO going on
 * in this area, if no element of the resync LRU could be obtained, or if
 * drbd_rs_should_slow_down() asked us to throttle.
 */
int drbd_try_rs_begin_io(struct drbd_device *device, sector_t sector)
{
	unsigned int enr = BM_SECT_TO_EXT(sector);
	/* number of the first activity log extent covered by resync extent enr */
	const unsigned int al_enr = enr*AL_EXT_PER_BM_SECT;
	struct lc_element *e;
	struct bm_extent *bm_ext;
	int i;
	bool throttle = drbd_rs_should_slow_down(device, sector, true);

	/* If we need to throttle, a half-locked (only marked BME_NO_WRITES,
	 * not yet BME_LOCKED) extent needs to be kicked out explicitly.
	 * There is at most one such half-locked extent, which is remembered
	 * in resync_wenr. So when throttling, bail out right away unless
	 * this call is about that very extent. */

	if (throttle && device->resync_wenr != enr)
		return -EAGAIN;

	spin_lock_irq(&device->al_lock);
	if (device->resync_wenr != LC_FREE && device->resync_wenr != enr) {
		/* In case of very heavy scattered IO, giving up the
		 * reference every time we have to try again and requeue
		 * may stall the syncer indefinitely.
		 *
		 * If we do not give up the reference, but by the next time
		 * we are scheduled this extent has been "synced" by new
		 * application writes, we would miss the lc_put on the
		 * extent we keep the reference on.
		 * So we remembered which extent we had to try again, and
		 * if the next requested one is something else, we do
		 * the lc_put here...
		 * We also have to wake_up.
		 */
		e = lc_find(device->resync, device->resync_wenr);
		bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
		if (bm_ext) {
			D_ASSERT(device, !test_bit(BME_LOCKED, &bm_ext->flags));
			D_ASSERT(device, test_bit(BME_NO_WRITES, &bm_ext->flags));
			clear_bit(BME_NO_WRITES, &bm_ext->flags);
			device->resync_wenr = LC_FREE;
			if (lc_put(device->resync, &bm_ext->lce) == 0) {
				/* last reference gone: reset flags and accounting */
				bm_ext->flags = 0;
				device->resync_locked--;
			}
			wake_up(&device->al_wait);
		} else {
			/* resync_wenr names an extent that lc_find() does not know */
			drbd_alert(device, "LOGIC BUG\n");
		}
	}
	/* TRY. */
	e = lc_try_get(device->resync, enr);
	bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
	if (bm_ext) {
		if (test_bit(BME_LOCKED, &bm_ext->flags))
			goto proceed;
		if (!test_and_set_bit(BME_NO_WRITES, &bm_ext->flags)) {
			/* BME_NO_WRITES was newly set by this call: count the extent */
			device->resync_locked++;
		} else {
			/* we did set the BME_NO_WRITES,
			 * but then could not set BME_LOCKED,
			 * so we tried again.
			 * drop the extra reference. */
			bm_ext->lce.refcnt--;
			D_ASSERT(device, bm_ext->lce.refcnt > 0);
		}
		goto check_al;
	} else {
		/* do we rather want to try later? */
		if (device->resync_locked > device->resync->nr_elements-3)
			goto try_again;
		/* Do or do not. There is no try. -- Yoda */
		e = lc_get(device->resync, enr);
		bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
		if (!bm_ext) {
			const unsigned long rs_flags = device->resync->flags;
			if (rs_flags & LC_STARVING)
				drbd_warn(device, "Have to wait for element"
					" (resync LRU too small?)\n");
			BUG_ON(rs_flags & LC_LOCKED);
			goto try_again;
		}
		if (bm_ext->lce.lc_number != enr) {
			/* the element handed out does not carry enr yet:
			 * initialize its counters and commit the change */
			bm_ext->rs_left = drbd_bm_e_weight(device, enr);
			bm_ext->rs_failed = 0;
			lc_committed(device->resync);
			wake_up(&device->al_wait);
			D_ASSERT(device, test_bit(BME_LOCKED, &bm_ext->flags) == 0);
		}
		set_bit(BME_NO_WRITES, &bm_ext->flags);
		D_ASSERT(device, bm_ext->lce.refcnt == 1);
		device->resync_locked++;
		goto check_al;
	}
check_al:
	/* BME_LOCKED may only be set once none of the activity log extents
	 * covered by this resync extent is in use. */
	for (i = 0; i < AL_EXT_PER_BM_SECT; i++) {
		if (lc_is_used(device->act_log, al_enr+i))
			goto try_again;
	}
	set_bit(BME_LOCKED, &bm_ext->flags);
proceed:
	/* success: there is no half-locked extent to remember any more */
	device->resync_wenr = LC_FREE;
	spin_unlock_irq(&device->al_lock);
	return 0;

try_again:
	/* bm_ext is NULL here if we did not even get an element */
	if (bm_ext) {
		if (throttle) {
			/* throttling: kick out the half-locked extent
			 * instead of remembering it */
			D_ASSERT(device, !test_bit(BME_LOCKED, &bm_ext->flags));
			D_ASSERT(device, test_bit(BME_NO_WRITES, &bm_ext->flags));
			clear_bit(BME_NO_WRITES, &bm_ext->flags);
			device->resync_wenr = LC_FREE;
			if (lc_put(device->resync, &bm_ext->lce) == 0) {
				bm_ext->flags = 0;
				device->resync_locked--;
			}
			wake_up(&device->al_wait);
		} else
			/* keep the reference, remember the extent for the next call */
			device->resync_wenr = enr;
	}
	spin_unlock_irq(&device->al_lock);
	return -EAGAIN;
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134) void drbd_rs_complete_io(struct drbd_device *device, sector_t sector)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136) unsigned int enr = BM_SECT_TO_EXT(sector);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137) struct lc_element *e;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138) struct bm_extent *bm_ext;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139) unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141) spin_lock_irqsave(&device->al_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142) e = lc_find(device->resync, enr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143) bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144) if (!bm_ext) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145) spin_unlock_irqrestore(&device->al_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146) if (__ratelimit(&drbd_ratelimit_state))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147) drbd_err(device, "drbd_rs_complete_io() called, but extent not found\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151) if (bm_ext->lce.refcnt == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152) spin_unlock_irqrestore(&device->al_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153) drbd_err(device, "drbd_rs_complete_io(,%llu [=%u]) called, "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154) "but refcnt is 0!?\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155) (unsigned long long)sector, enr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159) if (lc_put(device->resync, &bm_ext->lce) == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160) bm_ext->flags = 0; /* clear BME_LOCKED, BME_NO_WRITES and BME_PRIORITY */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161) device->resync_locked--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162) wake_up(&device->al_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165) spin_unlock_irqrestore(&device->al_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169) * drbd_rs_cancel_all() - Removes all extents from the resync LRU (even BME_LOCKED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170) * @device: DRBD device.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172) void drbd_rs_cancel_all(struct drbd_device *device)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174) spin_lock_irq(&device->al_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176) if (get_ldev_if_state(device, D_FAILED)) { /* Makes sure ->resync is there. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177) lc_reset(device->resync);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178) put_ldev(device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180) device->resync_locked = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181) device->resync_wenr = LC_FREE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182) spin_unlock_irq(&device->al_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183) wake_up(&device->al_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187) * drbd_rs_del_all() - Gracefully remove all extents from the resync LRU
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188) * @device: DRBD device.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1189) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190) * Returns 0 upon success, -EAGAIN if at least one reference count was
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191) * not zero.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193) int drbd_rs_del_all(struct drbd_device *device)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195) struct lc_element *e;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196) struct bm_extent *bm_ext;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199) spin_lock_irq(&device->al_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201) if (get_ldev_if_state(device, D_FAILED)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202) /* ok, ->resync is there. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203) for (i = 0; i < device->resync->nr_elements; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204) e = lc_element_by_index(device->resync, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205) bm_ext = lc_entry(e, struct bm_extent, lce);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206) if (bm_ext->lce.lc_number == LC_FREE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208) if (bm_ext->lce.lc_number == device->resync_wenr) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1209) drbd_info(device, "dropping %u in drbd_rs_del_all, apparently"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210) " got 'synced' by application io\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211) device->resync_wenr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212) D_ASSERT(device, !test_bit(BME_LOCKED, &bm_ext->flags));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213) D_ASSERT(device, test_bit(BME_NO_WRITES, &bm_ext->flags));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214) clear_bit(BME_NO_WRITES, &bm_ext->flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215) device->resync_wenr = LC_FREE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216) lc_put(device->resync, &bm_ext->lce);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218) if (bm_ext->lce.refcnt != 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219) drbd_info(device, "Retrying drbd_rs_del_all() later. "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220) "refcnt=%d\n", bm_ext->lce.refcnt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221) put_ldev(device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222) spin_unlock_irq(&device->al_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223) return -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225) D_ASSERT(device, !test_bit(BME_LOCKED, &bm_ext->flags));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226) D_ASSERT(device, !test_bit(BME_NO_WRITES, &bm_ext->flags));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227) lc_del(device->resync, &bm_ext->lce);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229) D_ASSERT(device, device->resync->used == 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230) put_ldev(device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232) spin_unlock_irq(&device->al_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233) wake_up(&device->al_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236) }