Orange Pi 5 kernel

Deprecated Linux kernel 5.10.110 for Orange Pi 5/5B/5+ boards

// SPDX-License-Identifier: GPL-2.0-or-later
/*
   drbd_actlog.c

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2003-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 2003-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2003-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.


 */

#include <linux/slab.h>
#include <linux/crc32c.h>
#include <linux/drbd.h>
#include <linux/drbd_limits.h>
#include "drbd_int.h"


enum al_transaction_types {
	AL_TR_UPDATE = 0,
	AL_TR_INITIALIZED = 0xffff
};
/* all fields on disk in big endian */
struct __packed al_transaction_on_disk {
	/* don't we all like magic */
	__be32	magic;

	/* to identify the most recent transaction block
	 * in the on-disk ring buffer */
	__be32	tr_number;

	/* checksum on the full 4k block, with this field set to 0. */
	__be32	crc32c;

	/* type of transaction, special transaction types like:
	 * purge-all, set-all-idle, set-all-active, ... to-be-defined
	 * see also enum al_transaction_types */
	__be16	transaction_type;

	/* we currently allow only a few thousand extents,
	 * so 16bit will be enough for the slot number. */

	/* how many updates in this transaction */
	__be16	n_updates;

	/* maximum slot number, "al-extents" in drbd.conf speak.
	 * Having this in each transaction should make reconfiguration
	 * of that parameter easier. */
	__be16	context_size;

	/* slot number the context starts with */
	__be16	context_start_slot_nr;

	/* Some reserved bytes.  Expected usage is a 64bit counter of
	 * sectors-written since device creation, and other data generation tag
	 * supporting usage */
	__be32	__reserved[4];

	/* --- 36 bytes used --- */

	/* Reserve space for up to AL_UPDATES_PER_TRANSACTION changes
	 * in one transaction, then use the remaining bytes in the 4k block for
	 * context information.  A "flexible" number of updates per transaction
	 * does not help, as we have to account for the case when all update
	 * slots are used anyway, so it would only complicate code without
	 * additional benefit.
	 */
	__be16	update_slot_nr[AL_UPDATES_PER_TRANSACTION];

	/* but the extent number is 32bit, which at an extent size of 4 MiB
	 * allows covering device sizes of up to 2**54 bytes (16 PiB) */
	__be32	update_extent_nr[AL_UPDATES_PER_TRANSACTION];

	/* --- 420 bytes used (36 + 64*6) --- */

	/* 4096 - 420 = 3676 = 919 * 4 */
	__be32	context[AL_CONTEXT_PER_TRANSACTION];
};
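
/* Layout check, spelled out (assuming AL_UPDATES_PER_TRANSACTION == 64 and
 * AL_CONTEXT_PER_TRANSACTION == 919, as defined in drbd_int.h):
 * 36 byte header + 64 * 2 (update_slot_nr) + 64 * 4 (update_extent_nr)
 * = 420 bytes, plus 919 * 4 (context) = 3676 bytes: exactly 4096 in total.
 * A BUILD_BUG_ON(sizeof(struct al_transaction_on_disk) != 4096) would
 * assert that at compile time. */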

void *drbd_md_get_buffer(struct drbd_device *device, const char *intent)
{
	int r;

	wait_event(device->misc_wait,
		   (r = atomic_cmpxchg(&device->md_io.in_use, 0, 1)) == 0 ||
		   device->state.disk <= D_FAILED);

	if (r)
		return NULL;

	device->md_io.current_use = intent;
	device->md_io.start_jif = jiffies;
	device->md_io.submit_jif = device->md_io.start_jif - 1;
	return page_address(device->md_io.page);
}

void drbd_md_put_buffer(struct drbd_device *device)
{
	if (atomic_dec_and_test(&device->md_io.in_use))
		wake_up(&device->misc_wait);
}

void wait_until_done_or_force_detached(struct drbd_device *device, struct drbd_backing_dev *bdev,
				     unsigned int *done)
{
	long dt;

	rcu_read_lock();
	dt = rcu_dereference(bdev->disk_conf)->disk_timeout;
	rcu_read_unlock();
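	/* disk_timeout comes from the configuration in tenths of a second
	 * (hence the " * HZ / 10" conversion to jiffies below);
	 * 0 means "never time out". */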
	dt = dt * HZ / 10;
	if (dt == 0)
		dt = MAX_SCHEDULE_TIMEOUT;

	dt = wait_event_timeout(device->misc_wait,
			*done || test_bit(FORCE_DETACH, &device->flags), dt);
	if (dt == 0) {
		drbd_err(device, "meta-data IO operation timed out\n");
		drbd_chk_io_error(device, 1, DRBD_FORCE_DETACH);
	}
}

static int _drbd_md_sync_page_io(struct drbd_device *device,
				 struct drbd_backing_dev *bdev,
				 sector_t sector, int op)
{
	struct bio *bio;
	/* we do all our meta data IO in aligned 4k blocks. */
	const int size = 4096;
	int err, op_flags = 0;

	device->md_io.done = 0;
	device->md_io.error = -ENODEV;

	if ((op == REQ_OP_WRITE) && !test_bit(MD_NO_FUA, &device->flags))
		op_flags |= REQ_FUA | REQ_PREFLUSH;
	op_flags |= REQ_SYNC;

	bio = bio_alloc_drbd(GFP_NOIO);
	bio_set_dev(bio, bdev->md_bdev);
	bio->bi_iter.bi_sector = sector;
	err = -EIO;
	if (bio_add_page(bio, device->md_io.page, size, 0) != size)
		goto out;
	bio->bi_private = device;
	bio->bi_end_io = drbd_md_endio;
	bio_set_op_attrs(bio, op, op_flags);

	if (op != REQ_OP_WRITE && device->state.disk == D_DISKLESS && device->ldev == NULL)
		/* special case, drbd_md_read() during drbd_adm_attach(): no get_ldev */
		;
	else if (!get_ldev_if_state(device, D_ATTACHING)) {
		/* Corresponding put_ldev in drbd_md_endio() */
		drbd_err(device, "ASSERT FAILED: get_ldev_if_state() == 1 in _drbd_md_sync_page_io()\n");
		err = -ENODEV;
		goto out;
	}

	bio_get(bio); /* one bio_put() is in the completion handler */
	atomic_inc(&device->md_io.in_use); /* drbd_md_put_buffer() is in the completion handler */
	device->md_io.submit_jif = jiffies;
	if (drbd_insert_fault(device, (op == REQ_OP_WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD))
		bio_io_error(bio);
	else
		submit_bio(bio);
	wait_until_done_or_force_detached(device, bdev, &device->md_io.done);
	if (!bio->bi_status)
		err = device->md_io.error;

 out:
	bio_put(bio);
	return err;
}

int drbd_md_sync_page_io(struct drbd_device *device, struct drbd_backing_dev *bdev,
			 sector_t sector, int op)
{
	int err;
	D_ASSERT(device, atomic_read(&device->md_io.in_use) == 1);

	BUG_ON(!bdev->md_bdev);

	dynamic_drbd_dbg(device, "meta_data io: %s [%d]:%s(,%llus,%s) %pS\n",
	     current->comm, current->pid, __func__,
	     (unsigned long long)sector, (op == REQ_OP_WRITE) ? "WRITE" : "READ",
	     (void*)_RET_IP_);

	if (sector < drbd_md_first_sector(bdev) ||
	    sector + 7 > drbd_md_last_sector(bdev))
		drbd_alert(device, "%s [%d]:%s(,%llus,%s) out of range md access!\n",
		     current->comm, current->pid, __func__,
		     (unsigned long long)sector,
		     (op == REQ_OP_WRITE) ? "WRITE" : "READ");

	err = _drbd_md_sync_page_io(device, bdev, sector, op);
	if (err) {
		drbd_err(device, "drbd_md_sync_page_io(,%llus,%s) failed with error %d\n",
		    (unsigned long long)sector,
		    (op == REQ_OP_WRITE) ? "WRITE" : "READ", err);
	}
	return err;
}

static struct bm_extent *find_active_resync_extent(struct drbd_device *device, unsigned int enr)
{
	struct lc_element *tmp;
	tmp = lc_find(device->resync, enr/AL_EXT_PER_BM_SECT);
	if (unlikely(tmp != NULL)) {
		struct bm_extent  *bm_ext = lc_entry(tmp, struct bm_extent, lce);
		if (test_bit(BME_NO_WRITES, &bm_ext->flags))
			return bm_ext;
	}
	return NULL;
}
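
/* Note on the enr/AL_EXT_PER_BM_SECT mapping above: enr is an activity log
 * extent number.  With 4 MiB AL extents and 16 MiB resync extents (see the
 * comment before update_rs_extent() below), AL_EXT_PER_BM_SECT is 4, so the
 * division selects the resync extent that covers this AL extent. */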

static struct lc_element *_al_get(struct drbd_device *device, unsigned int enr, bool nonblock)
{
	struct lc_element *al_ext;
	struct bm_extent *bm_ext;
	int wake;

	spin_lock_irq(&device->al_lock);
	bm_ext = find_active_resync_extent(device, enr);
	if (bm_ext) {
		wake = !test_and_set_bit(BME_PRIORITY, &bm_ext->flags);
		spin_unlock_irq(&device->al_lock);
		if (wake)
			wake_up(&device->al_wait);
		return NULL;
	}
	if (nonblock)
		al_ext = lc_try_get(device->act_log, enr);
	else
		al_ext = lc_get(device->act_log, enr);
	spin_unlock_irq(&device->al_lock);
	return al_ext;
}

bool drbd_al_begin_io_fastpath(struct drbd_device *device, struct drbd_interval *i)
{
	/* for bios crossing activity log extent boundaries,
	 * we may need to activate two extents in one go */
	unsigned first = i->sector >> (AL_EXTENT_SHIFT-9);
	unsigned last = i->size == 0 ? first : (i->sector + (i->size >> 9) - 1) >> (AL_EXTENT_SHIFT-9);

	D_ASSERT(device, first <= last);
	D_ASSERT(device, atomic_read(&device->local_cnt) > 0);

	/* FIXME figure out a fast path for bios crossing AL extent boundaries */
	if (first != last)
		return false;

	return _al_get(device, first, true);
}
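
/* Worked example for the extent arithmetic above, assuming DRBD's usual
 * AL_EXTENT_SHIFT of 22 (4 MiB extents, cf. the on-disk struct comments):
 * first = sector >> 13.  A 4 KiB write at sector 16384 covers sectors
 * 16384..16391, so first == last == 2 and the fast path may apply.  A write
 * covering sectors 8184..8199 spans extents 0 and 1 (first != last) and is
 * sent down the slow path instead. */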

bool drbd_al_begin_io_prepare(struct drbd_device *device, struct drbd_interval *i)
{
	/* for bios crossing activity log extent boundaries,
	 * we may need to activate two extents in one go */
	unsigned first = i->sector >> (AL_EXTENT_SHIFT-9);
	unsigned last = i->size == 0 ? first : (i->sector + (i->size >> 9) - 1) >> (AL_EXTENT_SHIFT-9);
	unsigned enr;
	bool need_transaction = false;

	D_ASSERT(device, first <= last);
	D_ASSERT(device, atomic_read(&device->local_cnt) > 0);

	for (enr = first; enr <= last; enr++) {
		struct lc_element *al_ext;
		wait_event(device->al_wait,
				(al_ext = _al_get(device, enr, false)) != NULL);
		if (al_ext->lc_number != enr)
			need_transaction = true;
	}
	return need_transaction;
}

#if (PAGE_SHIFT + 3) < (AL_EXTENT_SHIFT - BM_BLOCK_SHIFT)
/* Currently BM_BLOCK_SHIFT, BM_EXT_SHIFT and AL_EXTENT_SHIFT
 * are still coupled, or assume too much about their relation.
 * Code below will not work if this is violated.
 * Will be cleaned up with some followup patch.
 */
# error FIXME
#endif

static unsigned int al_extent_to_bm_page(unsigned int al_enr)
{
	return al_enr >>
		/* bit to page */
		((PAGE_SHIFT + 3) -
		/* al extent number to bit */
		 (AL_EXTENT_SHIFT - BM_BLOCK_SHIFT));
}
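
/* A sketch of the shift above, assuming 4 KiB pages (PAGE_SHIFT == 12),
 * BM_BLOCK_SHIFT == 12 and AL_EXTENT_SHIFT == 22: one AL extent maps to
 * 2^(22-12) = 1024 bitmap bits, while one bitmap page holds 2^(12+3) = 32768
 * bits, i.e. 32 AL extents; the function then reduces to al_enr >> 5. */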

static sector_t al_tr_number_to_on_disk_sector(struct drbd_device *device)
{
	const unsigned int stripes = device->ldev->md.al_stripes;
	const unsigned int stripe_size_4kB = device->ldev->md.al_stripe_size_4k;

	/* transaction number, modulo on-disk ring buffer wrap around */
	unsigned int t = device->al_tr_number % (device->ldev->md.al_size_4k);

	/* ... to aligned 4k on disk block */
	t = ((t % stripes) * stripe_size_4kB) + t/stripes;

	/* ... to 512 byte sector in activity log */
	t *= 8;

	/* ... plus offset to the on disk position */
	return device->ldev->md.md_offset + device->ldev->md.al_offset + t;
}
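
/* Worked example, assuming the default non-striped activity log
 * (al_stripes == 1, al_stripe_size_4k == 8, i.e. a 32 KiB AL): al_size_4k
 * is 8, so for al_tr_number == 10 we get t = 10 % 8 = 2; the striping step
 * leaves t == 2; t *= 8 yields sector offset 16, the third 4k slot of the
 * ring buffer relative to md_offset + al_offset. */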

static int __al_write_transaction(struct drbd_device *device, struct al_transaction_on_disk *buffer)
{
	struct lc_element *e;
	sector_t sector;
	int i, mx;
	unsigned extent_nr;
	unsigned crc = 0;
	int err = 0;

	memset(buffer, 0, sizeof(*buffer));
	buffer->magic = cpu_to_be32(DRBD_AL_MAGIC);
	buffer->tr_number = cpu_to_be32(device->al_tr_number);

	i = 0;

	drbd_bm_reset_al_hints(device);

	/* Even though no one can start to change this list
	 * once we set the LC_LOCKED -- from drbd_al_begin_io(),
	 * lc_try_lock_for_transaction() --, someone may still
	 * be in the process of changing it. */
	spin_lock_irq(&device->al_lock);
	list_for_each_entry(e, &device->act_log->to_be_changed, list) {
		if (i == AL_UPDATES_PER_TRANSACTION) {
			i++;
			break;
		}
		buffer->update_slot_nr[i] = cpu_to_be16(e->lc_index);
		buffer->update_extent_nr[i] = cpu_to_be32(e->lc_new_number);
		if (e->lc_number != LC_FREE)
			drbd_bm_mark_for_writeout(device,
					al_extent_to_bm_page(e->lc_number));
		i++;
	}
	spin_unlock_irq(&device->al_lock);
	BUG_ON(i > AL_UPDATES_PER_TRANSACTION);

	buffer->n_updates = cpu_to_be16(i);
	for ( ; i < AL_UPDATES_PER_TRANSACTION; i++) {
		buffer->update_slot_nr[i] = cpu_to_be16(-1);
		buffer->update_extent_nr[i] = cpu_to_be32(LC_FREE);
	}

	buffer->context_size = cpu_to_be16(device->act_log->nr_elements);
	buffer->context_start_slot_nr = cpu_to_be16(device->al_tr_cycle);

	mx = min_t(int, AL_CONTEXT_PER_TRANSACTION,
		   device->act_log->nr_elements - device->al_tr_cycle);
	for (i = 0; i < mx; i++) {
		unsigned idx = device->al_tr_cycle + i;
		extent_nr = lc_element_by_index(device->act_log, idx)->lc_number;
		buffer->context[i] = cpu_to_be32(extent_nr);
	}
	for (; i < AL_CONTEXT_PER_TRANSACTION; i++)
		buffer->context[i] = cpu_to_be32(LC_FREE);

	device->al_tr_cycle += AL_CONTEXT_PER_TRANSACTION;
	if (device->al_tr_cycle >= device->act_log->nr_elements)
		device->al_tr_cycle = 0;

	sector = al_tr_number_to_on_disk_sector(device);

	crc = crc32c(0, buffer, 4096);
	buffer->crc32c = cpu_to_be32(crc);

	if (drbd_bm_write_hinted(device))
		err = -EIO;
	else {
		bool write_al_updates;
		rcu_read_lock();
		write_al_updates = rcu_dereference(device->ldev->disk_conf)->al_updates;
		rcu_read_unlock();
		if (write_al_updates) {
			if (drbd_md_sync_page_io(device, device->ldev, sector, REQ_OP_WRITE)) {
				err = -EIO;
				drbd_chk_io_error(device, 1, DRBD_META_IO_ERROR);
			} else {
				device->al_tr_number++;
				device->al_writ_cnt++;
			}
		}
	}

	return err;
}

static int al_write_transaction(struct drbd_device *device)
{
	struct al_transaction_on_disk *buffer;
	int err;

	if (!get_ldev(device)) {
		drbd_err(device, "disk is %s, cannot start al transaction\n",
			drbd_disk_str(device->state.disk));
		return -EIO;
	}

	/* The bitmap write may have failed, causing a state change. */
	if (device->state.disk < D_INCONSISTENT) {
		drbd_err(device,
			"disk is %s, cannot write al transaction\n",
			drbd_disk_str(device->state.disk));
		put_ldev(device);
		return -EIO;
	}

	/* protects md_io_buffer, al_tr_cycle, ... */
	buffer = drbd_md_get_buffer(device, __func__);
	if (!buffer) {
		drbd_err(device, "disk failed while waiting for md_io buffer\n");
		put_ldev(device);
		return -ENODEV;
	}

	err = __al_write_transaction(device, buffer);

	drbd_md_put_buffer(device);
	put_ldev(device);

	return err;
}


void drbd_al_begin_io_commit(struct drbd_device *device)
{
	bool locked = false;

	/* Serialize multiple transactions.
	 * This uses test_and_set_bit, memory barrier is implicit.
	 */
	wait_event(device->al_wait,
			device->act_log->pending_changes == 0 ||
			(locked = lc_try_lock_for_transaction(device->act_log)));

	if (locked) {
		/* Double check: it may have been committed by someone else,
		 * while we have been waiting for the lock. */
		if (device->act_log->pending_changes) {
			bool write_al_updates;

			rcu_read_lock();
			write_al_updates = rcu_dereference(device->ldev->disk_conf)->al_updates;
			rcu_read_unlock();

			if (write_al_updates)
				al_write_transaction(device);
			spin_lock_irq(&device->al_lock);
			/* FIXME
			if (err)
				we need an "lc_cancel" here;
			*/
			lc_committed(device->act_log);
			spin_unlock_irq(&device->al_lock);
		}
		lc_unlock(device->act_log);
		wake_up(&device->al_wait);
	}
}

/* Activate all activity log extents covered by @i and, if that added
 * new extents, commit the resulting activity log transaction. */
void drbd_al_begin_io(struct drbd_device *device, struct drbd_interval *i)
{
	if (drbd_al_begin_io_prepare(device, i))
		drbd_al_begin_io_commit(device);
}

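/* Non-blocking counterpart to drbd_al_begin_io().  Return contract, as
 * implemented below: 0 if references on all covered extents were obtained;
 * -ENOBUFS if too many extents are hot or too many updates are pending;
 * -EBUSY if a conflicting resync extent was newly marked BME_PRIORITY;
 * -EWOULDBLOCK if it had been marked BME_PRIORITY before. */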
int drbd_al_begin_io_nonblock(struct drbd_device *device, struct drbd_interval *i)
{
	struct lru_cache *al = device->act_log;
	/* for bios crossing activity log extent boundaries,
	 * we may need to activate two extents in one go */
	unsigned first = i->sector >> (AL_EXTENT_SHIFT-9);
	unsigned last = i->size == 0 ? first : (i->sector + (i->size >> 9) - 1) >> (AL_EXTENT_SHIFT-9);
	unsigned nr_al_extents;
	unsigned available_update_slots;
	unsigned enr;

	D_ASSERT(device, first <= last);

	nr_al_extents = 1 + last - first; /* worst case: all touched extents are cold. */
	available_update_slots = min(al->nr_elements - al->used,
				al->max_pending_changes - al->pending_changes);

	/* We want all necessary updates for a given request within the same transaction.
	 * We could first check how many updates are *actually* needed,
	 * and use that instead of the worst-case nr_al_extents. */
	if (available_update_slots < nr_al_extents) {
		/* Too many activity log extents are currently "hot".
		 *
		 * If we have accumulated pending changes already,
		 * we made progress.
		 *
		 * If we cannot get even a single pending change through,
		 * stop the fast path until we made some progress,
		 * or requests to "cold" extents could be starved. */
		if (!al->pending_changes)
			__set_bit(__LC_STARVING, &device->act_log->flags);
		return -ENOBUFS;
	}

	/* Is resync active in this area? */
	for (enr = first; enr <= last; enr++) {
		struct lc_element *tmp;
		tmp = lc_find(device->resync, enr/AL_EXT_PER_BM_SECT);
		if (unlikely(tmp != NULL)) {
			struct bm_extent  *bm_ext = lc_entry(tmp, struct bm_extent, lce);
			if (test_bit(BME_NO_WRITES, &bm_ext->flags)) {
				if (!test_and_set_bit(BME_PRIORITY, &bm_ext->flags))
					return -EBUSY;
				return -EWOULDBLOCK;
			}
		}
	}

	/* Check out the refcounts.
	 * Given that we checked for available elements and update slots above,
	 * this has to be successful. */
	for (enr = first; enr <= last; enr++) {
		struct lc_element *al_ext;
		al_ext = lc_get_cumulative(device->act_log, enr);
		if (!al_ext)
			drbd_info(device, "LOGIC BUG for enr=%u\n", enr);
	}
	return 0;
}

void drbd_al_complete_io(struct drbd_device *device, struct drbd_interval *i)
{
	/* for bios crossing activity log extent boundaries,
	 * we may need to activate two extents in one go */
	unsigned first = i->sector >> (AL_EXTENT_SHIFT-9);
	unsigned last = i->size == 0 ? first : (i->sector + (i->size >> 9) - 1) >> (AL_EXTENT_SHIFT-9);
	unsigned enr;
	struct lc_element *extent;
	unsigned long flags;

	D_ASSERT(device, first <= last);
	spin_lock_irqsave(&device->al_lock, flags);

	for (enr = first; enr <= last; enr++) {
		extent = lc_find(device->act_log, enr);
		if (!extent) {
			drbd_err(device, "al_complete_io() called on inactive extent %u\n", enr);
			continue;
		}
		lc_put(device->act_log, extent);
	}
	spin_unlock_irqrestore(&device->al_lock, flags);
	wake_up(&device->al_wait);
}

static int _try_lc_del(struct drbd_device *device, struct lc_element *al_ext)
{
	int rv;

	spin_lock_irq(&device->al_lock);
	rv = (al_ext->refcnt == 0);
	if (likely(rv))
		lc_del(device->act_log, al_ext);
	spin_unlock_irq(&device->al_lock);

	return rv;
}

/**
 * drbd_al_shrink() - Removes all active extents from the activity log
 * @device:	DRBD device.
 *
 * Removes all active extents from the activity log, waiting until
 * the reference count of each entry has dropped to 0 first.
 *
 * You need to lock device->act_log with lc_try_lock() / lc_unlock().
 */
void drbd_al_shrink(struct drbd_device *device)
{
	struct lc_element *al_ext;
	int i;

	D_ASSERT(device, test_bit(__LC_LOCKED, &device->act_log->flags));

	for (i = 0; i < device->act_log->nr_elements; i++) {
		al_ext = lc_element_by_index(device->act_log, i);
		if (al_ext->lc_number == LC_FREE)
			continue;
		wait_event(device->al_wait, _try_lc_del(device, al_ext));
	}

	wake_up(&device->al_wait);
}

int drbd_al_initialize(struct drbd_device *device, void *buffer)
{
	struct al_transaction_on_disk *al = buffer;
	struct drbd_md *md = &device->ldev->md;
	int al_size_4k = md->al_stripes * md->al_stripe_size_4k;
	int i;

	__al_write_transaction(device, al);
	/* There may or may not have been a pending transaction. */
	spin_lock_irq(&device->al_lock);
	lc_committed(device->act_log);
	spin_unlock_irq(&device->al_lock);

	/* The rest of the transactions will have an empty "updates" list, and
	 * are written out only to provide the context, and to initialize the
	 * on-disk ring buffer. */
	for (i = 1; i < al_size_4k; i++) {
		int err = __al_write_transaction(device, al);
		if (err)
			return err;
	}
	return 0;
}

static const char *drbd_change_sync_fname[] = {
	[RECORD_RS_FAILED] = "drbd_rs_failed_io",
	[SET_IN_SYNC] = "drbd_set_in_sync",
	[SET_OUT_OF_SYNC] = "drbd_set_out_of_sync"
};

/* ATTENTION. The AL's extents are 4MB each, while the extents in the
 * resync LRU-cache are 16MB each.
 * The caller of this function has to hold a get_ldev() reference.
 *
 * Adjusts the caching members ->rs_left (success) or ->rs_failed (!success),
 * potentially pulling in (and recounting the corresponding bits)
 * this resync extent into the resync extent lru cache.
 *
 * Returns whether all bits have been cleared for this resync extent,
 * precisely: (rs_left <= rs_failed)
 *
 * TODO will be obsoleted once we have a caching lru of the on disk bitmap
 */
static bool update_rs_extent(struct drbd_device *device,
		unsigned int enr, int count,
		enum update_sync_bits_mode mode)
{
	struct lc_element *e;

	D_ASSERT(device, atomic_read(&device->local_cnt));

	/* When setting out-of-sync bits,
	 * we don't need it cached (lc_find).
	 * But if it is present in the cache,
	 * we should update the cached bit count.
	 * Otherwise, that extent should be in the resync extent lru cache
	 * already -- or we want to pull it in if necessary -- (lc_get),
	 * then update and check rs_left and rs_failed. */
	if (mode == SET_OUT_OF_SYNC)
		e = lc_find(device->resync, enr);
	else
		e = lc_get(device->resync, enr);
	if (e) {
		struct bm_extent *ext = lc_entry(e, struct bm_extent, lce);
		if (ext->lce.lc_number == enr) {
			if (mode == SET_IN_SYNC)
				ext->rs_left -= count;
			else if (mode == SET_OUT_OF_SYNC)
				ext->rs_left += count;
			else
				ext->rs_failed += count;
			if (ext->rs_left < ext->rs_failed) {
				drbd_warn(device, "BAD! enr=%u rs_left=%d "
				    "rs_failed=%d count=%d cstate=%s\n",
				     ext->lce.lc_number, ext->rs_left,
				     ext->rs_failed, count,
				     drbd_conn_str(device->state.conn));

				/* We don't expect to be able to clear more bits
				 * than have been set when we originally counted
				 * the set bits to cache that value in ext->rs_left.
				 * Whatever the reason (disconnect during resync,
				 * delayed local completion of an application write),
				 * try to fix it up by recounting here. */
				ext->rs_left = drbd_bm_e_weight(device, enr);
			}
		} else {
			/* Normally this element should be in the cache,
			 * since drbd_rs_begin_io() pulled it already in.
			 *
			 * But maybe an application write finished, and we set
			 * something outside the resync lru_cache in sync.
			 */
			int rs_left = drbd_bm_e_weight(device, enr);
			if (ext->flags != 0) {
				drbd_warn(device, "changing resync lce: %d[%u;%02lx]"
				     " -> %d[%u;00]\n",
				     ext->lce.lc_number, ext->rs_left,
				     ext->flags, enr, rs_left);
				ext->flags = 0;
			}
			if (ext->rs_failed) {
				drbd_warn(device, "Kicking resync_lru element enr=%u "
				     "out with rs_failed=%d\n",
				     ext->lce.lc_number, ext->rs_failed);
			}
			ext->rs_left = rs_left;
			ext->rs_failed = (mode == RECORD_RS_FAILED) ? count : 0;
			/* we don't keep a persistent log of the resync lru,
			 * we can commit any change right away. */
			lc_committed(device->resync);
		}
		if (mode != SET_OUT_OF_SYNC)
			lc_put(device->resync, &ext->lce);
		/* no race, we are within the al_lock! */

		if (ext->rs_left <= ext->rs_failed) {
			ext->rs_failed = 0;
			return true;
		}
	} else if (mode != SET_OUT_OF_SYNC) {
		/* be quiet if lc_find() did not find it. */
		drbd_err(device, "lc_get() failed! locked=%d/%d flags=%lu\n",
		    device->resync_locked,
		    device->resync->nr_elements,
		    device->resync->flags);
	}
	return false;
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  737) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  738) void drbd_advance_rs_marks(struct drbd_device *device, unsigned long still_to_go)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  739) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  740) 	unsigned long now = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  741) 	unsigned long last = device->rs_mark_time[device->rs_last_mark];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  742) 	int next = (device->rs_last_mark + 1) % DRBD_SYNC_MARKS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  743) 	if (time_after_eq(now, last + DRBD_SYNC_MARK_STEP)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  744) 		if (device->rs_mark_left[device->rs_last_mark] != still_to_go &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  745) 		    device->state.conn != C_PAUSED_SYNC_T &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  746) 		    device->state.conn != C_PAUSED_SYNC_S) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  747) 			device->rs_mark_time[next] = now;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  748) 			device->rs_mark_left[next] = still_to_go;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  749) 			device->rs_last_mark = next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  750) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  751) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  752) }
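
/* Illustrative sketch (not part of the driver): the mark ring buffer kept
 * by drbd_advance_rs_marks() above can be turned into a "KB/sec over the
 * last few mark steps" estimate, much like the status reporting code
 * consumes it.  Assumes 4 KiB per bitmap bit; kept under #if 0 on purpose. */
#if 0
static unsigned long approx_sync_rate_kb_per_sec(struct drbd_device *device)
{
	/* the slot after the most recent mark is the oldest one */
	int i = (device->rs_last_mark + 1) % DRBD_SYNC_MARKS;
	unsigned long dt = (jiffies - device->rs_mark_time[i]) / HZ;
	unsigned long db;

	if (!dt)
		dt = 1;	/* avoid div-by-zero right after a fresh mark */
	db = device->rs_mark_left[i] - drbd_bm_total_weight(device);
	return (db * (BM_BLOCK_SIZE / 1024)) / dt;	/* bits -> KiB */
}
#endif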
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  753) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  754) /* It is called a lazy update, so don't do the write-out too often. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  755) static bool lazy_bitmap_update_due(struct drbd_device *device)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  756) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  757) 	return time_after(jiffies, device->rs_last_bcast + 2*HZ);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  758) }
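
/* Sketch: time_after() is used (rather than a plain "jiffies > x" compare)
 * because jiffies wraps; the kernel macro is essentially a signed
 * difference, which stays correct across the wraparound: */
#if 0
#define time_after_sketch(a, b)	((long)((b) - (a)) < 0)
/* So the lazy update fires at most every 2 seconds (2*HZ ticks), assuming
 * rs_last_bcast is refreshed whenever the update is actually sent. */
#endif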
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  759) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  760) static void maybe_schedule_on_disk_bitmap_update(struct drbd_device *device, bool rs_done)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  761) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  762) 	if (rs_done) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  763) 		struct drbd_connection *connection = first_peer_device(device)->connection;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  764) 		if (connection->agreed_pro_version <= 95 ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  765) 		    is_sync_target_state(device->state.conn))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  766) 			set_bit(RS_DONE, &device->flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  767) 			/* and also set RS_PROGRESS below */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  768) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  769) 		/* Else: rather wait for explicit notification via receive_state,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  770) 		 * to avoid uuids-rotated-too-fast causing full resync
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  771) 		 * in next handshake, in case the replication link breaks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  772) 		 * at the most unfortunate time... */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  773) 	} else if (!lazy_bitmap_update_due(device))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  774) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  775) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  776) 	drbd_device_post_work(device, RS_PROGRESS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  777) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  778) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  779) static int update_sync_bits(struct drbd_device *device,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  780) 		unsigned long sbnr, unsigned long ebnr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  781) 		enum update_sync_bits_mode mode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  782) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  783) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  784) 	 * We keep a count of set bits per resync-extent in the ->rs_left
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  785) 	 * caching member, so we need to loop and work within the resync extent
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  786) 	 * alignment. Typically this loop will execute exactly once.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  787) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  788) 	unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  789) 	unsigned long count = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  790) 	unsigned int cleared = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  791) 	while (sbnr <= ebnr) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  792) 		/* set temporary boundary bit number to last bit number within
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  793) 		 * the resync extent of the current start bit number,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  794) 		 * but cap at provided end bit number */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  795) 		unsigned long tbnr = min(ebnr, sbnr | BM_BLOCKS_PER_BM_EXT_MASK);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  796) 		unsigned long c;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  797) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  798) 		if (mode == RECORD_RS_FAILED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  799) 			/* Only called from drbd_rs_failed_io(), bits
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  800) 			 * supposedly still set.  Recount, maybe some
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  801) 			 * of the bits have been successfully cleared
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  802) 			 * by application IO meanwhile.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  803) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  804) 			c = drbd_bm_count_bits(device, sbnr, tbnr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  805) 		else if (mode == SET_IN_SYNC)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  806) 			c = drbd_bm_clear_bits(device, sbnr, tbnr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  807) 		else /* if (mode == SET_OUT_OF_SYNC) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  808) 			c = drbd_bm_set_bits(device, sbnr, tbnr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  809) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  810) 		if (c) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  811) 			spin_lock_irqsave(&device->al_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  812) 			cleared += update_rs_extent(device, BM_BIT_TO_EXT(sbnr), c, mode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  813) 			spin_unlock_irqrestore(&device->al_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  814) 			count += c;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  815) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  816) 		sbnr = tbnr + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  817) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  818) 	if (count) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  819) 		if (mode == SET_IN_SYNC) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  820) 			unsigned long still_to_go = drbd_bm_total_weight(device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  821) 			bool rs_is_done = (still_to_go <= device->rs_failed);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  822) 			drbd_advance_rs_marks(device, still_to_go);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  823) 			if (cleared || rs_is_done)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  824) 				maybe_schedule_on_disk_bitmap_update(device, rs_is_done);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  825) 		} else if (mode == RECORD_RS_FAILED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  826) 			device->rs_failed += count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  827) 		wake_up(&device->al_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  828) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  829) 	return count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  830) }
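
/* Sketch of the per-extent chunking in update_sync_bits() above, with the
 * geometry assumed here: 4 KiB per bitmap bit and 16 MiB per resync extent
 * give 4096 bits per extent, i.e. BM_BLOCKS_PER_BM_EXT_MASK == 0xfff, so
 * "sbnr | mask" is the last bit of the extent containing sbnr: */
#if 0
	unsigned long sbnr = 4000, ebnr = 5000;	/* spans extents 0 and 1 */
	while (sbnr <= ebnr) {
		unsigned long tbnr = min(ebnr, sbnr | 0xfffUL);
		/* 1st pass: bits 4000..4095, all in extent 0;
		 * 2nd pass: bits 4096..5000, all in extent 1 (4096 >> 12 == 1) */
		sbnr = tbnr + 1;
	}
#endif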
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  831) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  832) static bool plausible_request_size(int size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  833) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  834) 	return size > 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  835) 		&& size <= DRBD_MAX_BATCH_BIO_SIZE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  836) 		&& IS_ALIGNED(size, 512);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  837) }
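
/* For example (sketch; assuming DRBD_MAX_BATCH_BIO_SIZE is 1 MiB): */
#if 0
	plausible_request_size(0);		/* false: empty request     */
	plausible_request_size(512);		/* true                     */
	plausible_request_size(4096);		/* true                     */
	plausible_request_size(513);		/* false: not 512-aligned   */
	plausible_request_size(2 << 20);	/* false: exceeds the cap   */
#endif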
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  838) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  839) /* clear the bits corresponding to the piece of storage in question:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  840)  * size bytes of data starting from sector.  Only clear bits of the affected
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  841)  * one or more _aligned_ BM_BLOCK_SIZE blocks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  842)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  843)  * called by worker on C_SYNC_TARGET and receiver on SyncSource.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  844)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  845)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  846) int __drbd_change_sync(struct drbd_device *device, sector_t sector, int size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  847) 		enum update_sync_bits_mode mode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  848) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  849) 	/* Is called from worker and receiver context _only_ */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  850) 	unsigned long sbnr, ebnr, lbnr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  851) 	unsigned long count = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  852) 	sector_t esector, nr_sectors;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  853) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  854) 	/* This would be an empty REQ_PREFLUSH, be silent. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  855) 	if ((mode == SET_OUT_OF_SYNC) && size == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  856) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  857) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  858) 	if (!plausible_request_size(size)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  859) 		drbd_err(device, "%s: sector=%llus size=%d nonsense!\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  860) 				drbd_change_sync_fname[mode],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  861) 				(unsigned long long)sector, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  862) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  863) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  864) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  865) 	if (!get_ldev(device))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  866) 		return 0; /* no disk, no metadata, no bitmap to manipulate bits in */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  867) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  868) 	nr_sectors = get_capacity(device->vdisk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  869) 	esector = sector + (size >> 9) - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  870) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  871) 	if (!expect(sector < nr_sectors))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  872) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  873) 	if (!expect(esector < nr_sectors))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  874) 		esector = nr_sectors - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  875) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  876) 	lbnr = BM_SECT_TO_BIT(nr_sectors-1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  877) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  878) 	if (mode == SET_IN_SYNC) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  879) 		/* Round up start sector, round down end sector.  We make sure
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  880) 		 * we only clear full, aligned, BM_BLOCK_SIZE blocks. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  881) 		if (unlikely(esector < BM_SECT_PER_BIT-1))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  882) 			goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  883) 		if (unlikely(esector == (nr_sectors-1)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  884) 			ebnr = lbnr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  885) 		else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  886) 			ebnr = BM_SECT_TO_BIT(esector - (BM_SECT_PER_BIT-1));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  887) 		sbnr = BM_SECT_TO_BIT(sector + BM_SECT_PER_BIT-1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  888) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  889) 		/* We set it out of sync, or record resync failure.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  890) 		 * Should not round anything here. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  891) 		sbnr = BM_SECT_TO_BIT(sector);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  892) 		ebnr = BM_SECT_TO_BIT(esector);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  893) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  894) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  895) 	count = update_sync_bits(device, sbnr, ebnr, mode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  896) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  897) 	put_ldev(device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  898) 	return count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  899) }
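
/* Worked example of the SET_IN_SYNC rounding above (sketch; assumes
 * BM_SECT_PER_BIT == 8, i.e. 4 KiB bitmap blocks).  A 4 KiB write at
 * sector 4 covers sectors 4..11 and thus no *complete* bitmap block, so
 * nothing may be cleared.  A 12 KiB write at sector 4 covers 4..27 and
 * fully contains block 1 (sectors 8..15) and block 2 (sectors 16..23): */
#if 0
	sector_t sector = 4;
	sector_t esector = sector + (12288 >> 9) - 1;	/* == 27 */
	unsigned long sbnr = (sector + 8 - 1) >> 3;	/* round up   -> 1 */
	unsigned long ebnr = (esector - (8 - 1)) >> 3;	/* round down -> 2 */
	/* bits 1..2 get cleared; the partial blocks at both ends are kept */
#endif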
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  900) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  901) static
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  902) struct bm_extent *_bme_get(struct drbd_device *device, unsigned int enr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  903) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  904) 	struct lc_element *e;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  905) 	struct bm_extent *bm_ext;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  906) 	int wakeup = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  907) 	unsigned long rs_flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  908) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  909) 	spin_lock_irq(&device->al_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  910) 	if (device->resync_locked > device->resync->nr_elements/2) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  911) 		spin_unlock_irq(&device->al_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  912) 		return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  913) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  914) 	e = lc_get(device->resync, enr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  915) 	bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  916) 	if (bm_ext) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  917) 		if (bm_ext->lce.lc_number != enr) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  918) 			bm_ext->rs_left = drbd_bm_e_weight(device, enr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  919) 			bm_ext->rs_failed = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  920) 			lc_committed(device->resync);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  921) 			wakeup = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  922) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  923) 		if (bm_ext->lce.refcnt == 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  924) 			device->resync_locked++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  925) 		set_bit(BME_NO_WRITES, &bm_ext->flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  926) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  927) 	rs_flags = device->resync->flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  928) 	spin_unlock_irq(&device->al_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  929) 	if (wakeup)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  930) 		wake_up(&device->al_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  931) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  932) 	if (!bm_ext) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  933) 		if (rs_flags & LC_STARVING)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  934) 			drbd_warn(device, "Have to wait for element"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  935) 			     " (resync LRU too small?)\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  936) 		BUG_ON(rs_flags & LC_LOCKED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  937) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  938) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  939) 	return bm_ext;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  940) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  941) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  942) static int _is_in_al(struct drbd_device *device, unsigned int enr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  943) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  944) 	int rv;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  945) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  946) 	spin_lock_irq(&device->al_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  947) 	rv = lc_is_used(device->act_log, enr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  948) 	spin_unlock_irq(&device->al_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  949) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  950) 	return rv;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  951) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  952) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  953) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  954)  * drbd_rs_begin_io() - Gets an extent in the resync LRU cache and sets it to BME_LOCKED
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  955)  * @device:	DRBD device.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  956)  * @sector:	The sector number.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  957)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  958)  * This function sleeps on al_wait. Returns 0 on success, -EINTR if interrupted.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  959)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  960) int drbd_rs_begin_io(struct drbd_device *device, sector_t sector)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  961) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  962) 	unsigned int enr = BM_SECT_TO_EXT(sector);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  963) 	struct bm_extent *bm_ext;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  964) 	int i, sig;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  965) 	bool sa;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  966) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  967) retry:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  968) 	sig = wait_event_interruptible(device->al_wait,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  969) 			(bm_ext = _bme_get(device, enr)));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  970) 	if (sig)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  971) 		return -EINTR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  972) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  973) 	if (test_bit(BME_LOCKED, &bm_ext->flags))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  974) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  975) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  976) 	/* step aside only while we are above c-min-rate; unless disabled. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  977) 	sa = drbd_rs_c_min_rate_throttle(device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  978) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  979) 	for (i = 0; i < AL_EXT_PER_BM_SECT; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  980) 		sig = wait_event_interruptible(device->al_wait,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  981) 					       !_is_in_al(device, enr * AL_EXT_PER_BM_SECT + i) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  982) 					       (sa && test_bit(BME_PRIORITY, &bm_ext->flags)));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  983) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  984) 		if (sig || (sa && test_bit(BME_PRIORITY, &bm_ext->flags))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  985) 			spin_lock_irq(&device->al_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  986) 			if (lc_put(device->resync, &bm_ext->lce) == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  987) 				bm_ext->flags = 0; /* clears BME_NO_WRITES and possibly BME_PRIORITY */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  988) 				device->resync_locked--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  989) 				wake_up(&device->al_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  990) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  991) 			spin_unlock_irq(&device->al_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  992) 			if (sig)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  993) 				return -EINTR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  994) 			if (schedule_timeout_interruptible(HZ/10))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  995) 				return -EINTR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  996) 			goto retry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  997) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  998) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  999) 	set_bit(BME_LOCKED, &bm_ext->flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) }
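
/* Sketch of the call pattern expected around this lock (illustrative, not a
 * quote of the actual callers in drbd_worker.c / drbd_receiver.c): */
#if 0
	if (drbd_rs_begin_io(device, sector))
		return 0;	/* -EINTR: interrupted, try again later */
	/* ... submit the resync read/write touching this extent ... */
	drbd_rs_complete_io(device, sector);	/* releases the extent again */
#endif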
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004)  * drbd_try_rs_begin_io() - Gets an extent in the resync LRU cache, does not sleep
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005)  * @device:	DRBD device.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006)  * @sector:	The sector number.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008)  * Gets an extent in the resync LRU cache, sets it to BME_NO_WRITES, then
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009)  * tries to set it to BME_LOCKED. Returns 0 upon success, and -EAGAIN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010)  * if there is still application IO going on in this area.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012) int drbd_try_rs_begin_io(struct drbd_device *device, sector_t sector)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014) 	unsigned int enr = BM_SECT_TO_EXT(sector);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015) 	const unsigned int al_enr = enr*AL_EXT_PER_BM_SECT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016) 	struct lc_element *e;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017) 	struct bm_extent *bm_ext;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018) 	int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019) 	bool throttle = drbd_rs_should_slow_down(device, sector, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) 	/* If we need to throttle, a half-locked (only marked BME_NO_WRITES,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022) 	 * not yet BME_LOCKED) extent needs to be kicked out explicitly.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023) 	 * There is at most one such half-locked extent,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) 	 * which is remembered in resync_wenr. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026) 	if (throttle && device->resync_wenr != enr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027) 		return -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029) 	spin_lock_irq(&device->al_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030) 	if (device->resync_wenr != LC_FREE && device->resync_wenr != enr) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031) 		/* In case of very heavy scattered io, giving up the ref count
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) 		 * when we try again and requeue may stall the syncer
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033) 		 * indefinitely.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034) 		 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035) 		 * But if we don't give up the refcount, and by the next time
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036) 		 * we are scheduled this extent has been "synced" by new
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037) 		 * application writes, we'd miss the lc_put on the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038) 		 * extent we keep the refcount on.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039) 		 * So we remember which extent we had to try again, and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040) 		 * if the next requested one is something else, we do
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041) 		 * the lc_put here...
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042) 		 * We also have to wake_up.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044) 		e = lc_find(device->resync, device->resync_wenr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045) 		bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046) 		if (bm_ext) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047) 			D_ASSERT(device, !test_bit(BME_LOCKED, &bm_ext->flags));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) 			D_ASSERT(device, test_bit(BME_NO_WRITES, &bm_ext->flags));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049) 			clear_bit(BME_NO_WRITES, &bm_ext->flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050) 			device->resync_wenr = LC_FREE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051) 			if (lc_put(device->resync, &bm_ext->lce) == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052) 				bm_ext->flags = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053) 				device->resync_locked--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055) 			wake_up(&device->al_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056) 		} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057) 			drbd_alert(device, "LOGIC BUG\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060) 	/* TRY. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061) 	e = lc_try_get(device->resync, enr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062) 	bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063) 	if (bm_ext) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064) 		if (test_bit(BME_LOCKED, &bm_ext->flags))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065) 			goto proceed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066) 		if (!test_and_set_bit(BME_NO_WRITES, &bm_ext->flags)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067) 			device->resync_locked++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068) 		} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069) 			/* we had already set BME_NO_WRITES,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070) 			 * but then could not set BME_LOCKED,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071) 			 * so we are trying again.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072) 			 * Drop the extra reference. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073) 			bm_ext->lce.refcnt--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074) 			D_ASSERT(device, bm_ext->lce.refcnt > 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076) 		goto check_al;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078) 		/* do we rather want to try later? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079) 		if (device->resync_locked > device->resync->nr_elements-3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080) 			goto try_again;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081) 		/* Do or do not. There is no try. -- Yoda */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082) 		e = lc_get(device->resync, enr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083) 		bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084) 		if (!bm_ext) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085) 			const unsigned long rs_flags = device->resync->flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086) 			if (rs_flags & LC_STARVING)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087) 				drbd_warn(device, "Have to wait for element"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088) 				     " (resync LRU too small?)\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089) 			BUG_ON(rs_flags & LC_LOCKED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090) 			goto try_again;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092) 		if (bm_ext->lce.lc_number != enr) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093) 			bm_ext->rs_left = drbd_bm_e_weight(device, enr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094) 			bm_ext->rs_failed = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095) 			lc_committed(device->resync);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096) 			wake_up(&device->al_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097) 			D_ASSERT(device, test_bit(BME_LOCKED, &bm_ext->flags) == 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099) 		set_bit(BME_NO_WRITES, &bm_ext->flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100) 		D_ASSERT(device, bm_ext->lce.refcnt == 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101) 		device->resync_locked++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102) 		goto check_al;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104) check_al:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105) 	for (i = 0; i < AL_EXT_PER_BM_SECT; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106) 		if (lc_is_used(device->act_log, al_enr+i))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107) 			goto try_again;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109) 	set_bit(BME_LOCKED, &bm_ext->flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110) proceed:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111) 	device->resync_wenr = LC_FREE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112) 	spin_unlock_irq(&device->al_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1113) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115) try_again:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116) 	if (bm_ext) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117) 		if (throttle) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118) 			D_ASSERT(device, !test_bit(BME_LOCKED, &bm_ext->flags));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119) 			D_ASSERT(device, test_bit(BME_NO_WRITES, &bm_ext->flags));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120) 			clear_bit(BME_NO_WRITES, &bm_ext->flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121) 			device->resync_wenr = LC_FREE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122) 			if (lc_put(device->resync, &bm_ext->lce) == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123) 				bm_ext->flags = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124) 				device->resync_locked--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126) 			wake_up(&device->al_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127) 		} else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128) 			device->resync_wenr = enr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130) 	spin_unlock_irq(&device->al_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131) 	return -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132) }
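
/* Sketch: unlike drbd_rs_begin_io() above, this variant never sleeps, so a
 * caller typically requeues the request on -EAGAIN instead of waiting
 * (requeue_resync_request() is a hypothetical helper, shown only to
 * illustrate the pattern): */
#if 0
	if (drbd_try_rs_begin_io(device, sector) == -EAGAIN) {
		/* application IO or throttling wins for now */
		requeue_resync_request(device, sector);	/* hypothetical */
		return 0;
	}
#endif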
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134) void drbd_rs_complete_io(struct drbd_device *device, sector_t sector)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136) 	unsigned int enr = BM_SECT_TO_EXT(sector);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137) 	struct lc_element *e;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138) 	struct bm_extent *bm_ext;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139) 	unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141) 	spin_lock_irqsave(&device->al_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142) 	e = lc_find(device->resync, enr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143) 	bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144) 	if (!bm_ext) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145) 		spin_unlock_irqrestore(&device->al_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146) 		if (__ratelimit(&drbd_ratelimit_state))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147) 			drbd_err(device, "drbd_rs_complete_io() called, but extent not found\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151) 	if (bm_ext->lce.refcnt == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152) 		spin_unlock_irqrestore(&device->al_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153) 		drbd_err(device, "drbd_rs_complete_io(,%llu [=%u]) called, "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154) 		    "but refcnt is 0!?\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155) 		    (unsigned long long)sector, enr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159) 	if (lc_put(device->resync, &bm_ext->lce) == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160) 		bm_ext->flags = 0; /* clear BME_LOCKED, BME_NO_WRITES and BME_PRIORITY */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161) 		device->resync_locked--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162) 		wake_up(&device->al_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165) 	spin_unlock_irqrestore(&device->al_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169)  * drbd_rs_cancel_all() - Removes all extents from the resync LRU (even BME_LOCKED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170)  * @device:	DRBD device.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172) void drbd_rs_cancel_all(struct drbd_device *device)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174) 	spin_lock_irq(&device->al_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176) 	if (get_ldev_if_state(device, D_FAILED)) { /* Makes sure ->resync is there. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177) 		lc_reset(device->resync);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178) 		put_ldev(device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180) 	device->resync_locked = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181) 	device->resync_wenr = LC_FREE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182) 	spin_unlock_irq(&device->al_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183) 	wake_up(&device->al_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187)  * drbd_rs_del_all() - Gracefully remove all extents from the resync LRU
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188)  * @device:	DRBD device.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1189)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190)  * Returns 0 upon success, -EAGAIN if at least one reference count was
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191)  * not zero.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193) int drbd_rs_del_all(struct drbd_device *device)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195) 	struct lc_element *e;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196) 	struct bm_extent *bm_ext;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197) 	int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199) 	spin_lock_irq(&device->al_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201) 	if (get_ldev_if_state(device, D_FAILED)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202) 		/* ok, ->resync is there. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203) 		for (i = 0; i < device->resync->nr_elements; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204) 			e = lc_element_by_index(device->resync, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205) 			bm_ext = lc_entry(e, struct bm_extent, lce);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206) 			if (bm_ext->lce.lc_number == LC_FREE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207) 				continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208) 			if (bm_ext->lce.lc_number == device->resync_wenr) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1209) 				drbd_info(device, "dropping %u in drbd_rs_del_all, apparently"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210) 				     " got 'synced' by application io\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211) 				     device->resync_wenr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212) 				D_ASSERT(device, !test_bit(BME_LOCKED, &bm_ext->flags));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213) 				D_ASSERT(device, test_bit(BME_NO_WRITES, &bm_ext->flags));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214) 				clear_bit(BME_NO_WRITES, &bm_ext->flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215) 				device->resync_wenr = LC_FREE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216) 				lc_put(device->resync, &bm_ext->lce);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218) 			if (bm_ext->lce.refcnt != 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219) 				drbd_info(device, "Retrying drbd_rs_del_all() later. "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220) 				     "refcnt=%d\n", bm_ext->lce.refcnt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221) 				put_ldev(device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222) 				spin_unlock_irq(&device->al_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223) 				return -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225) 			D_ASSERT(device, !test_bit(BME_LOCKED, &bm_ext->flags));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226) 			D_ASSERT(device, !test_bit(BME_NO_WRITES, &bm_ext->flags));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227) 			lc_del(device->resync, &bm_ext->lce);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229) 		D_ASSERT(device, device->resync->used == 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230) 		put_ldev(device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232) 	spin_unlock_irq(&device->al_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233) 	wake_up(&device->al_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236) }
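
/* Sketch of how a caller might deal with the -EAGAIN above; the real
 * callers reschedule themselves instead of polling, so this busy variant
 * is only illustrative: */
#if 0
	while (drbd_rs_del_all(device) == -EAGAIN)
		if (schedule_timeout_interruptible(HZ / 10))
			break;	/* interrupted */
#endif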