Orange Pi 5 kernel

Deprecated Linux kernel 5.10.110 for Orange Pi 5/5B/5+ boards

// SPDX-License-Identifier: GPL-2.0-or-later
/*
   drbd_bitmap.c

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2004-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 2004-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2004-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.

 */

#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt

#include <linux/bitmap.h>
#include <linux/vmalloc.h>
#include <linux/string.h>
#include <linux/drbd.h>
#include <linux/slab.h>
#include <linux/highmem.h>

#include "drbd_int.h"


/* OPAQUE outside this file!
 * interface defined in drbd_int.h

 * convention:
 * function name drbd_bm_... => used elsewhere, "public".
 * function name      bm_... => internal to implementation, "private".
 */


/*
 * LIMITATIONS:
 * We want to support >= peta byte of backend storage, while for now still using
 * a granularity of one bit per 4KiB of storage.
 * 1 << 50		bytes backend storage (1 PiB)
 * 1 << (50 - 12)	bits needed
 *	38 --> we need u64 to index and count bits
 * 1 << (38 - 3)	bitmap bytes needed
 *	35 --> we still need u64 to index and count bytes
 *			(that's 32 GiB of bitmap for 1 PiB storage)
 * 1 << (35 - 2)	32bit longs needed
 *	33 --> we'd even need u64 to index and count 32bit long words.
 * 1 << (35 - 3)	64bit longs needed
 *	32 --> we could get away with a 32bit unsigned int to index and count
 *	64bit long words, but I'd rather stay with unsigned long for now.
 *	We probably should neither count nor point to bytes or long words
 *	directly, but either by bitnumber, or by page index and offset.
 * 1 << (35 - 12)
 *	22 --> we need that many 4KiB pages of bitmap.
 *	1 << (22 + 3) --> on a 64bit arch,
 *	we need 32 MiB to store the array of page pointers.
 *
 * Because I'm lazy, and because the resulting patch was too large, too ugly
 * and still incomplete, on 32bit we still "only" support 16 TiB (minus some),
 * (1 << 32) bits * 4k storage.
 *

 * bitmap storage and IO:
 *	Bitmap is stored little endian on disk, and is kept little endian in
 *	core memory. Currently we still hold the full bitmap in core as long
 *	as we are "attached" to a local disk, which at 32 GiB for 1 PiB storage
 *	seems excessive.
 *
 *	We plan to reduce the amount of in-core bitmap pages by paging them in
 *	and out against their on-disk location as necessary, but need to make
 *	sure we don't cause too much meta data IO, and must not deadlock in
 *	tight memory situations. This needs some more work.
 */
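
/*
 * Illustrative sizing walk-through (editor's note, not part of the driver;
 * the numbers follow from the LIMITATIONS comment above). For a
 * hypothetical 1 TiB backing device at one bit per 4 KiB:
 *
 *	bits  = (1ULL << 40) >> 12;	== 1 << 28  (268,435,456 bits)
 *	bytes = bits >> 3;		== 1 << 25  (32 MiB of bitmap)
 *	pages = bytes >> PAGE_SHIFT;	== 1 << 13  (8192 pages at 4 KiB)
 *
 * So the in-core bitmap scales at 32 MiB per TiB of storage, which is where
 * the "32 GiB of bitmap for 1 PiB storage" figure above comes from.
 */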

/*
 * NOTE
 *  Access to the *bm_pages is protected by bm_lock.
 *  It is safe to read the other members within the lock.
 *
 *  drbd_bm_set_bits is called from bio_endio callbacks;
 *  we may be called with irq already disabled,
 *  so we need spin_lock_irqsave().
 *  And we need the kmap_atomic.
 */
struct drbd_bitmap {
	struct page **bm_pages;
	spinlock_t bm_lock;

	/* exclusively to be used by __al_write_transaction(),
	 * drbd_bm_mark_for_writeout() and
	 * drbd_bm_write_hinted() -> bm_rw() called from there.
	 */
	unsigned int n_bitmap_hints;
	unsigned int al_bitmap_hints[AL_UPDATES_PER_TRANSACTION];

	/* see LIMITATIONS: above */

	unsigned long bm_set;       /* nr of set bits; THINK maybe atomic_t? */
	unsigned long bm_bits;
	size_t   bm_words;
	size_t   bm_number_of_pages;
	sector_t bm_dev_capacity;
	struct mutex bm_change; /* serializes resize operations */

	wait_queue_head_t bm_io_wait; /* used to serialize IO of single pages */

	enum bm_flag bm_flags;

	/* debugging aid, in case we are still racy somewhere */
	char          *bm_why;
	struct task_struct *bm_task;
};
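
/*
 * Illustrative sketch (editor's note, not part of the driver): the locking
 * convention the NOTE above describes. A hypothetical modification path
 * would bracket access to bm_pages with spin_lock_irqsave(), since it may
 * already run with interrupts disabled (e.g. from a bio_endio callback):
 *
 *	unsigned long flags;
 *
 *	spin_lock_irqsave(&b->bm_lock, flags);
 *	// map a page with kmap_atomic(), flip bits, kunmap_atomic() ...
 *	spin_unlock_irqrestore(&b->bm_lock, flags);
 */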

#define bm_print_lock_info(m) __bm_print_lock_info(m, __func__)
static void __bm_print_lock_info(struct drbd_device *device, const char *func)
{
	struct drbd_bitmap *b = device->bitmap;
	if (!__ratelimit(&drbd_ratelimit_state))
		return;
	drbd_err(device, "FIXME %s[%d] in %s, bitmap locked for '%s' by %s[%d]\n",
		 current->comm, task_pid_nr(current),
		 func, b->bm_why ?: "?",
		 b->bm_task->comm, task_pid_nr(b->bm_task));
}

void drbd_bm_lock(struct drbd_device *device, char *why, enum bm_flag flags)
{
	struct drbd_bitmap *b = device->bitmap;
	int trylock_failed;

	if (!b) {
		drbd_err(device, "FIXME no bitmap in drbd_bm_lock!?\n");
		return;
	}

	trylock_failed = !mutex_trylock(&b->bm_change);

	if (trylock_failed) {
		drbd_warn(device, "%s[%d] going to '%s' but bitmap already locked for '%s' by %s[%d]\n",
			  current->comm, task_pid_nr(current),
			  why, b->bm_why ?: "?",
			  b->bm_task->comm, task_pid_nr(b->bm_task));
		mutex_lock(&b->bm_change);
	}
	if (BM_LOCKED_MASK & b->bm_flags)
		drbd_err(device, "FIXME bitmap already locked in bm_lock\n");
	b->bm_flags |= flags & BM_LOCKED_MASK;

	b->bm_why  = why;
	b->bm_task = current;
}

void drbd_bm_unlock(struct drbd_device *device)
{
	struct drbd_bitmap *b = device->bitmap;
	if (!b) {
		drbd_err(device, "FIXME no bitmap in drbd_bm_unlock!?\n");
		return;
	}

	if (!(BM_LOCKED_MASK & device->bitmap->bm_flags))
		drbd_err(device, "FIXME bitmap not locked in bm_unlock\n");

	b->bm_flags &= ~BM_LOCKED_MASK;
	b->bm_why  = NULL;
	b->bm_task = NULL;
	mutex_unlock(&b->bm_change);
}
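
/*
 * Illustrative usage (editor's note; hypothetical caller): whole-bitmap
 * operations take the exclusive lock for their duration, naming the reason
 * so a conflicting caller can be reported by __bm_print_lock_info():
 *
 *	drbd_bm_lock(device, "resize", BM_LOCKED_MASK);
 *	// ... operate on the whole bitmap ...
 *	drbd_bm_unlock(device);
 *
 * drbd_bm_resize() below follows exactly this pattern.
 */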

/* we store some "meta" info about our pages in page->private */
/* at a granularity of 4k storage per bitmap bit:
 * one peta byte storage: 1<<50 byte, 1<<38 * 4k storage blocks
 *  1<<38 bits,
 *  1<<23 4k bitmap pages.
 * Use 24 bits as page index, covers 2 peta byte storage
 * at a granularity of 4k per bit.
 * Used to report the failed page idx on io error from the endio handlers.
 */
#define BM_PAGE_IDX_MASK	((1UL<<24)-1)
/* this page is currently read in, or written back */
#define BM_PAGE_IO_LOCK		31
/* if there has been an IO error for this page */
#define BM_PAGE_IO_ERROR	30
/* this is to be able to intelligently skip disk IO,
 * set if bits have been set since last IO. */
#define BM_PAGE_NEED_WRITEOUT	29
/* to mark for lazy writeout once syncer cleared all clearable bits,
 * set if bits have been cleared since last IO. */
#define BM_PAGE_LAZY_WRITEOUT	28
/* pages marked with this "HINT" will be considered for writeout
 * on activity log transactions */
#define BM_PAGE_HINT_WRITEOUT	27
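
/*
 * Resulting page->private layout (editor's note; bits 24-26 and, on 64bit,
 * anything above bit 31 are currently unused):
 *
 *	bit 31      30        29             28             27     23..0
 *	    IO_LOCK IO_ERROR  NEED_WRITEOUT  LAZY_WRITEOUT  HINT   page idx
 *
 * e.g. a page that is page number 5 of the bitmap and has been dirtied
 * since its last writeout would hold (1UL << BM_PAGE_NEED_WRITEOUT) | 5.
 */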

/* store_page_idx uses non-atomic assignment. It is only used directly after
 * allocating the page.  All other bm_set_page_* and bm_clear_page_* need to
 * use atomic bit manipulation, as set_out_of_sync (and therefore bitmap
 * changes) may happen from various contexts, and wait_on_bit/wake_up_bit
 * requires it all to be atomic as well. */
static void bm_store_page_idx(struct page *page, unsigned long idx)
{
	BUG_ON(0 != (idx & ~BM_PAGE_IDX_MASK));
	set_page_private(page, idx);
}

static unsigned long bm_page_to_idx(struct page *page)
{
	return page_private(page) & BM_PAGE_IDX_MASK;
}

/* As it is very unlikely that the same page is under IO from more than one
 * context, we can get away with a bit per page and one wait queue per bitmap.
 */
static void bm_page_lock_io(struct drbd_device *device, int page_nr)
{
	struct drbd_bitmap *b = device->bitmap;
	void *addr = &page_private(b->bm_pages[page_nr]);
	wait_event(b->bm_io_wait, !test_and_set_bit(BM_PAGE_IO_LOCK, addr));
}

static void bm_page_unlock_io(struct drbd_device *device, int page_nr)
{
	struct drbd_bitmap *b = device->bitmap;
	void *addr = &page_private(b->bm_pages[page_nr]);
	clear_bit_unlock(BM_PAGE_IO_LOCK, addr);
	wake_up(&device->bitmap->bm_io_wait);
}

/* set _before_ submit_io, so it may be reset due to being changed
 * while this page is in flight... will get submitted later again */
static void bm_set_page_unchanged(struct page *page)
{
	/* use cmpxchg? */
	clear_bit(BM_PAGE_NEED_WRITEOUT, &page_private(page));
	clear_bit(BM_PAGE_LAZY_WRITEOUT, &page_private(page));
}

static void bm_set_page_need_writeout(struct page *page)
{
	set_bit(BM_PAGE_NEED_WRITEOUT, &page_private(page));
}

void drbd_bm_reset_al_hints(struct drbd_device *device)
{
	device->bitmap->n_bitmap_hints = 0;
}

/**
 * drbd_bm_mark_for_writeout() - mark a page with a "hint" to be considered for writeout
 * @device:	DRBD device.
 * @page_nr:	the bitmap page to mark with the "hint" flag
 *
 * From within an activity log transaction, we mark a few pages with these
 * hints, then call drbd_bm_write_hinted(), which will only write out changed
 * pages which are flagged with this mark.
 */
void drbd_bm_mark_for_writeout(struct drbd_device *device, int page_nr)
{
	struct drbd_bitmap *b = device->bitmap;
	struct page *page;
	if (page_nr >= device->bitmap->bm_number_of_pages) {
		drbd_warn(device, "BAD: page_nr: %u, number_of_pages: %u\n",
			 page_nr, (int)device->bitmap->bm_number_of_pages);
		return;
	}
	page = device->bitmap->bm_pages[page_nr];
	BUG_ON(b->n_bitmap_hints >= ARRAY_SIZE(b->al_bitmap_hints));
	if (!test_and_set_bit(BM_PAGE_HINT_WRITEOUT, &page_private(page)))
		b->al_bitmap_hints[b->n_bitmap_hints++] = page_nr;
}

static int bm_test_page_unchanged(struct page *page)
{
	volatile const unsigned long *addr = &page_private(page);
	return (*addr & ((1UL<<BM_PAGE_NEED_WRITEOUT)|(1UL<<BM_PAGE_LAZY_WRITEOUT))) == 0;
}

static void bm_set_page_io_err(struct page *page)
{
	set_bit(BM_PAGE_IO_ERROR, &page_private(page));
}

static void bm_clear_page_io_err(struct page *page)
{
	clear_bit(BM_PAGE_IO_ERROR, &page_private(page));
}

static void bm_set_page_lazy_writeout(struct page *page)
{
	set_bit(BM_PAGE_LAZY_WRITEOUT, &page_private(page));
}

static int bm_test_page_lazy_writeout(struct page *page)
{
	return test_bit(BM_PAGE_LAZY_WRITEOUT, &page_private(page));
}

/* on a 32bit box, this would allow for exactly (2<<38) bits. */
static unsigned int bm_word_to_page_idx(struct drbd_bitmap *b, unsigned long long_nr)
{
	/* page_nr = (word*sizeof(long)) >> PAGE_SHIFT; */
	unsigned int page_nr = long_nr >> (PAGE_SHIFT - LN2_BPL + 3);
	BUG_ON(page_nr >= b->bm_number_of_pages);
	return page_nr;
}

static unsigned int bm_bit_to_page_idx(struct drbd_bitmap *b, u64 bitnr)
{
	/* page_nr = (bitnr/8) >> PAGE_SHIFT; */
	unsigned int page_nr = bitnr >> (PAGE_SHIFT + 3);
	BUG_ON(page_nr >= b->bm_number_of_pages);
	return page_nr;
}
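
/*
 * Worked example of the index math above (editor's note; assumes 4 KiB
 * pages, so PAGE_SHIFT == 12 and each bitmap page holds 1 << 15 bits):
 * bit number 100000 lives in page 100000 >> 15 == 3, and on a 64bit arch
 * (LN2_BPL == 6) its word number 100000 / 64 == 1562 maps to the same
 * page, 1562 >> (12 - 6 + 3) == 3.
 */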

static unsigned long *__bm_map_pidx(struct drbd_bitmap *b, unsigned int idx)
{
	struct page *page = b->bm_pages[idx];
	return (unsigned long *) kmap_atomic(page);
}

static unsigned long *bm_map_pidx(struct drbd_bitmap *b, unsigned int idx)
{
	return __bm_map_pidx(b, idx);
}

static void __bm_unmap(unsigned long *p_addr)
{
	kunmap_atomic(p_addr);
}

static void bm_unmap(unsigned long *p_addr)
{
	__bm_unmap(p_addr);
}

/* long word offset of _bitmap_ sector */
#define S2W(s)	((s)<<(BM_EXT_SHIFT-BM_BLOCK_SHIFT-LN2_BPL))
/* word offset from start of bitmap to word number _in_page_
 * modulo longs per page
#define MLPP(X) ((X) % (PAGE_SIZE/sizeof(long)))
 hm, well, Philipp thinks gcc might not optimize the % into & (... - 1)
 so do it explicitly:
 */
#define MLPP(X) ((X) & ((PAGE_SIZE/sizeof(long))-1))

/* Long words per page */
#define LWPP (PAGE_SIZE/sizeof(long))
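
/*
 * Example (editor's note): on a 64bit arch with 4 KiB pages, LWPP is
 * 4096 / 8 == 512, so global word offset 1000 maps to word
 * MLPP(1000) == 1000 & 511 == 488 within its page. The & form and the
 * % form agree because LWPP is a power of two.
 */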

/*
 * actually most functions herein should take a struct drbd_bitmap*, not a
 * struct drbd_device*, but for the debug macros I like to have the device around
 * to be able to report device-specific messages.
 */


static void bm_free_pages(struct page **pages, unsigned long number)
{
	unsigned long i;
	if (!pages)
		return;

	for (i = 0; i < number; i++) {
		if (!pages[i]) {
			pr_alert("bm_free_pages tried to free a NULL pointer; i=%lu n=%lu\n",
				 i, number);
			continue;
		}
		__free_page(pages[i]);
		pages[i] = NULL;
	}
}

static inline void bm_vk_free(void *ptr)
{
	kvfree(ptr);
}

/*
 * "have" and "want" are NUMBER OF PAGES.
 */
static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want)
{
	struct page **old_pages = b->bm_pages;
	struct page **new_pages, *page;
	unsigned int i, bytes;
	unsigned long have = b->bm_number_of_pages;

	BUG_ON(have == 0 && old_pages != NULL);
	BUG_ON(have != 0 && old_pages == NULL);

	if (have == want)
		return old_pages;

	/* Trying kmalloc first, falling back to vmalloc.
	 * GFP_NOIO, as this is called while drbd IO is "suspended",
	 * and during resize or attach on diskless Primary,
	 * we must not block on IO to ourselves.
	 * Context is receiver thread or dmsetup. */
	bytes = sizeof(struct page *)*want;
	new_pages = kzalloc(bytes, GFP_NOIO | __GFP_NOWARN);
	if (!new_pages) {
		new_pages = __vmalloc(bytes, GFP_NOIO | __GFP_ZERO);
		if (!new_pages)
			return NULL;
	}

	if (want >= have) {
		for (i = 0; i < have; i++)
			new_pages[i] = old_pages[i];
		for (; i < want; i++) {
			page = alloc_page(GFP_NOIO | __GFP_HIGHMEM);
			if (!page) {
				bm_free_pages(new_pages + have, i - have);
				bm_vk_free(new_pages);
				return NULL;
			}
			/* we want to know which page it is
			 * from the endio handlers */
			bm_store_page_idx(page, i);
			new_pages[i] = page;
		}
	} else {
		for (i = 0; i < want; i++)
			new_pages[i] = old_pages[i];
		/* NOT HERE, we are outside the spinlock!
		bm_free_pages(old_pages + want, have - want);
		*/
	}

	return new_pages;
}
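
/*
 * Aside (editor's note, informational): the kzalloc-then-__vmalloc fallback
 * above looks like
 *
 *	new_pages = kvmalloc_array(want, sizeof(struct page *),
 *				   GFP_NOIO | __GFP_ZERO);
 *
 * but kvmalloc() cannot be used here: for flags that are not
 * GFP_KERNEL-compatible (such as GFP_NOIO) it skips the vmalloc fallback
 * entirely, while this path must not initiate IO yet still wants the
 * fallback for large page-pointer arrays.
 */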

/*
 * allocates the drbd_bitmap and stores it in device->bitmap.
 */
int drbd_bm_init(struct drbd_device *device)
{
	struct drbd_bitmap *b = device->bitmap;
	WARN_ON(b != NULL);
	b = kzalloc(sizeof(struct drbd_bitmap), GFP_KERNEL);
	if (!b)
		return -ENOMEM;
	spin_lock_init(&b->bm_lock);
	mutex_init(&b->bm_change);
	init_waitqueue_head(&b->bm_io_wait);

	device->bitmap = b;

	return 0;
}

sector_t drbd_bm_capacity(struct drbd_device *device)
{
	if (!expect(device->bitmap))
		return 0;
	return device->bitmap->bm_dev_capacity;
}

/* called on driver unload. TODO: call when a device is destroyed.
 */
void drbd_bm_cleanup(struct drbd_device *device)
{
	if (!expect(device->bitmap))
		return;
	bm_free_pages(device->bitmap->bm_pages, device->bitmap->bm_number_of_pages);
	bm_vk_free(device->bitmap->bm_pages);
	kfree(device->bitmap);
	device->bitmap = NULL;
}

/*
 * since (b->bm_bits % BITS_PER_LONG) != 0,
 * this masks out the remaining bits.
 * Returns the number of bits cleared.
 */
#ifndef BITS_PER_PAGE
#define BITS_PER_PAGE		(1UL << (PAGE_SHIFT + 3))
#define BITS_PER_PAGE_MASK	(BITS_PER_PAGE - 1)
#else
# if BITS_PER_PAGE != (1UL << (PAGE_SHIFT + 3))
#  error "ambiguous BITS_PER_PAGE"
# endif
#endif
#define BITS_PER_LONG_MASK	(BITS_PER_LONG - 1)
static int bm_clear_surplus(struct drbd_bitmap *b)
{
	unsigned long mask;
	unsigned long *p_addr, *bm;
	int tmp;
	int cleared = 0;

	/* number of bits modulo bits per page */
	tmp = (b->bm_bits & BITS_PER_PAGE_MASK);
	/* mask the used bits of the word containing the last bit */
	mask = (1UL << (tmp & BITS_PER_LONG_MASK)) - 1;
	/* bitmap is always stored little endian,
	 * on disk and in core memory alike */
	mask = cpu_to_lel(mask);

	p_addr = bm_map_pidx(b, b->bm_number_of_pages - 1);
	bm = p_addr + (tmp/BITS_PER_LONG);
	if (mask) {
		/* If mask != 0, we are not exactly aligned, so bm now points
		 * to the long containing the last bit.
		 * If mask == 0, bm already points to the word immediately
		 * after the last (long word aligned) bit. */
		cleared = hweight_long(*bm & ~mask);
		*bm &= mask;
		bm++;
	}

	if (BITS_PER_LONG == 32 && ((bm - p_addr) & 1) == 1) {
		/* on a 32bit arch, we may need to zero out
		 * a padding long to align with a 64bit remote */
		cleared += hweight_long(*bm);
		*bm = 0;
	}
	bm_unmap(p_addr);
	return cleared;
}
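
/*
 * Worked example (editor's note) for the masking above: with
 * b->bm_bits == 1000 on a 64bit arch, tmp == 1000 and
 * mask == (1UL << (1000 & 63)) - 1 == (1UL << 40) - 1, so in the word
 * covering bits 960..1023 the low 40 bits (bits 960..999) stay valid while
 * the 24 surplus bits above them are cleared here, or set in
 * bm_set_surplus() below (ignoring the cpu_to_lel() byte swap that applies
 * on big endian hosts).
 */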

static void bm_set_surplus(struct drbd_bitmap *b)
{
	unsigned long mask;
	unsigned long *p_addr, *bm;
	int tmp;

	/* number of bits modulo bits per page */
	tmp = (b->bm_bits & BITS_PER_PAGE_MASK);
	/* mask the used bits of the word containing the last bit */
	mask = (1UL << (tmp & BITS_PER_LONG_MASK)) - 1;
	/* bitmap is always stored little endian,
	 * on disk and in core memory alike */
	mask = cpu_to_lel(mask);

	p_addr = bm_map_pidx(b, b->bm_number_of_pages - 1);
	bm = p_addr + (tmp/BITS_PER_LONG);
	if (mask) {
		/* If mask != 0, we are not exactly aligned, so bm now points
		 * to the long containing the last bit.
		 * If mask == 0, bm already points to the word immediately
		 * after the last (long word aligned) bit. */
		*bm |= ~mask;
		bm++;
	}

	if (BITS_PER_LONG == 32 && ((bm - p_addr) & 1) == 1) {
		/* on a 32bit arch, we may need to set
		 * a padding long to align with a 64bit remote */
		*bm = ~0UL;
	}
	bm_unmap(p_addr);
}

/* you'd better not modify the bitmap while this is running,
 * or its results will be stale */
static unsigned long bm_count_bits(struct drbd_bitmap *b)
{
	unsigned long *p_addr;
	unsigned long bits = 0;
	unsigned long mask = (1UL << (b->bm_bits & BITS_PER_LONG_MASK)) - 1;
	int idx, last_word;

	/* all but last page */
	for (idx = 0; idx < b->bm_number_of_pages - 1; idx++) {
		p_addr = __bm_map_pidx(b, idx);
		bits += bitmap_weight(p_addr, BITS_PER_PAGE);
		__bm_unmap(p_addr);
		cond_resched();
	}
	/* last (or only) page */
	last_word = ((b->bm_bits - 1) & BITS_PER_PAGE_MASK) >> LN2_BPL;
	p_addr = __bm_map_pidx(b, idx);
	bits += bitmap_weight(p_addr, last_word * BITS_PER_LONG);
	p_addr[last_word] &= cpu_to_lel(mask);
	bits += hweight_long(p_addr[last_word]);
	/* 32bit arch, may have an unused padding long */
	if (BITS_PER_LONG == 32 && (last_word & 1) == 0)
		p_addr[last_word+1] = 0;
	__bm_unmap(p_addr);
	return bits;
}

/* offset and len in long words. */
static void bm_memset(struct drbd_bitmap *b, size_t offset, int c, size_t len)
{
	unsigned long *p_addr, *bm;
	unsigned int idx;
	size_t do_now, end;

	end = offset + len;

	if (end > b->bm_words) {
		pr_alert("bm_memset end > bm_words\n");
		return;
	}

	while (offset < end) {
		do_now = min_t(size_t, ALIGN(offset + 1, LWPP), end) - offset;
		idx = bm_word_to_page_idx(b, offset);
		p_addr = bm_map_pidx(b, idx);
		bm = p_addr + MLPP(offset);
		if (bm+do_now > p_addr + LWPP) {
			pr_alert("BUG BUG BUG! p_addr:%p bm:%p do_now:%d\n",
			       p_addr, bm, (int)do_now);
		} else
			memset(bm, c, do_now * sizeof(long));
		bm_unmap(p_addr);
		bm_set_page_need_writeout(b->bm_pages[idx]);
		offset += do_now;
	}
}

/* For the layout, see comment above drbd_md_set_sector_offsets(). */
static u64 drbd_md_on_disk_bits(struct drbd_backing_dev *ldev)
{
	u64 bitmap_sectors;
	if (ldev->md.al_offset == 8)
		bitmap_sectors = ldev->md.md_size_sect - ldev->md.bm_offset;
	else
		bitmap_sectors = ldev->md.al_offset - ldev->md.bm_offset;
	return bitmap_sectors << (9 + 3);
}
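
/*
 * Editor's note on the shift above: it converts sectors to bits. A sector
 * is 512 bytes (<< 9 gives bytes) and each byte holds 8 bitmap bits
 * (<< 3 gives bits). E.g. 32768 on-disk bitmap sectors hold
 * 32768 << 12 == 134217728 bits, enough for 512 GiB of backing storage at
 * 4 KiB per bit.
 */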

/*
 * make sure the bitmap has enough room for the attached storage,
 * if necessary, resize.
 * called whenever we may have changed the device size.
 * returns -ENOMEM if we could not allocate enough memory, 0 on success.
 * In case this is actually a resize, we copy the old bitmap into the new one.
 * Otherwise, the bitmap is initialized to all bits set.
 */
int drbd_bm_resize(struct drbd_device *device, sector_t capacity, int set_new_bits)
{
	struct drbd_bitmap *b = device->bitmap;
	unsigned long bits, words, owords, obits;
	unsigned long want, have, onpages; /* number of pages */
	struct page **npages, **opages = NULL;
	int err = 0;
	bool growing;

	if (!expect(b))
		return -ENOMEM;

	drbd_bm_lock(device, "resize", BM_LOCKED_MASK);

	drbd_info(device, "drbd_bm_resize called with capacity == %llu\n",
			(unsigned long long)capacity);

	if (capacity == b->bm_dev_capacity)
		goto out;

	if (capacity == 0) {
		spin_lock_irq(&b->bm_lock);
		opages = b->bm_pages;
		onpages = b->bm_number_of_pages;
		owords = b->bm_words;
		b->bm_pages = NULL;
		b->bm_number_of_pages =
		b->bm_set   =
		b->bm_bits  =
		b->bm_words =
		b->bm_dev_capacity = 0;
		spin_unlock_irq(&b->bm_lock);
		bm_free_pages(opages, onpages);
		bm_vk_free(opages);
		goto out;
	}
	bits  = BM_SECT_TO_BIT(ALIGN(capacity, BM_SECT_PER_BIT));

	/* if we would use
	   words = ALIGN(bits,BITS_PER_LONG) >> LN2_BPL;
	   a 32bit host could present the wrong number of words
	   to a 64bit host.
	*/
	words = ALIGN(bits, 64) >> LN2_BPL;

	if (get_ldev(device)) {
		u64 bits_on_disk = drbd_md_on_disk_bits(device->ldev);
		put_ldev(device);
		if (bits > bits_on_disk) {
			drbd_info(device, "bits = %lu\n", bits);
			drbd_info(device, "bits_on_disk = %llu\n", bits_on_disk);
			err = -ENOSPC;
			goto out;
		}
	}

	want = ALIGN(words*sizeof(long), PAGE_SIZE) >> PAGE_SHIFT;
	have = b->bm_number_of_pages;
	if (want == have) {
		D_ASSERT(device, b->bm_pages != NULL);
		npages = b->bm_pages;
	} else {
		if (drbd_insert_fault(device, DRBD_FAULT_BM_ALLOC))
			npages = NULL;
		else
			npages = bm_realloc_pages(b, want);
	}

	if (!npages) {
		err = -ENOMEM;
		goto out;
	}

	spin_lock_irq(&b->bm_lock);
	opages = b->bm_pages;
	owords = b->bm_words;
	obits  = b->bm_bits;

	growing = bits > obits;
	if (opages && growing && set_new_bits)
		bm_set_surplus(b);

	b->bm_pages = npages;
	b->bm_number_of_pages = want;
	b->bm_bits  = bits;
	b->bm_words = words;
	b->bm_dev_capacity = capacity;

	if (growing) {
		if (set_new_bits) {
			bm_memset(b, owords, 0xff, words-owords);
			b->bm_set += bits - obits;
		} else
			bm_memset(b, owords, 0x00, words-owords);

	}

	if (want < have) {
		/* implicit: (opages != NULL) && (opages != npages) */
		bm_free_pages(opages + want, have - want);
	}

	(void)bm_clear_surplus(b);

	spin_unlock_irq(&b->bm_lock);
	if (opages != npages)
		bm_vk_free(opages);
	if (!growing)
		b->bm_set = bm_count_bits(b);
	drbd_info(device, "resync bitmap: bits=%lu words=%lu pages=%lu\n", bits, words, want);

 out:
	drbd_bm_unlock(device);
	return err;
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  745) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  746) /* inherently racy:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  747)  * if not protected by other means, return value may be out of date when
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  748)  * leaving this function...
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  749)  * we still need to lock it, since it is important that this returns
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  750)  * bm_set == 0 precisely.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  751)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  752)  * maybe bm_set should be atomic_t ?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  753)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  754) unsigned long _drbd_bm_total_weight(struct drbd_device *device)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  755) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  756) 	struct drbd_bitmap *b = device->bitmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  757) 	unsigned long s;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  758) 	unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  759) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  760) 	if (!expect(b))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  761) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  762) 	if (!expect(b->bm_pages))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  763) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  764) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  765) 	spin_lock_irqsave(&b->bm_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  766) 	s = b->bm_set;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  767) 	spin_unlock_irqrestore(&b->bm_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  768) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  769) 	return s;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  770) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  771) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  772) unsigned long drbd_bm_total_weight(struct drbd_device *device)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  773) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  774) 	unsigned long s;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  775) 	/* if I don't have a disk, I don't know about out-of-sync status */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  776) 	if (!get_ldev_if_state(device, D_NEGOTIATING))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  777) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  778) 	s = _drbd_bm_total_weight(device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  779) 	put_ldev(device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  780) 	return s;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  781) }
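
/* Editorial sketch, not part of the original source: a caller typically
 * converts this bit count into an amount of backend storage by shifting by
 * the bitmap granularity (one bit per 4 KiB block), as the log message in
 * bm_rw() below does.  Hypothetical helper, name ours: */
#if 0
static unsigned long drbd_bm_oos_kib(struct drbd_device *device)
{
	/* each set bit covers 1 << (BM_BLOCK_SHIFT - 10) KiB of storage */
	return drbd_bm_total_weight(device) << (BM_BLOCK_SHIFT - 10);
}
#endif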
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  782) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  783) size_t drbd_bm_words(struct drbd_device *device)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  784) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  785) 	struct drbd_bitmap *b = device->bitmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  786) 	if (!expect(b))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  787) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  788) 	if (!expect(b->bm_pages))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  789) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  790) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  791) 	return b->bm_words;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  792) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  793) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  794) unsigned long drbd_bm_bits(struct drbd_device *device)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  795) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  796) 	struct drbd_bitmap *b = device->bitmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  797) 	if (!expect(b))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  798) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  799) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  800) 	return b->bm_bits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  801) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  802) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  803) /* merge 'number' words from 'buffer' into the bitmap, starting at word 'offset'.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  804)  * each buffer[i] is expected to be a little endian unsigned long.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  805)  * bitmap must be locked by drbd_bm_lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  806)  * currently only used from receive_bitmap.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  807)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  808) void drbd_bm_merge_lel(struct drbd_device *device, size_t offset, size_t number,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  809) 			unsigned long *buffer)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  810) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  811) 	struct drbd_bitmap *b = device->bitmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  812) 	unsigned long *p_addr, *bm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  813) 	unsigned long word, bits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  814) 	unsigned int idx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  815) 	size_t end, do_now;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  816) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  817) 	end = offset + number;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  818) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  819) 	if (!expect(b))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  820) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  821) 	if (!expect(b->bm_pages))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  822) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  823) 	if (number == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  824) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  825) 	WARN_ON(offset >= b->bm_words);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  826) 	WARN_ON(end    >  b->bm_words);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  827) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  828) 	spin_lock_irq(&b->bm_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  829) 	while (offset < end) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  830) 		do_now = min_t(size_t, ALIGN(offset+1, LWPP), end) - offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  831) 		idx = bm_word_to_page_idx(b, offset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  832) 		p_addr = bm_map_pidx(b, idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  833) 		bm = p_addr + MLPP(offset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  834) 		offset += do_now;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  835) 		while (do_now--) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  836) 			bits = hweight_long(*bm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  837) 			word = *bm | *buffer++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  838) 			*bm++ = word;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  839) 			b->bm_set += hweight_long(word) - bits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  840) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  841) 		bm_unmap(p_addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  842) 		bm_set_page_need_writeout(b->bm_pages[idx]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  843) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  844) 	/* with a 32bit <-> 64bit cross-platform connect,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  845) 	 * this is only correct for the current usage,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  846) 	 * where we _know_ that we are 64 bit aligned,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  847) 	 * and know that this function is only used in this way.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  848) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  849) 	if (end == b->bm_words)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  850) 		b->bm_set -= bm_clear_surplus(b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  851) 	spin_unlock_irq(&b->bm_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  852) }
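
/* Editorial sketch, not part of the original source: the bm_set accounting
 * in the merge loop above reduces to "add only the bits newly set by the
 * OR".  A hypothetical single-word version: */
#if 0
static void bm_merge_one_word(struct drbd_bitmap *b, unsigned long *bm,
			      unsigned long incoming)
{
	unsigned long old_bits = hweight_long(*bm);

	*bm |= incoming;
	b->bm_set += hweight_long(*bm) - old_bits;
}
#endif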
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  853) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  854) /* copy 'number' words from the bitmap, starting at word 'offset', into 'buffer'.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  855)  * each buffer[i] will be a little endian unsigned long.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  856)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  857) void drbd_bm_get_lel(struct drbd_device *device, size_t offset, size_t number,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  858) 		     unsigned long *buffer)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  859) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  860) 	struct drbd_bitmap *b = device->bitmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  861) 	unsigned long *p_addr, *bm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  862) 	size_t end, do_now;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  863) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  864) 	end = offset + number;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  865) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  866) 	if (!expect(b))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  867) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  868) 	if (!expect(b->bm_pages))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  869) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  870) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  871) 	spin_lock_irq(&b->bm_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  872) 	if ((offset >= b->bm_words) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  873) 	    (end    >  b->bm_words) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  874) 	    (number <= 0))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  875) 		drbd_err(device, "offset=%lu number=%lu bm_words=%lu\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  876) 			(unsigned long) offset,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  877) 			(unsigned long) number,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  878) 			(unsigned long) b->bm_words);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  879) 	else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  880) 		while (offset < end) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  881) 			do_now = min_t(size_t, ALIGN(offset+1, LWPP), end) - offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  882) 			p_addr = bm_map_pidx(b, bm_word_to_page_idx(b, offset));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  883) 			bm = p_addr + MLPP(offset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  884) 			offset += do_now;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  885) 			while (do_now--)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  886) 				*buffer++ = *bm++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  887) 			bm_unmap(p_addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  888) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  889) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  890) 	spin_unlock_irq(&b->bm_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  891) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  892) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  893) /* set all bits in the bitmap */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  894) void drbd_bm_set_all(struct drbd_device *device)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  895) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  896) 	struct drbd_bitmap *b = device->bitmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  897) 	if (!expect(b))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  898) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  899) 	if (!expect(b->bm_pages))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  900) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  901) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  902) 	spin_lock_irq(&b->bm_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  903) 	bm_memset(b, 0, 0xff, b->bm_words);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  904) 	(void)bm_clear_surplus(b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  905) 	b->bm_set = b->bm_bits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  906) 	spin_unlock_irq(&b->bm_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  907) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  908) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  909) /* clear all bits in the bitmap */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  910) void drbd_bm_clear_all(struct drbd_device *device)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  911) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  912) 	struct drbd_bitmap *b = device->bitmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  913) 	if (!expect(b))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  914) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  915) 	if (!expect(b->bm_pages))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  916) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  917) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  918) 	spin_lock_irq(&b->bm_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  919) 	bm_memset(b, 0, 0, b->bm_words);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  920) 	b->bm_set = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  921) 	spin_unlock_irq(&b->bm_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  922) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  923) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  924) static void drbd_bm_aio_ctx_destroy(struct kref *kref)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  925) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  926) 	struct drbd_bm_aio_ctx *ctx = container_of(kref, struct drbd_bm_aio_ctx, kref);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  927) 	unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  928) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  929) 	spin_lock_irqsave(&ctx->device->resource->req_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  930) 	list_del(&ctx->list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  931) 	spin_unlock_irqrestore(&ctx->device->resource->req_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  932) 	put_ldev(ctx->device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  933) 	kfree(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  934) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  935) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  936) /* bv_page may be a copy, or may be the original */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  937) static void drbd_bm_endio(struct bio *bio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  938) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  939) 	struct drbd_bm_aio_ctx *ctx = bio->bi_private;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  940) 	struct drbd_device *device = ctx->device;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  941) 	struct drbd_bitmap *b = device->bitmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  942) 	unsigned int idx = bm_page_to_idx(bio_first_page_all(bio));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  943) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  944) 	if ((ctx->flags & BM_AIO_COPY_PAGES) == 0 &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  945) 	    !bm_test_page_unchanged(b->bm_pages[idx]))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  946) 		drbd_warn(device, "bitmap page idx %u changed during IO!\n", idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  947) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  948) 	if (bio->bi_status) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  949) 		/* ctx->error will hold the non-zero error code of whichever
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  950) 		 * bio completed last, in case the error codes differ. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  951) 		ctx->error = blk_status_to_errno(bio->bi_status);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  952) 		bm_set_page_io_err(b->bm_pages[idx]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  953) 		/* The in-memory page is no longer identical to its on disk version.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  954) 		 * Is BM_PAGE_IO_ERROR enough? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  955) 		if (__ratelimit(&drbd_ratelimit_state))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  956) 			drbd_err(device, "IO ERROR %d on bitmap page idx %u\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  957) 					bio->bi_status, idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  958) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  959) 		bm_clear_page_io_err(b->bm_pages[idx]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  960) 		dynamic_drbd_dbg(device, "bitmap page idx %u completed\n", idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  961) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  962) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  963) 	bm_page_unlock_io(device, idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  964) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  965) 	if (ctx->flags & BM_AIO_COPY_PAGES)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  966) 		mempool_free(bio->bi_io_vec[0].bv_page, &drbd_md_io_page_pool);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  967) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  968) 	bio_put(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  969) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  970) 	if (atomic_dec_and_test(&ctx->in_flight)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  971) 		ctx->done = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  972) 		wake_up(&device->misc_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  973) 		kref_put(&ctx->kref, &drbd_bm_aio_ctx_destroy);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  974) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  975) }
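
/* Editorial note, not part of the original source: the final kref_put()
 * above pairs with the KREF_INIT(2) in bm_rw(); one reference belongs to
 * bm_rw() itself (dropped at its end), the other to the "in_flight reached
 * zero" event, dropped by whoever decrements in_flight to zero -- this
 * completion path or bm_rw() itself. */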
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  976) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  977) static void bm_page_io_async(struct drbd_bm_aio_ctx *ctx, int page_nr) __must_hold(local)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  978) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  979) 	struct bio *bio = bio_alloc_drbd(GFP_NOIO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  980) 	struct drbd_device *device = ctx->device;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  981) 	struct drbd_bitmap *b = device->bitmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  982) 	struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  983) 	unsigned int len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  984) 	unsigned int op = (ctx->flags & BM_AIO_READ) ? REQ_OP_READ : REQ_OP_WRITE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  985) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  986) 	sector_t on_disk_sector =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  987) 		device->ldev->md.md_offset + device->ldev->md.bm_offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  988) 	on_disk_sector += ((sector_t)page_nr) << (PAGE_SHIFT-9);
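	/* editorial example: with 4 KiB pages (PAGE_SHIFT == 12), page_nr 3
	 * starts 3 << (12 - 9) = 24 512-byte sectors into the bitmap area */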
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  989) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  990) 	/* this might happen with a very small
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  991) 	 * flexible external meta data device,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  992) 	 * or with PAGE_SIZE > 4k */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  993) 	len = min_t(unsigned int, PAGE_SIZE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  994) 		(drbd_md_last_sector(device->ldev) - on_disk_sector + 1)<<9);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  995) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  996) 	/* serialize IO on this page */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  997) 	bm_page_lock_io(device, page_nr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  998) 	/* mark the page unchanged before memcpy and submit,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  999) 	 * so it can be redirtied at any time */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000) 	bm_set_page_unchanged(b->bm_pages[page_nr]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002) 	if (ctx->flags & BM_AIO_COPY_PAGES) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003) 		page = mempool_alloc(&drbd_md_io_page_pool,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004) 				GFP_NOIO | __GFP_HIGHMEM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005) 		copy_highpage(page, b->bm_pages[page_nr]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006) 		bm_store_page_idx(page, page_nr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007) 	} else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008) 		page = b->bm_pages[page_nr];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009) 	bio_set_dev(bio, device->ldev->md_bdev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010) 	bio->bi_iter.bi_sector = on_disk_sector;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011) 	/* bio_add_page of a single page to an empty bio will always succeed,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012) 	 * according to the API.  Do we want to assert that? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013) 	bio_add_page(bio, page, len, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014) 	bio->bi_private = ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015) 	bio->bi_end_io = drbd_bm_endio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016) 	bio_set_op_attrs(bio, op, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018) 	if (drbd_insert_fault(device, (op == REQ_OP_WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019) 		bio_io_error(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) 		submit_bio(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022) 		/* this should not count as user activity and cause the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023) 		 * resync to throttle -- see drbd_rs_should_slow_down(). */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) 		atomic_add(len >> 9, &device->rs_sect_ev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026) }
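
/* Editorial note, not part of the original source: a BM_AIO_COPY_PAGES copy
 * allocated above from drbd_md_io_page_pool is returned to the pool in
 * drbd_bm_endio(); bm_store_page_idx() lets the completion path recover the
 * bitmap page index from the copy. */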
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029)  * bm_rw: read/write the whole bitmap from/to its on disk location.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031) static int bm_rw(struct drbd_device *device, const unsigned int flags, unsigned lazy_writeout_upper_idx) __must_hold(local)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033) 	struct drbd_bm_aio_ctx *ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034) 	struct drbd_bitmap *b = device->bitmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035) 	unsigned int num_pages, i, count = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036) 	unsigned long now;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037) 	char ppb[10];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038) 	int err = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041) 	 * We are protected against bitmap disappearing/resizing by holding an
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042) 	 * ldev reference (caller must have called get_ldev()).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) 	 * For read/write, we are protected against changes to the bitmap by
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044) 	 * the bitmap lock (see drbd_bitmap_io).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045) 	 * For lazy writeout, we don't care about ongoing changes to the bitmap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046) 	 * as we submit copies of the pages anyway.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049) 	ctx = kmalloc(sizeof(struct drbd_bm_aio_ctx), GFP_NOIO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050) 	if (!ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051) 		return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053) 	*ctx = (struct drbd_bm_aio_ctx) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) 		.device = device,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055) 		.start_jif = jiffies,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056) 		.in_flight = ATOMIC_INIT(1),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057) 		.done = 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058) 		.flags = flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059) 		.error = 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060) 		.kref = KREF_INIT(2),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061) 	};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063) 	if (!get_ldev_if_state(device, D_ATTACHING)) {  /* put is in drbd_bm_aio_ctx_destroy() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064) 		drbd_err(device, "ASSERT FAILED: get_ldev_if_state() == 1 in bm_rw()\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065) 		kfree(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066) 		return -ENODEV;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068) 	/* Here D_ATTACHING is sufficient, since drbd_bm_read() is called only from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069) 	   drbd_adm_attach(), after device->ldev has been assigned. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071) 	if (0 == (ctx->flags & ~BM_AIO_READ))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072) 		WARN_ON(!(BM_LOCKED_MASK & b->bm_flags));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074) 	spin_lock_irq(&device->resource->req_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075) 	list_add_tail(&ctx->list, &device->pending_bitmap_io);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076) 	spin_unlock_irq(&device->resource->req_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078) 	num_pages = b->bm_number_of_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080) 	now = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082) 	/* let the layers below us try to merge these bios... */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084) 	if (flags & BM_AIO_READ) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085) 		for (i = 0; i < num_pages; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086) 			atomic_inc(&ctx->in_flight);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087) 			bm_page_io_async(ctx, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088) 			++count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089) 			cond_resched();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091) 	} else if (flags & BM_AIO_WRITE_HINTED) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092) 		/* ASSERT: BM_AIO_WRITE_ALL_PAGES is not set. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093) 		unsigned int hint;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094) 		for (hint = 0; hint < b->n_bitmap_hints; hint++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095) 			i = b->al_bitmap_hints[hint];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096) 			if (i >= num_pages) /* == -1U: no hint here. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097) 				continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098) 			/* Several AL-extents may point to the same page. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099) 			if (!test_and_clear_bit(BM_PAGE_HINT_WRITEOUT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100) 			    &page_private(b->bm_pages[i])))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101) 				continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102) 			/* Has it even changed? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103) 			if (bm_test_page_unchanged(b->bm_pages[i]))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104) 				continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105) 			atomic_inc(&ctx->in_flight);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106) 			bm_page_io_async(ctx, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107) 			++count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110) 		for (i = 0; i < num_pages; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111) 			/* ignore completely unchanged pages */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112) 			if (lazy_writeout_upper_idx && i == lazy_writeout_upper_idx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1113) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114) 			if (!(flags & BM_AIO_WRITE_ALL_PAGES) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115) 			    bm_test_page_unchanged(b->bm_pages[i])) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116) 				dynamic_drbd_dbg(device, "skipped bm write for idx %u\n", i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117) 				continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119) 			/* during lazy writeout,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120) 			 * ignore those pages not marked for lazy writeout. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121) 			if (lazy_writeout_upper_idx &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122) 			    !bm_test_page_lazy_writeout(b->bm_pages[i])) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123) 				dynamic_drbd_dbg(device, "skipped bm lazy write for idx %u\n", i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124) 				continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126) 			atomic_inc(&ctx->in_flight);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127) 			bm_page_io_async(ctx, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128) 			++count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129) 			cond_resched();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134) 	 * We initialize ctx->in_flight to one to make sure drbd_bm_endio
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135) 	 * will not set ctx->done early, and decrement / test it here.  If there
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136) 	 * are still some bios in flight, we need to wait for them here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137) 	 * If all IO is done already (or nothing had been submitted), there is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138) 	 * no need to wait.  Still, we need to put the kref associated with the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139) 	 * "in_flight reached zero, all done" event.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141) 	if (!atomic_dec_and_test(&ctx->in_flight))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142) 		wait_until_done_or_force_detached(device, device->ldev, &ctx->done);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144) 		kref_put(&ctx->kref, &drbd_bm_aio_ctx_destroy);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146) 	/* summary for global bitmap IO */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147) 	if (flags == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148) 		unsigned int ms = jiffies_to_msecs(jiffies - now);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149) 		if (ms > 5) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150) 			drbd_info(device, "bitmap %s of %u pages took %u ms\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151) 				 (flags & BM_AIO_READ) ? "READ" : "WRITE",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152) 				 count, ms);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156) 	if (ctx->error) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157) 		drbd_alert(device, "we had at least one MD IO ERROR during bitmap IO\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158) 		drbd_chk_io_error(device, 1, DRBD_META_IO_ERROR);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159) 		err = -EIO; /* ctx->error ? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162) 	if (atomic_read(&ctx->in_flight))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163) 		err = -EIO; /* Disk timeout/force-detach during IO... */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165) 	now = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166) 	if (flags & BM_AIO_READ) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167) 		b->bm_set = bm_count_bits(b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168) 		drbd_info(device, "recounting of set bits took additional %lu jiffies\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169) 		     jiffies - now);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171) 	now = b->bm_set;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173) 	if ((flags & ~BM_AIO_READ) == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174) 		drbd_info(device, "%s (%lu bits) marked out-of-sync by on disk bit-map.\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175) 		     ppsize(ppb, now << (BM_BLOCK_SHIFT-10)), now);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177) 	kref_put(&ctx->kref, &drbd_bm_aio_ctx_destroy);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178) 	return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182)  * drbd_bm_read() - Read the whole bitmap from its on disk location.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183)  * @device:	DRBD device.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185) int drbd_bm_read(struct drbd_device *device) __must_hold(local)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187) 	return bm_rw(device, BM_AIO_READ, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1189) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191)  * drbd_bm_write() - Write the whole bitmap to its on disk location.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192)  * @device:	DRBD device.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194)  * Will only write pages that have changed since last IO.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196) int drbd_bm_write(struct drbd_device *device) __must_hold(local)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198) 	return bm_rw(device, 0, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202)  * drbd_bm_write_all() - Write the whole bitmap to its on disk location.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203)  * @device:	DRBD device.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205)  * Will write all pages.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207) int drbd_bm_write_all(struct drbd_device *device) __must_hold(local)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1209) 	return bm_rw(device, BM_AIO_WRITE_ALL_PAGES, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213)  * drbd_bm_write_lazy() - Write bitmap pages 0 to @upper_idx-1, if they have changed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214)  * @device:	DRBD device.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215)  * @upper_idx:	0: write all changed pages; positive: page index at which to stop scanning for changed pages
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217) int drbd_bm_write_lazy(struct drbd_device *device, unsigned upper_idx) __must_hold(local)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219) 	return bm_rw(device, BM_AIO_COPY_PAGES, upper_idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223)  * drbd_bm_write_copy_pages() - Write the whole bitmap to its on disk location.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224)  * @device:	DRBD device.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226)  * Will only write pages that have changed since last IO.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227)  * In contrast to drbd_bm_write(), this will copy the bitmap pages
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228)  * to temporary writeout pages. It is intended to trigger a full write-out
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229)  * while still allowing the bitmap to change, for example if a resync or online
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230)  * verify is aborted due to a failed peer disk, while local IO continues, or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231)  * pending resync acks are still being processed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233) int drbd_bm_write_copy_pages(struct drbd_device *device) __must_hold(local)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235) 	return bm_rw(device, BM_AIO_COPY_PAGES, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1237) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1238) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1239)  * drbd_bm_write_hinted() - Write bitmap pages with "hint" marks, if they have changed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1240)  * @device:	DRBD device.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1241)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1242) int drbd_bm_write_hinted(struct drbd_device *device) __must_hold(local)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1243) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1244) 	return bm_rw(device, BM_AIO_WRITE_HINTED | BM_AIO_COPY_PAGES, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1245) }
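
/* Editorial summary, not part of the original source, of the bm_rw()
 * wrappers above:
 *   drbd_bm_read()              BM_AIO_READ
 *   drbd_bm_write()             0 (changed pages only, in place)
 *   drbd_bm_write_all()         BM_AIO_WRITE_ALL_PAGES
 *   drbd_bm_write_lazy(idx)     BM_AIO_COPY_PAGES, upper_idx = idx
 *   drbd_bm_write_copy_pages()  BM_AIO_COPY_PAGES
 *   drbd_bm_write_hinted()      BM_AIO_WRITE_HINTED | BM_AIO_COPY_PAGES
 */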
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1246) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1247) /* NOTE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1248)  * find_first_bit returns int, we return unsigned long.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1249)  * For this to work on a 32bit arch with bit numbers > (1<<32),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1250)  * we'd need to return u64, and get a whole lot of other places
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1251)  * fixed where we still use unsigned long.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1252)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1253)  * this returns a bit number, NOT a sector!
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1254)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1255) static unsigned long __bm_find_next(struct drbd_device *device, unsigned long bm_fo,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1256) 	const int find_zero_bit)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1257) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1258) 	struct drbd_bitmap *b = device->bitmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1259) 	unsigned long *p_addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1260) 	unsigned long bit_offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1261) 	unsigned i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1262) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1263) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1264) 	if (bm_fo > b->bm_bits) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1265) 		drbd_err(device, "bm_fo=%lu bm_bits=%lu\n", bm_fo, b->bm_bits);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1266) 		bm_fo = DRBD_END_OF_BITMAP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1267) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1268) 		while (bm_fo < b->bm_bits) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1269) 			/* bit offset of the first bit in the page */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1270) 			bit_offset = bm_fo & ~BITS_PER_PAGE_MASK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1271) 			p_addr = __bm_map_pidx(b, bm_bit_to_page_idx(b, bm_fo));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1272) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1273) 			if (find_zero_bit)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1274) 				i = find_next_zero_bit_le(p_addr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1275) 						PAGE_SIZE*8, bm_fo & BITS_PER_PAGE_MASK);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1276) 			else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1277) 				i = find_next_bit_le(p_addr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1278) 						PAGE_SIZE*8, bm_fo & BITS_PER_PAGE_MASK);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1279) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1280) 			__bm_unmap(p_addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1281) 			if (i < PAGE_SIZE*8) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1282) 				bm_fo = bit_offset + i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1283) 				if (bm_fo >= b->bm_bits)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1284) 					break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1285) 				goto found;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1286) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1287) 			bm_fo = bit_offset + PAGE_SIZE*8;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1288) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1289) 		bm_fo = DRBD_END_OF_BITMAP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1290) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1291)  found:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1292) 	return bm_fo;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1293) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1294) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1295) static unsigned long bm_find_next(struct drbd_device *device,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1296) 	unsigned long bm_fo, const int find_zero_bit)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1297) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1298) 	struct drbd_bitmap *b = device->bitmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1299) 	unsigned long i = DRBD_END_OF_BITMAP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1300) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1301) 	if (!expect(b))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1302) 		return i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1303) 	if (!expect(b->bm_pages))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1304) 		return i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1305) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1306) 	spin_lock_irq(&b->bm_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1307) 	if (BM_DONT_TEST & b->bm_flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1308) 		bm_print_lock_info(device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1309) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1310) 	i = __bm_find_next(device, bm_fo, find_zero_bit);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1311) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1312) 	spin_unlock_irq(&b->bm_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1313) 	return i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1314) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1315) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1316) unsigned long drbd_bm_find_next(struct drbd_device *device, unsigned long bm_fo)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1317) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1318) 	return bm_find_next(device, bm_fo, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1319) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1320) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1321) #if 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1322) /* not yet needed for anything. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1323) unsigned long drbd_bm_find_next_zero(struct drbd_device *device, unsigned long bm_fo)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1324) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1325) 	return bm_find_next(device, bm_fo, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1326) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1327) #endif
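
/* Editorial usage sketch, not part of the original source: callers walk the
 * set bits by repeatedly asking for the next one until the
 * DRBD_END_OF_BITMAP sentinel comes back. */
#if 0
static void bm_walk_set_bits(struct drbd_device *device)
{
	unsigned long bit = 0;

	while ((bit = drbd_bm_find_next(device, bit)) != DRBD_END_OF_BITMAP) {
		/* 'bit' is set; it covers one 4 KiB block of backend storage */
		bit++;
	}
}
#endif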
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1328) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1329) /* does not spin_lock_irqsave.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1330)  * you must take drbd_bm_lock() first */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1331) unsigned long _drbd_bm_find_next(struct drbd_device *device, unsigned long bm_fo)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1332) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1333) 	/* WARN_ON(!(BM_DONT_SET & device->b->bm_flags)); */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1334) 	return __bm_find_next(device, bm_fo, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1335) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1336) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1337) unsigned long _drbd_bm_find_next_zero(struct drbd_device *device, unsigned long bm_fo)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1338) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1339) 	/* WARN_ON(!(BM_DONT_SET & device->b->bm_flags)); */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1340) 	return __bm_find_next(device, bm_fo, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1341) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1342) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1343) /* returns number of bits actually changed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1344)  * for val != 0, we change 0 -> 1, return code positive
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1345)  * for val == 0, we change 1 -> 0, return code negative
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1346)  * wants bitnr, not sector.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1347)  * expected to be called for only a few bits (e - s about BITS_PER_LONG).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1348)  * Must hold bitmap lock already. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1349) static int __bm_change_bits_to(struct drbd_device *device, const unsigned long s,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1350) 	unsigned long e, int val)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1351) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1352) 	struct drbd_bitmap *b = device->bitmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1353) 	unsigned long *p_addr = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1354) 	unsigned long bitnr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1355) 	unsigned int last_page_nr = -1U;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1356) 	int c = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1357) 	int changed_total = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1358) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1359) 	if (e >= b->bm_bits) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1360) 		drbd_err(device, "ASSERT FAILED: bit_s=%lu bit_e=%lu bm_bits=%lu\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1361) 				s, e, b->bm_bits);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1362) 		e = b->bm_bits ? b->bm_bits - 1 : 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1363) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1364) 	for (bitnr = s; bitnr <= e; bitnr++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1365) 		unsigned int page_nr = bm_bit_to_page_idx(b, bitnr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1366) 		if (page_nr != last_page_nr) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1367) 			if (p_addr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1368) 				__bm_unmap(p_addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1369) 			if (c < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1370) 				bm_set_page_lazy_writeout(b->bm_pages[last_page_nr]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1371) 			else if (c > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1372) 				bm_set_page_need_writeout(b->bm_pages[last_page_nr]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1373) 			changed_total += c;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1374) 			c = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1375) 			p_addr = __bm_map_pidx(b, page_nr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1376) 			last_page_nr = page_nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1377) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1378) 		if (val)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1379) 			c += (0 == __test_and_set_bit_le(bitnr & BITS_PER_PAGE_MASK, p_addr));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1380) 		else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1381) 			c -= (0 != __test_and_clear_bit_le(bitnr & BITS_PER_PAGE_MASK, p_addr));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1382) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1383) 	if (p_addr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1384) 		__bm_unmap(p_addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1385) 	if (c < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1386) 		bm_set_page_lazy_writeout(b->bm_pages[last_page_nr]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1387) 	else if (c > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1388) 		bm_set_page_need_writeout(b->bm_pages[last_page_nr]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1389) 	changed_total += c;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1390) 	b->bm_set += changed_total;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1391) 	return changed_total;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1392) }
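
/* Editorial note, not part of the original source: in the helper above, 'c'
 * accumulates the per-page delta; a page that only lost bits (c < 0) is
 * merely marked for lazy writeout, while a page that gained bits (c > 0)
 * must reach stable storage and is marked "need writeout". */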
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1393) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1394) /* returns number of bits actually changed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1395)  * for val != 0, we change 0 -> 1, return code positive
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1396)  * for val == 0, we change 1 -> 0, return code negative
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1397)  * wants bitnr, not sector */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1398) static int bm_change_bits_to(struct drbd_device *device, const unsigned long s,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1399) 	const unsigned long e, int val)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1400) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1401) 	unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1402) 	struct drbd_bitmap *b = device->bitmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1403) 	int c = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1404) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1405) 	if (!expect(b))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1406) 		return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1407) 	if (!expect(b->bm_pages))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1408) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1409) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1410) 	spin_lock_irqsave(&b->bm_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1411) 	if ((val ? BM_DONT_SET : BM_DONT_CLEAR) & b->bm_flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1412) 		bm_print_lock_info(device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1413) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1414) 	c = __bm_change_bits_to(device, s, e, val);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1415) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1416) 	spin_unlock_irqrestore(&b->bm_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1417) 	return c;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1418) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1419) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1420) /* returns number of bits changed 0 -> 1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1421) int drbd_bm_set_bits(struct drbd_device *device, const unsigned long s, const unsigned long e)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1422) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1423) 	return bm_change_bits_to(device, s, e, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1424) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1425) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1426) /* returns number of bits changed 1 -> 0 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1427) int drbd_bm_clear_bits(struct drbd_device *device, const unsigned long s, const unsigned long e)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1428) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1429) 	return -bm_change_bits_to(device, s, e, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1430) }
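
/* Editorial example, not part of the original source: on a freshly cleared
 * bitmap,
 *   drbd_bm_set_bits(device, 0, 7)   returns 8 (eight bits went 0 -> 1),
 *   drbd_bm_set_bits(device, 0, 7)   returns 0 on the second call, and
 *   drbd_bm_clear_bits(device, 0, 7) returns 8 (eight bits went 1 -> 0).
 */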
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1431) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1432) /* sets all bits in full words,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1433)  * from first_word up to, but not including, last_word */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1434) static inline void bm_set_full_words_within_one_page(struct drbd_bitmap *b,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1435) 		int page_nr, int first_word, int last_word)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1436) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1437) 	int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1438) 	int bits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1439) 	int changed = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1440) 	unsigned long *paddr = kmap_atomic(b->bm_pages[page_nr]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1441) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1442) 	/* I think it is more cache line friendly to hweight_long() each word then
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1443) 	 * set it to ~0UL, than to first bitmap_weight() all words, then bitmap_fill() all */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1444) 	for (i = first_word; i < last_word; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1445) 		bits = hweight_long(paddr[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1446) 		paddr[i] = ~0UL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1447) 		changed += BITS_PER_LONG - bits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1448) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1449) 	kunmap_atomic(paddr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1450) 	if (changed) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1451) 		/* We only need lazy writeout, the information is still in the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1452) 		 * remote bitmap as well, and is reconstructed during the next
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1453) 		 * bitmap exchange, if lost locally due to a crash. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1454) 		bm_set_page_lazy_writeout(b->bm_pages[page_nr]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1455) 		b->bm_set += changed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1456) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1457) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1458) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1459) /* Same thing as drbd_bm_set_bits,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1460)  * but more efficient for a large bit range.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1461)  * You must first drbd_bm_lock().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1462)  * Can be called to set the whole bitmap in one go.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1463)  * Sets bits from s to e _inclusive_. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1464) void _drbd_bm_set_bits(struct drbd_device *device, const unsigned long s, const unsigned long e)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1465) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1466) 	/* First set_bit from the first bit (s)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1467) 	 * up to the next long boundary (sl),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1468) 	 * then assign full words up to the last long boundary (el),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1469) 	 * then set_bit up to and including the last bit (e).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1470) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1471) 	 * Do not use memset, because we must account for changes,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1472) 	 * so we need to loop over the words with hweight() anyways.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1473) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1474) 	struct drbd_bitmap *b = device->bitmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1475) 	unsigned long sl = ALIGN(s,BITS_PER_LONG);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1476) 	unsigned long el = (e+1) & ~((unsigned long)BITS_PER_LONG-1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1477) 	int first_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1478) 	int last_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1479) 	int page_nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1480) 	int first_word;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1481) 	int last_word;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1482) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1483) 	if (e - s <= 3*BITS_PER_LONG) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1484) 		/* don't bother; el and sl may even be wrong. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1485) 		spin_lock_irq(&b->bm_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1486) 		__bm_change_bits_to(device, s, e, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1487) 		spin_unlock_irq(&b->bm_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1488) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1489) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1490) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1491) 	/* difference is large enough that we can trust sl and el */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1492) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1493) 	spin_lock_irq(&b->bm_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1494) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1495) 	/* bits filling the current long */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1496) 	if (sl)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1497) 		__bm_change_bits_to(device, s, sl-1, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1498) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1499) 	first_page = sl >> (3 + PAGE_SHIFT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1500) 	last_page = el >> (3 + PAGE_SHIFT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1501) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1502) 	/* MLPP: modulo longs per page */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1503) 	/* LWPP: long words per page */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1504) 	first_word = MLPP(sl >> LN2_BPL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1505) 	last_word = LWPP; /* exclusive upper bound: full pages are assigned up to the page end */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1506) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1507) 	/* first and full pages, unless first page == last page */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1508) 	for (page_nr = first_page; page_nr < last_page; page_nr++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1509) 		bm_set_full_words_within_one_page(device->bitmap, page_nr, first_word, last_word);
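		/* briefly drop the lock so that cond_resched() may actually
		 * reschedule us; only the first page starts at first_word,
		 * all following pages start at word 0 */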
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1510) 		spin_unlock_irq(&b->bm_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1511) 		cond_resched();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1512) 		first_word = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1513) 		spin_lock_irq(&b->bm_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1514) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1515) 	/* last page (respectively only page, for first page == last page) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1516) 	last_word = MLPP(el >> LN2_BPL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1517) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1518) 	/* consider bitmap->bm_bits = 32768, bitmap->bm_number_of_pages = 1 (or multiples thereof).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1519) 	 * ==> e = 32767, el = 32768, last_page = 1 (one past the last allocated page index),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1520) 	 * and now last_word = 0.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1521) 	 * We do not want to touch last_page in this case,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1522) 	 * as we did not allocate it, it is not present in bitmap->bm_pages.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1523) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1524) 	if (last_word)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1525) 		bm_set_full_words_within_one_page(device->bitmap, last_page, first_word, last_word);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1526) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1527) 	/* possibly trailing bits.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1528) 	 * example: (e & 63) == 63 ==> el == e+1, so there are no trailing bits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1529) 	 * and if e even was the very last bit of the bitmap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1530) 	 * calling __bm_change_bits_to() with el would trigger an assert there.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1531) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1532) 	if (el <= e)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1533) 		__bm_change_bits_to(device, el, e, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1534) 	spin_unlock_irq(&b->bm_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1535) }
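
/* Worked example of the decomposition above (a sketch; it assumes
 * BITS_PER_LONG == 64 and PAGE_SHIFT == 12, i.e. 512 longs per page):
 *
 *	s = 100, e = 70000
 *	sl = ALIGN(100, 64)    = 128   -> bits 100..127 set bit by bit
 *	el = (70000+1) & ~63UL = 69952 -> full words assigned for bits 128..69951:
 *	                                  page 0 words 2..511, page 1 words 0..511,
 *	                                  page 2 (last_word == 69) words 0..68
 *	el <= e                        -> trailing bits 69952..70000 set bit by bit
 */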
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1536) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1537) /* returns bit state
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1538)  * wants bitnr, NOT sector.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1539)  * inherently racy... area needs to be locked by means of {al,rs}_lru
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1540)  *  1 ... bit set
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1541)  *  0 ... bit not set
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1542)  * -1 ... first out of bounds access, stop testing for bits!
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1543)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1544) int drbd_bm_test_bit(struct drbd_device *device, const unsigned long bitnr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1545) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1546) 	unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1547) 	struct drbd_bitmap *b = device->bitmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1548) 	unsigned long *p_addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1549) 	int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1550) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1551) 	if (!expect(b))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1552) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1553) 	if (!expect(b->bm_pages))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1554) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1555) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1556) 	spin_lock_irqsave(&b->bm_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1557) 	if (BM_DONT_TEST & b->bm_flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1558) 		bm_print_lock_info(device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1559) 	if (bitnr < b->bm_bits) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1560) 		p_addr = bm_map_pidx(b, bm_bit_to_page_idx(b, bitnr));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1561) 		i = test_bit_le(bitnr & BITS_PER_PAGE_MASK, p_addr) ? 1 : 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1562) 		bm_unmap(p_addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1563) 	} else if (bitnr == b->bm_bits) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1564) 		i = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1565) 	} else { /* (bitnr > b->bm_bits) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1566) 		drbd_err(device, "bitnr=%lu > bm_bits=%lu\n", bitnr, b->bm_bits);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1567) 		i = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1568) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1569) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1570) 	spin_unlock_irqrestore(&b->bm_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1571) 	return i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1572) }
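
/* Hypothetical caller sketch (not part of this file) of the tri-state
 * return value above: -1 means "ran off the end, stop testing", anything
 * else is the bit value.  handle_dirty_bit() is a made-up helper, shown
 * for illustration only.
 *
 *	unsigned long bitnr;
 *	for (bitnr = first; ; bitnr++) {
 *		int st = drbd_bm_test_bit(device, bitnr);
 *		if (st < 0)
 *			break;			// out of bounds
 *		if (st)
 *			handle_dirty_bit(bitnr);
 *	}
 */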
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1573) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1574) /* returns number of bits set in the range [s, e] */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1575) int drbd_bm_count_bits(struct drbd_device *device, const unsigned long s, const unsigned long e)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1576) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1577) 	unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1578) 	struct drbd_bitmap *b = device->bitmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1579) 	unsigned long *p_addr = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1580) 	unsigned long bitnr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1581) 	unsigned int page_nr = -1U; /* sentinel: forces a map on the first iteration */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1582) 	int c = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1583) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1584) 	/* If this is called without a bitmap, that is a bug.  But just to be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1585) 	 * robust in case we screwed up elsewhere, pretend there was one dirty
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1586) 	 * bit in the requested area, so we won't try to do a local read there
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1587) 	 * (no bitmap probably implies no disk). */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1588) 	if (!expect(b))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1589) 		return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1590) 	if (!expect(b->bm_pages))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1591) 		return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1592) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1593) 	spin_lock_irqsave(&b->bm_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1594) 	if (BM_DONT_TEST & b->bm_flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1595) 		bm_print_lock_info(device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1596) 	for (bitnr = s; bitnr <= e; bitnr++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1597) 		unsigned int idx = bm_bit_to_page_idx(b, bitnr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1598) 		if (page_nr != idx) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1599) 			page_nr = idx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1600) 			if (p_addr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1601) 				bm_unmap(p_addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1602) 			p_addr = bm_map_pidx(b, idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1603) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1604) 		if (expect(bitnr < b->bm_bits))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1605) 			c += (0 != test_bit_le(bitnr - (page_nr << (PAGE_SHIFT+3)), p_addr));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1606) 		else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1607) 			drbd_err(device, "bitnr=%lu bm_bits=%lu\n", bitnr, b->bm_bits);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1608) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1609) 	if (p_addr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1610) 		bm_unmap(p_addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1611) 	spin_unlock_irqrestore(&b->bm_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1612) 	return c;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1613) }
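
/* Worked example of the page-crossing above (assuming PAGE_SHIFT == 12,
 * i.e. 32768 bits per page): counting bits 32760..32775 touches two pages;
 * the mapping is reused while idx is unchanged and remapped once at the
 * crossing:
 *
 *	bitnr 32760..32767 -> idx 0, in-page offset bitnr - 0     = 32760..32767
 *	bitnr 32768..32775 -> idx 1, in-page offset bitnr - 32768 = 0..7
 */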
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1614) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1615) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1616) /* inherently racy...
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1617)  * the return value may already be out-of-date when this function returns.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1618)  * but the general usage is that it is only used during a cstate when bits are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1619)  * only cleared, not set, and callers typically only care about the case where
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1620)  * the return value is zero, or we already "locked" this "bitmap extent" by other means.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1621)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1622)  * enr is bm-extent number, since we chose to name one sector (512 bytes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1623)  * worth of the bitmap a "bitmap extent".
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1624)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1625)  * TODO
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1626)  * I think since we use it like a reference count, we should use the real
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1627)  * reference count of some bitmap extent element from some lru instead...
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1628)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1629)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1630) int drbd_bm_e_weight(struct drbd_device *device, unsigned long enr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1631) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1632) 	struct drbd_bitmap *b = device->bitmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1633) 	int count, s, e;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1634) 	unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1635) 	unsigned long *p_addr, *bm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1636) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1637) 	if (!expect(b))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1638) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1639) 	if (!expect(b->bm_pages))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1640) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1641) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1642) 	spin_lock_irqsave(&b->bm_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1643) 	if (BM_DONT_TEST & b->bm_flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1644) 		bm_print_lock_info(device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1645) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1646) 	s = S2W(enr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1647) 	e = min((size_t)S2W(enr+1), b->bm_words);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1648) 	count = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1649) 	if (s < b->bm_words) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1650) 		int n = e-s;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1651) 		p_addr = bm_map_pidx(b, bm_word_to_page_idx(b, s));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1652) 		bm = p_addr + MLPP(s);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1653) 		count += bitmap_weight(bm, n * BITS_PER_LONG);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1654) 		bm_unmap(p_addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1655) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1656) 		drbd_err(device, "start offset (%d) too large in drbd_bm_e_weight\n", s);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1657) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1658) 	spin_unlock_irqrestore(&b->bm_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1659) 	return count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1660) }
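
/* Worked example for the extent arithmetic above (a sketch; it assumes
 * 64bit longs and the one-bit-per-4KiB granularity described at the top
 * of this file): one "bitmap extent" is one sector (512 bytes) of bitmap,
 * i.e.
 *
 *	512 bytes = 64 longs = 4096 bits -> 4096 * 4 KiB = 16 MiB of storage
 *
 * so S2W(enr) == enr * 64 is the word offset of extent enr, and
 * drbd_bm_e_weight() weighs at most those 64 longs (fewer for a truncated
 * last extent, where S2W(enr+1) would exceed bm_words).
 */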