// SPDX-License-Identifier: GPL-2.0-or-later
/*
   drbd_bitmap.c

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2004-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 2004-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2004-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.

 */

#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt

#include <linux/bitmap.h>
#include <linux/vmalloc.h>
#include <linux/string.h>
#include <linux/drbd.h>
#include <linux/slab.h>
#include <linux/highmem.h>

#include "drbd_int.h"


/* OPAQUE outside this file!
 * interface defined in drbd_int.h

 * convention:
 * function name drbd_bm_... => used elsewhere, "public".
 * function name      bm_... => internal to implementation, "private".
 */


/*
 * LIMITATIONS:
 * We want to support >= 1 PiB of backend storage, while for now still using
 * a granularity of one bit per 4KiB of storage.
 * 1 << 50		bytes backend storage (1 PiB)
 * 1 << (50 - 12)	bits needed
 *	38 --> we need u64 to index and count bits
 * 1 << (38 - 3)	bitmap bytes needed
 *	35 --> we still need u64 to index and count bytes
 *			(that's 32 GiB of bitmap for 1 PiB storage)
 * 1 << (35 - 2)	32bit longs needed
 *	33 --> we'd even need u64 to index and count 32bit long words.
 * 1 << (35 - 3)	64bit longs needed
 *	32 --> we could get away with a 32bit unsigned int to index and count
 *	64bit long words, but I'd rather stay with unsigned long for now.
 *	We probably should neither count nor point to bytes or long words
 *	directly, but either by bitnumber, or by page index and offset.
 * 1 << (35 - 12)
 *	22 --> we need that many 4KiB pages of bitmap.
 * 1 << (22 + 3) --> on a 64bit arch,
 *	we need 32 MiB to store the array of page pointers.
 *
 * Because I'm lazy, and because the resulting patch was too large, too ugly
 * and still incomplete, on 32bit we still "only" support 16 TiB (minus some),
 * (1 << 32) bits * 4k storage.
 *

 * bitmap storage and IO:
 *	Bitmap is stored little endian on disk, and is kept little endian in
 *	core memory. Currently we still hold the full bitmap in core as long
 *	as we are "attached" to a local disk, which at 32 GiB for 1 PiB storage
 *	seems excessive.
 *
 *	We plan to reduce the amount of in-core bitmap pages by paging them in
 *	and out against their on-disk location as necessary, but need to make
 *	sure we don't cause too much meta data IO, and must not deadlock in
 *	tight memory situations. This needs some more work.
 */
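
/*
 * Worked example of the above (illustrative, assuming 4 KiB pages and
 * 64-bit longs):
 * 1 TiB of backend storage == 1 << 40 bytes
 *	1 << (40 - 12) == 1 << 28 bits, at one bit per 4 KiB
 *	1 << (28 - 3)  == 1 << 25 bytes == 32 MiB of bitmap
 *	1 << (25 - 12) == 1 << 13 == 8192 bitmap pages
 *	8192 * 8 bytes == 64 KiB for the array of page pointers
 */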

/*
 * NOTE
 *  Access to the *bm_pages is protected by bm_lock.
 *  It is safe to read the other members within the lock.
 *
 *  drbd_bm_set_bits is called from bio_endio callbacks,
 *  so we may be called with irqs already disabled,
 *  and therefore need spin_lock_irqsave().
 *  And we need the kmap_atomic.
 */
struct drbd_bitmap {
	struct page **bm_pages;
	spinlock_t bm_lock;

	/* exclusively to be used by __al_write_transaction(),
	 * drbd_bm_mark_for_writeout() and
	 * drbd_bm_write_hinted() -> bm_rw() called from there.
	 */
	unsigned int n_bitmap_hints;
	unsigned int al_bitmap_hints[AL_UPDATES_PER_TRANSACTION];

	/* see LIMITATIONS: above */

	unsigned long bm_set;       /* nr of set bits; THINK maybe atomic_t? */
	unsigned long bm_bits;
	size_t   bm_words;
	size_t   bm_number_of_pages;
	sector_t bm_dev_capacity;
	struct mutex bm_change; /* serializes resize operations */

	wait_queue_head_t bm_io_wait; /* used to serialize IO of single pages */

	enum bm_flag bm_flags;

	/* debugging aid, in case we are still racy somewhere */
	char		   *bm_why;
	struct task_struct *bm_task;
};

#define bm_print_lock_info(m) __bm_print_lock_info(m, __func__)
static void __bm_print_lock_info(struct drbd_device *device, const char *func)
{
	struct drbd_bitmap *b = device->bitmap;
	if (!__ratelimit(&drbd_ratelimit_state))
		return;
	drbd_err(device, "FIXME %s[%d] in %s, bitmap locked for '%s' by %s[%d]\n",
		 current->comm, task_pid_nr(current),
		 func, b->bm_why ?: "?",
		 b->bm_task->comm, task_pid_nr(b->bm_task));
}

void drbd_bm_lock(struct drbd_device *device, char *why, enum bm_flag flags)
{
	struct drbd_bitmap *b = device->bitmap;
	int trylock_failed;

	if (!b) {
		drbd_err(device, "FIXME no bitmap in drbd_bm_lock!?\n");
		return;
	}

	trylock_failed = !mutex_trylock(&b->bm_change);

	if (trylock_failed) {
		drbd_warn(device, "%s[%d] going to '%s' but bitmap already locked for '%s' by %s[%d]\n",
			  current->comm, task_pid_nr(current),
			  why, b->bm_why ?: "?",
			  b->bm_task->comm, task_pid_nr(b->bm_task));
		mutex_lock(&b->bm_change);
	}
	if (BM_LOCKED_MASK & b->bm_flags)
		drbd_err(device, "FIXME bitmap already locked in bm_lock\n");
	b->bm_flags |= flags & BM_LOCKED_MASK;

	b->bm_why  = why;
	b->bm_task = current;
}

void drbd_bm_unlock(struct drbd_device *device)
{
	struct drbd_bitmap *b = device->bitmap;
	if (!b) {
		drbd_err(device, "FIXME no bitmap in drbd_bm_unlock!?\n");
		return;
	}

	if (!(BM_LOCKED_MASK & device->bitmap->bm_flags))
		drbd_err(device, "FIXME bitmap not locked in bm_unlock\n");

	b->bm_flags &= ~BM_LOCKED_MASK;
	b->bm_why  = NULL;
	b->bm_task = NULL;
	mutex_unlock(&b->bm_change);
}

/* we store some "meta" info about our pages in page->private */
/* at a granularity of 4k storage per bitmap bit:
 * one peta byte storage: 1<<50 byte, 1<<38 * 4k storage blocks
 *  1<<38 bits,
 *  1<<23 4k bitmap pages.
 * Use 24 bits as page index, covers 2 peta byte storage
 * at a granularity of 4k per bit.
 * Used to report the failed page idx on io error from the endio handlers.
 */
#define BM_PAGE_IDX_MASK	((1UL<<24)-1)
/* this page is currently read in, or written back */
#define BM_PAGE_IO_LOCK		31
/* if there has been an IO error for this page */
#define BM_PAGE_IO_ERROR	30
/* this is to be able to intelligently skip disk IO,
 * set if bits have been set since last IO. */
#define BM_PAGE_NEED_WRITEOUT	29
/* to mark for lazy writeout once syncer cleared all clearable bits,
 * set if bits have been cleared since last IO. */
#define BM_PAGE_LAZY_WRITEOUT	28
/* pages marked with this "HINT" will be considered for writeout
 * on activity log transactions */
#define BM_PAGE_HINT_WRITEOUT	27
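
/* Resulting layout of page->private, for orientation (derived from the
 * definitions above):
 *	bits  0..23	page index within the bitmap (BM_PAGE_IDX_MASK)
 *	bit  27		BM_PAGE_HINT_WRITEOUT
 *	bit  28		BM_PAGE_LAZY_WRITEOUT
 *	bit  29		BM_PAGE_NEED_WRITEOUT
 *	bit  30		BM_PAGE_IO_ERROR
 *	bit  31		BM_PAGE_IO_LOCK
 */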

/* store_page_idx uses non-atomic assignment. It is only used directly after
 * allocating the page.  All other bm_set_page_* and bm_clear_page_* need to
 * use atomic bit manipulation, as set_out_of_sync (and therefore bitmap
 * changes) may happen from various contexts, and wait_on_bit/wake_up_bit
 * requires it all to be atomic as well. */
static void bm_store_page_idx(struct page *page, unsigned long idx)
{
	BUG_ON(0 != (idx & ~BM_PAGE_IDX_MASK));
	set_page_private(page, idx);
}

static unsigned long bm_page_to_idx(struct page *page)
{
	return page_private(page) & BM_PAGE_IDX_MASK;
}

/* As it is very unlikely that the same page is under IO from more than one
 * context, we can get away with a bit per page and one wait queue per bitmap.
 */
static void bm_page_lock_io(struct drbd_device *device, int page_nr)
{
	struct drbd_bitmap *b = device->bitmap;
	void *addr = &page_private(b->bm_pages[page_nr]);
	wait_event(b->bm_io_wait, !test_and_set_bit(BM_PAGE_IO_LOCK, addr));
}

static void bm_page_unlock_io(struct drbd_device *device, int page_nr)
{
	struct drbd_bitmap *b = device->bitmap;
	void *addr = &page_private(b->bm_pages[page_nr]);
	clear_bit_unlock(BM_PAGE_IO_LOCK, addr);
	wake_up(&device->bitmap->bm_io_wait);
}

/* set _before_ submit_io, so it may be reset due to being changed
 * while this page is in flight... will get submitted later again */
static void bm_set_page_unchanged(struct page *page)
{
	/* use cmpxchg? */
	clear_bit(BM_PAGE_NEED_WRITEOUT, &page_private(page));
	clear_bit(BM_PAGE_LAZY_WRITEOUT, &page_private(page));
}

static void bm_set_page_need_writeout(struct page *page)
{
	set_bit(BM_PAGE_NEED_WRITEOUT, &page_private(page));
}

void drbd_bm_reset_al_hints(struct drbd_device *device)
{
	device->bitmap->n_bitmap_hints = 0;
}

/**
 * drbd_bm_mark_for_writeout() - mark a page with a "hint" to be considered for writeout
 * @device:	DRBD device.
 * @page_nr:	the bitmap page to mark with the "hint" flag
 *
 * From within an activity log transaction, we mark a few pages with these
 * hints, then call drbd_bm_write_hinted(), which will only write out changed
 * pages which are flagged with this mark.
 */
void drbd_bm_mark_for_writeout(struct drbd_device *device, int page_nr)
{
	struct drbd_bitmap *b = device->bitmap;
	struct page *page;
	if (page_nr >= device->bitmap->bm_number_of_pages) {
		drbd_warn(device, "BAD: page_nr: %u, number_of_pages: %u\n",
			  page_nr, (int)device->bitmap->bm_number_of_pages);
		return;
	}
	page = device->bitmap->bm_pages[page_nr];
	BUG_ON(b->n_bitmap_hints >= ARRAY_SIZE(b->al_bitmap_hints));
	if (!test_and_set_bit(BM_PAGE_HINT_WRITEOUT, &page_private(page)))
		b->al_bitmap_hints[b->n_bitmap_hints++] = page_nr;
}
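
/* Typical hint flow, sketched from the comments above (no new API): within
 * one activity log transaction, __al_write_transaction() calls
 * drbd_bm_mark_for_writeout() for each bitmap page the transaction touches;
 * drbd_bm_write_hinted() -> bm_rw() then writes out only those changed
 * pages that carry this mark. */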

static int bm_test_page_unchanged(struct page *page)
{
	volatile const unsigned long *addr = &page_private(page);
	return (*addr & ((1UL<<BM_PAGE_NEED_WRITEOUT)|(1UL<<BM_PAGE_LAZY_WRITEOUT))) == 0;
}

static void bm_set_page_io_err(struct page *page)
{
	set_bit(BM_PAGE_IO_ERROR, &page_private(page));
}

static void bm_clear_page_io_err(struct page *page)
{
	clear_bit(BM_PAGE_IO_ERROR, &page_private(page));
}

static void bm_set_page_lazy_writeout(struct page *page)
{
	set_bit(BM_PAGE_LAZY_WRITEOUT, &page_private(page));
}

static int bm_test_page_lazy_writeout(struct page *page)
{
	return test_bit(BM_PAGE_LAZY_WRITEOUT, &page_private(page));
}

/* on a 32bit box, this would allow for exactly (2<<38) bits. */
static unsigned int bm_word_to_page_idx(struct drbd_bitmap *b, unsigned long long_nr)
{
	/* page_nr = (word*sizeof(long)) >> PAGE_SHIFT; */
	unsigned int page_nr = long_nr >> (PAGE_SHIFT - LN2_BPL + 3);
	BUG_ON(page_nr >= b->bm_number_of_pages);
	return page_nr;
}

static unsigned int bm_bit_to_page_idx(struct drbd_bitmap *b, u64 bitnr)
{
	/* page_nr = (bitnr/8) >> PAGE_SHIFT; */
	unsigned int page_nr = bitnr >> (PAGE_SHIFT + 3);
	BUG_ON(page_nr >= b->bm_number_of_pages);
	return page_nr;
}
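
/* For illustration, with 4 KiB pages and 64-bit longs (LN2_BPL == 6):
 * bm_word_to_page_idx() shifts by 12 - 6 + 3 == 9, i.e. 512 longs per page;
 * bm_bit_to_page_idx() shifts by 12 + 3 == 15, i.e. 32768 bits per page. */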

static unsigned long *__bm_map_pidx(struct drbd_bitmap *b, unsigned int idx)
{
	struct page *page = b->bm_pages[idx];
	return (unsigned long *) kmap_atomic(page);
}

static unsigned long *bm_map_pidx(struct drbd_bitmap *b, unsigned int idx)
{
	return __bm_map_pidx(b, idx);
}

static void __bm_unmap(unsigned long *p_addr)
{
	kunmap_atomic(p_addr);
}

static void bm_unmap(unsigned long *p_addr)
{
	return __bm_unmap(p_addr);
}

/* long word offset of _bitmap_ sector */
#define S2W(s)	((s)<<(BM_EXT_SHIFT-BM_BLOCK_SHIFT-LN2_BPL))
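/* Plausibility note (assuming BM_EXT_SHIFT - BM_BLOCK_SHIFT == 12, i.e. one
 * 512-byte bitmap sector per resync extent): S2W(s) is s * 512/sizeof(long),
 * a shift by 6 on 64-bit (64 longs per sector), by 7 on 32-bit (128 longs). */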
/* word offset from start of bitmap to word number _in_page_
 * modulo longs per page
#define MLPP(X) ((X) % (PAGE_SIZE/sizeof(long)))
 hm, well, Philipp thinks gcc might not optimize the % into & (... - 1)
 so do it explicitly:
 */
#define MLPP(X) ((X) & ((PAGE_SIZE/sizeof(long))-1))

/* Long words per page */
#define LWPP (PAGE_SIZE/sizeof(long))

/*
 * actually most functions herein should take a struct drbd_bitmap*, not a
 * struct drbd_device*, but for the debug macros I like to have the device around
 * to be able to report device-specific messages.
 */


static void bm_free_pages(struct page **pages, unsigned long number)
{
	unsigned long i;
	if (!pages)
		return;

	for (i = 0; i < number; i++) {
		if (!pages[i]) {
			pr_alert("bm_free_pages tried to free a NULL pointer; i=%lu n=%lu\n",
				 i, number);
			continue;
		}
		__free_page(pages[i]);
		pages[i] = NULL;
	}
}

static inline void bm_vk_free(void *ptr)
{
	kvfree(ptr);
}

/*
 * "have" and "want" are NUMBER OF PAGES.
 */
static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want)
{
	struct page **old_pages = b->bm_pages;
	struct page **new_pages, *page;
	unsigned int i, bytes;
	unsigned long have = b->bm_number_of_pages;

	BUG_ON(have == 0 && old_pages != NULL);
	BUG_ON(have != 0 && old_pages == NULL);

	if (have == want)
		return old_pages;

	/* Trying kmalloc first, falling back to vmalloc.
	 * GFP_NOIO, as this is called while drbd IO is "suspended",
	 * and during resize or attach on diskless Primary,
	 * we must not block on IO to ourselves.
	 * Context is receiver thread or dmsetup. */
	bytes = sizeof(struct page *)*want;
	new_pages = kzalloc(bytes, GFP_NOIO | __GFP_NOWARN);
	if (!new_pages) {
		new_pages = __vmalloc(bytes, GFP_NOIO | __GFP_ZERO);
		if (!new_pages)
			return NULL;
	}

	if (want >= have) {
		for (i = 0; i < have; i++)
			new_pages[i] = old_pages[i];
		for (; i < want; i++) {
			page = alloc_page(GFP_NOIO | __GFP_HIGHMEM);
			if (!page) {
				bm_free_pages(new_pages + have, i - have);
				bm_vk_free(new_pages);
				return NULL;
			}
			/* we want to know which page it is
			 * from the endio handlers */
			bm_store_page_idx(page, i);
			new_pages[i] = page;
		}
	} else {
		for (i = 0; i < want; i++)
			new_pages[i] = old_pages[i];
		/* NOT HERE, we are outside the spinlock!
		bm_free_pages(old_pages + want, have - want);
		*/
	}

	return new_pages;
}

/*
 * allocates the drbd_bitmap and stores it in device->bitmap.
 */
int drbd_bm_init(struct drbd_device *device)
{
	struct drbd_bitmap *b = device->bitmap;
	WARN_ON(b != NULL);
	b = kzalloc(sizeof(struct drbd_bitmap), GFP_KERNEL);
	if (!b)
		return -ENOMEM;
	spin_lock_init(&b->bm_lock);
	mutex_init(&b->bm_change);
	init_waitqueue_head(&b->bm_io_wait);

	device->bitmap = b;

	return 0;
}

sector_t drbd_bm_capacity(struct drbd_device *device)
{
	if (!expect(device->bitmap))
		return 0;
	return device->bitmap->bm_dev_capacity;
}

/* called on driver unload. TODO: call when a device is destroyed.
 */
void drbd_bm_cleanup(struct drbd_device *device)
{
	if (!expect(device->bitmap))
		return;
	bm_free_pages(device->bitmap->bm_pages, device->bitmap->bm_number_of_pages);
	bm_vk_free(device->bitmap->bm_pages);
	kfree(device->bitmap);
	device->bitmap = NULL;
}

/*
 * if (b->bm_bits % BITS_PER_LONG) != 0,
 * this masks out the remaining bits.
 * Returns the number of bits cleared.
 */
#ifndef BITS_PER_PAGE
#define BITS_PER_PAGE		(1UL << (PAGE_SHIFT + 3))
#define BITS_PER_PAGE_MASK	(BITS_PER_PAGE - 1)
#else
# if BITS_PER_PAGE != (1UL << (PAGE_SHIFT + 3))
#  error "ambiguous BITS_PER_PAGE"
# endif
#endif
#define BITS_PER_LONG_MASK	(BITS_PER_LONG - 1)
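/* Illustrative example: with bm_bits == 70 and BITS_PER_LONG == 64, we get
 * tmp == 70, tmp & BITS_PER_LONG_MASK == 6, mask == (1UL << 6) - 1, so bits
 * 64..69 (stored in bits 0..5 of the last word) survive, and everything
 * above them is surplus: cleared below, or set in bm_set_surplus(). */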
static int bm_clear_surplus(struct drbd_bitmap *b)
{
	unsigned long mask;
	unsigned long *p_addr, *bm;
	int tmp;
	int cleared = 0;

	/* number of bits modulo bits per page */
	tmp = (b->bm_bits & BITS_PER_PAGE_MASK);
	/* mask the used bits of the word containing the last bit */
	mask = (1UL << (tmp & BITS_PER_LONG_MASK)) - 1;
	/* bitmap is always stored little endian,
	 * on disk and in core memory alike */
	mask = cpu_to_lel(mask);

	p_addr = bm_map_pidx(b, b->bm_number_of_pages - 1);
	bm = p_addr + (tmp/BITS_PER_LONG);
	if (mask) {
		/* If mask != 0, we are not exactly aligned, so bm now points
		 * to the long containing the last bit.
		 * If mask == 0, bm already points to the word immediately
		 * after the last (long word aligned) bit. */
		cleared = hweight_long(*bm & ~mask);
		*bm &= mask;
		bm++;
	}

	if (BITS_PER_LONG == 32 && ((bm - p_addr) & 1) == 1) {
		/* on a 32bit arch, we may need to zero out
		 * a padding long to align with a 64bit remote */
		cleared += hweight_long(*bm);
		*bm = 0;
	}
	bm_unmap(p_addr);
	return cleared;
}

static void bm_set_surplus(struct drbd_bitmap *b)
{
	unsigned long mask;
	unsigned long *p_addr, *bm;
	int tmp;

	/* number of bits modulo bits per page */
	tmp = (b->bm_bits & BITS_PER_PAGE_MASK);
	/* mask the used bits of the word containing the last bit */
	mask = (1UL << (tmp & BITS_PER_LONG_MASK)) - 1;
	/* bitmap is always stored little endian,
	 * on disk and in core memory alike */
	mask = cpu_to_lel(mask);

	p_addr = bm_map_pidx(b, b->bm_number_of_pages - 1);
	bm = p_addr + (tmp/BITS_PER_LONG);
	if (mask) {
		/* If mask != 0, we are not exactly aligned, so bm now points
		 * to the long containing the last bit.
		 * If mask == 0, bm already points to the word immediately
		 * after the last (long word aligned) bit. */
		*bm |= ~mask;
		bm++;
	}

	if (BITS_PER_LONG == 32 && ((bm - p_addr) & 1) == 1) {
		/* on a 32bit arch, we may need to set a padding long
		 * to all ones, to align with a 64bit remote */
		*bm = ~0UL;
	}
	bm_unmap(p_addr);
}

/* you better not modify the bitmap while this is running,
 * or its results will be stale */
static unsigned long bm_count_bits(struct drbd_bitmap *b)
{
	unsigned long *p_addr;
	unsigned long bits = 0;
	unsigned long mask = (1UL << (b->bm_bits & BITS_PER_LONG_MASK)) - 1;
	int idx, last_word;

	/* all but last page */
	for (idx = 0; idx < b->bm_number_of_pages - 1; idx++) {
		p_addr = __bm_map_pidx(b, idx);
		bits += bitmap_weight(p_addr, BITS_PER_PAGE);
		__bm_unmap(p_addr);
		cond_resched();
	}
	/* last (or only) page */
	last_word = ((b->bm_bits - 1) & BITS_PER_PAGE_MASK) >> LN2_BPL;
	p_addr = __bm_map_pidx(b, idx);
	bits += bitmap_weight(p_addr, last_word * BITS_PER_LONG);
	p_addr[last_word] &= cpu_to_lel(mask);
	bits += hweight_long(p_addr[last_word]);
	/* 32bit arch, may have an unused padding long */
	if (BITS_PER_LONG == 32 && (last_word & 1) == 0)
		p_addr[last_word+1] = 0;
	__bm_unmap(p_addr);
	return bits;
}

/* offset and len in long words.*/
static void bm_memset(struct drbd_bitmap *b, size_t offset, int c, size_t len)
{
	unsigned long *p_addr, *bm;
	unsigned int idx;
	size_t do_now, end;

	end = offset + len;

	if (end > b->bm_words) {
		pr_alert("bm_memset end > bm_words\n");
		return;
	}

	while (offset < end) {
		do_now = min_t(size_t, ALIGN(offset + 1, LWPP), end) - offset;
		idx = bm_word_to_page_idx(b, offset);
		p_addr = bm_map_pidx(b, idx);
		bm = p_addr + MLPP(offset);
		if (bm+do_now > p_addr + LWPP) {
			pr_alert("BUG BUG BUG! p_addr:%p bm:%p do_now:%d\n",
				 p_addr, bm, (int)do_now);
		} else
			memset(bm, c, do_now * sizeof(long));
		bm_unmap(p_addr);
		bm_set_page_need_writeout(b->bm_pages[idx]);
		offset += do_now;
	}
}

/* For the layout, see comment above drbd_md_set_sector_offsets(). */
static u64 drbd_md_on_disk_bits(struct drbd_backing_dev *ldev)
{
	u64 bitmap_sectors;
	if (ldev->md.al_offset == 8)
		bitmap_sectors = ldev->md.md_size_sect - ldev->md.bm_offset;
	else
		bitmap_sectors = ldev->md.al_offset - ldev->md.bm_offset;
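	/* sectors to bits: 512 bytes per sector (<< 9), 8 bits per byte (<< 3) */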
	return bitmap_sectors << (9 + 3);
}

/*
 * make sure the bitmap has enough room for the attached storage,
 * if necessary, resize.
 * called whenever we may have changed the device size.
 * returns -ENOMEM if we could not allocate enough memory, 0 on success.
 * In case this is actually a resize, we copy the old bitmap into the new one.
 * Otherwise, the bitmap is initialized to all bits set.
 */
int drbd_bm_resize(struct drbd_device *device, sector_t capacity, int set_new_bits)
{
	struct drbd_bitmap *b = device->bitmap;
	unsigned long bits, words, owords, obits;
	unsigned long want, have, onpages; /* number of pages */
	struct page **npages, **opages = NULL;
	int err = 0;
	bool growing;

	if (!expect(b))
		return -ENOMEM;

	drbd_bm_lock(device, "resize", BM_LOCKED_MASK);

	drbd_info(device, "drbd_bm_resize called with capacity == %llu\n",
		  (unsigned long long)capacity);

	if (capacity == b->bm_dev_capacity)
		goto out;

	if (capacity == 0) {
		spin_lock_irq(&b->bm_lock);
		opages = b->bm_pages;
		onpages = b->bm_number_of_pages;
		owords = b->bm_words;
		b->bm_pages = NULL;
		b->bm_number_of_pages =
		b->bm_set =
		b->bm_bits =
		b->bm_words =
		b->bm_dev_capacity = 0;
		spin_unlock_irq(&b->bm_lock);
		bm_free_pages(opages, onpages);
		bm_vk_free(opages);
		goto out;
	}
	bits  = BM_SECT_TO_BIT(ALIGN(capacity, BM_SECT_PER_BIT));

	/* if we used
	   words = ALIGN(bits,BITS_PER_LONG) >> LN2_BPL;
	   a 32bit host could present the wrong number of words
	   to a 64bit host.
	*/
	words = ALIGN(bits, 64) >> LN2_BPL;

	if (get_ldev(device)) {
		u64 bits_on_disk = drbd_md_on_disk_bits(device->ldev);
		put_ldev(device);
		if (bits > bits_on_disk) {
			drbd_info(device, "bits = %lu\n", bits);
			drbd_info(device, "bits_on_disk = %llu\n", bits_on_disk);
			err = -ENOSPC;
			goto out;
		}
	}

	want = ALIGN(words*sizeof(long), PAGE_SIZE) >> PAGE_SHIFT;
	have = b->bm_number_of_pages;
	if (want == have) {
		D_ASSERT(device, b->bm_pages != NULL);
		npages = b->bm_pages;
	} else {
		if (drbd_insert_fault(device, DRBD_FAULT_BM_ALLOC))
			npages = NULL;
		else
			npages = bm_realloc_pages(b, want);
	}

	if (!npages) {
		err = -ENOMEM;
		goto out;
	}

	spin_lock_irq(&b->bm_lock);
	opages = b->bm_pages;
	owords = b->bm_words;
	obits  = b->bm_bits;

	growing = bits > obits;
	if (opages && growing && set_new_bits)
		bm_set_surplus(b);

	b->bm_pages = npages;
	b->bm_number_of_pages = want;
	b->bm_bits  = bits;
	b->bm_words = words;
	b->bm_dev_capacity = capacity;

	if (growing) {
		if (set_new_bits) {
			bm_memset(b, owords, 0xff, words-owords);
			b->bm_set += bits - obits;
		} else
			bm_memset(b, owords, 0x00, words-owords);

	}

	if (want < have) {
		/* implicit: (opages != NULL) && (opages != npages) */
		bm_free_pages(opages + want, have - want);
	}

	(void)bm_clear_surplus(b);

	spin_unlock_irq(&b->bm_lock);
	if (opages != npages)
		bm_vk_free(opages);
	if (!growing)
		b->bm_set = bm_count_bits(b);
	drbd_info(device, "resync bitmap: bits=%lu words=%lu pages=%lu\n", bits, words, want);

 out:
	drbd_bm_unlock(device);
	return err;
}
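
/* Worked example of the sizing above (illustrative; assumes 4 KiB pages,
 * 64-bit longs, and BM_SECT_PER_BIT == 8, i.e. one bit per 4 KiB at
 * 512-byte sectors): capacity == 2097152 sectors (1 GiB) gives
 *	bits  == 2097152 / 8			== 262144
 *	words == ALIGN(262144, 64) >> 6		== 4096
 *	want  == ALIGN(4096 * 8, 4096) >> 12	== 8 pages
 */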
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 745)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 746) /* inherently racy:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 747) * if not protected by other means, return value may be out of date when
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 748) * leaving this function...
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 749) * we still need to lock it, since it is important that this returns
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 750) * bm_set == 0 precisely.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 751) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 752) * maybe bm_set should be atomic_t ?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 753) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 754) unsigned long _drbd_bm_total_weight(struct drbd_device *device)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 755) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 756) struct drbd_bitmap *b = device->bitmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 757) unsigned long s;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 758) unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 759)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 760) if (!expect(b))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 761) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 762) if (!expect(b->bm_pages))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 763) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 764)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 765) spin_lock_irqsave(&b->bm_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 766) s = b->bm_set;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 767) spin_unlock_irqrestore(&b->bm_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 768)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 769) return s;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 770) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 771)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 772) unsigned long drbd_bm_total_weight(struct drbd_device *device)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 773) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 774) unsigned long s;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 775) /* if I don't have a disk, I don't know about out-of-sync status */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 776) if (!get_ldev_if_state(device, D_NEGOTIATING))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 777) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 778) s = _drbd_bm_total_weight(device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 779) put_ldev(device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 780) return s;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 781) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 782)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 783) size_t drbd_bm_words(struct drbd_device *device)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 784) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 785) struct drbd_bitmap *b = device->bitmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 786) if (!expect(b))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 787) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 788) if (!expect(b->bm_pages))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 789) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 790)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 791) return b->bm_words;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 792) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 793)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 794) unsigned long drbd_bm_bits(struct drbd_device *device)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 795) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 796) struct drbd_bitmap *b = device->bitmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 797) if (!expect(b))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 798) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 799)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 800) return b->bm_bits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 801) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 802)
/* merge @number words from @buffer into the bitmap, starting at word
 * offset @offset.
 * buffer[i] is expected to be a little endian unsigned long.
 * the bitmap must be locked by drbd_bm_lock().
 * currently only used from receive_bitmap.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 808) void drbd_bm_merge_lel(struct drbd_device *device, size_t offset, size_t number,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 809) unsigned long *buffer)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 810) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 811) struct drbd_bitmap *b = device->bitmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 812) unsigned long *p_addr, *bm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 813) unsigned long word, bits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 814) unsigned int idx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 815) size_t end, do_now;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 816)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 817) end = offset + number;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 818)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 819) if (!expect(b))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 820) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 821) if (!expect(b->bm_pages))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 822) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 823) if (number == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 824) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 825) WARN_ON(offset >= b->bm_words);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 826) WARN_ON(end > b->bm_words);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 827)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 828) spin_lock_irq(&b->bm_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 829) while (offset < end) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 830) do_now = min_t(size_t, ALIGN(offset+1, LWPP), end) - offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 831) idx = bm_word_to_page_idx(b, offset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 832) p_addr = bm_map_pidx(b, idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 833) bm = p_addr + MLPP(offset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 834) offset += do_now;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 835) while (do_now--) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 836) bits = hweight_long(*bm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 837) word = *bm | *buffer++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 838) *bm++ = word;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 839) b->bm_set += hweight_long(word) - bits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 840) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 841) bm_unmap(p_addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 842) bm_set_page_need_writeout(b->bm_pages[idx]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 843) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 844) /* with 32bit <-> 64bit cross-platform connect
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 845) * this is only correct for current usage,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 846) * where we _know_ that we are 64 bit aligned,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 847) * and know that this function is used in this way, too...
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 848) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 849) if (end == b->bm_words)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 850) b->bm_set -= bm_clear_surplus(b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 851) spin_unlock_irq(&b->bm_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 852) }
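/*
 * Example of the merge accounting above (hypothetical values):
 *   old word in bitmap: 0b0011  (hweight_long == 2)
 *   word from buffer:   0b0110
 *   merged (OR):        0b0111  (hweight_long == 3)
 * b->bm_set is adjusted by the difference, here +1, so the count of
 * set bits stays exact without a full recount.
 */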
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 853)
/* copy @number words from the bitmap, starting at word offset @offset,
 * into @buffer. buffer[i] will be a little endian unsigned long.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 857) void drbd_bm_get_lel(struct drbd_device *device, size_t offset, size_t number,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 858) unsigned long *buffer)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 859) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 860) struct drbd_bitmap *b = device->bitmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 861) unsigned long *p_addr, *bm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 862) size_t end, do_now;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 863)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 864) end = offset + number;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 865)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 866) if (!expect(b))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 867) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 868) if (!expect(b->bm_pages))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 869) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 870)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 871) spin_lock_irq(&b->bm_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 872) if ((offset >= b->bm_words) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 873) (end > b->bm_words) ||
	    (number == 0))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 875) drbd_err(device, "offset=%lu number=%lu bm_words=%lu\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 876) (unsigned long) offset,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 877) (unsigned long) number,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 878) (unsigned long) b->bm_words);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 879) else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 880) while (offset < end) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 881) do_now = min_t(size_t, ALIGN(offset+1, LWPP), end) - offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 882) p_addr = bm_map_pidx(b, bm_word_to_page_idx(b, offset));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 883) bm = p_addr + MLPP(offset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 884) offset += do_now;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 885) while (do_now--)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 886) *buffer++ = *bm++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 887) bm_unmap(p_addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 888) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 889) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 890) spin_unlock_irq(&b->bm_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 891) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 892)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 893) /* set all bits in the bitmap */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 894) void drbd_bm_set_all(struct drbd_device *device)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 895) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 896) struct drbd_bitmap *b = device->bitmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 897) if (!expect(b))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 898) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 899) if (!expect(b->bm_pages))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 900) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 901)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 902) spin_lock_irq(&b->bm_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 903) bm_memset(b, 0, 0xff, b->bm_words);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 904) (void)bm_clear_surplus(b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 905) b->bm_set = b->bm_bits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 906) spin_unlock_irq(&b->bm_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 907) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 908)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 909) /* clear all bits in the bitmap */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 910) void drbd_bm_clear_all(struct drbd_device *device)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 911) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 912) struct drbd_bitmap *b = device->bitmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 913) if (!expect(b))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 914) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 915) if (!expect(b->bm_pages))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 916) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 917)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 918) spin_lock_irq(&b->bm_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 919) bm_memset(b, 0, 0, b->bm_words);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 920) b->bm_set = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 921) spin_unlock_irq(&b->bm_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 922) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 923)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 924) static void drbd_bm_aio_ctx_destroy(struct kref *kref)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 925) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 926) struct drbd_bm_aio_ctx *ctx = container_of(kref, struct drbd_bm_aio_ctx, kref);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 927) unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 928)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 929) spin_lock_irqsave(&ctx->device->resource->req_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 930) list_del(&ctx->list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 931) spin_unlock_irqrestore(&ctx->device->resource->req_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 932) put_ldev(ctx->device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 933) kfree(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 934) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 935)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 936) /* bv_page may be a copy, or may be the original */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 937) static void drbd_bm_endio(struct bio *bio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 938) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 939) struct drbd_bm_aio_ctx *ctx = bio->bi_private;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 940) struct drbd_device *device = ctx->device;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 941) struct drbd_bitmap *b = device->bitmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 942) unsigned int idx = bm_page_to_idx(bio_first_page_all(bio));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 943)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 944) if ((ctx->flags & BM_AIO_COPY_PAGES) == 0 &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 945) !bm_test_page_unchanged(b->bm_pages[idx]))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 946) drbd_warn(device, "bitmap page idx %u changed during IO!\n", idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 947)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 948) if (bio->bi_status) {
		/* ctx->error will hold the error code of the bio that
		 * completed last with a non-zero status, in case error
		 * codes differ. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 951) ctx->error = blk_status_to_errno(bio->bi_status);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 952) bm_set_page_io_err(b->bm_pages[idx]);
		/* The page in memory is no longer identical to the
		 * on-disk version of it. Is BM_PAGE_IO_ERROR enough? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 955) if (__ratelimit(&drbd_ratelimit_state))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 956) drbd_err(device, "IO ERROR %d on bitmap page idx %u\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 957) bio->bi_status, idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 958) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 959) bm_clear_page_io_err(b->bm_pages[idx]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 960) dynamic_drbd_dbg(device, "bitmap page idx %u completed\n", idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 961) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 962)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 963) bm_page_unlock_io(device, idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 964)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 965) if (ctx->flags & BM_AIO_COPY_PAGES)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 966) mempool_free(bio->bi_io_vec[0].bv_page, &drbd_md_io_page_pool);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 967)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 968) bio_put(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 969)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 970) if (atomic_dec_and_test(&ctx->in_flight)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 971) ctx->done = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 972) wake_up(&device->misc_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 973) kref_put(&ctx->kref, &drbd_bm_aio_ctx_destroy);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 974) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 975) }
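/*
 * Completion accounting sketch for drbd_bm_endio(): ctx->in_flight
 * starts at 1 on behalf of the submitter (see bm_rw() below) and is
 * incremented once per submitted bio.  The last completing bio sets
 * ctx->done, wakes device->misc_wait, and drops the kref associated
 * with the "in_flight reached zero" event; bm_rw() drops that kref
 * itself if it finds all bios already completed.
 */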
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 976)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 977) static void bm_page_io_async(struct drbd_bm_aio_ctx *ctx, int page_nr) __must_hold(local)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 978) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 979) struct bio *bio = bio_alloc_drbd(GFP_NOIO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 980) struct drbd_device *device = ctx->device;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 981) struct drbd_bitmap *b = device->bitmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 982) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 983) unsigned int len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 984) unsigned int op = (ctx->flags & BM_AIO_READ) ? REQ_OP_READ : REQ_OP_WRITE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 985)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 986) sector_t on_disk_sector =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 987) device->ldev->md.md_offset + device->ldev->md.bm_offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 988) on_disk_sector += ((sector_t)page_nr) << (PAGE_SHIFT-9);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 989)
	/* len may be smaller than PAGE_SIZE with a very small
	 * flexible external meta data device,
	 * or with PAGE_SIZE > 4k */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 993) len = min_t(unsigned int, PAGE_SIZE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 994) (drbd_md_last_sector(device->ldev) - on_disk_sector + 1)<<9);
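	/* example, assuming 4 KiB pages (PAGE_SHIFT == 12):
	 * page_nr == 3 starts at bitmap sector 3 << (12 - 9) == 24,
	 * that is, each bitmap page covers 8 sectors of 512 bytes;
	 * len is capped where the meta data area ends within a page. */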
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 995)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 996) /* serialize IO on this page */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 997) bm_page_lock_io(device, page_nr);
	/* mark "unchanged" before memcpy and submit,
	 * so the page may be redirtied at any time,
	 * and we notice and write it out again */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000) bm_set_page_unchanged(b->bm_pages[page_nr]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002) if (ctx->flags & BM_AIO_COPY_PAGES) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003) page = mempool_alloc(&drbd_md_io_page_pool,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004) GFP_NOIO | __GFP_HIGHMEM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005) copy_highpage(page, b->bm_pages[page_nr]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006) bm_store_page_idx(page, page_nr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007) } else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008) page = b->bm_pages[page_nr];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009) bio_set_dev(bio, device->ldev->md_bdev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010) bio->bi_iter.bi_sector = on_disk_sector;
	/* bio_add_page() of a single page to an empty bio will always
	 * succeed, according to the API. Do we want to assert that? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013) bio_add_page(bio, page, len, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014) bio->bi_private = ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015) bio->bi_end_io = drbd_bm_endio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016) bio_set_op_attrs(bio, op, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018) if (drbd_insert_fault(device, (op == REQ_OP_WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019) bio_io_error(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) submit_bio(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022) /* this should not count as user activity and cause the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023) * resync to throttle -- see drbd_rs_should_slow_down(). */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) atomic_add(len >> 9, &device->rs_sect_ev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029) * bm_rw: read/write the whole bitmap from/to its on disk location.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031) static int bm_rw(struct drbd_device *device, const unsigned int flags, unsigned lazy_writeout_upper_idx) __must_hold(local)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033) struct drbd_bm_aio_ctx *ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034) struct drbd_bitmap *b = device->bitmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035) unsigned int num_pages, i, count = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036) unsigned long now;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037) char ppb[10];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038) int err = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041) * We are protected against bitmap disappearing/resizing by holding an
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042) * ldev reference (caller must have called get_ldev()).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) * For read/write, we are protected against changes to the bitmap by
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044) * the bitmap lock (see drbd_bitmap_io).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045) * For lazy writeout, we don't care for ongoing changes to the bitmap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046) * as we submit copies of pages anyways.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049) ctx = kmalloc(sizeof(struct drbd_bm_aio_ctx), GFP_NOIO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050) if (!ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052)
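	/* ctx starts out with two kref references: one for this
	 * submitting context (dropped at the end of bm_rw()), and one
	 * for the "in_flight reached zero, all done" event (dropped
	 * from drbd_bm_endio(), or below if nothing was in flight). */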
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053) *ctx = (struct drbd_bm_aio_ctx) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) .device = device,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055) .start_jif = jiffies,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056) .in_flight = ATOMIC_INIT(1),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057) .done = 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058) .flags = flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059) .error = 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060) .kref = KREF_INIT(2),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063) if (!get_ldev_if_state(device, D_ATTACHING)) { /* put is in drbd_bm_aio_ctx_destroy() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064) drbd_err(device, "ASSERT FAILED: get_ldev_if_state() == 1 in bm_rw()\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065) kfree(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066) return -ENODEV;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068) /* Here D_ATTACHING is sufficient since drbd_bm_read() is called only from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069) drbd_adm_attach(), after device->ldev was assigned. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071) if (0 == (ctx->flags & ~BM_AIO_READ))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072) WARN_ON(!(BM_LOCKED_MASK & b->bm_flags));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074) spin_lock_irq(&device->resource->req_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075) list_add_tail(&ctx->list, &device->pending_bitmap_io);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076) spin_unlock_irq(&device->resource->req_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078) num_pages = b->bm_number_of_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080) now = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082) /* let the layers below us try to merge these bios... */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084) if (flags & BM_AIO_READ) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085) for (i = 0; i < num_pages; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086) atomic_inc(&ctx->in_flight);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087) bm_page_io_async(ctx, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088) ++count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089) cond_resched();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091) } else if (flags & BM_AIO_WRITE_HINTED) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092) /* ASSERT: BM_AIO_WRITE_ALL_PAGES is not set. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093) unsigned int hint;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094) for (hint = 0; hint < b->n_bitmap_hints; hint++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095) i = b->al_bitmap_hints[hint];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096) if (i >= num_pages) /* == -1U: no hint here. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098) /* Several AL-extents may point to the same page. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099) if (!test_and_clear_bit(BM_PAGE_HINT_WRITEOUT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100) &page_private(b->bm_pages[i])))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102) /* Has it even changed? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103) if (bm_test_page_unchanged(b->bm_pages[i]))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105) atomic_inc(&ctx->in_flight);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106) bm_page_io_async(ctx, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107) ++count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110) for (i = 0; i < num_pages; i++) {
			if (lazy_writeout_upper_idx && i == lazy_writeout_upper_idx)
				break;
			/* ignore completely unchanged pages */
			if (!(flags & BM_AIO_WRITE_ALL_PAGES) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115) bm_test_page_unchanged(b->bm_pages[i])) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116) dynamic_drbd_dbg(device, "skipped bm write for idx %u\n", i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119) /* during lazy writeout,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120) * ignore those pages not marked for lazy writeout. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121) if (lazy_writeout_upper_idx &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122) !bm_test_page_lazy_writeout(b->bm_pages[i])) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123) dynamic_drbd_dbg(device, "skipped bm lazy write for idx %u\n", i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126) atomic_inc(&ctx->in_flight);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127) bm_page_io_async(ctx, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128) ++count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129) cond_resched();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134) * We initialize ctx->in_flight to one to make sure drbd_bm_endio
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135) * will not set ctx->done early, and decrement / test it here. If there
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136) * are still some bios in flight, we need to wait for them here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137) * If all IO is done already (or nothing had been submitted), there is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138) * no need to wait. Still, we need to put the kref associated with the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139) * "in_flight reached zero, all done" event.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141) if (!atomic_dec_and_test(&ctx->in_flight))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142) wait_until_done_or_force_detached(device, device->ldev, &ctx->done);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144) kref_put(&ctx->kref, &drbd_bm_aio_ctx_destroy);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146) /* summary for global bitmap IO */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147) if (flags == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148) unsigned int ms = jiffies_to_msecs(jiffies - now);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149) if (ms > 5) {
			/* flags == 0 here, so this is always a plain WRITE */
			drbd_info(device, "bitmap WRITE of %u pages took %u ms\n",
					count, ms);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156) if (ctx->error) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157) drbd_alert(device, "we had at least one MD IO ERROR during bitmap IO\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158) drbd_chk_io_error(device, 1, DRBD_META_IO_ERROR);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159) err = -EIO; /* ctx->error ? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162) if (atomic_read(&ctx->in_flight))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163) err = -EIO; /* Disk timeout/force-detach during IO... */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165) now = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166) if (flags & BM_AIO_READ) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167) b->bm_set = bm_count_bits(b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168) drbd_info(device, "recounting of set bits took additional %lu jiffies\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169) jiffies - now);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170) }
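	/* reuse "now" to hold the current number of set bits,
	 * for the info message below */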
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171) now = b->bm_set;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173) if ((flags & ~BM_AIO_READ) == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174) drbd_info(device, "%s (%lu bits) marked out-of-sync by on disk bit-map.\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175) ppsize(ppb, now << (BM_BLOCK_SHIFT-10)), now);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177) kref_put(&ctx->kref, &drbd_bm_aio_ctx_destroy);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178) return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182) * drbd_bm_read() - Read the whole bitmap from its on disk location.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183) * @device: DRBD device.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185) int drbd_bm_read(struct drbd_device *device) __must_hold(local)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187) return bm_rw(device, BM_AIO_READ, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1189)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191) * drbd_bm_write() - Write the whole bitmap to its on disk location.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192) * @device: DRBD device.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194) * Will only write pages that have changed since last IO.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196) int drbd_bm_write(struct drbd_device *device) __must_hold(local)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198) return bm_rw(device, 0, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202) * drbd_bm_write_all() - Write the whole bitmap to its on disk location.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203) * @device: DRBD device.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205) * Will write all pages.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207) int drbd_bm_write_all(struct drbd_device *device) __must_hold(local)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1209) return bm_rw(device, BM_AIO_WRITE_ALL_PAGES, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213) * drbd_bm_write_lazy() - Write bitmap pages 0 to @upper_idx-1, if they have changed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214) * @device: DRBD device.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215) * @upper_idx: 0: write all changed pages; +ve: page index to stop scanning for changed pages
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217) int drbd_bm_write_lazy(struct drbd_device *device, unsigned upper_idx) __must_hold(local)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219) return bm_rw(device, BM_AIO_COPY_PAGES, upper_idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223) * drbd_bm_write_copy_pages() - Write the whole bitmap to its on disk location.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224) * @device: DRBD device.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226) * Will only write pages that have changed since last IO.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227) * In contrast to drbd_bm_write(), this will copy the bitmap pages
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228) * to temporary writeout pages. It is intended to trigger a full write-out
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229) * while still allowing the bitmap to change, for example if a resync or online
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230) * verify is aborted due to a failed peer disk, while local IO continues, or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231) * pending resync acks are still being processed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233) int drbd_bm_write_copy_pages(struct drbd_device *device) __must_hold(local)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235) return bm_rw(device, BM_AIO_COPY_PAGES, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1237)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1238) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1239) * drbd_bm_write_hinted() - Write bitmap pages with "hint" marks, if they have changed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1240) * @device: DRBD device.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1241) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1242) int drbd_bm_write_hinted(struct drbd_device *device) __must_hold(local)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1243) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1244) return bm_rw(device, BM_AIO_WRITE_HINTED | BM_AIO_COPY_PAGES, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1245) }
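/* Summary of the bm_rw() flag combinations used by the wrappers above:
 *
 *   drbd_bm_read()              BM_AIO_READ
 *   drbd_bm_write()             0 (changed pages only, written in place)
 *   drbd_bm_write_all()         BM_AIO_WRITE_ALL_PAGES
 *   drbd_bm_write_lazy(idx)     BM_AIO_COPY_PAGES, pages 0..idx-1
 *   drbd_bm_write_copy_pages()  BM_AIO_COPY_PAGES
 *   drbd_bm_write_hinted()      BM_AIO_WRITE_HINTED | BM_AIO_COPY_PAGES
 */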
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1246)
/* NOTE
 * find_first_bit returns int, we return unsigned long.
 * For this to work on a 32bit arch with bit numbers >= 2^32,
 * we'd need to return u64, and get a whole lot of other places
 * fixed where we still use unsigned long.
 *
 * this returns a bit number, NOT a sector!
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1255) static unsigned long __bm_find_next(struct drbd_device *device, unsigned long bm_fo,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1256) const int find_zero_bit)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1257) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1258) struct drbd_bitmap *b = device->bitmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1259) unsigned long *p_addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1260) unsigned long bit_offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1261) unsigned i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1262)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1263)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1264) if (bm_fo > b->bm_bits) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1265) drbd_err(device, "bm_fo=%lu bm_bits=%lu\n", bm_fo, b->bm_bits);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1266) bm_fo = DRBD_END_OF_BITMAP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1267) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1268) while (bm_fo < b->bm_bits) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1269) /* bit offset of the first bit in the page */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1270) bit_offset = bm_fo & ~BITS_PER_PAGE_MASK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1271) p_addr = __bm_map_pidx(b, bm_bit_to_page_idx(b, bm_fo));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1272)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1273) if (find_zero_bit)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1274) i = find_next_zero_bit_le(p_addr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1275) PAGE_SIZE*8, bm_fo & BITS_PER_PAGE_MASK);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1276) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1277) i = find_next_bit_le(p_addr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1278) PAGE_SIZE*8, bm_fo & BITS_PER_PAGE_MASK);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1279)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1280) __bm_unmap(p_addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1281) if (i < PAGE_SIZE*8) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1282) bm_fo = bit_offset + i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1283) if (bm_fo >= b->bm_bits)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1284) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1285) goto found;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1286) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1287) bm_fo = bit_offset + PAGE_SIZE*8;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1288) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1289) bm_fo = DRBD_END_OF_BITMAP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1290) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1291) found:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1292) return bm_fo;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1293) }
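/*
 * Example walk (assuming 4 KiB pages, so 32768 bits per page):
 * for bm_fo == 40000, bit_offset == 40000 & ~32767 == 32768 (the
 * first bit of page 1), and we search that page starting at in-page
 * offset 40000 & 32767 == 7232.  A miss in this page advances bm_fo
 * to bit_offset + 32768, the first bit of page 2.
 */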
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1294)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1295) static unsigned long bm_find_next(struct drbd_device *device,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1296) unsigned long bm_fo, const int find_zero_bit)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1297) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1298) struct drbd_bitmap *b = device->bitmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1299) unsigned long i = DRBD_END_OF_BITMAP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1300)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1301) if (!expect(b))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1302) return i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1303) if (!expect(b->bm_pages))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1304) return i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1305)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1306) spin_lock_irq(&b->bm_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1307) if (BM_DONT_TEST & b->bm_flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1308) bm_print_lock_info(device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1309)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1310) i = __bm_find_next(device, bm_fo, find_zero_bit);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1311)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1312) spin_unlock_irq(&b->bm_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1313) return i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1314) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1315)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1316) unsigned long drbd_bm_find_next(struct drbd_device *device, unsigned long bm_fo)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1317) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1318) return bm_find_next(device, bm_fo, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1319) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1320)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1321) #if 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1322) /* not yet needed for anything. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1323) unsigned long drbd_bm_find_next_zero(struct drbd_device *device, unsigned long bm_fo)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1324) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1325) return bm_find_next(device, bm_fo, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1326) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1327) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1328)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1329) /* does not spin_lock_irqsave.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1330) * you must take drbd_bm_lock() first */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1331) unsigned long _drbd_bm_find_next(struct drbd_device *device, unsigned long bm_fo)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1332) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1333) /* WARN_ON(!(BM_DONT_SET & device->b->bm_flags)); */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1334) return __bm_find_next(device, bm_fo, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1335) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1336)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1337) unsigned long _drbd_bm_find_next_zero(struct drbd_device *device, unsigned long bm_fo)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1338) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1339) /* WARN_ON(!(BM_DONT_SET & device->b->bm_flags)); */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1340) return __bm_find_next(device, bm_fo, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1341) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1342)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1343) /* returns number of bits actually changed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1344) * for val != 0, we change 0 -> 1, return code positive
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1345) * for val == 0, we change 1 -> 0, return code negative
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1346) * wants bitnr, not sector.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1347) * expected to be called for only a few bits (e - s about BITS_PER_LONG).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1348) * Must hold bitmap lock already. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1349) static int __bm_change_bits_to(struct drbd_device *device, const unsigned long s,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1350) unsigned long e, int val)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1351) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1352) struct drbd_bitmap *b = device->bitmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1353) unsigned long *p_addr = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1354) unsigned long bitnr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1355) unsigned int last_page_nr = -1U;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1356) int c = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1357) int changed_total = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1358)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1359) if (e >= b->bm_bits) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1360) drbd_err(device, "ASSERT FAILED: bit_s=%lu bit_e=%lu bm_bits=%lu\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1361) s, e, b->bm_bits);
		e = b->bm_bits ? b->bm_bits - 1 : 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1363) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1364) for (bitnr = s; bitnr <= e; bitnr++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1365) unsigned int page_nr = bm_bit_to_page_idx(b, bitnr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1366) if (page_nr != last_page_nr) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1367) if (p_addr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1368) __bm_unmap(p_addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1369) if (c < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1370) bm_set_page_lazy_writeout(b->bm_pages[last_page_nr]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1371) else if (c > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1372) bm_set_page_need_writeout(b->bm_pages[last_page_nr]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1373) changed_total += c;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1374) c = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1375) p_addr = __bm_map_pidx(b, page_nr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1376) last_page_nr = page_nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1377) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1378) if (val)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1379) c += (0 == __test_and_set_bit_le(bitnr & BITS_PER_PAGE_MASK, p_addr));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1380) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1381) c -= (0 != __test_and_clear_bit_le(bitnr & BITS_PER_PAGE_MASK, p_addr));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1382) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1383) if (p_addr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1384) __bm_unmap(p_addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1385) if (c < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1386) bm_set_page_lazy_writeout(b->bm_pages[last_page_nr]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1387) else if (c > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1388) bm_set_page_need_writeout(b->bm_pages[last_page_nr]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1389) changed_total += c;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1390) b->bm_set += changed_total;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1391) return changed_total;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1392) }
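/*
 * Example of the sign convention above (hypothetical bit ranges):
 * setting bits 0..7 (val != 0) while bits 0..3 are already set
 * changes only bits 4..7 and returns +4; clearing bits 0..7 again
 * afterwards returns -8.
 */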
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1393)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1394) /* returns number of bits actually changed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1395) * for val != 0, we change 0 -> 1, return code positive
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1396) * for val == 0, we change 1 -> 0, return code negative
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1397) * wants bitnr, not sector */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1398) static int bm_change_bits_to(struct drbd_device *device, const unsigned long s,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1399) const unsigned long e, int val)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1400) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1401) unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1402) struct drbd_bitmap *b = device->bitmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1403) int c = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1404)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1405) if (!expect(b))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1406) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1407) if (!expect(b->bm_pages))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1408) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1409)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1410) spin_lock_irqsave(&b->bm_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1411) if ((val ? BM_DONT_SET : BM_DONT_CLEAR) & b->bm_flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1412) bm_print_lock_info(device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1413)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1414) c = __bm_change_bits_to(device, s, e, val);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1415)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1416) spin_unlock_irqrestore(&b->bm_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1417) return c;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1418) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1419)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1420) /* returns number of bits changed 0 -> 1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1421) int drbd_bm_set_bits(struct drbd_device *device, const unsigned long s, const unsigned long e)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1422) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1423) return bm_change_bits_to(device, s, e, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1424) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1425)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1426) /* returns number of bits changed 1 -> 0 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1427) int drbd_bm_clear_bits(struct drbd_device *device, const unsigned long s, const unsigned long e)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1428) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1429) return -bm_change_bits_to(device, s, e, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1430) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1431)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1432) /* sets all bits in full words,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1433) * from first_word up to, but not including, last_word */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1434) static inline void bm_set_full_words_within_one_page(struct drbd_bitmap *b,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1435) int page_nr, int first_word, int last_word)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1436) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1437) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1438) int bits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1439) int changed = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1440) unsigned long *paddr = kmap_atomic(b->bm_pages[page_nr]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1441)
	/* I think it is more cache line friendly to hweight_long each word,
	 * then set it to ~0UL, than to first bitmap_weight() all words,
	 * then bitmap_fill() all words */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1444) for (i = first_word; i < last_word; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1445) bits = hweight_long(paddr[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1446) paddr[i] = ~0UL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1447) changed += BITS_PER_LONG - bits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1448) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1449) kunmap_atomic(paddr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1450) if (changed) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1451) /* We only need lazy writeout, the information is still in the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1452) * remote bitmap as well, and is reconstructed during the next
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1453) * bitmap exchange, if lost locally due to a crash. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1454) bm_set_page_lazy_writeout(b->bm_pages[page_nr]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1455) b->bm_set += changed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1456) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1457) }
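/*
 * Accounting example for the loop above (assuming 64bit longs):
 * a word with value 0x00000000ffffffff has hweight_long() == 32;
 * setting it to ~0UL contributes BITS_PER_LONG - 32 == 32 newly
 * set bits to "changed".
 */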
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1458)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1459) /* Same thing as drbd_bm_set_bits,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1460) * but more efficient for a large bit range.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1461) * You must first drbd_bm_lock().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1462) * Can be called to set the whole bitmap in one go.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1463) * Sets bits from s to e _inclusive_. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1464) void _drbd_bm_set_bits(struct drbd_device *device, const unsigned long s, const unsigned long e)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1465) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1466) /* First set_bit from the first bit (s)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1467) * up to the next long boundary (sl),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1468) * then assign full words up to the last long boundary (el),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1469) * then set_bit up to and including the last bit (e).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1470) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1471) * Do not use memset, because we must account for changes,
	 * so we need to loop over the words with hweight() anyway.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1473) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1474) struct drbd_bitmap *b = device->bitmap;
	unsigned long sl = ALIGN(s, BITS_PER_LONG);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1476) unsigned long el = (e+1) & ~((unsigned long)BITS_PER_LONG-1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1477) int first_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1478) int last_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1479) int page_nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1480) int first_word;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1481) int last_word;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1482)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1483) if (e - s <= 3*BITS_PER_LONG) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1484) /* don't bother; el and sl may even be wrong. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1485) spin_lock_irq(&b->bm_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1486) __bm_change_bits_to(device, s, e, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1487) spin_unlock_irq(&b->bm_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1488) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1489) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1490)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1491) /* difference is large enough that we can trust sl and el */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1492)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1493) spin_lock_irq(&b->bm_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1494)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1495) /* bits filling the current long */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1496) if (sl)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1497) __bm_change_bits_to(device, s, sl-1, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1498)
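	/* a bit number >> 3 is a byte offset; >> PAGE_SHIFT on top of
	 * that yields the page number, hence the combined shift by
	 * (3 + PAGE_SHIFT) */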
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1499) first_page = sl >> (3 + PAGE_SHIFT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1500) last_page = el >> (3 + PAGE_SHIFT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1501)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1502) /* MLPP: modulo longs per page */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1503) /* LWPP: long words per page */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1504) first_word = MLPP(sl >> LN2_BPL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1505) last_word = LWPP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1506)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1507) /* first and full pages, unless first page == last page */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1508) for (page_nr = first_page; page_nr < last_page; page_nr++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1509) bm_set_full_words_within_one_page(device->bitmap, page_nr, first_word, last_word);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1510) spin_unlock_irq(&b->bm_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1511) cond_resched();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1512) first_word = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1513) spin_lock_irq(&b->bm_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1514) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1515) /* last page (respectively only page, for first page == last page) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1516) last_word = MLPP(el >> LN2_BPL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1517)
	/* consider bitmap->bm_bits = 32768, bitmap->bm_number_of_pages = 1 (or multiples).
	 * ==> e = 32767, el = 32768, last_page = 1,
	 * and now last_word = 0.
	 * We do not want to touch last_page in this case,
	 * as we did not allocate it; it is not present in bitmap->bm_pages.
	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1524) if (last_word)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1525) bm_set_full_words_within_one_page(device->bitmap, last_page, first_word, last_word);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1526)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1527) /* possibly trailing bits.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1528) * example: (e & 63) == 63, el will be e+1.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1529) * if that even was the very last bit,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1530) * it would trigger an assert in __bm_change_bits_to()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1531) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1532) if (el <= e)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1533) __bm_change_bits_to(device, el, e, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1534) spin_unlock_irq(&b->bm_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1535) }
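/*
 * Worked example for the split above (hypothetical, BITS_PER_LONG == 64):
 * s == 5, e == 200  ==>  sl == 64, el == 192.
 * Bits 5..63 and 192..200 are handled by __bm_change_bits_to(),
 * while bits 64..191 (two full 64bit words) go through
 * bm_set_full_words_within_one_page().
 */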
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1536)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1537) /* returns bit state
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1538) * wants bitnr, NOT sector.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1539) * inherently racy... area needs to be locked by means of {al,rs}_lru
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1540) * 1 ... bit set
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1541) * 0 ... bit not set
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1542) * -1 ... first out of bounds access, stop testing for bits!
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1543) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1544) int drbd_bm_test_bit(struct drbd_device *device, const unsigned long bitnr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1545) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1546) unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1547) struct drbd_bitmap *b = device->bitmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1548) unsigned long *p_addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1549) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1550)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1551) if (!expect(b))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1552) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1553) if (!expect(b->bm_pages))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1554) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1555)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1556) spin_lock_irqsave(&b->bm_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1557) if (BM_DONT_TEST & b->bm_flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1558) bm_print_lock_info(device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1559) if (bitnr < b->bm_bits) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1560) p_addr = bm_map_pidx(b, bm_bit_to_page_idx(b, bitnr));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1561) i = test_bit_le(bitnr & BITS_PER_PAGE_MASK, p_addr) ? 1 : 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1562) bm_unmap(p_addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1563) } else if (bitnr == b->bm_bits) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1564) i = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1565) } else { /* (bitnr > b->bm_bits) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1566) drbd_err(device, "bitnr=%lu > bm_bits=%lu\n", bitnr, b->bm_bits);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1567) i = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1568) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1569)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1570) spin_unlock_irqrestore(&b->bm_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1571) return i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1572) }
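
/*
 * Usage sketch for the tri-state return value (hypothetical caller;
 * "first" and "nr_set" are made-up names for illustration):
 *
 *	unsigned long bit;
 *
 *	for (bit = first; ; bit++) {
 *		int st = drbd_bm_test_bit(device, bit);
 *		if (st < 0)
 *			break;
 *		if (st == 1)
 *			nr_set++;
 *	}
 *
 * st < 0 signals the first out-of-bounds access (bit == bm_bits), so
 * the loop stops testing; as noted above this is inherently racy, and
 * the caller is expected to hold the area locked via the al/rs lru.
 */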
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1573)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1574) /* returns number of bits set in the range [s, e] */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1575) int drbd_bm_count_bits(struct drbd_device *device, const unsigned long s, const unsigned long e)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1576) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1577) unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1578) struct drbd_bitmap *b = device->bitmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1579) unsigned long *p_addr = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1580) unsigned long bitnr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1581) unsigned int page_nr = -1U;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1582) int c = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1583)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1584) 	/* If this is called without a bitmap, that is a bug.  But to be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1585) 	 * robust in case we screwed up elsewhere, pretend there was one
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1586) 	 * dirty bit in the requested area, so we won't try to do a local
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1587) 	 * read there (no bitmap probably implies no disk). */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1588) if (!expect(b))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1589) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1590) if (!expect(b->bm_pages))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1591) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1592)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1593) spin_lock_irqsave(&b->bm_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1594) if (BM_DONT_TEST & b->bm_flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1595) bm_print_lock_info(device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1596) for (bitnr = s; bitnr <= e; bitnr++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1597) unsigned int idx = bm_bit_to_page_idx(b, bitnr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1598) if (page_nr != idx) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1599) page_nr = idx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1600) if (p_addr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1601) bm_unmap(p_addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1602) p_addr = bm_map_pidx(b, idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1603) }
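		/* bitnr - (page_nr << (PAGE_SHIFT+3)) is the bit offset within
		 * the currently mapped page, equivalent to the
		 * bitnr & BITS_PER_PAGE_MASK used in drbd_bm_test_bit() */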
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1604) if (expect(bitnr < b->bm_bits))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1605) c += (0 != test_bit_le(bitnr - (page_nr << (PAGE_SHIFT+3)), p_addr));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1606) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1607) drbd_err(device, "bitnr=%lu bm_bits=%lu\n", bitnr, b->bm_bits);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1608) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1609) if (p_addr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1610) bm_unmap(p_addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1611) spin_unlock_irqrestore(&b->bm_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1612) return c;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1613) }
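
/*
 * Usage sketch (hypothetical, for illustration): counting the dirty
 * bits covered by one activity-log extent, assuming AL_EXTENT_SHIFT
 * and BM_BLOCK_SHIFT as defined in drbd_int.h:
 *
 *	unsigned long first = enr << (AL_EXTENT_SHIFT - BM_BLOCK_SHIFT);
 *	unsigned long last = first + (1UL << (AL_EXTENT_SHIFT - BM_BLOCK_SHIFT)) - 1;
 *	int dirty = drbd_bm_count_bits(device, first, last);
 *
 * Note that the range [s, e] is inclusive on both ends, and that the
 * kmap of the current bitmap page is cached across loop iterations and
 * only re-mapped when the bit number crosses a page boundary.
 */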
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1614)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1615)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1616) /* inherently racy...
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1617)  * the return value may already be out-of-date when this function returns,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1618)  * but the general usage is that this is only used during a cstate when bits are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1619)  * only cleared, not set; typically we only care about the case when the return
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1620) * value is zero, or we already "locked" this "bitmap extent" by other means.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1621) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1622) * enr is bm-extent number, since we chose to name one sector (512 bytes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1623) * worth of the bitmap a "bitmap extent".
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1624) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1625) * TODO
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1626) * I think since we use it like a reference count, we should use the real
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1627) * reference count of some bitmap extent element from some lru instead...
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1628) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1629) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1630) int drbd_bm_e_weight(struct drbd_device *device, unsigned long enr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1631) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1632) struct drbd_bitmap *b = device->bitmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1633) int count, s, e;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1634) unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1635) unsigned long *p_addr, *bm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1636)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1637) if (!expect(b))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1638) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1639) if (!expect(b->bm_pages))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1640) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1641)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1642) spin_lock_irqsave(&b->bm_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1643) if (BM_DONT_TEST & b->bm_flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1644) bm_print_lock_info(device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1645)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1646) s = S2W(enr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1647) e = min((size_t)S2W(enr+1), b->bm_words);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1648) count = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1649) if (s < b->bm_words) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1650) int n = e-s;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1651) p_addr = bm_map_pidx(b, bm_word_to_page_idx(b, s));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1652) bm = p_addr + MLPP(s);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1653) count += bitmap_weight(bm, n * BITS_PER_LONG);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1654) bm_unmap(p_addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1655) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1656) drbd_err(device, "start offset (%d) too large in drbd_bm_e_weight\n", s);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1657) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1658) spin_unlock_irqrestore(&b->bm_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1659) return count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1660) }
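
/*
 * Worked example for the extent-to-word arithmetic above (a sketch,
 * assuming 64-bit longs): one "bitmap extent" is one sector (512
 * bytes) of bitmap, i.e. 4096 bits or 64 long words, so S2W(enr)
 * is enr * 64.  For enr = 3:
 *
 *	s = S2W(3) = 192
 *	e = min(S2W(4), bm_words) = min(256, bm_words)
 *
 * The weight is thus taken over at most 64 words (4096 bits), clamped
 * at bm_words for the last, possibly partial, extent of the bitmap.
 */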