/*
   rbd.c -- Export ceph rados objects as a Linux block device


   based on drivers/block/osdblk.c:

   Copyright 2009 Red Hat, Inc.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; see the file COPYING.  If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.



   For usage instructions, please refer to:

		 Documentation/ABI/testing/sysfs-bus-rbd

 */

#include <linux/ceph/libceph.h>
#include <linux/ceph/osd_client.h>
#include <linux/ceph/mon_client.h>
#include <linux/ceph/cls_lock_client.h>
#include <linux/ceph/striper.h>
#include <linux/ceph/decode.h>
#include <linux/fs_parser.h>
#include <linux/bsearch.h>

#include <linux/kernel.h>
#include <linux/device.h>
#include <linux/module.h>
#include <linux/blk-mq.h>
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/slab.h>
#include <linux/idr.h>
#include <linux/workqueue.h>

#include "rbd_types.h"

#define RBD_DEBUG	/* Activate rbd_assert() calls */

/*
 * Increment the given counter and return its updated value.
 * If the counter is already 0 it will not be incremented and
 * 0 is returned.  If the counter is already at its maximum value
 * (INT_MAX), -EINVAL is returned without updating it.
 */
static int atomic_inc_return_safe(atomic_t *v)
{
	unsigned int counter;

	counter = (unsigned int)atomic_fetch_add_unless(v, 1, 0);
	if (counter <= (unsigned int)INT_MAX)
		return (int)counter;

	atomic_dec(v);

	return -EINVAL;
}

/* Decrement the counter.  Return the resulting value, or -EINVAL */
static int atomic_dec_return_safe(atomic_t *v)
{
	int counter;

	counter = atomic_dec_return(v);
	if (counter >= 0)
		return counter;

	atomic_inc(v);

	return -EINVAL;
}
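
/*
 * Note: this saturating inc/dec pair is used below for counters such as
 * rbd_dev->parent_ref, so that overflow or underflow produces -EINVAL
 * instead of silently wrapping.
 */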

#define RBD_DRV_NAME "rbd"

#define RBD_MINORS_PER_MAJOR		256
#define RBD_SINGLE_MAJOR_PART_SHIFT	4

#define RBD_MAX_PARENT_CHAIN_LEN	16

#define RBD_SNAP_DEV_NAME_PREFIX	"snap_"
#define RBD_MAX_SNAP_NAME_LEN	\
			(NAME_MAX - (sizeof (RBD_SNAP_DEV_NAME_PREFIX) - 1))
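/*
 * Illustrative: with NAME_MAX == 255 and sizeof ("snap_") - 1 == 5,
 * RBD_MAX_SNAP_NAME_LEN works out to 250 bytes.
 */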

#define RBD_MAX_SNAP_COUNT	510	/* allows max snapc to fit in 4KB */
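/*
 * Rough bound (assuming the in-memory struct ceph_snap_context layout):
 * 510 snap ids at 8 bytes each is 4080 bytes, which together with the
 * header fields still fits within a single 4 KiB page.
 */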

#define RBD_SNAP_HEAD_NAME	"-"

#define	BAD_SNAP_INDEX	U32_MAX		/* invalid index into snap array */

/* This allows a single page to hold an image name sent by OSD */
#define RBD_IMAGE_NAME_LEN_MAX	(PAGE_SIZE - sizeof (__le32) - 1)
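/*
 * Illustrative: with 4 KiB pages that is 4096 - 4 - 1 = 4091 bytes
 * (PAGE_SIZE is architecture-dependent).
 */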
#define RBD_IMAGE_ID_LEN_MAX	64

#define RBD_OBJ_PREFIX_LEN_MAX	64

#define RBD_NOTIFY_TIMEOUT	5	/* seconds */
#define RBD_RETRY_DELAY		msecs_to_jiffies(1000)

/* Feature bits */

#define RBD_FEATURE_LAYERING		(1ULL<<0)
#define RBD_FEATURE_STRIPINGV2		(1ULL<<1)
#define RBD_FEATURE_EXCLUSIVE_LOCK	(1ULL<<2)
#define RBD_FEATURE_OBJECT_MAP		(1ULL<<3)
#define RBD_FEATURE_FAST_DIFF		(1ULL<<4)
#define RBD_FEATURE_DEEP_FLATTEN	(1ULL<<5)
#define RBD_FEATURE_DATA_POOL		(1ULL<<7)
#define RBD_FEATURE_OPERATIONS		(1ULL<<8)
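/*
 * Bit 6 is intentionally absent: it corresponds to the journaling
 * feature, which the kernel client does not implement.
 */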

#define RBD_FEATURES_ALL	(RBD_FEATURE_LAYERING |		\
				 RBD_FEATURE_STRIPINGV2 |	\
				 RBD_FEATURE_EXCLUSIVE_LOCK |	\
				 RBD_FEATURE_OBJECT_MAP |	\
				 RBD_FEATURE_FAST_DIFF |	\
				 RBD_FEATURE_DEEP_FLATTEN |	\
				 RBD_FEATURE_DATA_POOL |	\
				 RBD_FEATURE_OPERATIONS)

/* Features supported by this (client software) implementation. */

#define RBD_FEATURES_SUPPORTED	(RBD_FEATURES_ALL)

/*
 * An RBD device name will be "rbd#", where the "rbd" comes from
 * RBD_DRV_NAME above, and # is a unique integer identifier.
 */
#define DEV_NAME_LEN		32

/*
 * block device image metadata (in-memory version)
 */
struct rbd_image_header {
	/* These six fields never change for a given rbd image */
	char *object_prefix;
	__u8 obj_order;
	u64 stripe_unit;
	u64 stripe_count;
	s64 data_pool_id;
	u64 features;		/* Might be changeable someday? */

	/* The remaining fields need to be updated occasionally */
	u64 image_size;
	struct ceph_snap_context *snapc;
	char *snap_names;	/* format 1 only */
	u64 *snap_sizes;	/* format 1 only */
};
/*
 * An rbd image specification.
 *
 * The tuple (pool_id, image_id, snap_id) is sufficient to uniquely
 * identify an image.  Each rbd_dev structure includes a pointer to
 * an rbd_spec structure that encapsulates this identity.
 *
 * Each of the ids in an rbd_spec has an associated name.  For a
 * user-mapped image, the names are supplied and the ids associated
 * with them are looked up.  For a layered image, a parent image is
 * defined by the tuple, and the names are looked up.
 *
 * An rbd_dev structure contains a parent_spec pointer which is
 * non-null if the image it represents is a child in a layered
 * image.  This pointer will refer to the rbd_spec structure used
 * by the parent rbd_dev for its own identity (i.e., the structure
 * is shared between the parent and child).
 *
 * Since these structures are populated once, during the discovery
 * phase of image construction, they are effectively immutable, so
 * we make no effort to synchronize access to them.
 *
 * Note that code herein does not assume the image name is known (it
 * could be a null pointer).
 */
struct rbd_spec {
	u64		pool_id;
	const char	*pool_name;
	const char	*pool_ns;	/* NULL if default, never "" */

	const char	*image_id;
	const char	*image_name;

	u64		snap_id;
	const char	*snap_name;

	struct kref	kref;
};
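
/*
 * Illustrative (hypothetical values): mapping the head of image "foo"
 * in pool "rbd" might yield pool_id = 2, pool_name = "rbd",
 * image_id = "1018e2c2563d", image_name = "foo",
 * snap_id = CEPH_NOSNAP and snap_name = "-" (RBD_SNAP_HEAD_NAME).
 */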

/*
 * an instance of the client.  multiple devices may share an rbd client.
 */
struct rbd_client {
	struct ceph_client	*client;
	struct kref		kref;
	struct list_head	node;
};

struct pending_result {
	int			result;		/* first nonzero result */
	int			num_pending;
};

struct rbd_img_request;

enum obj_request_type {
	OBJ_REQUEST_NODATA = 1,
	OBJ_REQUEST_BIO,	/* pointer into provided bio (list) */
	OBJ_REQUEST_BVECS,	/* pointer into provided bio_vec array */
	OBJ_REQUEST_OWN_BVECS,	/* private bio_vec array, doesn't own pages */
};

enum obj_operation_type {
	OBJ_OP_READ = 1,
	OBJ_OP_WRITE,
	OBJ_OP_DISCARD,
	OBJ_OP_ZEROOUT,
};

#define RBD_OBJ_FLAG_DELETION			(1U << 0)
#define RBD_OBJ_FLAG_COPYUP_ENABLED		(1U << 1)
#define RBD_OBJ_FLAG_COPYUP_ZEROS		(1U << 2)
#define RBD_OBJ_FLAG_MAY_EXIST			(1U << 3)
#define RBD_OBJ_FLAG_NOOP_FOR_NONEXISTENT	(1U << 4)

enum rbd_obj_read_state {
	RBD_OBJ_READ_START = 1,
	RBD_OBJ_READ_OBJECT,
	RBD_OBJ_READ_PARENT,
};

/*
 * Writes go through the following state machine to deal with
 * layering:
 *
 *            . . . . . RBD_OBJ_WRITE_GUARD. . . . . . . . . . . . . .
 *            .                 |                                    .
 *            .                 v                                    .
 *            .    RBD_OBJ_WRITE_READ_FROM_PARENT. . .               .
 *            .                 |                    .               .
 *            .                 v                    v (deep-copyup  .
 *    (image  .   RBD_OBJ_WRITE_COPYUP_EMPTY_SNAPC   .  not needed)  .
 * flattened) v                 |                    .               .
 *            .                 v                    .               .
 *            . . . .RBD_OBJ_WRITE_COPYUP_OPS. . . . .      (copyup  .
 *                              |                        not needed) v
 *                              v                                    .
 *                            done . . . . . . . . . . . . . . . . . .
 *                              ^
 *                              |
 *                     RBD_OBJ_WRITE_FLAT
 *
 * Writes start in RBD_OBJ_WRITE_GUARD or _FLAT, depending on whether
 * assert_exists guard is needed or not (in some cases it's not needed
 * even if there is a parent).
 */
enum rbd_obj_write_state {
	RBD_OBJ_WRITE_START = 1,
	RBD_OBJ_WRITE_PRE_OBJECT_MAP,
	RBD_OBJ_WRITE_OBJECT,
	__RBD_OBJ_WRITE_COPYUP,
	RBD_OBJ_WRITE_COPYUP,
	RBD_OBJ_WRITE_POST_OBJECT_MAP,
};

enum rbd_obj_copyup_state {
	RBD_OBJ_COPYUP_START = 1,
	RBD_OBJ_COPYUP_READ_PARENT,
	__RBD_OBJ_COPYUP_OBJECT_MAPS,
	RBD_OBJ_COPYUP_OBJECT_MAPS,
	__RBD_OBJ_COPYUP_WRITE_OBJECT,
	RBD_OBJ_COPYUP_WRITE_OBJECT,
};

struct rbd_obj_request {
	struct ceph_object_extent ex;
	unsigned int		flags;	/* RBD_OBJ_FLAG_* */
	union {
		enum rbd_obj_read_state	 read_state;	/* for reads */
		enum rbd_obj_write_state write_state;	/* for writes */
	};

	struct rbd_img_request	*img_request;
	struct ceph_file_extent	*img_extents;
	u32			num_img_extents;

	union {
		struct ceph_bio_iter	bio_pos;
		struct {
			struct ceph_bvec_iter	bvec_pos;
			u32			bvec_count;
			u32			bvec_idx;
		};
	};

	enum rbd_obj_copyup_state copyup_state;
	struct bio_vec		*copyup_bvecs;
	u32			copyup_bvec_count;

	struct list_head	osd_reqs;	/* w/ r_private_item */

	struct mutex		state_mutex;
	struct pending_result	pending;
	struct kref		kref;
};

enum img_req_flags {
	IMG_REQ_CHILD,		/* initiator: block = 0, child image = 1 */
	IMG_REQ_LAYERED,	/* ENOENT handling: normal = 0, layered = 1 */
};

enum rbd_img_state {
	RBD_IMG_START = 1,
	RBD_IMG_EXCLUSIVE_LOCK,
	__RBD_IMG_OBJECT_REQUESTS,
	RBD_IMG_OBJECT_REQUESTS,
};

struct rbd_img_request {
	struct rbd_device	*rbd_dev;
	enum obj_operation_type	op_type;
	enum obj_request_type	data_type;
	unsigned long		flags;
	enum rbd_img_state	state;
	union {
		u64			snap_id;	/* for reads */
		struct ceph_snap_context *snapc;	/* for writes */
	};
	struct rbd_obj_request	*obj_request;	/* obj req initiator */

	struct list_head	lock_item;
	struct list_head	object_extents;	/* obj_req.ex structs */

	struct mutex		state_mutex;
	struct pending_result	pending;
	struct work_struct	work;
	int			work_result;
};

#define for_each_obj_request(ireq, oreq) \
	list_for_each_entry(oreq, &(ireq)->object_extents, ex.oe_item)
#define for_each_obj_request_safe(ireq, oreq, n) \
	list_for_each_entry_safe(oreq, n, &(ireq)->object_extents, ex.oe_item)
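
/*
 * Illustrative sketch: iterating an image request's object requests,
 * with the _safe variant permitting removal while iterating
 * (put_obj_req() is a hypothetical helper, not part of this file):
 *
 *	struct rbd_obj_request *obj_req, *next_obj_req;
 *
 *	for_each_obj_request_safe(img_req, obj_req, next_obj_req)
 *		put_obj_req(obj_req);
 */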

enum rbd_watch_state {
	RBD_WATCH_STATE_UNREGISTERED,
	RBD_WATCH_STATE_REGISTERED,
	RBD_WATCH_STATE_ERROR,
};

enum rbd_lock_state {
	RBD_LOCK_STATE_UNLOCKED,
	RBD_LOCK_STATE_LOCKED,
	RBD_LOCK_STATE_RELEASING,
};

/* WatchNotify::ClientId */
struct rbd_client_id {
	u64 gid;
	u64 handle;
};

struct rbd_mapping {
	u64                     size;
};

/*
 * a single device
 */
struct rbd_device {
	int			dev_id;		/* blkdev unique id */

	int			major;		/* blkdev assigned major */
	int			minor;
	struct gendisk		*disk;		/* blkdev's gendisk and rq */

	u32			image_format;	/* Either 1 or 2 */
	struct rbd_client	*rbd_client;

	char			name[DEV_NAME_LEN]; /* blkdev name, e.g. rbd3 */

	spinlock_t		lock;		/* queue, flags, open_count */

	struct rbd_image_header	header;
	unsigned long		flags;		/* possibly lock protected */
	struct rbd_spec		*spec;
	struct rbd_options	*opts;
	char			*config_info;	/* add{,_single_major} string */

	struct ceph_object_id	header_oid;
	struct ceph_object_locator header_oloc;

	struct ceph_file_layout	layout;		/* used for all rbd requests */

	struct mutex		watch_mutex;
	enum rbd_watch_state	watch_state;
	struct ceph_osd_linger_request *watch_handle;
	u64			watch_cookie;
	struct delayed_work	watch_dwork;

	struct rw_semaphore	lock_rwsem;
	enum rbd_lock_state	lock_state;
	char			lock_cookie[32];
	struct rbd_client_id	owner_cid;
	struct work_struct	acquired_lock_work;
	struct work_struct	released_lock_work;
	struct delayed_work	lock_dwork;
	struct work_struct	unlock_work;
	spinlock_t		lock_lists_lock;
	struct list_head	acquiring_list;
	struct list_head	running_list;
	struct completion	acquire_wait;
	int			acquire_err;
	struct completion	releasing_wait;

	spinlock_t		object_map_lock;
	u8			*object_map;
	u64			object_map_size;	/* in objects */
	u64			object_map_flags;

	struct workqueue_struct	*task_wq;

	struct rbd_spec		*parent_spec;
	u64			parent_overlap;
	atomic_t		parent_ref;
	struct rbd_device	*parent;

	/* Block layer tags. */
	struct blk_mq_tag_set	tag_set;

	/* protects updating the header */
	struct rw_semaphore	header_rwsem;

	struct rbd_mapping	mapping;

	struct list_head	node;

	/* sysfs related */
	struct device		dev;
	unsigned long		open_count;	/* protected by lock */
};

/*
 * Flag bits for rbd_dev->flags:
 * - REMOVING (which is coupled with rbd_dev->open_count) is protected
 *   by rbd_dev->lock
 */
enum rbd_dev_flags {
	RBD_DEV_FLAG_EXISTS,	/* rbd_dev_device_setup() ran */
	RBD_DEV_FLAG_REMOVING,	/* this mapping is being removed */
	RBD_DEV_FLAG_READONLY,	/* -o ro or snapshot */
};

static DEFINE_MUTEX(client_mutex);	/* Serialize client creation */

static LIST_HEAD(rbd_dev_list);		/* devices */
static DEFINE_SPINLOCK(rbd_dev_list_lock);

static LIST_HEAD(rbd_client_list);	/* clients */
static DEFINE_SPINLOCK(rbd_client_list_lock);

/* Slab caches for frequently-allocated structures */

static struct kmem_cache	*rbd_img_request_cache;
static struct kmem_cache	*rbd_obj_request_cache;

static int rbd_major;
static DEFINE_IDA(rbd_dev_id_ida);

static struct workqueue_struct *rbd_wq;

static struct ceph_snap_context rbd_empty_snapc = {
	.nref = REFCOUNT_INIT(1),
};

/*
 * single-major requires version >= 0.75 of the userspace rbd utility.
 */
static bool single_major = true;
module_param(single_major, bool, 0444);
MODULE_PARM_DESC(single_major, "Use a single major number for all rbd devices (default: true)");
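
/*
 * Illustrative: the default can be overridden when loading the module,
 * e.g. "modprobe rbd single_major=0".
 */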

static ssize_t add_store(struct bus_type *bus, const char *buf, size_t count);
static ssize_t remove_store(struct bus_type *bus, const char *buf,
			    size_t count);
static ssize_t add_single_major_store(struct bus_type *bus, const char *buf,
				      size_t count);
static ssize_t remove_single_major_store(struct bus_type *bus, const char *buf,
					 size_t count);
static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth);

static int rbd_dev_id_to_minor(int dev_id)
{
	return dev_id << RBD_SINGLE_MAJOR_PART_SHIFT;
}

static int minor_to_rbd_dev_id(int minor)
{
	return minor >> RBD_SINGLE_MAJOR_PART_SHIFT;
}
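
/*
 * Illustrative: with RBD_SINGLE_MAJOR_PART_SHIFT == 4 each device owns
 * 2^4 == 16 minors, so dev_id 3 maps to minor 48, and minors 48..63
 * cover rbd3 and its partitions.
 */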

static bool rbd_is_ro(struct rbd_device *rbd_dev)
{
	return test_bit(RBD_DEV_FLAG_READONLY, &rbd_dev->flags);
}

static bool rbd_is_snap(struct rbd_device *rbd_dev)
{
	return rbd_dev->spec->snap_id != CEPH_NOSNAP;
}

static bool __rbd_is_lock_owner(struct rbd_device *rbd_dev)
{
	lockdep_assert_held(&rbd_dev->lock_rwsem);

	return rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED ||
	       rbd_dev->lock_state == RBD_LOCK_STATE_RELEASING;
}

static bool rbd_is_lock_owner(struct rbd_device *rbd_dev)
{
	bool is_lock_owner;

	down_read(&rbd_dev->lock_rwsem);
	is_lock_owner = __rbd_is_lock_owner(rbd_dev);
	up_read(&rbd_dev->lock_rwsem);
	return is_lock_owner;
}

static ssize_t supported_features_show(struct bus_type *bus, char *buf)
{
	return sprintf(buf, "0x%llx\n", RBD_FEATURES_SUPPORTED);
}

static BUS_ATTR_WO(add);
static BUS_ATTR_WO(remove);
static BUS_ATTR_WO(add_single_major);
static BUS_ATTR_WO(remove_single_major);
static BUS_ATTR_RO(supported_features);

static struct attribute *rbd_bus_attrs[] = {
	&bus_attr_add.attr,
	&bus_attr_remove.attr,
	&bus_attr_add_single_major.attr,
	&bus_attr_remove_single_major.attr,
	&bus_attr_supported_features.attr,
	NULL,
};

static umode_t rbd_bus_is_visible(struct kobject *kobj,
				  struct attribute *attr, int index)
{
	if (!single_major &&
	    (attr == &bus_attr_add_single_major.attr ||
	     attr == &bus_attr_remove_single_major.attr))
		return 0;

	return attr->mode;
}

static const struct attribute_group rbd_bus_group = {
	.attrs = rbd_bus_attrs,
	.is_visible = rbd_bus_is_visible,
};
__ATTRIBUTE_GROUPS(rbd_bus);

static struct bus_type rbd_bus_type = {
	.name		= "rbd",
	.bus_groups	= rbd_bus_groups,
};

static void rbd_root_dev_release(struct device *dev)
{
}

static struct device rbd_root_dev = {
	.init_name =    "rbd",
	.release =      rbd_root_dev_release,
};

static __printf(2, 3)
void rbd_warn(struct rbd_device *rbd_dev, const char *fmt, ...)
{
	struct va_format vaf;
	va_list args;

	va_start(args, fmt);
	vaf.fmt = fmt;
	vaf.va = &args;

	if (!rbd_dev)
		printk(KERN_WARNING "%s: %pV\n", RBD_DRV_NAME, &vaf);
	else if (rbd_dev->disk)
		printk(KERN_WARNING "%s: %s: %pV\n",
			RBD_DRV_NAME, rbd_dev->disk->disk_name, &vaf);
	else if (rbd_dev->spec && rbd_dev->spec->image_name)
		printk(KERN_WARNING "%s: image %s: %pV\n",
			RBD_DRV_NAME, rbd_dev->spec->image_name, &vaf);
	else if (rbd_dev->spec && rbd_dev->spec->image_id)
		printk(KERN_WARNING "%s: id %s: %pV\n",
			RBD_DRV_NAME, rbd_dev->spec->image_id, &vaf);
	else	/* punt */
		printk(KERN_WARNING "%s: rbd_dev %p: %pV\n",
			RBD_DRV_NAME, rbd_dev, &vaf);
	va_end(args);
}

#ifdef RBD_DEBUG
#define rbd_assert(expr)						\
		if (unlikely(!(expr))) {				\
			printk(KERN_ERR "\nAssertion failure in %s() "	\
						"at line %d:\n\n"	\
					"\trbd_assert(%s);\n\n",	\
					__func__, __LINE__, #expr);	\
			BUG();						\
		}
#else /* !RBD_DEBUG */
#  define rbd_assert(expr)	((void) 0)
#endif /* !RBD_DEBUG */

static void rbd_dev_remove_parent(struct rbd_device *rbd_dev);

static int rbd_dev_refresh(struct rbd_device *rbd_dev);
static int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev);
static int rbd_dev_header_info(struct rbd_device *rbd_dev);
static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev);
static const char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev,
					u64 snap_id);
static int _rbd_dev_v2_snap_size(struct rbd_device *rbd_dev, u64 snap_id,
				 u8 *order, u64 *snap_size);
static int rbd_dev_v2_get_flags(struct rbd_device *rbd_dev);

static void rbd_obj_handle_request(struct rbd_obj_request *obj_req, int result);
static void rbd_img_handle_request(struct rbd_img_request *img_req, int result);

/*
 * Return true if nothing else is pending.
 */
static bool pending_result_dec(struct pending_result *pending, int *result)
{
	rbd_assert(pending->num_pending > 0);

	if (*result && !pending->result)
		pending->result = *result;
	if (--pending->num_pending)
		return false;

	*result = pending->result;
	return true;
}
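
/*
 * Illustrative: a typical completion path decrements the pending count
 * and proceeds only once every child operation has finished; 'result'
 * then holds the first nonzero result, if any:
 *
 *	if (!pending_result_dec(&pending, &result))
 *		return;
 */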

static int rbd_open(struct block_device *bdev, fmode_t mode)
{
	struct rbd_device *rbd_dev = bdev->bd_disk->private_data;
	bool removing = false;

	spin_lock_irq(&rbd_dev->lock);
	if (test_bit(RBD_DEV_FLAG_REMOVING, &rbd_dev->flags))
		removing = true;
	else
		rbd_dev->open_count++;
	spin_unlock_irq(&rbd_dev->lock);
	if (removing)
		return -ENOENT;

	(void) get_device(&rbd_dev->dev);

	return 0;
}

static void rbd_release(struct gendisk *disk, fmode_t mode)
{
	struct rbd_device *rbd_dev = disk->private_data;
	unsigned long open_count_before;

	spin_lock_irq(&rbd_dev->lock);
	open_count_before = rbd_dev->open_count--;
	spin_unlock_irq(&rbd_dev->lock);
	rbd_assert(open_count_before > 0);

	put_device(&rbd_dev->dev);
}

static int rbd_ioctl_set_ro(struct rbd_device *rbd_dev, unsigned long arg)
{
	int ro;

	if (get_user(ro, (int __user *)arg))
		return -EFAULT;

	/*
	 * Neither images mapped read-only nor snapshots can be
	 * marked read-write.
	 */
	if (!ro) {
		if (rbd_is_ro(rbd_dev))
			return -EROFS;

		rbd_assert(!rbd_is_snap(rbd_dev));
	}

	/* Let blkdev_roset() handle it */
	return -ENOTTY;
}

static int rbd_ioctl(struct block_device *bdev, fmode_t mode,
			unsigned int cmd, unsigned long arg)
{
	struct rbd_device *rbd_dev = bdev->bd_disk->private_data;
	int ret;

	switch (cmd) {
	case BLKROSET:
		ret = rbd_ioctl_set_ro(rbd_dev, arg);
		break;
	default:
		ret = -ENOTTY;
	}

	return ret;
}

#ifdef CONFIG_COMPAT
static int rbd_compat_ioctl(struct block_device *bdev, fmode_t mode,
				unsigned int cmd, unsigned long arg)
{
	return rbd_ioctl(bdev, mode, cmd, arg);
}
#endif /* CONFIG_COMPAT */

static const struct block_device_operations rbd_bd_ops = {
	.owner			= THIS_MODULE,
	.open			= rbd_open,
	.release		= rbd_release,
	.ioctl			= rbd_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl		= rbd_compat_ioctl,
#endif
};

/*
 * Initialize an rbd client instance.  Success or not, this function
 * consumes ceph_opts.  Caller holds client_mutex.
 */
static struct rbd_client *rbd_client_create(struct ceph_options *ceph_opts)
{
	struct rbd_client *rbdc;
	int ret = -ENOMEM;

	dout("%s:\n", __func__);
	rbdc = kmalloc(sizeof(struct rbd_client), GFP_KERNEL);
	if (!rbdc)
		goto out_opt;

	kref_init(&rbdc->kref);
	INIT_LIST_HEAD(&rbdc->node);

	rbdc->client = ceph_create_client(ceph_opts, rbdc);
	if (IS_ERR(rbdc->client))
		goto out_rbdc;
	ceph_opts = NULL;	/* Now rbdc->client is responsible for ceph_opts */

	ret = ceph_open_session(rbdc->client);
	if (ret < 0)
		goto out_client;

	spin_lock(&rbd_client_list_lock);
	list_add_tail(&rbdc->node, &rbd_client_list);
	spin_unlock(&rbd_client_list_lock);

	dout("%s: rbdc %p\n", __func__, rbdc);

	return rbdc;
out_client:
	ceph_destroy_client(rbdc->client);
out_rbdc:
	kfree(rbdc);
out_opt:
	if (ceph_opts)
		ceph_destroy_options(ceph_opts);
	dout("%s: error %d\n", __func__, ret);

	return ERR_PTR(ret);
}

static struct rbd_client *__rbd_get_client(struct rbd_client *rbdc)
{
	kref_get(&rbdc->kref);

	return rbdc;
}

/*
 * Find a ceph client with a specific address and configuration.  If
 * one is found, bump its reference count.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 808) static struct rbd_client *rbd_client_find(struct ceph_options *ceph_opts)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 809) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 810) struct rbd_client *client_node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 811) bool found = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 812)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 813) if (ceph_opts->flags & CEPH_OPT_NOSHARE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 814) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 815)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 816) spin_lock(&rbd_client_list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 817) list_for_each_entry(client_node, &rbd_client_list, node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 818) if (!ceph_compare_options(ceph_opts, client_node->client)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 819) __rbd_get_client(client_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 820)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 821) found = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 822) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 823) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 824) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 825) spin_unlock(&rbd_client_list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 826)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 827) return found ? client_node : NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 828) }
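^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 828) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 828) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 828) * Sharing illustration (hypothetical mappings, not code in this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 828) * driver): two images mapped with identical client options share one
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 828) * rbd_client, and therefore one monitor/OSD session. Including
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 828) * "noshare" in the second map's options sets CEPH_OPT_NOSHARE and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 828) * forces a brand-new client for it instead.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 828) */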
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 829)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 830) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 831) * (Per device) rbd map options
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 832) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 833) enum {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 834) Opt_queue_depth,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 835) Opt_alloc_size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 836) Opt_lock_timeout,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 837) /* int args above */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 838) Opt_pool_ns,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 839) Opt_compression_hint,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 840) /* string args above */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 841) Opt_read_only,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 842) Opt_read_write,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 843) Opt_lock_on_read,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 844) Opt_exclusive,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 845) Opt_notrim,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 846) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 847)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 848) enum {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 849) Opt_compression_hint_none,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 850) Opt_compression_hint_compressible,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 851) Opt_compression_hint_incompressible,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 852) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 853)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 854) static const struct constant_table rbd_param_compression_hint[] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 855) {"none", Opt_compression_hint_none},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 856) {"compressible", Opt_compression_hint_compressible},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 857) {"incompressible", Opt_compression_hint_incompressible},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 858) {}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 859) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 860)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 861) static const struct fs_parameter_spec rbd_parameters[] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 862) fsparam_u32 ("alloc_size", Opt_alloc_size),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 863) fsparam_enum ("compression_hint", Opt_compression_hint,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 864) rbd_param_compression_hint),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 865) fsparam_flag ("exclusive", Opt_exclusive),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 866) fsparam_flag ("lock_on_read", Opt_lock_on_read),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 867) fsparam_u32 ("lock_timeout", Opt_lock_timeout),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 868) fsparam_flag ("notrim", Opt_notrim),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 869) fsparam_string ("_pool_ns", Opt_pool_ns),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 870) fsparam_u32 ("queue_depth", Opt_queue_depth),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 871) fsparam_flag ("read_only", Opt_read_only),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 872) fsparam_flag ("read_write", Opt_read_write),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 873) fsparam_flag ("ro", Opt_read_only),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 874) fsparam_flag ("rw", Opt_read_write),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 875) {}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 876) };
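^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 876) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 876) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 876) * For illustration only (values made up): a per-device option string
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 876) * such as
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 876) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 876) *   "queue_depth=128,alloc_size=131072,compression_hint=incompressible,ro"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 876) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 876) * is split at commas and each token is matched against the table
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 876) * above; note that "ro" and "read_only" select the same Opt_read_only.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 876) */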
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 877)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 878) struct rbd_options {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 879) int queue_depth;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 880) int alloc_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 881) unsigned long lock_timeout;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 882) bool read_only;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 883) bool lock_on_read;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 884) bool exclusive;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 885) bool trim;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 886)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 887) u32 alloc_hint_flags; /* CEPH_OSD_OP_ALLOC_HINT_FLAG_* */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 888) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 889)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 890) #define RBD_QUEUE_DEPTH_DEFAULT BLKDEV_MAX_RQ
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 891) #define RBD_ALLOC_SIZE_DEFAULT (64 * 1024)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 892) #define RBD_LOCK_TIMEOUT_DEFAULT 0 /* no timeout */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 893) #define RBD_READ_ONLY_DEFAULT false
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 894) #define RBD_LOCK_ON_READ_DEFAULT false
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 895) #define RBD_EXCLUSIVE_DEFAULT false
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 896) #define RBD_TRIM_DEFAULT true
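^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 896) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 896) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 896) * A minimal sketch (not the driver's actual parse path) of how an
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 896) * rbd_options instance (struct rbd_options *opts) would be seeded from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 896) * the defaults above before any per-device map options are applied:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 896) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 896) *	opts = kzalloc(sizeof(*opts), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 896) *	if (opts) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 896) *		opts->queue_depth = RBD_QUEUE_DEPTH_DEFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 896) *		opts->alloc_size = RBD_ALLOC_SIZE_DEFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 896) *		opts->lock_timeout = RBD_LOCK_TIMEOUT_DEFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 896) *		opts->read_only = RBD_READ_ONLY_DEFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 896) *		opts->lock_on_read = RBD_LOCK_ON_READ_DEFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 896) *		opts->exclusive = RBD_EXCLUSIVE_DEFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 896) *		opts->trim = RBD_TRIM_DEFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 896) *	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 896) */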
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 897)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 898) struct rbd_parse_opts_ctx {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 899) struct rbd_spec *spec;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 900) struct ceph_options *copts;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 901) struct rbd_options *opts;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 902) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 903)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 904) static const char *obj_op_name(enum obj_operation_type op_type)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 905) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 906) switch (op_type) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 907) case OBJ_OP_READ:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 908) return "read";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 909) case OBJ_OP_WRITE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 910) return "write";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 911) case OBJ_OP_DISCARD:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 912) return "discard";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 913) case OBJ_OP_ZEROOUT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 914) return "zeroout";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 915) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 916) return "???";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 917) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 918) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 919)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 920) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 921) * Destroy ceph client
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 922) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 923) * Acquires and releases rbd_client_list_lock itself; the caller must not hold it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 924) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 925) static void rbd_client_release(struct kref *kref)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 926) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 927) struct rbd_client *rbdc = container_of(kref, struct rbd_client, kref);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 928)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 929) dout("%s: rbdc %p\n", __func__, rbdc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 930) spin_lock(&rbd_client_list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 931) list_del(&rbdc->node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 932) spin_unlock(&rbd_client_list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 933)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 934) ceph_destroy_client(rbdc->client);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 935) kfree(rbdc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 936) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 937)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 938) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 939) * Drop a reference to a ceph client node; once the last reference is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 940) * dropped, the node is released.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 941) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 942) static void rbd_put_client(struct rbd_client *rbdc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 943) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 944) if (rbdc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 945) kref_put(&rbdc->kref, rbd_client_release);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 946) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 947)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 948) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 949) * Get a ceph client with a specific address and configuration; if one
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 950) * does not exist, create it. Either way, ceph_opts is consumed by this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 951) * function.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 952) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 953) static struct rbd_client *rbd_get_client(struct ceph_options *ceph_opts)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 954) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 955) struct rbd_client *rbdc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 956) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 957)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 958) mutex_lock(&client_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 959) rbdc = rbd_client_find(ceph_opts);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 960) if (rbdc) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 961) ceph_destroy_options(ceph_opts);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 962)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 963) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 964) * Using an existing client. Make sure ->pg_pools is up to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 965) * date before we look up the pool id in do_rbd_add().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 966) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 967) ret = ceph_wait_for_latest_osdmap(rbdc->client,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 968) rbdc->client->options->mount_timeout);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 969) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 970) rbd_warn(NULL, "failed to get latest osdmap: %d", ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 971) rbd_put_client(rbdc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 972) rbdc = ERR_PTR(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 973) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 974) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 975) rbdc = rbd_client_create(ceph_opts);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 976) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 977) mutex_unlock(&client_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 978)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 979) return rbdc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 980) }
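^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 980) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 980) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 980) * Usage sketch (error handling abbreviated, not a real call site):
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 980) * callers pair rbd_get_client(), which consumes ceph_opts even on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 980) * failure, with rbd_put_client():
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 980) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 980) *	rbdc = rbd_get_client(ceph_opts);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 980) *	if (IS_ERR(rbdc))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 980) *		return PTR_ERR(rbdc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 980) *	...
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 980) *	rbd_put_client(rbdc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 980) */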
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 981)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 982) static bool rbd_image_format_valid(u32 image_format)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 983) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 984) return image_format == 1 || image_format == 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 985) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 986)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 987) static bool rbd_dev_ondisk_valid(struct rbd_image_header_ondisk *ondisk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 988) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 989) size_t size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 990) u32 snap_count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 991)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 992) /* The header has to start with the magic rbd header text */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 993) if (memcmp(&ondisk->text, RBD_HEADER_TEXT, sizeof (RBD_HEADER_TEXT)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 994) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 995)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 996) /* The bio layer requires at least sector-sized I/O */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 997)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 998) if (ondisk->options.order < SECTOR_SHIFT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 999) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) /* If we use u64 in a few spots we may be able to loosen this */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003) if (ondisk->options.order > 8 * sizeof (int) - 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007) * The size of a snapshot header has to fit in a size_t, and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008) * that limits the number of snapshots.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010) snap_count = le32_to_cpu(ondisk->snap_count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011) size = SIZE_MAX - sizeof (struct ceph_snap_context);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012) if (snap_count > size / sizeof (__le64))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016) * Not only that, but the size of the entire snapshot
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017) * header must also be representable in a size_t.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019) size -= snap_count * sizeof (__le64);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020) if ((u64) size < le64_to_cpu(ondisk->snap_names_len))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) }
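^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) * Worked example of the overflow checks above: the in-memory snapshot
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) * data amounts to roughly
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) *	sizeof(struct ceph_snap_context)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) *		+ snap_count * sizeof(__le64)	(snapshot ids)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) *		+ snap_names_len		(NUL-terminated names)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) * so both the product and the final sum are checked against SIZE_MAX
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) * before any allocation is attempted.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) */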
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027) * returns the size of an object in the image
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029) static u32 rbd_obj_bytes(struct rbd_image_header *header)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031) return 1U << header->obj_order;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) }
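^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) * For example, the common default of obj_order == 22 yields
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) * 1U << 22 bytes, i.e. 4 MiB objects.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) */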
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034) static void rbd_init_layout(struct rbd_device *rbd_dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036) if (rbd_dev->header.stripe_unit == 0 ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037) rbd_dev->header.stripe_count == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038) rbd_dev->header.stripe_unit = rbd_obj_bytes(&rbd_dev->header);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039) rbd_dev->header.stripe_count = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042) rbd_dev->layout.stripe_unit = rbd_dev->header.stripe_unit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) rbd_dev->layout.stripe_count = rbd_dev->header.stripe_count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044) rbd_dev->layout.object_size = rbd_obj_bytes(&rbd_dev->header);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045) rbd_dev->layout.pool_id = rbd_dev->header.data_pool_id == CEPH_NOPOOL ?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046) rbd_dev->spec->pool_id : rbd_dev->header.data_pool_id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047) RCU_INIT_POINTER(rbd_dev->layout.pool_ns, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) }
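^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) * In the common case without fancy striping, the defaults applied
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) * above mean stripe_unit == object_size and stripe_count == 1, i.e.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) * the image is laid out as a plain sequence of object-sized chunks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) */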
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051) * Fill an rbd image header with information from the given format 1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052) * on-disk header.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) static int rbd_header_from_disk(struct rbd_device *rbd_dev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055) struct rbd_image_header_ondisk *ondisk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057) struct rbd_image_header *header = &rbd_dev->header;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058) bool first_time = header->object_prefix == NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059) struct ceph_snap_context *snapc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060) char *object_prefix = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061) char *snap_names = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062) u64 *snap_sizes = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063) u32 snap_count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064) int ret = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065) u32 i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067) /* Allocate this now to avoid having to handle failure below */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069) if (first_time) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070) object_prefix = kstrndup(ondisk->object_prefix,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071) sizeof(ondisk->object_prefix),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072) GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073) if (!object_prefix)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077) /* Allocate the snapshot context and fill it in */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079) snap_count = le32_to_cpu(ondisk->snap_count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080) snapc = ceph_create_snap_context(snap_count, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081) if (!snapc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082) goto out_err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083) snapc->seq = le64_to_cpu(ondisk->snap_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084) if (snap_count) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085) struct rbd_image_snap_ondisk *snaps;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086) u64 snap_names_len = le64_to_cpu(ondisk->snap_names_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088) /* We'll keep a copy of the snapshot names... */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090) if (snap_names_len > (u64)SIZE_MAX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091) goto out_2big;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092) snap_names = kmalloc(snap_names_len, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093) if (!snap_names)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094) goto out_err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096) /* ...as well as the array of their sizes. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097) snap_sizes = kmalloc_array(snap_count,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098) sizeof(*header->snap_sizes),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099) GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100) if (!snap_sizes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101) goto out_err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104) * Copy the names, and fill in each snapshot's id
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105) * and size.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107) * Note that rbd_dev_v1_header_info() guarantees that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108) * the ondisk buffer we're working with has
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109) * snap_names_len bytes beyond the end of the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110) * snapshot id array, so this memcpy() is safe.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112) memcpy(snap_names, &ondisk->snaps[snap_count], snap_names_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1113) snaps = ondisk->snaps;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114) for (i = 0; i < snap_count; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115) snapc->snaps[i] = le64_to_cpu(snaps[i].id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116) snap_sizes[i] = le64_to_cpu(snaps[i].image_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120) /* We won't fail from here on; fill in the header */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122) if (first_time) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123) header->object_prefix = object_prefix;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124) header->obj_order = ondisk->options.order;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125) rbd_init_layout(rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127) ceph_put_snap_context(header->snapc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128) kfree(header->snap_names);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129) kfree(header->snap_sizes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132) /* The remaining fields always get updated (when we refresh) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134) header->image_size = le64_to_cpu(ondisk->image_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135) header->snapc = snapc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136) header->snap_names = snap_names;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137) header->snap_sizes = snap_sizes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140) out_2big:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141) ret = -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142) out_err:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143) kfree(snap_sizes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144) kfree(snap_names);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145) ceph_put_snap_context(snapc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146) kfree(object_prefix);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151) static const char *_rbd_dev_v1_snap_name(struct rbd_device *rbd_dev, u32 which)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153) const char *snap_name;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155) rbd_assert(which < rbd_dev->header.snapc->num_snaps);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157) /* Skip over names until we find the one we are looking for */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159) snap_name = rbd_dev->header.snap_names;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160) while (which--)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161) snap_name += strlen(snap_name) + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163) return kstrdup(snap_name, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164) }
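^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164) * Example (hypothetical names): with snap_names laid out as
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164) * "mon\0tue\0wed\0", a call with which == 1 skips past "mon" and its
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164) * terminating NUL and returns a copy of "tue".
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164) */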
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167) * Snapshot id comparison function for use with qsort()/bsearch().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168) * Note that result is for snapshots in *descending* order.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170) static int snapid_compare_reverse(const void *s1, const void *s2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172) u64 snap_id1 = *(u64 *)s1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173) u64 snap_id2 = *(u64 *)s2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175) if (snap_id1 < snap_id2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177) return snap_id1 == snap_id2 ? 0 : -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181) * Search a snapshot context to see if the given snapshot id is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182) * present.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184) * Returns the position of the snapshot id in the array if it's found,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185) * or BAD_SNAP_INDEX otherwise.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187) * Note: The snapshot array is kept sorted (by the osd) in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188) * reverse order, highest snapshot id first.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1189) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190) static u32 rbd_dev_snap_index(struct rbd_device *rbd_dev, u64 snap_id)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192) struct ceph_snap_context *snapc = rbd_dev->header.snapc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193) u64 *found;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195) found = bsearch(&snap_id, &snapc->snaps, snapc->num_snaps,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196) sizeof (snap_id), snapid_compare_reverse);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198) return found ? (u32)(found - &snapc->snaps[0]) : BAD_SNAP_INDEX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199) }
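^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199) * Example (made-up ids): with snapc->snaps == { 12, 7, 3 }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199) * (descending, as noted above), a lookup of snap_id 7 yields index 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199) * while a lookup of snap_id 5 yields BAD_SNAP_INDEX.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199) */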
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201) static const char *rbd_dev_v1_snap_name(struct rbd_device *rbd_dev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202) u64 snap_id)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204) u32 which;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205) const char *snap_name;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207) which = rbd_dev_snap_index(rbd_dev, snap_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208) if (which == BAD_SNAP_INDEX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1209) return ERR_PTR(-ENOENT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211) snap_name = _rbd_dev_v1_snap_name(rbd_dev, which);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212) return snap_name ? snap_name : ERR_PTR(-ENOMEM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215) static const char *rbd_snap_name(struct rbd_device *rbd_dev, u64 snap_id)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217) if (snap_id == CEPH_NOSNAP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218) return RBD_SNAP_HEAD_NAME;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220) rbd_assert(rbd_image_format_valid(rbd_dev->image_format));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221) if (rbd_dev->image_format == 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222) return rbd_dev_v1_snap_name(rbd_dev, snap_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224) return rbd_dev_v2_snap_name(rbd_dev, snap_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227) static int rbd_snap_size(struct rbd_device *rbd_dev, u64 snap_id,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228) u64 *snap_size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230) rbd_assert(rbd_image_format_valid(rbd_dev->image_format));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231) if (snap_id == CEPH_NOSNAP) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232) *snap_size = rbd_dev->header.image_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233) } else if (rbd_dev->image_format == 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234) u32 which;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236) which = rbd_dev_snap_index(rbd_dev, snap_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1237) if (which == BAD_SNAP_INDEX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1238) return -ENOENT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1239)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1240) *snap_size = rbd_dev->header.snap_sizes[which];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1241) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1242) u64 size = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1243) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1244)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1245) ret = _rbd_dev_v2_snap_size(rbd_dev, snap_id, NULL, &size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1246) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1247) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1248)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1249) *snap_size = size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1250) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1251) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1252) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1253)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1254) static int rbd_dev_mapping_set(struct rbd_device *rbd_dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1255) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1256) u64 snap_id = rbd_dev->spec->snap_id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1257) u64 size = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1258) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1259)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1260) ret = rbd_snap_size(rbd_dev, snap_id, &size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1261) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1262) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1263)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1264) rbd_dev->mapping.size = size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1265) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1266) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1267)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1268) static void rbd_dev_mapping_clear(struct rbd_device *rbd_dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1269) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1270) rbd_dev->mapping.size = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1271) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1272)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1273) static void zero_bvec(struct bio_vec *bv)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1274) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1275) void *buf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1276) unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1277)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1278) buf = bvec_kmap_irq(bv, &flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1279) memset(buf, 0, bv->bv_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1280) flush_dcache_page(bv->bv_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1281) bvec_kunmap_irq(buf, &flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1282) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1283)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1284) static void zero_bios(struct ceph_bio_iter *bio_pos, u32 off, u32 bytes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1285) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1286) struct ceph_bio_iter it = *bio_pos;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1287)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1288) ceph_bio_iter_advance(&it, off);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1289) ceph_bio_iter_advance_step(&it, bytes, ({
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1290) zero_bvec(&bv);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1291) }));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1292) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1293)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1294) static void zero_bvecs(struct ceph_bvec_iter *bvec_pos, u32 off, u32 bytes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1295) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1296) struct ceph_bvec_iter it = *bvec_pos;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1297)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1298) ceph_bvec_iter_advance(&it, off);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1299) ceph_bvec_iter_advance_step(&it, bytes, ({
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1300) zero_bvec(&bv);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1301) }));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1302) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1303)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1304) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1305) * Zero a range in @obj_req data buffer defined by a bio (list) or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1306) * (private) bio_vec array.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1307) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1308) * @off is relative to the start of the data buffer.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1309) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1310) static void rbd_obj_zero_range(struct rbd_obj_request *obj_req, u32 off,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1311) u32 bytes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1312) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1313) dout("%s %p data buf %u~%u\n", __func__, obj_req, off, bytes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1314)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1315) switch (obj_req->img_request->data_type) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1316) case OBJ_REQUEST_BIO:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1317) zero_bios(&obj_req->bio_pos, off, bytes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1318) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1319) case OBJ_REQUEST_BVECS:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1320) case OBJ_REQUEST_OWN_BVECS:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1321) zero_bvecs(&obj_req->bvec_pos, off, bytes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1322) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1323) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1324) BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1325) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1326) }
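^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1326) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1326) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1326) * Typical use (established elsewhere in the driver): when a read hits
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1326) * a hole, e.g. a nonexistent object or the unwritten tail of a short
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1326) * one, the missing byte range of the data buffer is zeroed so the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1326) * block layer sees zeroes rather than stale memory.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1326) */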
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1327)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1328) static void rbd_obj_request_destroy(struct kref *kref);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1329) static void rbd_obj_request_put(struct rbd_obj_request *obj_request)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1330) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1331) rbd_assert(obj_request != NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1332) dout("%s: obj %p (was %d)\n", __func__, obj_request,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1333) kref_read(&obj_request->kref));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1334) kref_put(&obj_request->kref, rbd_obj_request_destroy);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1335) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1336)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1337) static inline void rbd_img_obj_request_add(struct rbd_img_request *img_request,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1338) struct rbd_obj_request *obj_request)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1339) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1340) rbd_assert(obj_request->img_request == NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1341)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1342) /* Image request now owns object's original reference */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1343) obj_request->img_request = img_request;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1344) dout("%s: img %p obj %p\n", __func__, img_request, obj_request);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1345) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1346)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1347) static inline void rbd_img_obj_request_del(struct rbd_img_request *img_request,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1348) struct rbd_obj_request *obj_request)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1349) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1350) dout("%s: img %p obj %p\n", __func__, img_request, obj_request);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1351) list_del(&obj_request->ex.oe_item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1352) rbd_assert(obj_request->img_request == img_request);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1353) rbd_obj_request_put(obj_request);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1354) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1355)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1356) static void rbd_osd_submit(struct ceph_osd_request *osd_req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1357) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1358) struct rbd_obj_request *obj_req = osd_req->r_priv;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1359)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1360) dout("%s osd_req %p for obj_req %p objno %llu %llu~%llu\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1361) __func__, osd_req, obj_req, obj_req->ex.oe_objno,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1362) obj_req->ex.oe_off, obj_req->ex.oe_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1363) ceph_osdc_start_request(osd_req->r_osdc, osd_req, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1364) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1365)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1366) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1367) * The default/initial value for all image request flags is 0. Each
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1368) * is conditionally set to 1 at image request initialization time
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1369) * and currently never changes thereafter.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1370) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1371) static void img_request_layered_set(struct rbd_img_request *img_request)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1372) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1373) set_bit(IMG_REQ_LAYERED, &img_request->flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1374) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1375)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1376) static bool img_request_layered_test(struct rbd_img_request *img_request)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1377) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1378) return test_bit(IMG_REQ_LAYERED, &img_request->flags) != 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1379) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1380)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1381) static bool rbd_obj_is_entire(struct rbd_obj_request *obj_req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1382) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1383) struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1384)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1385) return !obj_req->ex.oe_off &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1386) obj_req->ex.oe_len == rbd_dev->layout.object_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1387) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1388)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1389) static bool rbd_obj_is_tail(struct rbd_obj_request *obj_req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1390) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1391) struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1392)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1393) return obj_req->ex.oe_off + obj_req->ex.oe_len ==
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1394) rbd_dev->layout.object_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1395) }
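^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1395) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1395) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1395) * With 4 MiB objects (illustrative), a request covering 0~4M of an
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1395) * object is "entire", while one covering 1M~3M ends exactly at the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1395) * object boundary and is therefore a "tail" request.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1395) */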
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1396)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1397) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1398) * Must be called after rbd_obj_calc_img_extents().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1398) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1398) * Copyup is pointless if the object has no parent data under it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1398) * (no img_extents), or if the entire object is being overwritten
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1398) * and there are no snapshots that still need the parent's data.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1399) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1400) static bool rbd_obj_copyup_enabled(struct rbd_obj_request *obj_req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1401) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1402) if (!obj_req->num_img_extents ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1403) (rbd_obj_is_entire(obj_req) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1404) !obj_req->img_request->snapc->num_snaps))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1405) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1406)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1407) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1408) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1409)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1410) static u64 rbd_obj_img_extents_bytes(struct rbd_obj_request *obj_req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1411) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1412) return ceph_file_extents_bytes(obj_req->img_extents,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1413) obj_req->num_img_extents);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1414) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1415)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1416) static bool rbd_img_is_write(struct rbd_img_request *img_req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1417) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1418) switch (img_req->op_type) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1419) case OBJ_OP_READ:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1420) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1421) case OBJ_OP_WRITE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1422) case OBJ_OP_DISCARD:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1423) case OBJ_OP_ZEROOUT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1424) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1425) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1426) BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1427) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1428) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1429)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1430) static void rbd_osd_req_callback(struct ceph_osd_request *osd_req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1431) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1432) struct rbd_obj_request *obj_req = osd_req->r_priv;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1433) int result;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1434)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1435) dout("%s osd_req %p result %d for obj_req %p\n", __func__, osd_req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1436) osd_req->r_result, obj_req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1437)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1438) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1439) * Writes aren't allowed to return a data payload. In some
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1440) * guarded write cases (e.g. stat + zero on an empty object)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1441) * a stat response makes it through, but we don't care.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1442) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1443) if (osd_req->r_result > 0 && rbd_img_is_write(obj_req->img_request))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1444) result = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1445) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1446) result = osd_req->r_result;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1447)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1448) rbd_obj_handle_request(obj_req, result);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1449) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1450)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1451) static void rbd_osd_format_read(struct ceph_osd_request *osd_req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1452) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1453) struct rbd_obj_request *obj_request = osd_req->r_priv;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1454) struct rbd_device *rbd_dev = obj_request->img_request->rbd_dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1455) struct ceph_options *opt = rbd_dev->rbd_client->client->options;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1456)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1457) osd_req->r_flags = CEPH_OSD_FLAG_READ | opt->read_from_replica;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1458) osd_req->r_snapid = obj_request->img_request->snap_id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1459) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1460)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1461) static void rbd_osd_format_write(struct ceph_osd_request *osd_req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1462) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1463) struct rbd_obj_request *obj_request = osd_req->r_priv;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1464)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1465) osd_req->r_flags = CEPH_OSD_FLAG_WRITE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1466) ktime_get_real_ts64(&osd_req->r_mtime);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1467) osd_req->r_data_offset = obj_request->ex.oe_off;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1468) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1469)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1470) static struct ceph_osd_request *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1471) __rbd_obj_add_osd_request(struct rbd_obj_request *obj_req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1472) struct ceph_snap_context *snapc, int num_ops)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1473) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1474) struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1475) struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1476) struct ceph_osd_request *req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1477) const char *name_format = rbd_dev->image_format == 1 ?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1478) RBD_V1_DATA_FORMAT : RBD_V2_DATA_FORMAT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1479) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1480)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1481) req = ceph_osdc_alloc_request(osdc, snapc, num_ops, false, GFP_NOIO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1482) if (!req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1483) return ERR_PTR(-ENOMEM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1484)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1485) list_add_tail(&req->r_private_item, &obj_req->osd_reqs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1486) req->r_callback = rbd_osd_req_callback;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1487) req->r_priv = obj_req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1488)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1489) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1490) * Data objects may be stored in a separate pool, but they always use
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1491) * the same namespace in that pool as the header does in its own pool.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1492) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1493) ceph_oloc_copy(&req->r_base_oloc, &rbd_dev->header_oloc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1494) req->r_base_oloc.pool = rbd_dev->layout.pool_id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1495)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1496) ret = ceph_oid_aprintf(&req->r_base_oid, GFP_NOIO, name_format,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1497) rbd_dev->header.object_prefix,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1498) obj_req->ex.oe_objno);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1499) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1500) return ERR_PTR(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1501)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1502) return req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1503) }
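^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1503) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1503) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1503) * For example (hypothetical image id): a format 2 image whose
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1503) * object_prefix is "rbd_data.102a6b8b4567" stores object number 1 in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1503) * the OID "rbd_data.102a6b8b4567.0000000000000001", since
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1503) * RBD_V2_DATA_FORMAT pads the object number to 16 hex digits
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1503) * (RBD_V1_DATA_FORMAT pads to 12).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1503) */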
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1504)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1505) static struct ceph_osd_request *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1506) rbd_obj_add_osd_request(struct rbd_obj_request *obj_req, int num_ops)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1507) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1508) return __rbd_obj_add_osd_request(obj_req, obj_req->img_request->snapc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1509) num_ops);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1510) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1511)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1512) static struct rbd_obj_request *rbd_obj_request_create(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1513) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1514) struct rbd_obj_request *obj_request;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1515)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1516) obj_request = kmem_cache_zalloc(rbd_obj_request_cache, GFP_NOIO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1517) if (!obj_request)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1518) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1519)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1520) ceph_object_extent_init(&obj_request->ex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1521) INIT_LIST_HEAD(&obj_request->osd_reqs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1522) mutex_init(&obj_request->state_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1523) kref_init(&obj_request->kref);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1524)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1525) dout("%s %p\n", __func__, obj_request);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1526) return obj_request;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1527) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1528)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1529) static void rbd_obj_request_destroy(struct kref *kref)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1530) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1531) struct rbd_obj_request *obj_request;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1532) struct ceph_osd_request *osd_req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1533) u32 i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1534)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1535) obj_request = container_of(kref, struct rbd_obj_request, kref);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1536)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1537) dout("%s: obj %p\n", __func__, obj_request);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1538)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1539) while (!list_empty(&obj_request->osd_reqs)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1540) osd_req = list_first_entry(&obj_request->osd_reqs,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1541) struct ceph_osd_request, r_private_item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1542) list_del_init(&osd_req->r_private_item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1543) ceph_osdc_put_request(osd_req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1544) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1545)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1546) switch (obj_request->img_request->data_type) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1547) case OBJ_REQUEST_NODATA:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1548) case OBJ_REQUEST_BIO:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1549) case OBJ_REQUEST_BVECS:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1550) break; /* Nothing to do */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1551) case OBJ_REQUEST_OWN_BVECS:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1552) kfree(obj_request->bvec_pos.bvecs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1553) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1554) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1555) BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1556) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1557)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1558) kfree(obj_request->img_extents);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1559) if (obj_request->copyup_bvecs) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1560) for (i = 0; i < obj_request->copyup_bvec_count; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1561) if (obj_request->copyup_bvecs[i].bv_page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1562) __free_page(obj_request->copyup_bvecs[i].bv_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1563) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1564) kfree(obj_request->copyup_bvecs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1565) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1566)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1567) kmem_cache_free(rbd_obj_request_cache, obj_request);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1568) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1569)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1570) /* It's OK to call this for a device with no parent */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1571)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1572) static void rbd_spec_put(struct rbd_spec *spec);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1573) static void rbd_dev_unparent(struct rbd_device *rbd_dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1574) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1575) rbd_dev_remove_parent(rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1576) rbd_spec_put(rbd_dev->parent_spec);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1577) rbd_dev->parent_spec = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1578) rbd_dev->parent_overlap = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1579) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1580)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1581) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1582) * Parent image reference counting is used to determine when an
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1583) * image's parent fields can be safely torn down, namely once there
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1584) * are no more in-flight requests to the parent image. Cleanup
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1585) * therefore happens when the last reference is dropped.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1586) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1587) static void rbd_dev_parent_put(struct rbd_device *rbd_dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1588) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1589) int counter;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1590)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1591) if (!rbd_dev->parent_spec)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1592) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1593)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1594) counter = atomic_dec_return_safe(&rbd_dev->parent_ref);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1595) if (counter > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1596) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1597)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1598) /* Last reference; clean up parent data structures */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1599)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1600) if (!counter)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1601) rbd_dev_unparent(rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1602) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1603) rbd_warn(rbd_dev, "parent reference underflow");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1604) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1605)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1606) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1607) * If an image has a non-zero parent overlap, get a reference to its
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1608) * parent.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1609) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1610) * Returns true if the rbd device has a parent with a non-zero
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1611) * overlap and a reference for it was successfully taken, or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1612) * false otherwise.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1613) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1614) static bool rbd_dev_parent_get(struct rbd_device *rbd_dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1615) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1616) int counter = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1617)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1618) if (!rbd_dev->parent_spec)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1619) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1620)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1621) if (rbd_dev->parent_overlap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1622) counter = atomic_inc_return_safe(&rbd_dev->parent_ref);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1623)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1624) if (counter < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1625) rbd_warn(rbd_dev, "parent reference overflow");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1626)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1627) return counter > 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1628) }
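
/*
 * A reference taken here is dropped via rbd_dev_parent_put(): within
 * this file, see rbd_img_capture_header() (get) and
 * rbd_img_request_destroy() (put) below.
 */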

static void rbd_img_request_init(struct rbd_img_request *img_request,
				 struct rbd_device *rbd_dev,
				 enum obj_operation_type op_type)
{
	memset(img_request, 0, sizeof(*img_request));

	img_request->rbd_dev = rbd_dev;
	img_request->op_type = op_type;

	INIT_LIST_HEAD(&img_request->lock_item);
	INIT_LIST_HEAD(&img_request->object_extents);
	mutex_init(&img_request->state_mutex);
}

static void rbd_img_capture_header(struct rbd_img_request *img_req)
{
	struct rbd_device *rbd_dev = img_req->rbd_dev;

	lockdep_assert_held(&rbd_dev->header_rwsem);

	if (rbd_img_is_write(img_req))
		img_req->snapc = ceph_get_snap_context(rbd_dev->header.snapc);
	else
		img_req->snap_id = rbd_dev->spec->snap_id;

	if (rbd_dev_parent_get(rbd_dev))
		img_request_layered_set(img_req);
}
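
/*
 * Note on rbd_img_capture_header(): a write pins the current snapshot
 * context so the data write carries the set of snapshots that existed
 * when it was queued, while a read only needs the mapped snap_id.
 * The matching ceph_put_snap_context() is in rbd_img_request_destroy()
 * below.
 */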

static void rbd_img_request_destroy(struct rbd_img_request *img_request)
{
	struct rbd_obj_request *obj_request;
	struct rbd_obj_request *next_obj_request;

	dout("%s: img %p\n", __func__, img_request);

	WARN_ON(!list_empty(&img_request->lock_item));
	for_each_obj_request_safe(img_request, obj_request, next_obj_request)
		rbd_img_obj_request_del(img_request, obj_request);

	if (img_request_layered_test(img_request))
		rbd_dev_parent_put(img_request->rbd_dev);

	if (rbd_img_is_write(img_request))
		ceph_put_snap_context(img_request->snapc);

	if (test_bit(IMG_REQ_CHILD, &img_request->flags))
		kmem_cache_free(rbd_img_request_cache, img_request);
}

#define BITS_PER_OBJ	2
#define OBJS_PER_BYTE	(BITS_PER_BYTE / BITS_PER_OBJ)
#define OBJ_MASK	((1 << BITS_PER_OBJ) - 1)

static void __rbd_object_map_index(struct rbd_device *rbd_dev, u64 objno,
				   u64 *index, u8 *shift)
{
	u32 off;

	rbd_assert(objno < rbd_dev->object_map_size);
	*index = div_u64_rem(objno, OBJS_PER_BYTE, &off);
	*shift = (OBJS_PER_BYTE - off - 1) * BITS_PER_OBJ;
}
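
/*
 * Worked example: with BITS_PER_OBJ = 2 there are four object states
 * per byte, most significant bits first.  For objno 5,
 * div_u64_rem(5, 4) yields index 1 and off 1, so shift is
 * (4 - 1 - 1) * 2 = 4: object 5's state lives in bits 5..4 of
 * object_map[1].
 */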

static u8 __rbd_object_map_get(struct rbd_device *rbd_dev, u64 objno)
{
	u64 index;
	u8 shift;

	lockdep_assert_held(&rbd_dev->object_map_lock);
	__rbd_object_map_index(rbd_dev, objno, &index, &shift);
	return (rbd_dev->object_map[index] >> shift) & OBJ_MASK;
}

static void __rbd_object_map_set(struct rbd_device *rbd_dev, u64 objno, u8 val)
{
	u64 index;
	u8 shift;
	u8 *p;

	lockdep_assert_held(&rbd_dev->object_map_lock);
	rbd_assert(!(val & ~OBJ_MASK));

	__rbd_object_map_index(rbd_dev, objno, &index, &shift);
	p = &rbd_dev->object_map[index];
	*p = (*p & ~(OBJ_MASK << shift)) | (val << shift);
}

static u8 rbd_object_map_get(struct rbd_device *rbd_dev, u64 objno)
{
	u8 state;

	spin_lock(&rbd_dev->object_map_lock);
	state = __rbd_object_map_get(rbd_dev, objno);
	spin_unlock(&rbd_dev->object_map_lock);
	return state;
}

static bool use_object_map(struct rbd_device *rbd_dev)
{
	/*
	 * An image mapped read-only can't use the object map -- it isn't
	 * loaded because the header lock isn't acquired.  Someone else can
	 * write to the image and update the object map behind our back.
	 *
	 * A snapshot can't be written to, so using the object map is always
	 * safe.
	 */
	if (!rbd_is_snap(rbd_dev) && rbd_is_ro(rbd_dev))
		return false;

	return ((rbd_dev->header.features & RBD_FEATURE_OBJECT_MAP) &&
		!(rbd_dev->object_map_flags & RBD_FLAG_OBJECT_MAP_INVALID));
}

static bool rbd_object_map_may_exist(struct rbd_device *rbd_dev, u64 objno)
{
	u8 state;

	/* fall back to default logic if object map is disabled or invalid */
	if (!use_object_map(rbd_dev))
		return true;

	state = rbd_object_map_get(rbd_dev, objno);
	return state != OBJECT_NONEXISTENT;
}

static void rbd_object_map_name(struct rbd_device *rbd_dev, u64 snap_id,
				struct ceph_object_id *oid)
{
	if (snap_id == CEPH_NOSNAP)
		ceph_oid_printf(oid, "%s%s", RBD_OBJECT_MAP_PREFIX,
				rbd_dev->spec->image_id);
	else
		ceph_oid_printf(oid, "%s%s.%016llx", RBD_OBJECT_MAP_PREFIX,
				rbd_dev->spec->image_id, snap_id);
}
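
/*
 * For example, assuming the usual "rbd_object_map." prefix from
 * rbd_types.h, image id "abc123" maps to:
 *
 *   HEAD:           rbd_object_map.abc123
 *   snapshot 0x1c:  rbd_object_map.abc123.000000000000001c
 */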

static int rbd_object_map_lock(struct rbd_device *rbd_dev)
{
	struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
	CEPH_DEFINE_OID_ONSTACK(oid);
	u8 lock_type;
	char *lock_tag;
	struct ceph_locker *lockers;
	u32 num_lockers;
	bool broke_lock = false;
	int ret;

	rbd_object_map_name(rbd_dev, CEPH_NOSNAP, &oid);

again:
	ret = ceph_cls_lock(osdc, &oid, &rbd_dev->header_oloc, RBD_LOCK_NAME,
			    CEPH_CLS_LOCK_EXCLUSIVE, "", "", "", 0);
	if (ret != -EBUSY || broke_lock) {
		if (ret == -EEXIST)
			ret = 0; /* already locked by myself */
		if (ret)
			rbd_warn(rbd_dev, "failed to lock object map: %d", ret);
		return ret;
	}

	ret = ceph_cls_lock_info(osdc, &oid, &rbd_dev->header_oloc,
				 RBD_LOCK_NAME, &lock_type, &lock_tag,
				 &lockers, &num_lockers);
	if (ret) {
		if (ret == -ENOENT)
			goto again;

		rbd_warn(rbd_dev, "failed to get object map lockers: %d", ret);
		return ret;
	}

	kfree(lock_tag);
	if (num_lockers == 0)
		goto again;

	rbd_warn(rbd_dev, "breaking object map lock owned by %s%llu",
		 ENTITY_NAME(lockers[0].id.name));

	ret = ceph_cls_break_lock(osdc, &oid, &rbd_dev->header_oloc,
				  RBD_LOCK_NAME, lockers[0].id.cookie,
				  &lockers[0].id.name);
	ceph_free_lockers(lockers, num_lockers);
	if (ret) {
		if (ret == -ENOENT)
			goto again;

		rbd_warn(rbd_dev, "failed to break object map lock: %d", ret);
		return ret;
	}

	broke_lock = true;
	goto again;
}
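
/*
 * The retry loop above works as follows: if the exclusive lock is
 * busy, look up the current holder; if the lock disappeared in the
 * meantime (-ENOENT or an empty locker list), simply retry.
 * Otherwise assume the holder is dead, break its lock and retry.
 * Breaking is attempted only once -- a second -EBUSY is returned to
 * the caller.
 */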

static void rbd_object_map_unlock(struct rbd_device *rbd_dev)
{
	struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
	CEPH_DEFINE_OID_ONSTACK(oid);
	int ret;

	rbd_object_map_name(rbd_dev, CEPH_NOSNAP, &oid);

	ret = ceph_cls_unlock(osdc, &oid, &rbd_dev->header_oloc, RBD_LOCK_NAME,
			      "");
	if (ret && ret != -ENOENT)
		rbd_warn(rbd_dev, "failed to unlock object map: %d", ret);
}

static int decode_object_map_header(void **p, void *end, u64 *object_map_size)
{
	u8 struct_v;
	u32 struct_len;
	u32 header_len;
	void *header_end;
	int ret;

	ceph_decode_32_safe(p, end, header_len, e_inval);
	header_end = *p + header_len;

	ret = ceph_start_decoding(p, end, 1, "BitVector header", &struct_v,
				  &struct_len);
	if (ret)
		return ret;

	ceph_decode_64_safe(p, end, *object_map_size, e_inval);

	*p = header_end;
	return 0;

e_inval:
	return -EINVAL;
}
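
/*
 * For reference, the reply fragment consumed above is:
 *
 *   le32 header_len
 *   BitVector header (versioned encoding: struct_v, struct_len,
 *       then le64 object_map_size)
 *
 * *p is left at header_end, i.e. at the start of the bit vector
 * payload.
 */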

static int __rbd_object_map_load(struct rbd_device *rbd_dev)
{
	struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
	CEPH_DEFINE_OID_ONSTACK(oid);
	struct page **pages;
	void *p, *end;
	size_t reply_len;
	u64 num_objects;
	u64 object_map_bytes;
	u64 object_map_size;
	int num_pages;
	int ret;

	rbd_assert(!rbd_dev->object_map && !rbd_dev->object_map_size);

	num_objects = ceph_get_num_objects(&rbd_dev->layout,
					   rbd_dev->mapping.size);
	object_map_bytes = DIV_ROUND_UP_ULL(num_objects * BITS_PER_OBJ,
					    BITS_PER_BYTE);
	num_pages = calc_pages_for(0, object_map_bytes) + 1;
	pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL);
	if (IS_ERR(pages))
		return PTR_ERR(pages);

	reply_len = num_pages * PAGE_SIZE;
	rbd_object_map_name(rbd_dev, rbd_dev->spec->snap_id, &oid);
	ret = ceph_osdc_call(osdc, &oid, &rbd_dev->header_oloc,
			     "rbd", "object_map_load", CEPH_OSD_FLAG_READ,
			     NULL, 0, pages, &reply_len);
	if (ret)
		goto out;

	p = page_address(pages[0]);
	end = p + min(reply_len, (size_t)PAGE_SIZE);
	ret = decode_object_map_header(&p, end, &object_map_size);
	if (ret)
		goto out;

	if (object_map_size != num_objects) {
		rbd_warn(rbd_dev, "object map size mismatch: %llu vs %llu",
			 object_map_size, num_objects);
		ret = -EINVAL;
		goto out;
	}

	if (offset_in_page(p) + object_map_bytes > reply_len) {
		ret = -EINVAL;
		goto out;
	}

	rbd_dev->object_map = kvmalloc(object_map_bytes, GFP_KERNEL);
	if (!rbd_dev->object_map) {
		ret = -ENOMEM;
		goto out;
	}

	rbd_dev->object_map_size = object_map_size;
	ceph_copy_from_page_vector(pages, rbd_dev->object_map,
				   offset_in_page(p), object_map_bytes);

out:
	ceph_release_page_vector(pages, num_pages);
	return ret;
}
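
/*
 * Sizing example (illustrative, assuming 4 KiB pages and the default
 * 4 MiB object size): a 1 TiB mapping covers 262144 objects, so the
 * object map needs 262144 * 2 bits = 64 KiB, i.e. 16 data pages plus
 * the one extra page allocated above to absorb the reply header.
 */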

static void rbd_object_map_free(struct rbd_device *rbd_dev)
{
	kvfree(rbd_dev->object_map);
	rbd_dev->object_map = NULL;
	rbd_dev->object_map_size = 0;
}

static int rbd_object_map_load(struct rbd_device *rbd_dev)
{
	int ret;

	ret = __rbd_object_map_load(rbd_dev);
	if (ret)
		return ret;

	ret = rbd_dev_v2_get_flags(rbd_dev);
	if (ret) {
		rbd_object_map_free(rbd_dev);
		return ret;
	}

	if (rbd_dev->object_map_flags & RBD_FLAG_OBJECT_MAP_INVALID)
		rbd_warn(rbd_dev, "object map is invalid");

	return 0;
}

static int rbd_object_map_open(struct rbd_device *rbd_dev)
{
	int ret;

	ret = rbd_object_map_lock(rbd_dev);
	if (ret)
		return ret;

	ret = rbd_object_map_load(rbd_dev);
	if (ret) {
		rbd_object_map_unlock(rbd_dev);
		return ret;
	}

	return 0;
}

static void rbd_object_map_close(struct rbd_device *rbd_dev)
{
	rbd_object_map_free(rbd_dev);
	rbd_object_map_unlock(rbd_dev);
}

/*
 * This function needs snap_id (or more precisely just something to
 * distinguish between HEAD and snapshot object maps), new_state and
 * current_state that were passed to rbd_object_map_update().
 *
 * To avoid allocating and stashing a context we piggyback on the OSD
 * request.  A HEAD update carries two ops (assert_locked plus the
 * update op itself), while a snapshot update carries only the update
 * op.  For new_state and current_state we decode our own
 * object_map_update op, encoded in rbd_cls_object_map_update().
 */
static int rbd_object_map_update_finish(struct rbd_obj_request *obj_req,
					struct ceph_osd_request *osd_req)
{
	struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
	struct ceph_osd_data *osd_data;
	u64 objno;
	u8 state, new_state, current_state;
	bool has_current_state;
	void *p;

	if (osd_req->r_result)
		return osd_req->r_result;

	/*
	 * Nothing to do for a snapshot object map.
	 */
	if (osd_req->r_num_ops == 1)
		return 0;

	/*
	 * Update in-memory HEAD object map.
	 */
	rbd_assert(osd_req->r_num_ops == 2);
	osd_data = osd_req_op_data(osd_req, 1, cls, request_data);
	rbd_assert(osd_data->type == CEPH_OSD_DATA_TYPE_PAGES);

	p = page_address(osd_data->pages[0]);
	objno = ceph_decode_64(&p);
	rbd_assert(objno == obj_req->ex.oe_objno);
	rbd_assert(ceph_decode_64(&p) == objno + 1);
	new_state = ceph_decode_8(&p);
	has_current_state = ceph_decode_8(&p);
	if (has_current_state)
		current_state = ceph_decode_8(&p);

	spin_lock(&rbd_dev->object_map_lock);
	state = __rbd_object_map_get(rbd_dev, objno);
	if (!has_current_state || current_state == state ||
	    (current_state == OBJECT_EXISTS && state == OBJECT_EXISTS_CLEAN))
		__rbd_object_map_set(rbd_dev, objno, new_state);
	spin_unlock(&rbd_dev->object_map_lock);

	return 0;
}
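
/*
 * Note that the in-memory update above is conditional: if the caller
 * supplied a current_state, it is re-checked under object_map_lock and
 * the new state is applied only if the map still matches (an assertion
 * of OBJECT_EXISTS also accepts OBJECT_EXISTS_CLEAN, which is treated
 * as a special case of "exists").
 */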

static void rbd_object_map_callback(struct ceph_osd_request *osd_req)
{
	struct rbd_obj_request *obj_req = osd_req->r_priv;
	int result;

	dout("%s osd_req %p result %d for obj_req %p\n", __func__, osd_req,
	     osd_req->r_result, obj_req);

	result = rbd_object_map_update_finish(obj_req, osd_req);
	rbd_obj_handle_request(obj_req, result);
}

static bool update_needed(struct rbd_device *rbd_dev, u64 objno, u8 new_state)
{
	u8 state = rbd_object_map_get(rbd_dev, objno);

	if (state == new_state ||
	    (new_state == OBJECT_PENDING && state == OBJECT_NONEXISTENT) ||
	    (new_state == OBJECT_NONEXISTENT && state != OBJECT_PENDING))
		return false;

	return true;
}
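
/*
 * The skip rules above appear to reflect the two-phase discard
 * protocol: there is no point marking a nonexistent object
 * OBJECT_PENDING, and an object only transitions to
 * OBJECT_NONEXISTENT from OBJECT_PENDING (presumably set in the first
 * phase of the discard).
 */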

static int rbd_cls_object_map_update(struct ceph_osd_request *req,
				     int which, u64 objno, u8 new_state,
				     const u8 *current_state)
{
	struct page **pages;
	void *p, *start;
	int ret;

	ret = osd_req_op_cls_init(req, which, "rbd", "object_map_update");
	if (ret)
		return ret;

	pages = ceph_alloc_page_vector(1, GFP_NOIO);
	if (IS_ERR(pages))
		return PTR_ERR(pages);

	p = start = page_address(pages[0]);
	ceph_encode_64(&p, objno);
	ceph_encode_64(&p, objno + 1);
	ceph_encode_8(&p, new_state);
	if (current_state) {
		ceph_encode_8(&p, 1);
		ceph_encode_8(&p, *current_state);
	} else {
		ceph_encode_8(&p, 0);
	}

	osd_req_op_cls_request_data_pages(req, which, pages, p - start, 0,
					  false, true);
	return 0;
}
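
/*
 * Illustrative layout of the payload encoded above (matching what
 * rbd_object_map_update_finish() decodes):
 *
 *   le64 start_object_no   (objno)
 *   le64 end_object_no     (objno + 1 -- a single-object range)
 *   u8   new_object_state
 *   u8   has_current_state
 *   u8   current_object_state   (only if has_current_state != 0)
 */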

/*
 * Return:
 *   0 - object map update sent
 *   1 - object map update isn't needed
 *  <0 - error
 */
static int rbd_object_map_update(struct rbd_obj_request *obj_req, u64 snap_id,
				 u8 new_state, const u8 *current_state)
{
	struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
	struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
	struct ceph_osd_request *req;
	int num_ops = 1;
	int which = 0;
	int ret;

	if (snap_id == CEPH_NOSNAP) {
		if (!update_needed(rbd_dev, obj_req->ex.oe_objno, new_state))
			return 1;

		num_ops++; /* assert_locked */
	}

	req = ceph_osdc_alloc_request(osdc, NULL, num_ops, false, GFP_NOIO);
	if (!req)
		return -ENOMEM;

	list_add_tail(&req->r_private_item, &obj_req->osd_reqs);
	req->r_callback = rbd_object_map_callback;
	req->r_priv = obj_req;

	rbd_object_map_name(rbd_dev, snap_id, &req->r_base_oid);
	ceph_oloc_copy(&req->r_base_oloc, &rbd_dev->header_oloc);
	req->r_flags = CEPH_OSD_FLAG_WRITE;
	ktime_get_real_ts64(&req->r_mtime);

	if (snap_id == CEPH_NOSNAP) {
		/*
		 * Protect against possible race conditions during lock
		 * ownership transitions.
		 */
		ret = ceph_cls_assert_locked(req, which++, RBD_LOCK_NAME,
					     CEPH_CLS_LOCK_EXCLUSIVE, "", "");
		if (ret)
			return ret;
	}

	ret = rbd_cls_object_map_update(req, which, obj_req->ex.oe_objno,
					new_state, current_state);
	if (ret)
		return ret;

	ret = ceph_osdc_alloc_messages(req, GFP_NOIO);
	if (ret)
		return ret;

	ceph_osdc_start_request(osdc, req, false);
	return 0;
}

static void prune_extents(struct ceph_file_extent *img_extents,
			  u32 *num_img_extents, u64 overlap)
{
	u32 cnt = *num_img_extents;

	/* drop extents completely beyond the overlap */
	while (cnt && img_extents[cnt - 1].fe_off >= overlap)
		cnt--;

	if (cnt) {
		struct ceph_file_extent *ex = &img_extents[cnt - 1];

		/* trim final overlapping extent */
		if (ex->fe_off + ex->fe_len > overlap)
			ex->fe_len = overlap - ex->fe_off;
	}

	*num_img_extents = cnt;
}
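
/*
 * Example: with overlap 100, extents (90,20) and (120,10) become just
 * (90,10) -- the second extent starts beyond the overlap and is
 * dropped, and the first is trimmed to end at the overlap boundary.
 */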

/*
 * Determine the byte range(s) covered by either just the object extent
 * or the entire object in the parent image.
 */
static int rbd_obj_calc_img_extents(struct rbd_obj_request *obj_req,
				    bool entire)
{
	struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
	int ret;

	if (!rbd_dev->parent_overlap)
		return 0;

	ret = ceph_extent_to_file(&rbd_dev->layout, obj_req->ex.oe_objno,
				  entire ? 0 : obj_req->ex.oe_off,
				  entire ? rbd_dev->layout.object_size :
							obj_req->ex.oe_len,
				  &obj_req->img_extents,
				  &obj_req->num_img_extents);
	if (ret)
		return ret;

	prune_extents(obj_req->img_extents, &obj_req->num_img_extents,
		      rbd_dev->parent_overlap);
	return 0;
}
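
/*
 * All three write-type init paths below call this with entire == true:
 * for copyup purposes, what matters is which parts of the parent image
 * back the whole object, not just the written extent.
 */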

static void rbd_osd_setup_data(struct ceph_osd_request *osd_req, int which)
{
	struct rbd_obj_request *obj_req = osd_req->r_priv;

	switch (obj_req->img_request->data_type) {
	case OBJ_REQUEST_BIO:
		osd_req_op_extent_osd_data_bio(osd_req, which,
					       &obj_req->bio_pos,
					       obj_req->ex.oe_len);
		break;
	case OBJ_REQUEST_BVECS:
	case OBJ_REQUEST_OWN_BVECS:
		rbd_assert(obj_req->bvec_pos.iter.bi_size ==
							obj_req->ex.oe_len);
		rbd_assert(obj_req->bvec_idx == obj_req->bvec_count);
		osd_req_op_extent_osd_data_bvec_pos(osd_req, which,
						    &obj_req->bvec_pos);
		break;
	default:
		BUG();
	}
}

static int rbd_osd_setup_stat(struct ceph_osd_request *osd_req, int which)
{
	struct page **pages;

	/*
	 * The response data for a STAT call consists of:
	 *     le64 length;
	 *     struct {
	 *         le32 tv_sec;
	 *         le32 tv_nsec;
	 *     } mtime;
	 */
	pages = ceph_alloc_page_vector(1, GFP_NOIO);
	if (IS_ERR(pages))
		return PTR_ERR(pages);

	osd_req_op_init(osd_req, which, CEPH_OSD_OP_STAT, 0);
	osd_req_op_raw_data_in_pages(osd_req, which, pages,
				     8 + sizeof(struct ceph_timespec),
				     0, false, true);
	return 0;
}

static int rbd_osd_setup_copyup(struct ceph_osd_request *osd_req, int which,
				u32 bytes)
{
	struct rbd_obj_request *obj_req = osd_req->r_priv;
	int ret;

	ret = osd_req_op_cls_init(osd_req, which, "rbd", "copyup");
	if (ret)
		return ret;

	osd_req_op_cls_request_data_bvecs(osd_req, which, obj_req->copyup_bvecs,
					  obj_req->copyup_bvec_count, bytes);
	return 0;
}

static int rbd_obj_init_read(struct rbd_obj_request *obj_req)
{
	obj_req->read_state = RBD_OBJ_READ_START;
	return 0;
}

static void __rbd_osd_setup_write_ops(struct ceph_osd_request *osd_req,
				      int which)
{
	struct rbd_obj_request *obj_req = osd_req->r_priv;
	struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
	u16 opcode;

	if (!use_object_map(rbd_dev) ||
	    !(obj_req->flags & RBD_OBJ_FLAG_MAY_EXIST)) {
		osd_req_op_alloc_hint_init(osd_req, which++,
					   rbd_dev->layout.object_size,
					   rbd_dev->layout.object_size,
					   rbd_dev->opts->alloc_hint_flags);
	}

	if (rbd_obj_is_entire(obj_req))
		opcode = CEPH_OSD_OP_WRITEFULL;
	else
		opcode = CEPH_OSD_OP_WRITE;

	osd_req_op_extent_init(osd_req, which, opcode,
			       obj_req->ex.oe_off, obj_req->ex.oe_len, 0, 0);
	rbd_osd_setup_data(osd_req, which);
}

static int rbd_obj_init_write(struct rbd_obj_request *obj_req)
{
	int ret;

	/* reverse map the entire object onto the parent */
	ret = rbd_obj_calc_img_extents(obj_req, true);
	if (ret)
		return ret;

	if (rbd_obj_copyup_enabled(obj_req))
		obj_req->flags |= RBD_OBJ_FLAG_COPYUP_ENABLED;

	obj_req->write_state = RBD_OBJ_WRITE_START;
	return 0;
}

static u16 truncate_or_zero_opcode(struct rbd_obj_request *obj_req)
{
	return rbd_obj_is_tail(obj_req) ? CEPH_OSD_OP_TRUNCATE :
					  CEPH_OSD_OP_ZERO;
}

static void __rbd_osd_setup_discard_ops(struct ceph_osd_request *osd_req,
					int which)
{
	struct rbd_obj_request *obj_req = osd_req->r_priv;

	if (rbd_obj_is_entire(obj_req) && !obj_req->num_img_extents) {
		rbd_assert(obj_req->flags & RBD_OBJ_FLAG_DELETION);
		osd_req_op_init(osd_req, which, CEPH_OSD_OP_DELETE, 0);
	} else {
		osd_req_op_extent_init(osd_req, which,
				       truncate_or_zero_opcode(obj_req),
				       obj_req->ex.oe_off, obj_req->ex.oe_len,
				       0, 0);
	}
}

static int rbd_obj_init_discard(struct rbd_obj_request *obj_req)
{
	struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
	u64 off, next_off;
	int ret;

	/*
	 * Align the range to alloc_size boundary and punt on discards
	 * that are too small to free up any space.
	 *
	 * alloc_size == object_size && is_tail() is a special case for
	 * filestore with filestore_punch_hole = false, needed to allow
	 * truncate (in addition to delete).
	 */
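	/*
	 * For example, with an illustrative alloc_size of 8: a discard
	 * of 3~10 (off~len) rounds to an empty 8~0 range and is punted
	 * (returns 1), while 3~20 is narrowed to 8~8.
	 */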
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2342) if (rbd_dev->opts->alloc_size != rbd_dev->layout.object_size ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2343) !rbd_obj_is_tail(obj_req)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2344) off = round_up(obj_req->ex.oe_off, rbd_dev->opts->alloc_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2345) next_off = round_down(obj_req->ex.oe_off + obj_req->ex.oe_len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2346) rbd_dev->opts->alloc_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2347) if (off >= next_off)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2348) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2349)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2350) dout("%s %p %llu~%llu -> %llu~%llu\n", __func__,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2351) obj_req, obj_req->ex.oe_off, obj_req->ex.oe_len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2352) off, next_off - off);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2353) obj_req->ex.oe_off = off;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2354) obj_req->ex.oe_len = next_off - off;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2355) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2356)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2357) /* reverse map the entire object onto the parent */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2358) ret = rbd_obj_calc_img_extents(obj_req, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2359) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2360) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2361)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2362) obj_req->flags |= RBD_OBJ_FLAG_NOOP_FOR_NONEXISTENT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2363) if (rbd_obj_is_entire(obj_req) && !obj_req->num_img_extents)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2364) obj_req->flags |= RBD_OBJ_FLAG_DELETION;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2365)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2366) obj_req->write_state = RBD_OBJ_WRITE_START;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2367) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2368) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2369)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2370) static void __rbd_osd_setup_zeroout_ops(struct ceph_osd_request *osd_req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2371) int which)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2372) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2373) struct rbd_obj_request *obj_req = osd_req->r_priv;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2374) u16 opcode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2375)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2376) if (rbd_obj_is_entire(obj_req)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2377) if (obj_req->num_img_extents) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2378) if (!(obj_req->flags & RBD_OBJ_FLAG_COPYUP_ENABLED))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2379) osd_req_op_init(osd_req, which++,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2380) CEPH_OSD_OP_CREATE, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2381) opcode = CEPH_OSD_OP_TRUNCATE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2382) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2383) rbd_assert(obj_req->flags & RBD_OBJ_FLAG_DELETION);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2384) osd_req_op_init(osd_req, which++,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2385) CEPH_OSD_OP_DELETE, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2386) opcode = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2387) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2388) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2389) opcode = truncate_or_zero_opcode(obj_req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2390) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2391)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2392) if (opcode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2393) osd_req_op_extent_init(osd_req, which, opcode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2394) obj_req->ex.oe_off, obj_req->ex.oe_len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2395) 0, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2396) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2397)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2398) static int rbd_obj_init_zeroout(struct rbd_obj_request *obj_req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2399) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2400) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2401)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2402) /* reverse map the entire object onto the parent */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2403) ret = rbd_obj_calc_img_extents(obj_req, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2404) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2405) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2406)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2407) if (rbd_obj_copyup_enabled(obj_req))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2408) obj_req->flags |= RBD_OBJ_FLAG_COPYUP_ENABLED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2409) if (!obj_req->num_img_extents) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2410) obj_req->flags |= RBD_OBJ_FLAG_NOOP_FOR_NONEXISTENT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2411) if (rbd_obj_is_entire(obj_req))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2412) obj_req->flags |= RBD_OBJ_FLAG_DELETION;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2413) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2414)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2415) obj_req->write_state = RBD_OBJ_WRITE_START;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2416) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2417) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2418)
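/*
 * Count the OSD ops that rbd_osd_setup_write_ops() will set up for
 * this object request.  The result is used to size the OSD request,
 * so the two must be kept in sync.
 */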
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2419) static int count_write_ops(struct rbd_obj_request *obj_req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2420) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2421) struct rbd_img_request *img_req = obj_req->img_request;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2422)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2423) switch (img_req->op_type) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2424) case OBJ_OP_WRITE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2425) if (!use_object_map(img_req->rbd_dev) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2426) !(obj_req->flags & RBD_OBJ_FLAG_MAY_EXIST))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2427) return 2; /* setallochint + write/writefull */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2428)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2429) return 1; /* write/writefull */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2430) case OBJ_OP_DISCARD:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2431) return 1; /* delete/truncate/zero */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2432) case OBJ_OP_ZEROOUT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2433) if (rbd_obj_is_entire(obj_req) && obj_req->num_img_extents &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2434) !(obj_req->flags & RBD_OBJ_FLAG_COPYUP_ENABLED))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2435) return 2; /* create + truncate */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2436)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2437) return 1; /* delete/truncate/zero */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2438) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2439) BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2440) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2441) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2442)
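/* Counterpart to count_write_ops(): add the ops just counted. */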
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2443) static void rbd_osd_setup_write_ops(struct ceph_osd_request *osd_req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2444) int which)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2445) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2446) struct rbd_obj_request *obj_req = osd_req->r_priv;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2447)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2448) switch (obj_req->img_request->op_type) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2449) case OBJ_OP_WRITE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2450) __rbd_osd_setup_write_ops(osd_req, which);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2451) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2452) case OBJ_OP_DISCARD:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2453) __rbd_osd_setup_discard_ops(osd_req, which);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2454) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2455) case OBJ_OP_ZEROOUT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2456) __rbd_osd_setup_zeroout_ops(osd_req, which);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2457) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2458) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2459) BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2460) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2461) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2462)
/*
 * Prune the list of object requests (adjust offset and/or length,
 * drop redundant requests).  Prepare the object request state
 * machines and the image request state machine for execution.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2468) static int __rbd_img_fill_request(struct rbd_img_request *img_req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2469) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2470) struct rbd_obj_request *obj_req, *next_obj_req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2471) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2472)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2473) for_each_obj_request_safe(img_req, obj_req, next_obj_req) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2474) switch (img_req->op_type) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2475) case OBJ_OP_READ:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2476) ret = rbd_obj_init_read(obj_req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2477) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2478) case OBJ_OP_WRITE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2479) ret = rbd_obj_init_write(obj_req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2480) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2481) case OBJ_OP_DISCARD:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2482) ret = rbd_obj_init_discard(obj_req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2483) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2484) case OBJ_OP_ZEROOUT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2485) ret = rbd_obj_init_zeroout(obj_req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2486) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2487) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2488) BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2489) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2490) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2491) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2492) if (ret > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2493) rbd_img_obj_request_del(img_req, obj_req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2494) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2495) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2496) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2497)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2498) img_req->state = RBD_IMG_START;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2499) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2500) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2501)
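/*
 * Machinery for filling an image request from a bio (list) or a
 * bio_vec array.  On the nocopy path, ->set_pos_fn records each
 * object request's starting position in the provided data buffer.
 * For fancy layouts, ->count_fn and ->copy_fn implement a two-pass
 * copy: the first pass sizes each object request's own bio_vec
 * array, the second populates it.
 */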
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2502) union rbd_img_fill_iter {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2503) struct ceph_bio_iter bio_iter;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2504) struct ceph_bvec_iter bvec_iter;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2505) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2506)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2507) struct rbd_img_fill_ctx {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2508) enum obj_request_type pos_type;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2509) union rbd_img_fill_iter *pos;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2510) union rbd_img_fill_iter iter;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2511) ceph_object_extent_fn_t set_pos_fn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2512) ceph_object_extent_fn_t count_fn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2513) ceph_object_extent_fn_t copy_fn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2514) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2515)
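/*
 * Allocate an object request and add it to @img_req.  The embedded
 * object extent is handed back to the striper to be filled in.
 */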
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2516) static struct ceph_object_extent *alloc_object_extent(void *arg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2517) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2518) struct rbd_img_request *img_req = arg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2519) struct rbd_obj_request *obj_req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2520)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2521) obj_req = rbd_obj_request_create();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2522) if (!obj_req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2523) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2524)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2525) rbd_img_obj_request_add(img_req, obj_req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2526) return &obj_req->ex;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2527) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2528)
/*
 * While su != os && sc == 1 (su = stripe_unit, os = object_size,
 * sc = stripe_count) is technically not fancy (it's the same layout
 * as su == os && sc == 1), we can't use the nocopy path for it
 * because ->set_pos_fn() should be called only once per object.
 * ceph_file_to_extents() invokes action_fn once per stripe unit, so
 * treat su != os && sc == 1 as fancy.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2536) static bool rbd_layout_is_fancy(struct ceph_file_layout *l)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2537) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2538) return l->stripe_unit != l->object_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2539) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2540)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2541) static int rbd_img_fill_request_nocopy(struct rbd_img_request *img_req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2542) struct ceph_file_extent *img_extents,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2543) u32 num_img_extents,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2544) struct rbd_img_fill_ctx *fctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2545) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2546) u32 i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2547) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2548)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2549) img_req->data_type = fctx->pos_type;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2550)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2551) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2552) * Create object requests and set each object request's starting
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2553) * position in the provided bio (list) or bio_vec array.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2554) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2555) fctx->iter = *fctx->pos;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2556) for (i = 0; i < num_img_extents; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2557) ret = ceph_file_to_extents(&img_req->rbd_dev->layout,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2558) img_extents[i].fe_off,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2559) img_extents[i].fe_len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2560) &img_req->object_extents,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2561) alloc_object_extent, img_req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2562) fctx->set_pos_fn, &fctx->iter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2563) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2564) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2565) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2566)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2567) return __rbd_img_fill_request(img_req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2568) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2569)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2570) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2571) * Map a list of image extents to a list of object extents, create the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2572) * corresponding object requests (normally each to a different object,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2573) * but not always) and add them to @img_req. For each object request,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2574) * set up its data descriptor to point to the corresponding chunk(s) of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2575) * @fctx->pos data buffer.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2576) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2577) * Because ceph_file_to_extents() will merge adjacent object extents
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2578) * together, each object request's data descriptor may point to multiple
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2579) * different chunks of @fctx->pos data buffer.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2580) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2581) * @fctx->pos data buffer is assumed to be large enough.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2582) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2583) static int rbd_img_fill_request(struct rbd_img_request *img_req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2584) struct ceph_file_extent *img_extents,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2585) u32 num_img_extents,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2586) struct rbd_img_fill_ctx *fctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2587) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2588) struct rbd_device *rbd_dev = img_req->rbd_dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2589) struct rbd_obj_request *obj_req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2590) u32 i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2591) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2592)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2593) if (fctx->pos_type == OBJ_REQUEST_NODATA ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2594) !rbd_layout_is_fancy(&rbd_dev->layout))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2595) return rbd_img_fill_request_nocopy(img_req, img_extents,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2596) num_img_extents, fctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2597)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2598) img_req->data_type = OBJ_REQUEST_OWN_BVECS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2599)
	/*
	 * Create object requests and determine ->bvec_count for each object
	 * request.  Note that the sum of ->bvec_count over all object
	 * requests may be greater than the number of bio_vecs in the
	 * provided bio (list) or bio_vec array: when mapped, those bio_vecs
	 * can straddle stripe unit boundaries.
	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2607) fctx->iter = *fctx->pos;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2608) for (i = 0; i < num_img_extents; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2609) ret = ceph_file_to_extents(&rbd_dev->layout,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2610) img_extents[i].fe_off,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2611) img_extents[i].fe_len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2612) &img_req->object_extents,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2613) alloc_object_extent, img_req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2614) fctx->count_fn, &fctx->iter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2615) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2616) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2617) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2618)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2619) for_each_obj_request(img_req, obj_req) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2620) obj_req->bvec_pos.bvecs = kmalloc_array(obj_req->bvec_count,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2621) sizeof(*obj_req->bvec_pos.bvecs),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2622) GFP_NOIO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2623) if (!obj_req->bvec_pos.bvecs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2624) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2625) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2626)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2627) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2628) * Fill in each object request's private bio_vec array, splitting and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2629) * rearranging the provided bio_vecs in stripe unit chunks as needed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2630) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2631) fctx->iter = *fctx->pos;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2632) for (i = 0; i < num_img_extents; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2633) ret = ceph_iterate_extents(&rbd_dev->layout,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2634) img_extents[i].fe_off,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2635) img_extents[i].fe_len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2636) &img_req->object_extents,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2637) fctx->copy_fn, &fctx->iter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2638) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2639) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2640) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2641)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2642) return __rbd_img_fill_request(img_req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2643) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2644)
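/*
 * Fill a request that carries no data payload (discard, zeroout):
 * only the image extent matters, so a zeroed dummy iterator stands in
 * for the data position.
 */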
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2645) static int rbd_img_fill_nodata(struct rbd_img_request *img_req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2646) u64 off, u64 len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2647) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2648) struct ceph_file_extent ex = { off, len };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2649) union rbd_img_fill_iter dummy = {};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2650) struct rbd_img_fill_ctx fctx = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2651) .pos_type = OBJ_REQUEST_NODATA,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2652) .pos = &dummy,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2653) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2654)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2655) return rbd_img_fill_request(img_req, &ex, 1, &fctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2656) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2657)
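/*
 * Per-object-extent callbacks for the bio case: record the starting
 * position, count the bio_vecs an object request will need, copy
 * those bio_vecs into its private array.
 */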
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2658) static void set_bio_pos(struct ceph_object_extent *ex, u32 bytes, void *arg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2659) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2660) struct rbd_obj_request *obj_req =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2661) container_of(ex, struct rbd_obj_request, ex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2662) struct ceph_bio_iter *it = arg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2663)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2664) dout("%s objno %llu bytes %u\n", __func__, ex->oe_objno, bytes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2665) obj_req->bio_pos = *it;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2666) ceph_bio_iter_advance(it, bytes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2667) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2668)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2669) static void count_bio_bvecs(struct ceph_object_extent *ex, u32 bytes, void *arg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2670) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2671) struct rbd_obj_request *obj_req =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2672) container_of(ex, struct rbd_obj_request, ex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2673) struct ceph_bio_iter *it = arg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2674)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2675) dout("%s objno %llu bytes %u\n", __func__, ex->oe_objno, bytes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2676) ceph_bio_iter_advance_step(it, bytes, ({
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2677) obj_req->bvec_count++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2678) }));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2680) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2681)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2682) static void copy_bio_bvecs(struct ceph_object_extent *ex, u32 bytes, void *arg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2683) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2684) struct rbd_obj_request *obj_req =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2685) container_of(ex, struct rbd_obj_request, ex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2686) struct ceph_bio_iter *it = arg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2687)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2688) dout("%s objno %llu bytes %u\n", __func__, ex->oe_objno, bytes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2689) ceph_bio_iter_advance_step(it, bytes, ({
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2690) obj_req->bvec_pos.bvecs[obj_req->bvec_idx++] = bv;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2691) obj_req->bvec_pos.iter.bi_size += bv.bv_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2692) }));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2693) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2694)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2695) static int __rbd_img_fill_from_bio(struct rbd_img_request *img_req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2696) struct ceph_file_extent *img_extents,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2697) u32 num_img_extents,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2698) struct ceph_bio_iter *bio_pos)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2699) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2700) struct rbd_img_fill_ctx fctx = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2701) .pos_type = OBJ_REQUEST_BIO,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2702) .pos = (union rbd_img_fill_iter *)bio_pos,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2703) .set_pos_fn = set_bio_pos,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2704) .count_fn = count_bio_bvecs,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2705) .copy_fn = copy_bio_bvecs,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2706) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2707)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2708) return rbd_img_fill_request(img_req, img_extents, num_img_extents,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2709) &fctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2710) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2711)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2712) static int rbd_img_fill_from_bio(struct rbd_img_request *img_req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2713) u64 off, u64 len, struct bio *bio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2714) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2715) struct ceph_file_extent ex = { off, len };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2716) struct ceph_bio_iter it = { .bio = bio, .iter = bio->bi_iter };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2717)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2718) return __rbd_img_fill_from_bio(img_req, &ex, 1, &it);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2719) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2720)
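/*
 * Per-object-extent callbacks for the bio_vec array case, mirroring
 * the bio callbacks above.
 */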
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2721) static void set_bvec_pos(struct ceph_object_extent *ex, u32 bytes, void *arg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2722) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2723) struct rbd_obj_request *obj_req =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2724) container_of(ex, struct rbd_obj_request, ex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2725) struct ceph_bvec_iter *it = arg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2726)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2727) obj_req->bvec_pos = *it;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2728) ceph_bvec_iter_shorten(&obj_req->bvec_pos, bytes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2729) ceph_bvec_iter_advance(it, bytes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2730) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2731)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2732) static void count_bvecs(struct ceph_object_extent *ex, u32 bytes, void *arg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2733) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2734) struct rbd_obj_request *obj_req =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2735) container_of(ex, struct rbd_obj_request, ex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2736) struct ceph_bvec_iter *it = arg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2737)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2738) ceph_bvec_iter_advance_step(it, bytes, ({
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2739) obj_req->bvec_count++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2740) }));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2741) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2742)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2743) static void copy_bvecs(struct ceph_object_extent *ex, u32 bytes, void *arg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2744) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2745) struct rbd_obj_request *obj_req =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2746) container_of(ex, struct rbd_obj_request, ex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2747) struct ceph_bvec_iter *it = arg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2748)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2749) ceph_bvec_iter_advance_step(it, bytes, ({
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2750) obj_req->bvec_pos.bvecs[obj_req->bvec_idx++] = bv;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2751) obj_req->bvec_pos.iter.bi_size += bv.bv_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2752) }));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2753) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2754)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2755) static int __rbd_img_fill_from_bvecs(struct rbd_img_request *img_req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2756) struct ceph_file_extent *img_extents,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2757) u32 num_img_extents,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2758) struct ceph_bvec_iter *bvec_pos)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2759) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2760) struct rbd_img_fill_ctx fctx = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2761) .pos_type = OBJ_REQUEST_BVECS,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2762) .pos = (union rbd_img_fill_iter *)bvec_pos,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2763) .set_pos_fn = set_bvec_pos,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2764) .count_fn = count_bvecs,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2765) .copy_fn = copy_bvecs,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2766) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2767)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2768) return rbd_img_fill_request(img_req, img_extents, num_img_extents,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2769) &fctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2770) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2771)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2772) static int rbd_img_fill_from_bvecs(struct rbd_img_request *img_req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2773) struct ceph_file_extent *img_extents,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2774) u32 num_img_extents,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2775) struct bio_vec *bvecs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2776) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2777) struct ceph_bvec_iter it = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2778) .bvecs = bvecs,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2779) .iter = { .bi_size = ceph_file_extents_bytes(img_extents,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2780) num_img_extents) },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2781) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2782)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2783) return __rbd_img_fill_from_bvecs(img_req, img_extents, num_img_extents,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2784) &it);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2785) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2786)
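/*
 * Defer image request handling to the rbd workqueue.  This keeps the
 * state machine from recursing, e.g. down a chain of parent images
 * (see rbd_obj_read_from_parent()).
 */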
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2787) static void rbd_img_handle_request_work(struct work_struct *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2788) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2789) struct rbd_img_request *img_req =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2790) container_of(work, struct rbd_img_request, work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2791)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2792) rbd_img_handle_request(img_req, img_req->work_result);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2793) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2794)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2795) static void rbd_img_schedule(struct rbd_img_request *img_req, int result)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2796) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2797) INIT_WORK(&img_req->work, rbd_img_handle_request_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2798) img_req->work_result = result;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2799) queue_work(rbd_wq, &img_req->work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2800) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2801)
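/*
 * Check the object map and flag the object request if the object may
 * exist.  A read of an object that is known not to exist is not sent
 * to an OSD at all: rbd_obj_advance_read() treats it as -ENOENT
 * directly (parent read or zero-fill).
 */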
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2802) static bool rbd_obj_may_exist(struct rbd_obj_request *obj_req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2803) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2804) struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2805)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2806) if (rbd_object_map_may_exist(rbd_dev, obj_req->ex.oe_objno)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2807) obj_req->flags |= RBD_OBJ_FLAG_MAY_EXIST;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2808) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2809) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2810)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2811) dout("%s %p objno %llu assuming dne\n", __func__, obj_req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2812) obj_req->ex.oe_objno);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2813) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2814) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2815)
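/* Read the object extent with a single read op. */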
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2816) static int rbd_obj_read_object(struct rbd_obj_request *obj_req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2817) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2818) struct ceph_osd_request *osd_req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2819) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2820)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2821) osd_req = __rbd_obj_add_osd_request(obj_req, NULL, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2822) if (IS_ERR(osd_req))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2823) return PTR_ERR(osd_req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2824)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2825) osd_req_op_extent_init(osd_req, 0, CEPH_OSD_OP_READ,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2826) obj_req->ex.oe_off, obj_req->ex.oe_len, 0, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2827) rbd_osd_setup_data(osd_req, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2828) rbd_osd_format_read(osd_req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2829)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2830) ret = ceph_osdc_alloc_messages(osd_req, GFP_NOIO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2831) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2832) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2833)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2834) rbd_osd_submit(osd_req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2835) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2836) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2837)
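/*
 * Satisfy this object request from the parent image: allocate a child
 * image request against the parent and schedule it.  For reads, the
 * child fills the original data buffer; for writes, it fills
 * obj_req->copyup_bvecs with the data to be used for copyup.
 */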
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2838) static int rbd_obj_read_from_parent(struct rbd_obj_request *obj_req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2839) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2840) struct rbd_img_request *img_req = obj_req->img_request;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2841) struct rbd_device *parent = img_req->rbd_dev->parent;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2842) struct rbd_img_request *child_img_req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2843) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2844)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2845) child_img_req = kmem_cache_alloc(rbd_img_request_cache, GFP_NOIO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2846) if (!child_img_req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2847) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2848)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2849) rbd_img_request_init(child_img_req, parent, OBJ_OP_READ);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2850) __set_bit(IMG_REQ_CHILD, &child_img_req->flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2851) child_img_req->obj_request = obj_req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2852)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2853) down_read(&parent->header_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2854) rbd_img_capture_header(child_img_req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2855) up_read(&parent->header_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2856)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2857) dout("%s child_img_req %p for obj_req %p\n", __func__, child_img_req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2858) obj_req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2859)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2860) if (!rbd_img_is_write(img_req)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2861) switch (img_req->data_type) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2862) case OBJ_REQUEST_BIO:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2863) ret = __rbd_img_fill_from_bio(child_img_req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2864) obj_req->img_extents,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2865) obj_req->num_img_extents,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2866) &obj_req->bio_pos);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2867) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2868) case OBJ_REQUEST_BVECS:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2869) case OBJ_REQUEST_OWN_BVECS:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2870) ret = __rbd_img_fill_from_bvecs(child_img_req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2871) obj_req->img_extents,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2872) obj_req->num_img_extents,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2873) &obj_req->bvec_pos);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2874) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2875) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2876) BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2877) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2878) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2879) ret = rbd_img_fill_from_bvecs(child_img_req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2880) obj_req->img_extents,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2881) obj_req->num_img_extents,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2882) obj_req->copyup_bvecs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2883) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2884) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2885) rbd_img_request_destroy(child_img_req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2886) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2887) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2888)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2889) /* avoid parent chain recursion */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2890) rbd_img_schedule(child_img_req, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2891) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2892) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2893)
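/*
 * Advance the read state machine.  Returns true if the object request
 * is done and *result holds its outcome, false if it is still in
 * flight.
 */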
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2894) static bool rbd_obj_advance_read(struct rbd_obj_request *obj_req, int *result)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2895) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2896) struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2897) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2898)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2899) again:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2900) switch (obj_req->read_state) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2901) case RBD_OBJ_READ_START:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2902) rbd_assert(!*result);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2903)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2904) if (!rbd_obj_may_exist(obj_req)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2905) *result = -ENOENT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2906) obj_req->read_state = RBD_OBJ_READ_OBJECT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2907) goto again;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2908) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2909)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2910) ret = rbd_obj_read_object(obj_req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2911) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2912) *result = ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2913) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2914) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2915) obj_req->read_state = RBD_OBJ_READ_OBJECT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2916) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2917) case RBD_OBJ_READ_OBJECT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2918) if (*result == -ENOENT && rbd_dev->parent_overlap) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2919) /* reverse map this object extent onto the parent */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2920) ret = rbd_obj_calc_img_extents(obj_req, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2921) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2922) *result = ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2923) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2924) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2925) if (obj_req->num_img_extents) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2926) ret = rbd_obj_read_from_parent(obj_req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2927) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2928) *result = ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2929) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2930) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2931) obj_req->read_state = RBD_OBJ_READ_PARENT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2932) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2933) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2934) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2935)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2936) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2937) * -ENOENT means a hole in the image -- zero-fill the entire
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2938) * length of the request. A short read also implies zero-fill
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2939) * to the end of the request.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2940) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2941) if (*result == -ENOENT) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2942) rbd_obj_zero_range(obj_req, 0, obj_req->ex.oe_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2943) *result = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2944) } else if (*result >= 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2945) if (*result < obj_req->ex.oe_len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2946) rbd_obj_zero_range(obj_req, *result,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2947) obj_req->ex.oe_len - *result);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2948) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2949) rbd_assert(*result == obj_req->ex.oe_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2950) *result = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2951) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2952) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2953) case RBD_OBJ_READ_PARENT:
		/*
		 * Reads from the parent image cover the object only up to the
		 * overlap -- zero-fill from the overlap to the end of the
		 * request.
		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2958) if (!*result) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2959) u32 obj_overlap = rbd_obj_img_extents_bytes(obj_req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2960)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2961) if (obj_overlap < obj_req->ex.oe_len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2962) rbd_obj_zero_range(obj_req, obj_overlap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2963) obj_req->ex.oe_len - obj_overlap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2964) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2965) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2966) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2967) BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2968) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2969) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2970)
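/*
 * A write-type request flagged NOOP_FOR_NONEXISTENT (a discard or
 * zeroout, see rbd_obj_init_discard() and rbd_obj_init_zeroout()) is
 * a no-op if the object map says the object doesn't exist.
 */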
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2971) static bool rbd_obj_write_is_noop(struct rbd_obj_request *obj_req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2972) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2973) struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2974)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2975) if (rbd_object_map_may_exist(rbd_dev, obj_req->ex.oe_objno))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2976) obj_req->flags |= RBD_OBJ_FLAG_MAY_EXIST;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2977)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2978) if (!(obj_req->flags & RBD_OBJ_FLAG_MAY_EXIST) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2979) (obj_req->flags & RBD_OBJ_FLAG_NOOP_FOR_NONEXISTENT)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2980) dout("%s %p noop for nonexistent\n", __func__, obj_req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2981) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2982) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2983)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2984) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2985) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2986)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2987) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2988) * Return:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2989) * 0 - object map update sent
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2990) * 1 - object map update isn't needed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2991) * <0 - error
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2992) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2993) static int rbd_obj_write_pre_object_map(struct rbd_obj_request *obj_req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2994) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2995) struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2996) u8 new_state;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2997)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2998) if (!(rbd_dev->header.features & RBD_FEATURE_OBJECT_MAP))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2999) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3000)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3001) if (obj_req->flags & RBD_OBJ_FLAG_DELETION)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3002) new_state = OBJECT_PENDING;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3003) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3004) new_state = OBJECT_EXISTS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3005)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3006) return rbd_object_map_update(obj_req, CEPH_NOSNAP, new_state, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3007) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3008)
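/*
 * Build and submit the write request.  If copyup is enabled, prepend
 * a stat op so the OSD reports whether the target object exists.
 */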
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3009) static int rbd_obj_write_object(struct rbd_obj_request *obj_req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3010) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3011) struct ceph_osd_request *osd_req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3012) int num_ops = count_write_ops(obj_req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3013) int which = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3014) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3015)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3016) if (obj_req->flags & RBD_OBJ_FLAG_COPYUP_ENABLED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3017) num_ops++; /* stat */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3018)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3019) osd_req = rbd_obj_add_osd_request(obj_req, num_ops);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3020) if (IS_ERR(osd_req))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3021) return PTR_ERR(osd_req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3022)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3023) if (obj_req->flags & RBD_OBJ_FLAG_COPYUP_ENABLED) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3024) ret = rbd_osd_setup_stat(osd_req, which++);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3025) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3026) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3027) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3028)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3029) rbd_osd_setup_write_ops(osd_req, which);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3030) rbd_osd_format_write(osd_req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3031)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3032) ret = ceph_osdc_alloc_messages(osd_req, GFP_NOIO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3033) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3034) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3035)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3036) rbd_osd_submit(osd_req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3037) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3038) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3039)
/*
 * copyup_bvecs pages are never highmem pages, so page_address() can
 * be used on them directly.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3043) static bool is_zero_bvecs(struct bio_vec *bvecs, u32 bytes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3044) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3045) struct ceph_bvec_iter it = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3046) .bvecs = bvecs,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3047) .iter = { .bi_size = bytes },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3048) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3049)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3050) ceph_bvec_iter_advance_step(&it, bytes, ({
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3051) if (memchr_inv(page_address(bv.bv_page) + bv.bv_offset, 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3052) bv.bv_len))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3053) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3054) }));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3055) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3056) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3057)
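/*
 * Passing MODS_ONLY for "bytes" means: issue just the modification
 * ops, without a copyup op (the copyup either already happened or is
 * no longer needed).
 */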
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3058) #define MODS_ONLY U32_MAX
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3059)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3060) static int rbd_obj_copyup_empty_snapc(struct rbd_obj_request *obj_req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3061) u32 bytes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3062) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3063) struct ceph_osd_request *osd_req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3064) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3065)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3066) dout("%s obj_req %p bytes %u\n", __func__, obj_req, bytes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3067) rbd_assert(bytes > 0 && bytes != MODS_ONLY);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3068)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3069) osd_req = __rbd_obj_add_osd_request(obj_req, &rbd_empty_snapc, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3070) if (IS_ERR(osd_req))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3071) return PTR_ERR(osd_req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3072)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3073) ret = rbd_osd_setup_copyup(osd_req, 0, bytes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3074) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3075) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3076)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3077) rbd_osd_format_write(osd_req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3078)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3079) ret = ceph_osdc_alloc_messages(osd_req, GFP_NOIO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3080) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3081) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3082)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3083) rbd_osd_submit(osd_req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3084) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3085) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3086)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3087) static int rbd_obj_copyup_current_snapc(struct rbd_obj_request *obj_req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3088) u32 bytes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3089) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3090) struct ceph_osd_request *osd_req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3091) int num_ops = count_write_ops(obj_req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3092) int which = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3093) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3094)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3095) dout("%s obj_req %p bytes %u\n", __func__, obj_req, bytes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3096)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3097) if (bytes != MODS_ONLY)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3098) num_ops++; /* copyup */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3099)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3100) osd_req = rbd_obj_add_osd_request(obj_req, num_ops);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3101) if (IS_ERR(osd_req))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3102) return PTR_ERR(osd_req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3103)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3104) if (bytes != MODS_ONLY) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3105) ret = rbd_osd_setup_copyup(osd_req, which++, bytes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3106) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3107) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3108) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3109)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3110) rbd_osd_setup_write_ops(osd_req, which);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3111) rbd_osd_format_write(osd_req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3112)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3113) ret = ceph_osdc_alloc_messages(osd_req, GFP_NOIO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3114) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3115) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3116)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3117) rbd_osd_submit(osd_req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3118) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3119) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3120)
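/*
 * Allocate enough whole pages (as a bio_vec array) to hold
 * @obj_overlap bytes of parent data for copyup.
 */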
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3121) static int setup_copyup_bvecs(struct rbd_obj_request *obj_req, u64 obj_overlap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3122) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3123) u32 i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3124)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3125) rbd_assert(!obj_req->copyup_bvecs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3126) obj_req->copyup_bvec_count = calc_pages_for(0, obj_overlap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3127) obj_req->copyup_bvecs = kcalloc(obj_req->copyup_bvec_count,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3128) sizeof(*obj_req->copyup_bvecs),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3129) GFP_NOIO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3130) if (!obj_req->copyup_bvecs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3131) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3132)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3133) for (i = 0; i < obj_req->copyup_bvec_count; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3134) unsigned int len = min(obj_overlap, (u64)PAGE_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3135)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3136) obj_req->copyup_bvecs[i].bv_page = alloc_page(GFP_NOIO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3137) if (!obj_req->copyup_bvecs[i].bv_page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3138) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3139)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3140) obj_req->copyup_bvecs[i].bv_offset = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3141) obj_req->copyup_bvecs[i].bv_len = len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3142) obj_overlap -= len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3143) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3144)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3145) rbd_assert(!obj_overlap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3146) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3147) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3148)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3149) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3150) * The target object doesn't exist. Read the data for the entire
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3151) * target object up to the overlap point (if any) from the parent,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3152) * so we can use it for a copyup.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3153) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3154) static int rbd_obj_copyup_read_parent(struct rbd_obj_request *obj_req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3155) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3156) struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3157) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3158)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3159) rbd_assert(obj_req->num_img_extents);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3160) prune_extents(obj_req->img_extents, &obj_req->num_img_extents,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3161) rbd_dev->parent_overlap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3162) if (!obj_req->num_img_extents) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3163) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3164) * The overlap has become 0 (most likely because the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3165) * image has been flattened). Re-submit the original write
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3166) * request -- pass MODS_ONLY since the copyup isn't needed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3167) * anymore.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3168) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3169) return rbd_obj_copyup_current_snapc(obj_req, MODS_ONLY);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3170) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3171)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3172) ret = setup_copyup_bvecs(obj_req, rbd_obj_img_extents_bytes(obj_req));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3173) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3174) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3175)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3176) return rbd_obj_read_from_parent(obj_req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3177) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3178)
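/*
 * A (non-zero) copyup makes the object exist in every snapshot in the
 * snapshot context, so send an object map update for each of them.
 * Zero copyup data is equivalent to the object not existing, hence
 * nothing to update in that case.
 */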
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3179) static void rbd_obj_copyup_object_maps(struct rbd_obj_request *obj_req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3180) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3181) struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3182) struct ceph_snap_context *snapc = obj_req->img_request->snapc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3183) u8 new_state;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3184) u32 i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3185) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3186)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3187) rbd_assert(!obj_req->pending.result && !obj_req->pending.num_pending);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3188)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3189) if (!(rbd_dev->header.features & RBD_FEATURE_OBJECT_MAP))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3190) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3191)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3192) if (obj_req->flags & RBD_OBJ_FLAG_COPYUP_ZEROS)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3193) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3194)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3195) for (i = 0; i < snapc->num_snaps; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3196) if ((rbd_dev->header.features & RBD_FEATURE_FAST_DIFF) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3197) i + 1 < snapc->num_snaps)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3198) new_state = OBJECT_EXISTS_CLEAN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3199) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3200) new_state = OBJECT_EXISTS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3201)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3202) ret = rbd_object_map_update(obj_req, snapc->snaps[i],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3203) new_state, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3204) if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3205) obj_req->pending.result = ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3206) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3207) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3208)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3209) rbd_assert(!ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3210) obj_req->pending.num_pending++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3211) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3212) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3213)
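/*
 * Issue the copyup write(s).  If the image has snapshots and there is
 * non-zero copyup data, a deep-copyup request (empty snapc) goes out
 * first and the current-snapc request then carries only the
 * modification ops (MODS_ONLY); otherwise a single current-snapc
 * request carries both.  Each request sent bumps pending.num_pending.
 */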
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3214) static void rbd_obj_copyup_write_object(struct rbd_obj_request *obj_req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3215) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3216) u32 bytes = rbd_obj_img_extents_bytes(obj_req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3217) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3218)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3219) rbd_assert(!obj_req->pending.result && !obj_req->pending.num_pending);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3220)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3221) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3222) * Only send non-zero copyup data to save some I/O and network
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3223) * bandwidth -- zero copyup data is equivalent to the object not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3224) * existing.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3225) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3226) if (obj_req->flags & RBD_OBJ_FLAG_COPYUP_ZEROS)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3227) bytes = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3228)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3229) if (obj_req->img_request->snapc->num_snaps && bytes > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3230) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3231) * Send a copyup request with an empty snapshot context to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3232) * deep-copyup the object through all existing snapshots.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3233) * A second request with the current snapshot context will be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3234) * sent for the actual modification.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3235) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3236) ret = rbd_obj_copyup_empty_snapc(obj_req, bytes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3237) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3238) obj_req->pending.result = ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3239) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3240) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3241)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3242) obj_req->pending.num_pending++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3243) bytes = MODS_ONLY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3244) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3245)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3246) ret = rbd_obj_copyup_current_snapc(obj_req, bytes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3247) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3248) obj_req->pending.result = ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3249) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3250) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3251)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3252) obj_req->pending.num_pending++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3253) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3254)
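/*
 * Advance the copyup state machine:
 *
 *   START -> READ_PARENT -> [__]OBJECT_MAPS -> [__]WRITE_OBJECT
 *
 * READ_PARENT is skipped when nothing falls within the parent
 * overlap.  The __ states wait for pending.num_pending to drop to
 * zero.  Returns true when copyup is finished (outcome in *result),
 * false if it is blocked waiting for completions.
 */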
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3255) static bool rbd_obj_advance_copyup(struct rbd_obj_request *obj_req, int *result)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3256) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3257) struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3258) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3259)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3260) again:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3261) switch (obj_req->copyup_state) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3262) case RBD_OBJ_COPYUP_START:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3263) rbd_assert(!*result);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3264)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3265) ret = rbd_obj_copyup_read_parent(obj_req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3266) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3267) *result = ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3268) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3269) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3270) if (obj_req->num_img_extents)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3271) obj_req->copyup_state = RBD_OBJ_COPYUP_READ_PARENT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3272) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3273) obj_req->copyup_state = RBD_OBJ_COPYUP_WRITE_OBJECT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3274) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3275) case RBD_OBJ_COPYUP_READ_PARENT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3276) if (*result)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3277) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3278)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3279) if (is_zero_bvecs(obj_req->copyup_bvecs,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3280) rbd_obj_img_extents_bytes(obj_req))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3281) dout("%s %p detected zeros\n", __func__, obj_req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3282) obj_req->flags |= RBD_OBJ_FLAG_COPYUP_ZEROS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3283) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3284)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3285) rbd_obj_copyup_object_maps(obj_req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3286) if (!obj_req->pending.num_pending) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3287) *result = obj_req->pending.result;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3288) obj_req->copyup_state = RBD_OBJ_COPYUP_OBJECT_MAPS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3289) goto again;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3290) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3291) obj_req->copyup_state = __RBD_OBJ_COPYUP_OBJECT_MAPS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3292) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3293) case __RBD_OBJ_COPYUP_OBJECT_MAPS:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3294) if (!pending_result_dec(&obj_req->pending, result))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3295) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3296) fallthrough;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3297) case RBD_OBJ_COPYUP_OBJECT_MAPS:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3298) if (*result) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3299) rbd_warn(rbd_dev, "snap object map update failed: %d",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3300) *result);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3301) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3302) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3303)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3304) rbd_obj_copyup_write_object(obj_req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3305) if (!obj_req->pending.num_pending) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3306) *result = obj_req->pending.result;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3307) obj_req->copyup_state = RBD_OBJ_COPYUP_WRITE_OBJECT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3308) goto again;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3309) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3310) obj_req->copyup_state = __RBD_OBJ_COPYUP_WRITE_OBJECT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3311) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3312) case __RBD_OBJ_COPYUP_WRITE_OBJECT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3313) if (!pending_result_dec(&obj_req->pending, result))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3314) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3315) fallthrough;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3316) case RBD_OBJ_COPYUP_WRITE_OBJECT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3317) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3318) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3319) BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3320) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3321) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3322)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3323) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3324) * Return:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3325) * 0 - object map update sent
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3326) * 1 - object map update isn't needed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3327) * <0 - error
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3328) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3329) static int rbd_obj_write_post_object_map(struct rbd_obj_request *obj_req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3330) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3331) struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3332) u8 current_state = OBJECT_PENDING;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3333)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3334) if (!(rbd_dev->header.features & RBD_FEATURE_OBJECT_MAP))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3335) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3336)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3337) if (!(obj_req->flags & RBD_OBJ_FLAG_DELETION))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3338) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3339)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3340) return rbd_object_map_update(obj_req, CEPH_NOSNAP, OBJECT_NONEXISTENT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3341) &current_state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3342) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3343)
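/*
 * Advance the write state machine:
 *
 *   START -> PRE_OBJECT_MAP -> OBJECT -> [__]COPYUP -> POST_OBJECT_MAP
 *
 * A write that hits a non-existent object (-ENOENT) with copyup
 * enabled is diverted into the copyup state machine above.  Returns
 * true when the request is finished, false if it is waiting for a
 * completion.
 */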
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3344) static bool rbd_obj_advance_write(struct rbd_obj_request *obj_req, int *result)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3345) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3346) struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3347) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3348)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3349) again:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3350) switch (obj_req->write_state) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3351) case RBD_OBJ_WRITE_START:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3352) rbd_assert(!*result);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3353)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3354) if (rbd_obj_write_is_noop(obj_req))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3355) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3356)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3357) ret = rbd_obj_write_pre_object_map(obj_req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3358) if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3359) *result = ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3360) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3361) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3362) obj_req->write_state = RBD_OBJ_WRITE_PRE_OBJECT_MAP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3363) if (ret > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3364) goto again;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3365) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3366) case RBD_OBJ_WRITE_PRE_OBJECT_MAP:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3367) if (*result) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3368) rbd_warn(rbd_dev, "pre object map update failed: %d",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3369) *result);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3370) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3371) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3372) ret = rbd_obj_write_object(obj_req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3373) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3374) *result = ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3375) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3376) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3377) obj_req->write_state = RBD_OBJ_WRITE_OBJECT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3378) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3379) case RBD_OBJ_WRITE_OBJECT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3380) if (*result == -ENOENT) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3381) if (obj_req->flags & RBD_OBJ_FLAG_COPYUP_ENABLED) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3382) *result = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3383) obj_req->copyup_state = RBD_OBJ_COPYUP_START;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3384) obj_req->write_state = __RBD_OBJ_WRITE_COPYUP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3385) goto again;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3386) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3387) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3388) * On a non-existent object:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3389) * delete results in -ENOENT, truncate/zero results in 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3390) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3391) if (obj_req->flags & RBD_OBJ_FLAG_DELETION)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3392) *result = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3393) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3394) if (*result)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3395) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3396)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3397) obj_req->write_state = RBD_OBJ_WRITE_COPYUP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3398) goto again;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3399) case __RBD_OBJ_WRITE_COPYUP:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3400) if (!rbd_obj_advance_copyup(obj_req, result))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3401) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3402) fallthrough;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3403) case RBD_OBJ_WRITE_COPYUP:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3404) if (*result) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3405) rbd_warn(rbd_dev, "copyup failed: %d", *result);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3406) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3407) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3408) ret = rbd_obj_write_post_object_map(obj_req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3409) if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3410) *result = ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3411) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3412) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3413) obj_req->write_state = RBD_OBJ_WRITE_POST_OBJECT_MAP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3414) if (ret > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3415) goto again;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3416) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3417) case RBD_OBJ_WRITE_POST_OBJECT_MAP:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3418) if (*result)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3419) rbd_warn(rbd_dev, "post object map update failed: %d",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3420) *result);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3421) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3422) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3423) BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3424) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3425) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3426)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3427) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3428) * Return true if @obj_req is completed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3429) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3430) static bool __rbd_obj_handle_request(struct rbd_obj_request *obj_req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3431) int *result)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3432) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3433) struct rbd_img_request *img_req = obj_req->img_request;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3434) struct rbd_device *rbd_dev = img_req->rbd_dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3435) bool done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3436)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3437) mutex_lock(&obj_req->state_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3438) if (!rbd_img_is_write(img_req))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3439) done = rbd_obj_advance_read(obj_req, result);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3440) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3441) done = rbd_obj_advance_write(obj_req, result);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3442) mutex_unlock(&obj_req->state_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3443)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3444) if (done && *result) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3445) rbd_assert(*result < 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3446) rbd_warn(rbd_dev, "%s at objno %llu %llu~%llu result %d",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3447) obj_op_name(img_req->op_type), obj_req->ex.oe_objno,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3448) obj_req->ex.oe_off, obj_req->ex.oe_len, *result);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3449) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3450) return done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3451) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3452)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3453) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3454) * This is open-coded in rbd_img_handle_request() to avoid parent chain
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3455) * recursion.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3456) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3457) static void rbd_obj_handle_request(struct rbd_obj_request *obj_req, int result)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3458) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3459) if (__rbd_obj_handle_request(obj_req, &result))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3460) rbd_img_handle_request(obj_req->img_request, result);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3461) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3462)
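/*
 * The exclusive lock is needed for any modification, and also for
 * reads if lock_on_read is set or the object-map feature is enabled.
 * It is never taken for read-only mappings or for images without the
 * exclusive-lock feature.
 */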
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3463) static bool need_exclusive_lock(struct rbd_img_request *img_req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3464) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3465) struct rbd_device *rbd_dev = img_req->rbd_dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3466)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3467) if (!(rbd_dev->header.features & RBD_FEATURE_EXCLUSIVE_LOCK))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3468) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3469)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3470) if (rbd_is_ro(rbd_dev))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3471) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3472)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3473) rbd_assert(!test_bit(IMG_REQ_CHILD, &img_req->flags));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3474) if (rbd_dev->opts->lock_on_read ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3475) (rbd_dev->header.features & RBD_FEATURE_OBJECT_MAP))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3476) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3477)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3478) return rbd_img_is_write(img_req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3479) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3480)
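/*
 * Put the image request on the appropriate lock list: running_list if
 * the lock is already held, acquiring_list if it still needs to be
 * acquired.  Returns whether the lock was held.
 */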
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3481) static bool rbd_lock_add_request(struct rbd_img_request *img_req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3482) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3483) struct rbd_device *rbd_dev = img_req->rbd_dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3484) bool locked;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3485)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3486) lockdep_assert_held(&rbd_dev->lock_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3487) locked = rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3488) spin_lock(&rbd_dev->lock_lists_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3489) rbd_assert(list_empty(&img_req->lock_item));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3490) if (!locked)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3491) list_add_tail(&img_req->lock_item, &rbd_dev->acquiring_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3492) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3493) list_add_tail(&img_req->lock_item, &rbd_dev->running_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3494) spin_unlock(&rbd_dev->lock_lists_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3495) return locked;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3496) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3497)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3498) static void rbd_lock_del_request(struct rbd_img_request *img_req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3499) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3500) struct rbd_device *rbd_dev = img_req->rbd_dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3501) bool need_wakeup;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3502)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3503) lockdep_assert_held(&rbd_dev->lock_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3504) spin_lock(&rbd_dev->lock_lists_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3505) rbd_assert(!list_empty(&img_req->lock_item));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3506) list_del_init(&img_req->lock_item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3507) need_wakeup = (rbd_dev->lock_state == RBD_LOCK_STATE_RELEASING &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3508) list_empty(&rbd_dev->running_list));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3509) spin_unlock(&rbd_dev->lock_lists_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3510) if (need_wakeup)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3511) complete(&rbd_dev->releasing_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3512) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3513)
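/*
 * Return:
 *   1 - exclusive lock isn't needed or is already held, proceed
 *   0 - acquisition queued, the request will be kicked when the lock
 *       is acquired (see wake_lock_waiters())
 *  <0 - error (-EROFS if the mapping is exclusive but the lock isn't
 *       held)
 */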
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3514) static int rbd_img_exclusive_lock(struct rbd_img_request *img_req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3515) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3516) struct rbd_device *rbd_dev = img_req->rbd_dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3517)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3518) if (!need_exclusive_lock(img_req))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3519) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3520)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3521) if (rbd_lock_add_request(img_req))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3522) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3523)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3524) if (rbd_dev->opts->exclusive) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3525) WARN_ON(1); /* lock got released? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3526) return -EROFS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3527) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3528)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3529) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3530) * Note the use of mod_delayed_work() in rbd_acquire_lock()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3531) * and cancel_delayed_work() in wake_lock_waiters().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3532) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3533) dout("%s rbd_dev %p queueing lock_dwork\n", __func__, rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3534) queue_delayed_work(rbd_dev->task_wq, &rbd_dev->lock_dwork, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3535) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3536) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3537)
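/*
 * Kick off the state machine of each object request.  An immediate
 * error short-circuits the loop; requests that don't complete right
 * away are counted in pending.num_pending.
 */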
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3538) static void rbd_img_object_requests(struct rbd_img_request *img_req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3539) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3540) struct rbd_obj_request *obj_req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3541)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3542) rbd_assert(!img_req->pending.result && !img_req->pending.num_pending);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3543)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3544) for_each_obj_request(img_req, obj_req) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3545) int result = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3546)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3547) if (__rbd_obj_handle_request(obj_req, &result)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3548) if (result) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3549) img_req->pending.result = result;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3550) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3551) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3552) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3553) img_req->pending.num_pending++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3554) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3555) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3556) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3557)
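/*
 * Advance the image request state machine:
 *
 *   START -> EXCLUSIVE_LOCK -> [__]OBJECT_REQUESTS
 *
 * Returns true when the image request is finished, false if it is
 * waiting -- for the exclusive lock or for object requests to
 * complete.
 */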
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3558) static bool rbd_img_advance(struct rbd_img_request *img_req, int *result)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3559) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3560) struct rbd_device *rbd_dev = img_req->rbd_dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3561) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3562)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3563) again:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3564) switch (img_req->state) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3565) case RBD_IMG_START:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3566) rbd_assert(!*result);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3567)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3568) ret = rbd_img_exclusive_lock(img_req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3569) if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3570) *result = ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3571) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3572) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3573) img_req->state = RBD_IMG_EXCLUSIVE_LOCK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3574) if (ret > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3575) goto again;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3576) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3577) case RBD_IMG_EXCLUSIVE_LOCK:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3578) if (*result)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3579) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3580)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3581) rbd_assert(!need_exclusive_lock(img_req) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3582) __rbd_is_lock_owner(rbd_dev));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3583)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3584) rbd_img_object_requests(img_req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3585) if (!img_req->pending.num_pending) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3586) *result = img_req->pending.result;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3587) img_req->state = RBD_IMG_OBJECT_REQUESTS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3588) goto again;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3589) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3590) img_req->state = __RBD_IMG_OBJECT_REQUESTS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3591) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3592) case __RBD_IMG_OBJECT_REQUESTS:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3593) if (!pending_result_dec(&img_req->pending, result))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3594) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3595) fallthrough;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3596) case RBD_IMG_OBJECT_REQUESTS:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3597) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3598) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3599) BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3600) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3601) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3602)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3603) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3604) * Return true if @img_req is completed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3605) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3606) static bool __rbd_img_handle_request(struct rbd_img_request *img_req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3607) int *result)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3608) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3609) struct rbd_device *rbd_dev = img_req->rbd_dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3610) bool done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3611)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3612) if (need_exclusive_lock(img_req)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3613) down_read(&rbd_dev->lock_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3614) mutex_lock(&img_req->state_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3615) done = rbd_img_advance(img_req, result);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3616) if (done)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3617) rbd_lock_del_request(img_req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3618) mutex_unlock(&img_req->state_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3619) up_read(&rbd_dev->lock_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3620) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3621) mutex_lock(&img_req->state_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3622) done = rbd_img_advance(img_req, result);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3623) mutex_unlock(&img_req->state_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3624) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3625)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3626) if (done && *result) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3627) rbd_assert(*result < 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3628) rbd_warn(rbd_dev, "%s%s result %d",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3629) test_bit(IMG_REQ_CHILD, &img_req->flags) ? "child " : "",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3630) obj_op_name(img_req->op_type), *result);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3631) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3632) return done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3633) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3634)
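/*
 * A completed child (parent chain) image request may complete the
 * originating object request, which in turn may complete its image
 * request -- iterate instead of recursing, see the comment above
 * rbd_obj_handle_request().
 */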
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3635) static void rbd_img_handle_request(struct rbd_img_request *img_req, int result)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3636) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3637) again:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3638) if (!__rbd_img_handle_request(img_req, &result))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3639) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3640)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3641) if (test_bit(IMG_REQ_CHILD, &img_req->flags)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3642) struct rbd_obj_request *obj_req = img_req->obj_request;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3643)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3644) rbd_img_request_destroy(img_req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3645) if (__rbd_obj_handle_request(obj_req, &result)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3646) img_req = obj_req->img_request;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3647) goto again;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3648) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3649) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3650) struct request *rq = blk_mq_rq_from_pdu(img_req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3651)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3652) rbd_img_request_destroy(img_req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3653) blk_mq_end_request(rq, errno_to_blk_status(result));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3654) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3655) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3656)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3657) static const struct rbd_client_id rbd_empty_cid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3658)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3659) static bool rbd_cid_equal(const struct rbd_client_id *lhs,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3660) const struct rbd_client_id *rhs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3661) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3662) return lhs->gid == rhs->gid && lhs->handle == rhs->handle;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3663) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3664)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3665) static struct rbd_client_id rbd_get_cid(struct rbd_device *rbd_dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3666) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3667) struct rbd_client_id cid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3668)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3669) mutex_lock(&rbd_dev->watch_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3670) cid.gid = ceph_client_gid(rbd_dev->rbd_client->client);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3671) cid.handle = rbd_dev->watch_cookie;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3672) mutex_unlock(&rbd_dev->watch_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3673) return cid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3674) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3675)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3676) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3677) * lock_rwsem must be held for write
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3678) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3679) static void rbd_set_owner_cid(struct rbd_device *rbd_dev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3680) const struct rbd_client_id *cid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3681) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3682) dout("%s rbd_dev %p %llu-%llu -> %llu-%llu\n", __func__, rbd_dev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3683) rbd_dev->owner_cid.gid, rbd_dev->owner_cid.handle,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3684) cid->gid, cid->handle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3685) rbd_dev->owner_cid = *cid; /* struct */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3686) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3687)
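/*
 * The lock cookie is RBD_LOCK_COOKIE_PREFIX followed by the watch
 * cookie, which is what lets find_watcher() match a locker to its
 * watch on the header object.
 */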
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3688) static void format_lock_cookie(struct rbd_device *rbd_dev, char *buf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3689) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3690) mutex_lock(&rbd_dev->watch_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3691) sprintf(buf, "%s %llu", RBD_LOCK_COOKIE_PREFIX, rbd_dev->watch_cookie);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3692) mutex_unlock(&rbd_dev->watch_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3693) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3694)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3695) static void __rbd_lock(struct rbd_device *rbd_dev, const char *cookie)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3696) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3697) struct rbd_client_id cid = rbd_get_cid(rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3698)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3699) rbd_dev->lock_state = RBD_LOCK_STATE_LOCKED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3700) strcpy(rbd_dev->lock_cookie, cookie);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3701) rbd_set_owner_cid(rbd_dev, &cid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3702) queue_work(rbd_dev->task_wq, &rbd_dev->acquired_lock_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3703) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3704)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3705) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3706) * lock_rwsem must be held for write
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3707) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3708) static int rbd_lock(struct rbd_device *rbd_dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3709) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3710) struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3711) char cookie[32];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3712) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3713)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3714) WARN_ON(__rbd_is_lock_owner(rbd_dev) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3715) rbd_dev->lock_cookie[0] != '\0');
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3716)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3717) format_lock_cookie(rbd_dev, cookie);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3718) ret = ceph_cls_lock(osdc, &rbd_dev->header_oid, &rbd_dev->header_oloc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3719) RBD_LOCK_NAME, CEPH_CLS_LOCK_EXCLUSIVE, cookie,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3720) RBD_LOCK_TAG, "", 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3721) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3722) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3723)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3724) __rbd_lock(rbd_dev, cookie);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3725) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3726) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3727)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3728) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3729) * lock_rwsem must be held for write
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3730) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3731) static void rbd_unlock(struct rbd_device *rbd_dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3732) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3733) struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3734) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3735)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3736) WARN_ON(!__rbd_is_lock_owner(rbd_dev) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3737) rbd_dev->lock_cookie[0] == '\0');
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3738)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3739) ret = ceph_cls_unlock(osdc, &rbd_dev->header_oid, &rbd_dev->header_oloc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3740) RBD_LOCK_NAME, rbd_dev->lock_cookie);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3741) if (ret && ret != -ENOENT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3742) rbd_warn(rbd_dev, "failed to unlock header: %d", ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3743)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3744) /* treat errors as if the image had been unlocked */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3745) rbd_dev->lock_state = RBD_LOCK_STATE_UNLOCKED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3746) rbd_dev->lock_cookie[0] = '\0';
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3747) rbd_set_owner_cid(rbd_dev, &rbd_empty_cid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3748) queue_work(rbd_dev->task_wq, &rbd_dev->released_lock_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3749) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3750)
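/*
 * Send a notify on the header object.  The payload is an encoding
 * start block followed by the notify op (32 bits) and the client id
 * (gid and handle, 64 bits each) -- hence the sizing of buf below.
 * Acks are returned in *preply_pages/*preply_len if requested.
 */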
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3751) static int __rbd_notify_op_lock(struct rbd_device *rbd_dev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3752) enum rbd_notify_op notify_op,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3753) struct page ***preply_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3754) size_t *preply_len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3755) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3756) struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3757) struct rbd_client_id cid = rbd_get_cid(rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3758) char buf[4 + 8 + 8 + CEPH_ENCODING_START_BLK_LEN];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3759) int buf_size = sizeof(buf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3760) void *p = buf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3761)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3762) dout("%s rbd_dev %p notify_op %d\n", __func__, rbd_dev, notify_op);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3763)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3764) /* encode *LockPayload NotifyMessage (op + ClientId) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3765) ceph_start_encoding(&p, 2, 1, buf_size - CEPH_ENCODING_START_BLK_LEN);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3766) ceph_encode_32(&p, notify_op);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3767) ceph_encode_64(&p, cid.gid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3768) ceph_encode_64(&p, cid.handle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3769)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3770) return ceph_osdc_notify(osdc, &rbd_dev->header_oid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3771) &rbd_dev->header_oloc, buf, buf_size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3772) RBD_NOTIFY_TIMEOUT, preply_pages, preply_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3773) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3774)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3775) static void rbd_notify_op_lock(struct rbd_device *rbd_dev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3776) enum rbd_notify_op notify_op)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3777) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3778) __rbd_notify_op_lock(rbd_dev, notify_op, NULL, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3779) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3780)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3781) static void rbd_notify_acquired_lock(struct work_struct *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3782) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3783) struct rbd_device *rbd_dev = container_of(work, struct rbd_device,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3784) acquired_lock_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3785)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3786) rbd_notify_op_lock(rbd_dev, RBD_NOTIFY_OP_ACQUIRED_LOCK);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3787) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3788)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3789) static void rbd_notify_released_lock(struct work_struct *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3790) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3791) struct rbd_device *rbd_dev = container_of(work, struct rbd_device,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3792) released_lock_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3793)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3794) rbd_notify_op_lock(rbd_dev, RBD_NOTIFY_OP_RELEASED_LOCK);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3795) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3796)
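/*
 * Ask the lock owner to release the lock: send a REQUEST_LOCK notify
 * and scan the acks for a non-empty payload -- only the owner replies
 * with a ResponseMessage carrying a result code.  No reply means the
 * owner is gone (-ETIMEDOUT); more than one reply means duplicate
 * owners (-EIO).
 */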
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3797) static int rbd_request_lock(struct rbd_device *rbd_dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3798) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3799) struct page **reply_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3800) size_t reply_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3801) bool lock_owner_responded = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3802) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3803)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3804) dout("%s rbd_dev %p\n", __func__, rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3805)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3806) ret = __rbd_notify_op_lock(rbd_dev, RBD_NOTIFY_OP_REQUEST_LOCK,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3807) &reply_pages, &reply_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3808) if (ret && ret != -ETIMEDOUT) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3809) rbd_warn(rbd_dev, "failed to request lock: %d", ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3810) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3811) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3812)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3813) if (reply_len > 0 && reply_len <= PAGE_SIZE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3814) void *p = page_address(reply_pages[0]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3815) void *const end = p + reply_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3816) u32 n;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3817)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3818) ceph_decode_32_safe(&p, end, n, e_inval); /* num_acks */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3819) while (n--) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3820) u8 struct_v;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3821) u32 len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3822)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3823) ceph_decode_need(&p, end, 8 + 8, e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3824) p += 8 + 8; /* skip gid and cookie */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3825)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3826) ceph_decode_32_safe(&p, end, len, e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3827) if (!len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3828) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3829)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3830) if (lock_owner_responded) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3831) rbd_warn(rbd_dev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3832) "duplicate lock owners detected");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3833) ret = -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3834) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3835) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3836)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3837) lock_owner_responded = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3838) ret = ceph_start_decoding(&p, end, 1, "ResponseMessage",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3839) &struct_v, &len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3840) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3841) rbd_warn(rbd_dev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3842) "failed to decode ResponseMessage: %d",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3843) ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3844) goto e_inval;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3845) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3846)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3847) ret = ceph_decode_32(&p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3848) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3849) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3850)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3851) if (!lock_owner_responded) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3852) rbd_warn(rbd_dev, "no lock owners detected");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3853) ret = -ETIMEDOUT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3854) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3855)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3856) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3857) ceph_release_page_vector(reply_pages, calc_pages_for(0, reply_len));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3858) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3859)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3860) e_inval:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3861) ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3862) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3863) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3864)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3865) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3866) * Wake whoever is waiting for the lock: either the image request state
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3867) * machine(s) or rbd_add_acquire_lock() (i.e. "rbd map").
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3868) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3869) static void wake_lock_waiters(struct rbd_device *rbd_dev, int result)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3870) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3871) struct rbd_img_request *img_req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3872)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3873) dout("%s rbd_dev %p result %d\n", __func__, rbd_dev, result);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3874) lockdep_assert_held_write(&rbd_dev->lock_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3875)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3876) cancel_delayed_work(&rbd_dev->lock_dwork);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3877) if (!completion_done(&rbd_dev->acquire_wait)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3878) rbd_assert(list_empty(&rbd_dev->acquiring_list) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3879) list_empty(&rbd_dev->running_list));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3880) rbd_dev->acquire_err = result;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3881) complete_all(&rbd_dev->acquire_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3882) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3883) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3884)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3885) list_for_each_entry(img_req, &rbd_dev->acquiring_list, lock_item) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3886) mutex_lock(&img_req->state_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3887) rbd_assert(img_req->state == RBD_IMG_EXCLUSIVE_LOCK);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3888) rbd_img_schedule(img_req, result);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3889) mutex_unlock(&img_req->state_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3890) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3891)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3892) list_splice_tail_init(&rbd_dev->acquiring_list, &rbd_dev->running_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3893) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3894)
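/*
 * Fetch the current lock holder(s).  A lock that wasn't taken by rbd
 * (wrong tag or cookie prefix) or is shared rather than exclusive
 * can't be safely broken, so it is reported as -EBUSY.
 */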
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3895) static int get_lock_owner_info(struct rbd_device *rbd_dev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3896) struct ceph_locker **lockers, u32 *num_lockers)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3897) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3898) struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3899) u8 lock_type;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3900) char *lock_tag;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3901) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3902)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3903) dout("%s rbd_dev %p\n", __func__, rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3904)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3905) ret = ceph_cls_lock_info(osdc, &rbd_dev->header_oid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3906) &rbd_dev->header_oloc, RBD_LOCK_NAME,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3907) &lock_type, &lock_tag, lockers, num_lockers);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3908) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3909) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3910)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3911) if (*num_lockers == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3912) dout("%s rbd_dev %p no lockers detected\n", __func__, rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3913) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3914) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3915)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3916) if (strcmp(lock_tag, RBD_LOCK_TAG)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3917) rbd_warn(rbd_dev, "locked by external mechanism, tag %s",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3918) lock_tag);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3919) ret = -EBUSY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3920) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3921) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3922)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3923) if (lock_type == CEPH_CLS_LOCK_SHARED) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3924) rbd_warn(rbd_dev, "shared lock type detected");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3925) ret = -EBUSY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3926) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3927) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3928)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3929) if (strncmp((*lockers)[0].id.cookie, RBD_LOCK_COOKIE_PREFIX,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3930) strlen(RBD_LOCK_COOKIE_PREFIX))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3931) rbd_warn(rbd_dev, "locked by external mechanism, cookie %s",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3932) (*lockers)[0].id.cookie);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3933) ret = -EBUSY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3934) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3935) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3936)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3937) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3938) kfree(lock_tag);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3939) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3940) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3941)
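/*
 * Return:
 *   1 - the locker holds a watch on the header object (it is alive)
 *   0 - no matching watch, the locker has presumably died
 *  <0 - error
 */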
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3942) static int find_watcher(struct rbd_device *rbd_dev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3943) const struct ceph_locker *locker)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3944) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3945) struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3946) struct ceph_watch_item *watchers;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3947) u32 num_watchers;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3948) u64 cookie;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3949) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3950) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3951)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3952) ret = ceph_osdc_list_watchers(osdc, &rbd_dev->header_oid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3953) &rbd_dev->header_oloc, &watchers,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3954) &num_watchers);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3955) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3956) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3957)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3958) sscanf(locker->id.cookie, RBD_LOCK_COOKIE_PREFIX " %llu", &cookie);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3959) for (i = 0; i < num_watchers; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3960) if (!memcmp(&watchers[i].addr, &locker->info.addr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3961) sizeof(locker->info.addr)) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3962) watchers[i].cookie == cookie) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3963) struct rbd_client_id cid = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3964) .gid = le64_to_cpu(watchers[i].name.num),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3965) .handle = cookie,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3966) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3967)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3968) dout("%s rbd_dev %p found cid %llu-%llu\n", __func__,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3969) rbd_dev, cid.gid, cid.handle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3970) rbd_set_owner_cid(rbd_dev, &cid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3971) ret = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3972) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3973) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3974) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3975)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3976) dout("%s rbd_dev %p no watchers\n", __func__, rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3977) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3978) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3979) kfree(watchers);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3980) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3981) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3982)
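/*
 * Keep trying to take the lock.  If it is held by a client that no
 * longer watches the header object, assume that client is dead:
 * blocklist it, break its lock and retry from the top.
 */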
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3983) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3984) * lock_rwsem must be held for write
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3985) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3986) static int rbd_try_lock(struct rbd_device *rbd_dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3987) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3988) struct ceph_client *client = rbd_dev->rbd_client->client;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3989) struct ceph_locker *lockers;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3990) u32 num_lockers;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3991) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3992)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3993) for (;;) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3994) ret = rbd_lock(rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3995) if (ret != -EBUSY)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3996) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3997)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3998) /* determine if the current lock holder is still alive */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3999) ret = get_lock_owner_info(rbd_dev, &lockers, &num_lockers);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4000) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4001) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4002)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4003) if (num_lockers == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4004) goto again;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4005)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4006) ret = find_watcher(rbd_dev, lockers);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4007) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4008) goto out; /* request lock or error */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4009)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4010) rbd_warn(rbd_dev, "breaking header lock owned by %s%llu",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4011) ENTITY_NAME(lockers[0].id.name));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4012)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4013) ret = ceph_monc_blocklist_add(&client->monc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4014) &lockers[0].info.addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4015) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4016) rbd_warn(rbd_dev, "blocklist of %s%llu failed: %d",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4017) ENTITY_NAME(lockers[0].id.name), ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4018) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4019) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4020)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4021) ret = ceph_cls_break_lock(&client->osdc, &rbd_dev->header_oid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4022) &rbd_dev->header_oloc, RBD_LOCK_NAME,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4023) lockers[0].id.cookie,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4024) &lockers[0].id.name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4025) if (ret && ret != -ENOENT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4026) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4027)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4028) again:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4029) ceph_free_lockers(lockers, num_lockers);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4030) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4031)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4032) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4033) ceph_free_lockers(lockers, num_lockers);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4034) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4035) }
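/*
 * Note that a stale owner is blocklisted before its lock is broken: a
 * client whose watch has lapsed may still have requests in flight, so
 * it must be fenced off before we take over.
 */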
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4036)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4037) static int rbd_post_acquire_action(struct rbd_device *rbd_dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4038) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4039) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4040)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4041) if (rbd_dev->header.features & RBD_FEATURE_OBJECT_MAP) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4042) ret = rbd_object_map_open(rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4043) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4044) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4045) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4046)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4047) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4048) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4049)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4050) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4051) * Return:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4052) * 0 - lock acquired
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4053) * 1 - caller should call rbd_request_lock()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4054) * <0 - error
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4055) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4056) static int rbd_try_acquire_lock(struct rbd_device *rbd_dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4057) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4058) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4059)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4060) down_read(&rbd_dev->lock_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4061) dout("%s rbd_dev %p read lock_state %d\n", __func__, rbd_dev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4062) rbd_dev->lock_state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4063) if (__rbd_is_lock_owner(rbd_dev)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4064) up_read(&rbd_dev->lock_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4065) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4066) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4067)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4068) up_read(&rbd_dev->lock_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4069) down_write(&rbd_dev->lock_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4070) dout("%s rbd_dev %p write lock_state %d\n", __func__, rbd_dev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4071) rbd_dev->lock_state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4072) if (__rbd_is_lock_owner(rbd_dev)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4073) up_write(&rbd_dev->lock_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4074) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4075) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4076)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4077) ret = rbd_try_lock(rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4078) if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4079) rbd_warn(rbd_dev, "failed to lock header: %d", ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4080) if (ret == -EBLOCKLISTED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4081) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4082)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4083) ret = 1; /* request lock anyway */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4084) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4085) if (ret > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4086) up_write(&rbd_dev->lock_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4087) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4088) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4089)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4090) rbd_assert(rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4091) rbd_assert(list_empty(&rbd_dev->running_list));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4092)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4093) ret = rbd_post_acquire_action(rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4094) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4095) rbd_warn(rbd_dev, "post-acquire action failed: %d", ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4096) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4097) * Can't stay in RBD_LOCK_STATE_LOCKED because
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4098) * rbd_lock_add_request() would let the request through,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4099) * assuming that e.g. object map is locked and loaded.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4100) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4101) rbd_unlock(rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4102) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4103)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4104) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4105) wake_lock_waiters(rbd_dev, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4106) up_write(&rbd_dev->lock_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4107) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4108) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4109)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4110) static void rbd_acquire_lock(struct work_struct *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4111) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4112) struct rbd_device *rbd_dev = container_of(to_delayed_work(work),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4113) struct rbd_device, lock_dwork);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4114) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4115)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4116) dout("%s rbd_dev %p\n", __func__, rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4117) again:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4118) ret = rbd_try_acquire_lock(rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4119) if (ret <= 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4120) dout("%s rbd_dev %p ret %d - done\n", __func__, rbd_dev, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4121) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4122) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4123)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4124) ret = rbd_request_lock(rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4125) if (ret == -ETIMEDOUT) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4126) goto again; /* treat this as a dead client */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4127) } else if (ret == -EROFS) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4128) rbd_warn(rbd_dev, "peer will not release lock");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4129) down_write(&rbd_dev->lock_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4130) wake_lock_waiters(rbd_dev, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4131) up_write(&rbd_dev->lock_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4132) } else if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4133) rbd_warn(rbd_dev, "error requesting lock: %d", ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4134) mod_delayed_work(rbd_dev->task_wq, &rbd_dev->lock_dwork,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4135) RBD_RETRY_DELAY);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4136) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4137) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4138) * lock owner acked, but resend if we don't see them
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4139) * release the lock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4140) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4141) dout("%s rbd_dev %p requeuing lock_dwork\n", __func__,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4142) rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4143) mod_delayed_work(rbd_dev->task_wq, &rbd_dev->lock_dwork,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4144) msecs_to_jiffies(2 * RBD_NOTIFY_TIMEOUT * MSEC_PER_SEC));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4145) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4146) }
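/*
 * To summarize the acquisition flow:
 *
 *   rbd_acquire_lock()
 *     rbd_try_acquire_lock()  - 0: acquired, <0: error, 1: owned by a peer
 *       rbd_try_lock()        - breaks the lock of a dead owner
 *     rbd_request_lock()      - asks a live owner to release
 *
 * lock_dwork is then requeued until the owner's RELEASED_LOCK
 * notification kicks it by way of maybe_kick_acquire().
 */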
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4147)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4148) static bool rbd_quiesce_lock(struct rbd_device *rbd_dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4149) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4150) dout("%s rbd_dev %p\n", __func__, rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4151) lockdep_assert_held_write(&rbd_dev->lock_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4152)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4153) if (rbd_dev->lock_state != RBD_LOCK_STATE_LOCKED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4154) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4155)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4156) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4157) * Ensure that all in-flight IO is flushed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4158) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4159) rbd_dev->lock_state = RBD_LOCK_STATE_RELEASING;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4160) rbd_assert(!completion_done(&rbd_dev->releasing_wait));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4161) if (list_empty(&rbd_dev->running_list))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4162) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4163)
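	/*
	 * Drop lock_rwsem while waiting so that in-flight requests can
	 * finish and notifications can still be handled.  releasing_wait
	 * is completed when the last request comes off the running list
	 * (see rbd_lock_del_request()).
	 */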
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4164) up_write(&rbd_dev->lock_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4165) wait_for_completion(&rbd_dev->releasing_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4166)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4167) down_write(&rbd_dev->lock_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4168) if (rbd_dev->lock_state != RBD_LOCK_STATE_RELEASING)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4169) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4170)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4171) rbd_assert(list_empty(&rbd_dev->running_list));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4172) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4173) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4174)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4175) static void rbd_pre_release_action(struct rbd_device *rbd_dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4176) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4177) if (rbd_dev->header.features & RBD_FEATURE_OBJECT_MAP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4178) rbd_object_map_close(rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4179) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4180)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4181) static void __rbd_release_lock(struct rbd_device *rbd_dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4182) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4183) rbd_assert(list_empty(&rbd_dev->running_list));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4184)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4185) rbd_pre_release_action(rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4186) rbd_unlock(rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4187) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4188)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4189) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4190) * lock_rwsem must be held for write
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4191) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4192) static void rbd_release_lock(struct rbd_device *rbd_dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4193) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4194) if (!rbd_quiesce_lock(rbd_dev))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4195) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4196)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4197) __rbd_release_lock(rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4198)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4199) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4200) * Give others a chance to grab the lock - we would re-acquire
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4201) * almost immediately if we got new IO while draining the running
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4202) * list otherwise. We need to ack our own notifications, so this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4203) * lock_dwork will be requeued from rbd_handle_released_lock() by
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4204) * way of maybe_kick_acquire().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4205) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4206) cancel_delayed_work(&rbd_dev->lock_dwork);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4207) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4208)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4209) static void rbd_release_lock_work(struct work_struct *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4210) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4211) struct rbd_device *rbd_dev = container_of(work, struct rbd_device,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4212) unlock_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4213)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4214) down_write(&rbd_dev->lock_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4215) rbd_release_lock(rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4216) up_write(&rbd_dev->lock_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4217) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4218)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4219) static void maybe_kick_acquire(struct rbd_device *rbd_dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4220) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4221) bool have_requests;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4222)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4223) dout("%s rbd_dev %p\n", __func__, rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4224) if (__rbd_is_lock_owner(rbd_dev))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4225) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4226)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4227) spin_lock(&rbd_dev->lock_lists_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4228) have_requests = !list_empty(&rbd_dev->acquiring_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4229) spin_unlock(&rbd_dev->lock_lists_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4230) if (have_requests || delayed_work_pending(&rbd_dev->lock_dwork)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4231) dout("%s rbd_dev %p kicking lock_dwork\n", __func__, rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4232) mod_delayed_work(rbd_dev->task_wq, &rbd_dev->lock_dwork, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4233) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4234) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4235)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4236) static void rbd_handle_acquired_lock(struct rbd_device *rbd_dev, u8 struct_v,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4237) void **p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4238) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4239) struct rbd_client_id cid = { 0 };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4240)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4241) if (struct_v >= 2) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4242) cid.gid = ceph_decode_64(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4243) cid.handle = ceph_decode_64(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4244) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4245)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4246) dout("%s rbd_dev %p cid %llu-%llu\n", __func__, rbd_dev, cid.gid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4247) cid.handle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4248) if (!rbd_cid_equal(&cid, &rbd_empty_cid)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4249) down_write(&rbd_dev->lock_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4250) if (rbd_cid_equal(&cid, &rbd_dev->owner_cid)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4251) dout("%s rbd_dev %p cid %llu-%llu == owner_cid\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4252) __func__, rbd_dev, cid.gid, cid.handle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4253) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4254) rbd_set_owner_cid(rbd_dev, &cid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4255) }
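		/*
		 * Downgrade rather than drop the semaphore so that
		 * maybe_kick_acquire() below runs with no window in
		 * which the owner could change again.
		 */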
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4256) downgrade_write(&rbd_dev->lock_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4257) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4258) down_read(&rbd_dev->lock_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4259) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4260)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4261) maybe_kick_acquire(rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4262) up_read(&rbd_dev->lock_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4263) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4264)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4265) static void rbd_handle_released_lock(struct rbd_device *rbd_dev, u8 struct_v,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4266) void **p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4267) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4268) struct rbd_client_id cid = { 0 };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4269)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4270) if (struct_v >= 2) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4271) cid.gid = ceph_decode_64(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4272) cid.handle = ceph_decode_64(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4273) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4274)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4275) dout("%s rbd_dev %p cid %llu-%llu\n", __func__, rbd_dev, cid.gid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4276) cid.handle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4277) if (!rbd_cid_equal(&cid, &rbd_empty_cid)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4278) down_write(&rbd_dev->lock_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4279) if (!rbd_cid_equal(&cid, &rbd_dev->owner_cid)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4280) dout("%s rbd_dev %p cid %llu-%llu != owner_cid %llu-%llu\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4281) __func__, rbd_dev, cid.gid, cid.handle,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4282) rbd_dev->owner_cid.gid, rbd_dev->owner_cid.handle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4283) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4284) rbd_set_owner_cid(rbd_dev, &rbd_empty_cid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4285) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4286) downgrade_write(&rbd_dev->lock_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4287) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4288) down_read(&rbd_dev->lock_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4289) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4290)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4291) maybe_kick_acquire(rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4292) up_read(&rbd_dev->lock_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4293) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4294)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4295) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4296) * Returns result for ResponseMessage to be encoded (<= 0), or 1 if no
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4297) * ResponseMessage is needed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4298) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4299) static int rbd_handle_request_lock(struct rbd_device *rbd_dev, u8 struct_v,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4300) void **p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4301) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4302) struct rbd_client_id my_cid = rbd_get_cid(rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4303) struct rbd_client_id cid = { 0 };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4304) int result = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4305)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4306) if (struct_v >= 2) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4307) cid.gid = ceph_decode_64(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4308) cid.handle = ceph_decode_64(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4309) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4310)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4311) dout("%s rbd_dev %p cid %llu-%llu\n", __func__, rbd_dev, cid.gid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4312) cid.handle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4313) if (rbd_cid_equal(&cid, &my_cid))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4314) return result;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4315)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4316) down_read(&rbd_dev->lock_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4317) if (__rbd_is_lock_owner(rbd_dev)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4318) if (rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4319) rbd_cid_equal(&rbd_dev->owner_cid, &rbd_empty_cid))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4320) goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4321)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4322) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4323) * encode ResponseMessage(0) so the peer can detect
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4324) * a missing owner
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4325) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4326) result = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4327)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4328) if (rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4329) if (!rbd_dev->opts->exclusive) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4330) dout("%s rbd_dev %p queueing unlock_work\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4331) __func__, rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4332) queue_work(rbd_dev->task_wq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4333) &rbd_dev->unlock_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4334) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4335) /* refuse to release the lock */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4336) result = -EROFS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4337) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4338) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4339) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4340)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4341) out_unlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4342) up_read(&rbd_dev->lock_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4343) return result;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4344) }
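/*
 * On the requester's side (rbd_request_lock()), ResponseMessage(0)
 * indicates a live owner that should eventually release the lock,
 * ResponseMessage(-EROFS) an owner that never will, and the absence
 * of any ResponseMessage a missing owner.
 */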
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4345)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4346) static void __rbd_acknowledge_notify(struct rbd_device *rbd_dev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4347) u64 notify_id, u64 cookie, s32 *result)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4348) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4349) struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
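	/*
	 * Worst case we encode a start block (u8 struct_v, u8 compat,
	 * u32 len, i.e. CEPH_ENCODING_START_BLK_LEN bytes) followed by
	 * the 32-bit result.
	 */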
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4350) char buf[4 + CEPH_ENCODING_START_BLK_LEN];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4351) int buf_size = sizeof(buf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4352) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4353)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4354) if (result) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4355) void *p = buf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4356)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4357) /* encode ResponseMessage */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4358) ceph_start_encoding(&p, 1, 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4359) buf_size - CEPH_ENCODING_START_BLK_LEN);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4360) ceph_encode_32(&p, *result);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4361) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4362) buf_size = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4363) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4364)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4365) ret = ceph_osdc_notify_ack(osdc, &rbd_dev->header_oid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4366) &rbd_dev->header_oloc, notify_id, cookie,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4367) buf, buf_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4368) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4369) rbd_warn(rbd_dev, "acknowledge_notify failed: %d", ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4370) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4371)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4372) static void rbd_acknowledge_notify(struct rbd_device *rbd_dev, u64 notify_id,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4373) u64 cookie)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4374) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4375) dout("%s rbd_dev %p\n", __func__, rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4376) __rbd_acknowledge_notify(rbd_dev, notify_id, cookie, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4377) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4378)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4379) static void rbd_acknowledge_notify_result(struct rbd_device *rbd_dev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4380) u64 notify_id, u64 cookie, s32 result)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4381) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4382) dout("%s rbd_dev %p result %d\n", __func__, rbd_dev, result);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4383) __rbd_acknowledge_notify(rbd_dev, notify_id, cookie, &result);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4384) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4385)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4386) static void rbd_watch_cb(void *arg, u64 notify_id, u64 cookie,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4387) u64 notifier_id, void *data, size_t data_len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4388) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4389) struct rbd_device *rbd_dev = arg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4390) void *p = data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4391) void *const end = p + data_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4392) u8 struct_v = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4393) u32 len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4394) u32 notify_op;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4395) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4396)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4397) dout("%s rbd_dev %p cookie %llu notify_id %llu data_len %zu\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4398) __func__, rbd_dev, cookie, notify_id, data_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4399) if (data_len) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4400) ret = ceph_start_decoding(&p, end, 1, "NotifyMessage",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4401) &struct_v, &len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4402) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4403) rbd_warn(rbd_dev, "failed to decode NotifyMessage: %d",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4404) ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4405) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4406) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4407)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4408) notify_op = ceph_decode_32(&p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4409) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4410) /* legacy notification for header updates */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4411) notify_op = RBD_NOTIFY_OP_HEADER_UPDATE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4412) len = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4413) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4414)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4415) dout("%s rbd_dev %p notify_op %u\n", __func__, rbd_dev, notify_op);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4416) switch (notify_op) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4417) case RBD_NOTIFY_OP_ACQUIRED_LOCK:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4418) rbd_handle_acquired_lock(rbd_dev, struct_v, &p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4419) rbd_acknowledge_notify(rbd_dev, notify_id, cookie);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4420) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4421) case RBD_NOTIFY_OP_RELEASED_LOCK:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4422) rbd_handle_released_lock(rbd_dev, struct_v, &p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4423) rbd_acknowledge_notify(rbd_dev, notify_id, cookie);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4424) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4425) case RBD_NOTIFY_OP_REQUEST_LOCK:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4426) ret = rbd_handle_request_lock(rbd_dev, struct_v, &p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4427) if (ret <= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4428) rbd_acknowledge_notify_result(rbd_dev, notify_id,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4429) cookie, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4430) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4431) rbd_acknowledge_notify(rbd_dev, notify_id, cookie);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4432) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4433) case RBD_NOTIFY_OP_HEADER_UPDATE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4434) ret = rbd_dev_refresh(rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4435) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4436) rbd_warn(rbd_dev, "refresh failed: %d", ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4437)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4438) rbd_acknowledge_notify(rbd_dev, notify_id, cookie);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4439) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4440) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4441) if (rbd_is_lock_owner(rbd_dev))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4442) rbd_acknowledge_notify_result(rbd_dev, notify_id,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4443) cookie, -EOPNOTSUPP);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4444) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4445) rbd_acknowledge_notify(rbd_dev, notify_id, cookie);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4446) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4447) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4448) }
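/*
 * For reference, the NotifyMessage payload decoded above is laid out as
 * follows:
 *
 *   u8  struct_v, u8 struct_compat, u32 struct_len   (start block)
 *   u32 notify_op                                    (RBD_NOTIFY_OP_*)
 *   u64 gid, u64 handle                              (struct_v >= 2, lock ops)
 *
 * An empty payload is treated as a legacy header update notification.
 */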
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4449)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4450) static void __rbd_unregister_watch(struct rbd_device *rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4451)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4452) static void rbd_watch_errcb(void *arg, u64 cookie, int err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4453) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4454) struct rbd_device *rbd_dev = arg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4455)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4456) rbd_warn(rbd_dev, "encountered watch error: %d", err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4457)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4458) down_write(&rbd_dev->lock_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4459) rbd_set_owner_cid(rbd_dev, &rbd_empty_cid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4460) up_write(&rbd_dev->lock_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4461)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4462) mutex_lock(&rbd_dev->watch_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4463) if (rbd_dev->watch_state == RBD_WATCH_STATE_REGISTERED) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4464) __rbd_unregister_watch(rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4465) rbd_dev->watch_state = RBD_WATCH_STATE_ERROR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4466)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4467) queue_delayed_work(rbd_dev->task_wq, &rbd_dev->watch_dwork, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4468) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4469) mutex_unlock(&rbd_dev->watch_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4470) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4471)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4472) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4473) * watch_mutex must be locked
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4474) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4475) static int __rbd_register_watch(struct rbd_device *rbd_dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4476) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4477) struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4478) struct ceph_osd_linger_request *handle;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4479)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4480) rbd_assert(!rbd_dev->watch_handle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4481) dout("%s rbd_dev %p\n", __func__, rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4482)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4483) handle = ceph_osdc_watch(osdc, &rbd_dev->header_oid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4484) &rbd_dev->header_oloc, rbd_watch_cb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4485) rbd_watch_errcb, rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4486) if (IS_ERR(handle))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4487) return PTR_ERR(handle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4488)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4489) rbd_dev->watch_handle = handle;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4490) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4491) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4492)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4493) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4494) * watch_mutex must be locked
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4495) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4496) static void __rbd_unregister_watch(struct rbd_device *rbd_dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4497) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4498) struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4499) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4500)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4501) rbd_assert(rbd_dev->watch_handle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4502) dout("%s rbd_dev %p\n", __func__, rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4503)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4504) ret = ceph_osdc_unwatch(osdc, rbd_dev->watch_handle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4505) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4506) rbd_warn(rbd_dev, "failed to unwatch: %d", ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4507)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4508) rbd_dev->watch_handle = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4509) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4510)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4511) static int rbd_register_watch(struct rbd_device *rbd_dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4512) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4513) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4514)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4515) mutex_lock(&rbd_dev->watch_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4516) rbd_assert(rbd_dev->watch_state == RBD_WATCH_STATE_UNREGISTERED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4517) ret = __rbd_register_watch(rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4518) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4519) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4520)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4521) rbd_dev->watch_state = RBD_WATCH_STATE_REGISTERED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4522) rbd_dev->watch_cookie = rbd_dev->watch_handle->linger_id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4523)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4524) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4525) mutex_unlock(&rbd_dev->watch_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4526) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4527) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4528)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4529) static void cancel_tasks_sync(struct rbd_device *rbd_dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4530) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4531) dout("%s rbd_dev %p\n", __func__, rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4532)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4533) cancel_work_sync(&rbd_dev->acquired_lock_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4534) cancel_work_sync(&rbd_dev->released_lock_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4535) cancel_delayed_work_sync(&rbd_dev->lock_dwork);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4536) cancel_work_sync(&rbd_dev->unlock_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4537) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4538)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4539) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4540) * header_rwsem must not be held to avoid a deadlock with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4541) * rbd_dev_refresh() when flushing notifies.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4542) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4543) static void rbd_unregister_watch(struct rbd_device *rbd_dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4544) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4545) cancel_tasks_sync(rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4546)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4547) mutex_lock(&rbd_dev->watch_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4548) if (rbd_dev->watch_state == RBD_WATCH_STATE_REGISTERED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4549) __rbd_unregister_watch(rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4550) rbd_dev->watch_state = RBD_WATCH_STATE_UNREGISTERED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4551) mutex_unlock(&rbd_dev->watch_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4552)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4553) cancel_delayed_work_sync(&rbd_dev->watch_dwork);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4554) ceph_osdc_flush_notifies(&rbd_dev->rbd_client->client->osdc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4555) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4556)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4557) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4558) * lock_rwsem must be held for write
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4559) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4560) static void rbd_reacquire_lock(struct rbd_device *rbd_dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4561) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4562) struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4563) char cookie[32];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4564) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4565)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4566) if (!rbd_quiesce_lock(rbd_dev))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4567) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4568)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4569) format_lock_cookie(rbd_dev, cookie);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4570) ret = ceph_cls_set_cookie(osdc, &rbd_dev->header_oid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4571) &rbd_dev->header_oloc, RBD_LOCK_NAME,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4572) CEPH_CLS_LOCK_EXCLUSIVE, rbd_dev->lock_cookie,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4573) RBD_LOCK_TAG, cookie);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4574) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4575) if (ret != -EOPNOTSUPP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4576) rbd_warn(rbd_dev, "failed to update lock cookie: %d",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4577) ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4578)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4579) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4580) * Lock cookie cannot be updated on older OSDs, so do
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4581) * a manual release and queue an acquire.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4582) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4583) __rbd_release_lock(rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4584) queue_delayed_work(rbd_dev->task_wq, &rbd_dev->lock_dwork, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4585) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4586) __rbd_lock(rbd_dev, cookie);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4587) wake_lock_waiters(rbd_dev, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4588) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4589) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4590)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4591) static void rbd_reregister_watch(struct work_struct *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4592) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4593) struct rbd_device *rbd_dev = container_of(to_delayed_work(work),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4594) struct rbd_device, watch_dwork);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4595) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4596)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4597) dout("%s rbd_dev %p\n", __func__, rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4598)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4599) mutex_lock(&rbd_dev->watch_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4600) if (rbd_dev->watch_state != RBD_WATCH_STATE_ERROR) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4601) mutex_unlock(&rbd_dev->watch_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4602) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4603) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4604)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4605) ret = __rbd_register_watch(rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4606) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4607) rbd_warn(rbd_dev, "failed to reregister watch: %d", ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4608) if (ret != -EBLOCKLISTED && ret != -ENOENT) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4609) queue_delayed_work(rbd_dev->task_wq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4610) &rbd_dev->watch_dwork,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4611) RBD_RETRY_DELAY);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4612) mutex_unlock(&rbd_dev->watch_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4613) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4614) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4615)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4616) mutex_unlock(&rbd_dev->watch_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4617) down_write(&rbd_dev->lock_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4618) wake_lock_waiters(rbd_dev, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4619) up_write(&rbd_dev->lock_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4620) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4621) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4622)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4623) rbd_dev->watch_state = RBD_WATCH_STATE_REGISTERED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4624) rbd_dev->watch_cookie = rbd_dev->watch_handle->linger_id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4625) mutex_unlock(&rbd_dev->watch_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4626)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4627) down_write(&rbd_dev->lock_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4628) if (rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4629) rbd_reacquire_lock(rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4630) up_write(&rbd_dev->lock_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4631)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4632) ret = rbd_dev_refresh(rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4633) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4634) rbd_warn(rbd_dev, "reregistration refresh failed: %d", ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4635) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4636)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4637) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4638) * Synchronous osd object method call. Returns the number of bytes
 * returned in the inbound buffer, or a negative error code.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4640) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4641) static int rbd_obj_method_sync(struct rbd_device *rbd_dev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4642) struct ceph_object_id *oid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4643) struct ceph_object_locator *oloc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4644) const char *method_name,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4645) const void *outbound,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4646) size_t outbound_size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4647) void *inbound,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4648) size_t inbound_size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4649) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4650) struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4651) struct page *req_page = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4652) struct page *reply_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4653) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4654)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4655) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4656) * Method calls are ultimately read operations. The result
 * should be placed into the inbound buffer provided.  They
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4658) * also supply outbound data--parameters for the object
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4659) * method. Currently if this is present it will be a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4660) * snapshot id.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4661) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4662) if (outbound) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4663) if (outbound_size > PAGE_SIZE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4664) return -E2BIG;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4665)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4666) req_page = alloc_page(GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4667) if (!req_page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4668) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4669)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4670) memcpy(page_address(req_page), outbound, outbound_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4671) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4672)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4673) reply_page = alloc_page(GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4674) if (!reply_page) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4675) if (req_page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4676) __free_page(req_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4677) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4678) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4679)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4680) ret = ceph_osdc_call(osdc, oid, oloc, RBD_DRV_NAME, method_name,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4681) CEPH_OSD_FLAG_READ, req_page, outbound_size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4682) &reply_page, &inbound_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4683) if (!ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4684) memcpy(inbound, page_address(reply_page), inbound_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4685) ret = inbound_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4686) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4687)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4688) if (req_page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4689) __free_page(req_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4690) __free_page(reply_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4691) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4692) }
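/*
 * A representative call, matching how the v2 header helpers elsewhere
 * in this file use it - "get_size" takes a snapshot id and returns the
 * image's order and size:
 *
 *	__le64 snapid = cpu_to_le64(snap_id);
 *	struct {
 *		u8 order;
 *		__le64 size;
 *	} __attribute__ ((packed)) size_buf = { 0 };
 *
 *	ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid,
 *				  &rbd_dev->header_oloc, "get_size",
 *				  &snapid, sizeof(snapid),
 *				  &size_buf, sizeof(size_buf));
 */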
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4693)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4694) static void rbd_queue_workfn(struct work_struct *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4695) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4696) struct rbd_img_request *img_request =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4697) container_of(work, struct rbd_img_request, work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4698) struct rbd_device *rbd_dev = img_request->rbd_dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4699) enum obj_operation_type op_type = img_request->op_type;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4700) struct request *rq = blk_mq_rq_from_pdu(img_request);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4701) u64 offset = (u64)blk_rq_pos(rq) << SECTOR_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4702) u64 length = blk_rq_bytes(rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4703) u64 mapping_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4704) int result;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4705)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4706) /* Ignore/skip any zero-length requests */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4707) if (!length) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4708) dout("%s: zero-length request\n", __func__);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4709) result = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4710) goto err_img_request;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4711) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4712)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4713) blk_mq_start_request(rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4714)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4715) down_read(&rbd_dev->header_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4716) mapping_size = rbd_dev->mapping.size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4717) rbd_img_capture_header(img_request);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4718) up_read(&rbd_dev->header_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4719)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4720) if (offset + length > mapping_size) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4721) rbd_warn(rbd_dev, "beyond EOD (%llu~%llu > %llu)", offset,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4722) length, mapping_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4723) result = -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4724) goto err_img_request;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4725) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4726)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4727) dout("%s rbd_dev %p img_req %p %s %llu~%llu\n", __func__, rbd_dev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4728) img_request, obj_op_name(op_type), offset, length);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4729)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4730) if (op_type == OBJ_OP_DISCARD || op_type == OBJ_OP_ZEROOUT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4731) result = rbd_img_fill_nodata(img_request, offset, length);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4732) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4733) result = rbd_img_fill_from_bio(img_request, offset, length,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4734) rq->bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4735) if (result)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4736) goto err_img_request;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4737)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4738) rbd_img_handle_request(img_request, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4739) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4740)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4741) err_img_request:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4742) rbd_img_request_destroy(img_request);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4743) if (result)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4744) rbd_warn(rbd_dev, "%s %llx at %llx result %d",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4745) obj_op_name(op_type), length, offset, result);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4746) blk_mq_end_request(rq, errno_to_blk_status(result));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4747) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4748)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4749) static blk_status_t rbd_queue_rq(struct blk_mq_hw_ctx *hctx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4750) const struct blk_mq_queue_data *bd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4751) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4752) struct rbd_device *rbd_dev = hctx->queue->queuedata;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4753) struct rbd_img_request *img_req = blk_mq_rq_to_pdu(bd->rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4754) enum obj_operation_type op_type;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4755)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4756) switch (req_op(bd->rq)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4757) case REQ_OP_DISCARD:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4758) op_type = OBJ_OP_DISCARD;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4759) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4760) case REQ_OP_WRITE_ZEROES:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4761) op_type = OBJ_OP_ZEROOUT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4762) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4763) case REQ_OP_WRITE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4764) op_type = OBJ_OP_WRITE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4765) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4766) case REQ_OP_READ:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4767) op_type = OBJ_OP_READ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4768) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4769) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4770) rbd_warn(rbd_dev, "unknown req_op %d", req_op(bd->rq));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4771) return BLK_STS_IOERR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4772) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4773)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4774) rbd_img_request_init(img_req, rbd_dev, op_type);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4775)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4776) if (rbd_img_is_write(img_req)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4777) if (rbd_is_ro(rbd_dev)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4778) rbd_warn(rbd_dev, "%s on read-only mapping",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4779) obj_op_name(img_req->op_type));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4780) return BLK_STS_IOERR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4781) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4782) rbd_assert(!rbd_is_snap(rbd_dev));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4783) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4784)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4785) INIT_WORK(&img_req->work, rbd_queue_workfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4786) queue_work(rbd_wq, &img_req->work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4787) return BLK_STS_OK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4788) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4789)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4790) static void rbd_free_disk(struct rbd_device *rbd_dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4791) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4792) blk_cleanup_queue(rbd_dev->disk->queue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4793) blk_mq_free_tag_set(&rbd_dev->tag_set);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4794) put_disk(rbd_dev->disk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4795) rbd_dev->disk = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4796) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4797)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4798) static int rbd_obj_read_sync(struct rbd_device *rbd_dev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4799) struct ceph_object_id *oid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4800) struct ceph_object_locator *oloc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4801) void *buf, int buf_len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4803) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4804) struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4805) struct ceph_osd_request *req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4806) struct page **pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4807) int num_pages = calc_pages_for(0, buf_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4808) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4809)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4810) req = ceph_osdc_alloc_request(osdc, NULL, 1, false, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4811) if (!req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4812) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4813)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4814) ceph_oid_copy(&req->r_base_oid, oid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4815) ceph_oloc_copy(&req->r_base_oloc, oloc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4816) req->r_flags = CEPH_OSD_FLAG_READ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4817)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4818) pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4819) if (IS_ERR(pages)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4820) ret = PTR_ERR(pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4821) goto out_req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4822) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4823)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4824) osd_req_op_extent_init(req, 0, CEPH_OSD_OP_READ, 0, buf_len, 0, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4825) osd_req_op_extent_osd_data_pages(req, 0, pages, buf_len, 0, false,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4826) true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4827)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4828) ret = ceph_osdc_alloc_messages(req, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4829) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4830) goto out_req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4831)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4832) ceph_osdc_start_request(osdc, req, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4833) ret = ceph_osdc_wait_request(osdc, req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4834) if (ret >= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4835) ceph_copy_from_page_vector(pages, buf, 0, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4836)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4837) out_req:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4838) ceph_osdc_put_request(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4839) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4840) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4841)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4842) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4843) * Read the complete header for the given rbd device. On successful
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4844) * return, the rbd_dev->header field will contain up-to-date
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4845) * information about the image.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4846) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4847) static int rbd_dev_v1_header_info(struct rbd_device *rbd_dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4848) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4849) struct rbd_image_header_ondisk *ondisk = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4850) u32 snap_count = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4851) u64 names_size = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4852) u32 want_count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4853) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4854)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4855) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4856) * The complete header will include an array of its 64-bit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4857) * snapshot ids, followed by the names of those snapshots as
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4858) * a contiguous block of NUL-terminated strings. Note that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4859) * the number of snapshots could change by the time we read
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4860) * it in, in which case we re-read it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4861) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4862) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4863) size_t size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4864)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4865) kfree(ondisk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4866)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4867) size = sizeof (*ondisk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4868) size += snap_count * sizeof (struct rbd_image_snap_ondisk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4869) size += names_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4870) ondisk = kmalloc(size, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4871) if (!ondisk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4872) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4873)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4874) ret = rbd_obj_read_sync(rbd_dev, &rbd_dev->header_oid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4875) &rbd_dev->header_oloc, ondisk, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4876) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4877) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4878) if ((size_t)ret < size) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4879) rbd_warn(rbd_dev, "short header read (want %zu got %d)",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4880) size, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4881) ret = -ENXIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4882) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4883) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4884) if (!rbd_dev_ondisk_valid(ondisk)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4885) ret = -ENXIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4886) rbd_warn(rbd_dev, "invalid header");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4887) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4888) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4889)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4890) names_size = le64_to_cpu(ondisk->snap_names_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4891) want_count = snap_count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4892) snap_count = le32_to_cpu(ondisk->snap_count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4893) } while (snap_count != want_count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4894)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4895) ret = rbd_header_from_disk(rbd_dev, ondisk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4896) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4897) kfree(ondisk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4898)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4899) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4900) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4901)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4902) static void rbd_dev_update_size(struct rbd_device *rbd_dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4903) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4904) sector_t size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4905)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4906) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4907) * If EXISTS is not set, rbd_dev->disk may be NULL, so don't
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4908) * try to update its size. If REMOVING is set, updating size
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4909) * is just useless work since the device can't be opened.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4910) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4911) if (test_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4912) !test_bit(RBD_DEV_FLAG_REMOVING, &rbd_dev->flags)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4913) size = (sector_t)rbd_dev->mapping.size / SECTOR_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4914) dout("setting size to %llu sectors", (unsigned long long)size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4915) set_capacity(rbd_dev->disk, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4916) revalidate_disk_size(rbd_dev->disk, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4917) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4918) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4919)
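/*
 * Re-read the image header under header_rwsem and propagate any size
 * change to the block layer.  Triggered by header watch notifications
 * and by writes to the sysfs "refresh" attribute.
 */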
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4920) static int rbd_dev_refresh(struct rbd_device *rbd_dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4921) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4922) u64 mapping_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4923) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4924)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4925) down_write(&rbd_dev->header_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4926) mapping_size = rbd_dev->mapping.size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4927)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4928) ret = rbd_dev_header_info(rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4929) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4930) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4931)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4932) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4933) * If there is a parent, see if it has disappeared due to the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4934) * mapped image getting flattened.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4935) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4936) if (rbd_dev->parent) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4937) ret = rbd_dev_v2_parent_info(rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4938) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4939) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4940) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4941)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4942) rbd_assert(!rbd_is_snap(rbd_dev));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4943) rbd_dev->mapping.size = rbd_dev->header.image_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4944)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4945) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4946) up_write(&rbd_dev->header_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4947) if (!ret && mapping_size != rbd_dev->mapping.size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4948) rbd_dev_update_size(rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4949)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4950) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4951) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4952)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4953) static const struct blk_mq_ops rbd_mq_ops = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4954) .queue_rq = rbd_queue_rq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4955) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4956)
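/*
 * Allocate the gendisk and its blk-mq queue.  Request size is capped
 * at one object set (object_size * stripe_count bytes) so a single
 * request never spans more than one set of data objects.
 */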
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4957) static int rbd_init_disk(struct rbd_device *rbd_dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4958) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4959) struct gendisk *disk;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4960) struct request_queue *q;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4961) unsigned int objset_bytes =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4962) rbd_dev->layout.object_size * rbd_dev->layout.stripe_count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4963) int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4964)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4965) /* create gendisk info */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4966) disk = alloc_disk(single_major ?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4967) (1 << RBD_SINGLE_MAJOR_PART_SHIFT) :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4968) RBD_MINORS_PER_MAJOR);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4969) if (!disk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4970) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4971)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4972) snprintf(disk->disk_name, sizeof(disk->disk_name), RBD_DRV_NAME "%d",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4973) rbd_dev->dev_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4974) disk->major = rbd_dev->major;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4975) disk->first_minor = rbd_dev->minor;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4976) if (single_major)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4977) disk->flags |= GENHD_FL_EXT_DEVT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4978) disk->fops = &rbd_bd_ops;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4979) disk->private_data = rbd_dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4980)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4981) memset(&rbd_dev->tag_set, 0, sizeof(rbd_dev->tag_set));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4982) rbd_dev->tag_set.ops = &rbd_mq_ops;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4983) rbd_dev->tag_set.queue_depth = rbd_dev->opts->queue_depth;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4984) rbd_dev->tag_set.numa_node = NUMA_NO_NODE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4985) rbd_dev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4986) rbd_dev->tag_set.nr_hw_queues = num_present_cpus();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4987) rbd_dev->tag_set.cmd_size = sizeof(struct rbd_img_request);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4988)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4989) err = blk_mq_alloc_tag_set(&rbd_dev->tag_set);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4990) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4991) goto out_disk;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4992)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4993) q = blk_mq_init_queue(&rbd_dev->tag_set);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4994) if (IS_ERR(q)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4995) err = PTR_ERR(q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4996) goto out_tag_set;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4997) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4998)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4999) blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5000) /* QUEUE_FLAG_ADD_RANDOM is off by default for blk-mq */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5001)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5002) blk_queue_max_hw_sectors(q, objset_bytes >> SECTOR_SHIFT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5003) q->limits.max_sectors = queue_max_hw_sectors(q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5004) blk_queue_max_segments(q, USHRT_MAX);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5005) blk_queue_max_segment_size(q, UINT_MAX);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5006) blk_queue_io_min(q, rbd_dev->opts->alloc_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5007) blk_queue_io_opt(q, rbd_dev->opts->alloc_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5008)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5009) if (rbd_dev->opts->trim) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5010) blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5011) q->limits.discard_granularity = rbd_dev->opts->alloc_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5012) blk_queue_max_discard_sectors(q, objset_bytes >> SECTOR_SHIFT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5013) blk_queue_max_write_zeroes_sectors(q, objset_bytes >> SECTOR_SHIFT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5014) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5015)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5016) if (!ceph_test_opt(rbd_dev->rbd_client->client, NOCRC))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5017) blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5018)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5019) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5020) * disk_release() expects a queue ref from add_disk() and will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5021) * put it. Hold an extra ref until add_disk() is called.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5022) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5023) WARN_ON(!blk_get_queue(q));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5024) disk->queue = q;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5025) q->queuedata = rbd_dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5026)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5027) rbd_dev->disk = disk;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5028)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5029) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5030) out_tag_set:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5031) blk_mq_free_tag_set(&rbd_dev->tag_set);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5032) out_disk:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5033) put_disk(disk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5034) return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5035) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5036)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5037) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5038) sysfs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5039) */
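/*
 * Each mapped device exposes its attributes under
 * /sys/bus/rbd/devices/<dev-id>/.  For example (device id and values
 * below are illustrative):
 *
 *   $ cat /sys/bus/rbd/devices/0/pool
 *   rbd
 *   $ echo 1 > /sys/bus/rbd/devices/0/refresh
 */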
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5040)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5041) static struct rbd_device *dev_to_rbd_dev(struct device *dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5042) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5043) return container_of(dev, struct rbd_device, dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5044) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5045)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5046) static ssize_t rbd_size_show(struct device *dev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5047) struct device_attribute *attr, char *buf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5048) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5049) struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5050)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5051) return sprintf(buf, "%llu\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5052) (unsigned long long)rbd_dev->mapping.size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5053) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5054)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5055) static ssize_t rbd_features_show(struct device *dev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5056) struct device_attribute *attr, char *buf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5057) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5058) struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5059)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5060) return sprintf(buf, "0x%016llx\n", rbd_dev->header.features);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5061) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5062)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5063) static ssize_t rbd_major_show(struct device *dev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5064) struct device_attribute *attr, char *buf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5065) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5066) struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5067)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5068) if (rbd_dev->major)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5069) return sprintf(buf, "%d\n", rbd_dev->major);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5070)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5071) return sprintf(buf, "(none)\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5072) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5073)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5074) static ssize_t rbd_minor_show(struct device *dev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5075) struct device_attribute *attr, char *buf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5076) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5077) struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5078)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5079) return sprintf(buf, "%d\n", rbd_dev->minor);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5080) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5081)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5082) static ssize_t rbd_client_addr_show(struct device *dev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5083) struct device_attribute *attr, char *buf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5084) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5085) struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5086) struct ceph_entity_addr *client_addr =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5087) ceph_client_addr(rbd_dev->rbd_client->client);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5088)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5089) return sprintf(buf, "%pISpc/%u\n", &client_addr->in_addr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5090) le32_to_cpu(client_addr->nonce));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5091) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5092)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5093) static ssize_t rbd_client_id_show(struct device *dev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5094) struct device_attribute *attr, char *buf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5095) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5096) struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5097)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5098) return sprintf(buf, "client%lld\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5099) ceph_client_gid(rbd_dev->rbd_client->client));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5100) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5101)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5102) static ssize_t rbd_cluster_fsid_show(struct device *dev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5103) struct device_attribute *attr, char *buf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5104) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5105) struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5106)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5107) return sprintf(buf, "%pU\n", &rbd_dev->rbd_client->client->fsid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5108) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5109)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5110) static ssize_t rbd_config_info_show(struct device *dev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5111) struct device_attribute *attr, char *buf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5112) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5113) struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5114)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5115) if (!capable(CAP_SYS_ADMIN))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5116) return -EPERM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5117)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5118) return sprintf(buf, "%s\n", rbd_dev->config_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5119) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5120)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5121) static ssize_t rbd_pool_show(struct device *dev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5122) struct device_attribute *attr, char *buf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5123) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5124) struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5125)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5126) return sprintf(buf, "%s\n", rbd_dev->spec->pool_name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5127) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5128)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5129) static ssize_t rbd_pool_id_show(struct device *dev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5130) struct device_attribute *attr, char *buf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5131) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5132) struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5133)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5134) return sprintf(buf, "%llu\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5135) (unsigned long long) rbd_dev->spec->pool_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5136) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5137)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5138) static ssize_t rbd_pool_ns_show(struct device *dev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5139) struct device_attribute *attr, char *buf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5140) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5141) struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5142)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5143) return sprintf(buf, "%s\n", rbd_dev->spec->pool_ns ?: "");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5144) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5145)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5146) static ssize_t rbd_name_show(struct device *dev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5147) struct device_attribute *attr, char *buf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5148) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5149) struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5150)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5151) if (rbd_dev->spec->image_name)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5152) return sprintf(buf, "%s\n", rbd_dev->spec->image_name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5153)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5154) return sprintf(buf, "(unknown)\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5155) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5156)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5157) static ssize_t rbd_image_id_show(struct device *dev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5158) struct device_attribute *attr, char *buf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5159) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5160) struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5161)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5162) return sprintf(buf, "%s\n", rbd_dev->spec->image_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5163) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5164)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5165) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5166) * Shows the name of the currently-mapped snapshot (or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5167) * RBD_SNAP_HEAD_NAME for the base image).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5168) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5169) static ssize_t rbd_snap_show(struct device *dev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5170) struct device_attribute *attr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5171) char *buf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5172) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5173) struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5174)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5175) return sprintf(buf, "%s\n", rbd_dev->spec->snap_name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5176) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5177)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5178) static ssize_t rbd_snap_id_show(struct device *dev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5179) struct device_attribute *attr, char *buf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5180) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5181) struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5182)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5183) return sprintf(buf, "%llu\n", rbd_dev->spec->snap_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5184) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5185)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5186) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5187) * For a v2 image, shows the chain of parent images, separated by empty
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5188) * lines. For v1 images or if there is no parent, shows "(no parent
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5189) * image)".
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5190) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5191) static ssize_t rbd_parent_show(struct device *dev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5192) struct device_attribute *attr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5193) char *buf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5194) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5195) struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5196) ssize_t count = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5197)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5198) if (!rbd_dev->parent)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5199) return sprintf(buf, "(no parent image)\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5200)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5201) for ( ; rbd_dev->parent; rbd_dev = rbd_dev->parent) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5202) struct rbd_spec *spec = rbd_dev->parent_spec;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5203)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5204) count += sprintf(&buf[count], "%s"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5205) "pool_id %llu\npool_name %s\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5206) "pool_ns %s\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5207) "image_id %s\nimage_name %s\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5208) "snap_id %llu\nsnap_name %s\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5209) "overlap %llu\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5210) !count ? "" : "\n", /* blank line between entries */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5211) spec->pool_id, spec->pool_name,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5212) spec->pool_ns ?: "",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5213) spec->image_id, spec->image_name ?: "(unknown)",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5214) spec->snap_id, spec->snap_name,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5215) rbd_dev->parent_overlap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5216) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5217)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5218) return count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5219) }
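/*
 * Example "parent" output for a one-level chain (all values are
 * illustrative):
 *
 *   pool_id 2
 *   pool_name rbd
 *   pool_ns
 *   image_id 1b02674b0dc51
 *   image_name parent-image
 *   snap_id 4
 *   snap_name snap1
 *   overlap 10737418240
 */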
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5220)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5221) static ssize_t rbd_image_refresh(struct device *dev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5222) struct device_attribute *attr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5223) const char *buf,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5224) size_t size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5225) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5226) struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5227) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5228)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5229) if (!capable(CAP_SYS_ADMIN))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5230) return -EPERM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5231)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5232) ret = rbd_dev_refresh(rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5233) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5234) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5235)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5236) return size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5237) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5238)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5239) static DEVICE_ATTR(size, 0444, rbd_size_show, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5240) static DEVICE_ATTR(features, 0444, rbd_features_show, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5241) static DEVICE_ATTR(major, 0444, rbd_major_show, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5242) static DEVICE_ATTR(minor, 0444, rbd_minor_show, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5243) static DEVICE_ATTR(client_addr, 0444, rbd_client_addr_show, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5244) static DEVICE_ATTR(client_id, 0444, rbd_client_id_show, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5245) static DEVICE_ATTR(cluster_fsid, 0444, rbd_cluster_fsid_show, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5246) static DEVICE_ATTR(config_info, 0400, rbd_config_info_show, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5247) static DEVICE_ATTR(pool, 0444, rbd_pool_show, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5248) static DEVICE_ATTR(pool_id, 0444, rbd_pool_id_show, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5249) static DEVICE_ATTR(pool_ns, 0444, rbd_pool_ns_show, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5250) static DEVICE_ATTR(name, 0444, rbd_name_show, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5251) static DEVICE_ATTR(image_id, 0444, rbd_image_id_show, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5252) static DEVICE_ATTR(refresh, 0200, NULL, rbd_image_refresh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5253) static DEVICE_ATTR(current_snap, 0444, rbd_snap_show, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5254) static DEVICE_ATTR(snap_id, 0444, rbd_snap_id_show, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5255) static DEVICE_ATTR(parent, 0444, rbd_parent_show, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5256)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5257) static struct attribute *rbd_attrs[] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5258) &dev_attr_size.attr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5259) &dev_attr_features.attr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5260) &dev_attr_major.attr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5261) &dev_attr_minor.attr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5262) &dev_attr_client_addr.attr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5263) &dev_attr_client_id.attr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5264) &dev_attr_cluster_fsid.attr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5265) &dev_attr_config_info.attr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5266) &dev_attr_pool.attr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5267) &dev_attr_pool_id.attr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5268) &dev_attr_pool_ns.attr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5269) &dev_attr_name.attr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5270) &dev_attr_image_id.attr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5271) &dev_attr_current_snap.attr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5272) &dev_attr_snap_id.attr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5273) &dev_attr_parent.attr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5274) &dev_attr_refresh.attr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5275) NULL
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5276) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5277)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5278) static struct attribute_group rbd_attr_group = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5279) .attrs = rbd_attrs,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5280) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5281)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5282) static const struct attribute_group *rbd_attr_groups[] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5283) &rbd_attr_group,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5284) NULL
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5285) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5286)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5287) static void rbd_dev_release(struct device *dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5288)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5289) static const struct device_type rbd_device_type = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5290) .name = "rbd",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5291) .groups = rbd_attr_groups,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5292) .release = rbd_dev_release,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5293) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5294)
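/*
 * An rbd_spec identifies an image within a pool (and optionally a
 * snapshot).  It is reference counted: rbd_spec_put() on the last
 * reference frees the spec together with the strings it owns.
 */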
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5295) static struct rbd_spec *rbd_spec_get(struct rbd_spec *spec)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5296) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5297) kref_get(&spec->kref);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5298)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5299) return spec;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5300) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5301)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5302) static void rbd_spec_free(struct kref *kref);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5303) static void rbd_spec_put(struct rbd_spec *spec)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5304) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5305) if (spec)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5306) kref_put(&spec->kref, rbd_spec_free);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5307) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5308)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5309) static struct rbd_spec *rbd_spec_alloc(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5310) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5311) struct rbd_spec *spec;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5312)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5313) spec = kzalloc(sizeof (*spec), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5314) if (!spec)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5315) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5316)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5317) spec->pool_id = CEPH_NOPOOL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5318) spec->snap_id = CEPH_NOSNAP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5319) kref_init(&spec->kref);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5320)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5321) return spec;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5322) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5323)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5324) static void rbd_spec_free(struct kref *kref)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5325) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5326) struct rbd_spec *spec = container_of(kref, struct rbd_spec, kref);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5327)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5328) kfree(spec->pool_name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5329) kfree(spec->pool_ns);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5330) kfree(spec->image_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5331) kfree(spec->image_name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5332) kfree(spec->snap_name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5333) kfree(spec);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5334) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5335)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5336) static void rbd_dev_free(struct rbd_device *rbd_dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5337) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5338) WARN_ON(rbd_dev->watch_state != RBD_WATCH_STATE_UNREGISTERED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5339) WARN_ON(rbd_dev->lock_state != RBD_LOCK_STATE_UNLOCKED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5340)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5341) ceph_oid_destroy(&rbd_dev->header_oid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5342) ceph_oloc_destroy(&rbd_dev->header_oloc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5343) kfree(rbd_dev->config_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5344)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5345) rbd_put_client(rbd_dev->rbd_client);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5346) rbd_spec_put(rbd_dev->spec);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5347) kfree(rbd_dev->opts);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5348) kfree(rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5349) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5350)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5351) static void rbd_dev_release(struct device *dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5352) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5353) struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5354) bool need_put = !!rbd_dev->opts;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5355)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5356) if (need_put) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5357) destroy_workqueue(rbd_dev->task_wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5358) ida_simple_remove(&rbd_dev_id_ida, rbd_dev->dev_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5359) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5360)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5361) rbd_dev_free(rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5362)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5363) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5364) * This is racy, but way better than dropping the module reference
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5365) * outside of the release callback.  The race window is pretty small, so
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5366) * doing something similar to dm (dm-builtin.c) is overkill.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5367) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5368) if (need_put)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5369) module_put(THIS_MODULE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5370) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5371)
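/*
 * Allocate and initialize the core of an rbd_device.  On success the
 * new device takes over the caller's references to @rbdc and @spec;
 * both are dropped in rbd_dev_free().
 */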
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5372) static struct rbd_device *__rbd_dev_create(struct rbd_client *rbdc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5373) struct rbd_spec *spec)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5374) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5375) struct rbd_device *rbd_dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5376)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5377) rbd_dev = kzalloc(sizeof(*rbd_dev), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5378) if (!rbd_dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5379) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5380)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5381) spin_lock_init(&rbd_dev->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5382) INIT_LIST_HEAD(&rbd_dev->node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5383) init_rwsem(&rbd_dev->header_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5384)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5385) rbd_dev->header.data_pool_id = CEPH_NOPOOL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5386) ceph_oid_init(&rbd_dev->header_oid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5387) rbd_dev->header_oloc.pool = spec->pool_id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5388) if (spec->pool_ns) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5389) WARN_ON(!*spec->pool_ns);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5390) rbd_dev->header_oloc.pool_ns =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5391) ceph_find_or_create_string(spec->pool_ns,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5392) strlen(spec->pool_ns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5393) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5394)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5395) mutex_init(&rbd_dev->watch_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5396) rbd_dev->watch_state = RBD_WATCH_STATE_UNREGISTERED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5397) INIT_DELAYED_WORK(&rbd_dev->watch_dwork, rbd_reregister_watch);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5398)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5399) init_rwsem(&rbd_dev->lock_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5400) rbd_dev->lock_state = RBD_LOCK_STATE_UNLOCKED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5401) INIT_WORK(&rbd_dev->acquired_lock_work, rbd_notify_acquired_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5402) INIT_WORK(&rbd_dev->released_lock_work, rbd_notify_released_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5403) INIT_DELAYED_WORK(&rbd_dev->lock_dwork, rbd_acquire_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5404) INIT_WORK(&rbd_dev->unlock_work, rbd_release_lock_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5405) spin_lock_init(&rbd_dev->lock_lists_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5406) INIT_LIST_HEAD(&rbd_dev->acquiring_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5407) INIT_LIST_HEAD(&rbd_dev->running_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5408) init_completion(&rbd_dev->acquire_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5409) init_completion(&rbd_dev->releasing_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5410)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5411) spin_lock_init(&rbd_dev->object_map_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5412)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5413) rbd_dev->dev.bus = &rbd_bus_type;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5414) rbd_dev->dev.type = &rbd_device_type;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5415) rbd_dev->dev.parent = &rbd_root_dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5416) device_initialize(&rbd_dev->dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5417)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5418) rbd_dev->rbd_client = rbdc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5419) rbd_dev->spec = spec;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5420)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5421) return rbd_dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5422) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5423)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5424) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5425) * Create a mapping rbd_dev: get a device id, set up the task workqueue and take a module reference.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5426) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5427) static struct rbd_device *rbd_dev_create(struct rbd_client *rbdc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5428) struct rbd_spec *spec,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5429) struct rbd_options *opts)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5430) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5431) struct rbd_device *rbd_dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5432)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5433) rbd_dev = __rbd_dev_create(rbdc, spec);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5434) if (!rbd_dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5435) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5436)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5437) rbd_dev->opts = opts;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5438)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5439) /* get an id and fill in device name */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5440) rbd_dev->dev_id = ida_simple_get(&rbd_dev_id_ida, 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5441) minor_to_rbd_dev_id(1 << MINORBITS),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5442) GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5443) if (rbd_dev->dev_id < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5444) goto fail_rbd_dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5445)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5446) sprintf(rbd_dev->name, RBD_DRV_NAME "%d", rbd_dev->dev_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5447) rbd_dev->task_wq = alloc_ordered_workqueue("%s-tasks", WQ_MEM_RECLAIM,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5448) rbd_dev->name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5449) if (!rbd_dev->task_wq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5450) goto fail_dev_id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5451)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5452) /* we have a ref from do_rbd_add() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5453) __module_get(THIS_MODULE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5454)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5455) dout("%s rbd_dev %p dev_id %d\n", __func__, rbd_dev, rbd_dev->dev_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5456) return rbd_dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5457)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5458) fail_dev_id:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5459) ida_simple_remove(&rbd_dev_id_ida, rbd_dev->dev_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5460) fail_rbd_dev:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5461) rbd_dev_free(rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5462) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5463) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5464)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5465) static void rbd_dev_destroy(struct rbd_device *rbd_dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5466) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5467) if (rbd_dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5468) put_device(&rbd_dev->dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5469) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5470)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5471) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5472) * Get the size and object order for an image snapshot, or if
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5473) * snap_id is CEPH_NOSNAP, get this information for the base
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5474) * image.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5475) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5476) static int _rbd_dev_v2_snap_size(struct rbd_device *rbd_dev, u64 snap_id,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5477) u8 *order, u64 *snap_size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5478) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5479) __le64 snapid = cpu_to_le64(snap_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5480) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5481) struct {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5482) u8 order;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5483) __le64 size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5484) } __attribute__ ((packed)) size_buf = { 0 };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5485)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5486) ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5487) &rbd_dev->header_oloc, "get_size",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5488) &snapid, sizeof(snapid),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5489) &size_buf, sizeof(size_buf));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5490) dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5491) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5492) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5493) if (ret < sizeof (size_buf))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5494) return -ERANGE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5495)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5496) if (order) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5497) *order = size_buf.order;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5498) dout(" order %u", (unsigned int)*order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5499) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5500) *snap_size = le64_to_cpu(size_buf.size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5501)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5502) dout(" snap_id 0x%016llx snap_size = %llu\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5503) (unsigned long long)snap_id,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5504) (unsigned long long)*snap_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5505)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5506) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5507) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5508)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5509) static int rbd_dev_v2_image_size(struct rbd_device *rbd_dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5510) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5511) return _rbd_dev_v2_snap_size(rbd_dev, CEPH_NOSNAP,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5512) &rbd_dev->header.obj_order,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5513) &rbd_dev->header.image_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5514) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5515)
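/*
 * Fetch the object name prefix (typically "rbd_data.<image id>") via
 * the "get_object_prefix" class method.
 */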
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5516) static int rbd_dev_v2_object_prefix(struct rbd_device *rbd_dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5517) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5518) size_t size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5519) void *reply_buf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5520) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5521) void *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5522)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5523) /* Response will be an encoded string, which includes a length */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5524) size = sizeof(__le32) + RBD_OBJ_PREFIX_LEN_MAX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5525) reply_buf = kzalloc(size, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5526) if (!reply_buf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5527) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5528)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5529) ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5530) &rbd_dev->header_oloc, "get_object_prefix",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5531) NULL, 0, reply_buf, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5532) dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5533) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5534) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5535)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5536) p = reply_buf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5537) rbd_dev->header.object_prefix = ceph_extract_encoded_string(&p,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5538) p + ret, NULL, GFP_NOIO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5539) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5540)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5541) if (IS_ERR(rbd_dev->header.object_prefix)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5542) ret = PTR_ERR(rbd_dev->header.object_prefix);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5543) rbd_dev->header.object_prefix = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5544) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5545) dout(" object_prefix = %s\n", rbd_dev->header.object_prefix);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5546) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5547) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5548) kfree(reply_buf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5549)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5550) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5551) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5552)
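/*
 * Query the feature bits for the given snapshot (or, for CEPH_NOSNAP,
 * the base image) with the "get_features" class method.  Fails with
 * -ENXIO if the image uses incompatible features this client does not
 * support.
 */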
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5553) static int _rbd_dev_v2_snap_features(struct rbd_device *rbd_dev, u64 snap_id,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5554) bool read_only, u64 *snap_features)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5555) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5556) struct {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5557) __le64 snap_id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5558) u8 read_only;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5559) } features_in;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5560) struct {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5561) __le64 features;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5562) __le64 incompat;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5563) } __attribute__ ((packed)) features_buf = { 0 };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5564) u64 unsup;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5565) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5566)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5567) features_in.snap_id = cpu_to_le64(snap_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5568) features_in.read_only = read_only;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5569)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5570) ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5571) &rbd_dev->header_oloc, "get_features",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5572) &features_in, sizeof(features_in),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5573) &features_buf, sizeof(features_buf));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5574) dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5575) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5576) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5577) if (ret < sizeof (features_buf))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5578) return -ERANGE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5579)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5580) unsup = le64_to_cpu(features_buf.incompat) & ~RBD_FEATURES_SUPPORTED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5581) if (unsup) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5582) rbd_warn(rbd_dev, "image uses unsupported features: 0x%llx",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5583) unsup);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5584) return -ENXIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5585) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5586)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5587) *snap_features = le64_to_cpu(features_buf.features);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5588)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5589) dout(" snap_id 0x%016llx features = 0x%016llx incompat = 0x%016llx\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5590) (unsigned long long)snap_id,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5591) (unsigned long long)*snap_features,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5592) (unsigned long long)le64_to_cpu(features_buf.incompat));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5593)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5594) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5595) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5596)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5597) static int rbd_dev_v2_features(struct rbd_device *rbd_dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5598) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5599) return _rbd_dev_v2_snap_features(rbd_dev, CEPH_NOSNAP,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5600) rbd_is_ro(rbd_dev),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5601) &rbd_dev->header.features);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5602) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5603)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5604) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5605) * These are generic image flags, but since they are used only for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5606) * object map, store them in rbd_dev->object_map_flags.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5607) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5608) * For the same reason, this function is called only on object map
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5609) * (re)load and not on header refresh.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5610) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5611) static int rbd_dev_v2_get_flags(struct rbd_device *rbd_dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5612) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5613) __le64 snapid = cpu_to_le64(rbd_dev->spec->snap_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5614) __le64 flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5615) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5616)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5617) ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5618) &rbd_dev->header_oloc, "get_flags",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5619) &snapid, sizeof(snapid),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5620) &flags, sizeof(flags));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5621) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5622) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5623) if (ret < sizeof(flags))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5624) return -EBADMSG;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5625)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5626) rbd_dev->object_map_flags = le64_to_cpu(flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5627) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5628) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5629)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5630) struct parent_image_info {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5631) u64 pool_id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5632) const char *pool_ns;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5633) const char *image_id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5634) u64 snap_id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5635)
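	/*
	 * overlap is valid only when has_overlap is set; a clear flag
	 * means the image has no parent (it never had one, or it was
	 * flattened).
	 */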
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5636) bool has_overlap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5637) u64 overlap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5638) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5639)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5640) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5641) * The caller is responsible for @pii.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5642) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5643) static int decode_parent_image_spec(void **p, void *end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5644) struct parent_image_info *pii)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5645) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5646) u8 struct_v;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5647) u32 struct_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5648) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5649)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5650) ret = ceph_start_decoding(p, end, 1, "ParentImageSpec",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5651) &struct_v, &struct_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5652) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5653) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5654)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5655) ceph_decode_64_safe(p, end, pii->pool_id, e_inval);
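	/*
	 * ceph_extract_encoded_string() returns a kmalloc'd copy (or an
	 * ERR_PTR); on failure reset the field to NULL so that the
	 * caller's unconditional kfree() remains safe.
	 */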
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5656) pii->pool_ns = ceph_extract_encoded_string(p, end, NULL, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5657) if (IS_ERR(pii->pool_ns)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5658) ret = PTR_ERR(pii->pool_ns);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5659) pii->pool_ns = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5660) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5661) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5662) pii->image_id = ceph_extract_encoded_string(p, end, NULL, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5663) if (IS_ERR(pii->image_id)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5664) ret = PTR_ERR(pii->image_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5665) pii->image_id = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5666) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5667) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5668) ceph_decode_64_safe(p, end, pii->snap_id, e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5669) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5670)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5671) e_inval:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5672) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5673) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5674)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5675) static int __get_parent_info(struct rbd_device *rbd_dev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5676) struct page *req_page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5677) struct page *reply_page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5678) struct parent_image_info *pii)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5679) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5680) struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5681) size_t reply_len = PAGE_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5682) void *p, *end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5683) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5684)
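	/*
	 * Try the newer "parent_get"/"parent_overlap_get" class methods
	 * first.  A return value of 1 tells get_parent_info() to fall
	 * back to the legacy "get_parent" method.
	 */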
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5685) ret = ceph_osdc_call(osdc, &rbd_dev->header_oid, &rbd_dev->header_oloc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5686) "rbd", "parent_get", CEPH_OSD_FLAG_READ,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5687) req_page, sizeof(u64), &reply_page, &reply_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5688) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5689) return ret == -EOPNOTSUPP ? 1 : ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5690)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5691) p = page_address(reply_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5692) end = p + reply_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5693) ret = decode_parent_image_spec(&p, end, pii);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5694) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5695) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5696)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5697) ret = ceph_osdc_call(osdc, &rbd_dev->header_oid, &rbd_dev->header_oloc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5698) "rbd", "parent_overlap_get", CEPH_OSD_FLAG_READ,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5699) req_page, sizeof(u64), &reply_page, &reply_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5700) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5701) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5702)
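	/*
	 * The overlap reply is a u8 flag, followed by the overlap in
	 * bytes (le64) only when the flag is set.
	 */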
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5703) p = page_address(reply_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5704) end = p + reply_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5705) ceph_decode_8_safe(&p, end, pii->has_overlap, e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5706) if (pii->has_overlap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5707) ceph_decode_64_safe(&p, end, pii->overlap, e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5708)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5709) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5710)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5711) e_inval:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5712) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5713) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5714)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5715) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5716) * The caller is responsible for @pii.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5717) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5718) static int __get_parent_info_legacy(struct rbd_device *rbd_dev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5719) struct page *req_page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5720) struct page *reply_page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5721) struct parent_image_info *pii)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5722) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5723) struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5724) size_t reply_len = PAGE_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5725) void *p, *end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5726) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5727)
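	/*
	 * Legacy reply layout: pool_id (le64), image_id (string),
	 * snap_id (le64), overlap (le64).  There is no explicit flag,
	 * so has_overlap is implied to be true.
	 */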
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5728) ret = ceph_osdc_call(osdc, &rbd_dev->header_oid, &rbd_dev->header_oloc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5729) "rbd", "get_parent", CEPH_OSD_FLAG_READ,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5730) req_page, sizeof(u64), &reply_page, &reply_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5731) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5732) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5733)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5734) p = page_address(reply_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5735) end = p + reply_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5736) ceph_decode_64_safe(&p, end, pii->pool_id, e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5737) pii->image_id = ceph_extract_encoded_string(&p, end, NULL, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5738) if (IS_ERR(pii->image_id)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5739) ret = PTR_ERR(pii->image_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5740) pii->image_id = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5741) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5742) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5743) ceph_decode_64_safe(&p, end, pii->snap_id, e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5744) pii->has_overlap = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5745) ceph_decode_64_safe(&p, end, pii->overlap, e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5746)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5747) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5748)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5749) e_inval:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5750) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5751) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5752)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5753) static int get_parent_info(struct rbd_device *rbd_dev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5754) struct parent_image_info *pii)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5755) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5756) struct page *req_page, *reply_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5757) void *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5758) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5759)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5760) req_page = alloc_page(GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5761) if (!req_page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5762) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5763)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5764) reply_page = alloc_page(GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5765) if (!reply_page) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5766) __free_page(req_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5767) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5768) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5769)
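	/*
	 * All of the parent class methods take the same request: the
	 * snapshot id as a single le64, hence the sizeof(u64) request
	 * length passed to ceph_osdc_call().
	 */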
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5770) p = page_address(req_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5771) ceph_encode_64(&p, rbd_dev->spec->snap_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5772) ret = __get_parent_info(rbd_dev, req_page, reply_page, pii);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5773) if (ret > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5774) ret = __get_parent_info_legacy(rbd_dev, req_page, reply_page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5775) pii);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5776)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5777) __free_page(req_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5778) __free_page(reply_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5779) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5780) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5781)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5782) static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5783) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5784) struct rbd_spec *parent_spec;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5785) struct parent_image_info pii = { 0 };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5786) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5787)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5788) parent_spec = rbd_spec_alloc();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5789) if (!parent_spec)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5790) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5791)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5792) ret = get_parent_info(rbd_dev, &pii);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5793) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5794) goto out_err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5795)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5796) dout("%s pool_id %llu pool_ns %s image_id %s snap_id %llu has_overlap %d overlap %llu\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5797) __func__, pii.pool_id, pii.pool_ns, pii.image_id, pii.snap_id,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5798) pii.has_overlap, pii.overlap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5799)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5800) if (pii.pool_id == CEPH_NOPOOL || !pii.has_overlap) {
		/*
		 * Either the parent never existed, or we have a record
		 * of it but the image got flattened, so it no longer
		 * has a parent.  When the parent of a layered image
		 * disappears we immediately set the overlap to 0.  The
		 * effect of this is that all new requests will be
		 * treated as if the image had no parent.
		 *
		 * If !pii.has_overlap, the parent image spec is not
		 * applicable.  It's there to avoid duplication in each
		 * snapshot record.
		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5814) if (rbd_dev->parent_overlap) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5815) rbd_dev->parent_overlap = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5816) rbd_dev_parent_put(rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5817) pr_info("%s: clone image has been flattened\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5818) rbd_dev->disk->disk_name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5819) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5820)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5821) goto out; /* No parent? No problem. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5822) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5823)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5824) /* The ceph file layout needs to fit pool id in 32 bits */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5825)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5826) ret = -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5827) if (pii.pool_id > (u64)U32_MAX) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5828) rbd_warn(NULL, "parent pool id too large (%llu > %u)",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5829) (unsigned long long)pii.pool_id, U32_MAX);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5830) goto out_err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5831) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5832)
	/*
	 * The parent won't change (except when the clone is
	 * flattened, which is handled above).  So we only need to
	 * record the parent spec if we have not already done so.
	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5838) if (!rbd_dev->parent_spec) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5839) parent_spec->pool_id = pii.pool_id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5840) if (pii.pool_ns && *pii.pool_ns) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5841) parent_spec->pool_ns = pii.pool_ns;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5842) pii.pool_ns = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5843) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5844) parent_spec->image_id = pii.image_id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5845) pii.image_id = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5846) parent_spec->snap_id = pii.snap_id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5847)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5848) rbd_dev->parent_spec = parent_spec;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5849) parent_spec = NULL; /* rbd_dev now owns this */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5850) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5851)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5852) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5853) * We always update the parent overlap. If it's zero we issue
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5854) * a warning, as we will proceed as if there was no parent.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5855) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5856) if (!pii.overlap) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5857) if (parent_spec) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5858) /* refresh, careful to warn just once */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5859) if (rbd_dev->parent_overlap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5860) rbd_warn(rbd_dev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5861) "clone now standalone (overlap became 0)");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5862) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5863) /* initial probe */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5864) rbd_warn(rbd_dev, "clone is standalone (overlap 0)");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5865) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5866) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5867) rbd_dev->parent_overlap = pii.overlap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5868)
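	/*
	 * Success: force ret to 0 and fall through into the cleanup
	 * shared with the error path.
	 */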
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5869) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5870) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5871) out_err:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5872) kfree(pii.pool_ns);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5873) kfree(pii.image_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5874) rbd_spec_put(parent_spec);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5875) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5876) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5877)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5878) static int rbd_dev_v2_striping_info(struct rbd_device *rbd_dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5879) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5880) struct {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5881) __le64 stripe_unit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5882) __le64 stripe_count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5883) } __attribute__ ((packed)) striping_info_buf = { 0 };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5884) size_t size = sizeof (striping_info_buf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5885) void *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5886) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5887)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5888) ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5889) &rbd_dev->header_oloc, "get_stripe_unit_count",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5890) NULL, 0, &striping_info_buf, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5891) dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5892) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5893) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5894) if (ret < size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5895) return -ERANGE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5896)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5897) p = &striping_info_buf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5898) rbd_dev->header.stripe_unit = ceph_decode_64(&p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5899) rbd_dev->header.stripe_count = ceph_decode_64(&p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5900) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5901) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5902)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5903) static int rbd_dev_v2_data_pool(struct rbd_device *rbd_dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5904) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5905) __le64 data_pool_id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5906) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5907)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5908) ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5909) &rbd_dev->header_oloc, "get_data_pool",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5910) NULL, 0, &data_pool_id, sizeof(data_pool_id));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5911) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5912) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5913) if (ret < sizeof(data_pool_id))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5914) return -EBADMSG;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5915)
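	/*
	 * This is expected to be called only for images that actually
	 * have a separate data pool, so a reply of CEPH_NOPOOL would be
	 * inconsistent -- warn if we ever see one.
	 */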
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5916) rbd_dev->header.data_pool_id = le64_to_cpu(data_pool_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5917) WARN_ON(rbd_dev->header.data_pool_id == CEPH_NOPOOL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5918) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5919) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5920)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5921) static char *rbd_dev_image_name(struct rbd_device *rbd_dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5922) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5923) CEPH_DEFINE_OID_ONSTACK(oid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5924) size_t image_id_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5925) char *image_id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5926) void *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5927) void *end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5928) size_t size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5929) void *reply_buf = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5930) size_t len = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5931) char *image_name = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5932) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5933)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5934) rbd_assert(!rbd_dev->spec->image_name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5935)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5936) len = strlen(rbd_dev->spec->image_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5937) image_id_size = sizeof (__le32) + len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5938) image_id = kmalloc(image_id_size, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5939) if (!image_id)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5940) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5941)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5942) p = image_id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5943) end = image_id + image_id_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5944) ceph_encode_string(&p, end, rbd_dev->spec->image_id, (u32)len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5945)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5946) size = sizeof (__le32) + RBD_IMAGE_NAME_LEN_MAX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5947) reply_buf = kmalloc(size, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5948) if (!reply_buf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5949) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5950)
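	/*
	 * Look the name up in the pool's rbd directory object, which
	 * maps image ids back to image names; the reply is a single
	 * encoded string.
	 */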
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5951) ceph_oid_printf(&oid, "%s", RBD_DIRECTORY);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5952) ret = rbd_obj_method_sync(rbd_dev, &oid, &rbd_dev->header_oloc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5953) "dir_get_name", image_id, image_id_size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5954) reply_buf, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5955) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5956) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5957) p = reply_buf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5958) end = reply_buf + ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5959)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5960) image_name = ceph_extract_encoded_string(&p, end, &len, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5961) if (IS_ERR(image_name))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5962) image_name = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5963) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5964) dout("%s: name is %s len is %zd\n", __func__, image_name, len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5965) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5966) kfree(reply_buf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5967) kfree(image_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5968)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5969) return image_name;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5970) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5971)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5972) static u64 rbd_v1_snap_id_by_name(struct rbd_device *rbd_dev, const char *name)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5973) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5974) struct ceph_snap_context *snapc = rbd_dev->header.snapc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5975) const char *snap_name;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5976) u32 which = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5977)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5978) /* Skip over names until we find the one we are looking for */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5979)
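	/*
	 * Format 1 keeps the names as one buffer of consecutive
	 * NUL-terminated strings, ordered the same as snapc->snaps[].
	 */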
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5980) snap_name = rbd_dev->header.snap_names;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5981) while (which < snapc->num_snaps) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5982) if (!strcmp(name, snap_name))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5983) return snapc->snaps[which];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5984) snap_name += strlen(snap_name) + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5985) which++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5986) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5987) return CEPH_NOSNAP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5988) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5989)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5990) static u64 rbd_v2_snap_id_by_name(struct rbd_device *rbd_dev, const char *name)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5991) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5992) struct ceph_snap_context *snapc = rbd_dev->header.snapc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5993) u32 which;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5994) bool found = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5995) u64 snap_id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5996)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5997) for (which = 0; !found && which < snapc->num_snaps; which++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5998) const char *snap_name;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5999)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6000) snap_id = snapc->snaps[which];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6001) snap_name = rbd_dev_v2_snap_name(rbd_dev, snap_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6002) if (IS_ERR(snap_name)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6003) /* ignore no-longer existing snapshots */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6004) if (PTR_ERR(snap_name) == -ENOENT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6005) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6006) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6007) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6008) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6009) found = !strcmp(name, snap_name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6010) kfree(snap_name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6011) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6012) return found ? snap_id : CEPH_NOSNAP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6013) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6014)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6015) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6016) * Assumes name is never RBD_SNAP_HEAD_NAME; returns CEPH_NOSNAP if
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6017) * no snapshot by that name is found, or if an error occurs.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6018) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6019) static u64 rbd_snap_id_by_name(struct rbd_device *rbd_dev, const char *name)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6020) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6021) if (rbd_dev->image_format == 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6022) return rbd_v1_snap_id_by_name(rbd_dev, name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6023)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6024) return rbd_v2_snap_id_by_name(rbd_dev, name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6025) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6026)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6027) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6028) * An image being mapped will have everything but the snap id.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6029) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6030) static int rbd_spec_fill_snap_id(struct rbd_device *rbd_dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6031) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6032) struct rbd_spec *spec = rbd_dev->spec;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6033)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6034) rbd_assert(spec->pool_id != CEPH_NOPOOL && spec->pool_name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6035) rbd_assert(spec->image_id && spec->image_name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6036) rbd_assert(spec->snap_name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6037)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6038) if (strcmp(spec->snap_name, RBD_SNAP_HEAD_NAME)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6039) u64 snap_id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6040)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6041) snap_id = rbd_snap_id_by_name(rbd_dev, spec->snap_name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6042) if (snap_id == CEPH_NOSNAP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6043) return -ENOENT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6044)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6045) spec->snap_id = snap_id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6046) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6047) spec->snap_id = CEPH_NOSNAP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6048) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6049)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6050) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6051) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6052)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6053) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6054) * A parent image will have all ids but none of the names.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6055) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6056) * All names in an rbd spec are dynamically allocated. It's OK if we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6057) * can't figure out the name for an image id.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6058) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6059) static int rbd_spec_fill_names(struct rbd_device *rbd_dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6060) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6061) struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6062) struct rbd_spec *spec = rbd_dev->spec;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6063) const char *pool_name;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6064) const char *image_name;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6065) const char *snap_name;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6066) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6067)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6068) rbd_assert(spec->pool_id != CEPH_NOPOOL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6069) rbd_assert(spec->image_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6070) rbd_assert(spec->snap_id != CEPH_NOSNAP);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6071)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6072) /* Get the pool name; we have to make our own copy of this */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6073)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6074) pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, spec->pool_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6075) if (!pool_name) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6076) rbd_warn(rbd_dev, "no pool with id %llu", spec->pool_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6077) return -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6078) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6079) pool_name = kstrdup(pool_name, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6080) if (!pool_name)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6081) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6082)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6083) /* Fetch the image name; tolerate failure here */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6084)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6085) image_name = rbd_dev_image_name(rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6086) if (!image_name)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6087) rbd_warn(rbd_dev, "unable to get image name");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6088)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6089) /* Fetch the snapshot name */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6090)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6091) snap_name = rbd_snap_name(rbd_dev, spec->snap_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6092) if (IS_ERR(snap_name)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6093) ret = PTR_ERR(snap_name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6094) goto out_err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6095) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6096)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6097) spec->pool_name = pool_name;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6098) spec->image_name = image_name;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6099) spec->snap_name = snap_name;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6100)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6101) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6102)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6103) out_err:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6104) kfree(image_name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6105) kfree(pool_name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6106) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6107) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6108)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6109) static int rbd_dev_v2_snap_context(struct rbd_device *rbd_dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6110) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6111) size_t size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6112) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6113) void *reply_buf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6114) void *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6115) void *end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6116) u64 seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6117) u32 snap_count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6118) struct ceph_snap_context *snapc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6119) u32 i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6120)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6121) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6122) * We'll need room for the seq value (maximum snapshot id),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6123) * snapshot count, and array of that many snapshot ids.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6124) * For now we have a fixed upper limit on the number we're
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6125) * prepared to receive.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6126) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6127) size = sizeof (__le64) + sizeof (__le32) +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6128) RBD_MAX_SNAP_COUNT * sizeof (__le64);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6129) reply_buf = kzalloc(size, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6130) if (!reply_buf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6131) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6132)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6133) ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6134) &rbd_dev->header_oloc, "get_snapcontext",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6135) NULL, 0, reply_buf, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6136) dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6137) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6138) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6139)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6140) p = reply_buf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6141) end = reply_buf + ret;
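	/*
	 * A truncated reply, whether caught by the _safe decode macros
	 * or by the explicit room check below, yields -ERANGE.
	 */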
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6142) ret = -ERANGE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6143) ceph_decode_64_safe(&p, end, seq, out);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6144) ceph_decode_32_safe(&p, end, snap_count, out);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6145)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6146) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6147) * Make sure the reported number of snapshot ids wouldn't go
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6148) * beyond the end of our buffer. But before checking that,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6149) * make sure the computed size of the snapshot context we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6150) * allocate is representable in a size_t.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6151) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6152) if (snap_count > (SIZE_MAX - sizeof (struct ceph_snap_context))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6153) / sizeof (u64)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6154) ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6155) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6156) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6157) if (!ceph_has_room(&p, end, snap_count * sizeof (__le64)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6158) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6159) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6160)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6161) snapc = ceph_create_snap_context(snap_count, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6162) if (!snapc) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6163) ret = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6164) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6165) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6166) snapc->seq = seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6167) for (i = 0; i < snap_count; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6168) snapc->snaps[i] = ceph_decode_64(&p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6169)
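	/* Swap in the new snapshot context, dropping the old one. */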
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6170) ceph_put_snap_context(rbd_dev->header.snapc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6171) rbd_dev->header.snapc = snapc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6172)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6173) dout(" snap context seq = %llu, snap_count = %u\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6174) (unsigned long long)seq, (unsigned int)snap_count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6175) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6176) kfree(reply_buf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6177)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6178) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6179) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6180)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6181) static const char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6182) u64 snap_id)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6183) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6184) size_t size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6185) void *reply_buf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6186) __le64 snapid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6187) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6188) void *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6189) void *end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6190) char *snap_name;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6191)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6192) size = sizeof (__le32) + RBD_MAX_SNAP_NAME_LEN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6193) reply_buf = kmalloc(size, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6194) if (!reply_buf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6195) return ERR_PTR(-ENOMEM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6196)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6197) snapid = cpu_to_le64(snap_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6198) ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6199) &rbd_dev->header_oloc, "get_snapshot_name",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6200) &snapid, sizeof(snapid), reply_buf, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6201) dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6202) if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6203) snap_name = ERR_PTR(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6204) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6205) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6206)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6207) p = reply_buf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6208) end = reply_buf + ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6209) snap_name = ceph_extract_encoded_string(&p, end, NULL, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6210) if (IS_ERR(snap_name))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6211) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6212)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6213) dout(" snap_id 0x%016llx snap_name = %s\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6214) (unsigned long long)snap_id, snap_name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6215) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6216) kfree(reply_buf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6217)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6218) return snap_name;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6219) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6220)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6221) static int rbd_dev_v2_header_info(struct rbd_device *rbd_dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6222) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6223) bool first_time = rbd_dev->header.object_prefix == NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6224) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6225)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6226) ret = rbd_dev_v2_image_size(rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6227) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6228) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6229)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6230) if (first_time) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6231) ret = rbd_dev_v2_header_onetime(rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6232) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6233) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6234) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6235)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6236) ret = rbd_dev_v2_snap_context(rbd_dev);
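	/*
	 * first_time is keyed off object_prefix (see above), so on a
	 * first-time failure free it to make the next probe re-run the
	 * one-time setup.
	 */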
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6237) if (ret && first_time) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6238) kfree(rbd_dev->header.object_prefix);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6239) rbd_dev->header.object_prefix = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6240) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6241)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6242) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6243) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6244)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6245) static int rbd_dev_header_info(struct rbd_device *rbd_dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6246) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6247) rbd_assert(rbd_image_format_valid(rbd_dev->image_format));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6248)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6249) if (rbd_dev->image_format == 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6250) return rbd_dev_v1_header_info(rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6251)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6252) return rbd_dev_v2_header_info(rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6253) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6254)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6255) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6256) * Skips over white space at *buf, and updates *buf to point to the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6257) * first found non-space character (if any). Returns the length of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6258) * the token (string of non-white space characters) found. Note
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6259) * that *buf must be terminated with '\0'.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6260) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6261) static inline size_t next_token(const char **buf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6262) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6263) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6264) * These are the characters that produce nonzero for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6265) * isspace() in the "C" and "POSIX" locales.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6266) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6267) const char *spaces = " \f\n\r\t\v";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6268)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6269) *buf += strspn(*buf, spaces); /* Find start of token */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6270)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6271) return strcspn(*buf, spaces); /* Return token length */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6272) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6273)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6274) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6275) * Finds the next token in *buf, dynamically allocates a buffer big
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6276) * enough to hold a copy of it, and copies the token into the new
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6277) * buffer. The copy is guaranteed to be terminated with '\0'. Note
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6278) * that a duplicate buffer is created even for a zero-length token.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6279) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6280) * Returns a pointer to the newly-allocated duplicate, or a null
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6281) * pointer if memory for the duplicate was not available. If
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6282) * the lenp argument is a non-null pointer, the length of the token
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6283) * (not including the '\0') is returned in *lenp.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6284) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6285) * If successful, the *buf pointer will be updated to point beyond
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6286) * the end of the found token.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6287) *
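 * Example: if *buf points at "  pool image", the first call returns
 * a copy of "pool" and leaves *buf at " image"; a second call then
 * returns a copy of "image".
 *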
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6288) * Note: uses GFP_KERNEL for allocation.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6289) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6290) static inline char *dup_token(const char **buf, size_t *lenp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6291) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6292) char *dup;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6293) size_t len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6294)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6295) len = next_token(buf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6296) dup = kmemdup(*buf, len + 1, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6297) if (!dup)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6298) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6299) *(dup + len) = '\0';
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6300) *buf += len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6301)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6302) if (lenp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6303) *lenp = len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6304)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6305) return dup;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6306) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6307)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6308) static int rbd_parse_param(struct fs_parameter *param,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6309) struct rbd_parse_opts_ctx *pctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6310) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6311) struct rbd_options *opt = pctx->opts;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6312) struct fs_parse_result result;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6313) struct p_log log = {.prefix = "rbd"};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6314) int token, ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6315)
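	/*
	 * Give libceph first crack at the option; -ENOPARAM means it is
	 * not a generic ceph option and should be matched against the
	 * rbd-specific table below.
	 */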
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6316) ret = ceph_parse_param(param, pctx->copts, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6317) if (ret != -ENOPARAM)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6318) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6319)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6320) token = __fs_parse(&log, rbd_parameters, param, &result);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6321) dout("%s fs_parse '%s' token %d\n", __func__, param->key, token);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6322) if (token < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6323) if (token == -ENOPARAM)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6324) return inval_plog(&log, "Unknown parameter '%s'",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6325) param->key);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6326) return token;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6327) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6328)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6329) switch (token) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6330) case Opt_queue_depth:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6331) if (result.uint_32 < 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6332) goto out_of_range;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6333) opt->queue_depth = result.uint_32;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6334) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6335) case Opt_alloc_size:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6336) if (result.uint_32 < SECTOR_SIZE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6337) goto out_of_range;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6338) if (!is_power_of_2(result.uint_32))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6339) return inval_plog(&log, "alloc_size must be a power of 2");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6340) opt->alloc_size = result.uint_32;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6341) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6342) case Opt_lock_timeout:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6343) /* 0 is "wait forever" (i.e. infinite timeout) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6344) if (result.uint_32 > INT_MAX / 1000)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6345) goto out_of_range;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6346) opt->lock_timeout = msecs_to_jiffies(result.uint_32 * 1000);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6347) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6348) case Opt_pool_ns:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6349) kfree(pctx->spec->pool_ns);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6350) pctx->spec->pool_ns = param->string;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6351) param->string = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6352) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6353) case Opt_compression_hint:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6354) switch (result.uint_32) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6355) case Opt_compression_hint_none:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6356) opt->alloc_hint_flags &=
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6357) ~(CEPH_OSD_ALLOC_HINT_FLAG_COMPRESSIBLE |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6358) CEPH_OSD_ALLOC_HINT_FLAG_INCOMPRESSIBLE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6359) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6360) case Opt_compression_hint_compressible:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6361) opt->alloc_hint_flags |=
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6362) CEPH_OSD_ALLOC_HINT_FLAG_COMPRESSIBLE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6363) opt->alloc_hint_flags &=
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6364) ~CEPH_OSD_ALLOC_HINT_FLAG_INCOMPRESSIBLE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6365) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6366) case Opt_compression_hint_incompressible:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6367) opt->alloc_hint_flags |=
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6368) CEPH_OSD_ALLOC_HINT_FLAG_INCOMPRESSIBLE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6369) opt->alloc_hint_flags &=
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6370) ~CEPH_OSD_ALLOC_HINT_FLAG_COMPRESSIBLE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6371) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6372) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6373) BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6374) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6375) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6376) case Opt_read_only:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6377) opt->read_only = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6378) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6379) case Opt_read_write:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6380) opt->read_only = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6381) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6382) case Opt_lock_on_read:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6383) opt->lock_on_read = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6384) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6385) case Opt_exclusive:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6386) opt->exclusive = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6387) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6388) case Opt_notrim:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6389) opt->trim = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6390) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6391) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6392) BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6393) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6394)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6395) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6396)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6397) out_of_range:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6398) return inval_plog(&log, "%s out of range", param->key);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6399) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6400)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6401) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6402) * This duplicates most of generic_parse_monolithic(), untying it from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6403) * fs_context and skipping standard superblock and security options.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6404) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6405) static int rbd_parse_options(char *options, struct rbd_parse_opts_ctx *pctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6406) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6407) char *key;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6408) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6409)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6410) dout("%s '%s'\n", __func__, options);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6411) while ((key = strsep(&options, ",")) != NULL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6412) if (*key) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6413) struct fs_parameter param = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6414) .key = key,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6415) .type = fs_value_is_flag,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6416) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6417) char *value = strchr(key, '=');
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6418) size_t v_len = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6419)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6420) if (value) {
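				/*
				 * Silently skip an empty key ("=value"),
				 * as generic_parse_monolithic() does.
				 */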
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6421) if (value == key)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6422) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6423) *value++ = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6424) v_len = strlen(value);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6425) param.string = kmemdup_nul(value, v_len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6426) GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6427) if (!param.string)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6428) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6429) param.type = fs_value_is_string;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6430) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6431) param.size = v_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6432)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6433) ret = rbd_parse_param(&param, pctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6434) kfree(param.string);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6435) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6436) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6437) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6438) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6439)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6440) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6441) }
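^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6441) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6441) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6441) * Illustration (editorial note, not part of the driver): given the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6441) * string "read_only,queue_depth=128", the loop above calls
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6441) * rbd_parse_param() twice: once with a flag-type parameter,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6441) * { .key = "read_only", .type = fs_value_is_flag }, and once with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6441) * { .key = "queue_depth", .string = "128", .size = 3,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6441) *   .type = fs_value_is_string }.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6441) */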
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6442)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6443) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6444) * Parse the options provided for an "rbd add" (i.e., rbd image
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6445) * mapping) request. These arrive via a write to /sys/bus/rbd/add,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6446) * and the data written is passed here via a NUL-terminated buffer.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6447) * Returns 0 if successful or an error code otherwise.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6448) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6449) * The information extracted from these options is recorded in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6450) * the other parameters which return dynamically-allocated
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6451) * structures:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6452) * ceph_opts
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6453) * The address of a pointer that will refer to a ceph options
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6454) * structure. Caller must release the returned pointer using
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6455) * ceph_destroy_options() when it is no longer needed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6456) * rbd_opts
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6457) * Address of an rbd options pointer. Fully initialized by
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6458) * this function; caller must release with kfree().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6459) * spec
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6460) * Address of an rbd image specification pointer. Fully
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6461) * initialized by this function based on parsed options.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6462) * Caller must release with rbd_spec_put().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6463) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6464) * The options passed take this form:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6465) * <mon_addrs> <options> <pool_name> <image_name> [<snap_name>]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6466) * where:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6467) * <mon_addrs>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6468) * A comma-separated list of one or more monitor addresses.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6469) * A monitor address is an ip address, optionally followed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6470) * by a port number (separated by a colon).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6471) * I.e.: ip1[:port1][,ip2[:port2]...]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6472) * <options>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6473) * A comma-separated list of ceph and/or rbd options.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6474) * <pool_name>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6475) * The name of the rados pool containing the rbd image.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6476) * <image_name>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6477) * The name of the image in that pool to map.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6478) * <snap_name>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6479) * An optional snapshot name. If provided, the mapping will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6480) * present data from the image at the time that snapshot was
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6481) * created. The image head is used if no snapshot name is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6482) * provided. Snapshot mappings are always read-only.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6483) */
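^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6483) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6483) * For example (illustrative values only), writing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6483) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6483) *   1.2.3.4:6789,1.2.3.5:6789 name=admin,read_only rbd myimage -
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6483) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6483) * to /sys/bus/rbd/add maps the head of image "myimage" in pool
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6483) * "rbd" read-only, using two monitors.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6483) */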
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6484) static int rbd_add_parse_args(const char *buf,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6485) struct ceph_options **ceph_opts,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6486) struct rbd_options **opts,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6487) struct rbd_spec **rbd_spec)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6488) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6489) size_t len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6490) char *options;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6491) const char *mon_addrs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6492) char *snap_name;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6493) size_t mon_addrs_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6494) struct rbd_parse_opts_ctx pctx = { 0 };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6495) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6496)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6497) /* The first four tokens are required */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6498)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6499) len = next_token(&buf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6500) if (!len) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6501) rbd_warn(NULL, "no monitor address(es) provided");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6502) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6503) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6504) mon_addrs = buf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6505) mon_addrs_size = len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6506) buf += len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6507)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6508) ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6509) options = dup_token(&buf, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6510) if (!options)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6511) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6512) if (!*options) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6513) rbd_warn(NULL, "no options provided");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6514) goto out_err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6515) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6516)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6517) pctx.spec = rbd_spec_alloc();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6518) if (!pctx.spec)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6519) goto out_mem;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6520)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6521) pctx.spec->pool_name = dup_token(&buf, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6522) if (!pctx.spec->pool_name)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6523) goto out_mem;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6524) if (!*pctx.spec->pool_name) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6525) rbd_warn(NULL, "no pool name provided");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6526) goto out_err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6527) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6528)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6529) pctx.spec->image_name = dup_token(&buf, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6530) if (!pctx.spec->image_name)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6531) goto out_mem;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6532) if (!*pctx.spec->image_name) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6533) rbd_warn(NULL, "no image name provided");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6534) goto out_err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6535) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6536)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6537) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6538) * Snapshot name is optional; the default is "-", which
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6539) * denotes the image head (i.e. no snapshot).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6540) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6541) len = next_token(&buf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6542) if (!len) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6543) buf = RBD_SNAP_HEAD_NAME; /* No snapshot supplied */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6544) len = sizeof (RBD_SNAP_HEAD_NAME) - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6545) } else if (len > RBD_MAX_SNAP_NAME_LEN) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6546) ret = -ENAMETOOLONG;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6547) goto out_err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6548) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6549) snap_name = kmemdup(buf, len + 1, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6550) if (!snap_name)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6551) goto out_mem;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6552) *(snap_name + len) = '\0';
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6553) pctx.spec->snap_name = snap_name;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6554)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6555) pctx.copts = ceph_alloc_options();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6556) if (!pctx.copts)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6557) goto out_mem;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6558)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6559) /* Initialize all rbd options to the defaults */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6560)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6561) pctx.opts = kzalloc(sizeof(*pctx.opts), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6562) if (!pctx.opts)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6563) goto out_mem;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6564)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6565) pctx.opts->read_only = RBD_READ_ONLY_DEFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6566) pctx.opts->queue_depth = RBD_QUEUE_DEPTH_DEFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6567) pctx.opts->alloc_size = RBD_ALLOC_SIZE_DEFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6568) pctx.opts->lock_timeout = RBD_LOCK_TIMEOUT_DEFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6569) pctx.opts->lock_on_read = RBD_LOCK_ON_READ_DEFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6570) pctx.opts->exclusive = RBD_EXCLUSIVE_DEFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6571) pctx.opts->trim = RBD_TRIM_DEFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6572)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6573) ret = ceph_parse_mon_ips(mon_addrs, mon_addrs_size, pctx.copts, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6574) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6575) goto out_err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6576)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6577) ret = rbd_parse_options(options, &pctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6578) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6579) goto out_err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6580)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6581) *ceph_opts = pctx.copts;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6582) *opts = pctx.opts;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6583) *rbd_spec = pctx.spec;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6584) kfree(options);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6585) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6586)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6587) out_mem:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6588) ret = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6589) out_err:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6590) kfree(pctx.opts);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6591) ceph_destroy_options(pctx.copts);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6592) rbd_spec_put(pctx.spec);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6593) kfree(options);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6594) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6595) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6596)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6597) static void rbd_dev_image_unlock(struct rbd_device *rbd_dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6598) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6599) down_write(&rbd_dev->lock_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6600) if (__rbd_is_lock_owner(rbd_dev))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6601) __rbd_release_lock(rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6602) up_write(&rbd_dev->lock_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6603) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6604)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6605) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6606) * If the wait is interrupted, an error is returned even if the lock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6607) * was successfully acquired. rbd_dev_image_unlock() will release it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6608) * if needed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6609) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6610) static int rbd_add_acquire_lock(struct rbd_device *rbd_dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6611) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6612) long ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6613)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6614) if (!(rbd_dev->header.features & RBD_FEATURE_EXCLUSIVE_LOCK)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6615) if (!rbd_dev->opts->exclusive && !rbd_dev->opts->lock_on_read)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6616) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6617)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6618) rbd_warn(rbd_dev, "exclusive-lock feature is not enabled");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6619) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6620) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6621)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6622) if (rbd_is_ro(rbd_dev))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6623) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6624)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6625) rbd_assert(!rbd_is_lock_owner(rbd_dev));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6626) queue_delayed_work(rbd_dev->task_wq, &rbd_dev->lock_dwork, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6627) ret = wait_for_completion_killable_timeout(&rbd_dev->acquire_wait,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6628) ceph_timeout_jiffies(rbd_dev->opts->lock_timeout));
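^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6628) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6628) * wait_for_completion_killable_timeout() returns the remaining
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6628) * jiffies (> 0) on completion, 0 on timeout, or -ERESTARTSYS if a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6628) * fatal signal arrived while waiting; hence the three-way handling
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6628) * below.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6628) */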
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6629) if (ret > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6630) ret = rbd_dev->acquire_err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6631) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6632) cancel_delayed_work_sync(&rbd_dev->lock_dwork);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6633) if (!ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6634) ret = -ETIMEDOUT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6635) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6636)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6637) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6638) rbd_warn(rbd_dev, "failed to acquire exclusive lock: %ld", ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6639) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6640) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6641)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6642) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6643) * The lock may have been released by now, unless automatic lock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6644) * transitions are disabled.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6645) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6646) rbd_assert(!rbd_dev->opts->exclusive || rbd_is_lock_owner(rbd_dev));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6647) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6648) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6649)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6650) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6651) * An rbd format 2 image has a unique identifier, distinct from the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6652) * name given to it by the user. Internally, that identifier is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6653) * what's used to specify the names of objects related to the image.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6654) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6655) * A special "rbd id" object is used to map an rbd image name to its
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6656) * id. If that object doesn't exist, then there is no v2 rbd image
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6657) * with the supplied name.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6658) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6659) * This function will record the given rbd_dev's image_id field if
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6660) * it can be determined, and in that case will return 0. If any
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6661) * errors occur a negative errno will be returned and the rbd_dev's
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6662) * image_id field will be unchanged (and should be NULL).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6663) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6664) static int rbd_dev_image_id(struct rbd_device *rbd_dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6665) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6666) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6667) size_t size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6668) CEPH_DEFINE_OID_ONSTACK(oid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6669) void *response;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6670) char *image_id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6671)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6672) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6673) * When probing a parent image, the image id is already
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6674) * known (and the image name likely is not). There's no
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6675) * need to fetch the image id again in this case. We
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6676) * do still need to set the image format though.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6677) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6678) if (rbd_dev->spec->image_id) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6679) rbd_dev->image_format = *rbd_dev->spec->image_id ? 2 : 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6680)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6681) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6682) }
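^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6682) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6682) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6682) * (An empty image_id string is how a format 1 image is recorded
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6682) * below, hence the emptiness test above.)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6682) */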
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6683)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6684) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6685) * First, see if the format 2 image id file exists, and if
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6686) * so, get the image's persistent id from it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6687) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6688) ret = ceph_oid_aprintf(&oid, GFP_KERNEL, "%s%s", RBD_ID_PREFIX,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6689) rbd_dev->spec->image_name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6690) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6691) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6692)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6693) dout("rbd id object name is %s\n", oid.name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6694)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6695) /* Response will be an encoded string, which includes a length */
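^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6695) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6695) * Sketch of that encoding: a little-endian u32 byte count followed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6695) * by the string bytes, e.g. 0c 00 00 00 "0123456789ab" for a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6695) * 12-byte id.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6695) */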
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6696) size = sizeof (__le32) + RBD_IMAGE_ID_LEN_MAX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6697) response = kzalloc(size, GFP_NOIO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6698) if (!response) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6699) ret = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6700) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6701) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6702)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6703) /* If it doesn't exist we'll assume it's a format 1 image */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6704)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6705) ret = rbd_obj_method_sync(rbd_dev, &oid, &rbd_dev->header_oloc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6706) "get_id", NULL, 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6707) response, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6708) dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6709) if (ret == -ENOENT) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6710) image_id = kstrdup("", GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6711) ret = image_id ? 0 : -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6712) if (!ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6713) rbd_dev->image_format = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6714) } else if (ret >= 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6715) void *p = response;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6716)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6717) image_id = ceph_extract_encoded_string(&p, p + ret,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6718) NULL, GFP_NOIO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6719) ret = PTR_ERR_OR_ZERO(image_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6720) if (!ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6721) rbd_dev->image_format = 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6722) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6723)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6724) if (!ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6725) rbd_dev->spec->image_id = image_id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6726) dout("image_id is %s\n", image_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6727) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6728) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6729) kfree(response);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6730) ceph_oid_destroy(&oid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6731) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6732) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6733)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6734) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6735) * Undo whatever state changes are made by a v1 or v2 header
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6736) * info call.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6737) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6738) static void rbd_dev_unprobe(struct rbd_device *rbd_dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6739) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6740) struct rbd_image_header *header;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6741)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6742) rbd_dev_parent_put(rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6743) rbd_object_map_free(rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6744) rbd_dev_mapping_clear(rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6745)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6746) /* Free dynamic fields from the header, then zero it out */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6747)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6748) header = &rbd_dev->header;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6749) ceph_put_snap_context(header->snapc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6750) kfree(header->snap_sizes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6751) kfree(header->snap_names);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6752) kfree(header->object_prefix);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6753) memset(header, 0, sizeof (*header));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6754) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6755)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6756) static int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6757) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6758) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6759)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6760) ret = rbd_dev_v2_object_prefix(rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6761) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6762) goto out_err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6763)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6764) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6765) * Get and check the features for the image. Currently the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6766) * features are assumed to never change.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6767) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6768) ret = rbd_dev_v2_features(rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6769) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6770) goto out_err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6771)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6772) /* If the image supports fancy striping, get its parameters */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6773)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6774) if (rbd_dev->header.features & RBD_FEATURE_STRIPINGV2) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6775) ret = rbd_dev_v2_striping_info(rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6776) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6777) goto out_err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6778) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6779)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6780) if (rbd_dev->header.features & RBD_FEATURE_DATA_POOL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6781) ret = rbd_dev_v2_data_pool(rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6782) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6783) goto out_err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6784) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6785)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6786) rbd_init_layout(rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6787) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6788)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6789) out_err:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6790) rbd_dev->header.features = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6791) kfree(rbd_dev->header.object_prefix);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6792) rbd_dev->header.object_prefix = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6793) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6794) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6795)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6796) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6797) * @depth is rbd_dev_image_probe() -> rbd_dev_probe_parent() ->
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6798) * rbd_dev_image_probe() recursion depth, which means it's also the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6799) * length of the already discovered part of the parent chain.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6800) */
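^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6800) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6800) * E.g. when the user maps a clone whose parent is itself a clone,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6800) * the mapped image is probed with depth 0, its parent with depth 1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6800) * and the grandparent with depth 2.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6800) */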
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6801) static int rbd_dev_probe_parent(struct rbd_device *rbd_dev, int depth)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6802) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6803) struct rbd_device *parent = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6804) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6805)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6806) if (!rbd_dev->parent_spec)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6807) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6808)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6809) if (++depth > RBD_MAX_PARENT_CHAIN_LEN) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6810) pr_info("parent chain is too long (%d)\n", depth);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6811) ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6812) goto out_err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6813) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6814)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6815) parent = __rbd_dev_create(rbd_dev->rbd_client, rbd_dev->parent_spec);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6816) if (!parent) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6817) ret = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6818) goto out_err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6819) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6820)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6821) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6822) * Images related by parent/child relationships always share
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6823) * rbd_client and spec/parent_spec, so bump their refcounts.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6824) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6825) __rbd_get_client(rbd_dev->rbd_client);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6826) rbd_spec_get(rbd_dev->parent_spec);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6827)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6828) __set_bit(RBD_DEV_FLAG_READONLY, &parent->flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6829)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6830) ret = rbd_dev_image_probe(parent, depth);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6831) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6832) goto out_err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6833)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6834) rbd_dev->parent = parent;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6835) atomic_set(&rbd_dev->parent_ref, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6836) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6837)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6838) out_err:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6839) rbd_dev_unparent(rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6840) rbd_dev_destroy(parent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6841) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6842) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6843)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6844) static void rbd_dev_device_release(struct rbd_device *rbd_dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6845) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6846) clear_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6847) rbd_free_disk(rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6848) if (!single_major)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6849) unregister_blkdev(rbd_dev->major, rbd_dev->name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6850) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6851)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6852) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6853) * rbd_dev->header_rwsem must be locked for write and will be unlocked
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6854) * upon return.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6855) */
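^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6855) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6855) * (That write lock is the one taken by rbd_dev_image_probe() for a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6855) * depth == 0 probe; see that function below.)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6855) */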
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6856) static int rbd_dev_device_setup(struct rbd_device *rbd_dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6857) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6858) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6859)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6860) /* Record our major and minor device numbers. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6861)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6862) if (!single_major) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6863) ret = register_blkdev(0, rbd_dev->name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6864) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6865) goto err_out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6866)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6867) rbd_dev->major = ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6868) rbd_dev->minor = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6869) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6870) rbd_dev->major = rbd_major;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6871) rbd_dev->minor = rbd_dev_id_to_minor(rbd_dev->dev_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6872) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6873)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6874) /* Set up the blkdev mapping. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6875)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6876) ret = rbd_init_disk(rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6877) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6878) goto err_out_blkdev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6879)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6880) set_capacity(rbd_dev->disk, rbd_dev->mapping.size / SECTOR_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6881) set_disk_ro(rbd_dev->disk, rbd_is_ro(rbd_dev));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6882)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6883) ret = dev_set_name(&rbd_dev->dev, "%d", rbd_dev->dev_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6884) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6885) goto err_out_disk;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6886)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6887) set_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6888) up_write(&rbd_dev->header_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6889) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6890)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6891) err_out_disk:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6892) rbd_free_disk(rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6893) err_out_blkdev:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6894) if (!single_major)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6895) unregister_blkdev(rbd_dev->major, rbd_dev->name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6896) err_out_unlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6897) up_write(&rbd_dev->header_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6898) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6899) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6900)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6901) static int rbd_dev_header_name(struct rbd_device *rbd_dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6902) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6903) struct rbd_spec *spec = rbd_dev->spec;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6904) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6905)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6906) /* Record the header object name for this rbd image. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6907)
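^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6907) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6907) * With the prefixes from rbd_types.h this yields, for example,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6907) * "myimage.rbd" for a format 1 image and "rbd_header.<image_id>"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6907) * for a format 2 image.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6907) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6907) 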
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6908) rbd_assert(rbd_image_format_valid(rbd_dev->image_format));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6909) if (rbd_dev->image_format == 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6910) ret = ceph_oid_aprintf(&rbd_dev->header_oid, GFP_KERNEL, "%s%s",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6911) spec->image_name, RBD_SUFFIX);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6912) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6913) ret = ceph_oid_aprintf(&rbd_dev->header_oid, GFP_KERNEL, "%s%s",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6914) RBD_HEADER_PREFIX, spec->image_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6915)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6916) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6917) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6918)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6919) static void rbd_print_dne(struct rbd_device *rbd_dev, bool is_snap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6920) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6921) if (!is_snap) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6922) pr_info("image %s/%s%s%s does not exist\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6923) rbd_dev->spec->pool_name,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6924) rbd_dev->spec->pool_ns ?: "",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6925) rbd_dev->spec->pool_ns ? "/" : "",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6926) rbd_dev->spec->image_name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6927) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6928) pr_info("snap %s/%s%s%s@%s does not exist\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6929) rbd_dev->spec->pool_name,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6930) rbd_dev->spec->pool_ns ?: "",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6931) rbd_dev->spec->pool_ns ? "/" : "",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6932) rbd_dev->spec->image_name,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6933) rbd_dev->spec->snap_name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6934) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6935) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6936)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6937) static void rbd_dev_image_release(struct rbd_device *rbd_dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6938) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6939) if (!rbd_is_ro(rbd_dev))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6940) rbd_unregister_watch(rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6941)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6942) rbd_dev_unprobe(rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6943) rbd_dev->image_format = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6944) kfree(rbd_dev->spec->image_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6945) rbd_dev->spec->image_id = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6946) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6947)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6948) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6949) * Probe for the existence of the header object for the given rbd
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6950) * device. If this image is the one being mapped (i.e., not a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6951) * parent), initiate a watch on its header object before using that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6952) * object to get detailed information about the rbd image.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6953) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6954) * On success, returns with header_rwsem held for write if called
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6955) * with @depth == 0.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6956) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6957) static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6958) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6959) bool need_watch = !rbd_is_ro(rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6960) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6961)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6962) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6963) * Get the id from the image id object. Unless there's an
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6964) * error, rbd_dev->spec->image_id will be filled in with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6965) * a dynamically-allocated string, and rbd_dev->image_format
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6966) * will be set to either 1 or 2.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6967) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6968) ret = rbd_dev_image_id(rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6969) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6970) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6971)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6972) ret = rbd_dev_header_name(rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6973) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6974) goto err_out_format;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6975)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6976) if (need_watch) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6977) ret = rbd_register_watch(rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6978) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6979) if (ret == -ENOENT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6980) rbd_print_dne(rbd_dev, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6981) goto err_out_format;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6982) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6983) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6984)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6985) if (!depth)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6986) down_write(&rbd_dev->header_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6987)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6988) ret = rbd_dev_header_info(rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6989) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6990) if (ret == -ENOENT && !need_watch)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6991) rbd_print_dne(rbd_dev, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6992) goto err_out_probe;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6993) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6994)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6995) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6996) * If this image is the one being mapped, we have pool name and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6997) * id, image name and id, and snap name - need to fill snap id.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6998) * Otherwise this is a parent image, identified by pool, image
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6999) * and snap ids - need to fill in names for those ids.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7000) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7001) if (!depth)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7002) ret = rbd_spec_fill_snap_id(rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7003) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7004) ret = rbd_spec_fill_names(rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7005) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7006) if (ret == -ENOENT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7007) rbd_print_dne(rbd_dev, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7008) goto err_out_probe;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7009) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7010)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7011) ret = rbd_dev_mapping_set(rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7012) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7013) goto err_out_probe;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7014)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7015) if (rbd_is_snap(rbd_dev) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7016) (rbd_dev->header.features & RBD_FEATURE_OBJECT_MAP)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7017) ret = rbd_object_map_load(rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7018) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7019) goto err_out_probe;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7020) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7021)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7022) if (rbd_dev->header.features & RBD_FEATURE_LAYERING) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7023) ret = rbd_dev_v2_parent_info(rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7024) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7025) goto err_out_probe;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7026) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7027)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7028) ret = rbd_dev_probe_parent(rbd_dev, depth);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7029) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7030) goto err_out_probe;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7031)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7032) dout("discovered format %u image, header name is %s\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7033) rbd_dev->image_format, rbd_dev->header_oid.name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7034) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7035)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7036) err_out_probe:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7037) if (!depth)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7038) up_write(&rbd_dev->header_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7039) if (need_watch)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7040) rbd_unregister_watch(rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7041) rbd_dev_unprobe(rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7042) err_out_format:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7043) rbd_dev->image_format = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7044) kfree(rbd_dev->spec->image_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7045) rbd_dev->spec->image_id = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7046) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7047) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7048)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7049) static ssize_t do_rbd_add(struct bus_type *bus,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7050) const char *buf,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7051) size_t count)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7052) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7053) struct rbd_device *rbd_dev = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7054) struct ceph_options *ceph_opts = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7055) struct rbd_options *rbd_opts = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7056) struct rbd_spec *spec = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7057) struct rbd_client *rbdc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7058) int rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7059)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7060) if (!capable(CAP_SYS_ADMIN))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7061) return -EPERM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7062)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7063) if (!try_module_get(THIS_MODULE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7064) return -ENODEV;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7065)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7066) /* parse add command */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7067) rc = rbd_add_parse_args(buf, &ceph_opts, &rbd_opts, &spec);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7068) if (rc < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7069) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7070)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7071) rbdc = rbd_get_client(ceph_opts);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7072) if (IS_ERR(rbdc)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7073) rc = PTR_ERR(rbdc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7074) goto err_out_args;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7075) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7076)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7077) /* pick the pool */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7078) rc = ceph_pg_poolid_by_name(rbdc->client->osdc.osdmap, spec->pool_name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7079) if (rc < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7080) if (rc == -ENOENT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7081) pr_info("pool %s does not exist\n", spec->pool_name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7082) goto err_out_client;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7083) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7084) spec->pool_id = (u64)rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7085)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7086) rbd_dev = rbd_dev_create(rbdc, spec, rbd_opts);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7087) if (!rbd_dev) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7088) rc = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7089) goto err_out_client;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7090) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7091) rbdc = NULL; /* rbd_dev now owns this */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7092) spec = NULL; /* rbd_dev now owns this */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7093) rbd_opts = NULL; /* rbd_dev now owns this */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7094)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7095) /* if we are mapping a snapshot it will be a read-only mapping */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7096) if (rbd_dev->opts->read_only ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7097) strcmp(rbd_dev->spec->snap_name, RBD_SNAP_HEAD_NAME))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7098) __set_bit(RBD_DEV_FLAG_READONLY, &rbd_dev->flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7099)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7100) rbd_dev->config_info = kstrdup(buf, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7101) if (!rbd_dev->config_info) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7102) rc = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7103) goto err_out_rbd_dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7104) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7105)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7106) rc = rbd_dev_image_probe(rbd_dev, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7107) if (rc < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7108) goto err_out_rbd_dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7109)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7110) if (rbd_dev->opts->alloc_size > rbd_dev->layout.object_size) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7111) rbd_warn(rbd_dev, "alloc_size adjusted to %u",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7112) rbd_dev->layout.object_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7113) rbd_dev->opts->alloc_size = rbd_dev->layout.object_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7114) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7115)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7116) rc = rbd_dev_device_setup(rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7117) if (rc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7118) goto err_out_image_probe;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7119)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7120) rc = rbd_add_acquire_lock(rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7121) if (rc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7122) goto err_out_image_lock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7123)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7124) /* Everything's ready. Announce the disk to the world. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7125)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7126) rc = device_add(&rbd_dev->dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7127) if (rc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7128) goto err_out_image_lock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7129)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7130) device_add_disk(&rbd_dev->dev, rbd_dev->disk, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7131) /* see rbd_init_disk() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7132) blk_put_queue(rbd_dev->disk->queue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7133)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7134) spin_lock(&rbd_dev_list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7135) list_add_tail(&rbd_dev->node, &rbd_dev_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7136) spin_unlock(&rbd_dev_list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7137)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7138) pr_info("%s: capacity %llu features 0x%llx\n", rbd_dev->disk->disk_name,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7139) (unsigned long long)get_capacity(rbd_dev->disk) << SECTOR_SHIFT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7140) rbd_dev->header.features);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7141) rc = count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7142) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7143) module_put(THIS_MODULE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7144) return rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7145)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7146) err_out_image_lock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7147) rbd_dev_image_unlock(rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7148) rbd_dev_device_release(rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7149) err_out_image_probe:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7150) rbd_dev_image_release(rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7151) err_out_rbd_dev:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7152) rbd_dev_destroy(rbd_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7153) err_out_client:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7154) rbd_put_client(rbdc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7155) err_out_args:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7156) rbd_spec_put(spec);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7157) kfree(rbd_opts);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7158) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7159) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7160)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7161) static ssize_t add_store(struct bus_type *bus, const char *buf, size_t count)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7162) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7163) if (single_major)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7164) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7165)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7166) return do_rbd_add(bus, buf, count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7167) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7168)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7169) static ssize_t add_single_major_store(struct bus_type *bus, const char *buf,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7170) size_t count)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7171) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7172) return do_rbd_add(bus, buf, count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7173) }
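^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7173) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7173) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7173) * Note: when the driver is loaded with single_major=Y, writes to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7173) * the plain "add" (and "remove") attributes fail with -EINVAL and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7173) * the "add_single_major"/"remove_single_major" attributes are used
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7173) * instead.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7173) */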
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7174)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7175) static void rbd_dev_remove_parent(struct rbd_device *rbd_dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7176) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7177) while (rbd_dev->parent) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7178) struct rbd_device *first = rbd_dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7179) struct rbd_device *second = first->parent;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7180) struct rbd_device *third;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7181)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7182) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7183) * Walk down to the deepest parent, i.e. the one with no
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7184) * grandparent, and remove it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7185) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7186) while (second && (third = second->parent)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7187) first = second;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7188) second = third;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7189) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7190) rbd_assert(second);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7191) rbd_dev_image_release(second);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7192) rbd_dev_destroy(second);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7193) first->parent = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7194) first->parent_overlap = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7195)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7196) rbd_assert(first->parent_spec);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7197) rbd_spec_put(first->parent_spec);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7198) first->parent_spec = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7199) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7200) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7201)
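^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7201) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7201) * Input is "<dev-id> [force]"; e.g. writing "0 force" to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7201) * /sys/bus/rbd/remove unmaps /dev/rbd0 even if it is still open
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7201) * (new I/O is blocked and in-flight I/O is completed or failed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7201) * first).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7201) */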
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7202) static ssize_t do_rbd_remove(struct bus_type *bus,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7203) const char *buf,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7204) size_t count)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7205) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7206) struct rbd_device *rbd_dev = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7207) struct list_head *tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7208) int dev_id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7209) char opt_buf[6];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7210) bool force = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7211) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7212)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7213) if (!capable(CAP_SYS_ADMIN))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7214) return -EPERM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7215)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7216) dev_id = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7217) opt_buf[0] = '\0';
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7218) sscanf(buf, "%d %5s", &dev_id, opt_buf);
	if (dev_id < 0) {
		pr_err("dev_id out of range\n");
		return -EINVAL;
	}
	if (opt_buf[0] != '\0') {
		if (!strcmp(opt_buf, "force")) {
			force = true;
		} else {
			pr_err("bad remove option at '%s'\n", opt_buf);
			return -EINVAL;
		}
	}

	ret = -ENOENT;
	spin_lock(&rbd_dev_list_lock);
	list_for_each(tmp, &rbd_dev_list) {
		rbd_dev = list_entry(tmp, struct rbd_device, node);
		if (rbd_dev->dev_id == dev_id) {
			ret = 0;
			break;
		}
	}
	if (!ret) {
		spin_lock_irq(&rbd_dev->lock);
		if (rbd_dev->open_count && !force)
			ret = -EBUSY;
		else if (test_and_set_bit(RBD_DEV_FLAG_REMOVING,
					  &rbd_dev->flags))
			ret = -EINPROGRESS;
		spin_unlock_irq(&rbd_dev->lock);
	}
	spin_unlock(&rbd_dev_list_lock);
	if (ret)
		return ret;

	if (force) {
		/*
		 * Prevent new IO from being queued and wait for existing
		 * IO to complete/fail.
		 */
		blk_mq_freeze_queue(rbd_dev->disk->queue);
		blk_set_queue_dying(rbd_dev->disk->queue);
	}
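	/*
	 * Added commentary: the non-force path relies on outstanding
	 * IO completing on its own, while the force path above fails
	 * it instead, so the teardown below cannot block indefinitely
	 * on an unreachable cluster.
	 */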

	del_gendisk(rbd_dev->disk);
	spin_lock(&rbd_dev_list_lock);
	list_del_init(&rbd_dev->node);
	spin_unlock(&rbd_dev_list_lock);
	device_del(&rbd_dev->dev);

	rbd_dev_image_unlock(rbd_dev);
	rbd_dev_device_release(rbd_dev);
	rbd_dev_image_release(rbd_dev);
	rbd_dev_destroy(rbd_dev);
	return count;
}
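
/*
 * Example usage (added; see Documentation/ABI/testing/sysfs-bus-rbd,
 * referenced in the file header):
 *
 *	# echo 0 > /sys/bus/rbd/remove
 *	# echo "0 force" > /sys/bus/rbd/remove
 *
 * Both remove the device with dev_id 0; the second form does so even
 * while the device is still open.
 */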

static ssize_t remove_store(struct bus_type *bus, const char *buf, size_t count)
{
	if (single_major)
		return -EINVAL;

	return do_rbd_remove(bus, buf, count);
}

static ssize_t remove_single_major_store(struct bus_type *bus, const char *buf,
					 size_t count)
{
	return do_rbd_remove(bus, buf, count);
}
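
/*
 * Added commentary: with single_major enabled, plain "remove" is
 * rejected above and userspace must use the remove_single_major
 * attribute instead; both paths end up in do_rbd_remove().
 */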

/*
 * create control files in sysfs
 * /sys/bus/rbd/...
 */
static int __init rbd_sysfs_init(void)
{
	int ret;

	ret = device_register(&rbd_root_dev);
	if (ret < 0)
		return ret;

	ret = bus_register(&rbd_bus_type);
	if (ret < 0)
		device_unregister(&rbd_root_dev);

	return ret;
}

static void __exit rbd_sysfs_cleanup(void)
{
	bus_unregister(&rbd_bus_type);
	device_unregister(&rbd_root_dev);
}
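
/*
 * Added commentary: cleanup runs in the reverse order of
 * rbd_sysfs_init(), unregistering the bus before the root device it
 * hangs off.
 */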

static int __init rbd_slab_init(void)
{
	rbd_assert(!rbd_img_request_cache);
	rbd_img_request_cache = KMEM_CACHE(rbd_img_request, 0);
	if (!rbd_img_request_cache)
		return -ENOMEM;

	rbd_assert(!rbd_obj_request_cache);
	rbd_obj_request_cache = KMEM_CACHE(rbd_obj_request, 0);
	if (!rbd_obj_request_cache)
		goto out_err;

	return 0;

out_err:
	kmem_cache_destroy(rbd_img_request_cache);
	rbd_img_request_cache = NULL;
	return -ENOMEM;
}
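
/*
 * Added commentary: KMEM_CACHE() is a convenience wrapper from
 * <linux/slab.h>; KMEM_CACHE(rbd_img_request, 0) expands to roughly
 *
 *	kmem_cache_create("rbd_img_request",
 *			  sizeof(struct rbd_img_request),
 *			  __alignof__(struct rbd_img_request), 0, NULL);
 *
 * i.e. a dedicated slab named after the structure it backs.
 */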

static void rbd_slab_exit(void)
{
	rbd_assert(rbd_obj_request_cache);
	kmem_cache_destroy(rbd_obj_request_cache);
	rbd_obj_request_cache = NULL;

	rbd_assert(rbd_img_request_cache);
	kmem_cache_destroy(rbd_img_request_cache);
	rbd_img_request_cache = NULL;
}

static int __init rbd_init(void)
{
	int rc;

	if (!libceph_compatible(NULL)) {
		rbd_warn(NULL, "libceph incompatibility (quitting)");
		return -EINVAL;
	}

	rc = rbd_slab_init();
	if (rc)
		return rc;

	/*
	 * The number of active work items is limited by the number of
	 * rbd devices * queue depth, so leave @max_active at default.
	 */
	rbd_wq = alloc_workqueue(RBD_DRV_NAME, WQ_MEM_RECLAIM, 0);
	if (!rbd_wq) {
		rc = -ENOMEM;
		goto err_out_slab;
	}

	if (single_major) {
		rbd_major = register_blkdev(0, RBD_DRV_NAME);
		if (rbd_major < 0) {
			rc = rbd_major;
			goto err_out_wq;
		}
	}

	rc = rbd_sysfs_init();
	if (rc)
		goto err_out_blkdev;

	if (single_major)
		pr_info("loaded (major %d)\n", rbd_major);
	else
		pr_info("loaded\n");

	return 0;

err_out_blkdev:
	if (single_major)
		unregister_blkdev(rbd_major, RBD_DRV_NAME);
err_out_wq:
	destroy_workqueue(rbd_wq);
err_out_slab:
	rbd_slab_exit();
	return rc;
}
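
/*
 * Example (added, with a hypothetical major number): loading the module
 * with the single-major scheme enabled,
 *
 *	# modprobe rbd single_major=Y
 *	rbd: loaded (major 252)
 *
 * register_blkdev(0, ...) allocates a dynamic major, 252 in this
 * illustration.
 */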

static void __exit rbd_exit(void)
{
	ida_destroy(&rbd_dev_id_ida);
	rbd_sysfs_cleanup();
	if (single_major)
		unregister_blkdev(rbd_major, RBD_DRV_NAME);
	destroy_workqueue(rbd_wq);
	rbd_slab_exit();
}
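
/*
 * Added commentary: apart from ida_destroy(), which just releases the
 * dev-id allocator's internal memory, teardown undoes rbd_init() in
 * reverse order.
 */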

module_init(rbd_init);
module_exit(rbd_exit);

MODULE_AUTHOR("Alex Elder <elder@inktank.com>");
MODULE_AUTHOR("Sage Weil <sage@newdream.net>");
MODULE_AUTHOR("Yehuda Sadeh <yehuda@hq.newdream.net>");
/* following authorship retained from original osdblk.c */
MODULE_AUTHOR("Jeff Garzik <jeff@garzik.org>");

MODULE_DESCRIPTION("RADOS Block Device (RBD) driver");
MODULE_LICENSE("GPL");