* [PATCH 1/4] rbd: look up snapshot name in names buffer
2013-04-30 12:41 [PATCH 0/4] rbd: get rid of the snapshot list Alex Elder
@ 2013-04-30 12:42 ` Alex Elder
2013-04-30 12:42 ` [PATCH 2/4] rbd: use snap_id not index to look up snap info Alex Elder
` (3 subsequent siblings)
4 siblings, 0 replies; 9+ messages in thread
From: Alex Elder @ 2013-04-30 12:42 UTC (permalink / raw)
To: ceph-devel
Rather than scanning the list of snapshot structures for it, scan
the snapshot context buffer containing snapshot names in order to
determine for a format 1 image the name associated with a given
snapshot id.
Pull out the part of rbd_dev_v1_snap_info() that does this scan into
a new function, _rbd_dev_v1_snap_name(). Have that function return
a dynamically-allocated copy of the name, and don't duplicate it in
rbd_dev_v1_snap_info().
Signed-off-by: Alex Elder <elder@inktank.com>
---
drivers/block/rbd.c | 40 ++++++++++++++++++++++++++++++----------
1 file changed, 30 insertions(+), 10 deletions(-)
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 15e84d8..98033e7 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -66,6 +66,8 @@
#define RBD_SNAP_HEAD_NAME "-"
+#define BAD_SNAP_INDEX U32_MAX /* invalid index into snap array */
+
/* This allows a single page to hold an image name sent by OSD */
#define RBD_IMAGE_NAME_LEN_MAX (PAGE_SIZE - sizeof (__le32) - 1)
#define RBD_IMAGE_ID_LEN_MAX 64
@@ -809,6 +811,33 @@ out_err:
return -ENOMEM;
}
+static const char *_rbd_dev_v1_snap_name(struct rbd_device *rbd_dev,
u32 which)
+{
+ const char *snap_name;
+
+ rbd_assert(which < rbd_dev->header.snapc->num_snaps);
+
+ /* Skip over names until we find the one we are looking for */
+
+ snap_name = rbd_dev->header.snap_names;
+ while (which--)
+ snap_name += strlen(snap_name) + 1;
+
+ return kstrdup(snap_name, GFP_KERNEL);
+}
+
+static u32 rbd_dev_snap_index(struct rbd_device *rbd_dev, u64 snap_id)
+{
+ struct ceph_snap_context *snapc = rbd_dev->header.snapc;
+ u32 which;
+
+ for (which = 0; which < snapc->num_snaps; which++)
+ if (snapc->snaps[which] == snap_id)
+ return which;
+
+ return BAD_SNAP_INDEX;
+}
+
static const char *rbd_snap_name(struct rbd_device *rbd_dev, u64 snap_id)
{
struct rbd_snap *snap;
@@ -3421,17 +3450,8 @@ static const char *rbd_dev_v1_snap_info(struct
rbd_device *rbd_dev, u32 which,
u64 *snap_size, u64 *snap_features)
{
const char *snap_name;
- int i;
-
- rbd_assert(which < rbd_dev->header.snapc->num_snaps);
-
- /* Skip over names until we find the one we are looking for */
- snap_name = rbd_dev->header.snap_names;
- for (i = 0; i < which; i++)
- snap_name += strlen(snap_name) + 1;
-
- snap_name = kstrdup(snap_name, GFP_KERNEL);
+ snap_name = _rbd_dev_v1_snap_name(rbd_dev, which);
if (!snap_name)
return ERR_PTR(-ENOMEM);
--
1.7.9.5
^ permalink raw reply related [flat|nested] 9+ messages in thread* [PATCH 2/4] rbd: use snap_id not index to look up snap info
2013-04-30 12:41 [PATCH 0/4] rbd: get rid of the snapshot list Alex Elder
2013-04-30 12:42 ` [PATCH 1/4] rbd: look up snapshot name in names buffer Alex Elder
@ 2013-04-30 12:42 ` Alex Elder
2013-04-30 12:42 ` [PATCH 3/4] rbd: define rbd_snap_size() and rbd_snap_features() Alex Elder
` (2 subsequent siblings)
4 siblings, 0 replies; 9+ messages in thread
From: Alex Elder @ 2013-04-30 12:42 UTC (permalink / raw)
To: ceph-devel
In order to align with what was needed for format 1 rbd images,
rbd_dev_v2_snap_info() was set up to take as argument an index into
the array of snapshot ids in a rbd device's snapshot context.
This switches that around, so we pass the snapshot id instead.
In doing this, rbd_snap_name() now returns a dynamically-allocated
string rather than a fixed one, so there's no need to make a
duplicate in its caller, rbd_dev_spec_update().
This means the following functions take a snapshot id where they
previously used an index value:
rbd_dev_snap_info()
rbd_dev_v1_snap_info()
rbd_dev_v2_snap_info()
A new function, rbd_dev_snap_index(), determines the snap index for
format 1 images and uses it to look up the name.
Signed-off-by: Alex Elder <elder@inktank.com>
---
drivers/block/rbd.c | 68
++++++++++++++++++++++++++++-----------------------
1 file changed, 37 insertions(+), 31 deletions(-)
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 98033e7..b1e1d12 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -433,6 +433,8 @@ static void rbd_dev_remove_parent(struct rbd_device
*rbd_dev);
static int rbd_dev_refresh(struct rbd_device *rbd_dev);
static int rbd_dev_v2_refresh(struct rbd_device *rbd_dev);
+static const char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev,
+ u64 snap_id);
static int rbd_open(struct block_device *bdev, fmode_t mode)
{
@@ -838,18 +840,27 @@ static u32 rbd_dev_snap_index(struct rbd_device
*rbd_dev, u64 snap_id)
return BAD_SNAP_INDEX;
}
-static const char *rbd_snap_name(struct rbd_device *rbd_dev, u64 snap_id)
+static const char *rbd_dev_v1_snap_name(struct rbd_device *rbd_dev, u64
snap_id)
{
- struct rbd_snap *snap;
+ u32 which;
+ which = rbd_dev_snap_index(rbd_dev, snap_id);
+ if (which == BAD_SNAP_INDEX)
+ return NULL;
+
+ return _rbd_dev_v1_snap_name(rbd_dev, which);
+}
+
+static const char *rbd_snap_name(struct rbd_device *rbd_dev, u64 snap_id)
+{
if (snap_id == CEPH_NOSNAP)
return RBD_SNAP_HEAD_NAME;
- list_for_each_entry(snap, &rbd_dev->snaps, node)
- if (snap_id == snap->id)
- return snap->name;
+ rbd_assert(rbd_image_format_valid(rbd_dev->image_format));
+ if (rbd_dev->image_format == 1)
+ return rbd_dev_v1_snap_name(rbd_dev, snap_id);
- return NULL;
+ return rbd_dev_v2_snap_name(rbd_dev, snap_id);
}
static struct rbd_snap *snap_by_name(struct rbd_device *rbd_dev,
@@ -3446,11 +3457,15 @@ static struct rbd_snap *rbd_snap_create(struct
rbd_device *rbd_dev,
* Returns a dynamically-allocated snapshot name if successful, or a
* pointer-coded error otherwise.
*/
-static const char *rbd_dev_v1_snap_info(struct rbd_device *rbd_dev, u32
which,
- u64 *snap_size, u64 *snap_features)
+static const char *rbd_dev_v1_snap_info(struct rbd_device *rbd_dev,
+ u64 snap_id, u64 *snap_size, u64 *snap_features)
{
const char *snap_name;
+ u32 which;
+ which = rbd_dev_snap_index(rbd_dev, snap_id);
+ if (which == BAD_SNAP_INDEX)
+ return ERR_PTR(-ENOENT);
snap_name = _rbd_dev_v1_snap_name(rbd_dev, which);
if (!snap_name)
return ERR_PTR(-ENOMEM);
@@ -3816,12 +3831,6 @@ static int rbd_dev_spec_update(struct rbd_device
*rbd_dev)
snap_name = rbd_snap_name(rbd_dev, spec->snap_id);
if (!snap_name) {
- rbd_warn(rbd_dev, "no snapshot with id %llu", spec->snap_id);
- ret = -EIO;
- goto out_err;
- }
- snap_name = kstrdup(snap_name, GFP_KERNEL);
- if (!snap_name) {
ret = -ENOMEM;
goto out_err;
}
@@ -3909,11 +3918,12 @@ out:
return ret;
}
-static const char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev, u32
which)
+static const char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev,
+ u64 snap_id)
{
size_t size;
void *reply_buf;
- __le64 snap_id;
+ __le64 snapid;
int ret;
void *p;
void *end;
@@ -3924,11 +3934,10 @@ static const char *rbd_dev_v2_snap_name(struct
rbd_device *rbd_dev, u32 which)
if (!reply_buf)
return ERR_PTR(-ENOMEM);
- rbd_assert(which < rbd_dev->header.snapc->num_snaps);
- snap_id = cpu_to_le64(rbd_dev->header.snapc->snaps[which]);
+ snapid = cpu_to_le64(snap_id);
ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name,
"rbd", "get_snapshot_name",
- &snap_id, sizeof (snap_id),
+ &snapid, sizeof (snapid),
reply_buf, size);
dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
if (ret < 0) {
@@ -3943,24 +3952,21 @@ static const char *rbd_dev_v2_snap_name(struct
rbd_device *rbd_dev, u32 which)
goto out;
dout(" snap_id 0x%016llx snap_name = %s\n",
- (unsigned long long)le64_to_cpu(snap_id), snap_name);
+ (unsigned long long)snap_id, snap_name);
out:
kfree(reply_buf);
return snap_name;
}
-static const char *rbd_dev_v2_snap_info(struct rbd_device *rbd_dev, u32
which,
- u64 *snap_size, u64 *snap_features)
+static const char *rbd_dev_v2_snap_info(struct rbd_device *rbd_dev,
+ u64 snap_id, u64 *snap_size, u64 *snap_features)
{
- u64 snap_id;
u64 size;
u64 features;
const char *snap_name;
int ret;
- rbd_assert(which < rbd_dev->header.snapc->num_snaps);
- snap_id = rbd_dev->header.snapc->snaps[which];
ret = _rbd_dev_v2_snap_size(rbd_dev, snap_id, NULL, &size);
if (ret)
goto out_err;
@@ -3969,7 +3975,7 @@ static const char *rbd_dev_v2_snap_info(struct
rbd_device *rbd_dev, u32 which,
if (ret)
goto out_err;
- snap_name = rbd_dev_v2_snap_name(rbd_dev, which);
+ snap_name = rbd_dev_v2_snap_name(rbd_dev, snap_id);
if (!IS_ERR(snap_name)) {
*snap_size = size;
*snap_features = features;
@@ -3980,14 +3986,14 @@ out_err:
return ERR_PTR(ret);
}
-static const char *rbd_dev_snap_info(struct rbd_device *rbd_dev, u32 which,
- u64 *snap_size, u64 *snap_features)
+static const char *rbd_dev_snap_info(struct rbd_device *rbd_dev,
+ u64 snap_id, u64 *snap_size, u64 *snap_features)
{
if (rbd_dev->image_format == 1)
- return rbd_dev_v1_snap_info(rbd_dev, which,
+ return rbd_dev_v1_snap_info(rbd_dev, snap_id,
snap_size, snap_features);
if (rbd_dev->image_format == 2)
- return rbd_dev_v2_snap_info(rbd_dev, which,
+ return rbd_dev_v2_snap_info(rbd_dev, snap_id,
snap_size, snap_features);
return ERR_PTR(-EINVAL);
}
@@ -4085,7 +4091,7 @@ static int rbd_dev_snaps_update(struct rbd_device
*rbd_dev)
continue;
}
- snap_name = rbd_dev_snap_info(rbd_dev, index,
+ snap_name = rbd_dev_snap_info(rbd_dev, snap_id,
&snap_size, &snap_features);
if (IS_ERR(snap_name)) {
ret = PTR_ERR(snap_name);
--
1.7.9.5
^ permalink raw reply related [flat|nested] 9+ messages in thread* [PATCH 3/4] rbd: define rbd_snap_size() and rbd_snap_features()
2013-04-30 12:41 [PATCH 0/4] rbd: get rid of the snapshot list Alex Elder
2013-04-30 12:42 ` [PATCH 1/4] rbd: look up snapshot name in names buffer Alex Elder
2013-04-30 12:42 ` [PATCH 2/4] rbd: use snap_id not index to look up snap info Alex Elder
@ 2013-04-30 12:42 ` Alex Elder
2013-04-30 12:43 ` [PATCH 4/4] rbd: kill off the snapshot list Alex Elder
2013-05-01 0:57 ` [PATCH 0/4] rbd: get rid of " Josh Durgin
4 siblings, 0 replies; 9+ messages in thread
From: Alex Elder @ 2013-04-30 12:42 UTC (permalink / raw)
To: ceph-devel
This patch defines a handful of new functions that will allow
us to get rid of the rbd device structure's list of snapshots.
Define rbd_snap_id_by_name() to look up a snapshot id given its
name. This is efficient for format 1 images but not for format 2.
Fortunately it only gets called at mapping time so it's not that
critical.
Use rbd_snap_id_by_name() to find out the id for a snapshot getting
mapped, and pass that id to new functions rbd_snap_size() and
rbd_snap_features() to look up information about a given snapshot's
size and feature mask given its snapshot id. All this gets done
in rbd_dev_mapping_set().
As a result, snap_by_name() is no longer needed, so get rid of it.
Signed-off-by: Alex Elder <elder@inktank.com>
---
drivers/block/rbd.c | 152
+++++++++++++++++++++++++++++++++++++++++++--------
1 file changed, 129 insertions(+), 23 deletions(-)
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index b1e1d12..67fd866d 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -435,6 +435,11 @@ static int rbd_dev_refresh(struct rbd_device *rbd_dev);
static int rbd_dev_v2_refresh(struct rbd_device *rbd_dev);
static const char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev,
u64 snap_id);
+static int _rbd_dev_v2_snap_size(struct rbd_device *rbd_dev, u64 snap_id,
+ u8 *order, u64 *snap_size);
+static int _rbd_dev_v2_snap_features(struct rbd_device *rbd_dev, u64
snap_id,
+ u64 *snap_features);
+static u64 rbd_snap_id_by_name(struct rbd_device *rbd_dev, const char
*name);
static int rbd_open(struct block_device *bdev, fmode_t mode)
{
@@ -840,7 +845,8 @@ static u32 rbd_dev_snap_index(struct rbd_device
*rbd_dev, u64 snap_id)
return BAD_SNAP_INDEX;
}
-static const char *rbd_dev_v1_snap_name(struct rbd_device *rbd_dev, u64
snap_id)
+static const char *rbd_dev_v1_snap_name(struct rbd_device *rbd_dev,
+ u64 snap_id)
{
u32 which;
@@ -863,35 +869,85 @@ static const char *rbd_snap_name(struct rbd_device
*rbd_dev, u64 snap_id)
return rbd_dev_v2_snap_name(rbd_dev, snap_id);
}
-static struct rbd_snap *snap_by_name(struct rbd_device *rbd_dev,
- const char *snap_name)
+static int rbd_snap_size(struct rbd_device *rbd_dev, u64 snap_id,
+ u64 *snap_size)
{
- struct rbd_snap *snap;
+ rbd_assert(rbd_image_format_valid(rbd_dev->image_format));
+ if (snap_id == CEPH_NOSNAP) {
+ *snap_size = rbd_dev->header.image_size;
+ } else if (rbd_dev->image_format == 1) {
+ u32 which;
- list_for_each_entry(snap, &rbd_dev->snaps, node)
- if (!strcmp(snap_name, snap->name))
- return snap;
+ which = rbd_dev_snap_index(rbd_dev, snap_id);
+ if (which == BAD_SNAP_INDEX)
+ return -ENOENT;
- return NULL;
+ *snap_size = rbd_dev->header.snap_sizes[which];
+ } else {
+ u64 size = 0;
+ int ret;
+
+ ret = _rbd_dev_v2_snap_size(rbd_dev, snap_id, NULL, &size);
+ if (ret)
+ return ret;
+
+ *snap_size = size;
+ }
+ return 0;
}
-static int rbd_dev_mapping_set(struct rbd_device *rbd_dev)
+static int rbd_snap_features(struct rbd_device *rbd_dev, u64 snap_id,
+ u64 *snap_features)
{
- if (!memcmp(rbd_dev->spec->snap_name, RBD_SNAP_HEAD_NAME,
- sizeof (RBD_SNAP_HEAD_NAME))) {
- rbd_dev->mapping.size = rbd_dev->header.image_size;
- rbd_dev->mapping.features = rbd_dev->header.features;
+ rbd_assert(rbd_image_format_valid(rbd_dev->image_format));
+ if (snap_id == CEPH_NOSNAP) {
+ *snap_features = rbd_dev->header.features;
+ } else if (rbd_dev->image_format == 1) {
+ *snap_features = 0; /* No features for format 1 */
} else {
- struct rbd_snap *snap;
+ u64 features = 0;
+ int ret;
+
+ ret = _rbd_dev_v2_snap_features(rbd_dev, snap_id, &features);
+ if (ret)
+ return ret;
+
+ *snap_features = features;
+ }
+ return 0;
+}
- snap = snap_by_name(rbd_dev, rbd_dev->spec->snap_name);
- if (!snap)
+static int rbd_dev_mapping_set(struct rbd_device *rbd_dev)
+{
+ const char *snap_name = rbd_dev->spec->snap_name;
+ u64 snap_id;
+ u64 size = 0;
+ u64 features = 0;
+ int ret;
+
+ if (strcmp(snap_name, RBD_SNAP_HEAD_NAME)) {
+ snap_id = rbd_snap_id_by_name(rbd_dev, snap_name);
+ if (snap_id == CEPH_NOSNAP)
return -ENOENT;
- rbd_dev->mapping.size = snap->size;
- rbd_dev->mapping.features = snap->features;
- rbd_dev->mapping.read_only = true;
+ } else {
+ snap_id = CEPH_NOSNAP;
}
+ ret = rbd_snap_size(rbd_dev, snap_id, &size);
+ if (ret)
+ return ret;
+ ret = rbd_snap_features(rbd_dev, snap_id, &features);
+ if (ret)
+ return ret;
+
+ rbd_dev->mapping.size = size;
+ rbd_dev->mapping.features = features;
+
+ /* If we are mapping a snapshot it must be marked read-only */
+
+ if (snap_id != CEPH_NOSNAP)
+ rbd_dev->mapping.read_only = true;
+
return 0;
}
@@ -3766,6 +3822,56 @@ out:
return image_name;
}
+static u64 rbd_v1_snap_id_by_name(struct rbd_device *rbd_dev, const
char *name)
+{
+ struct ceph_snap_context *snapc = rbd_dev->header.snapc;
+ const char *snap_name;
+ u32 which = 0;
+
+ /* Skip over names until we find the one we are looking for */
+
+ snap_name = rbd_dev->header.snap_names;
+ while (which < snapc->num_snaps) {
+ if (!strcmp(name, snap_name))
+ return snapc->snaps[which];
+ snap_name += strlen(snap_name) + 1;
+ which++;
+ }
+ return CEPH_NOSNAP;
+}
+
+static u64 rbd_v2_snap_id_by_name(struct rbd_device *rbd_dev, const
char *name)
+{
+ struct ceph_snap_context *snapc = rbd_dev->header.snapc;
+ u32 which;
+ bool found = false;
+ u64 snap_id;
+
+ for (which = 0; !found && which < snapc->num_snaps; which++) {
+ const char *snap_name;
+
+ snap_id = snapc->snaps[which];
+ snap_name = rbd_dev_v2_snap_name(rbd_dev, snap_id);
+ if (IS_ERR(snap_name))
+ break;
+ found = !strcmp(name, snap_name);
+ kfree(snap_name);
+ }
+ return found ? snap_id : CEPH_NOSNAP;
+}
+
+/*
+ * Assumes name is never RBD_SNAP_HEAD_NAME; returns CEPH_NOSNAP if
+ * no snapshot by that name is found, or if an error occurs.
+ */
+static u64 rbd_snap_id_by_name(struct rbd_device *rbd_dev, const char
*name)
+{
+ if (rbd_dev->image_format == 1)
+ return rbd_v1_snap_id_by_name(rbd_dev, name);
+
+ return rbd_v2_snap_id_by_name(rbd_dev, name);
+}
+
/*
* When an rbd image has a parent image, it is identified by the
* pool, image, and snapshot ids (not names). This function fills
@@ -3797,12 +3903,12 @@ static int rbd_dev_spec_update(struct rbd_device
*rbd_dev)
*/
if (spec->pool_name) {
if (strcmp(spec->snap_name, RBD_SNAP_HEAD_NAME)) {
- struct rbd_snap *snap;
+ u64 snap_id;
- snap = snap_by_name(rbd_dev, spec->snap_name);
- if (!snap)
+ snap_id = rbd_snap_id_by_name(rbd_dev, spec->snap_name);
+ if (snap_id == CEPH_NOSNAP)
return -ENOENT;
- spec->snap_id = snap->id;
+ spec->snap_id = snap_id;
} else {
spec->snap_id = CEPH_NOSNAP;
}
--
1.7.9.5
^ permalink raw reply related [flat|nested] 9+ messages in thread* [PATCH 4/4] rbd: kill off the snapshot list
2013-04-30 12:41 [PATCH 0/4] rbd: get rid of the snapshot list Alex Elder
` (2 preceding siblings ...)
2013-04-30 12:42 ` [PATCH 3/4] rbd: define rbd_snap_size() and rbd_snap_features() Alex Elder
@ 2013-04-30 12:43 ` Alex Elder
2013-05-01 0:57 ` [PATCH 0/4] rbd: get rid of " Josh Durgin
4 siblings, 0 replies; 9+ messages in thread
From: Alex Elder @ 2013-04-30 12:43 UTC (permalink / raw)
To: ceph-devel
We no longer use the snapshot list for anything. When we need to
look up a snapshot name, id, size, or feature mask, we just do it
directly rather than relying on this list being updated with every
refresh. The main reason it existed was for the benefit of the
device/sysfs entries that previously were associated with snapshots.
So get rid of the snapshot list, and struct rbd_snap, and the
hundreds of lines of code that supported them.
This resolves:
http://tracker.ceph.com/issues/4868
Signed-off-by: Alex Elder <elder@inktank.com>
---
drivers/block/rbd.c | 257
+--------------------------------------------------
1 file changed, 1 insertion(+), 256 deletions(-)
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 67fd866d..c52246a 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -274,14 +274,6 @@ struct rbd_img_request {
#define for_each_obj_request_safe(ireq, oreq, n) \
list_for_each_entry_safe_reverse(oreq, n, &(ireq)->obj_requests, links)
-struct rbd_snap {
- const char *name;
- u64 size;
- struct list_head node;
- u64 id;
- u64 features;
-};
-
struct rbd_mapping {
u64 size;
u64 features;
@@ -326,9 +318,6 @@ struct rbd_device {
struct list_head node;
- /* list of snapshots */
- struct list_head snaps;
-
/* sysfs related */
struct device dev;
unsigned long open_count; /* protected by lock */
@@ -356,10 +345,7 @@ static DEFINE_SPINLOCK(rbd_client_list_lock);
static int rbd_img_request_submit(struct rbd_img_request *img_request);
-static int rbd_dev_snaps_update(struct rbd_device *rbd_dev);
-
static void rbd_dev_device_release(struct device *dev);
-static void rbd_snap_destroy(struct rbd_snap *snap);
static ssize_t rbd_add(struct bus_type *bus, const char *buf,
size_t count);
@@ -3075,17 +3061,6 @@ static int rbd_read_header(struct rbd_device
*rbd_dev,
return ret;
}
-static void rbd_remove_all_snaps(struct rbd_device *rbd_dev)
-{
- struct rbd_snap *snap;
- struct rbd_snap *next;
-
- list_for_each_entry_safe(snap, next, &rbd_dev->snaps, node) {
- list_del(&snap->node);
- rbd_snap_destroy(snap);
- }
-}
-
static void rbd_update_mapping_size(struct rbd_device *rbd_dev)
{
if (rbd_dev->spec->snap_id != CEPH_NOSNAP)
@@ -3134,8 +3109,6 @@ static int rbd_dev_v1_refresh(struct rbd_device
*rbd_dev)
rbd_warn(rbd_dev, "object prefix changed (ignoring)");
kfree(h.object_prefix);
- ret = rbd_dev_snaps_update(rbd_dev);
-
up_write(&rbd_dev->header_rwsem);
return ret;
@@ -3461,7 +3434,6 @@ static struct rbd_device *rbd_dev_create(struct
rbd_client *rbdc,
spin_lock_init(&rbd_dev->lock);
rbd_dev->flags = 0;
INIT_LIST_HEAD(&rbd_dev->node);
- INIT_LIST_HEAD(&rbd_dev->snaps);
init_rwsem(&rbd_dev->header_rwsem);
rbd_dev->spec = spec;
@@ -3484,54 +3456,6 @@ static void rbd_dev_destroy(struct rbd_device
*rbd_dev)
kfree(rbd_dev);
}
-static void rbd_snap_destroy(struct rbd_snap *snap)
-{
- kfree(snap->name);
- kfree(snap);
-}
-
-static struct rbd_snap *rbd_snap_create(struct rbd_device *rbd_dev,
- const char *snap_name,
- u64 snap_id, u64 snap_size,
- u64 snap_features)
-{
- struct rbd_snap *snap;
-
- snap = kzalloc(sizeof (*snap), GFP_KERNEL);
- if (!snap)
- return ERR_PTR(-ENOMEM);
-
- snap->name = snap_name;
- snap->id = snap_id;
- snap->size = snap_size;
- snap->features = snap_features;
-
- return snap;
-}
-
-/*
- * Returns a dynamically-allocated snapshot name if successful, or a
- * pointer-coded error otherwise.
- */
-static const char *rbd_dev_v1_snap_info(struct rbd_device *rbd_dev,
- u64 snap_id, u64 *snap_size, u64 *snap_features)
-{
- const char *snap_name;
- u32 which;
-
- which = rbd_dev_snap_index(rbd_dev, snap_id);
- if (which == BAD_SNAP_INDEX)
- return ERR_PTR(-ENOENT);
- snap_name = _rbd_dev_v1_snap_name(rbd_dev, which);
- if (!snap_name)
- return ERR_PTR(-ENOMEM);
-
- *snap_size = rbd_dev->header.snap_sizes[which];
- *snap_features = 0; /* No features for v1 */
-
- return snap_name;
-}
-
/*
* Get the size and object order for an image snapshot, or if
* snap_id is CEPH_NOSNAP, gets this information for the base
@@ -3883,10 +3807,6 @@ static u64 rbd_snap_id_by_name(struct rbd_device
*rbd_dev, const char *name)
* When an image being mapped (not a parent) is probed, we have the
* pool name and pool id, image name and image id, and the snapshot
* name. The only thing we're missing is the snapshot id.
- *
- * The set of snapshots for an image is not known until they have
- * been read by rbd_dev_snaps_update(), so we can't completely fill
- * in this information until after that has been called.
*/
static int rbd_dev_spec_update(struct rbd_device *rbd_dev)
{
@@ -4065,45 +3985,6 @@ out:
return snap_name;
}
-static const char *rbd_dev_v2_snap_info(struct rbd_device *rbd_dev,
- u64 snap_id, u64 *snap_size, u64 *snap_features)
-{
- u64 size;
- u64 features;
- const char *snap_name;
- int ret;
-
- ret = _rbd_dev_v2_snap_size(rbd_dev, snap_id, NULL, &size);
- if (ret)
- goto out_err;
-
- ret = _rbd_dev_v2_snap_features(rbd_dev, snap_id, &features);
- if (ret)
- goto out_err;
-
- snap_name = rbd_dev_v2_snap_name(rbd_dev, snap_id);
- if (!IS_ERR(snap_name)) {
- *snap_size = size;
- *snap_features = features;
- }
-
- return snap_name;
-out_err:
- return ERR_PTR(ret);
-}
-
-static const char *rbd_dev_snap_info(struct rbd_device *rbd_dev,
- u64 snap_id, u64 *snap_size, u64 *snap_features)
-{
- if (rbd_dev->image_format == 1)
- return rbd_dev_v1_snap_info(rbd_dev, snap_id,
- snap_size, snap_features);
- if (rbd_dev->image_format == 2)
- return rbd_dev_v2_snap_info(rbd_dev, snap_id,
- snap_size, snap_features);
- return ERR_PTR(-EINVAL);
-}
-
static int rbd_dev_v2_refresh(struct rbd_device *rbd_dev)
{
int ret;
@@ -4119,141 +4000,12 @@ static int rbd_dev_v2_refresh(struct rbd_device
*rbd_dev)
dout("rbd_dev_v2_snap_context returned %d\n", ret);
if (ret)
goto out;
- ret = rbd_dev_snaps_update(rbd_dev);
- dout("rbd_dev_snaps_update returned %d\n", ret);
- if (ret)
- goto out;
out:
up_write(&rbd_dev->header_rwsem);
return ret;
}
-/*
- * Scan the rbd device's current snapshot list and compare it to the
- * newly-received snapshot context. Remove any existing snapshots
- * not present in the new snapshot context. Add a new snapshot for
- * any snaphots in the snapshot context not in the current list.
- * And verify there are no changes to snapshots we already know
- * about.
- *
- * Assumes the snapshots in the snapshot context are sorted by
- * snapshot id, highest id first. (Snapshots in the rbd_dev's list
- * are also maintained in that order.)
- *
- * Note that any error occurs while updating the snapshot list
- * aborts the update, and the entire list is cleared. The snapshot
- * list becomes inconsistent at that point anyway, so it might as
- * well be empty.
- */
-static int rbd_dev_snaps_update(struct rbd_device *rbd_dev)
-{
- struct ceph_snap_context *snapc = rbd_dev->header.snapc;
- const u32 snap_count = snapc->num_snaps;
- struct list_head *head = &rbd_dev->snaps;
- struct list_head *links = head->next;
- u32 index = 0;
- int ret = 0;
-
- dout("%s: snap count is %u\n", __func__, (unsigned int)snap_count);
- while (index < snap_count || links != head) {
- u64 snap_id;
- struct rbd_snap *snap;
- const char *snap_name;
- u64 snap_size = 0;
- u64 snap_features = 0;
-
- snap_id = index < snap_count ? snapc->snaps[index]
- : CEPH_NOSNAP;
- snap = links != head ? list_entry(links, struct rbd_snap, node)
- : NULL;
- rbd_assert(!snap || snap->id != CEPH_NOSNAP);
-
- if (snap_id == CEPH_NOSNAP || (snap && snap->id > snap_id)) {
- struct list_head *next = links->next;
-
- /*
- * A previously-existing snapshot is not in
- * the new snap context.
- *
- * If the now-missing snapshot is the one
- * the image represents, clear its existence
- * flag so we can avoid sending any more
- * requests to it.
- */
- if (rbd_dev->spec->snap_id == snap->id)
- clear_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags);
- dout("removing %ssnap id %llu\n",
- rbd_dev->spec->snap_id == snap->id ?
- "mapped " : "",
- (unsigned long long)snap->id);
-
- list_del(&snap->node);
- rbd_snap_destroy(snap);
-
- /* Done with this list entry; advance */
-
- links = next;
- continue;
- }
-
- snap_name = rbd_dev_snap_info(rbd_dev, snap_id,
- &snap_size, &snap_features);
- if (IS_ERR(snap_name)) {
- ret = PTR_ERR(snap_name);
- dout("failed to get snap info, error %d\n", ret);
- goto out_err;
- }
-
- dout("entry %u: snap_id = %llu\n", (unsigned int)snap_count,
- (unsigned long long)snap_id);
- if (!snap || (snap_id != CEPH_NOSNAP && snap->id < snap_id)) {
- struct rbd_snap *new_snap;
-
- /* We haven't seen this snapshot before */
-
- new_snap = rbd_snap_create(rbd_dev, snap_name,
- snap_id, snap_size, snap_features);
- if (IS_ERR(new_snap)) {
- ret = PTR_ERR(new_snap);
- dout(" failed to add dev, error %d\n", ret);
- goto out_err;
- }
-
- /* New goes before existing, or at end of list */
-
- dout(" added dev%s\n", snap ? "" : " at end\n");
- if (snap)
- list_add_tail(&new_snap->node, &snap->node);
- else
- list_add_tail(&new_snap->node, head);
- } else {
- /* Already have this one */
-
- dout(" already present\n");
-
- rbd_assert(snap->size == snap_size);
- rbd_assert(!strcmp(snap->name, snap_name));
- rbd_assert(snap->features == snap_features);
-
- /* Done with this list entry; advance */
-
- links = links->next;
- }
-
- /* Advance to the next entry in the snapshot context */
-
- index++;
- }
- dout("%s: done\n", __func__);
-
- return 0;
-out_err:
- rbd_remove_all_snaps(rbd_dev);
-
- return ret;
-}
-
static int rbd_bus_add_dev(struct rbd_device *rbd_dev)
{
struct device *dev;
@@ -4914,7 +4666,6 @@ static void rbd_dev_image_release(struct
rbd_device *rbd_dev)
int ret;
rbd_dev_remove_parent(rbd_dev);
- rbd_remove_all_snaps(rbd_dev);
rbd_dev_unprobe(rbd_dev);
ret = rbd_dev_header_watch_sync(rbd_dev, 0);
if (ret)
@@ -4964,20 +4715,14 @@ static int rbd_dev_image_probe(struct rbd_device
*rbd_dev)
if (ret)
goto err_out_watch;
- ret = rbd_dev_snaps_update(rbd_dev);
- if (ret)
- goto err_out_probe;
-
ret = rbd_dev_spec_update(rbd_dev);
if (ret)
- goto err_out_snaps;
+ goto err_out_probe;
ret = rbd_dev_probe_parent(rbd_dev);
if (!ret)
return 0;
-err_out_snaps:
- rbd_remove_all_snaps(rbd_dev);
err_out_probe:
rbd_dev_unprobe(rbd_dev);
err_out_watch:
--
1.7.9.5
^ permalink raw reply related [flat|nested] 9+ messages in thread* Re: [PATCH 0/4] rbd: get rid of the snapshot list
2013-04-30 12:41 [PATCH 0/4] rbd: get rid of the snapshot list Alex Elder
` (3 preceding siblings ...)
2013-04-30 12:43 ` [PATCH 4/4] rbd: kill off the snapshot list Alex Elder
@ 2013-05-01 0:57 ` Josh Durgin
2013-05-01 1:12 ` Alex Elder
4 siblings, 1 reply; 9+ messages in thread
From: Josh Durgin @ 2013-05-01 0:57 UTC (permalink / raw)
To: Alex Elder; +Cc: ceph-devel
On 04/30/2013 05:41 AM, Alex Elder wrote:
> An rbd device structure maintains a list of snapshot
> structures whose purpose is to cache the name, size,
> and features associated with a snapshot id. The main
> reason it was needed was related to the presence of
> Linux device information for snapshots, which we
> no longer have. We can look up the name, etc. "on
> the fly" about as easily as we can using the list,
> and getting rid of this list means we can eliminate
> a substantial bit of code.
>
> The final patch in this series gets rid of the snapshot
> list and the rbd_snap structure. The first three put
> in place replacement functionality that doesn't require
> the list.
>
> -Alex
>
> [PATCH 1/4] rbd: look up snapshot name in names buffer
> [PATCH 2/4] rbd: use snap_id not index to look up snap info
> [PATCH 3/4] rbd: define rbd_snap_size() and rbd_snap_features()
> [PATCH 4/4] rbd: kill off the snapshot list
These patches look good, but bring up two things that should be
addressed in later patches.
1) inefficient snapshot id lookup for format 2 images
We're sending a single request for each snapshot id in the snapshot
context. There could potentially be a very large number of snapshots
there, hundreds or even thousands, which would slow down mapping
a snapshot quite a bit. Aggregating this into a request with multiple
ops so we get, perhaps, up to 500 snapshot names at a time would
make this a lot faster.
2) mapped snapshots never clear the EXISTS flag
Now that we're not keeping a snapshot list, we don't check whether
the snapshot we mapped continues to exist. Since we know the snapshot
id, we just need to see if it's still present in the updated snapshot
context.
These aren't critical though, so this patch series looks good as is:
Reviewed-by: Josh Durgin <josh.durgin@inktank.com>
^ permalink raw reply [flat|nested] 9+ messages in thread* Re: [PATCH 0/4] rbd: get rid of the snapshot list
2013-05-01 0:57 ` [PATCH 0/4] rbd: get rid of " Josh Durgin
@ 2013-05-01 1:12 ` Alex Elder
2013-05-01 1:16 ` Alex Elder
2013-05-01 1:32 ` Josh Durgin
0 siblings, 2 replies; 9+ messages in thread
From: Alex Elder @ 2013-05-01 1:12 UTC (permalink / raw)
To: Josh Durgin; +Cc: ceph-devel
On 04/30/2013 07:57 PM, Josh Durgin wrote:
> On 04/30/2013 05:41 AM, Alex Elder wrote:
>> An rbd device structure maintains a list of snapshot
>> structures whose purpose is to cache the name, size,
>> and features associated with a snapshot id. The main
>> reason it was needed was related to the presence of
>> Linux device information for snapshots, which we
>> no longer have. We can look up the name, etc. "on
>> the fly" about as easily as we can using the list,
>> and getting rid of this list means we can eliminate
>> a substantial bit of code.
>>
>> The final patch in this series gets rid of the snapshot
>> list and the rbd_snap structure. The first three put
>> in place replacement functionality that doesn't require
>> the list.
>>
>> -Alex
>>
>> [PATCH 1/4] rbd: look up snapshot name in names buffer
>> [PATCH 2/4] rbd: use snap_id not index to look up snap info
>> [PATCH 3/4] rbd: define rbd_snap_size() and rbd_snap_features()
>> [PATCH 4/4] rbd: kill off the snapshot list
>
> These patches look good, but bring up two things that should be
> addressed in later patches.
>
> 1) inefficient snapshot id lookup for format 2 images
>
> We're sending a single request for each snapshot id in the snapshot
> context. There could potentially be a very large number of snapshots
> there, hundreds or even thousands, which would slow down mapping
> a snapshot quite a bit. Aggregating this into a request with multiple
> ops so we get, perhaps, up to 500 snapshot names at a time would
> make this a lot faster.
That's a good idea. And although the messenger is prepared
to handle that, the osd client and rbd still need some work
to really be able to handle generalized many-op requests.
I can see it on the horizon, but right now we still
really assume at most 2 ops, and only one of them (the
first) is a read. Format 1 of course does the whole thing
in one shot.
The second thing I was going to say is that this change
actually makes it *better* than it was. Previously
we would fetch all this stuff for every snapshot on
every refresh. Now (for format 2 anyway) we get only
the snapshot context, and only when we're mapping do
we look up more detailed snapshot info.
On that, the only thing inefficient is looking up the
snapshot id given its name. It is so inefficient it
might be worth offering an op that does that for us,
because as it is we have to do a sequential search
to find out a snapshot's name. (Maybe it's not that
important to track the name for parent snapshots but
users might be interested.)
> 2) mapped snapshots never clear the EXISTS flag
>
> Now that we're not keeping a snapshot list, we don't check whether
> the snapshot we mapped continues to exist. Since we know the snapshot
> id, we just need to see if it's still present in the updated snapshot
> context.
That should be pretty easy to fix. I forgot about that.
I just lopped out that whole function and I guess I
missed that.
> These aren't critical though, so this patch series looks good as is:
> Reviewed-by: Josh Durgin <josh.durgin@inktank.com>
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 0/4] rbd: get rid of the snapshot list
2013-05-01 1:12 ` Alex Elder
@ 2013-05-01 1:16 ` Alex Elder
2013-05-01 1:32 ` Josh Durgin
1 sibling, 0 replies; 9+ messages in thread
From: Alex Elder @ 2013-05-01 1:16 UTC (permalink / raw)
To: Josh Durgin; +Cc: ceph-devel
On 04/30/2013 08:12 PM, Alex Elder wrote:
> On that, the only thing inefficient is looking up the
> snapshot id given its name. It is so inefficient it
> might be worth offering an op that does that for us,
> because as it is we have to do a sequential search
> to find out a snapshot's name. (Maybe it's not that
> important to track the name for parent snapshots but
> users might be interested.)
Looking at this, I was confused as I wrote it...
For a v2 snapshot, we iterate across snapshot ids in
the context, and fetch the name for each. Only
when we find the name that matches the one sought
do we know which id it's associated with.
Anyway, once we know the id it's a direct query
for all the other info.
-Alex
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 0/4] rbd: get rid of the snapshot list
2013-05-01 1:12 ` Alex Elder
2013-05-01 1:16 ` Alex Elder
@ 2013-05-01 1:32 ` Josh Durgin
1 sibling, 0 replies; 9+ messages in thread
From: Josh Durgin @ 2013-05-01 1:32 UTC (permalink / raw)
To: Alex Elder; +Cc: ceph-devel
On 04/30/2013 06:12 PM, Alex Elder wrote:
> On that, the only thing inefficient is looking up the
> snapshot id given its name. It is so inefficient it
> might be worth offering an op that does that for us,
> because as it is we have to do a sequential search
> to find out a snapshot's name. (Maybe it's not that
> important to track the name for parent snapshots but
> users might be interested.)
That would be more efficient, but I don't think it's worth going that
far. The client would need a way to work with current osds too, and
it's not that much data - it's mainly the extra latency from fetching
it that would be the problem. Iterating over 1000 snapshot names in
memory is much faster, and only done when mapping for the kernel.
Userspace fetches all the snapshot metadata in one operation, and
indexes it by name in memory (which is useful since all the librbd
calls relating to snapshots deal with names).
^ permalink raw reply [flat|nested] 9+ messages in thread