* [PATCH] rbd: prevent kernel stack blow up on rbd map
@ 2015-10-11 18:05 Ilya Dryomov
2015-10-15 2:03 ` Josh Durgin
0 siblings, 1 reply; 2+ messages in thread
From: Ilya Dryomov @ 2015-10-11 18:05 UTC (permalink / raw)
To: ceph-devel; +Cc: Xudong Cao
Mapping an image with a long parent chain (e.g. image foo, whose parent
is bar, whose parent is baz, etc) currently leads to a kernel stack
overflow, due to the following recursion in the reply path:
rbd_osd_req_callback()
rbd_obj_request_complete()
rbd_img_obj_callback()
rbd_img_parent_read_callback()
rbd_obj_request_complete()
...
Limit the parent chain to 8 images, which is ~3K worth of stack. It's
probably a good thing to do regardless - performance with more than
a few images long parent chain is likely to be pretty bad.
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
drivers/block/rbd.c | 33 +++++++++++++++++++++++----------
1 file changed, 23 insertions(+), 10 deletions(-)
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index ccbc3cbbf24e..07f666f4ca18 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -96,6 +96,8 @@ static int atomic_dec_return_safe(atomic_t *v)
#define RBD_MINORS_PER_MAJOR 256
#define RBD_SINGLE_MAJOR_PART_SHIFT 4
+#define RBD_MAX_PARENT_CHAIN_LEN 8
+
#define RBD_SNAP_DEV_NAME_PREFIX "snap_"
#define RBD_MAX_SNAP_NAME_LEN \
(NAME_MAX - (sizeof (RBD_SNAP_DEV_NAME_PREFIX) - 1))
@@ -426,7 +428,7 @@ static ssize_t rbd_add_single_major(struct bus_type *bus, const char *buf,
size_t count);
static ssize_t rbd_remove_single_major(struct bus_type *bus, const char *buf,
size_t count);
-static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping);
+static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth);
static void rbd_spec_put(struct rbd_spec *spec);
static int rbd_dev_id_to_minor(int dev_id)
@@ -5131,7 +5133,12 @@ out_err:
return ret;
}
-static int rbd_dev_probe_parent(struct rbd_device *rbd_dev)
+/*
+ * @depth is rbd_dev_image_probe() -> rbd_dev_probe_parent() ->
+ * rbd_dev_image_probe() recursion depth, which means it's also the
+ * length of the already discovered part of the parent chain.
+ */
+static int rbd_dev_probe_parent(struct rbd_device *rbd_dev, int depth)
{
struct rbd_device *parent = NULL;
int ret;
@@ -5139,6 +5146,12 @@ static int rbd_dev_probe_parent(struct rbd_device *rbd_dev)
if (!rbd_dev->parent_spec)
return 0;
+ if (++depth > RBD_MAX_PARENT_CHAIN_LEN) {
+ pr_info("parent chain is too long (%d)\n", depth);
+ ret = -EINVAL;
+ goto out_err;
+ }
+
parent = rbd_dev_create(rbd_dev->rbd_client, rbd_dev->parent_spec,
NULL);
if (!parent) {
@@ -5153,7 +5166,7 @@ static int rbd_dev_probe_parent(struct rbd_device *rbd_dev)
__rbd_get_client(rbd_dev->rbd_client);
rbd_spec_get(rbd_dev->parent_spec);
- ret = rbd_dev_image_probe(parent, false);
+ ret = rbd_dev_image_probe(parent, depth);
if (ret < 0)
goto out_err;
@@ -5282,7 +5295,7 @@ static void rbd_dev_image_release(struct rbd_device *rbd_dev)
* parent), initiate a watch on its header object before using that
* object to get detailed information about the rbd image.
*/
-static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping)
+static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth)
{
int ret;
@@ -5300,7 +5313,7 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping)
if (ret)
goto err_out_format;
- if (mapping) {
+ if (!depth) {
ret = rbd_dev_header_watch_sync(rbd_dev);
if (ret) {
if (ret == -ENOENT)
@@ -5321,7 +5334,7 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping)
* Otherwise this is a parent image, identified by pool, image
* and snap ids - need to fill in names for those ids.
*/
- if (mapping)
+ if (!depth)
ret = rbd_spec_fill_snap_id(rbd_dev);
else
ret = rbd_spec_fill_names(rbd_dev);
@@ -5343,12 +5356,12 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping)
* Need to warn users if this image is the one being
* mapped and has a parent.
*/
- if (mapping && rbd_dev->parent_spec)
+ if (!depth && rbd_dev->parent_spec)
rbd_warn(rbd_dev,
"WARNING: kernel layering is EXPERIMENTAL!");
}
- ret = rbd_dev_probe_parent(rbd_dev);
+ ret = rbd_dev_probe_parent(rbd_dev, depth);
if (ret)
goto err_out_probe;
@@ -5359,7 +5372,7 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping)
err_out_probe:
rbd_dev_unprobe(rbd_dev);
err_out_watch:
- if (mapping)
+ if (!depth)
rbd_dev_header_unwatch_sync(rbd_dev);
out_header_name:
kfree(rbd_dev->header_name);
@@ -5422,7 +5435,7 @@ static ssize_t do_rbd_add(struct bus_type *bus,
spec = NULL; /* rbd_dev now owns this */
rbd_opts = NULL; /* rbd_dev now owns this */
- rc = rbd_dev_image_probe(rbd_dev, true);
+ rc = rbd_dev_image_probe(rbd_dev, 0);
if (rc < 0)
goto err_out_rbd_dev;
--
2.4.3
^ permalink raw reply related [flat|nested] 2+ messages in thread* Re: [PATCH] rbd: prevent kernel stack blow up on rbd map
2015-10-11 18:05 [PATCH] rbd: prevent kernel stack blow up on rbd map Ilya Dryomov
@ 2015-10-15 2:03 ` Josh Durgin
0 siblings, 0 replies; 2+ messages in thread
From: Josh Durgin @ 2015-10-15 2:03 UTC (permalink / raw)
To: Ilya Dryomov, ceph-devel; +Cc: Xudong Cao
On 10/11/2015 11:05 AM, Ilya Dryomov wrote:
> Mapping an image with a long parent chain (e.g. image foo, whose parent
> is bar, whose parent is baz, etc) currently leads to a kernel stack
> overflow, due to the following recursion in the reply path:
>
> rbd_osd_req_callback()
> rbd_obj_request_complete()
> rbd_img_obj_callback()
> rbd_img_parent_read_callback()
> rbd_obj_request_complete()
> ...
>
> Limit the parent chain to 8 images, which is ~3K worth of stack. It's
> probably a good thing to do regardless - performance with more than
> a few images long parent chain is likely to be pretty bad.
Long term I don't think a limit is necessary. With object map, there's
no real performance penalty for long chains. Once the recursion is
removed, we can get rid of the limit.
This is a simple backportable fix for now though, and it looks good to
me.
Reviewed-by: Josh Durgin <jdurgin@redhat.com>
> Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
> ---
> drivers/block/rbd.c | 33 +++++++++++++++++++++++----------
> 1 file changed, 23 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
> index ccbc3cbbf24e..07f666f4ca18 100644
> --- a/drivers/block/rbd.c
> +++ b/drivers/block/rbd.c
> @@ -96,6 +96,8 @@ static int atomic_dec_return_safe(atomic_t *v)
> #define RBD_MINORS_PER_MAJOR 256
> #define RBD_SINGLE_MAJOR_PART_SHIFT 4
>
> +#define RBD_MAX_PARENT_CHAIN_LEN 8
> +
> #define RBD_SNAP_DEV_NAME_PREFIX "snap_"
> #define RBD_MAX_SNAP_NAME_LEN \
> (NAME_MAX - (sizeof (RBD_SNAP_DEV_NAME_PREFIX) - 1))
> @@ -426,7 +428,7 @@ static ssize_t rbd_add_single_major(struct bus_type *bus, const char *buf,
> size_t count);
> static ssize_t rbd_remove_single_major(struct bus_type *bus, const char *buf,
> size_t count);
> -static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping);
> +static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth);
> static void rbd_spec_put(struct rbd_spec *spec);
>
> static int rbd_dev_id_to_minor(int dev_id)
> @@ -5131,7 +5133,12 @@ out_err:
> return ret;
> }
>
> -static int rbd_dev_probe_parent(struct rbd_device *rbd_dev)
> +/*
> + * @depth is rbd_dev_image_probe() -> rbd_dev_probe_parent() ->
> + * rbd_dev_image_probe() recursion depth, which means it's also the
> + * length of the already discovered part of the parent chain.
> + */
> +static int rbd_dev_probe_parent(struct rbd_device *rbd_dev, int depth)
> {
> struct rbd_device *parent = NULL;
> int ret;
> @@ -5139,6 +5146,12 @@ static int rbd_dev_probe_parent(struct rbd_device *rbd_dev)
> if (!rbd_dev->parent_spec)
> return 0;
>
> + if (++depth > RBD_MAX_PARENT_CHAIN_LEN) {
> + pr_info("parent chain is too long (%d)\n", depth);
> + ret = -EINVAL;
> + goto out_err;
> + }
> +
> parent = rbd_dev_create(rbd_dev->rbd_client, rbd_dev->parent_spec,
> NULL);
> if (!parent) {
> @@ -5153,7 +5166,7 @@ static int rbd_dev_probe_parent(struct rbd_device *rbd_dev)
> __rbd_get_client(rbd_dev->rbd_client);
> rbd_spec_get(rbd_dev->parent_spec);
>
> - ret = rbd_dev_image_probe(parent, false);
> + ret = rbd_dev_image_probe(parent, depth);
> if (ret < 0)
> goto out_err;
>
> @@ -5282,7 +5295,7 @@ static void rbd_dev_image_release(struct rbd_device *rbd_dev)
> * parent), initiate a watch on its header object before using that
> * object to get detailed information about the rbd image.
> */
> -static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping)
> +static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth)
> {
> int ret;
>
> @@ -5300,7 +5313,7 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping)
> if (ret)
> goto err_out_format;
>
> - if (mapping) {
> + if (!depth) {
> ret = rbd_dev_header_watch_sync(rbd_dev);
> if (ret) {
> if (ret == -ENOENT)
> @@ -5321,7 +5334,7 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping)
> * Otherwise this is a parent image, identified by pool, image
> * and snap ids - need to fill in names for those ids.
> */
> - if (mapping)
> + if (!depth)
> ret = rbd_spec_fill_snap_id(rbd_dev);
> else
> ret = rbd_spec_fill_names(rbd_dev);
> @@ -5343,12 +5356,12 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping)
> * Need to warn users if this image is the one being
> * mapped and has a parent.
> */
> - if (mapping && rbd_dev->parent_spec)
> + if (!depth && rbd_dev->parent_spec)
> rbd_warn(rbd_dev,
> "WARNING: kernel layering is EXPERIMENTAL!");
> }
>
> - ret = rbd_dev_probe_parent(rbd_dev);
> + ret = rbd_dev_probe_parent(rbd_dev, depth);
> if (ret)
> goto err_out_probe;
>
> @@ -5359,7 +5372,7 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping)
> err_out_probe:
> rbd_dev_unprobe(rbd_dev);
> err_out_watch:
> - if (mapping)
> + if (!depth)
> rbd_dev_header_unwatch_sync(rbd_dev);
> out_header_name:
> kfree(rbd_dev->header_name);
> @@ -5422,7 +5435,7 @@ static ssize_t do_rbd_add(struct bus_type *bus,
> spec = NULL; /* rbd_dev now owns this */
> rbd_opts = NULL; /* rbd_dev now owns this */
>
> - rc = rbd_dev_image_probe(rbd_dev, true);
> + rc = rbd_dev_image_probe(rbd_dev, 0);
> if (rc < 0)
> goto err_out_rbd_dev;
>
>
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2015-10-15 2:02 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2015-10-11 18:05 [PATCH] rbd: prevent kernel stack blow up on rbd map Ilya Dryomov
2015-10-15 2:03 ` Josh Durgin
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.