public inbox for linux-block@vger.kernel.org
 help / color / mirror / Atom feed
From: Hannes Reinecke <hare@suse.de>
To: Laurence Oberman <loberman@redhat.com>,
	Ilya Dryomov <idryomov@gmail.com>
Cc: Sage Weil <sage@redhat.com>, Jens Axboe <axboe@kernel.dk>,
	ceph-devel@vger.kernel.org, linux-block@vger.kernel.org
Subject: Re: [PATCH] rbd: lock object request list
Date: Thu, 30 Jan 2020 16:39:36 +0100	[thread overview]
Message-ID: <b786e9dd-02c1-e117-db92-aa3f50804bc7@suse.de> (raw)
In-Reply-To: <2fc165f5ad9ea0ec8a0878eabe800ca0af3e10b8.camel@redhat.com>

On 1/30/20 3:26 PM, Laurence Oberman wrote:
> On Thu, 2020-01-30 at 12:42 +0100, Hannes Reinecke wrote:
>> The object request list can be accessed from various contexts
>> so we need to lock it to avoid concurrent modifications and
>> random crashes.
>>
>> Signed-off-by: Hannes Reinecke <hare@suse.de>
>> ---
>>  drivers/block/rbd.c | 31 ++++++++++++++++++++++++-------
>>  1 file changed, 24 insertions(+), 7 deletions(-)
>>
>> diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
>> index 5710b2a8609c..ddc170661607 100644
>> --- a/drivers/block/rbd.c
>> +++ b/drivers/block/rbd.c
>> @@ -344,6 +344,7 @@ struct rbd_img_request {
>>  
>>  	struct list_head	lock_item;
>>  	struct list_head	object_extents;	/* obj_req.ex structs */
>> +	struct mutex		object_mutex;
>>  
>>  	struct mutex		state_mutex;
>>  	struct pending_result	pending;
>> @@ -1664,6 +1665,7 @@ static struct rbd_img_request
>> *rbd_img_request_create(
>>  	INIT_LIST_HEAD(&img_request->lock_item);
>>  	INIT_LIST_HEAD(&img_request->object_extents);
>>  	mutex_init(&img_request->state_mutex);
>> +	mutex_init(&img_request->object_mutex);
>>  	kref_init(&img_request->kref);
>>  
>>  	return img_request;
>> @@ -1680,8 +1682,10 @@ static void rbd_img_request_destroy(struct
>> kref *kref)
>>  	dout("%s: img %p\n", __func__, img_request);
>>  
>>  	WARN_ON(!list_empty(&img_request->lock_item));
>> +	mutex_lock(&img_request->object_mutex);
>>  	for_each_obj_request_safe(img_request, obj_request,
>> next_obj_request)
>>  		rbd_img_obj_request_del(img_request, obj_request);
>> +	mutex_unlock(&img_request->object_mutex);
>>  
>>  	if (img_request_layered_test(img_request)) {
>>  		img_request_layered_clear(img_request);
>> @@ -2486,6 +2490,7 @@ static int __rbd_img_fill_request(struct
>> rbd_img_request *img_req)
>>  	struct rbd_obj_request *obj_req, *next_obj_req;
>>  	int ret;
>>  
>> +	mutex_lock(&img_req->object_mutex);
>>  	for_each_obj_request_safe(img_req, obj_req, next_obj_req) {
>>  		switch (img_req->op_type) {
>>  		case OBJ_OP_READ:
>> @@ -2510,7 +2515,7 @@ static int __rbd_img_fill_request(struct
>> rbd_img_request *img_req)
>>  			continue;
>>  		}
>>  	}
>> -
>> +	mutex_unlock(&img_req->object_mutex);
>>  	img_req->state = RBD_IMG_START;
>>  	return 0;
>>  }
>> @@ -2569,6 +2574,7 @@ static int rbd_img_fill_request_nocopy(struct
>> rbd_img_request *img_req,
>>  	 * position in the provided bio (list) or bio_vec array.
>>  	 */
>>  	fctx->iter = *fctx->pos;
>> +	mutex_lock(&img_req->object_mutex);
>>  	for (i = 0; i < num_img_extents; i++) {
>>  		ret = ceph_file_to_extents(&img_req->rbd_dev->layout,
>>  					   img_extents[i].fe_off,
>> @@ -2576,10 +2582,12 @@ static int rbd_img_fill_request_nocopy(struct
>> rbd_img_request *img_req,
>>  					   &img_req->object_extents,
>>  					   alloc_object_extent,
>> img_req,
>>  					   fctx->set_pos_fn, &fctx-
>>> iter);
>> -		if (ret)
>> +		if (ret) {
>> +			mutex_unlock(&img_req->object_mutex);
>>  			return ret;
>> +		}
>>  	}
>> -
>> +	mutex_unlock(&img_req->object_mutex);
>>  	return __rbd_img_fill_request(img_req);
>>  }
>>  
>> @@ -2620,6 +2628,7 @@ static int rbd_img_fill_request(struct
>> rbd_img_request *img_req,
>>  	 * or bio_vec array because when mapped, those bio_vecs can
>> straddle
>>  	 * stripe unit boundaries.
>>  	 */
>> +	mutex_lock(&img_req->object_mutex);
>>  	fctx->iter = *fctx->pos;
>>  	for (i = 0; i < num_img_extents; i++) {
>>  		ret = ceph_file_to_extents(&rbd_dev->layout,
>> @@ -2629,15 +2638,17 @@ static int rbd_img_fill_request(struct
>> rbd_img_request *img_req,
>>  					   alloc_object_extent,
>> img_req,
>>  					   fctx->count_fn, &fctx-
>>> iter);
>>  		if (ret)
>> -			return ret;
>> +			goto out_unlock;
>>  	}
>>  
>>  	for_each_obj_request(img_req, obj_req) {
>>  		obj_req->bvec_pos.bvecs = kmalloc_array(obj_req-
>>> bvec_count,
>>  					      sizeof(*obj_req-
>>> bvec_pos.bvecs),
>>  					      GFP_NOIO);
>> -		if (!obj_req->bvec_pos.bvecs)
>> -			return -ENOMEM;
>> +		if (!obj_req->bvec_pos.bvecs) {
>> +			ret = -ENOMEM;
>> +			goto out_unlock;
>> +		}
>>  	}
>>  
>>  	/*
>> @@ -2652,10 +2663,14 @@ static int rbd_img_fill_request(struct
>> rbd_img_request *img_req,
>>  					   &img_req->object_extents,
>>  					   fctx->copy_fn, &fctx->iter);
>>  		if (ret)
>> -			return ret;
>> +			goto out_unlock;
>>  	}
>> +	mutex_unlock(&img_req->object_mutex);
>>  
>>  	return __rbd_img_fill_request(img_req);
>> +out_unlock:
>> +	mutex_unlock(&img_req->object_mutex);
>> +	return ret;
>>  }
>>  
>>  static int rbd_img_fill_nodata(struct rbd_img_request *img_req,
>> @@ -3552,6 +3567,7 @@ static void rbd_img_object_requests(struct
>> rbd_img_request *img_req)
>>  
>>  	rbd_assert(!img_req->pending.result && !img_req-
>>> pending.num_pending);
>>  
>> +	mutex_lock(&img_req->object_mutex);
>>  	for_each_obj_request(img_req, obj_req) {
>>  		int result = 0;
>>  
>> @@ -3564,6 +3580,7 @@ static void rbd_img_object_requests(struct
>> rbd_img_request *img_req)
>>  			img_req->pending.num_pending++;
>>  		}
>>  	}
>> +	mutex_unlock(&img_req->object_mutex);
>>  }
>>  
>>  static bool rbd_img_advance(struct rbd_img_request *img_req, int
>> *result)
> 
> Looks good to me. Just wonder how we escaped this for so long.
> 
> Reviewed-by: Laurence Oberman <loberman@redhat.com>
> 
The whole state machine is utterly fragile.
I'll be posting a patchset to clean stuff up somewhat,
but it's still a beast.
I'm rather surprised that it doesn't break more often ...

Cheers,

Hannes
-- 
Dr. Hannes Reinecke		      Teamlead Storage & Networking
hare@suse.de			                  +49 911 74053 688
SUSE Software Solutions Germany GmbH, Maxfeldstr. 5, 90409 Nürnberg
HRB 36809 (AG Nürnberg), GF: Felix Imendörffer

  reply	other threads:[~2020-01-30 15:39 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-01-30 11:42 [PATCH] rbd: lock object request list Hannes Reinecke
2020-01-30 14:26 ` Laurence Oberman
2020-01-30 15:39   ` Hannes Reinecke [this message]
2020-01-31  9:50     ` Ilya Dryomov
2020-01-31 18:55       ` Laurence Oberman
2020-01-30 15:09 ` Ilya Dryomov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=b786e9dd-02c1-e117-db92-aa3f50804bc7@suse.de \
    --to=hare@suse.de \
    --cc=axboe@kernel.dk \
    --cc=ceph-devel@vger.kernel.org \
    --cc=idryomov@gmail.com \
    --cc=linux-block@vger.kernel.org \
    --cc=loberman@redhat.com \
    --cc=sage@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox