Re: [PATCH v4 04/10] xen/blkfront: split per device io_lock

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
To: Bob Liu <bob.liu@oracle.com>
Cc: jonathan.davies@citrix.com, felipe.franciosi@citrix.com,
	rafal.mielniczuk@citrix.com, linux-kernel@vger.kernel.org,
	xen-devel@lists.xen.org, axboe@fb.com, david.vrabel@citrix.com,
	avanzini.arianna@gmail.com, roger.pau@citrix.com
Subject: Re: [PATCH v4 04/10] xen/blkfront: split per device io_lock
Date: Tue, 3 Nov 2015 15:09:02 -0500	[thread overview]
Message-ID: <20151103200902.GF28527@char.us.oracle.com> (raw)
In-Reply-To: <1446438106-20171-5-git-send-email-bob.liu@oracle.com>

On Mon, Nov 02, 2015 at 12:21:40PM +0800, Bob Liu wrote:
> The per device io_lock became a coarser grained lock after multi-queues/rings
> was introduced, this patch introduced a fine-grained ring_lock for each ring.

s/was introduced/was introduced (see commit titled XYZ)/

s/introdued/introduces/
> 
> The old io_lock was renamed to dev_lock and only protect the ->grants list

s/was/is/
s/protect/protects/

> which is shared by all rings.
> 
> Signed-off-by: Bob Liu <bob.liu@oracle.com>
> ---
>  drivers/block/xen-blkfront.c | 57 ++++++++++++++++++++++++++------------------
>  1 file changed, 34 insertions(+), 23 deletions(-)
> 
> diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
> index eab78e7..8cc5995 100644
> --- a/drivers/block/xen-blkfront.c
> +++ b/drivers/block/xen-blkfront.c
> @@ -121,6 +121,7 @@ MODULE_PARM_DESC(max_ring_page_order, "Maximum order of pages to be used for the
>   */
>  struct blkfront_ring_info {
>  	struct blkif_front_ring ring;

Can you add a comment explaining the lock semantic? As in under what conditions
should it be taken? Like you have it below.

> +	spinlock_t ring_lock;
>  	unsigned int ring_ref[XENBUS_MAX_RING_PAGES];
>  	unsigned int evtchn, irq;
>  	struct work_struct work;
> @@ -138,7 +139,8 @@ struct blkfront_ring_info {
>   */
>  struct blkfront_info
>  {
> -	spinlock_t io_lock;
> +	/* Lock to proect info->grants list shared by multi rings */

s/proect/protect/

Missing full stop.

> +	spinlock_t dev_lock;

Shouldn't it be right next to what it is protecting?

That is right below (or above): 'struct list_head grants;'?

>  	struct mutex mutex;
>  	struct xenbus_device *xbdev;
>  	struct gendisk *gd;
> @@ -224,6 +226,7 @@ static int fill_grant_buffer(struct blkfront_ring_info *rinfo, int num)
>  	struct grant *gnt_list_entry, *n;
>  	int i = 0;
>  
> +	spin_lock_irq(&info->dev_lock);

Why there? Why not where you add it to the list?
>  	while(i < num) {
>  		gnt_list_entry = kzalloc(sizeof(struct grant), GFP_NOIO);
>  		if (!gnt_list_entry)
> @@ -242,6 +245,7 @@ static int fill_grant_buffer(struct blkfront_ring_info *rinfo, int num)
>  		list_add(&gnt_list_entry->node, &info->grants);

Right here that is?

You are holding the lock for the duration of 'kzalloc' and 'alloc_page'.

And more interestingly, GFP_NOIO translates to __GFP_WAIT which means
it can call 'schedule'. - And you have taken an spinlock. That should
have thrown lots of warnings?

>  		i++;
>  	}
> +	spin_unlock_irq(&info->dev_lock);
>  
>  	return 0;
>  
> @@ -254,6 +258,7 @@ out_of_memory:
>  		kfree(gnt_list_entry);
>  		i--;
>  	}
> +	spin_unlock_irq(&info->dev_lock);

Just do it around the 'list_del' operation. You are using an
'safe'
>  	BUG_ON(i != 0);
>  	return -ENOMEM;
>  }
> @@ -265,6 +270,7 @@ static struct grant *get_grant(grant_ref_t *gref_head,
>  	struct grant *gnt_list_entry;
>  	unsigned long buffer_gfn;
>  
> +	spin_lock(&info->dev_lock);
>  	BUG_ON(list_empty(&info->grants));
>  	gnt_list_entry = list_first_entry(&info->grants, struct grant,
>  	                                  node);
> @@ -272,8 +278,10 @@ static struct grant *get_grant(grant_ref_t *gref_head,
>  
>  	if (gnt_list_entry->gref != GRANT_INVALID_REF) {
>  		info->persistent_gnts_c--;
> +		spin_unlock(&info->dev_lock);
>  		return gnt_list_entry;
>  	}
> +	spin_unlock(&info->dev_lock);

Just have one spin_unlock. Put it right before the 'if (gnt_list_entry->gref)..'.
>  
>  	/* Assign a gref to this page */
>  	gnt_list_entry->gref = gnttab_claim_grant_reference(gref_head);
> @@ -639,7 +647,7 @@ static int blkif_queue_rq(struct blk_mq_hw_ctx *hctx,
>  	struct blkfront_ring_info *rinfo = (struct blkfront_ring_info *)hctx->driver_data;
>  
>  	blk_mq_start_request(qd->rq);
> -	spin_lock_irq(&info->io_lock);
> +	spin_lock_irq(&rinfo->ring_lock);
>  	if (RING_FULL(&rinfo->ring))
>  		goto out_busy;
>  
> @@ -650,15 +658,15 @@ static int blkif_queue_rq(struct blk_mq_hw_ctx *hctx,
>  		goto out_busy;
>  
>  	flush_requests(rinfo);
> -	spin_unlock_irq(&info->io_lock);
> +	spin_unlock_irq(&rinfo->ring_lock);
>  	return BLK_MQ_RQ_QUEUE_OK;
>  
>  out_err:
> -	spin_unlock_irq(&info->io_lock);
> +	spin_unlock_irq(&rinfo->ring_lock);
>  	return BLK_MQ_RQ_QUEUE_ERROR;
>  
>  out_busy:
> -	spin_unlock_irq(&info->io_lock);
> +	spin_unlock_irq(&rinfo->ring_lock);
>  	blk_mq_stop_hw_queue(hctx);
>  	return BLK_MQ_RQ_QUEUE_BUSY;
>  }
> @@ -959,21 +967,22 @@ static void xlvbd_release_gendisk(struct blkfront_info *info)
>  	info->gd = NULL;
>  }
>  
> -/* Must be called with io_lock holded */
>  static void kick_pending_request_queues(struct blkfront_ring_info *rinfo)
>  {
> +	unsigned long flags;
> +
> +	spin_lock_irqsave(&rinfo->ring_lock, flags);
>  	if (!RING_FULL(&rinfo->ring))
>  		blk_mq_start_stopped_hw_queues(rinfo->dev_info->rq, true);
> +	spin_unlock_irqrestore(&rinfo->ring_lock, flags);
>  }
>  
>  static void blkif_restart_queue(struct work_struct *work)
>  {
>  	struct blkfront_ring_info *rinfo = container_of(work, struct blkfront_ring_info, work);
>  
> -	spin_lock_irq(&rinfo->dev_info->io_lock);
>  	if (rinfo->dev_info->connected == BLKIF_STATE_CONNECTED)
>  		kick_pending_request_queues(rinfo);
> -	spin_unlock_irq(&rinfo->dev_info->io_lock);
>  }
>  
>  static void blkif_free_ring(struct blkfront_ring_info *rinfo)
> @@ -1065,7 +1074,7 @@ static void blkif_free(struct blkfront_info *info, int suspend)
>  	int i;
>  
>  	/* Prevent new requests being issued until we fix things up. */
> -	spin_lock_irq(&info->io_lock);
> +	spin_lock_irq(&info->dev_lock);

I would move that lock to be right before:


if (!list_empty(&info->grants)).


>  	info->connected = suspend ?
>  		BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED;
>  	/* No more blkif_request(). */
> @@ -1091,7 +1100,7 @@ static void blkif_free(struct blkfront_info *info, int suspend)
>  
>  	for (i = 0; i < info->nr_rings; i++)
>  		blkif_free_ring(&info->rinfo[i]);
> -	spin_unlock_irq(&info->io_lock);
> +	spin_unlock_irq(&info->dev_lock);

Please move them before this loop. The blkif_free_ring is not touching
the 'grants' list.

>  }
>  
>  static void blkif_completion(struct blk_shadow *s, struct blkfront_ring_info *rinfo,
> @@ -1103,6 +1112,7 @@ static void blkif_completion(struct blk_shadow *s, struct blkfront_ring_info *ri
>  	void *shared_data;
>  	int nseg;
>  	struct blkfront_info *info = rinfo->dev_info;
> +	unsigned long flags;
>  
>  	nseg = s->req.operation == BLKIF_OP_INDIRECT ?
>  		s->req.u.indirect.nr_segments : s->req.u.rw.nr_segments;
> @@ -1120,6 +1130,7 @@ static void blkif_completion(struct blk_shadow *s, struct blkfront_ring_info *ri
>  			kunmap_atomic(shared_data);
>  		}
>  	}
> +


???
Stray?

>  	/* Add the persistent grant into the list of free grants */
>  	for (i = 0; i < nseg; i++) {
>  		if (gnttab_query_foreign_access(s->grants_used[i]->gref)) {
> @@ -1132,8 +1143,10 @@ static void blkif_completion(struct blk_shadow *s, struct blkfront_ring_info *ri
>  			if (!info->feature_persistent)
>  				pr_alert_ratelimited("backed has not unmapped grant: %u\n",
>  						     s->grants_used[i]->gref);
> +			spin_lock_irqsave(&info->dev_lock, flags);
>  			list_add(&s->grants_used[i]->node, &info->grants);
>  			info->persistent_gnts_c++;
> +			spin_unlock_irqrestore(&info->dev_lock, flags);

Are we also protecting the persistent_gnts_c value? If so that should
be documented in the 'struct blkfront_info'

>  		} else {
>  			/*
>  			 * If the grant is not mapped by the backend we end the
> @@ -1143,7 +1156,9 @@ static void blkif_completion(struct blk_shadow *s, struct blkfront_ring_info *ri
>  			 */
>  			gnttab_end_foreign_access(s->grants_used[i]->gref, 0, 0UL);
>  			s->grants_used[i]->gref = GRANT_INVALID_REF;
> +			spin_lock_irqsave(&info->dev_lock, flags);
>  			list_add_tail(&s->grants_used[i]->node, &info->grants);
> +			spin_unlock_irqrestore(&info->dev_lock, flags);
>  		}
>  	}
>  	if (s->req.operation == BLKIF_OP_INDIRECT) {
> @@ -1152,8 +1167,10 @@ static void blkif_completion(struct blk_shadow *s, struct blkfront_ring_info *ri
>  				if (!info->feature_persistent)
>  					pr_alert_ratelimited("backed has not unmapped grant: %u\n",
>  							     s->indirect_grants[i]->gref);
> +				spin_lock_irqsave(&info->dev_lock, flags);
>  				list_add(&s->indirect_grants[i]->node, &info->grants);
>  				info->persistent_gnts_c++;
> +				spin_unlock_irqrestore(&info->dev_lock, flags);
>  			} else {
>  				struct page *indirect_page;
>  
> @@ -1162,12 +1179,14 @@ static void blkif_completion(struct blk_shadow *s, struct blkfront_ring_info *ri
>  				 * Add the used indirect page back to the list of
>  				 * available pages for indirect grefs.
>  				 */
> +				spin_lock_irqsave(&info->dev_lock, flags);
>  				if (!info->feature_persistent) {
>  					indirect_page = pfn_to_page(s->indirect_grants[i]->pfn);
>  					list_add(&indirect_page->lru, &rinfo->indirect_pages);
>  				}
>  				s->indirect_grants[i]->gref = GRANT_INVALID_REF;
>  				list_add_tail(&s->indirect_grants[i]->node, &info->grants);
> +				spin_unlock_irqrestore(&info->dev_lock, flags);
>  			}
>  		}
>  	}
> @@ -1183,13 +1202,10 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
>  	struct blkfront_info *info = rinfo->dev_info;
>  	int error;
>  
> -	spin_lock_irqsave(&info->io_lock, flags);
> -
> -	if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) {
> -		spin_unlock_irqrestore(&info->io_lock, flags);
> +	if (unlikely(info->connected != BLKIF_STATE_CONNECTED))
>  		return IRQ_HANDLED;
> -	}
>  
> +	spin_lock_irqsave(&rinfo->ring_lock, flags);
>   again:
>  	rp = rinfo->ring.sring->rsp_prod;
>  	rmb(); /* Ensure we see queued responses up to 'rp'. */
> @@ -1280,10 +1296,9 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
>  	} else
>  		rinfo->ring.sring->rsp_event = i + 1;
>  
> +	spin_unlock_irqrestore(&rinfo->ring_lock, flags);
>  	kick_pending_request_queues(rinfo);

We drop the lock and then take it again in 'kick_pending_request_queues'

Would it be better if we have an argument 'locked' which would
bypass the taking of the lock once more?

>  
> -	spin_unlock_irqrestore(&info->io_lock, flags);
> -
>  	return IRQ_HANDLED;
>  }
>  
> @@ -1529,10 +1544,11 @@ static int blkfront_probe(struct xenbus_device *dev,
>  		INIT_LIST_HEAD(&rinfo->indirect_pages);
>  		rinfo->dev_info = info;
>  		INIT_WORK(&rinfo->work, blkif_restart_queue);
> +		spin_lock_init(&rinfo->ring_lock);
>  	}
>  
>  	mutex_init(&info->mutex);
> -	spin_lock_init(&info->io_lock);
> +	spin_lock_init(&info->dev_lock);
>  	info->xbdev = dev;
>  	info->vdevice = vdevice;
>  	INIT_LIST_HEAD(&info->grants);
> @@ -1629,8 +1645,6 @@ static int blkif_recover(struct blkfront_info *info)
>  	}
>  	xenbus_switch_state(info->xbdev, XenbusStateConnected);
>  
> -	spin_lock_irq(&info->io_lock);
> -
>  	/* Now safe for us to use the shared ring */
>  	info->connected = BLKIF_STATE_CONNECTED;
>  
> @@ -1648,7 +1662,6 @@ static int blkif_recover(struct blkfront_info *info)
>  		BUG_ON(req->nr_phys_segments > segs);
>  		blk_mq_requeue_request(req);
>  	}
> -	spin_unlock_irq(&info->io_lock);
>  	blk_mq_kick_requeue_list(info->rq);
>  
>  	while ((bio = bio_list_pop(&bio_list)) != NULL) {
> @@ -1994,11 +2007,9 @@ static void blkfront_connect(struct blkfront_info *info)
>  	xenbus_switch_state(info->xbdev, XenbusStateConnected);
>  
>  	/* Kick pending requests. */
> -	spin_lock_irq(&info->io_lock);
>  	info->connected = BLKIF_STATE_CONNECTED;
>  	for (i = 0; i < info->nr_rings; i++)
>  		kick_pending_request_queues(&info->rinfo[i]);
> -	spin_unlock_irq(&info->io_lock);
>  
>  	add_disk(info->gd);
>  
> -- 
> 1.8.3.1
>

next prev parent reply	other threads:[~2015-11-03 20:09 UTC|newest]

Thread overview: 60+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-11-02  4:21 [PATCH v4 00/10] xen-block: multi hardware-queues/rings support Bob Liu
2015-11-02  4:21 ` [PATCH v4 01/10] xen/blkif: document blkif multi-queue/ring extension Bob Liu
2015-11-03 18:01   ` Konrad Rzeszutek Wilk
2015-11-02  4:21 ` Bob Liu
2015-11-02  4:21 ` [PATCH v4 02/10] xen/blkfront: separate per ring information out of device info Bob Liu
2015-11-02  4:49   ` kbuild test robot
2015-11-02  4:49   ` kbuild test robot
2015-11-02  5:33     ` Bob Liu
2015-11-02  5:33     ` Bob Liu
2015-11-03 19:09   ` Konrad Rzeszutek Wilk
2015-11-02  4:21 ` Bob Liu
2015-11-02  4:21 ` [PATCH v4 03/10] xen/blkfront: pseudo support for multi hardware queues/rings Bob Liu
2015-11-02  4:21 ` Bob Liu
2015-11-03 19:44   ` Konrad Rzeszutek Wilk
2015-11-04  1:01     ` Bob Liu
2015-11-04  2:03       ` Konrad Rzeszutek Wilk
2015-11-04  1:01     ` Bob Liu
2015-11-02  4:21 ` [PATCH v4 04/10] xen/blkfront: split per device io_lock Bob Liu
2015-11-02  4:21   ` Bob Liu
2015-11-03 20:09   ` Konrad Rzeszutek Wilk [this message]
2015-11-04  1:07     ` Bob Liu
2015-11-04  1:51       ` Konrad Rzeszutek Wilk
2015-11-04  1:51       ` Konrad Rzeszutek Wilk
2015-11-04  1:07     ` Bob Liu
2015-11-02  4:21 ` [PATCH v4 05/10] xen/blkfront: negotiate number of queues/rings to be used with backend Bob Liu
2015-11-02  4:21 ` Bob Liu
2015-11-03 20:40   ` Konrad Rzeszutek Wilk
2015-11-04  1:11     ` Bob Liu
2015-11-04  1:11     ` Bob Liu
2015-11-04  1:53       ` Konrad Rzeszutek Wilk
2015-11-04  1:53       ` Konrad Rzeszutek Wilk
2015-11-02  4:21 ` [PATCH v4 06/10] xen/blkback: separate ring information out of struct xen_blkif Bob Liu
2015-11-02  4:21   ` Bob Liu
2015-11-03 21:37   ` Konrad Rzeszutek Wilk
2015-11-02  4:21 ` [PATCH v4 07/10] xen/blkback: pseudo support for multi hardware queues/rings Bob Liu
2015-11-02  4:21   ` Bob Liu
2015-11-05  2:30   ` Konrad Rzeszutek Wilk
2015-11-05  3:02     ` Bob Liu
2015-11-05  3:24       ` Konrad Rzeszutek Wilk
2015-11-05  3:24       ` Konrad Rzeszutek Wilk
2015-11-05  3:02     ` Bob Liu
2015-11-05  2:30   ` Konrad Rzeszutek Wilk
2015-11-02  4:21 ` [PATCH v4 08/10] xen/blkback: get the number of hardware queues/rings from blkfront Bob Liu
2015-11-02  4:21 ` Bob Liu
2015-11-05  2:37   ` Konrad Rzeszutek Wilk
2015-11-05  2:37   ` Konrad Rzeszutek Wilk
2015-11-02  4:21 ` [PATCH v4 09/10] xen/blkfront: make persistent grants per-queue Bob Liu
2015-11-02  4:21 ` Bob Liu
2015-11-05  2:39   ` Konrad Rzeszutek Wilk
2015-11-05  2:39   ` Konrad Rzeszutek Wilk
2015-11-02  4:21 ` [PATCH v4 10/10] xen/blkback: make pool of persistent grants and free pages per-queue Bob Liu
2015-11-02  4:21 ` Bob Liu
2015-11-05  2:43   ` Konrad Rzeszutek Wilk
2015-11-05  2:43   ` Konrad Rzeszutek Wilk
2015-11-05  2:46     ` Bob Liu
2015-11-05  2:46     ` Bob Liu
2015-11-05 19:50       ` Konrad Rzeszutek Wilk
2015-11-05 19:50       ` Konrad Rzeszutek Wilk
2015-11-02 11:19 ` [PATCH v4 00/10] xen-block: multi hardware-queues/rings support Julien Grall
2015-11-02 11:19 ` [Xen-devel] " Julien Grall

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20151103200902.GF28527@char.us.oracle.com \
    --to=konrad.wilk@oracle.com \
    --cc=avanzini.arianna@gmail.com \
    --cc=axboe@fb.com \
    --cc=bob.liu@oracle.com \
    --cc=david.vrabel@citrix.com \
    --cc=felipe.franciosi@citrix.com \
    --cc=jonathan.davies@citrix.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=rafal.mielniczuk@citrix.com \
    --cc=roger.pau@citrix.com \
    --cc=xen-devel@lists.xen.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.