From mboxrd@z Thu Jan  1 00:00:00 1970
From: Ming Lei <tom.leiming@gmail.com>
Subject: Re: virtio-scsi: two questions related with picking up queue
Date: Thu, 8 May 2014 00:24:37 +0800
Message-ID: <20140508002437.0dd549e8@tom-ThinkPad-T410>
References: <CACVXFVNeBbJgA6h5n1Mt7vp7_Es67N5KEpZ0en4O+9GnD6wp+A@mail.gmail.com>
	<5368E0DB.5010000@redhat.com>
Mime-Version: 1.0
Content-Type: text/plain; charset=US-ASCII
Content-Transfer-Encoding: 7bit
Return-path: <linux-scsi-owner@vger.kernel.org>
Received: from mail-pa0-f42.google.com ([209.85.220.42]:57402 "EHLO
	mail-pa0-f42.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
	with ESMTP id S1753512AbaEGQYy (ORCPT
	<rfc822;linux-scsi@vger.kernel.org>); Wed, 7 May 2014 12:24:54 -0400
Received: by mail-pa0-f42.google.com with SMTP id rd3so1402971pab.29
        for <linux-scsi@vger.kernel.org>; Wed, 07 May 2014 09:24:53 -0700 (PDT)
In-Reply-To: <5368E0DB.5010000@redhat.com>
Sender: linux-scsi-owner@vger.kernel.org
List-Id: linux-scsi@vger.kernel.org
To: Paolo Bonzini <pbonzini@redhat.com>
Cc: Linux SCSI List <linux-scsi@vger.kernel.org>, Wanlong Gao <gaowanlong@cn.fujitsu.com>, "James E.J. Bottomley" <JBottomley@parallels.com>, Rusty Russell <rusty@rustcorp.com.au>

On Tue, 06 May 2014 15:17:15 +0200
Paolo Bonzini <pbonzini@redhat.com> wrote:

> Il 06/05/2014 11:26, Ming Lei ha scritto:
> > Hi Paolo and All,
> >
> > One question is about ACCESS_ONCE() in virtscsi_pick_vq(),
> > looks it needn't since both reading and writing tgt->req_vq holds
> > tgt->tgt_lock.
> 
> You're right.  It should be possible to avoid the lock in 
> virtscsi_pick_vq like this:
> 
> 	value = atomic_read(&tgt->reqs);
> retry:
> 	if (value != 0) {
> 		old_value = atomic_cmpxchg(&tgt->regs, value, value + 1)
> 		if (old_value != value) {
> 			value = old_value;
> 			goto retry;
> 		}
> 
> 		smp_mb__after_atomic_cmpxchg(); // you get the idea :)
> 		vq = ACCESS_ONCE(tgt->req_vq);
> 	} else {
> 	        spin_lock_irqsave(&tgt->tgt_lock, flags);
> 
> 		// tgt->reqs may not be 0 anymore, need to recheck
> 		value = atomic_read(&tgt->reqs);
> 		if (atomic_read(&tgt->reqs) != 0) {
> 		        spin_unlock_irqrestore(&tgt->tgt_lock, flags);
> 			goto retry;
> 		}
> 
> 		// tgt->reqs now will remain fixed to 0.
> 		...
> 		tgt->req_vq = vq = ...;
> 		smp_wmb();
> 		atomic_set(&tgt->reqs, 1);
> 	        spin_unlock_irqrestore(&tgt->tgt_lock, flags);
> 	}
> 
> 	return vq;
> 

Another approach I thought of is to use percpu spinlock, and
the idea is simple:

	- all perpcu locks are held for writing req_vq, and
	- only percpu lock is needed for reading req_vq.

What do you think about the below patch?

diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c
index 697fa53..00deab4 100644
--- a/drivers/scsi/virtio_scsi.c
+++ b/drivers/scsi/virtio_scsi.c
@@ -82,7 +82,7 @@ struct virtio_scsi_vq {
  */
 struct virtio_scsi_target_state {
 	/* This spinlock never held at the same time as vq_lock. */
-	spinlock_t tgt_lock;
+	spinlock_t __percpu *lock;
 
 	/* Count of outstanding requests. */
 	atomic_t reqs;
@@ -517,21 +517,46 @@ static struct virtio_scsi_vq *virtscsi_pick_vq(struct virtio_scsi *vscsi,
 {
 	struct virtio_scsi_vq *vq;
 	unsigned long flags;
-	u32 queue_num;
+	u32 cpu = get_cpu();
+	spinlock_t	*lock = per_cpu_ptr(tgt->lock, cpu);
 
-	spin_lock_irqsave(&tgt->tgt_lock, flags);
+	spin_lock_irqsave(lock, flags);
 
 	if (atomic_inc_return(&tgt->reqs) > 1)
 		vq = tgt->req_vq;
 	else {
-		queue_num = smp_processor_id();
+		u32 queue_num = cpu;
+		int i;
+
 		while (unlikely(queue_num >= vscsi->num_queues))
 			queue_num -= vscsi->num_queues;
 
-		tgt->req_vq = vq = &vscsi->req_vqs[queue_num];
+		/*
+		 * there should be only one writing because of atomic
+		 * counter, so we don't worry about deadlock, but
+		 * might need to teach lockdep to not complain it
+		 */
+		for_each_possible_cpu(i) {
+			spinlock_t *other = per_cpu_ptr(tgt->lock, i);
+			if (i != cpu)
+				spin_lock(other);
+		}
+
+		/* only update req_vq when reqs is one*/
+		if (atomic_read(&tgt->reqs) == 1)
+			tgt->req_vq = vq = &vscsi->req_vqs[queue_num];
+		else
+			vq = tgt->req_vq;
+
+		for_each_possible_cpu(i) {
+			spinlock_t *other = per_cpu_ptr(tgt->lock, i);
+			if (i != cpu)
+				spin_unlock(other);
+		}
 	}
 
-	spin_unlock_irqrestore(&tgt->tgt_lock, flags);
+	spin_unlock_irqrestore(lock, flags);
+	put_cpu();
 	return vq;
 }
 
@@ -618,10 +643,22 @@ static int virtscsi_target_alloc(struct scsi_target *starget)
 {
 	struct virtio_scsi_target_state *tgt =
 				kmalloc(sizeof(*tgt), GFP_KERNEL);
+	int i;
+
 	if (!tgt)
 		return -ENOMEM;
 
-	spin_lock_init(&tgt->tgt_lock);
+	tgt->lock = alloc_percpu(spinlock_t);
+	if (!tgt->lock) {
+		kfree(tgt);
+		return -ENOMEM;
+	}
+
+	for_each_possible_cpu(i) {
+		spinlock_t *lock = per_cpu_ptr(tgt->lock, i);
+		spin_lock_init(lock);
+	}
+
 	atomic_set(&tgt->reqs, 0);
 	tgt->req_vq = NULL;
 
@@ -632,6 +669,7 @@ static int virtscsi_target_alloc(struct scsi_target *starget)
 static void virtscsi_target_destroy(struct scsi_target *starget)
 {
 	struct virtio_scsi_target_state *tgt = starget->hostdata;
+	free_percpu(tgt->lock);
 	kfree(tgt);
 }
 

Thanks,
-- 
Ming Lei