* [PATCH 1/2] block: support different tag allocation policy
@ 2014-12-15  3:21 Shaohua Li
  2014-12-15  3:21 ` [PATCH 2/2] libata: use generic tagging Shaohua Li
  2014-12-15  8:08 ` [PATCH 1/2] block: support different tag allocation policy Christoph Hellwig
  0 siblings, 2 replies; 6+ messages in thread
From: Shaohua Li @ 2014-12-15  3:21 UTC
  To: linux-ide; +Cc: Kernel-team, Jens Axboe, Tejun Heo

libata's tag allocation uses a round-robin policy. The next patch will
make libata use the block layer's generic tag allocation, so add an
allocation policy to it.

Currently there are two policies: FIFO (the default) and round-robin.
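
To illustrate the difference between the two policies, here is a minimal
user-space sketch of the two search orders over a bitmap.  It only models
the find_first_zero_bit()/find_next_zero_bit() walk with plain arrays; it
is not the blk-tag code, and all names below are made up for the example:

/* illustration only: FIFO vs round-robin tag picking over a bitmap */
#include <stdio.h>

#define MAX_DEPTH 8

static int tag_map[MAX_DEPTH];	/* 1 = tag in use, 0 = free */
static int next_tag;		/* where the round-robin search resumes */

/* FIFO: always return the lowest free tag */
static int find_fifo(void)
{
	for (int tag = 0; tag < MAX_DEPTH; tag++)
		if (!tag_map[tag])
			return tag;
	return -1;
}

/* round-robin: start at next_tag and wrap around once */
static int find_rr(void)
{
	for (int i = 0; i < MAX_DEPTH; i++) {
		int tag = (next_tag + i) % MAX_DEPTH;

		if (!tag_map[tag]) {
			next_tag = (tag + 1) % MAX_DEPTH;
			return tag;
		}
	}
	return -1;
}

int main(void)
{
	/*
	 * No tag is ever marked used here, which makes the difference
	 * obvious: FIFO keeps returning 0, round-robin rotates 0, 1, 2.
	 */
	for (int i = 0; i < 3; i++)
		printf("fifo=%d rr=%d\n", find_fifo(), find_rr());
	return 0;
}

Even when every tag is free again by the time of the next allocation, FIFO
keeps handing out tag 0, while round-robin rotates through the map the way
the current libata allocator does.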

Cc: Jens Axboe <axboe@fb.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Shaohua Li <shli@fb.com>
---
 block/blk-tag.c          | 33 +++++++++++++++++++++++++--------
 drivers/block/osdblk.c   |  2 +-
 drivers/scsi/scsi_scan.c |  3 ++-
 include/linux/blkdev.h   |  8 ++++++--
 include/scsi/scsi_host.h |  3 +++
 include/scsi/scsi_tcq.h  |  2 +-
 6 files changed, 38 insertions(+), 13 deletions(-)

diff --git a/block/blk-tag.c b/block/blk-tag.c
index a185b86..f0344e6 100644
--- a/block/blk-tag.c
+++ b/block/blk-tag.c
@@ -119,7 +119,7 @@ init_tag_map(struct request_queue *q, struct blk_queue_tag *tags, int depth)
 }
 
 static struct blk_queue_tag *__blk_queue_init_tags(struct request_queue *q,
-						   int depth)
+						int depth, int alloc_policy)
 {
 	struct blk_queue_tag *tags;
 
@@ -131,6 +131,8 @@ static struct blk_queue_tag *__blk_queue_init_tags(struct request_queue *q,
 		goto fail;
 
 	atomic_set(&tags->refcnt, 1);
+	tags->alloc_policy = alloc_policy;
+	tags->next_tag = 0;
 	return tags;
 fail:
 	kfree(tags);
@@ -140,10 +142,11 @@ static struct blk_queue_tag *__blk_queue_init_tags(struct request_queue *q,
 /**
  * blk_init_tags - initialize the tag info for an external tag map
  * @depth:	the maximum queue depth supported
+ * @alloc_policy: tag allocation policy
  **/
-struct blk_queue_tag *blk_init_tags(int depth)
+struct blk_queue_tag *blk_init_tags(int depth, int alloc_policy)
 {
-	return __blk_queue_init_tags(NULL, depth);
+	return __blk_queue_init_tags(NULL, depth, alloc_policy);
 }
 EXPORT_SYMBOL(blk_init_tags);
 
@@ -152,19 +155,20 @@ EXPORT_SYMBOL(blk_init_tags);
  * @q:  the request queue for the device
  * @depth:  the maximum queue depth supported
  * @tags: the tag to use
+ * @alloc_policy: tag allocation policy
  *
  * Queue lock must be held here if the function is called to resize an
  * existing map.
  **/
 int blk_queue_init_tags(struct request_queue *q, int depth,
-			struct blk_queue_tag *tags)
+			struct blk_queue_tag *tags, int alloc_policy)
 {
 	int rc;
 
 	BUG_ON(tags && q->queue_tags && tags != q->queue_tags);
 
 	if (!tags && !q->queue_tags) {
-		tags = __blk_queue_init_tags(q, depth);
+		tags = __blk_queue_init_tags(q, depth, alloc_policy);
 
 		if (!tags)
 			return -ENOMEM;
@@ -344,9 +348,21 @@ int blk_queue_start_tag(struct request_queue *q, struct request *rq)
 	}
 
 	do {
-		tag = find_first_zero_bit(bqt->tag_map, max_depth);
-		if (tag >= max_depth)
-			return 1;
+		if (bqt->alloc_policy == BLK_TAG_ALLOC_FIFO) {
+			tag = find_first_zero_bit(bqt->tag_map, max_depth);
+			if (tag >= max_depth)
+				return 1;
+		} else {
+			int start = bqt->next_tag;
+			int size = min_t(int, bqt->max_depth, max_depth + start);
+			tag = find_next_zero_bit(bqt->tag_map, size, start);
+			if (tag >= size && start + size > bqt->max_depth) {
+				size = start + size - bqt->max_depth;
+				tag = find_first_zero_bit(bqt->tag_map, size);
+			}
+			if (tag >= size)
+				return 1;
+		}
 
 	} while (test_and_set_bit_lock(tag, bqt->tag_map));
 	/*
@@ -354,6 +370,7 @@ int blk_queue_start_tag(struct request_queue *q, struct request *rq)
 	 * See blk_queue_end_tag for details.
 	 */
 
+	bqt->next_tag = (tag + 1) % bqt->max_depth;
 	rq->cmd_flags |= REQ_QUEUED;
 	rq->tag = tag;
 	bqt->tag_index[tag] = rq;
diff --git a/drivers/block/osdblk.c b/drivers/block/osdblk.c
index 79aa179..e229425 100644
--- a/drivers/block/osdblk.c
+++ b/drivers/block/osdblk.c
@@ -423,7 +423,7 @@ static int osdblk_init_disk(struct osdblk_device *osdev)
 	}
 
 	/* switch queue to TCQ mode; allocate tag map */
-	rc = blk_queue_init_tags(q, OSDBLK_MAX_REQ, NULL);
+	rc = blk_queue_init_tags(q, OSDBLK_MAX_REQ, NULL, BLK_TAG_ALLOC_FIFO);
 	if (rc) {
 		blk_cleanup_queue(q);
 		put_disk(disk);
diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index 983aed1..921a8c8 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -290,7 +290,8 @@ static struct scsi_device *scsi_alloc_sdev(struct scsi_target *starget,
 	if (!shost_use_blk_mq(sdev->host) &&
 	    (shost->bqt || shost->hostt->use_blk_tags)) {
 		blk_queue_init_tags(sdev->request_queue,
-				    sdev->host->cmd_per_lun, shost->bqt);
+				    sdev->host->cmd_per_lun, shost->bqt,
+				    shost->hostt->tag_alloc_policy);
 	}
 	scsi_change_queue_depth(sdev, sdev->host->cmd_per_lun);
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 92f4b4b..38b095d 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -272,7 +272,11 @@ struct blk_queue_tag {
 	int max_depth;			/* what we will send to device */
 	int real_max_depth;		/* what the array can hold */
 	atomic_t refcnt;		/* map can be shared */
+	int alloc_policy;		/* tag allocation policy */
+	int next_tag;			/* next tag */
 };
+#define BLK_TAG_ALLOC_FIFO 0 /* allocate starting from 0 */
+#define BLK_TAG_ALLOC_RR 1 /* allocate starting from last allocated tag */
 
 #define BLK_SCSI_MAX_CMDS	(256)
 #define BLK_SCSI_CMD_PER_LONG	(BLK_SCSI_MAX_CMDS / (sizeof(long) * 8))
@@ -1139,11 +1143,11 @@ static inline bool blk_needs_flush_plug(struct task_struct *tsk)
 extern int blk_queue_start_tag(struct request_queue *, struct request *);
 extern struct request *blk_queue_find_tag(struct request_queue *, int);
 extern void blk_queue_end_tag(struct request_queue *, struct request *);
-extern int blk_queue_init_tags(struct request_queue *, int, struct blk_queue_tag *);
+extern int blk_queue_init_tags(struct request_queue *, int, struct blk_queue_tag *, int);
 extern void blk_queue_free_tags(struct request_queue *);
 extern int blk_queue_resize_tags(struct request_queue *, int);
 extern void blk_queue_invalidate_tags(struct request_queue *);
-extern struct blk_queue_tag *blk_init_tags(int);
+extern struct blk_queue_tag *blk_init_tags(int, int);
 extern void blk_free_tags(struct blk_queue_tag *);
 
 static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt,
diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h
index e939d2b..642884e 100644
--- a/include/scsi/scsi_host.h
+++ b/include/scsi/scsi_host.h
@@ -415,6 +415,9 @@ struct scsi_host_template {
 	 */
 	unsigned char present;
 
+	/* If use block layer to manage tags, this is tag allocation policy */
+	int tag_alloc_policy;
+
 	/*
 	 * Let the block layer assigns tags to all commands.
 	 */
diff --git a/include/scsi/scsi_tcq.h b/include/scsi/scsi_tcq.h
index fe4a702..c814fae 100644
--- a/include/scsi/scsi_tcq.h
+++ b/include/scsi/scsi_tcq.h
@@ -102,7 +102,7 @@ static inline int scsi_init_shared_tag_map(struct Scsi_Host *shost, int depth)
 	 * devices on the shared host (for libata)
 	 */
 	if (!shost->bqt) {
-		shost->bqt = blk_init_tags(depth);
+		shost->bqt = blk_init_tags(depth, BLK_TAG_ALLOC_FIFO);
 		if (!shost->bqt)
 			return -ENOMEM;
 	}
-- 
1.8.1



* [PATCH 2/2] libata: use generic tagging
  2014-12-15  3:21 [PATCH 1/2] block: support different tag allocation policy Shaohua Li
@ 2014-12-15  3:21 ` Shaohua Li
  2014-12-15  8:13   ` Christoph Hellwig
  2014-12-15  8:08 ` [PATCH 1/2] block: support different tag allocation policy Christoph Hellwig
  1 sibling, 1 reply; 6+ messages in thread
From: Shaohua Li @ 2014-12-15  3:21 UTC
  To: linux-ide; +Cc: Kernel-team, Jens Axboe, Tejun Heo

libata uses its own tag management, which duplicates what the block layer
already provides and is poorly implemented. If we switch to blk-mq later,
tagging is built in anyway. It's time to switch to generic tagging.
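
To make the shape of the change concrete, here is a minimal user-space
sketch (illustration only, not libata code): the bitmap and array below
merely model ap->qc_allocated and ap->qcmd[], qc_new_before() models the
removed ata_qc_new(), and qc_from_tag() models the new ata_qc_new_init()
that receives the tag the block layer already allocated (it arrives via
cmd->request->tag):

#include <assert.h>
#include <stdio.h>

#define MAX_QUEUE 32

struct queued_cmd {
	int tag;
	int in_use;
};

static struct queued_cmd qcmd[MAX_QUEUE];	/* models ap->qcmd[] */
static unsigned long qc_allocated;		/* models the removed bitmap */

/* before: the driver scans its own bitmap, duplicating the block layer */
static struct queued_cmd *qc_new_before(void)
{
	for (int tag = 0; tag < MAX_QUEUE; tag++) {
		if (!(qc_allocated & (1UL << tag))) {
			qc_allocated |= 1UL << tag;
			qcmd[tag].tag = tag;
			qcmd[tag].in_use = 1;
			return &qcmd[tag];
		}
	}
	return NULL;
}

/* after: the caller already holds a block-layer tag, just index with it */
static struct queued_cmd *qc_from_tag(int tag)
{
	assert(tag >= 0 && tag < MAX_QUEUE);
	qcmd[tag].tag = tag;
	qcmd[tag].in_use = 1;
	return &qcmd[tag];
}

int main(void)
{
	struct queued_cmd *a = qc_new_before();	/* picks tag 0 itself */
	struct queued_cmd *b = qc_from_tag(5);	/* tag handed down by the block layer */

	printf("before: tag %d, after: tag %d\n", a->tag, b->tag);
	return 0;
}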

Cc: Jens Axboe <axboe@fb.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Shaohua Li <shli@fb.com>
---
 drivers/ata/libata-core.c | 71 +++++++++--------------------------------------
 drivers/ata/libata-scsi.c |  4 ++-
 drivers/ata/libata.h      |  2 +-
 include/linux/libata.h    |  4 +--
 4 files changed, 19 insertions(+), 62 deletions(-)

diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 5c84fb5..ba4986d 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -1585,7 +1585,7 @@ unsigned ata_exec_internal_sg(struct ata_device *dev,
 	else
 		tag = 0;
 
-	if (test_and_set_bit(tag, &ap->qc_allocated))
+	if (dev->sdev && blk_queue_find_tag(dev->sdev->request_queue, tag))
 		BUG();
 	qc = __ata_qc_from_tag(ap, tag);
 
@@ -4726,46 +4726,6 @@ void swap_buf_le16(u16 *buf, unsigned int buf_words)
 }
 
 /**
- *	ata_qc_new - Request an available ATA command, for queueing
- *	@ap: target port
- *
- *	Some ATA host controllers may implement a queue depth which is less
- *	than ATA_MAX_QUEUE. So we shouldn't allocate a tag which is beyond
- *	the hardware limitation.
- *
- *	LOCKING:
- *	None.
- */
-
-static struct ata_queued_cmd *ata_qc_new(struct ata_port *ap)
-{
-	struct ata_queued_cmd *qc = NULL;
-	unsigned int max_queue = ap->host->n_tags;
-	unsigned int i, tag;
-
-	/* no command while frozen */
-	if (unlikely(ap->pflags & ATA_PFLAG_FROZEN))
-		return NULL;
-
-	for (i = 0, tag = ap->last_tag + 1; i < max_queue; i++, tag++) {
-		tag = tag < max_queue ? tag : 0;
-
-		/* the last tag is reserved for internal command. */
-		if (tag == ATA_TAG_INTERNAL)
-			continue;
-
-		if (!test_and_set_bit(tag, &ap->qc_allocated)) {
-			qc = __ata_qc_from_tag(ap, tag);
-			qc->tag = tag;
-			ap->last_tag = tag;
-			break;
-		}
-	}
-
-	return qc;
-}
-
-/**
  *	ata_qc_new_init - Request an available ATA command, and initialize it
  *	@dev: Device from whom we request an available command structure
  *
@@ -4773,19 +4733,22 @@ static struct ata_queued_cmd *ata_qc_new(struct ata_port *ap)
  *	None.
  */
 
-struct ata_queued_cmd *ata_qc_new_init(struct ata_device *dev)
+struct ata_queued_cmd *ata_qc_new_init(struct ata_device *dev, int tag)
 {
 	struct ata_port *ap = dev->link->ap;
 	struct ata_queued_cmd *qc;
 
-	qc = ata_qc_new(ap);
-	if (qc) {
-		qc->scsicmd = NULL;
-		qc->ap = ap;
-		qc->dev = dev;
+	/* no command while frozen */
+	if (unlikely(ap->pflags & ATA_PFLAG_FROZEN))
+		return NULL;
 
-		ata_qc_reinit(qc);
-	}
+	qc = __ata_qc_from_tag(ap, tag);
+	qc->tag = tag;
+	qc->scsicmd = NULL;
+	qc->ap = ap;
+	qc->dev = dev;
+
+	ata_qc_reinit(qc);
 
 	return qc;
 }
@@ -4802,18 +4765,10 @@ struct ata_queued_cmd *ata_qc_new_init(struct ata_device *dev)
  */
 void ata_qc_free(struct ata_queued_cmd *qc)
 {
-	struct ata_port *ap;
-	unsigned int tag;
-
 	WARN_ON_ONCE(qc == NULL); /* ata_qc_from_tag _might_ return NULL */
-	ap = qc->ap;
 
 	qc->flags = 0;
-	tag = qc->tag;
-	if (likely(ata_tag_valid(tag))) {
-		qc->tag = ATA_TAG_POISON;
-		clear_bit(tag, &ap->qc_allocated);
-	}
+	qc->tag = ATA_TAG_POISON;
 }
 
 void __ata_qc_complete(struct ata_queued_cmd *qc)
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index e364e86..12e867c 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -756,7 +756,7 @@ static struct ata_queued_cmd *ata_scsi_qc_new(struct ata_device *dev,
 {
 	struct ata_queued_cmd *qc;
 
-	qc = ata_qc_new_init(dev);
+	qc = ata_qc_new_init(dev, cmd->request->tag);
 	if (qc) {
 		qc->scsicmd = cmd;
 		qc->scsidone = cmd->scsi_done;
@@ -3711,6 +3711,8 @@ void ata_scsi_scan_host(struct ata_port *ap, int sync)
 						 NULL);
 			if (!IS_ERR(sdev)) {
 				dev->sdev = sdev;
+				blk_queue_resize_tags(sdev->request_queue,
+					ap->host->n_tags);
 				scsi_device_put(sdev);
 			} else {
 				dev->sdev = NULL;
diff --git a/drivers/ata/libata.h b/drivers/ata/libata.h
index 5f4e0cc..4040513 100644
--- a/drivers/ata/libata.h
+++ b/drivers/ata/libata.h
@@ -63,7 +63,7 @@ extern struct ata_link *ata_dev_phys_link(struct ata_device *dev);
 extern void ata_force_cbl(struct ata_port *ap);
 extern u64 ata_tf_to_lba(const struct ata_taskfile *tf);
 extern u64 ata_tf_to_lba48(const struct ata_taskfile *tf);
-extern struct ata_queued_cmd *ata_qc_new_init(struct ata_device *dev);
+extern struct ata_queued_cmd *ata_qc_new_init(struct ata_device *dev, int tag);
 extern int ata_build_rw_tf(struct ata_taskfile *tf, struct ata_device *dev,
 			   u64 block, u32 n_block, unsigned int tf_flags,
 			   unsigned int tag);
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 2d18241..300204c 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -821,10 +821,8 @@ struct ata_port {
 	unsigned int		cbl;	/* cable type; ATA_CBL_xxx */
 
 	struct ata_queued_cmd	qcmd[ATA_MAX_QUEUE];
-	unsigned long		qc_allocated;
 	unsigned int		qc_active;
 	int			nr_active_links; /* #links with active qcs */
-	unsigned int		last_tag;	/* track next tag hw expects */
 
 	struct ata_link		link;		/* host default link */
 	struct ata_link		*slave_link;	/* see ata_slave_link_init() */
@@ -1344,6 +1342,8 @@ extern struct device_attribute *ata_common_sdev_attrs[];
 	.ioctl			= ata_scsi_ioctl,		\
 	.queuecommand		= ata_scsi_queuecmd,		\
 	.can_queue		= ATA_DEF_QUEUE,		\
+	.use_blk_tags		= 1,				\
+	.tag_alloc_policy	= BLK_TAG_ALLOC_RR,		\
 	.this_id		= ATA_SHT_THIS_ID,		\
 	.cmd_per_lun		= ATA_SHT_CMD_PER_LUN,		\
 	.emulated		= ATA_SHT_EMULATED,		\
-- 
1.8.1



* Re: [PATCH 1/2] block: support different tag allocation policy
  2014-12-15  3:21 [PATCH 1/2] block: support different tag allocation policy Shaohua Li
  2014-12-15  3:21 ` [PATCH 2/2] libata: use generic tagging Shaohua Li
@ 2014-12-15  8:08 ` Christoph Hellwig
  2014-12-15 16:24   ` Shaohua Li
  1 sibling, 1 reply; 6+ messages in thread
From: Christoph Hellwig @ 2014-12-15  8:08 UTC
  To: Shaohua Li; +Cc: linux-ide, Kernel-team, Jens Axboe, Tejun Heo

On Sun, Dec 14, 2014 at 07:21:49PM -0800, Shaohua Li wrote:
> libata's tag allocation uses a round-robin policy. The next patch will
> make libata use the block layer's generic tag allocation, so add an
> allocation policy to it.
> 
> Currently there are two policies: FIFO (the default) and round-robin.

What's the benefit of round-robin allocation?  Why do you only need
it for the old simple allocator, with no policy control for the more
complex blk-mq allocator?


* Re: [PATCH 2/2] libata: use generic tagging
  2014-12-15  3:21 ` [PATCH 2/2] libata: use generic tagging Shaohua Li
@ 2014-12-15  8:13   ` Christoph Hellwig
  0 siblings, 0 replies; 6+ messages in thread
From: Christoph Hellwig @ 2014-12-15  8:13 UTC
  To: Shaohua Li; +Cc: linux-ide, Kernel-team, Jens Axboe, Tejun Heo

> +struct ata_queued_cmd *ata_qc_new_init(struct ata_device *dev, int tag)
>  {
>  	struct ata_port *ap = dev->link->ap;
>  	struct ata_queued_cmd *qc;
>  
> +	/* no command while frozen */
> +	if (unlikely(ap->pflags & ATA_PFLAG_FROZEN))
> +		return NULL;
>  
> +	qc = __ata_qc_from_tag(ap, tag);

It would be more useful to allocate the ata_queued_cmd as part of
the request/scsi_cmnd, and dip into the reserved pool for internal
commands.

Also note that tags are per-host, which probably breaks the indexing
scheme used to look up commands in ap->qcmd.

> @@ -3711,6 +3711,8 @@ void ata_scsi_scan_host(struct ata_port *ap, int sync)
>  						 NULL);
>  			if (!IS_ERR(sdev)) {
>  				dev->sdev = sdev;
> +				blk_queue_resize_tags(sdev->request_queue,
> +					ap->host->n_tags);

Never call blk_queue_resize_tags directly from a SCSI LLDD, and use
the scsi_change_queue_depth API instead.  Also remember that for
blk-mq you can't grow the tag map after allocation.
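
A minimal sketch of that shape, assuming the depth is adjusted from an
LLDD ->slave_configure()-style hook (the hook and its placement are
illustrative and not part of this patch; scsi_change_queue_depth(),
ata_shost_to_port() and ap->host->n_tags are existing interfaces):

/* sketch only: adjust the queue depth through the SCSI midlayer */
static int example_slave_configure(struct scsi_device *sdev)
{
	struct ata_port *ap = ata_shost_to_port(sdev->host);

	/*
	 * Let the midlayer track the depth change instead of resizing
	 * the block tag map from the LLDD.
	 */
	scsi_change_queue_depth(sdev, ap->host->n_tags);
	return 0;
}

Going through scsi_change_queue_depth() keeps the change inside the
midlayer's bookkeeping, which also matters for blk-mq, where the tag map
cannot grow after allocation.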



* Re: [PATCH 1/2] block: support different tag allocation policy
  2014-12-15  8:08 ` [PATCH 1/2] block: support different tag allocation policy Christoph Hellwig
@ 2014-12-15 16:24   ` Shaohua Li
  2014-12-15 16:57     ` Christoph Hellwig
  0 siblings, 1 reply; 6+ messages in thread
From: Shaohua Li @ 2014-12-15 16:24 UTC
  To: Christoph Hellwig; +Cc: linux-ide, Kernel-team, Jens Axboe, Tejun Heo

On Mon, Dec 15, 2014 at 12:08:13AM -0800, Christoph Hellwig wrote:
> On Sun, Dec 14, 2014 at 07:21:49PM -0800, Shaohua Li wrote:
> > libata's tag allocation uses a round-robin policy. The next patch will
> > make libata use the block layer's generic tag allocation, so add an
> > allocation policy to it.
> > 
> > Currently there are two policies: FIFO (the default) and round-robin.
> 
> What's the benefit of round-robin allocation?  Why do you only need
> it for the old simple allocator, with no policy control for the more
> complex blk-mq allocator?

See 8a4aeec8d2d6a3edeffbdfa.

The policy for blk-mq will be added later.

Thanks,
Shaohua


* Re: [PATCH 1/2] block: support different tag allocation policy
  2014-12-15 16:24   ` Shaohua Li
@ 2014-12-15 16:57     ` Christoph Hellwig
  0 siblings, 0 replies; 6+ messages in thread
From: Christoph Hellwig @ 2014-12-15 16:57 UTC
  To: Shaohua Li
  Cc: Christoph Hellwig, linux-ide, Kernel-team, Jens Axboe, Tejun Heo

On Mon, Dec 15, 2014 at 08:24:43AM -0800, Shaohua Li wrote:
> The policy for blk-mq will be added later.

It doesn't make sense to move libata to block layer tagging before this
has been sorted out.

