From: Boaz Harrosh <bharrosh@panasas.com>
To: James Bottomley <James.Bottomley@SteelEye.com>,
Jens Axboe <jens.axboe@oracle.com>,
FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>,
linux-scsi <linux-scsi@vger.kernel.org>
Subject: [PATCH B2/5] SCSI: support for allocating large scatterlists
Date: Tue, 24 Jul 2007 12:01:23 +0300
Message-ID: <46A5BFE3.7090505@panasas.com>
In-Reply-To: <46A5BCB6.9050102@panasas.com>
A slightly revised sg-chaining patch, adjusted to accommodate
the cleanup of the sg-pool allocations.

From Jens:
This is what enables large commands. If we need to allocate an
sgtable that doesn't fit in a single page, allocate several
SCSI_MAX_SG_SEGMENTS-sized tables and chain them together.
SCSI defaults to large chained sg tables if the arch supports it.
Was-Signed-by: Jens Axboe <jens.axboe@oracle.com>
Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
---
drivers/scsi/scsi_lib.c | 136 +++++++++++++++++++++++++++++++++++++++++++---
include/scsi/scsi_cmnd.h | 1 +
2 files changed, 129 insertions(+), 8 deletions(-)
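
For reviewers, a note on the sizing (not part of the patch): each full
SCSI_MAX_SG_SEGMENTS-sized table gives up its last entry to the chain link
set up by sg_chain(), and only the final, possibly smaller, table uses all
of its entries. With the SCSI_MAX_SG_CHAIN_SEGMENTS limit of 2048 added
below and 4 KB pages that is at least 2048 * 4 KB = 8 MB per request,
which is where the "8 MB IOs" comment comes from. The stand-alone userspace
sketch below mirrors the do/while loop added to scsi_alloc_sgtable();
EXAMPLE_MAX_SEGS and example_nr_sg_tables() are made-up names, with
EXAMPLE_MAX_SEGS standing in for whatever value the earlier patches in this
series give SCSI_MAX_SG_SEGMENTS.

#include <stdio.h>

#define EXAMPLE_MAX_SEGS	128	/* stand-in for SCSI_MAX_SG_SEGMENTS */

/* mirrors the allocation loop: full tables map EXAMPLE_MAX_SEGS - 1 segments */
static int example_nr_sg_tables(int use_sg)
{
	int tables = 0;

	while (use_sg > EXAMPLE_MAX_SEGS) {
		/* a full table; its last entry becomes the chain entry */
		use_sg -= EXAMPLE_MAX_SEGS - 1;
		tables++;
	}
	/* the final (possibly smaller) table holds whatever is left */
	return tables + 1;
}

int main(void)
{
	printf("use_sg=100  -> %d table(s)\n", example_nr_sg_tables(100));
	printf("use_sg=300  -> %d table(s)\n", example_nr_sg_tables(300));
	printf("use_sg=2048 -> %d table(s)\n", example_nr_sg_tables(2048));
	return 0;
}

With the stand-in value of 128, use_sg=300 prints 3 tables: 127 + 127 + 46
segments.
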
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 71532f9..7ee5591 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -54,7 +54,11 @@ struct scsi_host_sg_pool {
 };
 static struct scsi_host_sg_pool scsi_sg_pools[SG_MEMPOOL_NR];
-
+/*
+ * IO limit for archs that have sg chaining. This limit is totally arbitrary;
+ * a setting of 2048 will get you at least 8 MB IOs.
+ */
+#define SCSI_MAX_SG_CHAIN_SEGMENTS	2048
 static void scsi_run_queue(struct request_queue *q);
@@ -712,21 +716,123 @@ static unsigned scsi_sgtable_index(unsigned nents)
 struct scatterlist *scsi_alloc_sgtable(struct scsi_cmnd *cmd, gfp_t gfp_mask)
 {
-	unsigned int pool = scsi_sgtable_index(cmd->use_sg);
-	struct scatterlist *sgl;
+	struct scsi_host_sg_pool *sgp;
+	struct scatterlist *sgl, *prev, *ret;
+	unsigned int index;
+	int this, left;
+
+	BUG_ON(!cmd->use_sg);
+
+	left = cmd->use_sg;
+	ret = prev = NULL;
+	do {
+		this = left;
+		if (this > SCSI_MAX_SG_SEGMENTS) {
+			this = SCSI_MAX_SG_SEGMENTS - 1;
+			index = SG_MEMPOOL_NR - 1;
+		} else
+			index = scsi_sgtable_index(this);
-	sgl = mempool_alloc(scsi_sg_pools[pool].pool, gfp_mask);
-	if (unlikely(!sgl))
-		return NULL;
+		left -= this;
+
+		sgp = scsi_sg_pools + index;
+
+		sgl = mempool_alloc(sgp->pool, gfp_mask);
+		if (unlikely(!sgl))
+			goto enomem;
+
+		memset(sgl, 0, sizeof(*sgl) * sgp->size);
+
+		/*
+		 * first loop through, set initial index and return value
+		 */
+		if (!ret) {
+			cmd->sg_pool = index;
+			ret = sgl;
+		}
+
+		/*
+		 * chain previous sglist, if any. we know the previous
+		 * sglist must be the biggest one, or we would not have
+		 * ended up doing another loop.
+		 */
+		if (prev)
+			sg_chain(prev, SCSI_MAX_SG_SEGMENTS, sgl);
+
+		/*
+		 * don't allow subsequent mempool allocs to sleep, it would
+		 * violate the mempool principle.
+		 */
+		gfp_mask &= ~__GFP_WAIT;
+		gfp_mask |= __GFP_HIGH;
+		prev = sgl;
+	} while (left);
+
+	/*
+	 * ->use_sg may get modified after dma mapping has potentially
+	 * shrunk the number of segments, so keep a copy of it for free.
+	 */
+	cmd->__use_sg = cmd->use_sg;
+	return ret;
+enomem:
+	if (ret) {
+		/*
+		 * Free entries chained off ret. Since we were trying to
+		 * allocate another sglist, we know that all entries are of
+		 * the max size.
+		 */
+		sgp = scsi_sg_pools + SG_MEMPOOL_NR - 1;
+		prev = ret;
+		ret = &ret[SCSI_MAX_SG_SEGMENTS - 1];
+
+		while ((sgl = sg_chain_ptr(ret)) != NULL) {
+			ret = &sgl[SCSI_MAX_SG_SEGMENTS - 1];
+			mempool_free(sgl, sgp->pool);
+		}
-	cmd->sg_pool = pool;
-	return sgl;
+		mempool_free(prev, sgp->pool);
+	}
+	return NULL;
 }
 EXPORT_SYMBOL(scsi_alloc_sgtable);
 void scsi_free_sgtable(struct scsi_cmnd *cmd)
 {
+	struct scatterlist *sgl = cmd->request_buffer;
+	struct scsi_host_sg_pool *sgp;
+
+	/*
+	 * if this is the biggest size sglist, check if we have
+	 * chained parts we need to free
+	 */
+	if (cmd->__use_sg > SCSI_MAX_SG_SEGMENTS) {
+		unsigned short this, left;
+		struct scatterlist *next;
+		unsigned int index;
+
+		left = cmd->__use_sg - (SCSI_MAX_SG_SEGMENTS - 1);
+		next = sg_chain_ptr(&sgl[SCSI_MAX_SG_SEGMENTS - 1]);
+		while (left && next) {
+			sgl = next;
+			this = left;
+			if (this > SCSI_MAX_SG_SEGMENTS) {
+				this = SCSI_MAX_SG_SEGMENTS - 1;
+				index = SG_MEMPOOL_NR - 1;
+			} else
+				index = scsi_sgtable_index(this);
+
+			left -= this;
+
+			sgp = scsi_sg_pools + index;
+
+			if (left)
+				next = sg_chain_ptr(&sgl[sgp->size - 1]);
+
+			mempool_free(sgl, sgp->pool);
+		}
+	}
+
 	mempool_free(cmd->request_buffer, scsi_sg_pools[cmd->sg_pool].pool);
 }
@@ -1550,8 +1656,22 @@ struct request_queue *__scsi_alloc_queue(struct Scsi_Host *shost,
 	if (!q)
 		return NULL;
+	/*
+	 * this limit is imposed by hardware restrictions
+	 */
 	blk_queue_max_hw_segments(q, shost->sg_tablesize);
+
+	/*
+	 * In the future, sg chaining support will be mandatory and this
+	 * ifdef can then go away. Right now we don't have all archs
+	 * converted, so better keep it safe.
+	 */
+#ifdef ARCH_HAS_SG_CHAIN
+	blk_queue_max_phys_segments(q, SCSI_MAX_SG_CHAIN_SEGMENTS);
+#else
 	blk_queue_max_phys_segments(q, SCSI_MAX_SG_SEGMENTS);
+#endif
+
 	blk_queue_max_sectors(q, shost->max_sectors);
 	blk_queue_bounce_limit(q, scsi_calculate_bounce_limit(shost));
 	blk_queue_segment_boundary(q, shost->dma_boundary);
diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h
index 279a4df..7d0b2de 100644
--- a/include/scsi/scsi_cmnd.h
+++ b/include/scsi/scsi_cmnd.h
@@ -72,6 +72,7 @@ struct scsi_cmnd {
 	/* These elements define the operation we ultimately want to perform */
 	unsigned short use_sg;	/* Number of pieces of scatter-gather */
 	unsigned short sg_pool;	/* pool index of allocated sg array */
+	unsigned short __use_sg;
 	unsigned underflow;	/* Return error if less than
 				   this amount is transferred */
--
1.5.2.2.249.g45fd
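
A small illustration, not part of the patch: once request_buffer can be a
chained table like the ones built above, a driver can no longer treat it as
one flat array and has to follow the chain instead. A minimal sketch,
assuming the sg_next() helper from the sg-chaining series this patch
depends on is available; example_total_len() is a made-up name:

#include <linux/scatterlist.h>
#include <scsi/scsi_cmnd.h>

/* walk a (possibly chained) sg table and add up the mapped length */
static unsigned int example_total_len(struct scsi_cmnd *cmd)
{
	struct scatterlist *sg = cmd->request_buffer;
	unsigned int len = 0;
	int i;

	for (i = 0; i < cmd->use_sg; i++, sg = sg_next(sg))
		len += sg->length;

	return len;
}
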
Thread overview: 27+ messages
2007-07-24 8:47 [PATCHSET 0/5] Peaceful co-existence of scsi_sgtable and Large IO sg-chaining Boaz Harrosh
2007-07-24 8:52 ` [PATCH AB1/5] SCSI: SG pools allocation cleanup Boaz Harrosh
2007-07-24 13:08 ` Boaz Harrosh
2007-07-25 8:08 ` Boaz Harrosh
2007-07-25 9:05 ` [PATCH AB1/5 ver2] " Boaz Harrosh
2007-07-25 9:06 ` [PATCH A2/5 ver2] SCSI: scsi_sgtable implementation Boaz Harrosh
2007-07-24 8:56 ` [PATCH A2/5] " Boaz Harrosh
2007-07-24 8:59 ` [PATCH A3/5] SCSI: sg-chaining over scsi_sgtable Boaz Harrosh
2007-07-24 9:01 ` Boaz Harrosh [this message]
2007-07-24 9:03 ` [PATCH B3/5] SCSI: scsi_sgtable over sg-chainning Boaz Harrosh
2007-07-24 9:16 ` [PATCHSET 0/5] Peaceful co-existence of scsi_sgtable and Large IO sg-chaining FUJITA Tomonori
2007-07-24 10:01 ` Boaz Harrosh
2007-07-24 11:12 ` FUJITA Tomonori
2007-07-24 13:41 ` FUJITA Tomonori
2007-07-24 14:01 ` Benny Halevy
2007-07-24 16:10 ` James Bottomley
2007-07-25 8:26 ` Benny Halevy
2007-07-25 8:42 ` FUJITA Tomonori
2007-07-25 19:22 ` Boaz Harrosh
2007-07-26 11:33 ` FUJITA Tomonori
2007-07-31 20:12 ` Boaz Harrosh
2007-08-05 16:03 ` FUJITA Tomonori
2007-08-06 7:22 ` FUJITA Tomonori
2007-08-07 6:55 ` Jens Axboe
2007-08-07 8:36 ` FUJITA Tomonori
2007-08-08 7:16 ` Jens Axboe
2007-07-25 19:50 ` Boaz Harrosh