From: Boaz Harrosh <bharrosh@panasas.com>
To: James Bottomley <James.Bottomley@SteelEye.com>,
Jens Axboe <jens.axboe@oracle.com>,
FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>,
linux-scsi <linux-scsi@vger.kernel.org>
Subject: [PATCH B2/5] SCSI: support for allocating large scatterlists
Date: Tue, 24 Jul 2007 12:01:23 +0300
Message-ID: <46A5BFE3.7090505@panasas.com>
In-Reply-To: <46A5BCB6.9050102@panasas.com>
A slightly revised sg-chaining patch, adjusted to accommodate
the cleanup of the sg-pool allocations.

From Jens:
This is what enables large commands. If we need to allocate an
sgtable that doesn't fit in a single page, allocate several
SCSI_MAX_SG_SEGMENTS-sized tables and chain them together.
SCSI defaults to large chained sg tables if the arch supports it.
Was-Signed-by: Jens Axboe <jens.axboe@oracle.com>
Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
---
drivers/scsi/scsi_lib.c | 136 +++++++++++++++++++++++++++++++++++++++++++---
include/scsi/scsi_cmnd.h | 1 +
2 files changed, 129 insertions(+), 8 deletions(-)
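
For reviewers, a note on the sizing (not part of the patch): each full
SCSI_MAX_SG_SEGMENTS-sized table gives up its last entry to the chain link
set up by sg_chain(), and only the final, possibly smaller, table uses all
of its entries. With the SCSI_MAX_SG_CHAIN_SEGMENTS limit of 2048 added
below and 4 KB pages that is at least 2048 * 4 KB = 8 MB per request,
which is where the "8 MB IOs" comment comes from. The stand-alone userspace
sketch below mirrors the do/while loop added to scsi_alloc_sgtable();
EXAMPLE_MAX_SEGS and example_nr_sg_tables() are made-up names, with
EXAMPLE_MAX_SEGS standing in for whatever value the earlier patches in this
series give SCSI_MAX_SG_SEGMENTS.

#include <stdio.h>

#define EXAMPLE_MAX_SEGS	128	/* stand-in for SCSI_MAX_SG_SEGMENTS */

/* mirrors the allocation loop: full tables map EXAMPLE_MAX_SEGS - 1 segments */
static int example_nr_sg_tables(int use_sg)
{
	int tables = 0;

	while (use_sg > EXAMPLE_MAX_SEGS) {
		/* a full table; its last entry becomes the chain entry */
		use_sg -= EXAMPLE_MAX_SEGS - 1;
		tables++;
	}
	/* the final (possibly smaller) table holds whatever is left */
	return tables + 1;
}

int main(void)
{
	printf("use_sg=100  -> %d table(s)\n", example_nr_sg_tables(100));
	printf("use_sg=300  -> %d table(s)\n", example_nr_sg_tables(300));
	printf("use_sg=2048 -> %d table(s)\n", example_nr_sg_tables(2048));
	return 0;
}

With the stand-in value of 128, use_sg=300 prints 3 tables: 127 + 127 + 46
segments.
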
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 71532f9..7ee5591 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -54,7 +54,11 @@ struct scsi_host_sg_pool {
 };
 static struct scsi_host_sg_pool scsi_sg_pools[SG_MEMPOOL_NR];
-
+/*
+ * IO limit for archs that have sg chaining. This limit is totally arbitrary;
+ * a setting of 2048 will get you at least 8 MB IOs.
+ */
+#define SCSI_MAX_SG_CHAIN_SEGMENTS	2048
 static void scsi_run_queue(struct request_queue *q);
@@ -712,21 +716,123 @@ static unsigned scsi_sgtable_index(unsigned nents)
 struct scatterlist *scsi_alloc_sgtable(struct scsi_cmnd *cmd, gfp_t gfp_mask)
 {
-	unsigned int pool = scsi_sgtable_index(cmd->use_sg);
-	struct scatterlist *sgl;
+	struct scsi_host_sg_pool *sgp;
+	struct scatterlist *sgl, *prev, *ret;
+	unsigned int index;
+	int this, left;
+
+	BUG_ON(!cmd->use_sg);
+
+	left = cmd->use_sg;
+	ret = prev = NULL;
+	do {
+		this = left;
+		if (this > SCSI_MAX_SG_SEGMENTS) {
+			this = SCSI_MAX_SG_SEGMENTS - 1;
+			index = SG_MEMPOOL_NR - 1;
+		} else
+			index = scsi_sgtable_index(this);
-	sgl = mempool_alloc(scsi_sg_pools[pool].pool, gfp_mask);
-	if (unlikely(!sgl))
-		return NULL;
+		left -= this;
+
+		sgp = scsi_sg_pools + index;
+
+		sgl = mempool_alloc(sgp->pool, gfp_mask);
+		if (unlikely(!sgl))
+			goto enomem;
+
+		memset(sgl, 0, sizeof(*sgl) * sgp->size);
+
+		/*
+		 * first loop through, set initial index and return value
+		 */
+		if (!ret) {
+			cmd->sg_pool = index;
+			ret = sgl;
+		}
+
+		/*
+		 * chain previous sglist, if any. we know the previous
+		 * sglist must be the biggest one, or we would not have
+		 * ended up doing another loop.
+		 */
+		if (prev)
+			sg_chain(prev, SCSI_MAX_SG_SEGMENTS, sgl);
+
+		/*
+		 * don't allow subsequent mempool allocs to sleep, it would
+		 * violate the mempool principle.
+		 */
+		gfp_mask &= ~__GFP_WAIT;
+		gfp_mask |= __GFP_HIGH;
+		prev = sgl;
+	} while (left);
+
+	/*
+	 * ->use_sg may get modified after dma mapping has potentially
+	 * shrunk the number of segments, so keep a copy of it for free.
+	 */
+	cmd->__use_sg = cmd->use_sg;
+	return ret;
+enomem:
+	if (ret) {
+		/*
+		 * Free entries chained off ret. Since we were trying to
+		 * allocate another sglist, we know that all entries are of
+		 * the max size.
+		 */
+		sgp = scsi_sg_pools + SG_MEMPOOL_NR - 1;
+		prev = ret;
+		ret = &ret[SCSI_MAX_SG_SEGMENTS - 1];
+
+		while ((sgl = sg_chain_ptr(ret)) != NULL) {
+			ret = &sgl[SCSI_MAX_SG_SEGMENTS - 1];
+			mempool_free(sgl, sgp->pool);
+		}
-	cmd->sg_pool = pool;
-	return sgl;
+		mempool_free(prev, sgp->pool);
+	}
+	return NULL;
 }
 EXPORT_SYMBOL(scsi_alloc_sgtable);
 void scsi_free_sgtable(struct scsi_cmnd *cmd)
 {
+	struct scatterlist *sgl = cmd->request_buffer;
+	struct scsi_host_sg_pool *sgp;
+
+	/*
+	 * if this is the biggest size sglist, check if we have
+	 * chained parts we need to free
+	 */
+	if (cmd->__use_sg > SCSI_MAX_SG_SEGMENTS) {
+		unsigned short this, left;
+		struct scatterlist *next;
+		unsigned int index;
+
+		left = cmd->__use_sg - (SCSI_MAX_SG_SEGMENTS - 1);
+		next = sg_chain_ptr(&sgl[SCSI_MAX_SG_SEGMENTS - 1]);
+		while (left && next) {
+			sgl = next;
+			this = left;
+			if (this > SCSI_MAX_SG_SEGMENTS) {
+				this = SCSI_MAX_SG_SEGMENTS - 1;
+				index = SG_MEMPOOL_NR - 1;
+			} else
+				index = scsi_sgtable_index(this);
+
+			left -= this;
+
+			sgp = scsi_sg_pools + index;
+
+			if (left)
+				next = sg_chain_ptr(&sgl[sgp->size - 1]);
+
+			mempool_free(sgl, sgp->pool);
+		}
+	}
+
 	mempool_free(cmd->request_buffer, scsi_sg_pools[cmd->sg_pool].pool);
 }
@@ -1550,8 +1656,22 @@ struct request_queue *__scsi_alloc_queue(struct Scsi_Host *shost,
 	if (!q)
 		return NULL;
+	/*
+	 * this limit is imposed by hardware restrictions
+	 */
 	blk_queue_max_hw_segments(q, shost->sg_tablesize);
+
+	/*
+	 * In the future, sg chaining support will be mandatory and this
+	 * ifdef can then go away. Right now we don't have all archs
+	 * converted, so better keep it safe.
+	 */
+#ifdef ARCH_HAS_SG_CHAIN
+	blk_queue_max_phys_segments(q, SCSI_MAX_SG_CHAIN_SEGMENTS);
+#else
 	blk_queue_max_phys_segments(q, SCSI_MAX_SG_SEGMENTS);
+#endif
+
 	blk_queue_max_sectors(q, shost->max_sectors);
 	blk_queue_bounce_limit(q, scsi_calculate_bounce_limit(shost));
 	blk_queue_segment_boundary(q, shost->dma_boundary);
diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h
index 279a4df..7d0b2de 100644
--- a/include/scsi/scsi_cmnd.h
+++ b/include/scsi/scsi_cmnd.h
@@ -72,6 +72,7 @@ struct scsi_cmnd {
 	/* These elements define the operation we ultimately want to perform */
 	unsigned short use_sg;	/* Number of pieces of scatter-gather */
 	unsigned short sg_pool;	/* pool index of allocated sg array */
+	unsigned short __use_sg;
 	unsigned underflow;	/* Return error if less than
 				   this amount is transferred */
--
1.5.2.2.249.g45fd
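
A small illustration, not part of the patch: once request_buffer can be a
chained table like the ones built above, a driver can no longer treat it as
one flat array and has to follow the chain instead. A minimal sketch,
assuming the sg_next() helper from the sg-chaining series this patch
depends on is available; example_total_len() is a made-up name:

#include <linux/scatterlist.h>
#include <scsi/scsi_cmnd.h>

/* walk a (possibly chained) sg table and add up the mapped length */
static unsigned int example_total_len(struct scsi_cmnd *cmd)
{
	struct scatterlist *sg = cmd->request_buffer;
	unsigned int len = 0;
	int i;

	for (i = 0; i < cmd->use_sg; i++, sg = sg_next(sg))
		len += sg->length;

	return len;
}
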
Thread overview: 27+ messages
2007-07-24 8:47 [PATCHSET 0/5] Peaceful co-existence of scsi_sgtable and Large IO sg-chaining Boaz Harrosh
2007-07-24 8:52 ` [PATCH AB1/5] SCSI: SG pools allocation cleanup Boaz Harrosh
2007-07-24 13:08 ` Boaz Harrosh
2007-07-25 8:08 ` Boaz Harrosh
2007-07-25 9:05 ` [PATCH AB1/5 ver2] " Boaz Harrosh
2007-07-25 9:06 ` [PATCH A2/5 ver2] SCSI: scsi_sgtable implementation Boaz Harrosh
2007-07-24 8:56 ` [PATCH A2/5] " Boaz Harrosh
2007-07-24 8:59 ` [PATCH A3/5] SCSI: sg-chaining over scsi_sgtable Boaz Harrosh
2007-07-24 9:01 ` Boaz Harrosh [this message]
2007-07-24 9:03 ` [PATCH B3/5] SCSI: scsi_sgtable over sg-chainning Boaz Harrosh
2007-07-24 9:16 ` [PATCHSET 0/5] Peaceful co-existence of scsi_sgtable and Large IO sg-chaining FUJITA Tomonori
2007-07-24 10:01 ` Boaz Harrosh
2007-07-24 11:12 ` FUJITA Tomonori
2007-07-24 13:41 ` FUJITA Tomonori
2007-07-24 14:01 ` Benny Halevy
2007-07-24 16:10 ` James Bottomley
2007-07-25 8:26 ` Benny Halevy
2007-07-25 8:42 ` FUJITA Tomonori
2007-07-25 19:22 ` Boaz Harrosh
2007-07-26 11:33 ` FUJITA Tomonori
2007-07-31 20:12 ` Boaz Harrosh
2007-08-05 16:03 ` FUJITA Tomonori
2007-08-06 7:22 ` FUJITA Tomonori
2007-08-07 6:55 ` Jens Axboe
2007-08-07 8:36 ` FUJITA Tomonori
2007-08-08 7:16 ` Jens Axboe
2007-07-25 19:50 ` Boaz Harrosh