From: Boaz Harrosh <bharrosh@panasas.com>
To: James Bottomley <James.Bottomley@SteelEye.com>,
Jens Axboe <jens.axboe@oracle.com>,
FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>,
linux-scsi <linux-scsi@vger.kernel.org>
Subject: [PATCH B2/5] SCSI: support for allocating large scatterlists
Date: Tue, 24 Jul 2007 12:01:23 +0300 [thread overview]
Message-ID: <46A5BFE3.7090505@panasas.com> (raw)
In-Reply-To: <46A5BCB6.9050102@panasas.com>
A slightly revised sg-chaining patch to accommodate
for the cleanup of sg-pools allocations.
from Jens:
This is what enables large commands. If we need to allocate an
sgtable that doesn't fit in a single page, allocate several
SCSI_MAX_SG_SEGMENTS sized tables and chain them together.
SCSI defaults to large chained sg tables, if the arch supports it.
Was-Signed-by: Jens Axboe <jens.axboe@oracle.com>
Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
---
drivers/scsi/scsi_lib.c | 136 +++++++++++++++++++++++++++++++++++++++++++---
include/scsi/scsi_cmnd.h | 1 +
2 files changed, 129 insertions(+), 8 deletions(-)
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 71532f9..7ee5591 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -54,7 +54,11 @@ struct scsi_host_sg_pool {
};
static struct scsi_host_sg_pool scsi_sg_pools[SG_MEMPOOL_NR];
-
+/*
+ * IO limit For archs that have sg chaining. This limit is totally arbitrary,
+ * a setting of 2048 will get you at least 8mb ios.
+ */
+#define SCSI_MAX_SG_CHAIN_SEGMENTS 2048
static void scsi_run_queue(struct request_queue *q);
@@ -712,21 +716,123 @@ static unsigned scsi_sgtable_index(unsigned nents)
struct scatterlist *scsi_alloc_sgtable(struct scsi_cmnd *cmd, gfp_t gfp_mask)
{
- unsigned int pool = scsi_sgtable_index(cmd->use_sg);
- struct scatterlist *sgl;
+ struct scsi_host_sg_pool *sgp;
+ struct scatterlist *sgl, *prev, *ret;
+ unsigned int index;
+ int this, left;
+
+ BUG_ON(!cmd->use_sg);
+
+ left = cmd->use_sg;
+ ret = prev = NULL;
+ do {
+ this = left;
+ if (this > SCSI_MAX_SG_SEGMENTS) {
+ this = SCSI_MAX_SG_SEGMENTS - 1;
+ index = SG_MEMPOOL_NR - 1;
+ } else
+ index = scsi_sgtable_index(this);
- sgl = mempool_alloc(scsi_sg_pools[pool].pool, gfp_mask);
- if (unlikely(!sgl))
- return NULL;
+ left -= this;
+
+ sgp = scsi_sg_pools + index;
+
+ sgl = mempool_alloc(sgp->pool, gfp_mask);
+ if (unlikely(!sgl))
+ goto enomem;
+
+ memset(sgl, 0, sizeof(*sgl) * sgp->size);
+
+ /*
+ * first loop through, set initial index and return value
+ */
+ if (!ret) {
+ cmd->sg_pool = index;
+ ret = sgl;
+ }
+
+ /*
+ * chain previous sglist, if any. we know the previous
+ * sglist must be the biggest one, or we would not have
+ * ended up doing another loop.
+ */
+ if (prev)
+ sg_chain(prev, SCSI_MAX_SG_SEGMENTS, sgl);
+
+ /*
+ * don't allow subsequent mempool allocs to sleep, it would
+ * violate the mempool principle.
+ */
+ gfp_mask &= ~__GFP_WAIT;
+ gfp_mask |= __GFP_HIGH;
+ prev = sgl;
+ } while (left);
+
+ /*
+ * ->use_sg may get modified after dma mapping has potentially
+ * shrunk the number of segments, so keep a copy of it for free.
+ */
+ cmd->__use_sg = cmd->use_sg;
+ return ret;
+enomem:
+ if (ret) {
+ /*
+ * Free entries chained off ret. Since we were trying to
+ * allocate another sglist, we know that all entries are of
+ * the max size.
+ */
+ sgp = scsi_sg_pools + SG_MEMPOOL_NR - 1;
+ prev = ret;
+ ret = &ret[SCSI_MAX_SG_SEGMENTS - 1];
+
+ while ((sgl = sg_chain_ptr(ret)) != NULL) {
+ ret = &sgl[SCSI_MAX_SG_SEGMENTS - 1];
+ mempool_free(sgl, sgp->pool);
+ }
- cmd->sg_pool = pool;
- return sgl;
+ mempool_free(prev, sgp->pool);
+ }
+ return NULL;
}
EXPORT_SYMBOL(scsi_alloc_sgtable);
void scsi_free_sgtable(struct scsi_cmnd *cmd)
{
+ struct scatterlist *sgl = cmd->request_buffer;
+ struct scsi_host_sg_pool *sgp;
+
+ /*
+ * if this is the biggest size sglist, check if we have
+ * chained parts we need to free
+ */
+ if (cmd->__use_sg > SCSI_MAX_SG_SEGMENTS) {
+ unsigned short this, left;
+ struct scatterlist *next;
+ unsigned int index;
+
+ left = cmd->__use_sg - (SCSI_MAX_SG_SEGMENTS - 1);
+ next = sg_chain_ptr(&sgl[SCSI_MAX_SG_SEGMENTS - 1]);
+ while (left && next) {
+ sgl = next;
+ this = left;
+ if (this > SCSI_MAX_SG_SEGMENTS) {
+ this = SCSI_MAX_SG_SEGMENTS - 1;
+ index = SG_MEMPOOL_NR - 1;
+ } else
+ index = scsi_sgtable_index(this);
+
+ left -= this;
+
+ sgp = scsi_sg_pools + index;
+
+ if (left)
+ next = sg_chain_ptr(&sgl[sgp->size - 1]);
+
+ mempool_free(sgl, sgp->pool);
+ }
+ }
+
mempool_free(cmd->request_buffer, scsi_sg_pools[cmd->sg_pool].pool);
}
@@ -1550,8 +1656,22 @@ struct request_queue *__scsi_alloc_queue(struct Scsi_Host *shost,
if (!q)
return NULL;
+ /*
+ * this limit is imposed by hardware restrictions
+ */
blk_queue_max_hw_segments(q, shost->sg_tablesize);
+
+ /*
+ * In the future, sg chaining support will be mandatory and this
+ * ifdef can then go away. Right now we don't have all archs
+ * converted, so better keep it safe.
+ */
+#ifdef ARCH_HAS_SG_CHAIN
+ blk_queue_max_phys_segments(q, SCSI_MAX_SG_CHAIN_SEGMENTS);
+#else
blk_queue_max_phys_segments(q, SCSI_MAX_SG_SEGMENTS);
+#endif
+
blk_queue_max_sectors(q, shost->max_sectors);
blk_queue_bounce_limit(q, scsi_calculate_bounce_limit(shost));
blk_queue_segment_boundary(q, shost->dma_boundary);
diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h
index 279a4df..7d0b2de 100644
--- a/include/scsi/scsi_cmnd.h
+++ b/include/scsi/scsi_cmnd.h
@@ -72,6 +72,7 @@ struct scsi_cmnd {
/* These elements define the operation we ultimately want to perform */
unsigned short use_sg; /* Number of pieces of scatter-gather */
unsigned short sg_pool; /* pool index of allocated sg array */
+ unsigned short __use_sg;
unsigned underflow; /* Return error if less than
this amount is transferred */
--
1.5.2.2.249.g45fd
next prev parent reply other threads:[~2007-07-24 9:02 UTC|newest]
Thread overview: 27+ messages / expand[flat|nested] mbox.gz Atom feed top
2007-07-24 8:47 [PATCHSET 0/5] Peaceful co-existence of scsi_sgtable and Large IO sg-chaining Boaz Harrosh
2007-07-24 8:52 ` [PATCH AB1/5] SCSI: SG pools allocation cleanup Boaz Harrosh
2007-07-24 13:08 ` Boaz Harrosh
2007-07-25 8:08 ` Boaz Harrosh
2007-07-25 9:05 ` [PATCH AB1/5 ver2] " Boaz Harrosh
2007-07-25 9:06 ` [PATCH A2/5 ver2] SCSI: scsi_sgtable implementation Boaz Harrosh
2007-07-24 8:56 ` [PATCH A2/5] " Boaz Harrosh
2007-07-24 8:59 ` [PATCH A3/5] SCSI: sg-chaining over scsi_sgtable Boaz Harrosh
2007-07-24 9:01 ` Boaz Harrosh [this message]
2007-07-24 9:03 ` [PATCH B3/5] SCSI: scsi_sgtable over sg-chainning Boaz Harrosh
2007-07-24 9:16 ` [PATCHSET 0/5] Peaceful co-existence of scsi_sgtable and Large IO sg-chaining FUJITA Tomonori
2007-07-24 10:01 ` Boaz Harrosh
2007-07-24 11:12 ` FUJITA Tomonori
2007-07-24 13:41 ` FUJITA Tomonori
2007-07-24 14:01 ` Benny Halevy
2007-07-24 16:10 ` James Bottomley
2007-07-25 8:26 ` Benny Halevy
2007-07-25 8:42 ` FUJITA Tomonori
2007-07-25 19:22 ` Boaz Harrosh
2007-07-26 11:33 ` FUJITA Tomonori
2007-07-31 20:12 ` Boaz Harrosh
2007-08-05 16:03 ` FUJITA Tomonori
2007-08-06 7:22 ` FUJITA Tomonori
2007-08-07 6:55 ` Jens Axboe
2007-08-07 8:36 ` FUJITA Tomonori
2007-08-08 7:16 ` Jens Axboe
2007-07-25 19:50 ` Boaz Harrosh
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=46A5BFE3.7090505@panasas.com \
--to=bharrosh@panasas.com \
--cc=James.Bottomley@SteelEye.com \
--cc=fujita.tomonori@lab.ntt.co.jp \
--cc=jens.axboe@oracle.com \
--cc=linux-scsi@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.