From: Dan Williams <dan.j.williams@intel.com>
To: neilb@suse.de, akpm@linux-foundation.org
Cc: linux-raid@vger.kernel.org
Subject: [PATCH -mm 2/4] raid5: split allocation of stripe_heads and stripe_queues
Date: Sat, 06 Oct 2007 10:06:49 -0700
Message-ID: <20071006170649.23741.93602.stgit@dwillia2-linux.ch.intel.com>
In-Reply-To: <20071006170538.23741.75193.stgit@dwillia2-linux.ch.intel.com>

Provide separate routines for allocating stripe_head and stripe_queue
objects, and introduce 'io_weight' bitmaps to struct stripe_queue.

The io_weight bitmaps give an efficient way to determine what is pending
in a stripe_queue: a handful of 'hweight' calls replace a 'for' loop over
every device.
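
As a rough illustration of the idea, here is a minimal, stand-alone
user-space sketch (the names below are made up for the example, and gcc's
__builtin_popcountl stands in for the kernel's hweight helpers):

	#include <limits.h>
	#include <stdio.h>

	/* one bit per member disk, in as many longs as needed */
	#define XBITS_PER_LONG (sizeof(unsigned long) * CHAR_BIT)
	#define WEIGHT_WORDS(devs) \
		(((devs) + XBITS_PER_LONG - 1) / XBITS_PER_LONG)

	/* count pending blocks with popcount instead of a per-device loop */
	static unsigned long io_weight_sketch(const unsigned long *bitmap,
					      int devs)
	{
		unsigned long weight = 0;
		size_t words = WEIGHT_WORDS(devs);

		while (words--)
			weight += __builtin_popcountl(*bitmap++);
		return weight;
	}

	int main(void)
	{
		unsigned long to_read[WEIGHT_WORDS(128)] = { 0 };

		to_read[0] |= 1UL << 3;	/* disk 3 has a pending read */
		to_read[1] |= 1UL;	/* so does disk XBITS_PER_LONG */
		printf("pending blocks: %lu\n",
		       io_weight_sketch(to_read, 128));
		return 0;
	}
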
Tested-by: Mr. James W. Laferriere <babydr@baby-dragons.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
drivers/md/raid5.c | 316 ++++++++++++++++++++++++++++++++------------
include/linux/raid/raid5.h | 11 +-
2 files changed, 239 insertions(+), 88 deletions(-)
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index a13de7d..7bc206c 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -65,6 +65,7 @@
#define IO_THRESHOLD 1
#define NR_HASH (PAGE_SIZE / sizeof(struct hlist_head))
#define HASH_MASK (NR_HASH - 1)
+#define STRIPE_QUEUE_SIZE 1 /* multiple of nr_stripes */
#define stripe_hash(conf, sect) (&((conf)->stripe_hashtbl[((sect) >> STRIPE_SHIFT) & HASH_MASK]))
@@ -78,6 +79,8 @@
* of the current stripe+device
*/
#define r5_next_bio(bio, sect) ( ( (bio)->bi_sector + ((bio)->bi_size>>9) < sect + STRIPE_SECTORS) ? (bio)->bi_next : NULL)
+#define r5_io_weight_size(devs) (sizeof(unsigned long) * \
+ (ALIGN(devs, BITS_PER_LONG) / BITS_PER_LONG))
/*
* The following can be used to debug the driver
*/
@@ -120,6 +123,21 @@ static void return_io(struct bio *return_bi)
}
}
+#if BITS_PER_LONG == 32
+#define hweight hweight32
+#else
+#define hweight hweight64
+#endif
+static unsigned long io_weight(unsigned long *bitmap, int disks)
+{
+ unsigned long weight = hweight(*bitmap);
+
+ for (bitmap++; disks > BITS_PER_LONG; disks -= BITS_PER_LONG, bitmap++)
+ weight += hweight(*bitmap);
+
+ return weight;
+}
+
static void print_raid5_conf (raid5_conf_t *conf);
static void __release_stripe(raid5_conf_t *conf, struct stripe_head *sh)
@@ -236,36 +254,37 @@ static int grow_buffers(struct stripe_head *sh, int num)
static void raid5_build_block (struct stripe_head *sh, int i);
-static void init_stripe(struct stripe_head *sh, sector_t sector, int pd_idx, int disks)
+static void init_queue(struct stripe_queue *sq, sector_t sector,
+ int disks, int pd_idx);
+
+static void
+init_stripe(struct stripe_head *sh, struct stripe_queue *sq,
+ sector_t sector, int pd_idx, int disks)
{
- raid5_conf_t *conf = sh->sq->raid_conf;
+ raid5_conf_t *conf = sq->raid_conf;
int i;
+ pr_debug("init_stripe called, stripe %llu\n",
+ (unsigned long long)sector);
+
BUG_ON(atomic_read(&sh->count) != 0);
BUG_ON(test_bit(STRIPE_HANDLE, &sh->state));
BUG_ON(sh->ops.pending || sh->ops.ack || sh->ops.complete);
+ init_queue(sh->sq, sector, disks, pd_idx);
CHECK_DEVLOCK();
- pr_debug("init_stripe called, stripe %llu\n",
- (unsigned long long)sh->sector);
remove_hash(sh);
sh->sector = sector;
- sh->sq->pd_idx = pd_idx;
sh->state = 0;
- sh->sq->disks = disks;
-
for (i = disks; i--;) {
struct r5dev *dev = &sh->dev[i];
- struct r5_queue_dev *dev_q = &sh->sq->dev[i];
- if (dev_q->toread || dev_q->read || dev_q->towrite ||
- dev_q->written || test_bit(R5_LOCKED, &dev->flags)) {
- printk(KERN_ERR "sector=%llx i=%d %p %p %p %p %d\n",
- (unsigned long long)sh->sector, i, dev_q->toread,
- dev_q->read, dev_q->towrite, dev_q->written,
+ if (test_bit(R5_LOCKED, &dev->flags)) {
+ printk(KERN_ERR "sector=%llx i=%d %d\n",
+ (unsigned long long)sector, i,
test_bit(R5_LOCKED, &dev->flags));
BUG();
}
@@ -283,7 +302,7 @@ static struct stripe_head *__find_stripe(raid5_conf_t *conf, sector_t sector, in
CHECK_DEVLOCK();
pr_debug("__find_stripe, sector %llu\n", (unsigned long long)sector);
hlist_for_each_entry(sh, hn, stripe_hash(conf, sector), hash)
- if (sh->sector == sector && sh->sq->disks == disks)
+ if (sh->sector == sector)
return sh;
pr_debug("__stripe %llu not in cache\n", (unsigned long long)sector);
return NULL;
@@ -326,7 +345,7 @@ static struct stripe_head *get_active_stripe(raid5_conf_t *conf, sector_t sector
);
conf->inactive_blocked = 0;
} else
- init_stripe(sh, sector, pd_idx, disks);
+ init_stripe(sh, sh->sq, sector, pd_idx, disks);
} else {
if (atomic_read(&sh->count)) {
BUG_ON(!list_empty(&sh->lru));
@@ -348,6 +367,39 @@ static struct stripe_head *get_active_stripe(raid5_conf_t *conf, sector_t sector
return sh;
}
+static void init_queue(struct stripe_queue *sq, sector_t sector,
+ int disks, int pd_idx)
+{
+ raid5_conf_t *conf = sq->raid_conf;
+ int i;
+
+ pr_debug("%s: %llu -> %llu [%p]\n",
+ __FUNCTION__, (unsigned long long) sq->sector,
+ (unsigned long long) sector, sq);
+
+ BUG_ON(io_weight(sq->to_read, disks));
+ BUG_ON(io_weight(sq->to_write, disks));
+ BUG_ON(io_weight(sq->overwrite, disks));
+
+ sq->sector = sector;
+ sq->pd_idx = pd_idx;
+ sq->disks = disks;
+
+ for (i = disks; i--;) {
+ struct r5_queue_dev *dev_q = &sq->dev[i];
+
+ if (dev_q->toread || dev_q->read || dev_q->towrite ||
+ dev_q->written) {
+ printk(KERN_ERR "sector=%llx i=%d %p %p %p %p\n",
+ (unsigned long long)sq->sector, i, dev_q->toread,
+ dev_q->read, dev_q->towrite, dev_q->written);
+ BUG();
+ }
+ dev_q->sector = compute_blocknr(conf, disks, sector, pd_idx, i);
+ }
+}
+
/* test_and_ack_op() ensures that we only dequeue an operation once */
#define test_and_ack_op(op, pend) \
do { \
@@ -570,21 +622,23 @@ static void ops_complete_biofill(void *stripe_head_ref)
static void ops_run_biofill(struct stripe_head *sh)
{
struct dma_async_tx_descriptor *tx = NULL;
- raid5_conf_t *conf = sh->sq->raid_conf;
+ struct stripe_queue *sq = sh->sq;
+ raid5_conf_t *conf = sq->raid_conf;
int i;
pr_debug("%s: stripe %llu\n", __FUNCTION__,
(unsigned long long)sh->sector);
- for (i = sh->sq->disks; i--;) {
+ for (i = sq->disks; i--;) {
struct r5dev *dev = &sh->dev[i];
- struct r5_queue_dev *dev_q = &sh->sq->dev[i];
+ struct r5_queue_dev *dev_q = &sq->dev[i];
if (test_bit(R5_Wantfill, &dev->flags)) {
struct bio *rbi;
spin_lock_irq(&conf->device_lock);
dev_q->read = rbi = dev_q->toread;
dev_q->toread = NULL;
+ clear_bit(i, sq->to_read);
spin_unlock_irq(&conf->device_lock);
while (rbi && rbi->bi_sector <
dev_q->sector + STRIPE_SECTORS) {
@@ -669,9 +723,9 @@ static struct dma_async_tx_descriptor *
ops_run_prexor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
{
/* kernel stack size limits the total number of disks */
- int disks = sh->sq->disks;
- struct page *xor_srcs[disks];
struct stripe_queue *sq = sh->sq;
+ int disks = sq->disks;
+ struct page *xor_srcs[disks];
int count = 0, pd_idx = sq->pd_idx, i;
/* existing parity data subtracted */
@@ -698,9 +752,10 @@ ops_run_prexor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
static struct dma_async_tx_descriptor *
ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
{
- int disks = sh->sq->disks;
struct stripe_queue *sq = sh->sq;
- int pd_idx = sq->pd_idx, i;
+ int disks = sq->disks;
+ int pd_idx = sq->pd_idx;
+ int i;
/* check if prexor is active which means only process blocks
* that are part of a read-modify-write (Wantprexor)
@@ -733,6 +788,7 @@ ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
spin_lock(&sq->lock);
chosen = dev_q->towrite;
dev_q->towrite = NULL;
+ clear_bit(i, sq->to_write);
BUG_ON(dev_q->written);
wbi = dev_q->written = chosen;
spin_unlock(&sq->lock);
@@ -793,7 +849,9 @@ ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
int disks = sq->disks;
struct page *xor_srcs[disks];
- int count = 0, pd_idx = sh->sq->pd_idx, i;
+ int count = 0;
+ int pd_idx = sq->pd_idx;
+ int i;
struct page *xor_dest;
int prexor = test_bit(STRIPE_OP_PREXOR, &sh->ops.pending);
unsigned long flags;
@@ -866,11 +924,14 @@ static void ops_complete_check(void *stripe_head_ref)
static void ops_run_check(struct stripe_head *sh)
{
/* kernel stack size limits the total number of disks */
- int disks = sh->sq->disks;
+ struct stripe_queue *sq = sh->sq;
+ int disks = sq->disks;
struct page *xor_srcs[disks];
struct dma_async_tx_descriptor *tx;
- int count = 0, pd_idx = sh->sq->pd_idx, i;
+ int count = 0;
+ int pd_idx = sq->pd_idx;
+ int i;
struct page *xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page;
pr_debug("%s: stripe %llu\n", __FUNCTION__,
@@ -897,7 +958,10 @@ static void ops_run_check(struct stripe_head *sh)
static void raid5_run_ops(struct stripe_head *sh, unsigned long pending)
{
- int overlap_clear = 0, i, disks = sh->sq->disks;
+ struct stripe_queue *sq = sh->sq;
+ int overlap_clear = 0;
+ int disks = sq->disks;
+ int i;
struct dma_async_tx_descriptor *tx = NULL;
if (test_bit(STRIPE_OP_BIOFILL, &pending)) {
@@ -926,43 +990,29 @@ static void raid5_run_ops(struct stripe_head *sh, unsigned long pending)
ops_run_io(sh);
if (overlap_clear) {
- for (i = disks; i--; ) {
- struct r5dev *dev = &sh->dev[i];
- if (test_and_clear_bit(R5_Overlap, &dev->flags))
- wake_up(&sh->sq->raid_conf->wait_for_overlap);
- }
+ for (i = disks; i--;)
+ if (test_and_clear_bit(i, sq->overlap))
+ wake_up(&sq->raid_conf->wait_for_overlap);
}
}
+static struct stripe_queue *grow_one_queue(raid5_conf_t *conf);
+
static int grow_one_stripe(raid5_conf_t *conf)
{
struct stripe_head *sh;
- struct stripe_queue *sq;
-
sh = kmem_cache_alloc(conf->sh_slab_cache, GFP_KERNEL);
if (!sh)
return 0;
-
- sq = kmem_cache_alloc(conf->sq_slab_cache, GFP_KERNEL);
- if (!sq) {
- kmem_cache_free(conf->sh_slab_cache, sh);
- return 0;
- }
-
memset(sh, 0, sizeof(*sh) + (conf->raid_disks-1)*sizeof(struct r5dev));
- memset(sq, 0, sizeof(*sq) +
- (conf->raid_disks-1) * sizeof(struct r5_queue_dev));
- sh->sq = sq;
- sq->raid_conf = conf;
- spin_lock_init(&sq->lock);
+ sh->sq = grow_one_queue(conf);
+ if (!sh->sq) {
+ kmem_cache_free(conf->sh_slab_cache, sh);
+ return 0;
+ }
if (grow_buffers(sh, conf->raid_disks)) {
shrink_buffers(sh, conf->raid_disks);
kmem_cache_free(conf->sh_slab_cache, sh);
- kmem_cache_free(conf->sq_slab_cache, sq);
return 0;
}
- sq->disks = conf->raid_disks;
+
/* we just created an active stripe so... */
atomic_set(&sh->count, 1);
atomic_inc(&conf->active_stripes);
@@ -973,6 +1023,37 @@ static int grow_one_stripe(raid5_conf_t *conf)
return 1;
}
+static struct stripe_queue *grow_one_queue(raid5_conf_t *conf)
+{
+ struct stripe_queue *sq;
+ int disks = conf->raid_disks;
+ void *weight_map;
+ sq = kmem_cache_alloc(conf->sq_slab_cache, GFP_KERNEL);
+ if (!sq)
+ return NULL;
+ memset(sq, 0, (sizeof(*sq)+(disks-1) * sizeof(struct r5_queue_dev)) +
+ r5_io_weight_size(disks) + r5_io_weight_size(disks) +
+ r5_io_weight_size(disks) + r5_io_weight_size(disks));
+
+ /* set the queue weight bitmaps to the free space at the end of sq */
+ weight_map = ((void *) sq) + offsetof(typeof(*sq), dev) +
+ sizeof(struct r5_queue_dev) * disks;
+ sq->to_read = weight_map;
+ weight_map += r5_io_weight_size(disks);
+ sq->to_write = weight_map;
+ weight_map += r5_io_weight_size(disks);
+ sq->overwrite = weight_map;
+ weight_map += r5_io_weight_size(disks);
+ sq->overlap = weight_map;
+
+ spin_lock_init(&sq->lock);
+ sq->sector = MaxSector;
+ sq->raid_conf = conf;
+ sq->disks = disks;
+
+ return sq;
+}
+
static int grow_stripes(raid5_conf_t *conf, int num)
{
struct kmem_cache *sc;
@@ -993,9 +1074,12 @@ static int grow_stripes(raid5_conf_t *conf, int num)
conf->pool_size = devs;
sc = kmem_cache_create(conf->sq_cache_name[conf->active_name],
- sizeof(struct stripe_queue) +
- (devs-1)*sizeof(struct r5_queue_dev), 0, 0, NULL);
-
+ (sizeof(struct stripe_queue)+(devs-1) *
+ sizeof(struct r5_queue_dev)) +
+ r5_io_weight_size(devs) +
+ r5_io_weight_size(devs) +
+ r5_io_weight_size(devs) +
+ r5_io_weight_size(devs), 0, 0, NULL);
if (!sc)
return 1;
conf->sq_slab_cache = sc;
@@ -1003,6 +1087,7 @@ static int grow_stripes(raid5_conf_t *conf, int num)
while (num--)
if (!grow_one_stripe(conf))
return 1;
+
return 0;
}
@@ -1033,11 +1118,13 @@ static int resize_stripes(raid5_conf_t *conf, int newsize)
* so we use GFP_NOIO allocations.
*/
struct stripe_head *osh, *nsh;
+ struct stripe_queue *nsq;
LIST_HEAD(newstripes);
+ LIST_HEAD(newqueues);
struct disk_info *ndisks;
int err = 0;
struct kmem_cache *sc, *sc_q;
- int i;
+ int i, j;
if (newsize <= conf->pool_size)
return 0; /* never bother to shrink */
@@ -1051,45 +1138,88 @@ static int resize_stripes(raid5_conf_t *conf, int newsize)
if (!sc)
return -ENOMEM;
- sc_q = kmem_cache_create(conf->sh_cache_name[1-conf->active_name],
- sizeof(struct stripe_queue) +
- (newsize-1)*sizeof(struct r5_queue_dev), 0, 0, NULL);
+ sc_q = kmem_cache_create(conf->sq_cache_name[1-conf->active_name],
+ (sizeof(struct stripe_queue)+(newsize-1) *
+ sizeof(struct r5_queue_dev)) +
+ r5_io_weight_size(newsize) +
+ r5_io_weight_size(newsize) +
+ r5_io_weight_size(newsize) +
+ r5_io_weight_size(newsize),
+ 0, 0, NULL);
+
if (!sc_q) {
kmem_cache_destroy(sc);
return -ENOMEM;
}
for (i = conf->max_nr_stripes; i; i--) {
- struct stripe_queue *nsq;
+ struct stripe_queue *nsq_per_sh[STRIPE_QUEUE_SIZE];
nsh = kmem_cache_alloc(sc, GFP_KERNEL);
if (!nsh)
break;
- nsq = kmem_cache_alloc(sc_q, GFP_KERNEL);
- if (!nsq) {
+ /* allocate STRIPE_QUEUE_SIZE queues per stripe */
+ for (j = 0; j < ARRAY_SIZE(nsq_per_sh); j++)
+ nsq_per_sh[j] = kmem_cache_alloc(sc_q, GFP_KERNEL);
+
+ for (j = 0; j < ARRAY_SIZE(nsq_per_sh); j++)
+ if (!nsq_per_sh[j])
+ break;
+
+ if (j < ARRAY_SIZE(nsq_per_sh)) {
kmem_cache_free(sc, nsh);
+ do
+ if (nsq_per_sh[j])
+ kmem_cache_free(sc_q, nsq_per_sh[j]);
+ while (--j >= 0);
break;
}
memset(nsh, 0, sizeof(*nsh) + (newsize-1)*sizeof(struct r5dev));
- memset(nsq, 0, sizeof(*nsq) +
- (newsize-1)*sizeof(struct r5_queue_dev));
-
- nsq->raid_conf = conf;
- nsh->sq = nsq;
- spin_lock_init(&nsq->lock);
-
list_add(&nsh->lru, &newstripes);
+
+ for (j = 0; j < ARRAY_SIZE(nsq_per_sh); j++) {
+ void *weight_map;
+ nsq = nsq_per_sh[j];
+ memset(nsq, 0, (sizeof(*nsq)+(newsize-1) *
+ sizeof(struct r5_queue_dev)) +
+ r5_io_weight_size(newsize) +
+ r5_io_weight_size(newsize) +
+ r5_io_weight_size(newsize) +
+ r5_io_weight_size(newsize));
+ /* set the queue weight bitmaps to the free space at
+ * the end of nsq
+ */
+ weight_map = ((void *) nsq) +
+ offsetof(typeof(*nsq), dev) +
+ sizeof(struct r5_queue_dev) * newsize;
+ nsq->to_read = weight_map;
+ weight_map += r5_io_weight_size(newsize);
+ nsq->to_write = weight_map;
+ weight_map += r5_io_weight_size(newsize);
+ nsq->overwrite = weight_map;
+ weight_map += r5_io_weight_size(newsize);
+ nsq->overlap = weight_map;
+ nsq->raid_conf = conf;
+ spin_lock_init(&nsq->lock);
+ list_add(&nsq->list_node, &newqueues);
+ }
}
if (i) {
/* didn't get enough, give up */
while (!list_empty(&newstripes)) {
nsh = list_entry(newstripes.next, struct stripe_head, lru);
list_del(&nsh->lru);
- kmem_cache_free(sc_q, nsh->sq);
kmem_cache_free(sc, nsh);
}
+ while (!list_empty(&newqueues)) {
+ nsq = list_entry(newqueues.next,
+ struct stripe_queue,
+ list_node);
+ list_del(&nsq->list_node);
+ kmem_cache_free(sc_q, nsq);
+ }
kmem_cache_destroy(sc_q);
kmem_cache_destroy(sc);
return -ENOMEM;
@@ -1133,8 +1263,11 @@ static int resize_stripes(raid5_conf_t *conf, int newsize)
err = -ENOMEM;
/* Step 4, return new stripes to service */
- while(!list_empty(&newstripes)) {
+ while (!list_empty(&newstripes)) {
+ nsq = list_entry(newqueues.next, struct stripe_queue,
+ list_node);
nsh = list_entry(newstripes.next, struct stripe_head, lru);
+ list_del_init(&nsq->list_node);
list_del_init(&nsh->lru);
for (i=conf->raid_disks; i < newsize; i++)
if (nsh->dev[i].page == NULL) {
@@ -1143,6 +1276,7 @@ static int resize_stripes(raid5_conf_t *conf, int newsize)
if (!p)
err = -ENOMEM;
}
+ nsh->sq = nsq;
release_stripe(nsh);
}
/* critical section pass, GFP_NOIO no longer needed */
@@ -1191,9 +1325,11 @@ static int raid5_end_read_request(struct bio * bi, unsigned int bytes_done,
int error)
{
struct stripe_head *sh = bi->bi_private;
- raid5_conf_t *conf = sh->sq->raid_conf;
- int disks = sh->sq->disks, i;
+ struct stripe_queue *sq = sh->sq;
+ raid5_conf_t *conf = sq->raid_conf;
+ int disks = sq->disks;
int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags);
+ int i;
char b[BDEVNAME_SIZE];
mdk_rdev_t *rdev;
@@ -1271,8 +1407,9 @@ static int raid5_end_write_request (struct bio *bi, unsigned int bytes_done,
struct stripe_head *sh = bi->bi_private;
struct stripe_queue *sq = sh->sq;
raid5_conf_t *conf = sq->raid_conf;
- int disks = sq->disks, i;
+ int disks = sq->disks;
int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags);
+ int i;
if (bi->bi_size)
return 1;
@@ -1303,7 +1440,6 @@ static int raid5_end_write_request (struct bio *bi, unsigned int bytes_done,
static void raid5_build_block (struct stripe_head *sh, int i)
{
struct r5dev *dev = &sh->dev[i];
- struct r5_queue_dev *dev_q = &sh->sq->dev[i];
bio_init(&dev->req);
dev->req.bi_io_vec = &dev->vec;
@@ -1315,10 +1451,6 @@ static void raid5_build_block (struct stripe_head *sh, int i)
dev->req.bi_sector = sh->sector;
dev->req.bi_private = sh;
-
- dev->flags = 0;
- dev_q->sector = compute_blocknr(sh->sq->raid_conf, sh->sq->disks,
- sh->sector, sh->sq->pd_idx, i);
}
static void error(mddev_t *mddev, mdk_rdev_t *rdev)
@@ -1613,8 +1745,9 @@ static void compute_parity6(struct stripe_head *sh, int method)
if (i != pd_idx && i != qd_idx && sq->dev[i].towrite) {
chosen = sq->dev[i].towrite;
sq->dev[i].towrite = NULL;
+ clear_bit(i, sq->to_write);
- if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
+ if (test_and_clear_bit(i, sq->overlap))
wake_up(&conf->wait_for_overlap);
BUG_ON(sq->dev[i].written);
@@ -1714,8 +1847,9 @@ static void compute_block_1(struct stripe_head *sh, int dd_idx, int nozero)
/* Compute two missing blocks */
static void compute_block_2(struct stripe_head *sh, int dd_idx1, int dd_idx2)
{
- int i, count, disks = sh->sq->disks;
- int pd_idx = sh->sq->pd_idx;
+ struct stripe_queue *sq = sh->sq;
+ int i, count, disks = sq->disks;
+ int pd_idx = sq->pd_idx;
int qd_idx = raid6_next_disk(pd_idx, disks);
int d0_idx = raid6_next_disk(qd_idx, disks);
int faila, failb;
@@ -1917,10 +2051,11 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in
if (sector >= sq->dev[dd_idx].sector + STRIPE_SECTORS)
set_bit(R5_OVERWRITE, &sh->dev[dd_idx].flags);
}
+
return 1;
overlap:
- set_bit(R5_Overlap, &sh->dev[dd_idx].flags);
+ set_bit(dd_idx, sq->overlap);
spin_unlock_irq(&conf->device_lock);
spin_unlock(&sq->lock);
return 0;
@@ -1973,12 +2108,13 @@ handle_requests_to_failed_array(raid5_conf_t *conf, struct stripe_head *sh,
/* fail all writes first */
bi = sq->dev[i].towrite;
sq->dev[i].towrite = NULL;
+ clear_bit(i, sq->to_write);
if (bi) {
s->to_write--;
bitmap_end = 1;
}
- if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
+ if (test_and_clear_bit(i, sq->overlap))
wake_up(&conf->wait_for_overlap);
while (bi && bi->bi_sector <
@@ -2016,7 +2152,8 @@ handle_requests_to_failed_array(raid5_conf_t *conf, struct stripe_head *sh,
test_bit(R5_ReadError, &sh->dev[i].flags))) {
bi = sq->dev[i].toread;
sq->dev[i].toread = NULL;
- if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
+ clear_bit(i, sq->to_read);
+ if (test_and_clear_bit(i, sq->overlap))
wake_up(&conf->wait_for_overlap);
if (bi) s->to_read--;
while (bi && bi->bi_sector <
@@ -2718,7 +2855,7 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh,
static void handle_stripe5(struct stripe_head *sh)
{
struct stripe_queue *sq = sh->sq;
- raid5_conf_t *conf = sh->sq->raid_conf;
+ raid5_conf_t *conf = sq->raid_conf;
int disks = sq->disks, i;
struct bio *return_bi = NULL;
struct stripe_head_state s;
@@ -2746,6 +2883,8 @@ static void handle_stripe5(struct stripe_head *sh)
struct r5dev *dev = &sh->dev[i];
struct r5_queue_dev *dev_q = &sq->dev[i];
clear_bit(R5_Insync, &dev->flags);
+ if (test_and_clear_bit(i, sq->overwrite))
+ set_bit(R5_OVERWRITE, &dev->flags);
pr_debug("check %d: state 0x%lx toread %p read %p write %p "
"written %p\n", i, dev->flags, dev_q->toread,
@@ -3024,6 +3163,8 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
dev = &sh->dev[i];
clear_bit(R5_Insync, &dev->flags);
+ if (test_and_clear_bit(i, sq->overwrite))
+ set_bit(R5_OVERWRITE, &dev->flags);
pr_debug("check %d: state 0x%lx read %p write %p written %p\n",
i, dev->flags, dev_q->toread, dev_q->towrite,
@@ -3035,7 +3176,8 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
spin_lock_irq(&conf->device_lock);
rbi = dev_q->toread;
dev_q->toread = NULL;
- if (test_and_clear_bit(R5_Overlap, &dev->flags))
+ clear_bit(i, sq->to_read);
+ if (test_and_clear_bit(i, sq->overlap))
wake_up(&conf->wait_for_overlap);
spin_unlock_irq(&conf->device_lock);
while (rbi && rbi->bi_sector <
@@ -3735,6 +3877,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
*/
raid5_conf_t *conf = (raid5_conf_t *) mddev->private;
struct stripe_head *sh;
+ struct stripe_queue *sq;
int pd_idx;
sector_t first_sector, last_sector;
int raid_disks = conf->previous_raid_disks;
@@ -3790,21 +3933,22 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
pd_idx = stripe_to_pdidx(sector_nr+i, conf, conf->raid_disks);
sh = get_active_stripe(conf, sector_nr+i,
conf->raid_disks, pd_idx, 0);
+ sq = sh->sq;
set_bit(STRIPE_EXPANDING, &sh->state);
atomic_inc(&conf->reshape_stripes);
/* If any of this stripe is beyond the end of the old
* array, then we need to zero those blocks
*/
- for (j = sh->sq->disks; j--;) {
+ for (j = sq->disks; j--;) {
sector_t s;
int pd_idx = sh->sq->pd_idx;
if (j == pd_idx)
continue;
if (conf->level == 6 &&
- j == raid6_next_disk(pd_idx, sh->sq->disks))
+ j == raid6_next_disk(pd_idx, sq->disks))
continue;
- s = compute_blocknr(conf, sh->sq->disks, sh->sector,
+ s = compute_blocknr(conf, sq->disks, sh->sector,
pd_idx, j);
if (s < (mddev->array_size<<1)) {
skipped = 1;
@@ -3950,7 +4094,6 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio)
* it will be only one 'dd_idx' and only need one call to raid5_compute_sector.
*/
struct stripe_head *sh;
- struct stripe_queue *sq;
int dd_idx, pd_idx;
sector_t sector, logical_sector, last_sector;
int scnt = 0;
@@ -3984,7 +4127,6 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio)
return handled;
}
- sq = sh->sq;
set_bit(R5_ReadError, &sh->dev[dd_idx].flags);
if (!add_stripe_bio(sh, raid_bio, dd_idx, 0)) {
release_stripe(sh);
diff --git a/include/linux/raid/raid5.h b/include/linux/raid/raid5.h
index 857e2bf..fbe622c 100644
--- a/include/linux/raid/raid5.h
+++ b/include/linux/raid/raid5.h
@@ -207,8 +207,18 @@ struct r6_state {
struct stripe_queue {
sector_t sector;
+ /* stripe queues are allocated with extra space to hold the following
+ * four bitmaps, one bit for each block in the stripe_head. These
+ * bitmaps make it cheap, via hweight, to count the number of blocks
+ * undergoing read, write, or overwrite.
+ */
+ unsigned long *to_read;
+ unsigned long *to_write;
+ unsigned long *overwrite;
+ unsigned long *overlap; /* There is a pending overlapping request */
spinlock_t lock; /* protect bio lists and stripe_head state */
struct raid5_private_data *raid_conf;
+ struct list_head list_node;
int pd_idx; /* parity disk index */
int disks; /* disks in stripe */
struct r5_queue_dev {
@@ -225,7 +235,6 @@ struct stripe_queue {
#define R5_Insync 3 /* rdev && rdev->in_sync at start */
#define R5_Wantread 4 /* want to schedule a read */
#define R5_Wantwrite 5
-#define R5_Overlap 7 /* There is a pending overlapping request on this block */
#define R5_ReadError 8 /* seen a read error here recently */
#define R5_ReWrite 9 /* have tried to over-write the readerror */
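
For reference, the pointer fix-ups in grow_one_queue() depend on the slab
object being sized for the struct plus all four bitmaps; the bitmaps simply
live in the slack space after the dev[] array. A stand-alone sketch of the
same layout trick (hypothetical names, plain malloc standing in for the
kmem_cache, and only two of the four bitmaps shown):

	#include <stdlib.h>
	#include <string.h>

	/* bytes for one bit per device, rounded up to whole longs;
	 * analogous to r5_io_weight_size() above
	 */
	#define WEIGHT_BYTES(devs) \
		((((devs) + 8 * sizeof(long) - 1) / (8 * sizeof(long))) * \
		 sizeof(long))

	struct dev_sketch { void *toread, *towrite; };

	struct queue_sketch {
		unsigned long *to_read;	/* points into the slack space... */
		unsigned long *to_write;	/* ...set up by the allocator */
		int disks;
		struct dev_sketch dev[1];	/* really 'disks' entries */
	};

	static struct queue_sketch *alloc_queue_sketch(int disks)
	{
		size_t size = sizeof(struct queue_sketch) +
			      (disks - 1) * sizeof(struct dev_sketch) +
			      2 * WEIGHT_BYTES(disks);
		struct queue_sketch *sq = malloc(size);
		char *map;

		if (!sq)
			return NULL;
		memset(sq, 0, size);
		/* the bitmaps live immediately after dev[disks - 1] */
		map = (char *)&sq->dev[disks];
		sq->to_read = (unsigned long *)map;
		sq->to_write = (unsigned long *)(map + WEIGHT_BYTES(disks));
		sq->disks = disks;
		return sq;
	}

One allocation covers the struct and all of its bitmaps, so a single
kmem_cache_free() (here: free()) releases everything together.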