* [Patch 1/10] dm: Export dm_vcalloc()
2004-02-10 16:35 dm core patches Joe Thornber
@ 2004-02-10 16:57 ` Joe Thornber
2004-02-10 16:59 ` [Patch 2/10] dm: Lift to_bytes() and to_sectors() into dm.h Joe Thornber
` (9 subsequent siblings)
10 siblings, 0 replies; 24+ messages in thread
From: Joe Thornber @ 2004-02-10 16:57 UTC (permalink / raw)
To: Joe Thornber; +Cc: Linux Mailing List, Andrew Morton
Export dm_vcalloc()
--- diff/drivers/md/dm-table.c 2004-01-19 10:22:56.000000000 +0000
+++ source/drivers/md/dm-table.c 2004-02-10 16:11:17.000000000 +0000
@@ -149,7 +149,7 @@
return 0;
}
-static void *dm_vcalloc(unsigned long nmemb, unsigned long elem_size)
+void *dm_vcalloc(unsigned long nmemb, unsigned long elem_size)
{
unsigned long size;
void *addr;
@@ -858,6 +858,7 @@
}
+EXPORT_SYMBOL(dm_vcalloc);
EXPORT_SYMBOL(dm_get_device);
EXPORT_SYMBOL(dm_put_device);
EXPORT_SYMBOL(dm_table_event);
--- diff/drivers/md/dm.h 2004-01-19 10:22:56.000000000 +0000
+++ source/drivers/md/dm.h 2004-02-10 16:11:17.000000000 +0000
@@ -167,4 +167,6 @@
int dm_stripe_init(void);
void dm_stripe_exit(void);
+void *dm_vcalloc(unsigned long nmemb, unsigned long elem_size);
+
#endif
^ permalink raw reply [flat|nested] 24+ messages in thread* [Patch 2/10] dm: Lift to_bytes() and to_sectors() into dm.h
2004-02-10 16:35 dm core patches Joe Thornber
2004-02-10 16:57 ` [Patch 1/10] dm: Export dm_vcalloc() Joe Thornber
@ 2004-02-10 16:59 ` Joe Thornber
2004-02-10 16:59 ` [Patch 3/10] dm: Get rid of struct dm_deferred_io in dm.c Joe Thornber
` (8 subsequent siblings)
10 siblings, 0 replies; 24+ messages in thread
From: Joe Thornber @ 2004-02-10 16:59 UTC (permalink / raw)
To: Joe Thornber; +Cc: Linux Mailing List, Andrew Morton
Lift to_bytes() and to_sector() into dm.h
--- diff/drivers/md/dm.c 2004-01-19 10:22:56.000000000 +0000
+++ source/drivers/md/dm.c 2004-02-10 16:11:24.000000000 +0000
@@ -233,15 +233,6 @@
* interests of getting something for people to use I give
* you this clearly demarcated crap.
*---------------------------------------------------------------*/
-static inline sector_t to_sector(unsigned int bytes)
-{
- return bytes >> SECTOR_SHIFT;
-}
-
-static inline unsigned int to_bytes(sector_t sector)
-{
- return sector << SECTOR_SHIFT;
-}
/*
* Decrements the number of outstanding ios that a bio has been
--- diff/drivers/md/dm.h 2004-02-10 16:11:17.000000000 +0000
+++ source/drivers/md/dm.h 2004-02-10 16:11:24.000000000 +0000
@@ -151,6 +151,16 @@
return dm_round_up(n, size) / size;
}
+static inline sector_t to_sector(unsigned long n)
+{
+ return (n >> 9);
+}
+
+static inline unsigned long to_bytes(sector_t n)
+{
+ return (n << 9);
+}
+
/*
* The device-mapper can be driven through one of two interfaces;
* ioctl or filesystem, depending which patch you have applied.
^ permalink raw reply [flat|nested] 24+ messages in thread* [Patch 3/10] dm: Get rid of struct dm_deferred_io in dm.c
2004-02-10 16:35 dm core patches Joe Thornber
2004-02-10 16:57 ` [Patch 1/10] dm: Export dm_vcalloc() Joe Thornber
2004-02-10 16:59 ` [Patch 2/10] dm: Lift to_bytes() and to_sectors() into dm.h Joe Thornber
@ 2004-02-10 16:59 ` Joe Thornber
2004-02-10 17:33 ` Christophe Saout
2004-02-10 17:00 ` [Patch 4/10] dm: Maintain ordering when deferring bios Joe Thornber
` (7 subsequent siblings)
10 siblings, 1 reply; 24+ messages in thread
From: Joe Thornber @ 2004-02-10 16:59 UTC (permalink / raw)
To: Joe Thornber; +Cc: Linux Mailing List, Andrew Morton
Get rid of struct dm_deferred_io in dm.c. [Christophe Saout]
--- diff/drivers/md/dm.c 2004-02-10 16:11:24.000000000 +0000
+++ source/drivers/md/dm.c 2004-02-10 16:11:30.000000000 +0000
@@ -27,11 +27,6 @@
atomic_t io_count;
};
-struct deferred_io {
- struct bio *bio;
- struct deferred_io *next;
-};
-
/*
* Bits for the md->flags field.
*/
@@ -52,7 +47,7 @@
*/
atomic_t pending;
wait_queue_head_t wait;
- struct deferred_io *deferred;
+ struct bio *deferred;
/*
* The current mapping.
@@ -188,38 +183,20 @@
mempool_free(io, md->io_pool);
}
-static inline struct deferred_io *alloc_deferred(void)
-{
- return kmalloc(sizeof(struct deferred_io), GFP_NOIO);
-}
-
-static inline void free_deferred(struct deferred_io *di)
-{
- kfree(di);
-}
-
/*
* Add the bio to the list of deferred io.
*/
static int queue_io(struct mapped_device *md, struct bio *bio)
{
- struct deferred_io *di;
-
- di = alloc_deferred();
- if (!di)
- return -ENOMEM;
-
down_write(&md->lock);
if (!test_bit(DMF_BLOCK_IO, &md->flags)) {
up_write(&md->lock);
- free_deferred(di);
return 1;
}
- di->bio = bio;
- di->next = md->deferred;
- md->deferred = di;
+ bio->bi_next = md->deferred;
+ md->deferred = bio;
up_write(&md->lock);
return 0; /* deferred successfully */
@@ -743,14 +720,14 @@
/*
* Requeue the deferred bios by calling generic_make_request.
*/
-static void flush_deferred_io(struct deferred_io *c)
+static void flush_deferred_io(struct bio *c)
{
- struct deferred_io *n;
+ struct bio *n;
while (c) {
- n = c->next;
- generic_make_request(c->bio);
- free_deferred(c);
+ n = c->bi_next;
+ c->bi_next = NULL;
+ generic_make_request(c);
c = n;
}
}
@@ -832,7 +809,7 @@
int dm_resume(struct mapped_device *md)
{
- struct deferred_io *def;
+ struct bio *def;
down_write(&md->lock);
if (!md->map ||
^ permalink raw reply [flat|nested] 24+ messages in thread* [Patch 4/10] dm: Maintain ordering when deferring bios
2004-02-10 16:35 dm core patches Joe Thornber
` (2 preceding siblings ...)
2004-02-10 16:59 ` [Patch 3/10] dm: Get rid of struct dm_deferred_io in dm.c Joe Thornber
@ 2004-02-10 17:00 ` Joe Thornber
2004-02-10 17:00 ` [Patch 5/10] dm: Tidy up the error path for alloc_dev() Joe Thornber
` (6 subsequent siblings)
10 siblings, 0 replies; 24+ messages in thread
From: Joe Thornber @ 2004-02-10 17:00 UTC (permalink / raw)
To: Joe Thornber; +Cc: Linux Mailing List, Andrew Morton
Make sure that we maintain ordering when deferring bios.
--- diff/drivers/md/dm.c 2004-02-10 16:11:30.000000000 +0000
+++ source/drivers/md/dm.c 2004-02-10 16:11:37.000000000 +0000
@@ -5,6 +5,7 @@
*/
#include "dm.h"
+#include "dm-bio-list.h"
#include <linux/init.h>
#include <linux/module.h>
@@ -47,7 +48,7 @@
*/
atomic_t pending;
wait_queue_head_t wait;
- struct bio *deferred;
+ struct bio_list deferred;
/*
* The current mapping.
@@ -195,8 +196,7 @@
return 1;
}
- bio->bi_next = md->deferred;
- md->deferred = bio;
+ bio_list_add(&md->deferred, bio);
up_write(&md->lock);
return 0; /* deferred successfully */
@@ -822,8 +822,7 @@
dm_table_resume_targets(md->map);
clear_bit(DMF_SUSPENDED, &md->flags);
clear_bit(DMF_BLOCK_IO, &md->flags);
- def = md->deferred;
- md->deferred = NULL;
+ def = bio_list_get(&md->deferred);
up_write(&md->lock);
flush_deferred_io(def);
--- diff/drivers/md/dm-bio-list.h 1970-01-01 01:00:00.000000000 +0100
+++ source/drivers/md/dm-bio-list.h 2004-02-10 16:11:37.000000000 +0000
@@ -0,0 +1,87 @@
+/*
+ * Copyright (C) 2004 Red Hat UK Ltd.
+ *
+ * This file is released under the GPL.
+ */
+
+#ifndef DM_BIO_LIST_H
+#define DM_BIO_LIST_H
+
+#include <linux/bio.h>
+
+/*
+ * A singly linked FIFO of bios, chained through bio->bi_next.
+ * An empty list has head == tail == NULL.
+ */
+struct bio_list {
+	struct bio *head;
+	struct bio *tail;
+};
+
+/* Reset bl to the empty list. */
+static inline void bio_list_init(struct bio_list *bl)
+{
+	bl->head = bl->tail = NULL;
+}
+
+/* Append bio to the tail of bl; bio->bi_next is overwritten. */
+static inline void bio_list_add(struct bio_list *bl, struct bio *bio)
+{
+	bio->bi_next = NULL;
+
+	if (bl->tail)
+		bl->tail->bi_next = bio;
+	else
+		bl->head = bio;
+
+	bl->tail = bio;
+}
+
+/*
+ * Append the bios on bl2 to the tail of bl.  bl2's own head/tail
+ * fields are left as they were.
+ */
+static inline void bio_list_merge(struct bio_list *bl, struct bio_list *bl2)
+{
+	/* Merging an empty list must not clobber bl->tail. */
+	if (!bl2->head)
+		return;
+
+	if (bl->tail)
+		bl->tail->bi_next = bl2->head;
+	else
+		bl->head = bl2->head;
+
+	bl->tail = bl2->tail;
+}
+
+/* Detach and return the first bio on bl, or NULL if bl is empty. */
+static inline struct bio *bio_list_pop(struct bio_list *bl)
+{
+	struct bio *bio = bl->head;
+
+	if (bio) {
+		bl->head = bl->head->bi_next;
+		if (!bl->head)
+			bl->tail = NULL;
+
+		bio->bi_next = NULL;
+	}
+
+	return bio;
+}
+
+/*
+ * Empty bl and return its former head.  The returned bios are still
+ * chained through bi_next, so the caller walks the chain itself.
+ */
+static inline struct bio *bio_list_get(struct bio_list *bl)
+{
+	struct bio *bio = bl->head;
+
+	bl->head = bl->tail = NULL;
+
+	return bio;
+}
+
+#endif
^ permalink raw reply [flat|nested] 24+ messages in thread* [Patch 5/10] dm: Tidy up the error path for alloc_dev()
2004-02-10 16:35 dm core patches Joe Thornber
` (3 preceding siblings ...)
2004-02-10 17:00 ` [Patch 4/10] dm: Maintain ordering when deferring bios Joe Thornber
@ 2004-02-10 17:00 ` Joe Thornber
2004-02-10 17:01 ` [Patch 6/10] dm: block size bug with 64 bit devs Joe Thornber
` (5 subsequent siblings)
10 siblings, 0 replies; 24+ messages in thread
From: Joe Thornber @ 2004-02-10 17:00 UTC (permalink / raw)
To: Joe Thornber; +Cc: Linux Mailing List, Andrew Morton
Tidy up the error path for alloc_dev()
--- diff/drivers/md/dm.c 2004-02-10 16:11:37.000000000 +0000
+++ source/drivers/md/dm.c 2004-02-10 16:11:43.000000000 +0000
@@ -560,41 +560,28 @@
/* get a minor number for the dev */
r = persistent ? specific_minor(minor) : next_free_minor(&minor);
- if (r < 0) {
- kfree(md);
- return NULL;
- }
+ if (r < 0)
+ goto bad1;
memset(md, 0, sizeof(*md));
init_rwsem(&md->lock);
atomic_set(&md->holders, 1);
md->queue = blk_alloc_queue(GFP_KERNEL);
- if (!md->queue) {
- kfree(md);
- return NULL;
- }
+ if (!md->queue)
+ goto bad1;
md->queue->queuedata = md;
blk_queue_make_request(md->queue, dm_request);
md->io_pool = mempool_create(MIN_IOS, mempool_alloc_slab,
mempool_free_slab, _io_cache);
- if (!md->io_pool) {
- free_minor(minor);
- blk_put_queue(md->queue);
- kfree(md);
- return NULL;
- }
+ if (!md->io_pool)
+ goto bad2;
md->disk = alloc_disk(1);
- if (!md->disk) {
- mempool_destroy(md->io_pool);
- free_minor(minor);
- blk_put_queue(md->queue);
- kfree(md);
- return NULL;
- }
+ if (!md->disk)
+ goto bad3;
md->disk->major = _major;
md->disk->first_minor = minor;
@@ -609,6 +596,16 @@
init_waitqueue_head(&md->eventq);
return md;
+
+
+ bad3:
+ mempool_destroy(md->io_pool);
+ bad2:
+ blk_put_queue(md->queue);
+ free_minor(minor);
+ bad1:
+ kfree(md);
+ return NULL;
}
static void free_dev(struct mapped_device *md)
^ permalink raw reply [flat|nested] 24+ messages in thread* [Patch 6/10] dm: block size bug with 64 bit devs
2004-02-10 16:35 dm core patches Joe Thornber
` (4 preceding siblings ...)
2004-02-10 17:00 ` [Patch 5/10] dm: Tidy up the error path for alloc_dev() Joe Thornber
@ 2004-02-10 17:01 ` Joe Thornber
2004-02-10 17:01 ` [Patch 7/10] dm: Correct GFP flag in dm_table_create() Joe Thornber
` (4 subsequent siblings)
10 siblings, 0 replies; 24+ messages in thread
From: Joe Thornber @ 2004-02-10 17:01 UTC (permalink / raw)
To: Joe Thornber; +Cc: Linux Mailing List, Andrew Morton
With 32 bit sector_t the block device size _in bytes_ is also cut to
32 bit in __set_size when the block device is mounted (i.e. a filesystem is
mounted on it). The argument should be cast to loff_t before expanding the
sector count to a byte count and calling i_size_write.
[Christophe Saout]
--- diff/drivers/md/dm.c 2004-02-10 16:11:43.000000000 +0000
+++ source/drivers/md/dm.c 2004-02-10 16:11:50.000000000 +0000
@@ -639,7 +639,7 @@
bdev = bdget_disk(disk, 0);
if (bdev) {
down(&bdev->bd_inode->i_sem);
- i_size_write(bdev->bd_inode, size << SECTOR_SHIFT);
+ i_size_write(bdev->bd_inode, (loff_t)size << SECTOR_SHIFT);
up(&bdev->bd_inode->i_sem);
bdput(bdev);
}
^ permalink raw reply [flat|nested] 24+ messages in thread* [Patch 7/10] dm: Correct GFP flag in dm_table_create()
2004-02-10 16:35 dm core patches Joe Thornber
` (5 preceding siblings ...)
2004-02-10 17:01 ` [Patch 6/10] dm: block size bug with 64 bit devs Joe Thornber
@ 2004-02-10 17:01 ` Joe Thornber
2004-02-10 17:02 ` [Patch 8/10] dm: Zero size target sanity check Joe Thornber
` (3 subsequent siblings)
10 siblings, 0 replies; 24+ messages in thread
From: Joe Thornber @ 2004-02-10 17:01 UTC (permalink / raw)
To: Joe Thornber; +Cc: Linux Mailing List, Andrew Morton
For some reason dm_table_create() was allocating GFP_NOIO rather than
GFP_KERNEL.
--- diff/drivers/md/dm-table.c 2004-02-10 16:11:17.000000000 +0000
+++ source/drivers/md/dm-table.c 2004-02-10 16:11:58.000000000 +0000
@@ -205,7 +205,7 @@
int dm_table_create(struct dm_table **result, int mode, unsigned num_targets)
{
- struct dm_table *t = kmalloc(sizeof(*t), GFP_NOIO);
+ struct dm_table *t = kmalloc(sizeof(*t), GFP_KERNEL);
if (!t)
return -ENOMEM;
^ permalink raw reply [flat|nested] 24+ messages in thread* [Patch 8/10] dm: Zero size target sanity check
2004-02-10 16:35 dm core patches Joe Thornber
` (6 preceding siblings ...)
2004-02-10 17:01 ` [Patch 7/10] dm: Correct GFP flag in dm_table_create() Joe Thornber
@ 2004-02-10 17:02 ` Joe Thornber
2004-02-10 17:02 ` [Patch 9/10] dm: Remove redundant spin lock in dec_pending() Joe Thornber
` (2 subsequent siblings)
10 siblings, 0 replies; 24+ messages in thread
From: Joe Thornber @ 2004-02-10 17:02 UTC (permalink / raw)
To: Joe Thornber; +Cc: Linux Mailing List, Andrew Morton
Add sanity check to dm_table_add_target() against zero length targets. [Christophe Saout]
--- diff/drivers/md/dm-table.c 2004-02-10 16:11:58.000000000 +0000
+++ source/drivers/md/dm-table.c 2004-02-10 16:12:04.000000000 +0000
@@ -655,6 +655,11 @@
memset(tgt, 0, sizeof(*tgt));
set_default_limits(&tgt->limits);
+ if (!len) {
+ tgt->error = "zero-length target";
+ return -EINVAL;
+ }
+
tgt->type = dm_get_target_type(type);
if (!tgt->type) {
tgt->error = "unknown target type";
^ permalink raw reply [flat|nested] 24+ messages in thread* [Patch 9/10] dm: Remove redundant spin lock in dec_pending()
2004-02-10 16:35 dm core patches Joe Thornber
` (7 preceding siblings ...)
2004-02-10 17:02 ` [Patch 8/10] dm: Zero size target sanity check Joe Thornber
@ 2004-02-10 17:02 ` Joe Thornber
2004-02-10 17:03 ` [Patch 10/10] dm: drop BIO_SEG_VALID bit Joe Thornber
2004-02-11 10:16 ` dm core patches Lars Marowsky-Bree
10 siblings, 0 replies; 24+ messages in thread
From: Joe Thornber @ 2004-02-10 17:02 UTC (permalink / raw)
To: Joe Thornber; +Cc: Linux Mailing List, Andrew Morton
Remove redundant spin lock in dec_pending()
--- diff/drivers/md/dm.c 2004-02-10 16:11:50.000000000 +0000
+++ source/drivers/md/dm.c 2004-02-10 16:12:10.000000000 +0000
@@ -217,14 +217,8 @@
*/
static inline void dec_pending(struct dm_io *io, int error)
{
- static spinlock_t _uptodate_lock = SPIN_LOCK_UNLOCKED;
- unsigned long flags;
-
- if (error) {
- spin_lock_irqsave(&_uptodate_lock, flags);
+ if (error)
io->error = error;
- spin_unlock_irqrestore(&_uptodate_lock, flags);
- }
if (atomic_dec_and_test(&io->io_count)) {
if (atomic_dec_and_test(&io->md->pending))
^ permalink raw reply [flat|nested] 24+ messages in thread* [Patch 10/10] dm: drop BIO_SEG_VALID bit
2004-02-10 16:35 dm core patches Joe Thornber
` (8 preceding siblings ...)
2004-02-10 17:02 ` [Patch 9/10] dm: Remove redundant spin lock in dec_pending() Joe Thornber
@ 2004-02-10 17:03 ` Joe Thornber
2004-02-11 10:16 ` dm core patches Lars Marowsky-Bree
10 siblings, 0 replies; 24+ messages in thread
From: Joe Thornber @ 2004-02-10 17:03 UTC (permalink / raw)
To: Joe Thornber; +Cc: Linux Mailing List, Andrew Morton
I just noticed that bio_clone copies the BIO_SEG_VALID bit from the
original bio when it was set. When we modify bi_idx or bi_vcnt
afterwards the segment counts are invalid and the bit must be dropped
(though it is fairly unlikely that it has already been set).
[Christophe Saout]
--- diff/drivers/md/dm.c 2004-02-10 16:12:10.000000000 +0000
+++ source/drivers/md/dm.c 2004-02-10 16:12:17.000000000 +0000
@@ -338,6 +338,7 @@
clone->bi_idx = idx;
clone->bi_vcnt = idx + bv_count;
clone->bi_size = to_bytes(len);
+ clone->bi_flags &= ~(1 << BIO_SEG_VALID);
return clone;
}
^ permalink raw reply [flat|nested] 24+ messages in thread* Re: dm core patches
2004-02-10 16:35 dm core patches Joe Thornber
` (9 preceding siblings ...)
2004-02-10 17:03 ` [Patch 10/10] dm: drop BIO_SEG_VALID bit Joe Thornber
@ 2004-02-11 10:16 ` Lars Marowsky-Bree
2004-02-11 10:35 ` Joe Thornber
10 siblings, 1 reply; 24+ messages in thread
From: Lars Marowsky-Bree @ 2004-02-11 10:16 UTC (permalink / raw)
To: Joe Thornber, Linux Mailing List
On 2004-02-10T16:35:48,
Joe Thornber <thornber@redhat.com> said:
> Hi,
>
> Here's the latest set of patches to core dm. Please apply.
Hi Joe,
when will you be submitting the DM multipath personality?
Sincerely,
Lars Marowsky-Brée <lmb@suse.de>
--
High Availability & Clustering \ ever tried. ever failed. no matter.
SUSE Labs | try again. fail again. fail better.
Research & Development, SUSE LINUX AG \ -- Samuel Beckett
^ permalink raw reply [flat|nested] 24+ messages in thread* Re: dm core patches
2004-02-11 10:16 ` dm core patches Lars Marowsky-Bree
@ 2004-02-11 10:35 ` Joe Thornber
2004-02-12 18:51 ` Lars Marowsky-Bree
0 siblings, 1 reply; 24+ messages in thread
From: Joe Thornber @ 2004-02-11 10:35 UTC (permalink / raw)
To: Lars Marowsky-Bree; +Cc: Joe Thornber, Linux Mailing List
On Wed, Feb 11, 2004 at 11:16:59AM +0100, Lars Marowsky-Bree wrote:
> when will you be submitting the DM multipath personality?
Not for a bit, it's still changing too much as I find more out about
the hardware (see the dm-devel@redhat.com list).
- Joe
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: dm core patches
2004-02-11 10:35 ` Joe Thornber
@ 2004-02-12 18:51 ` Lars Marowsky-Bree
2004-02-12 20:13 ` Joe Thornber
0 siblings, 1 reply; 24+ messages in thread
From: Lars Marowsky-Bree @ 2004-02-12 18:51 UTC (permalink / raw)
To: Joe Thornber; +Cc: Linux Mailing List
On 2004-02-11T10:35:41,
Joe Thornber <thornber@redhat.com> said:
> > when will you be submitting the DM multipath personality?
> Not for a bit, it's still changing too much as I find more out about
> the hardware (see the dm-devel@redhat.com list).
I checked the archives, but I couldn't find anything really 'in flux'.
Your priority based approach seems just fine to me.
What is still missing? This is really a killer feature for 2.6. Any help
I can offer?
Sincerely,
Lars Marowsky-Brée <lmb@suse.de>
--
High Availability & Clustering \ ever tried. ever failed. no matter.
SUSE Labs | try again. fail again. fail better.
Research & Development, SUSE LINUX AG \ -- Samuel Beckett
^ permalink raw reply [flat|nested] 24+ messages in thread* Re: dm core patches
2004-02-12 18:51 ` Lars Marowsky-Bree
@ 2004-02-12 20:13 ` Joe Thornber
2004-02-13 15:12 ` Lars Marowsky-Bree
0 siblings, 1 reply; 24+ messages in thread
From: Joe Thornber @ 2004-02-12 20:13 UTC (permalink / raw)
To: Lars Marowsky-Bree; +Cc: Joe Thornber, Linux Mailing List
On Thu, Feb 12, 2004 at 07:51:45PM +0100, Lars Marowsky-Bree wrote:
> I checked the archives, but I couldn't find anything really 'in flux'.
> Your priority based approach seems just fine to me.
>
> What is still missing? This is really a killer feature for 2.6. Any help
> I can offer?
I think the main concern now is over the testing of paths. Sending an
io down an inactive path can be very expensive for some hardware
configurations. So I'm considering changing a couple of things:
- Only ever send io to 1 priority group at a time (even test ios).
To test the lower priority groups we'd have to periodically switch to
them and use them for a bit for both test io and proper io.
- For some hardware there are better ways of testing the path than
sending the test io. Should the drivers expose a test function ?
In the absence of this we'd fallback to the test io method.
The other thing we need is to try and get the drivers to differentiate
between a media error and a path error, so that media errors get
reported up quickly and don't cause false path failures. This is
possibly an area that you could help with ?
- Joe
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: dm core patches
2004-02-12 20:13 ` Joe Thornber
@ 2004-02-13 15:12 ` Lars Marowsky-Bree
2004-02-13 15:39 ` Joe Thornber
2004-02-13 16:03 ` Jens Axboe
0 siblings, 2 replies; 24+ messages in thread
From: Lars Marowsky-Bree @ 2004-02-13 15:12 UTC (permalink / raw)
To: Joe Thornber; +Cc: Linux Mailing List, axboe
On 2004-02-12T20:13:40,
Joe Thornber <thornber@redhat.com> said:
> I think the main concern now is over the testing of paths. Sending an
> io down an inactive path can be very expensive for some hardware
> configurations. So I'm considering changing a couple of things:
>
> - Only ever send io to 1 priority group at a time (even test ios).
> To test the lower priority groups we'd have to periodically switch to
> them and use them for a bit for both test io and proper io.
You are missing the obvious answer:
- Periodically checking paths is a user-space issue and doesn't belong
in the kernel. User-space gets to handle this policy.
> - For some hardware there are better ways of testing the path than
> sending the test io. Should the drivers expose a test function ?
> In the absence of this we'd fallback to the test io method.
Again, with user-space taking care of this, it doesn't really matter.
Though exposing a test function does sound nice, even for user-space.
Moving it into kernel land is something which can always be done later,
if there is a really pressing problem.
> The other thing we need is to try and get the drivers to differentiate
> between a media error and a path error, so that media errors get
> reported up quickly and don't cause false path failures. This is
> possibly an area that you could help with ?
I thought the IO stack in 2.6 provided us with such sense keys already,
which you'd then need to handle in the DM personality. Of course,
drivers need to make sure they pass up appropriate sense-keys, but
that's a hardware vendor issue and not something which should delay the
DM personality...
Jens, do you have the pointer on this handy?
Sincerely,
Lars Marowsky-Brée <lmb@suse.de>
--
High Availability & Clustering \ ever tried. ever failed. no matter.
SUSE Labs | try again. fail again. fail better.
Research & Development, SUSE LINUX AG \ -- Samuel Beckett
^ permalink raw reply [flat|nested] 24+ messages in thread* Re: dm core patches
2004-02-13 15:12 ` Lars Marowsky-Bree
@ 2004-02-13 15:39 ` Joe Thornber
2004-02-13 16:08 ` Arjan van de Ven
` (2 more replies)
2004-02-13 16:03 ` Jens Axboe
1 sibling, 3 replies; 24+ messages in thread
From: Joe Thornber @ 2004-02-13 15:39 UTC (permalink / raw)
To: Lars Marowsky-Bree; +Cc: Joe Thornber, Linux Mailing List, axboe
On Fri, Feb 13, 2004 at 04:12:14PM +0100, Lars Marowsky-Bree wrote:
> On 2004-02-12T20:13:40,
> Joe Thornber <thornber@redhat.com> said:
>
> > I think the main concern now is over the testing of paths. Sending an
> > io down an inactive path can be very expensive for some hardware
> > configurations. So I'm considering changing a couple of things:
> >
> > - Only ever send io to 1 priority group at a time (even test ios).
> > To test the lower priority groups we'd have to periodically switch to
> > them and use them for a bit for both test io and proper io.
>
> You are missing the obvious answer:
>
> - Periodically checking paths is a user-space issue and doesn't belong
> into the kernel. User-space gets to handle this policy.
Yes, that is obvious, I had wanted to do failback automatically. But
pushing it to userland does allow people to write hardware specific
tests. I'll try it and see what people think.
Thanks,
- Joe
^ permalink raw reply [flat|nested] 24+ messages in thread* Re: dm core patches
2004-02-13 15:39 ` Joe Thornber
@ 2004-02-13 16:08 ` Arjan van de Ven
2004-02-16 8:19 ` Lars Marowsky-Bree
2004-02-13 23:46 ` Mike Anderson
2004-02-16 12:17 ` Heinz Mauelshagen
2 siblings, 1 reply; 24+ messages in thread
From: Arjan van de Ven @ 2004-02-13 16:08 UTC (permalink / raw)
To: Joe Thornber; +Cc: Lars Marowsky-Bree, Linux Mailing List, axboe
[-- Attachment #1: Type: text/plain, Size: 435 bytes --]
> Yes, that is obvious, I had wanted to do failback automatically. But
> pushing it to userland does allow people to write hardware specific
> tests. I'll try it and see what people think.
one thing you can do is provide a way for drivers to wake the userspace
tester early. Say by default it polls every minute, but if the fiber
channel driver gets a LIP UP event it (via a central API) wakes the
userspace daemon *now*.
[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 189 bytes --]
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: dm core patches
2004-02-13 16:08 ` Arjan van de Ven
@ 2004-02-16 8:19 ` Lars Marowsky-Bree
2004-02-16 9:35 ` Arjan van de Ven
0 siblings, 1 reply; 24+ messages in thread
From: Lars Marowsky-Bree @ 2004-02-16 8:19 UTC (permalink / raw)
To: Arjan van de Ven, Joe Thornber; +Cc: Linux Mailing List, axboe
On 2004-02-13T17:08:59,
Arjan van de Ven <arjanv@redhat.com> said:
> one thing you can do is provide a way for drivers to wake the userspace
> tester early. Say by default it polls every minute, but if the fiber
> channel driver gets a LIP UP event it (via a central API) makes the
> userspace daemon *now*.
I may be missing something obvious, but a LIP UP should be accompanied
with a round of 'device detections' on that link, which already should
trigger a few hotplug events, no?
So this seems pretty much solved.
Sincerely,
Lars Marowsky-Brée <lmb@suse.de>
--
High Availability & Clustering \ ever tried. ever failed. no matter.
SUSE Labs | try again. fail again. fail better.
Research & Development, SUSE LINUX AG \ -- Samuel Beckett
^ permalink raw reply [flat|nested] 24+ messages in thread* Re: dm core patches
2004-02-16 8:19 ` Lars Marowsky-Bree
@ 2004-02-16 9:35 ` Arjan van de Ven
0 siblings, 0 replies; 24+ messages in thread
From: Arjan van de Ven @ 2004-02-16 9:35 UTC (permalink / raw)
To: Lars Marowsky-Bree; +Cc: Joe Thornber, Linux Mailing List, axboe
[-- Attachment #1: Type: text/plain, Size: 1069 bytes --]
On Mon, Feb 16, 2004 at 09:19:45AM +0100, Lars Marowsky-Bree wrote:
> On 2004-02-13T17:08:59,
> Arjan van de Ven <arjanv@redhat.com> said:
>
> > one thing you can do is provide a way for drivers to wake the userspace
> > tester early. Say by default it polls every minute, but if the fiber
> > channel driver gets a LIP UP event it (via a central API) makes the
> > userspace daemon *now*.
>
> I may be missing something obvious, but a LIP UP should be accompanied
> with a round of 'device detections' on that link, which already should
> trigger a few hotplug events, no?
>
> So this seems pretty much solved.
not normally; there are several reasons the loop can bounce briefly and right
now the fiber drivers don't notify linux of that every time. Maybe that's
for the better .... if it's a frequent thing that is short-timed then it
would be obscene to yank the disks from under the user (and force-umount his
fs) every few hours..
while in multipath you do want to at least stop using the current path if
there is another path that is not in negotiation...
[-- Attachment #2: Type: application/pgp-signature, Size: 189 bytes --]
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: dm core patches
2004-02-13 15:39 ` Joe Thornber
2004-02-13 16:08 ` Arjan van de Ven
@ 2004-02-13 23:46 ` Mike Anderson
2004-02-16 12:17 ` Heinz Mauelshagen
2 siblings, 0 replies; 24+ messages in thread
From: Mike Anderson @ 2004-02-13 23:46 UTC (permalink / raw)
To: Joe Thornber; +Cc: Lars Marowsky-Bree, Linux Mailing List, axboe
Joe Thornber [thornber@redhat.com] wrote:
> > You are missing the obvious answer:
> >
> > - Periodically checking paths is a user-space issue and doesn't belong
> > into the kernel. User-space gets to handle this policy.
>
> Yes, that is obvious, I had wanted to do failback automatically. But
> pushing it to userland does allow people to write hardware specific
> tests. I'll try it and see what people think.
Be careful here. Your failback test packet cannot be a media access type
as this could cause volume transition thrashing in some types of
storage units so most likely you will use a test unit ready type packet.
These small size tests are not very good checks on their own for optical
based networks as the laser power needed to send them is really low
(newer vertical cavity lasers have reduced these types of failures, but
they still happen). Auto failback with heuristics and a credit based
model allows the path to be failed back in with a quick ejection and an
increasing time interval to start the whole cycle again. This keeps the
systems from heading into a failover / failback storm.
-andmike
--
Michael Anderson
andmike@us.ibm.com
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: dm core patches
2004-02-13 15:39 ` Joe Thornber
2004-02-13 16:08 ` Arjan van de Ven
2004-02-13 23:46 ` Mike Anderson
@ 2004-02-16 12:17 ` Heinz Mauelshagen
2 siblings, 0 replies; 24+ messages in thread
From: Heinz Mauelshagen @ 2004-02-16 12:17 UTC (permalink / raw)
To: Joe Thornber; +Cc: Lars Marowsky-Bree, Linux Mailing List, axboe
On Fri, Feb 13, 2004 at 03:39:36PM +0000, Joe Thornber wrote:
> On Fri, Feb 13, 2004 at 04:12:14PM +0100, Lars Marowsky-Bree wrote:
> > On 2004-02-12T20:13:40,
> > Joe Thornber <thornber@redhat.com> said:
> >
> > > I think the main concern now is over the testing of paths. Sending an
> > > io down an inactive path can be very expensive for some hardware
> > > configurations. So I'm considering changing a couple of things:
> > >
> > > - Only ever send io to 1 priority group at a time (even test ios).
> > > To test the lower priority groups we'd have to periodically switch to
> > > them and use them for a bit for both test io and proper io.
> >
> > You are missing the obvious answer:
> >
> > - Periodically checking paths is a user-space issue and doesn't belong
> > into the kernel. User-space gets to handle this policy.
>
> Yes, that is obvious, I had wanted to do failback automatically. But
> pushing it to userland does allow people to write hardware specific
> tests. I'll try it and see what people think.
Right, such policy belongs in userspace, it seems.
The reason why I put it into the multipath target is to cover the case,
where all paths are inoperational, the system is OOM _and_ the only
chance to recover from that is the hope to unfail a path in order to
release memory pressure.
'Sorry, userspace test handler can't run, your enterprise server
is a pile of sh..' is not acceptable in case there's a path we
could unfail IMO.
Regards,
Heinz -- The LVM Guy --
>
> Thanks,
>
> - Joe
> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
*** Software bugs are stupid.
Nevertheless it needs not so stupid people to solve them ***
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
Heinz Mauelshagen Red Hat, Inc.
Consulting Development Engineer Am Sonnenhang 11
56242 Marienrachdorf
Germany
Mauelshagen@RedHat.com +49 2626 141200
FAX 924446
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: dm core patches
2004-02-13 15:12 ` Lars Marowsky-Bree
2004-02-13 15:39 ` Joe Thornber
@ 2004-02-13 16:03 ` Jens Axboe
1 sibling, 0 replies; 24+ messages in thread
From: Jens Axboe @ 2004-02-13 16:03 UTC (permalink / raw)
To: Lars Marowsky-Bree; +Cc: Joe Thornber, Linux Mailing List
On Fri, Feb 13 2004, Lars Marowsky-Bree wrote:
> > The other thing we need is to try and get the drivers to differentiate
> > between a media error and a path error, so that media errors get
> > reported up quickly and don't cause false path failures. This is
> > possibly an area that you could help with ?
>
> I thought the IO stack in 2.6 provided us with such sense keys already,
> which you'd then need to handle in the DM personality. Of course,
> drivers need to make sure they pass up appropriate sense-keys, but
> that's a hardware vendor issue and not something which should delay the
> DM personality...
>
> Jens, do you have the pointer on this handy?
The mechanism is in place, but the SCSI stack still needs a few changes
to pass down the correct errors. The easiest would be to pass down
pseudo-sense keys (I'd rather just call them something else as not to
confuse things, io error hints or something) to
end_that_request_first(), changing uptodate from a bool to a hint.
I can help get this done, it's not something that should hold up dm-mp
by any stretch.
--
Jens Axboe
^ permalink raw reply [flat|nested] 24+ messages in thread