* [RFC][PATCH 1/10] I/O context inheritance
2008-04-22 13:49 taka
@ 2008-04-22 13:51 ` Hirokazu Takahashi
2008-04-22 13:53 ` [RFC][PATCH 2/10] " Hirokazu Takahashi
` (9 subsequent siblings)
10 siblings, 0 replies; 15+ messages in thread
From: Hirokazu Takahashi @ 2008-04-22 13:51 UTC (permalink / raw)
To: jens.axboe, agk; +Cc: dm-devel, linux-kernel
Make every bio point to the iocontext of the process which originally
generated an I/O request.
- Assign the iocontext of the current process to a bio when it is
newly allocated.
- Assign the iocontext which the source bio has to a newly allocated
bio when it is duplicated.
Signed-off-by: Hirokazu Takahashi <taka@valinux.co.jp>
--- linux-2.6.25.bio0/include/linux/bio.h 2008-04-22 15:48:36.000000000 +0900
+++ linux-2.6.25/include/linux/bio.h 2008-04-22 15:49:42.000000000 +0900
@@ -114,6 +114,8 @@ struct bio {
void *bi_private;
bio_destructor_t *bi_destructor; /* destructor */
+
+ struct io_context *bi_io_context;
};
/*
--- linux-2.6.25.bio0/include/linux/iocontext.h 2008-04-22 15:48:36.000000000 +0900
+++ linux-2.6.25/include/linux/iocontext.h 2008-04-22 15:49:42.000000000 +0900
@@ -85,7 +85,7 @@ struct io_context {
void *ioc_data;
};
-static inline struct io_context *ioc_task_link(struct io_context *ioc)
+static inline struct io_context *ioc_object_link(struct io_context *ioc)
{
/*
* if ref count is zero, don't allow sharing (ioc is going away, it's
@@ -99,4 +99,6 @@ static inline struct io_context *ioc_tas
return NULL;
}
+#define ioc_task_link(ioc) ioc_object_link(ioc)
+
#endif
--- linux-2.6.25.bio0/fs/bio.c 2008-04-22 15:48:31.000000000 +0900
+++ linux-2.6.25/fs/bio.c 2008-04-22 15:49:42.000000000 +0900
@@ -107,8 +107,16 @@ static inline struct bio_vec *bvec_alloc
return bvl;
}
+static inline void put_bio_context(struct bio *bio)
+{
+ if (bio->bi_io_context)
+ put_io_context(bio->bi_io_context);
+}
+
void bio_free(struct bio *bio, struct bio_set *bio_set)
{
+ put_bio_context(bio);
+
if (bio->bi_io_vec) {
const int pool_idx = BIO_POOL_IDX(bio);
@@ -177,10 +185,29 @@ out:
struct bio *bio_alloc(gfp_t gfp_mask, int nr_iovecs)
{
- struct bio *bio = bio_alloc_bioset(gfp_mask, nr_iovecs, fs_bio_set);
+ struct bio *bio;
+ struct io_context *ioc;
- if (bio)
+ /*
+ * Set the io_context of the current process here since this newly
+ * created bio may be passed to an I/O scheduler through another
+ * kernel thread on behalf of the process that originated this bio.
+ * The get_io_context function should be called here so it can block
+ * in it.
+ * Todo: when the current task is an aio kernel thread, the io_context
+ * of the original process should be set instead of that of
+ * the thread.
+ */
+ ioc = get_io_context(gfp_mask, -1);
+ if (!ioc)
+ return NULL;
+
+ bio = bio_alloc_bioset(gfp_mask, nr_iovecs, fs_bio_set);
+ if (bio) {
bio->bi_destructor = bio_fs_destructor;
+ bio->bi_io_context = ioc;
+ } else
+ put_io_context(ioc);
return bio;
}
@@ -262,6 +289,7 @@ void __bio_clone(struct bio *bio, struct
bio->bi_vcnt = bio_src->bi_vcnt;
bio->bi_size = bio_src->bi_size;
bio->bi_idx = bio_src->bi_idx;
+ bio->bi_io_context = ioc_object_link(bio_src->bi_io_context);
}
/**
^ permalink raw reply [flat|nested] 15+ messages in thread* [RFC][PATCH 2/10] I/O context inheritance
2008-04-22 13:49 taka
2008-04-22 13:51 ` [RFC][PATCH 1/10] I/O context inheritance Hirokazu Takahashi
@ 2008-04-22 13:53 ` Hirokazu Takahashi
2008-04-22 13:53 ` [RFC][PATCH 3/10] " Hirokazu Takahashi
` (8 subsequent siblings)
10 siblings, 0 replies; 15+ messages in thread
From: Hirokazu Takahashi @ 2008-04-22 13:53 UTC (permalink / raw)
To: jens.axboe, agk; +Cc: dm-devel, linux-kernel
Every I/O scheduler should get an iocontext from a given bio,
not from the current process.
Signed-off-by: Hirokazu Takahashi <taka@valinux.co.jp>
--- linux-2.6.25.bio0/block/blk-core.c 2008-04-22 15:48:32.000000000 +0900
+++ linux-2.6.25/block/blk-core.c 2008-04-22 16:13:31.000000000 +0900
@@ -600,7 +600,8 @@ static inline void blk_free_request(stru
}
static struct request *
-blk_alloc_request(struct request_queue *q, int rw, int priv, gfp_t gfp_mask)
+blk_alloc_request(struct request_queue *q, int rw, int priv, gfp_t gfp_mask,
+ struct io_context *ioc)
{
struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask);
@@ -614,7 +615,7 @@ blk_alloc_request(struct request_queue *
rq->cmd_flags = rw | REQ_ALLOCED;
if (priv) {
- if (unlikely(elv_set_request(q, rq, gfp_mask))) {
+ if (unlikely(elv_set_request(q, rq, gfp_mask, ioc))) {
mempool_free(rq, q->rq.rq_pool);
return NULL;
}
@@ -702,7 +703,7 @@ static struct request *get_request(struc
{
struct request *rq = NULL;
struct request_list *rl = &q->rq;
- struct io_context *ioc = NULL;
+ struct io_context *ioc;
const int rw = rw_flags & 0x01;
int may_queue, priv;
@@ -710,9 +711,12 @@ static struct request *get_request(struc
if (may_queue == ELV_MQUEUE_NO)
goto rq_starved;
+ ioc = bio ? bio->bi_io_context : current->io_context;
+
if (rl->count[rw]+1 >= queue_congestion_on_threshold(q)) {
if (rl->count[rw]+1 >= q->nr_requests) {
- ioc = current_io_context(GFP_ATOMIC, q->node);
+ if (!ioc)
+ ioc = current_io_context(GFP_ATOMIC, q->node);
/*
* The queue will fill after this allocation, so set
* it as full, and mark this process as "batching".
@@ -754,7 +758,7 @@ static struct request *get_request(struc
spin_unlock_irq(q->queue_lock);
- rq = blk_alloc_request(q, rw_flags, priv, gfp_mask);
+ rq = blk_alloc_request(q, rw_flags, priv, gfp_mask, ioc);
if (unlikely(!rq)) {
/*
* Allocation failed presumably due to memory. Undo anything
--- linux-2.6.25.bio0/include/linux/elevator.h 2008-04-22 15:48:36.000000000 +0900
+++ linux-2.6.25/include/linux/elevator.h 2008-04-22 15:51:27.000000000 +0900
@@ -22,7 +22,7 @@ typedef struct request *(elevator_reques
typedef void (elevator_completed_req_fn) (struct request_queue *, struct request *);
typedef int (elevator_may_queue_fn) (struct request_queue *, int);
-typedef int (elevator_set_req_fn) (struct request_queue *, struct request *, gfp_t);
+typedef int (elevator_set_req_fn) (struct request_queue *, struct request *, gfp_t, struct io_context *);
typedef void (elevator_put_req_fn) (struct request *);
typedef void (elevator_activate_req_fn) (struct request_queue *, struct request *);
typedef void (elevator_deactivate_req_fn) (struct request_queue *, struct request *);
@@ -113,7 +113,7 @@ extern int elv_register_queue(struct req
extern void elv_unregister_queue(struct request_queue *q);
extern int elv_may_queue(struct request_queue *, int);
extern void elv_completed_request(struct request_queue *, struct request *);
-extern int elv_set_request(struct request_queue *, struct request *, gfp_t);
+extern int elv_set_request(struct request_queue *, struct request *, gfp_t, struct io_context *);
extern void elv_put_request(struct request_queue *, struct request *);
/*
--- linux-2.6.25.bio0/block/elevator.c 2008-04-22 15:48:32.000000000 +0900
+++ linux-2.6.25/block/elevator.c 2008-04-22 15:51:27.000000000 +0900
@@ -864,12 +864,13 @@ struct request *elv_former_request(struc
return NULL;
}
-int elv_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask)
+int elv_set_request(struct request_queue *q, struct request *rq,
+ gfp_t gfp_mask, struct io_context *ioc)
{
elevator_t *e = q->elevator;
if (e->ops->elevator_set_req_fn)
- return e->ops->elevator_set_req_fn(q, rq, gfp_mask);
+ return e->ops->elevator_set_req_fn(q, rq, gfp_mask, ioc);
rq->elevator_private = NULL;
return 0;
--- linux-2.6.25.bio0/block/cfq-iosched.c 2008-04-22 15:48:32.000000000 +0900
+++ linux-2.6.25/block/cfq-iosched.c 2008-04-22 17:15:21.000000000 +0900
@@ -1571,14 +1571,16 @@ static int cfq_cic_link(struct cfq_data
* than one device managed by cfq.
*/
static struct cfq_io_context *
-cfq_get_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
+cfq_get_io_context(struct cfq_data *cfqd, struct io_context *ioc, gfp_t gfp_mask)
{
- struct io_context *ioc = NULL;
struct cfq_io_context *cic;
might_sleep_if(gfp_mask & __GFP_WAIT);
- ioc = get_io_context(gfp_mask, cfqd->queue->node);
+ ioc = ioc_object_link(ioc);
+
+ if (!ioc)
+ ioc = get_io_context(gfp_mask, cfqd->queue->node);
if (!ioc)
return NULL;
@@ -1938,7 +1940,8 @@ static void cfq_put_request(struct reque
* Allocate cfq data structures associated with this request.
*/
static int
-cfq_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask)
+cfq_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask,
+ struct io_context *ioc)
{
struct cfq_data *cfqd = q->elevator->elevator_data;
struct cfq_io_context *cic;
@@ -1949,7 +1952,7 @@ cfq_set_request(struct request_queue *q,
might_sleep_if(gfp_mask & __GFP_WAIT);
- cic = cfq_get_io_context(cfqd, gfp_mask);
+ cic = cfq_get_io_context(cfqd, ioc, gfp_mask);
spin_lock_irqsave(q->queue_lock, flags);
^ permalink raw reply [flat|nested] 15+ messages in thread* [RFC][PATCH 3/10] I/O context inheritance
2008-04-22 13:49 taka
2008-04-22 13:51 ` [RFC][PATCH 1/10] I/O context inheritance Hirokazu Takahashi
2008-04-22 13:53 ` [RFC][PATCH 2/10] " Hirokazu Takahashi
@ 2008-04-22 13:53 ` Hirokazu Takahashi
2008-04-22 13:54 ` [RFC][PATCH 4/10] " Hirokazu Takahashi
` (7 subsequent siblings)
10 siblings, 0 replies; 15+ messages in thread
From: Hirokazu Takahashi @ 2008-04-22 13:53 UTC (permalink / raw)
To: jens.axboe, agk; +Cc: dm-devel, linux-kernel
Make every bio point to the iocontext of the process which originally
generated an I/O request. (part 2)
- Assign the iocontext which the source bio has to a bio when it is
allocated as a bounce buffer.
- Assign the iocontext which the source bio has to bios when the source
bio is split into several ones, which some device mapper modules require.
Signed-off-by: Hirokazu Takahashi <taka@valinux.co.jp>
--- linux-2.6.25.bio0/mm/bounce.c 2008-04-22 15:48:32.000000000 +0900
+++ linux-2.6.25/mm/bounce.c 2008-04-22 15:51:33.000000000 +0900
@@ -195,8 +195,11 @@ static void __blk_queue_bounce(struct re
/*
* irk, bounce it
*/
- if (!bio)
+ if (!bio) {
bio = bio_alloc(GFP_NOIO, (*bio_orig)->bi_vcnt);
+ put_io_context(bio->bi_io_context);
+ bio->bi_io_context = ioc_object_link((*bio_orig)->bi_io_context);
+ }
to = bio->bi_io_vec + i;
--- linux-2.6.25.bio0/drivers/md/dm.c 2008-04-22 15:48:33.000000000 +0900
+++ linux-2.6.25/drivers/md/dm.c 2008-04-22 17:16:49.000000000 +0900
@@ -665,6 +665,7 @@ static struct bio *split_bvec(struct bio
clone->bi_size = to_bytes(len);
clone->bi_io_vec->bv_offset = offset;
clone->bi_io_vec->bv_len = clone->bi_size;
+ clone->bi_io_context = ioc_object_link(bio->bi_io_context);
return clone;
}
^ permalink raw reply [flat|nested] 15+ messages in thread* [RFC][PATCH 4/10] I/O context inheritance
2008-04-22 13:49 taka
` (2 preceding siblings ...)
2008-04-22 13:53 ` [RFC][PATCH 3/10] " Hirokazu Takahashi
@ 2008-04-22 13:54 ` Hirokazu Takahashi
2008-04-22 13:55 ` [RFC][PATCH 5/10] " Hirokazu Takahashi
` (6 subsequent siblings)
10 siblings, 0 replies; 15+ messages in thread
From: Hirokazu Takahashi @ 2008-04-22 13:54 UTC (permalink / raw)
To: jens.axboe, agk; +Cc: dm-devel, linux-kernel
Make the aio kernel thread work on the iocontext of the process
which requested the I/O.
Signed-off-by: Hirokazu Takahashi <taka@valinux.co.jp>
--- linux-2.6.25.bio0/include/linux/aio.h 2008-04-22 15:48:36.000000000 +0900
+++ linux-2.6.25/include/linux/aio.h 2008-04-22 15:52:07.000000000 +0900
@@ -181,6 +181,7 @@ struct kioctx {
atomic_t users;
int dead;
struct mm_struct *mm;
+ struct io_context *io_context;
/* This needs improving */
unsigned long user_id;
--- linux-2.6.25.bio0/fs/aio.c 2008-04-22 15:48:31.000000000 +0900
+++ linux-2.6.25/fs/aio.c 2008-04-22 15:52:07.000000000 +0900
@@ -31,6 +31,7 @@
#include <linux/workqueue.h>
#include <linux/security.h>
#include <linux/eventfd.h>
+#include <linux/blkdev.h>
#include <asm/kmap_types.h>
#include <asm/uaccess.h>
@@ -217,6 +218,10 @@ static struct kioctx *ioctx_alloc(unsign
mm = ctx->mm = current->mm;
atomic_inc(&mm->mm_count);
+ ctx->io_context = get_io_context(GFP_KERNEL, -1);
+ if (!ctx->io_context)
+ goto out_freectx2;
+
atomic_set(&ctx->users, 1);
spin_lock_init(&ctx->ctx_lock);
spin_lock_init(&ctx->ring_info.ring_lock);
@@ -255,6 +260,8 @@ out_cleanup:
return ERR_PTR(-EAGAIN);
out_freectx:
+ put_io_context(ctx->io_context);
+out_freectx2:
mmdrop(mm);
kmem_cache_free(kioctx_cachep, ctx);
ctx = ERR_PTR(-ENOMEM);
@@ -376,6 +383,7 @@ void __put_ioctx(struct kioctx *ctx)
aio_free_ring(ctx);
mmdrop(ctx->mm);
ctx->mm = NULL;
+ put_io_context(ctx->io_context);
pr_debug("__put_ioctx: freeing %p\n", ctx);
kmem_cache_free(kioctx_cachep, ctx);
@@ -841,6 +849,7 @@ static void aio_kick_handler(struct work
struct mm_struct *mm;
int requeue;
+ current->io_context = ctx->io_context;
set_fs(USER_DS);
use_mm(ctx->mm);
spin_lock_irq(&ctx->ctx_lock);
@@ -849,6 +858,7 @@ static void aio_kick_handler(struct work
spin_unlock_irq(&ctx->ctx_lock);
unuse_mm(mm);
set_fs(oldfs);
+ current->io_context = NULL;
/*
* we're in a worker thread already, don't use queue_delayed_work,
*/
^ permalink raw reply [flat|nested] 15+ messages in thread* [RFC][PATCH 5/10] I/O context inheritance
2008-04-22 13:49 taka
` (3 preceding siblings ...)
2008-04-22 13:54 ` [RFC][PATCH 4/10] " Hirokazu Takahashi
@ 2008-04-22 13:55 ` Hirokazu Takahashi
2008-04-22 13:55 ` [RFC][PATCH 6/10] " Hirokazu Takahashi
` (5 subsequent siblings)
10 siblings, 0 replies; 15+ messages in thread
From: Hirokazu Takahashi @ 2008-04-22 13:55 UTC (permalink / raw)
To: jens.axboe, agk; +Cc: dm-devel, linux-kernel
The raid1 module makes use of temporary bios to sync physical disks
under the same md device. Make the bios point to the iocontext of one of
the kernel threads in the module.
Meanwhile, regular read/write I/O requests will be cloned and may be
handled by one of the kernel threads in the modules. In this case,
the iocontext will be inherited from the source bio.
Signed-off-by: Hirokazu Takahashi <taka@valinux.co.jp>
--- linux-2.6.25.bio0/include/linux/raid/raid1.h 2008-04-22 15:48:32.000000000 +0900
+++ linux-2.6.25/include/linux/raid/raid1.h 2008-04-22 15:52:38.000000000 +0900
@@ -100,6 +100,7 @@ struct r1bio_s {
struct list_head retry_list;
struct bitmap_update *bitmap_update;
+ struct io_context *io_context;
/*
* if the IO is in WRITE direction, then multiple bios are used.
* We choose the number when they are allocated.
--- linux-2.6.25.bio0/drivers/md/raid1.c 2008-04-22 15:48:36.000000000 +0900
+++ linux-2.6.25/drivers/md/raid1.c 2008-04-22 17:18:08.000000000 +0900
@@ -200,6 +200,8 @@ static void put_buf(r1bio_t *r1_bio)
if (bio->bi_end_io)
rdev_dec_pending(conf->mirrors[i].rdev, r1_bio->mddev);
}
+ put_io_context(r1_bio->io_context);
+ r1_bio->io_context = NULL;
mempool_free(r1_bio, conf->r1buf_pool);
@@ -1731,6 +1733,7 @@ static sector_t sync_request(mddev_t *md
r1_bio->sector = sector_nr;
r1_bio->state = 0;
set_bit(R1BIO_IsSync, &r1_bio->state);
+ r1_bio->io_context = ioc_object_link(current->io_context);
for (i=0; i < conf->raid_disks; i++) {
mdk_rdev_t *rdev;
@@ -1747,6 +1750,7 @@ static sector_t sync_request(mddev_t *md
bio->bi_size = 0;
bio->bi_end_io = NULL;
bio->bi_private = NULL;
+ bio->bi_io_context = r1_bio->io_context;
rdev = rcu_dereference(conf->mirrors[i].rdev);
if (rdev == NULL ||
--- linux-2.6.25.bio0/drivers/md/md.c 2008-04-22 15:48:33.000000000 +0900
+++ linux-2.6.25/drivers/md/md.c 2008-04-22 15:52:38.000000000 +0900
@@ -5451,6 +5451,8 @@ void md_do_sync(mddev_t *mddev)
else
desc = "recovery";
+ current->io_context = alloc_io_context(GFP_KERNEL, -1); /*XXX*/
+
/* we overload curr_resync somewhat here.
* 0 == not engaged in resync at all
* 2 == checking that there is no conflict with another sync
^ permalink raw reply [flat|nested] 15+ messages in thread* [RFC][PATCH 6/10] I/O context inheritance
2008-04-22 13:49 taka
` (4 preceding siblings ...)
2008-04-22 13:55 ` [RFC][PATCH 5/10] " Hirokazu Takahashi
@ 2008-04-22 13:55 ` Hirokazu Takahashi
2008-04-22 13:57 ` [RFC][PATCH 7/10] " Hirokazu Takahashi
` (4 subsequent siblings)
10 siblings, 0 replies; 15+ messages in thread
From: Hirokazu Takahashi @ 2008-04-22 13:55 UTC (permalink / raw)
To: jens.axboe, agk; +Cc: dm-devel, linux-kernel
The raid10 module also makes use of temporary bios to sync physical disks
under the same md device. Make these bios point to the iocontext of a kernel
thread in the module.
Signed-off-by: Hirokazu Takahashi <taka@valinux.co.jp>
--- linux-2.6.25.bio0/include/linux/raid/raid10.h 2008-04-22 15:48:32.000000000 +0900
+++ linux-2.6.25/include/linux/raid/raid10.h 2008-04-22 15:52:58.000000000 +0900
@@ -94,6 +94,7 @@ struct r10bio_s {
int read_slot;
struct list_head retry_list;
+ struct io_context *io_context;
/*
* if the IO is in WRITE direction, then multiple bios are used,
* one for each copy.
--- linux-2.6.25.bio0/drivers/md/raid10.c 2008-04-22 15:48:33.000000000 +0900
+++ linux-2.6.25/drivers/md/raid10.c 2008-04-22 17:19:50.000000000 +0900
@@ -199,6 +199,9 @@ static void put_buf(r10bio_t *r10_bio)
{
conf_t *conf = mddev_to_conf(r10_bio->mddev);
+ put_io_context(r10_bio->io_context);
+ r10_bio->io_context = NULL;
+
mempool_free(r10_bio, conf->r10buf_pool);
lower_barrier(conf);
@@ -1761,6 +1764,7 @@ static sector_t sync_request(mddev_t *md
r10_bio->mddev = mddev;
set_bit(R10BIO_IsRecover, &r10_bio->state);
r10_bio->sector = sect;
+ r10_bio->io_context = ioc_object_link(current->io_context);
raid10_find_phys(conf, r10_bio);
/* Need to check if this section will still be
@@ -1793,6 +1797,7 @@ static sector_t sync_request(mddev_t *md
bio->bi_bdev = conf->mirrors[d].rdev->bdev;
atomic_inc(&conf->mirrors[d].rdev->nr_pending);
atomic_inc(&r10_bio->remaining);
+ bio->bi_io_context = r10_bio->io_context;
/* and we write to 'i' */
for (k=0; k<conf->copies; k++)
@@ -1808,6 +1813,7 @@ static sector_t sync_request(mddev_t *md
bio->bi_sector = r10_bio->devs[k].addr +
conf->mirrors[i].rdev->data_offset;
bio->bi_bdev = conf->mirrors[i].rdev->bdev;
+ bio->bi_io_context = r10_bio->io_context;
r10_bio->devs[0].devnum = d;
r10_bio->devs[1].devnum = i;
@@ -1861,6 +1867,7 @@ static sector_t sync_request(mddev_t *md
set_bit(R10BIO_IsSync, &r10_bio->state);
raid10_find_phys(conf, r10_bio);
r10_bio->sectors = (sector_nr | conf->chunk_mask) - sector_nr +1;
+ r10_bio->io_context = ioc_object_link(current->io_context);
for (i=0; i<conf->copies; i++) {
int d = r10_bio->devs[i].devnum;
@@ -1880,6 +1887,7 @@ static sector_t sync_request(mddev_t *md
bio->bi_sector = r10_bio->devs[i].addr +
conf->mirrors[d].rdev->data_offset;
bio->bi_bdev = conf->mirrors[d].rdev->bdev;
+ bio->bi_io_context = r10_bio->io_context;
count++;
}
^ permalink raw reply [flat|nested] 15+ messages in thread* [RFC][PATCH 7/10] I/O context inheritance
2008-04-22 13:49 taka
` (5 preceding siblings ...)
2008-04-22 13:55 ` [RFC][PATCH 6/10] " Hirokazu Takahashi
@ 2008-04-22 13:57 ` Hirokazu Takahashi
2008-04-22 13:58 ` [RFC][PATCH 8/10] " Hirokazu Takahashi
` (3 subsequent siblings)
10 siblings, 0 replies; 15+ messages in thread
From: Hirokazu Takahashi @ 2008-04-22 13:57 UTC (permalink / raw)
To: jens.axboe, agk; +Cc: dm-devel, linux-kernel
The raid5 module makes use of temporary bios to issue several I/O requests
in the same stripe, whenever it gets a write I/O request. In this case,
the temporary bios should inherit the iocontext from the source bio.
And the module also makes use of temporary bios to sync physical disks
under the same md device. In this case, these bios point to the iocontext
of one of the kernel threads in the module.
Signed-off-by: Hirokazu Takahashi <taka@valinux.co.jp>
--- linux-2.6.25.bio0/include/linux/raid/raid5.h 2008-04-22 15:48:32.000000000 +0900
+++ linux-2.6.25/include/linux/raid/raid5.h 2008-04-22 15:53:09.000000000 +0900
@@ -169,6 +169,7 @@ struct stripe_head {
spinlock_t lock;
int bm_seq; /* sequence number for bitmap flushes */
int disks; /* disks in stripe */
+ struct io_context *io_context;
/* stripe_operations
* @pending - pending ops flags (set for request->issue->complete)
* @ack - submitted ops flags (set for issue->complete)
--- linux-2.6.25.bio0/drivers/md/raid5.c 2008-04-22 15:48:32.000000000 +0900
+++ linux-2.6.25/drivers/md/raid5.c 2008-04-22 18:51:13.000000000 +0900
@@ -148,6 +148,8 @@ static void __release_stripe(raid5_conf_
}
atomic_dec(&conf->active_stripes);
if (!test_bit(STRIPE_EXPANDING, &sh->state)) {
+ put_io_context(sh->io_context);
+ sh->io_context = NULL;
list_add_tail(&sh->lru, &conf->inactive_list);
wake_up(&conf->wait_for_stripe);
if (conf->retry_read_aligned)
@@ -235,7 +237,7 @@ static int grow_buffers(struct stripe_he
static void raid5_build_block (struct stripe_head *sh, int i);
-static void init_stripe(struct stripe_head *sh, sector_t sector, int pd_idx, int disks)
+static void init_stripe(struct stripe_head *sh, sector_t sector, int pd_idx, int disks, struct bio *bi)
{
raid5_conf_t *conf = sh->raid_conf;
int i;
@@ -253,6 +255,10 @@ static void init_stripe(struct stripe_he
sh->sector = sector;
sh->pd_idx = pd_idx;
sh->state = 0;
+ if (bi)
+ sh->io_context = ioc_object_link(bi->bi_io_context);
+ else
+ sh->io_context = ioc_object_link(current->io_context);
sh->disks = disks;
@@ -291,7 +297,7 @@ static void unplug_slaves(mddev_t *mddev
static void raid5_unplug_device(struct request_queue *q);
static struct stripe_head *get_active_stripe(raid5_conf_t *conf, sector_t sector, int disks,
- int pd_idx, int noblock)
+ int pd_idx, int noblock, struct bio *bi)
{
struct stripe_head *sh;
@@ -321,7 +327,7 @@ static struct stripe_head *get_active_st
);
conf->inactive_blocked = 0;
} else
- init_stripe(sh, sector, pd_idx, disks);
+ init_stripe(sh, sector, pd_idx, disks, bi);
} else {
if (atomic_read(&sh->count)) {
BUG_ON(!list_empty(&sh->lru));
@@ -412,10 +418,19 @@ static void ops_run_io(struct stripe_hea
bi = &sh->dev[i].req;
bi->bi_rw = rw;
- if (rw == WRITE)
+ if (rw == WRITE) {
bi->bi_end_io = raid5_end_write_request;
- else
+ if (sh->dev[i].towrite) {
+ bi->bi_io_context = sh->dev[i].towrite->bi_io_context;
+ }
+ } else {
bi->bi_end_io = raid5_end_read_request;
+ if (sh->dev[i].toread) {
+ bi->bi_io_context = sh->dev[i].toread->bi_io_context;
+ }
+ }
+ if (!bi->bi_io_context)
+ bi->bi_io_context = sh->io_context;
rcu_read_lock();
rdev = rcu_dereference(conf->disks[i].rdev);
@@ -2551,7 +2566,7 @@ static void handle_stripe_expansion(raid
conf->max_degraded, &dd_idx,
&pd_idx, conf);
sh2 = get_active_stripe(conf, s, conf->raid_disks,
- pd_idx, 1);
+ pd_idx, 1, NULL);
if (sh2 == NULL)
/* so far only the early blocks of this stripe
* have been requested. When later blocks
@@ -3512,7 +3527,7 @@ static int make_request(struct request_q
(unsigned long long)new_sector,
(unsigned long long)logical_sector);
- sh = get_active_stripe(conf, new_sector, disks, pd_idx, (bi->bi_rw&RWA_MASK));
+ sh = get_active_stripe(conf, new_sector, disks, pd_idx, (bi->bi_rw&RWA_MASK), bi);
if (sh) {
if (unlikely(conf->expand_progress != MaxSector)) {
/* expansion might have moved on while waiting for a
@@ -3650,7 +3665,7 @@ static sector_t reshape_request(mddev_t
int skipped = 0;
pd_idx = stripe_to_pdidx(sector_nr+i, conf, conf->raid_disks);
sh = get_active_stripe(conf, sector_nr+i,
- conf->raid_disks, pd_idx, 0);
+ conf->raid_disks, pd_idx, 0, NULL);
set_bit(STRIPE_EXPANDING, &sh->state);
atomic_inc(&conf->reshape_stripes);
/* If any of this stripe is beyond the end of the old
@@ -3701,7 +3716,7 @@ static sector_t reshape_request(mddev_t
pd_idx = stripe_to_pdidx(first_sector, conf,
conf->previous_raid_disks);
sh = get_active_stripe(conf, first_sector,
- conf->previous_raid_disks, pd_idx, 0);
+ conf->previous_raid_disks, pd_idx, 0, NULL);
set_bit(STRIPE_EXPAND_SOURCE, &sh->state);
set_bit(STRIPE_HANDLE, &sh->state);
release_stripe(sh);
@@ -3791,9 +3806,9 @@ static inline sector_t sync_request(mdde
bitmap_cond_end_sync(mddev->bitmap, sector_nr);
pd_idx = stripe_to_pdidx(sector_nr, conf, raid_disks);
- sh = get_active_stripe(conf, sector_nr, raid_disks, pd_idx, 1);
+ sh = get_active_stripe(conf, sector_nr, raid_disks, pd_idx, 1, NULL);
if (sh == NULL) {
- sh = get_active_stripe(conf, sector_nr, raid_disks, pd_idx, 0);
+ sh = get_active_stripe(conf, sector_nr, raid_disks, pd_idx, 0, NULL);
/* make sure we don't swamp the stripe cache if someone else
* is trying to get access
*/
@@ -3857,7 +3872,7 @@ static int retry_aligned_read(raid5_con
/* already done this stripe */
continue;
- sh = get_active_stripe(conf, sector, conf->raid_disks, pd_idx, 1);
+ sh = get_active_stripe(conf, sector, conf->raid_disks, pd_idx, 1, raid_bio);
if (!sh) {
/* failed to get a stripe - must wait */
^ permalink raw reply [flat|nested] 15+ messages in thread* [RFC][PATCH 8/10] I/O context inheritance
2008-04-22 13:49 taka
` (6 preceding siblings ...)
2008-04-22 13:57 ` [RFC][PATCH 7/10] " Hirokazu Takahashi
@ 2008-04-22 13:58 ` Hirokazu Takahashi
2008-04-22 13:59 ` [RFC][PATCH 9/10] " Hirokazu Takahashi
` (2 subsequent siblings)
10 siblings, 0 replies; 15+ messages in thread
From: Hirokazu Takahashi @ 2008-04-22 13:58 UTC (permalink / raw)
To: jens.axboe, agk; +Cc: dm-devel, linux-kernel
The crypt module will duplicate a given bio in its own way, so the newly
allocated bio should inherit the io_context from the original one.
Signed-off-by: Hirokazu Takahashi <taka@valinux.co.jp>
--- linux-2.6.25.bio0/drivers/md/dm-crypt.c 2008-04-22 15:48:36.000000000 +0900
+++ linux-2.6.25/drivers/md/dm-crypt.c 2008-04-22 15:53:28.000000000 +0900
@@ -581,6 +581,7 @@ static void clone_init(struct dm_crypt_i
clone->bi_end_io = crypt_endio;
clone->bi_bdev = cc->dev->bdev;
clone->bi_rw = io->base_bio->bi_rw;
+ clone->bi_io_context = ioc_object_link(io->base_bio->bi_io_context);
clone->bi_destructor = dm_crypt_bio_destructor;
}
^ permalink raw reply [flat|nested] 15+ messages in thread* [RFC][PATCH 9/10] I/O context inheritance
2008-04-22 13:49 taka
` (7 preceding siblings ...)
2008-04-22 13:58 ` [RFC][PATCH 8/10] " Hirokazu Takahashi
@ 2008-04-22 13:59 ` Hirokazu Takahashi
2008-04-22 14:00 ` [RFC][PATCH 10/10] " Hirokazu Takahashi
2008-04-22 14:03 ` [RFC][PATCH 0/10] " Hirokazu Takahashi
10 siblings, 0 replies; 15+ messages in thread
From: Hirokazu Takahashi @ 2008-04-22 13:59 UTC (permalink / raw)
To: jens.axboe, agk; +Cc: dm-devel, linux-kernel
The packet writing module creates a bio in its own way, so its iocontext
should be set here.
This code hasn't been tested yet.
Signed-off-by: Hirokazu Takahashi <taka@valinux.co.jp>
--- linux-2.6.25.bio0/drivers/block/pktcdvd.c 2008-04-22 15:48:36.000000000 +0900
+++ linux-2.6.25/drivers/block/pktcdvd.c 2008-04-22 15:53:41.000000000 +0900
@@ -514,6 +514,7 @@ static void pkt_bio_finished(struct pktc
static void pkt_bio_destructor(struct bio *bio)
{
+ put_io_context(bio->bi_io_context);
kfree(bio->bi_io_vec);
kfree(bio);
}
@@ -522,6 +523,11 @@ static struct bio *pkt_bio_alloc(int nr_
{
struct bio_vec *bvl = NULL;
struct bio *bio;
+ struct io_context *ioc;
+
+ ioc = get_io_context(GFP_KERNEL, -1);
+ if (!ioc)
+ goto no_ioc;
bio = kmalloc(sizeof(struct bio), GFP_KERNEL);
if (!bio)
@@ -535,12 +541,15 @@ static struct bio *pkt_bio_alloc(int nr_
bio->bi_max_vecs = nr_iovecs;
bio->bi_io_vec = bvl;
bio->bi_destructor = pkt_bio_destructor;
+ bio->bi_io_context = ioc;
return bio;
no_bvl:
kfree(bio);
no_bio:
+ put_io_context(ioc);
+ no_ioc:
return NULL;
}
^ permalink raw reply [flat|nested] 15+ messages in thread* [RFC][PATCH 10/10] I/O context inheritance
2008-04-22 13:49 taka
` (8 preceding siblings ...)
2008-04-22 13:59 ` [RFC][PATCH 9/10] " Hirokazu Takahashi
@ 2008-04-22 14:00 ` Hirokazu Takahashi
2008-04-22 14:03 ` [RFC][PATCH 0/10] " Hirokazu Takahashi
10 siblings, 0 replies; 15+ messages in thread
From: Hirokazu Takahashi @ 2008-04-22 14:00 UTC (permalink / raw)
To: jens.axboe, agk; +Cc: dm-devel, linux-kernel
The floppy driver will allocate a bio on its stack, so its iocontext
should be set here.
This code hasn't been tested yet.
Signed-off-by: Hirokazu Takahashi <taka@valinux.co.jp>
--- linux-2.6.25.bio0/drivers/block/floppy.c 2008-04-22 15:48:36.000000000 +0900
+++ linux-2.6.25/drivers/block/floppy.c 2008-04-22 21:33:40.000000000 +0900
@@ -3845,12 +3845,14 @@ static int __floppy_read_block_0(struct
init_completion(&complete);
bio.bi_private = &complete;
bio.bi_end_io = floppy_rb0_complete;
+ bio.bi_io_context = get_io_context(GFP_NOIO, -1);
submit_bio(READ, &bio);
generic_unplug_device(bdev_get_queue(bdev));
process_fd_request();
wait_for_completion(&complete);
+ put_io_context(bio.bi_io_context);
__free_page(page);
return 0;
^ permalink raw reply [flat|nested] 15+ messages in thread* Re: [RFC][PATCH 0/10] I/O context inheritance
2008-04-22 13:49 taka
` (9 preceding siblings ...)
2008-04-22 14:00 ` [RFC][PATCH 10/10] " Hirokazu Takahashi
@ 2008-04-22 14:03 ` Hirokazu Takahashi
2008-04-22 14:54 ` Jens Axboe
10 siblings, 1 reply; 15+ messages in thread
From: Hirokazu Takahashi @ 2008-04-22 14:03 UTC (permalink / raw)
To: jens.axboe, agk; +Cc: dm-devel, linux-kernel
Hi,
Sorry, these patches are for linux-2.6.25.
> This series of patches make the block I/O layer and the I/O schedulers
> be able to determine the right io_context of every I/O.
>
> The current implementation of the block I/O layer and the I/O schedulers
> assume that the current process is the one which issued the given I/O,
> then use the io_context of this process to control the I/O.
> But this assumption isn't quite right because several kernel threads
> will handle I/O requests on behalf of the processes which originated them.
> This often happens when you want to use device mapper modules.
>
> The patches make every bio has a pointer to an io_context, which will
> be set when it is allocated or cloned. So it makes it possible to find
> the right io_context from any bio at any place.
>
> I'm waiting for your comments.
>
> Thank you,
> Hirokazu Takahashi.
^ permalink raw reply [flat|nested] 15+ messages in thread* Re: [RFC][PATCH 0/10] I/O context inheritance
2008-04-22 14:03 ` [RFC][PATCH 0/10] " Hirokazu Takahashi
@ 2008-04-22 14:54 ` Jens Axboe
2008-04-22 18:10 ` [dm-devel] " Dan Williams
0 siblings, 1 reply; 15+ messages in thread
From: Jens Axboe @ 2008-04-22 14:54 UTC (permalink / raw)
To: Hirokazu Takahashi; +Cc: agk, dm-devel, linux-kernel
On Tue, Apr 22 2008, Hirokazu Takahashi wrote:
> Hi,
>
> Sorry, these patches are for linux-2.6.25.
>
> > This series of patches make the block I/O layer and the I/O schedulers
> > be able to determine the right io_context of every I/O.
> >
> > The current implementation of the block I/O layer and the I/O schedulers
> > assume that the current process is the one which issued the given I/O,
> > then use the io_context of this process to control the I/O.
> > But this assumption isn't quite right because several kernel threads
> > will handle I/O requests on behalf of the processes which originated them.
> > This often happens when you want to use device mapper modules.
> >
> > The patches make every bio has a pointer to an io_context, which will
> > be set when it is allocated or cloned. So it makes it possible to find
> > the right io_context from any bio at any place.
> >
> > I'm waiting for your comments.
Can you give a brief summary of what you need this stuff for?
--
Jens Axboe
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [dm-devel] Re: [RFC][PATCH 0/10] I/O context inheritance
2008-04-22 14:54 ` Jens Axboe
@ 2008-04-22 18:10 ` Dan Williams
2008-04-23 3:27 ` Hirokazu Takahashi
0 siblings, 1 reply; 15+ messages in thread
From: Dan Williams @ 2008-04-22 18:10 UTC (permalink / raw)
To: device-mapper development; +Cc: Hirokazu Takahashi, linux-kernel, agk
On Tue, Apr 22, 2008 at 7:54 AM, Jens Axboe <jens.axboe@oracle.com> wrote:
> On Tue, Apr 22 2008, Hirokazu Takahashi wrote:
> > Hi,
> >
> > Sorry, these patches are for linux-2.6.25.
> >
> > > This series of patches make the block I/O layer and the I/O schedulers
> > > be able to determine the right io_context of every I/O.
> > >
> > > The current implementation of the block I/O layer and the I/O schedulers
> > > assume that the current process is the one which issued the given I/O,
> > > then use the io_context of this process to control the I/O.
> > > But this assumption isn't quite right because several kernel threads
> > > will handle I/O requests on behalf of the processes which originated them.
> > > This often happens when you want to use device mapper modules.
> > >
> > > The patches make every bio has a pointer to an io_context, which will
> > > be set when it is allocated or cloned. So it makes it possible to find
> > > the right io_context from any bio at any place.
> > >
> > > I'm waiting for your comments.
>
> Can you give a brief summary of what you need this stuff for?
>
I am not sure if this is the intended application, but I have been
looking at supporting ionice over software-raid and this series seems
like a necessary first step.
--
Dan
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [dm-devel] Re: [RFC][PATCH 0/10] I/O context inheritance
2008-04-22 18:10 ` [dm-devel] " Dan Williams
@ 2008-04-23 3:27 ` Hirokazu Takahashi
0 siblings, 0 replies; 15+ messages in thread
From: Hirokazu Takahashi @ 2008-04-23 3:27 UTC (permalink / raw)
To: dan.j.williams; +Cc: dm-devel, linux-kernel, agk
Hi,
> > On Tue, Apr 22 2008, Hirokazu Takahashi wrote:
> > > Hi,
> > >
> > > Sorry, these patches are for linux-2.6.25.
> > >
> > > > This series of patches make the block I/O layer and the I/O schedulers
> > > > be able to determine the right io_context of every I/O.
> > > >
> > > > The current implementation of the block I/O layer and the I/O schedulers
> > > > assume that the current process is the one which issued the given I/O,
> > > > then use the io_context of this process to control the I/O.
> > > > But this assumption isn't quite right because several kernel threads
> > > > will handle I/O requests on behalf of the processes which originated them.
> > > > This often happens when you want to use device mapper modules.
> > > >
> > > > The patches make every bio has a pointer to an io_context, which will
> > > > be set when it is allocated or cloned. So it makes it possible to find
> > > > the right io_context from any bio at any place.
> > > >
> > > > I'm waiting for your comments.
> >
> > Can you give a brief summary of what you need this stuff for?
> >
>
> I am not sure if this the intended application, but I have been
> looking at supporting ionice over software-raid and this series seems
> like a necessary first step.
Yes, the intention is to make ionice be able to work correctly over
device mapper modules such as software-raid and the multipath driver.
This mechanism is needed since quite a few device mapper modules have
kernel threads, including workqueues, handle I/O requests.
And I also have a plan to use this new feature for block I/O bandwidth
control based on cgroup with some enhancement. With this feature, we
can also trace the iocontext of the process or cgroup which dirtied
a certain page.
Thank you,
Hirokazu Takahashi.
^ permalink raw reply [flat|nested] 15+ messages in thread