From: Kent Overstreet <koverstreet@google.com>
To: Tejun Heo <tj@kernel.org>
Cc: axboe@kernel.dk, vgoyal@redhat.com, ctalbott@google.com,
rni@google.com, linux-kernel@vger.kernel.org
Subject: Re: [PATCH 7/9] block: implement bio_associate_current()
Date: Thu, 16 Feb 2012 17:19:07 -0800 [thread overview]
Message-ID: <20120217011907.GA15073@google.com> (raw)
In-Reply-To: <1329431878-28300-8-git-send-email-tj@kernel.org>
On Thu, Feb 16, 2012 at 02:37:56PM -0800, Tejun Heo wrote:
> This patch implements bio_associate_current() which associates the
> specified bio with %current. The bio will record the associated ioc
> and blkcg at that point and block layer will use the recorded ones
> regardless of which task actually ends up issuing the bio. bio
> release puts the associated ioc and blkcg.
Excellent.
Why not have bio_associate_current() called from submit_bio()? I would
expect that's what we want most of the time, and the places it's not
(mainly writeback) calling it before submit_bio() would do the right
thing.
It'd make things more consistent - rq_ioc() could be dropped, and
incorrect usage would be more obvious.
> It grabs and remembers ioc and blkcg instead of the task itself
> because task may already be dead by the time the bio is issued making
> ioc and blkcg inaccessible and those are all block layer cares about.
>
> elevator_set_req_fn() is updated such that the bio elvdata is being
> allocated for is available to the elevator.
>
> This doesn't update block cgroup policies yet. Further patches will
> implement the support.
>
> Signed-off-by: Tejun Heo <tj@kernel.org>
> Cc: Vivek Goyal <vgoyal@redhat.com>
> Cc: Kent Overstreet <koverstreet@google.com>
> ---
> block/blk-core.c | 30 +++++++++++++++++-----
> block/cfq-iosched.c | 3 +-
> block/elevator.c | 5 ++-
> fs/bio.c | 61 +++++++++++++++++++++++++++++++++++++++++++++
> include/linux/bio.h | 8 ++++++
> include/linux/blk_types.h | 10 +++++++
> include/linux/elevator.h | 6 +++-
> 7 files changed, 111 insertions(+), 12 deletions(-)
>
> diff --git a/block/blk-core.c b/block/blk-core.c
> index 195c5f7..e6a4f90 100644
> --- a/block/blk-core.c
> +++ b/block/blk-core.c
> @@ -695,7 +695,7 @@ static inline void blk_free_request(struct request_queue *q, struct request *rq)
> }
>
> static struct request *
> -blk_alloc_request(struct request_queue *q, struct io_cq *icq,
> +blk_alloc_request(struct request_queue *q, struct bio *bio, struct io_cq *icq,
> unsigned int flags, gfp_t gfp_mask)
> {
> struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask);
> @@ -709,7 +709,7 @@ blk_alloc_request(struct request_queue *q, struct io_cq *icq,
>
> if (flags & REQ_ELVPRIV) {
> rq->elv.icq = icq;
> - if (unlikely(elv_set_request(q, rq, gfp_mask))) {
> + if (unlikely(elv_set_request(q, rq, bio, gfp_mask))) {
> mempool_free(rq, q->rq.rq_pool);
> return NULL;
> }
> @@ -809,6 +809,20 @@ static bool blk_rq_should_init_elevator(struct bio *bio)
> }
>
> /**
> + * rq_ioc - determine io_context for request allocation
> + * @bio: request being allocated is for this bio (can be %NULL)
> + *
> + * Determine io_context to use for request allocation for @bio. May return
> + * %NULL if %current->io_context doesn't exist.
> + */
> +static struct io_context *rq_ioc(struct bio *bio)
> +{
> + if (bio && bio->bi_ioc)
> + return bio->bi_ioc;
> + return current->io_context;
> +}
> +
> +/**
> * get_request - get a free request
> * @q: request_queue to allocate request from
> * @rw_flags: RW and SYNC flags
> @@ -835,7 +849,7 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
> int may_queue;
> retry:
> et = q->elevator->type;
> - ioc = current->io_context;
> + ioc = rq_ioc(bio);
>
> if (unlikely(blk_queue_dead(q)))
> return NULL;
> @@ -918,14 +932,16 @@ retry:
>
> /* create icq if missing */
> if ((rw_flags & REQ_ELVPRIV) && unlikely(et->icq_cache && !icq)) {
> - ioc = create_io_context(gfp_mask, q->node);
> - if (ioc)
> - icq = ioc_create_icq(ioc, q, gfp_mask);
> + create_io_context(gfp_mask, q->node);
> + ioc = rq_ioc(bio);
> + if (!ioc)
> + goto fail_alloc;
> + icq = ioc_create_icq(ioc, q, gfp_mask);
> if (!icq)
> goto fail_alloc;
> }
>
> - rq = blk_alloc_request(q, icq, rw_flags, gfp_mask);
> + rq = blk_alloc_request(q, bio, icq, rw_flags, gfp_mask);
> if (unlikely(!rq))
> goto fail_alloc;
>
> diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
> index 00e28a3..b2aabe8 100644
> --- a/block/cfq-iosched.c
> +++ b/block/cfq-iosched.c
> @@ -3299,7 +3299,8 @@ split_cfqq(struct cfq_io_cq *cic, struct cfq_queue *cfqq)
> * Allocate cfq data structures associated with this request.
> */
> static int
> -cfq_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask)
> +cfq_set_request(struct request_queue *q, struct request *rq, struct bio *bio,
> + gfp_t gfp_mask)
> {
> struct cfq_data *cfqd = q->elevator->elevator_data;
> struct cfq_io_cq *cic = icq_to_cic(rq->elv.icq);
> diff --git a/block/elevator.c b/block/elevator.c
> index 06d9869..6315a27 100644
> --- a/block/elevator.c
> +++ b/block/elevator.c
> @@ -663,12 +663,13 @@ struct request *elv_former_request(struct request_queue *q, struct request *rq)
> return NULL;
> }
>
> -int elv_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask)
> +int elv_set_request(struct request_queue *q, struct request *rq,
> + struct bio *bio, gfp_t gfp_mask)
> {
> struct elevator_queue *e = q->elevator;
>
> if (e->type->ops.elevator_set_req_fn)
> - return e->type->ops.elevator_set_req_fn(q, rq, gfp_mask);
> + return e->type->ops.elevator_set_req_fn(q, rq, bio, gfp_mask);
> return 0;
> }
>
> diff --git a/fs/bio.c b/fs/bio.c
> index b980ecd..142214b 100644
> --- a/fs/bio.c
> +++ b/fs/bio.c
> @@ -19,12 +19,14 @@
> #include <linux/swap.h>
> #include <linux/bio.h>
> #include <linux/blkdev.h>
> +#include <linux/iocontext.h>
> #include <linux/slab.h>
> #include <linux/init.h>
> #include <linux/kernel.h>
> #include <linux/module.h>
> #include <linux/mempool.h>
> #include <linux/workqueue.h>
> +#include <linux/cgroup.h>
> #include <scsi/sg.h> /* for struct sg_iovec */
>
> #include <trace/events/block.h>
> @@ -418,6 +420,7 @@ void bio_put(struct bio *bio)
> * last put frees it
> */
> if (atomic_dec_and_test(&bio->bi_cnt)) {
> + bio_disassociate_task(bio);
> bio->bi_next = NULL;
> bio->bi_destructor(bio);
> }
> @@ -1641,6 +1644,64 @@ bad:
> }
> EXPORT_SYMBOL(bioset_create);
>
> +#ifdef CONFIG_BLK_CGROUP
> +/**
> + * bio_associate_current - associate a bio with %current
> + * @bio: target bio
> + *
> + * Associate @bio with %current if it hasn't been associated yet. Block
> + * layer will treat @bio as if it were issued by %current no matter which
> + * task actually issues it.
> + *
> + * This function takes an extra reference of @task's io_context and blkcg
> + * which will be put when @bio is released. The caller must own @bio,
> + * ensure %current->io_context exists, and is responsible for synchronizing
> + * calls to this function.
> + */
> +int bio_associate_current(struct bio *bio)
> +{
> + struct io_context *ioc;
> + struct cgroup_subsys_state *css;
> +
> + if (bio->bi_ioc)
> + return -EBUSY;
> +
> + ioc = current->io_context;
> + if (!ioc)
> + return -ENOENT;
> +
> + /* acquire active ref on @ioc and associate */
> + get_io_context_active(ioc);
> + bio->bi_ioc = ioc;
> +
> + /* associate blkcg if exists */
> + rcu_read_lock();
> + css = task_subsys_state(current, blkio_subsys_id);
> + if (css && css_tryget(css))
> + bio->bi_css = css;
> + rcu_read_unlock();
> +
> + return 0;
> +}
> +
> +/**
> + * bio_disassociate_task - undo bio_associate_current()
> + * @bio: target bio
> + */
> +void bio_disassociate_task(struct bio *bio)
> +{
> + if (bio->bi_ioc) {
> + put_io_context(bio->bi_ioc);
> + bio->bi_ioc = NULL;
> + }
> + if (bio->bi_css) {
> + css_put(bio->bi_css);
> + bio->bi_css = NULL;
> + }
> +}
> +
> +#endif /* CONFIG_BLK_CGROUP */
> +
> static void __init biovec_init_slabs(void)
> {
> int i;
> diff --git a/include/linux/bio.h b/include/linux/bio.h
> index 129a9c0..692d3d5 100644
> --- a/include/linux/bio.h
> +++ b/include/linux/bio.h
> @@ -268,6 +268,14 @@ extern struct bio_vec *bvec_alloc_bs(gfp_t, int, unsigned long *, struct bio_set
> extern void bvec_free_bs(struct bio_set *, struct bio_vec *, unsigned int);
> extern unsigned int bvec_nr_vecs(unsigned short idx);
>
> +#ifdef CONFIG_BLK_CGROUP
> +int bio_associate_current(struct bio *bio);
> +void bio_disassociate_task(struct bio *bio);
> +#else /* CONFIG_BLK_CGROUP */
> +static inline int bio_associate_current(struct bio *bio) { return -ENOENT; }
> +static inline void bio_disassociate_task(struct bio *bio) { }
> +#endif /* CONFIG_BLK_CGROUP */
> +
> /*
> * bio_set is used to allow other portions of the IO system to
> * allocate their own private memory pools for bio and iovec structures.
> diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
> index 4053cbd..0edb65d 100644
> --- a/include/linux/blk_types.h
> +++ b/include/linux/blk_types.h
> @@ -14,6 +14,8 @@ struct bio;
> struct bio_integrity_payload;
> struct page;
> struct block_device;
> +struct io_context;
> +struct cgroup_subsys_state;
> typedef void (bio_end_io_t) (struct bio *, int);
> typedef void (bio_destructor_t) (struct bio *);
>
> @@ -66,6 +68,14 @@ struct bio {
> bio_end_io_t *bi_end_io;
>
> void *bi_private;
> +#ifdef CONFIG_BLK_CGROUP
> + /*
> + * Optional ioc and css associated with this bio. Put on bio
> + * release. Read comment on top of bio_associate_current().
> + */
> + struct io_context *bi_ioc;
> + struct cgroup_subsys_state *bi_css;
> +#endif
> #if defined(CONFIG_BLK_DEV_INTEGRITY)
> struct bio_integrity_payload *bi_integrity; /* data integrity */
> #endif
> diff --git a/include/linux/elevator.h b/include/linux/elevator.h
> index 97fb255..c03af76 100644
> --- a/include/linux/elevator.h
> +++ b/include/linux/elevator.h
> @@ -28,7 +28,8 @@ typedef int (elevator_may_queue_fn) (struct request_queue *, int);
>
> typedef void (elevator_init_icq_fn) (struct io_cq *);
> typedef void (elevator_exit_icq_fn) (struct io_cq *);
> -typedef int (elevator_set_req_fn) (struct request_queue *, struct request *, gfp_t);
> +typedef int (elevator_set_req_fn) (struct request_queue *, struct request *,
> + struct bio *, gfp_t);
> typedef void (elevator_put_req_fn) (struct request *);
> typedef void (elevator_activate_req_fn) (struct request_queue *, struct request *);
> typedef void (elevator_deactivate_req_fn) (struct request_queue *, struct request *);
> @@ -129,7 +130,8 @@ extern void elv_unregister_queue(struct request_queue *q);
> extern int elv_may_queue(struct request_queue *, int);
> extern void elv_abort_queue(struct request_queue *);
> extern void elv_completed_request(struct request_queue *, struct request *);
> -extern int elv_set_request(struct request_queue *, struct request *, gfp_t);
> +extern int elv_set_request(struct request_queue *q, struct request *rq,
> + struct bio *bio, gfp_t gfp_mask);
> extern void elv_put_request(struct request_queue *, struct request *);
> extern void elv_drain_elevator(struct request_queue *);
>
> --
> 1.7.7.3
>
next prev parent reply other threads:[~2012-02-17 1:19 UTC|newest]
Thread overview: 50+ messages / expand[flat|nested] mbox.gz Atom feed top
2012-02-16 22:37 [PATCHSET] blkcg: update locking and fix stacking Tejun Heo
2012-02-16 22:37 ` [PATCH 1/9] blkcg: use double locking instead of RCU for blkg synchronization Tejun Heo
2012-02-16 22:37 ` [PATCH 2/9] blkcg: drop unnecessary RCU locking Tejun Heo
2012-02-17 16:19 ` Vivek Goyal
2012-02-17 17:07 ` Tejun Heo
2012-02-17 17:14 ` Tejun Heo
2012-02-17 16:47 ` Vivek Goyal
2012-02-17 17:11 ` Tejun Heo
2012-02-17 17:28 ` Vivek Goyal
2012-02-17 17:43 ` Tejun Heo
2012-02-17 18:08 ` Vivek Goyal
2012-02-17 18:16 ` Tejun Heo
2012-02-22 0:49 ` [PATCH UPDATED " Tejun Heo
2012-02-16 22:37 ` [PATCH 3/9] block: restructure get_request() Tejun Heo
2012-02-16 22:37 ` [PATCH 4/9] block: interface update for ioc/icq creation functions Tejun Heo
2012-02-16 22:37 ` [PATCH 5/9] block: ioc_task_link() can't fail Tejun Heo
2012-02-17 20:41 ` Vivek Goyal
2012-02-17 22:18 ` Tejun Heo
2012-02-16 22:37 ` [PATCH 6/9] block: add io_context->active_ref Tejun Heo
2012-02-16 22:37 ` [PATCH 7/9] block: implement bio_associate_current() Tejun Heo
2012-02-17 1:19 ` Kent Overstreet [this message]
2012-02-17 22:14 ` Tejun Heo
2012-02-17 22:34 ` Vivek Goyal
2012-02-17 22:41 ` Tejun Heo
2012-02-17 22:51 ` Vivek Goyal
2012-02-17 22:57 ` Tejun Heo
2012-02-20 14:22 ` Vivek Goyal
2012-02-20 16:59 ` Tejun Heo
2012-02-20 19:14 ` Vivek Goyal
2012-02-20 21:21 ` Tejun Heo
2012-02-27 23:12 ` Chris Wright
2012-02-28 14:10 ` Vivek Goyal
2012-02-28 17:01 ` Chris Wright
2012-02-28 20:11 ` Stefan Hajnoczi
2012-02-20 14:36 ` Vivek Goyal
2012-02-20 17:01 ` Tejun Heo
2012-02-20 19:16 ` Vivek Goyal
2012-02-20 21:06 ` Tejun Heo
2012-02-20 21:10 ` Vivek Goyal
2012-02-17 22:56 ` Vivek Goyal
2012-02-17 23:06 ` Tejun Heo
2012-02-17 21:33 ` Vivek Goyal
2012-02-17 22:03 ` Tejun Heo
2012-02-17 22:29 ` Vivek Goyal
2012-02-17 22:38 ` Tejun Heo
2012-02-17 22:42 ` Tejun Heo
2012-02-16 22:37 ` [PATCH 8/9] block: make block cgroup policies follow bio task association Tejun Heo
2012-02-16 22:37 ` [PATCH 9/9] block: make blk-throttle preserve the issuing task on delayed bios Tejun Heo
2012-02-17 21:58 ` Vivek Goyal
2012-02-17 22:17 ` Tejun Heo
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20120217011907.GA15073@google.com \
--to=koverstreet@google.com \
--cc=axboe@kernel.dk \
--cc=ctalbott@google.com \
--cc=linux-kernel@vger.kernel.org \
--cc=rni@google.com \
--cc=tj@kernel.org \
--cc=vgoyal@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.