From: Kent Overstreet <koverstreet@google.com>
To: Tejun Heo <tj@kernel.org>
Cc: axboe@kernel.dk, vgoyal@redhat.com, ctalbott@google.com,
rni@google.com, linux-kernel@vger.kernel.org
Subject: Re: [PATCH 7/9] block: implement bio_associate_current()
Date: Thu, 16 Feb 2012 17:19:07 -0800 [thread overview]
Message-ID: <20120217011907.GA15073@google.com> (raw)
In-Reply-To: <1329431878-28300-8-git-send-email-tj@kernel.org>
On Thu, Feb 16, 2012 at 02:37:56PM -0800, Tejun Heo wrote:
> This patch implements bio_associate_current() which associates the
> specified bio with %current. The bio will record the associated ioc
> and blkcg at that point and block layer will use the recorded ones
> regardless of which task actually ends up issuing the bio. bio
> release puts the associated ioc and blkcg.
Excellent.
Why not have bio_associate_current() called from submit_bio()? I would
expect that's what we want most of the time, and in the places where it
isn't (mainly writeback), calling it before submit_bio() would do the
right thing.
It'd make things more consistent - rq_ioc() could be dropped, and
incorrect usage would be more obvious.
> It grabs and remembers ioc and blkcg instead of the task itself
> because the task may already be dead by the time the bio is issued,
> making ioc and blkcg inaccessible, and those are all the block layer
> cares about.
>
> elevator_set_req_fn() is updated such that the bio for which elvdata
> is being allocated is available to the elevator.
>
> This doesn't update block cgroup policies yet. Further patches will
> implement the support.
>
> Signed-off-by: Tejun Heo <tj@kernel.org>
> Cc: Vivek Goyal <vgoyal@redhat.com>
> Cc: Kent Overstreet <koverstreet@google.com>
> ---
> block/blk-core.c | 30 +++++++++++++++++-----
> block/cfq-iosched.c | 3 +-
> block/elevator.c | 5 ++-
> fs/bio.c | 61 +++++++++++++++++++++++++++++++++++++++++++++
> include/linux/bio.h | 8 ++++++
> include/linux/blk_types.h | 10 +++++++
> include/linux/elevator.h | 6 +++-
> 7 files changed, 111 insertions(+), 12 deletions(-)
>
> diff --git a/block/blk-core.c b/block/blk-core.c
> index 195c5f7..e6a4f90 100644
> --- a/block/blk-core.c
> +++ b/block/blk-core.c
> @@ -695,7 +695,7 @@ static inline void blk_free_request(struct request_queue *q, struct request *rq)
> }
>
> static struct request *
> -blk_alloc_request(struct request_queue *q, struct io_cq *icq,
> +blk_alloc_request(struct request_queue *q, struct bio *bio, struct io_cq *icq,
> unsigned int flags, gfp_t gfp_mask)
> {
> struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask);
> @@ -709,7 +709,7 @@ blk_alloc_request(struct request_queue *q, struct io_cq *icq,
>
> if (flags & REQ_ELVPRIV) {
> rq->elv.icq = icq;
> - if (unlikely(elv_set_request(q, rq, gfp_mask))) {
> + if (unlikely(elv_set_request(q, rq, bio, gfp_mask))) {
> mempool_free(rq, q->rq.rq_pool);
> return NULL;
> }
> @@ -809,6 +809,20 @@ static bool blk_rq_should_init_elevator(struct bio *bio)
> }
>
> /**
> + * rq_ioc - determine io_context for request allocation
> + * @bio: request being allocated is for this bio (can be %NULL)
> + *
> + * Determine io_context to use for request allocation for @bio. May return
> + * %NULL if %current->io_context doesn't exist.
> + */
> +static struct io_context *rq_ioc(struct bio *bio)
> +{
> + if (bio && bio->bi_ioc)
> + return bio->bi_ioc;
> + return current->io_context;
> +}
> +
> +/**
> * get_request - get a free request
> * @q: request_queue to allocate request from
> * @rw_flags: RW and SYNC flags
> @@ -835,7 +849,7 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
> int may_queue;
> retry:
> et = q->elevator->type;
> - ioc = current->io_context;
> + ioc = rq_ioc(bio);
>
> if (unlikely(blk_queue_dead(q)))
> return NULL;
> @@ -918,14 +932,16 @@ retry:
>
> /* create icq if missing */
> if ((rw_flags & REQ_ELVPRIV) && unlikely(et->icq_cache && !icq)) {
> - ioc = create_io_context(gfp_mask, q->node);
> - if (ioc)
> - icq = ioc_create_icq(ioc, q, gfp_mask);
> + create_io_context(gfp_mask, q->node);
> + ioc = rq_ioc(bio);
> + if (!ioc)
> + goto fail_alloc;
> + icq = ioc_create_icq(ioc, q, gfp_mask);
> if (!icq)
> goto fail_alloc;
> }
>
> - rq = blk_alloc_request(q, icq, rw_flags, gfp_mask);
> + rq = blk_alloc_request(q, bio, icq, rw_flags, gfp_mask);
> if (unlikely(!rq))
> goto fail_alloc;
>
> diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
> index 00e28a3..b2aabe8 100644
> --- a/block/cfq-iosched.c
> +++ b/block/cfq-iosched.c
> @@ -3299,7 +3299,8 @@ split_cfqq(struct cfq_io_cq *cic, struct cfq_queue *cfqq)
> * Allocate cfq data structures associated with this request.
> */
> static int
> -cfq_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask)
> +cfq_set_request(struct request_queue *q, struct request *rq, struct bio *bio,
> + gfp_t gfp_mask)
> {
> struct cfq_data *cfqd = q->elevator->elevator_data;
> struct cfq_io_cq *cic = icq_to_cic(rq->elv.icq);
> diff --git a/block/elevator.c b/block/elevator.c
> index 06d9869..6315a27 100644
> --- a/block/elevator.c
> +++ b/block/elevator.c
> @@ -663,12 +663,13 @@ struct request *elv_former_request(struct request_queue *q, struct request *rq)
> return NULL;
> }
>
> -int elv_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask)
> +int elv_set_request(struct request_queue *q, struct request *rq,
> + struct bio *bio, gfp_t gfp_mask)
> {
> struct elevator_queue *e = q->elevator;
>
> if (e->type->ops.elevator_set_req_fn)
> - return e->type->ops.elevator_set_req_fn(q, rq, gfp_mask);
> + return e->type->ops.elevator_set_req_fn(q, rq, bio, gfp_mask);
> return 0;
> }
>
> diff --git a/fs/bio.c b/fs/bio.c
> index b980ecd..142214b 100644
> --- a/fs/bio.c
> +++ b/fs/bio.c
> @@ -19,12 +19,14 @@
> #include <linux/swap.h>
> #include <linux/bio.h>
> #include <linux/blkdev.h>
> +#include <linux/iocontext.h>
> #include <linux/slab.h>
> #include <linux/init.h>
> #include <linux/kernel.h>
> #include <linux/module.h>
> #include <linux/mempool.h>
> #include <linux/workqueue.h>
> +#include <linux/cgroup.h>
> #include <scsi/sg.h> /* for struct sg_iovec */
>
> #include <trace/events/block.h>
> @@ -418,6 +420,7 @@ void bio_put(struct bio *bio)
> * last put frees it
> */
> if (atomic_dec_and_test(&bio->bi_cnt)) {
> + bio_disassociate_task(bio);
> bio->bi_next = NULL;
> bio->bi_destructor(bio);
> }
> @@ -1641,6 +1644,64 @@ bad:
> }
> EXPORT_SYMBOL(bioset_create);
>
> +#ifdef CONFIG_BLK_CGROUP
> +/**
> + * bio_associate_current - associate a bio with %current
> + * @bio: target bio
> + *
> + * Associate @bio with %current if it hasn't been associated yet. Block
> + * layer will treat @bio as if it were issued by %current no matter which
> + * task actually issues it.
> + *
> + * This function takes an extra reference of %current's io_context and blkcg
> + * which will be put when @bio is released. The caller must own @bio,
> + * ensure %current->io_context exists, and is responsible for synchronizing
> + * calls to this function.
> + */
> +int bio_associate_current(struct bio *bio)
> +{
> + struct io_context *ioc;
> + struct cgroup_subsys_state *css;
> +
> + if (bio->bi_ioc)
> + return -EBUSY;
> +
> + ioc = current->io_context;
> + if (!ioc)
> + return -ENOENT;
> +
> + /* acquire active ref on @ioc and associate */
> + get_io_context_active(ioc);
> + bio->bi_ioc = ioc;
> +
> + /* associate blkcg if exists */
> + rcu_read_lock();
> + css = task_subsys_state(current, blkio_subsys_id);
> + if (css && css_tryget(css))
> + bio->bi_css = css;
> + rcu_read_unlock();
> +
> + return 0;
> +}
> +
> +/**
> + * bio_disassociate_task - undo bio_associate_current()
> + * @bio: target bio
> + */
> +void bio_disassociate_task(struct bio *bio)
> +{
> + if (bio->bi_ioc) {
> + put_io_context(bio->bi_ioc);
> + bio->bi_ioc = NULL;
> + }
> + if (bio->bi_css) {
> + css_put(bio->bi_css);
> + bio->bi_css = NULL;
> + }
> +}
> +
> +#endif /* CONFIG_BLK_CGROUP */
> +
> static void __init biovec_init_slabs(void)
> {
> int i;
> diff --git a/include/linux/bio.h b/include/linux/bio.h
> index 129a9c0..692d3d5 100644
> --- a/include/linux/bio.h
> +++ b/include/linux/bio.h
> @@ -268,6 +268,14 @@ extern struct bio_vec *bvec_alloc_bs(gfp_t, int, unsigned long *, struct bio_set
> extern void bvec_free_bs(struct bio_set *, struct bio_vec *, unsigned int);
> extern unsigned int bvec_nr_vecs(unsigned short idx);
>
> +#ifdef CONFIG_BLK_CGROUP
> +int bio_associate_current(struct bio *bio);
> +void bio_disassociate_task(struct bio *bio);
> +#else /* CONFIG_BLK_CGROUP */
> +static inline int bio_associate_current(struct bio *bio) { return -ENOENT; }
> +static inline void bio_disassociate_task(struct bio *bio) { }
> +#endif /* CONFIG_BLK_CGROUP */
> +
> /*
> * bio_set is used to allow other portions of the IO system to
> * allocate their own private memory pools for bio and iovec structures.
> diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
> index 4053cbd..0edb65d 100644
> --- a/include/linux/blk_types.h
> +++ b/include/linux/blk_types.h
> @@ -14,6 +14,8 @@ struct bio;
> struct bio_integrity_payload;
> struct page;
> struct block_device;
> +struct io_context;
> +struct cgroup_subsys_state;
> typedef void (bio_end_io_t) (struct bio *, int);
> typedef void (bio_destructor_t) (struct bio *);
>
> @@ -66,6 +68,14 @@ struct bio {
> bio_end_io_t *bi_end_io;
>
> void *bi_private;
> +#ifdef CONFIG_BLK_CGROUP
> + /*
> + * Optional ioc and css associated with this bio. Put on bio
> + * release. Read comment on top of bio_associate_current().
> + */
> + struct io_context *bi_ioc;
> + struct cgroup_subsys_state *bi_css;
> +#endif
> #if defined(CONFIG_BLK_DEV_INTEGRITY)
> struct bio_integrity_payload *bi_integrity; /* data integrity */
> #endif
> diff --git a/include/linux/elevator.h b/include/linux/elevator.h
> index 97fb255..c03af76 100644
> --- a/include/linux/elevator.h
> +++ b/include/linux/elevator.h
> @@ -28,7 +28,8 @@ typedef int (elevator_may_queue_fn) (struct request_queue *, int);
>
> typedef void (elevator_init_icq_fn) (struct io_cq *);
> typedef void (elevator_exit_icq_fn) (struct io_cq *);
> -typedef int (elevator_set_req_fn) (struct request_queue *, struct request *, gfp_t);
> +typedef int (elevator_set_req_fn) (struct request_queue *, struct request *,
> + struct bio *, gfp_t);
> typedef void (elevator_put_req_fn) (struct request *);
> typedef void (elevator_activate_req_fn) (struct request_queue *, struct request *);
> typedef void (elevator_deactivate_req_fn) (struct request_queue *, struct request *);
> @@ -129,7 +130,8 @@ extern void elv_unregister_queue(struct request_queue *q);
> extern int elv_may_queue(struct request_queue *, int);
> extern void elv_abort_queue(struct request_queue *);
> extern void elv_completed_request(struct request_queue *, struct request *);
> -extern int elv_set_request(struct request_queue *, struct request *, gfp_t);
> +extern int elv_set_request(struct request_queue *q, struct request *rq,
> + struct bio *bio, gfp_t gfp_mask);
> extern void elv_put_request(struct request_queue *, struct request *);
> extern void elv_drain_elevator(struct request_queue *);
>
> --
> 1.7.7.3
>
next prev parent reply other threads:[~2012-02-17 1:19 UTC|newest]
Thread overview: 50+ messages / expand[flat|nested] mbox.gz Atom feed top
2012-02-16 22:37 [PATCHSET] blkcg: update locking and fix stacking Tejun Heo
2012-02-16 22:37 ` [PATCH 1/9] blkcg: use double locking instead of RCU for blkg synchronization Tejun Heo
2012-02-16 22:37 ` [PATCH 2/9] blkcg: drop unnecessary RCU locking Tejun Heo
2012-02-17 16:19 ` Vivek Goyal
2012-02-17 17:07 ` Tejun Heo
2012-02-17 17:14 ` Tejun Heo
2012-02-17 16:47 ` Vivek Goyal
2012-02-17 17:11 ` Tejun Heo
2012-02-17 17:28 ` Vivek Goyal
2012-02-17 17:43 ` Tejun Heo
2012-02-17 18:08 ` Vivek Goyal
2012-02-17 18:16 ` Tejun Heo
2012-02-22 0:49 ` [PATCH UPDATED " Tejun Heo
2012-02-16 22:37 ` [PATCH 3/9] block: restructure get_request() Tejun Heo
2012-02-16 22:37 ` [PATCH 4/9] block: interface update for ioc/icq creation functions Tejun Heo
2012-02-16 22:37 ` [PATCH 5/9] block: ioc_task_link() can't fail Tejun Heo
2012-02-17 20:41 ` Vivek Goyal
2012-02-17 22:18 ` Tejun Heo
2012-02-16 22:37 ` [PATCH 6/9] block: add io_context->active_ref Tejun Heo
2012-02-16 22:37 ` [PATCH 7/9] block: implement bio_associate_current() Tejun Heo
2012-02-17 1:19 ` Kent Overstreet [this message]
2012-02-17 22:14 ` Tejun Heo
2012-02-17 22:34 ` Vivek Goyal
2012-02-17 22:41 ` Tejun Heo
2012-02-17 22:51 ` Vivek Goyal
2012-02-17 22:57 ` Tejun Heo
2012-02-20 14:22 ` Vivek Goyal
2012-02-20 16:59 ` Tejun Heo
2012-02-20 19:14 ` Vivek Goyal
2012-02-20 21:21 ` Tejun Heo
2012-02-27 23:12 ` Chris Wright
2012-02-28 14:10 ` Vivek Goyal
2012-02-28 17:01 ` Chris Wright
2012-02-28 20:11 ` Stefan Hajnoczi
2012-02-20 14:36 ` Vivek Goyal
2012-02-20 17:01 ` Tejun Heo
2012-02-20 19:16 ` Vivek Goyal
2012-02-20 21:06 ` Tejun Heo
2012-02-20 21:10 ` Vivek Goyal
2012-02-17 22:56 ` Vivek Goyal
2012-02-17 23:06 ` Tejun Heo
2012-02-17 21:33 ` Vivek Goyal
2012-02-17 22:03 ` Tejun Heo
2012-02-17 22:29 ` Vivek Goyal
2012-02-17 22:38 ` Tejun Heo
2012-02-17 22:42 ` Tejun Heo
2012-02-16 22:37 ` [PATCH 8/9] block: make block cgroup policies follow bio task association Tejun Heo
2012-02-16 22:37 ` [PATCH 9/9] block: make blk-throttle preserve the issuing task on delayed bios Tejun Heo
2012-02-17 21:58 ` Vivek Goyal
2012-02-17 22:17 ` Tejun Heo
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20120217011907.GA15073@google.com \
--to=koverstreet@google.com \
--cc=axboe@kernel.dk \
--cc=ctalbott@google.com \
--cc=linux-kernel@vger.kernel.org \
--cc=rni@google.com \
--cc=tj@kernel.org \
--cc=vgoyal@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).