From: David Sterba <dave@jikos.cz>
To: Arne Jansen <sensille@gmx.net>
Cc: linux-btrfs@vger.kernel.org
Subject: Re: [PATCH 1/5] btrfs: extend readahead interface
Date: Wed, 9 May 2012 16:48:01 +0200 [thread overview]
Message-ID: <20120509144800.GL19331@twin.jikos.cz> (raw)
In-Reply-To: <32d2d06e11e45bde2f092d647991b712d2e825b6.1334241664.git.sensille@gmx.net>
On Thu, Apr 12, 2012 at 05:54:38PM +0200, Arne Jansen wrote:
> @@ -97,30 +119,87 @@ struct reada_machine_work {
> +/*
> + * this is the default callback for readahead. It just descends into the
> + * tree within the range given at creation. if an error occurs, just cut
> + * this part of the tree
> + */
> +static void readahead_descend(struct btrfs_root *root, struct reada_control *rc,
> + u64 wanted_generation, struct extent_buffer *eb,
> + u64 start, int err, struct btrfs_key *top,
> + void *ctx)
> +{
> + int nritems;
> + u64 generation;
> + int level;
> + int i;
> +
> + BUG_ON(err == -EAGAIN); /* FIXME: not yet implemented, don't cancel
> + * readahead with default callback */
> +
> + if (err || eb == NULL) {
> + /*
> + * this is the error case, the extent buffer has not been
> + * read correctly. We won't access anything from it and
> + * just cleanup our data structures. Effectively this will
> + * cut the branch below this node from read ahead.
> + */
> + return;
> + }
> +
> + level = btrfs_header_level(eb);
> + if (level == 0) {
> + /*
> + * if this is a leaf, ignore the content.
> + */
> + return;
> + }
> +
> + nritems = btrfs_header_nritems(eb);
> + generation = btrfs_header_generation(eb);
> +
> + /*
> + * if the generation doesn't match, just ignore this node.
> + * This will cut off a branch from prefetch. Alternatively one could
> + * start a new (sub-) prefetch for this branch, starting again from
> + * root.
> + */
> + if (wanted_generation != generation)
> + return;
I think I saw passing wanted_generation = 0 somewheree, but cannot find
it now again. Is it an expected value for the default RA callback,
meaning eg. 'any generation I find' ?
> +
> + for (i = 0; i < nritems; i++) {
> + u64 n_gen;
> + struct btrfs_key key;
> + struct btrfs_key next_key;
> + u64 bytenr;
> +
> + btrfs_node_key_to_cpu(eb, &key, i);
> + if (i + 1 < nritems)
> + btrfs_node_key_to_cpu(eb, &next_key, i + 1);
> + else
> + next_key = *top;
> + bytenr = btrfs_node_blockptr(eb, i);
> + n_gen = btrfs_node_ptr_generation(eb, i);
> +
> + if (btrfs_comp_cpu_keys(&key, &rc->key_end) < 0 &&
> + btrfs_comp_cpu_keys(&next_key, &rc->key_start) > 0)
> + reada_add_block(rc, bytenr, &next_key,
> + level - 1, n_gen, ctx);
> + }
> +}
>
> @@ -142,65 +221,21 @@ static int __readahead_hook(struct btrfs_root *root, struct extent_buffer *eb,
> re->scheduled_for = NULL;
> spin_unlock(&re->lock);
>
> - if (err == 0) {
> - nritems = level ? btrfs_header_nritems(eb) : 0;
> - generation = btrfs_header_generation(eb);
> - /*
> - * FIXME: currently we just set nritems to 0 if this is a leaf,
> - * effectively ignoring the content. In a next step we could
> - * trigger more readahead depending from the content, e.g.
> - * fetch the checksums for the extents in the leaf.
> - */
> - } else {
> + /*
> + * call hooks for all registered readaheads
> + */
> + list_for_each_entry(rec, &list, list) {
> + btrfs_tree_read_lock(eb);
> /*
> - * this is the error case, the extent buffer has not been
> - * read correctly. We won't access anything from it and
> - * just cleanup our data structures. Effectively this will
> - * cut the branch below this node from read ahead.
> + * we set the lock to blocking, as the callback might want to
> + * sleep on allocations.
What about a more finer control given to the callbacks? The blocking
lock may be unnecessary if the callback does not sleep.
My idea is to add a field to 'struct reada_uptodate_ctx', preset with
BTRFS_READ_LOCK by default, but let the RA user to set it to its needs.
> */
> - nritems = 0;
> - generation = 0;
> + btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
> + rec->rc->callback(root, rec->rc, rec->generation, eb, start,
> + err, &re->top, rec->ctx);
> + btrfs_tree_read_unlock_blocking(eb);
> }
>
> @@ -521,12 +593,87 @@ static void reada_control_release(struct kref *kref)
> +/*
> + * context to pass from reada_add_block to worker in case the extent is
> + * already uptodate in memory
> + */
> +struct reada_uptodate_ctx {
> + struct btrfs_key top;
> + struct extent_buffer *eb;
> + struct reada_control *rc;
> + u64 logical;
> + u64 generation;
> + void *ctx;
> + struct btrfs_work work;
eg.
int lock_type;
int want_lock_blocking;
> +};
> +
> +/* worker for immediate processing of uptodate blocks */
> +static void reada_add_block_uptodate(struct btrfs_work *work)
> +{
> + struct reada_uptodate_ctx *ruc;
> +
> + ruc = container_of(work, struct reada_uptodate_ctx, work);
> +
> + btrfs_tree_read_lock(ruc->eb);
> + /*
> + * we set the lock to blocking, as the callback might want to sleep
> + * on allocations.
> + */
> + btrfs_set_lock_blocking_rw(ruc->eb, BTRFS_READ_LOCK);
same here
> + ruc->rc->callback(ruc->rc->root, ruc->rc, ruc->generation, ruc->eb,
> + ruc->logical, 0, &ruc->top, ruc->ctx);
> + btrfs_tree_read_unlock_blocking(ruc->eb);
> +
> + reada_control_elem_put(ruc->rc);
> + free_extent_buffer(ruc->eb);
> + kfree(ruc);
> +}
> +
> @@ -886,17 +1074,18 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root,
> .offset = (u64)-1
> };
>
> - rc = kzalloc(sizeof(*rc), GFP_NOFS);
> + rc = btrfs_reada_alloc(parent, root, key_start, key_end, callback);
> if (!rc)
> - return ERR_PTR(-ENOMEM);
> + return -ENOMEM;
>
> - rc->root = root;
> - rc->key_start = *key_start;
> - rc->key_end = *key_end;
> - atomic_set(&rc->elems, 0);
> - init_waitqueue_head(&rc->wait);
> - kref_init(&rc->refcnt);
> - kref_get(&rc->refcnt); /* one ref for having elements */
> + if (rcp) {
> + *rcp = rc;
> + /*
> + * as we return the rc, get an addition ref on it for
(additional)
> + * the caller
> + */
> + kref_get(&rc->refcnt);
> + }
>
> node = btrfs_root_node(root);
> start = node->start;
> @@ -904,35 +1093,36 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root,
> +int btrfs_reada_wait(struct reada_control *rc)
> {
> - struct reada_control *rc = handle;
> + struct btrfs_fs_info *fs_info = rc->root->fs_info;
> + int i;
>
> while (atomic_read(&rc->elems)) {
> wait_event_timeout(rc->wait, atomic_read(&rc->elems) == 0,
> - 5 * HZ);
> - dump_devs(rc->root->fs_info, rc->elems < 10 ? 1 : 0);
> + 1 * HZ);
I think it's recommended to use msecs_to_jiffies instead of HZ.
> + dump_devs(fs_info, atomic_read(&rc->elems) < 10 ? 1 : 0);
> + printk(KERN_DEBUG "reada_wait on %p: %d elems\n", rc,
> + atomic_read(&rc->elems));
> }
>
> - dump_devs(rc->root->fs_info, rc->elems < 10 ? 1 : 0);
> + dump_devs(fs_info, atomic_read(&rc->elems) < 10 ? 1 : 0);
>
> kref_put(&rc->refcnt, reada_control_release);
>
> return 0;
> }
--
The reference counting changed in a non-trivial way, I'd like to have
another look just at that to be sure, but from current review round it
looks ok.
The RA changes look independet, do you intend to submit it earlier that
with the whole droptree? It'd get wider testing.
david
next prev parent reply other threads:[~2012-05-09 14:48 UTC|newest]
Thread overview: 23+ messages / expand[flat|nested] mbox.gz Atom feed top
2012-04-12 15:54 [PATCH 0/5] btrfs: snapshot deletion via readahead Arne Jansen
2012-04-12 15:54 ` [PATCH 1/5] btrfs: extend readahead interface Arne Jansen
2012-05-09 14:48 ` David Sterba [this message]
2012-05-17 13:47 ` Arne Jansen
2012-04-12 15:54 ` [PATCH 2/5] btrfs: add droptree inode Arne Jansen
2012-04-12 15:54 ` [PATCH 3/5] btrfs: droptree structures and initialization Arne Jansen
2012-04-12 15:54 ` [PATCH 4/5] btrfs: droptree implementation Arne Jansen
2012-04-13 2:53 ` Tsutomu Itoh
2012-04-13 6:48 ` Arne Jansen
2012-04-12 15:54 ` [PATCH 5/5] btrfs: use droptree for snapshot deletion Arne Jansen
2012-04-13 3:40 ` [PATCH 0/5] btrfs: snapshot deletion via readahead Liu Bo
2012-04-13 4:54 ` cwillu
2012-04-13 6:53 ` Arne Jansen
2012-04-13 7:10 ` Liu Bo
2012-04-13 7:19 ` Arne Jansen
2012-04-13 7:43 ` Liu Bo
2012-04-17 7:35 ` Arne Jansen
2012-04-17 8:21 ` Liu Bo
2012-04-27 3:16 ` Liu Bo
2012-04-27 6:13 ` Arne Jansen
2012-04-27 6:48 ` Liu Bo
2012-04-13 7:20 ` Liu Bo
2012-04-13 10:31 ` Arne Jansen
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20120509144800.GL19331@twin.jikos.cz \
--to=dave@jikos.cz \
--cc=linux-btrfs@vger.kernel.org \
--cc=sensille@gmx.net \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).