From: Benny Halevy <bhalevy.lists@gmail.com>
To: Jim Rees <rees@umich.edu>
Cc: linux-nfs@vger.kernel.org, peter honeyman <honey@citi.umich.edu>
Subject: Re: [PATCH 21/34] pnfsblock: SPLITME: add extent manipulation functions
Date: Tue, 14 Jun 2011 11:40:24 -0400 [thread overview]
Message-ID: <4DF780E8.8060300@gmail.com> (raw)
In-Reply-To: <7075734d5615269fb396abdbf8d2b30cf602acc1.1307921138.git.rees@umich.edu>
Regarding the "SPLITME", please either fix the commit message
or split the patch :)
(I'm in favour of keeping this patch as it is)
Benny
On 2011-06-12 19:44, Jim Rees wrote:
> From: Fred Isaman <iisaman@citi.umich.edu>
>
> Adds working implementations of various support functions
> to handle INVAL extents, needed by writes, such as
> mark_initialized_sectors and is_sector_initialized.
>
> SPLIT: this needs to be split into the exported functions, and the
> range support functions (which will be replaced eventually.)
>
> [pnfsblock: fix 64-bit compiler warnings for extent manipulation]
> Signed-off-by: Fred Isaman <iisaman@citi.umich.edu>
> Signed-off-by: Benny Halevy <bhalevy@panasas.com>
> ---
> fs/nfs/blocklayout/blocklayout.h | 30 ++++-
> fs/nfs/blocklayout/extents.c | 253 ++++++++++++++++++++++++++++++++++++++
> 2 files changed, 281 insertions(+), 2 deletions(-)
>
> diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h
> index 06aa36a..a231d49 100644
> --- a/fs/nfs/blocklayout/blocklayout.h
> +++ b/fs/nfs/blocklayout/blocklayout.h
> @@ -35,6 +35,8 @@
> #include <linux/nfs_fs.h>
> #include "../pnfs.h"
>
> +#define PAGE_CACHE_SECTORS (PAGE_CACHE_SIZE >> 9)
> +
> #define PG_pnfserr PG_owner_priv_1
> #define PagePnfsErr(page) test_bit(PG_pnfserr, &(page)->flags)
> #define SetPagePnfsErr(page) set_bit(PG_pnfserr, &(page)->flags)
> @@ -101,8 +103,23 @@ enum exstate4 {
> PNFS_BLOCK_NONE_DATA = 3 /* unmapped, it's a hole */
> };
>
> +#define MY_MAX_TAGS (15) /* tag bitnums used must be less than this */
> +
> +struct my_tree_t {
> + sector_t mtt_step_size; /* Internal sector alignment */
> + struct list_head mtt_stub; /* Should be a radix tree */
> +};
> +
> struct pnfs_inval_markings {
> - /* STUB */
> + spinlock_t im_lock;
> + struct my_tree_t im_tree; /* Sectors that need LAYOUTCOMMIT */
> + sector_t im_block_size; /* Server blocksize in sectors */
> +};
> +
> +struct pnfs_inval_tracking {
> + struct list_head it_link;
> + int it_sector;
> + int it_tags;
> };
>
> /* sector_t fields are all in 512-byte sectors */
> @@ -121,7 +138,11 @@ struct pnfs_block_extent {
> static inline void
> INIT_INVAL_MARKS(struct pnfs_inval_markings *marks, sector_t blocksize)
> {
> - /* STUB */
> + spin_lock_init(&marks->im_lock);
> + INIT_LIST_HEAD(&marks->im_tree.mtt_stub);
> + marks->im_block_size = blocksize;
> + marks->im_tree.mtt_step_size = min((sector_t)PAGE_CACHE_SECTORS,
> + blocksize);
> }
>
> enum extentclass4 {
> @@ -222,8 +243,13 @@ void free_block_dev(struct pnfs_block_dev *bdev);
> struct pnfs_block_extent *
> find_get_extent(struct pnfs_block_layout *bl, sector_t isect,
> struct pnfs_block_extent **cow_read);
> +int mark_initialized_sectors(struct pnfs_inval_markings *marks,
> + sector_t offset, sector_t length,
> + sector_t **pages);
> void put_extent(struct pnfs_block_extent *be);
> struct pnfs_block_extent *alloc_extent(void);
> +struct pnfs_block_extent *get_extent(struct pnfs_block_extent *be);
> +int is_sector_initialized(struct pnfs_inval_markings *marks, sector_t isect);
> int add_and_merge_extent(struct pnfs_block_layout *bl,
> struct pnfs_block_extent *new);
>
> diff --git a/fs/nfs/blocklayout/extents.c b/fs/nfs/blocklayout/extents.c
> index f0b3f13..3d36f66 100644
> --- a/fs/nfs/blocklayout/extents.c
> +++ b/fs/nfs/blocklayout/extents.c
> @@ -33,6 +33,259 @@
> #include "blocklayout.h"
> #define NFSDBG_FACILITY NFSDBG_PNFS_LD
>
> +/* Bit numbers */
> +#define EXTENT_INITIALIZED 0
> +#define EXTENT_WRITTEN 1
> +#define EXTENT_IN_COMMIT 2
> +#define INTERNAL_EXISTS MY_MAX_TAGS
> +#define INTERNAL_MASK ((1 << INTERNAL_EXISTS) - 1)
> +
> +/* Returns largest t<=s s.t. t%base==0 */
> +static inline sector_t normalize(sector_t s, int base)
> +{
> + sector_t tmp = s; /* Since do_div modifies its argument */
> + return s - do_div(tmp, base);
> +}
> +
> +static inline sector_t normalize_up(sector_t s, int base)
> +{
> + return normalize(s + base - 1, base);
> +}
> +
> +/* Complete stub using a list while we determine the API wanted */
> +
> +/* Returns tags, or negative */
> +static int32_t _find_entry(struct my_tree_t *tree, u64 s)
> +{
> + struct pnfs_inval_tracking *pos;
> +
> + dprintk("%s(%llu) enter\n", __func__, s);
> + list_for_each_entry_reverse(pos, &tree->mtt_stub, it_link) {
> + if (pos->it_sector > s)
> + continue;
> + else if (pos->it_sector == s)
> + return pos->it_tags & INTERNAL_MASK;
> + else
> + break;
> + }
> + return -ENOENT;
> +}
> +
> +static inline
> +int _has_tag(struct my_tree_t *tree, u64 s, int32_t tag)
> +{
> + int32_t tags;
> +
> + dprintk("%s(%llu, %i) enter\n", __func__, s, tag);
> + s = normalize(s, tree->mtt_step_size);
> + tags = _find_entry(tree, s);
> + if ((tags < 0) || !(tags & (1 << tag)))
> + return 0;
> + else
> + return 1;
> +}
> +
> +/* Creates entry with tag, or if entry already exists, unions tag to it.
> + * If storage is not NULL, newly created entry will use it.
> + * Returns number of entries added, or negative on error.
> + */
> +static int _add_entry(struct my_tree_t *tree, u64 s, int32_t tag,
> + struct pnfs_inval_tracking *storage)
> +{
> + int found = 0;
> + struct pnfs_inval_tracking *pos;
> +
> + dprintk("%s(%llu, %i, %p) enter\n", __func__, s, tag, storage);
> + list_for_each_entry_reverse(pos, &tree->mtt_stub, it_link) {
> + if (pos->it_sector > s)
> + continue;
> + else if (pos->it_sector == s) {
> + found = 1;
> + break;
> + } else
> + break;
> + }
> + if (found) {
> + pos->it_tags |= (1 << tag);
> + return 0;
> + } else {
> + struct pnfs_inval_tracking *new;
> + if (storage)
> + new = storage;
> + else {
> + new = kmalloc(sizeof(*new), GFP_KERNEL);
> + if (!new)
> + return -ENOMEM;
> + }
> + new->it_sector = s;
> + new->it_tags = (1 << tag);
> + list_add(&new->it_link, &pos->it_link);
> + return 1;
> + }
> +}
> +
> +/* XXXX Really want option to not create */
> +/* Over range, unions tag with existing entries, else creates entry with tag */
> +static int _set_range(struct my_tree_t *tree, int32_t tag, u64 s, u64 length)
> +{
> + u64 i;
> +
> + dprintk("%s(%i, %llu, %llu) enter\n", __func__, tag, s, length);
> + for (i = normalize(s, tree->mtt_step_size); i < s + length;
> + i += tree->mtt_step_size)
> + if (_add_entry(tree, i, tag, NULL))
> + return -ENOMEM;
> + return 0;
> +}
> +
> +/* Ensure that future operations on given range of tree will not malloc */
> +static int _preload_range(struct my_tree_t *tree, u64 offset, u64 length)
> +{
> + u64 start, end, s;
> + int count, i, used = 0, status = -ENOMEM;
> + struct pnfs_inval_tracking **storage;
> +
> + dprintk("%s(%llu, %llu) enter\n", __func__, offset, length);
> + start = normalize(offset, tree->mtt_step_size);
> + end = normalize_up(offset + length, tree->mtt_step_size);
> + count = (int)(end - start) / (int)tree->mtt_step_size;
> +
> + /* Pre-malloc what memory we might need */
> + storage = kmalloc(sizeof(*storage) * count, GFP_KERNEL);
> + if (!storage)
> + return -ENOMEM;
> + for (i = 0; i < count; i++) {
> + storage[i] = kmalloc(sizeof(struct pnfs_inval_tracking),
> + GFP_KERNEL);
> + if (!storage[i])
> + goto out_cleanup;
> + }
> +
> + /* Now need lock - HOW??? */
> +
> + for (s = start; s < end; s += tree->mtt_step_size)
> + used += _add_entry(tree, s, INTERNAL_EXISTS, storage[used]);
> +
> + /* Unlock - HOW??? */
> + status = 0;
> +
> + out_cleanup:
> + for (i = used; i < count; i++) {
> + if (!storage[i])
> + break;
> + kfree(storage[i]);
> + }
> + kfree(storage);
> + return status;
> +}
> +
> +static void set_needs_init(sector_t *array, sector_t offset)
> +{
> + sector_t *p = array;
> +
> + dprintk("%s enter\n", __func__);
> + if (!p)
> + return;
> + while (*p < offset)
> + p++;
> + if (*p == offset)
> + return;
> + else if (*p == ~0) {
> + *p++ = offset;
> + *p = ~0;
> + return;
> + } else {
> + sector_t *save = p;
> + dprintk("%s Adding %llu\n", __func__, (u64)offset);
> + while (*p != ~0)
> + p++;
> + p++;
> + memmove(save + 1, save, (char *)p - (char *)save);
> + *save = offset;
> + return;
> + }
> +}
> +
> +/* We are relying on page lock to serialize this */
> +int is_sector_initialized(struct pnfs_inval_markings *marks, sector_t isect)
> +{
> + int rv;
> +
> + spin_lock(&marks->im_lock);
> + rv = _has_tag(&marks->im_tree, isect, EXTENT_INITIALIZED);
> + spin_unlock(&marks->im_lock);
> + return rv;
> +}
> +
> +/* Marks sectors in [offset, offset + length) as having been initialized.
> + * All lengths are step-aligned, where step is min(pagesize, blocksize).
> + * Notes where partial block is initialized, and helps prepare it for
> + * complete initialization later.
> + */
> +/* Currently assumes offset is page-aligned */
> +int mark_initialized_sectors(struct pnfs_inval_markings *marks,
> + sector_t offset, sector_t length,
> + sector_t **pages)
> +{
> + sector_t s, start, end;
> + sector_t *array = NULL; /* Pages to mark */
> +
> + dprintk("%s(offset=%llu,len=%llu) enter\n",
> + __func__, (u64)offset, (u64)length);
> + s = max((sector_t) 3,
> + 2 * (marks->im_block_size / (PAGE_CACHE_SECTORS)));
> + dprintk("%s set max=%llu\n", __func__, (u64)s);
> + if (pages) {
> + array = kmalloc(s * sizeof(sector_t), GFP_KERNEL);
> + if (!array)
> + goto outerr;
> + array[0] = ~0;
> + }
> +
> + start = normalize(offset, marks->im_block_size);
> + end = normalize_up(offset + length, marks->im_block_size);
> + if (_preload_range(&marks->im_tree, start, end - start))
> + goto outerr;
> +
> + spin_lock(&marks->im_lock);
> +
> + for (s = normalize_up(start, PAGE_CACHE_SECTORS);
> + s < offset; s += PAGE_CACHE_SECTORS) {
> + dprintk("%s pre-area pages\n", __func__);
> + /* Portion of used block is not initialized */
> + if (!_has_tag(&marks->im_tree, s, EXTENT_INITIALIZED))
> + set_needs_init(array, s);
> + }
> + if (_set_range(&marks->im_tree, EXTENT_INITIALIZED, offset, length))
> + goto out_unlock;
> + for (s = normalize_up(offset + length, PAGE_CACHE_SECTORS);
> + s < end; s += PAGE_CACHE_SECTORS) {
> + dprintk("%s post-area pages\n", __func__);
> + if (!_has_tag(&marks->im_tree, s, EXTENT_INITIALIZED))
> + set_needs_init(array, s);
> + }
> +
> + spin_unlock(&marks->im_lock);
> +
> + if (pages) {
> + if (array[0] == ~0) {
> + kfree(array);
> + *pages = NULL;
> + } else
> + *pages = array;
> + }
> + return 0;
> +
> + out_unlock:
> + spin_unlock(&marks->im_lock);
> + outerr:
> + if (pages) {
> + kfree(array);
> + *pages = NULL;
> + }
> + return -ENOMEM;
> +}
> +
> static void print_bl_extent(struct pnfs_block_extent *be)
> {
> dprintk("PRINT EXTENT extent %p\n", be);
next prev parent reply other threads:[~2011-06-14 15:40 UTC|newest]
Thread overview: 58+ messages / expand[flat|nested] mbox.gz Atom feed top
2011-06-12 23:43 [PATCH 00/34] pnfs block layout driver based on v3.0-rc2 Jim Rees
2011-06-12 23:43 ` [PATCH 01/34] pnfs: GETDEVICELIST Jim Rees
2011-06-12 23:43 ` [PATCH 02/34] pnfs: add set-clear layoutdriver interface Jim Rees
2011-06-12 23:43 ` [PATCH 03/34] pnfs: let layoutcommit code handle multiple segments Jim Rees
2011-06-13 14:36 ` Fred Isaman
2011-06-14 10:40 ` tao.peng
2011-06-14 13:58 ` Fred Isaman
2011-06-14 14:28 ` Benny Halevy
2011-06-12 23:43 ` [PATCH 04/34] pnfs: hook nfs_write_begin/end to allow layout driver manipulation Jim Rees
2011-06-13 14:44 ` Fred Isaman
2011-06-14 11:01 ` tao.peng
2011-06-14 14:05 ` Fred Isaman
2011-06-14 15:53 ` Peng Tao
2011-06-14 16:02 ` Fred Isaman
2011-06-12 23:43 ` [PATCH 05/34] pnfs: ask for layout_blksize and save it in nfs_server Jim Rees
2011-06-14 15:01 ` Benny Halevy
2011-06-14 15:08 ` Peng Tao
2011-06-12 23:44 ` [PATCH 06/34] pnfs: cleanup_layoutcommit Jim Rees
2011-06-13 21:19 ` Benny Halevy
2011-06-14 15:16 ` Peng Tao
2011-06-14 15:10 ` Benny Halevy
2011-06-14 15:21 ` Peng Tao
2011-06-14 15:19 ` Benny Halevy
2011-06-12 23:44 ` [PATCH 07/34] pnfsblock: define PNFS_BLOCK Kconfig option Jim Rees
2011-06-14 15:13 ` Benny Halevy
2011-06-12 23:44 ` [PATCH 08/34] pnfsblock: blocklayout stub Jim Rees
2011-06-12 23:44 ` [PATCH 09/34] pnfsblock: layout alloc and free Jim Rees
2011-06-12 23:44 ` [PATCH 10/34] Add support for simple rpc pipefs Jim Rees
2011-06-12 23:44 ` [PATCH 11/34] pnfs-block: Add block device discovery pipe Jim Rees
2011-06-12 23:44 ` [PATCH 12/34] pnfsblock: basic extent code Jim Rees
2011-06-12 23:44 ` [PATCH 13/34] pnfsblock: add device operations Jim Rees
2011-06-12 23:44 ` [PATCH 14/34] pnfsblock: remove " Jim Rees
2011-06-12 23:44 ` [PATCH 15/34] pnfsblock: lseg alloc and free Jim Rees
2011-06-12 23:44 ` [PATCH 16/34] pnfsblock: merge extents Jim Rees
2011-06-12 23:44 ` [PATCH 17/34] pnfsblock: call and parse getdevicelist Jim Rees
2011-06-14 15:36 ` Benny Halevy
2011-06-12 23:44 ` [PATCH 18/34] pnfsblock: allow use of PG_owner_priv_1 flag Jim Rees
2011-06-13 15:56 ` Fred Isaman
2011-06-12 23:44 ` [PATCH 19/34] pnfsblock: xdr decode pnfs_block_layout4 Jim Rees
2011-06-12 23:44 ` [PATCH 20/34] pnfsblock: find_get_extent Jim Rees
2011-06-12 23:44 ` [PATCH 21/34] pnfsblock: SPLITME: add extent manipulation functions Jim Rees
2011-06-14 15:40 ` Benny Halevy [this message]
2011-06-12 23:44 ` [PATCH 22/34] pnfsblock: merge rw extents Jim Rees
2011-06-12 23:44 ` [PATCH 23/34] pnfsblock: encode_layoutcommit Jim Rees
2011-06-14 15:44 ` Benny Halevy
2011-06-12 23:44 ` [PATCH 24/34] pnfsblock: cleanup_layoutcommit Jim Rees
2011-06-12 23:44 ` [PATCH 25/34] pnfsblock: bl_read_pagelist Jim Rees
2011-06-12 23:44 ` [PATCH 26/34] pnfsblock: write_begin Jim Rees
2011-06-12 23:44 ` [PATCH 27/34] pnfsblock: write_end Jim Rees
2011-06-12 23:44 ` [PATCH 28/34] pnfsblock: write_end_cleanup Jim Rees
2011-06-12 23:45 ` [PATCH 29/34] pnfsblock: bl_write_pagelist support functions Jim Rees
2011-06-12 23:45 ` [PATCH 30/34] pnfsblock: bl_write_pagelist Jim Rees
2011-06-12 23:45 ` [PATCH 31/34] pnfsblock: note written INVAL areas for layoutcommit Jim Rees
2011-06-12 23:45 ` [PATCH 32/34] pnfsblock: Implement release_inval_marks Jim Rees
2011-06-12 23:45 ` [PATCH 33/34] Add configurable prefetch size for layoutget Jim Rees
2011-06-12 23:45 ` [PATCH 34/34] NFS41: do not update isize if inode needs layoutcommit Jim Rees
2011-06-14 16:15 ` Benny Halevy
2011-06-14 16:22 ` Fred Isaman
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4DF780E8.8060300@gmail.com \
--to=bhalevy.lists@gmail.com \
--cc=honey@citi.umich.edu \
--cc=linux-nfs@vger.kernel.org \
--cc=rees@umich.edu \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).