linux-nfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Benny Halevy <bhalevy.lists@gmail.com>
To: Jim Rees <rees@umich.edu>
Cc: linux-nfs@vger.kernel.org, peter honeyman <honey@citi.umich.edu>
Subject: Re: [PATCH 21/34] pnfsblock: SPLITME: add extent manipulation functions
Date: Tue, 14 Jun 2011 11:40:24 -0400	[thread overview]
Message-ID: <4DF780E8.8060300@gmail.com> (raw)
In-Reply-To: <7075734d5615269fb396abdbf8d2b30cf602acc1.1307921138.git.rees@umich.edu>

Regarding the "SPLITME", please either fix the commit message
or split the patch :)
(I'm in favour of keeping this patch as it is)

Benny

On 2011-06-12 19:44, Jim Rees wrote:
> From: Fred Isaman <iisaman@citi.umich.edu>
>
> Adds working implementations of various support functions
> to handle INVAL extents, needed by writes, such as
> mark_initialized_sectors and is_sector_initialized.
> 
> SPLIT: this needs to be split into the exported functions, and the
> range support functions (which will be replaced eventually.)
> 
> [pnfsblock: fix 64-bit compiler warnings for extent manipulation]
> Signed-off-by: Fred Isaman <iisaman@citi.umich.edu>
> Signed-off-by: Benny Halevy <bhalevy@panasas.com>
> ---
>  fs/nfs/blocklayout/blocklayout.h |   30 ++++-
>  fs/nfs/blocklayout/extents.c     |  253 ++++++++++++++++++++++++++++++++++++++
>  2 files changed, 281 insertions(+), 2 deletions(-)
> 
> diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h
> index 06aa36a..a231d49 100644
> --- a/fs/nfs/blocklayout/blocklayout.h
> +++ b/fs/nfs/blocklayout/blocklayout.h
> @@ -35,6 +35,8 @@
>  #include <linux/nfs_fs.h>
>  #include "../pnfs.h"
>  
> +#define PAGE_CACHE_SECTORS (PAGE_CACHE_SIZE >> 9)
> +
>  #define PG_pnfserr PG_owner_priv_1
>  #define PagePnfsErr(page)	test_bit(PG_pnfserr, &(page)->flags)
>  #define SetPagePnfsErr(page)	set_bit(PG_pnfserr, &(page)->flags)
> @@ -101,8 +103,23 @@ enum exstate4 {
>  	PNFS_BLOCK_NONE_DATA		= 3  /* unmapped, it's a hole */
>  };
>  
> +#define MY_MAX_TAGS (15) /* tag bitnums used must be less than this */
> +
> +struct my_tree_t {
> +	sector_t		mtt_step_size;	/* Internal sector alignment */
> +	struct list_head	mtt_stub; /* Should be a radix tree */
> +};
> +
>  struct pnfs_inval_markings {
> -	/* STUB */
> +	spinlock_t	im_lock;
> +	struct my_tree_t im_tree;	/* Sectors that need LAYOUTCOMMIT */
> +	sector_t	im_block_size;	/* Server blocksize in sectors */
> +};
> +
> +struct pnfs_inval_tracking {
> +	struct list_head it_link;
> +	int		 it_sector;
> +	int		 it_tags;
>  };
>  
>  /* sector_t fields are all in 512-byte sectors */
> @@ -121,7 +138,11 @@ struct pnfs_block_extent {
>  static inline void
>  INIT_INVAL_MARKS(struct pnfs_inval_markings *marks, sector_t blocksize)
>  {
> -	/* STUB */
> +	spin_lock_init(&marks->im_lock);
> +	INIT_LIST_HEAD(&marks->im_tree.mtt_stub);
> +	marks->im_block_size = blocksize;
> +	marks->im_tree.mtt_step_size = min((sector_t)PAGE_CACHE_SECTORS,
> +					   blocksize);
>  }
>  
>  enum extentclass4 {
> @@ -222,8 +243,13 @@ void free_block_dev(struct pnfs_block_dev *bdev);
>  struct pnfs_block_extent *
>  find_get_extent(struct pnfs_block_layout *bl, sector_t isect,
>  		struct pnfs_block_extent **cow_read);
> +int mark_initialized_sectors(struct pnfs_inval_markings *marks,
> +			     sector_t offset, sector_t length,
> +			     sector_t **pages);
>  void put_extent(struct pnfs_block_extent *be);
>  struct pnfs_block_extent *alloc_extent(void);
> +struct pnfs_block_extent *get_extent(struct pnfs_block_extent *be);
> +int is_sector_initialized(struct pnfs_inval_markings *marks, sector_t isect);
>  int add_and_merge_extent(struct pnfs_block_layout *bl,
>  			 struct pnfs_block_extent *new);
>  
> diff --git a/fs/nfs/blocklayout/extents.c b/fs/nfs/blocklayout/extents.c
> index f0b3f13..3d36f66 100644
> --- a/fs/nfs/blocklayout/extents.c
> +++ b/fs/nfs/blocklayout/extents.c
> @@ -33,6 +33,259 @@
>  #include "blocklayout.h"
>  #define NFSDBG_FACILITY         NFSDBG_PNFS_LD
>  
> +/* Bit numbers */
> +#define EXTENT_INITIALIZED 0
> +#define EXTENT_WRITTEN     1
> +#define EXTENT_IN_COMMIT   2
> +#define INTERNAL_EXISTS    MY_MAX_TAGS
> +#define INTERNAL_MASK      ((1 << INTERNAL_EXISTS) - 1)
> +
> +/* Returns largest t<=s s.t. t%base==0 */
> +static inline sector_t normalize(sector_t s, int base)
> +{
> +	sector_t tmp = s; /* Since do_div modifies its argument */
> +	return s - do_div(tmp, base);
> +}
> +
> +static inline sector_t normalize_up(sector_t s, int base)
> +{
> +	return normalize(s + base - 1, base);
> +}
> +
> +/* Complete stub using a list while we determine the API wanted */
> +
> +/* Returns tags, or negative */
> +static int32_t _find_entry(struct my_tree_t *tree, u64 s)
> +{
> +	struct pnfs_inval_tracking *pos;
> +
> +	dprintk("%s(%llu) enter\n", __func__, s);
> +	list_for_each_entry_reverse(pos, &tree->mtt_stub, it_link) {
> +		if (pos->it_sector > s)
> +			continue;
> +		else if (pos->it_sector == s)
> +			return pos->it_tags & INTERNAL_MASK;
> +		else
> +			break;
> +	}
> +	return -ENOENT;
> +}
> +
> +static inline
> +int _has_tag(struct my_tree_t *tree, u64 s, int32_t tag)
> +{
> +	int32_t tags;
> +
> +	dprintk("%s(%llu, %i) enter\n", __func__, s, tag);
> +	s = normalize(s, tree->mtt_step_size);
> +	tags = _find_entry(tree, s);
> +	if ((tags < 0) || !(tags & (1 << tag)))
> +		return 0;
> +	else
> +		return 1;
> +}
> +
> +/* Creates entry with tag, or if entry already exists, unions tag to it.
> + * If storage is not NULL, newly created entry will use it.
> + * Returns number of entries added, or negative on error.
> + */
> +static int _add_entry(struct my_tree_t *tree, u64 s, int32_t tag,
> +		      struct pnfs_inval_tracking *storage)
> +{
> +	int found = 0;
> +	struct pnfs_inval_tracking *pos;
> +
> +	dprintk("%s(%llu, %i, %p) enter\n", __func__, s, tag, storage);
> +	list_for_each_entry_reverse(pos, &tree->mtt_stub, it_link) {
> +		if (pos->it_sector > s)
> +			continue;
> +		else if (pos->it_sector == s) {
> +			found = 1;
> +			break;
> +		} else
> +			break;
> +	}
> +	if (found) {
> +		pos->it_tags |= (1 << tag);
> +		return 0;
> +	} else {
> +		struct pnfs_inval_tracking *new;
> +		if (storage)
> +			new = storage;
> +		else {
> +			new = kmalloc(sizeof(*new), GFP_KERNEL);
> +			if (!new)
> +				return -ENOMEM;
> +		}
> +		new->it_sector = s;
> +		new->it_tags = (1 << tag);
> +		list_add(&new->it_link, &pos->it_link);
> +		return 1;
> +	}
> +}
> +
> +/* XXXX Really want option to not create */
> +/* Over range, unions tag with existing entries, else creates entry with tag */
> +static int _set_range(struct my_tree_t *tree, int32_t tag, u64 s, u64 length)
> +{
> +	u64 i;
> +
> +	dprintk("%s(%i, %llu, %llu) enter\n", __func__, tag, s, length);
> +	for (i = normalize(s, tree->mtt_step_size); i < s + length;
> +	     i += tree->mtt_step_size)
> +		if (_add_entry(tree, i, tag, NULL))
> +			return -ENOMEM;
> +	return 0;
> +}
> +
> +/* Ensure that future operations on given range of tree will not malloc */
> +static int _preload_range(struct my_tree_t *tree, u64 offset, u64 length)
> +{
> +	u64 start, end, s;
> +	int count, i, used = 0, status = -ENOMEM;
> +	struct pnfs_inval_tracking **storage;
> +
> +	dprintk("%s(%llu, %llu) enter\n", __func__, offset, length);
> +	start = normalize(offset, tree->mtt_step_size);
> +	end = normalize_up(offset + length, tree->mtt_step_size);
> +	count = (int)(end - start) / (int)tree->mtt_step_size;
> +
> +	/* Pre-malloc what memory we might need */
> +	storage = kmalloc(sizeof(*storage) * count, GFP_KERNEL);
> +	if (!storage)
> +		return -ENOMEM;
> +	for (i = 0; i < count; i++) {
> +		storage[i] = kmalloc(sizeof(struct pnfs_inval_tracking),
> +				     GFP_KERNEL);
> +		if (!storage[i])
> +			goto out_cleanup;
> +	}
> +
> +	/* Now need lock - HOW??? */
> +
> +	for (s = start; s < end; s += tree->mtt_step_size)
> +		used += _add_entry(tree, s, INTERNAL_EXISTS, storage[used]);
> +
> +	/* Unlock - HOW??? */
> +	status = 0;
> +
> + out_cleanup:
> +	for (i = used; i < count; i++) {
> +		if (!storage[i])
> +			break;
> +		kfree(storage[i]);
> +	}
> +	kfree(storage);
> +	return status;
> +}
> +
> +static void set_needs_init(sector_t *array, sector_t offset)
> +{
> +	sector_t *p = array;
> +
> +	dprintk("%s enter\n", __func__);
> +	if (!p)
> +		return;
> +	while (*p < offset)
> +		p++;
> +	if (*p == offset)
> +		return;
> +	else if (*p == ~0) {
> +		*p++ = offset;
> +		*p = ~0;
> +		return;
> +	} else {
> +		sector_t *save = p;
> +		dprintk("%s Adding %llu\n", __func__, (u64)offset);
> +		while (*p != ~0)
> +			p++;
> +		p++;
> +		memmove(save + 1, save, (char *)p - (char *)save);
> +		*save = offset;
> +		return;
> +	}
> +}
> +
> +/* We are relying on page lock to serialize this */
> +int is_sector_initialized(struct pnfs_inval_markings *marks, sector_t isect)
> +{
> +	int rv;
> +
> +	spin_lock(&marks->im_lock);
> +	rv = _has_tag(&marks->im_tree, isect, EXTENT_INITIALIZED);
> +	spin_unlock(&marks->im_lock);
> +	return rv;
> +}
> +
> +/* Marks sectors in [offset, offset + length) as having been initialized.
> + * All lengths are step-aligned, where step is min(pagesize, blocksize).
> + * Notes where partial block is initialized, and helps prepare it for
> + * complete initialization later.
> + */
> +/* Currently assumes offset is page-aligned */
> +int mark_initialized_sectors(struct pnfs_inval_markings *marks,
> +			     sector_t offset, sector_t length,
> +			     sector_t **pages)
> +{
> +	sector_t s, start, end;
> +	sector_t *array = NULL; /* Pages to mark */
> +
> +	dprintk("%s(offset=%llu,len=%llu) enter\n",
> +		__func__, (u64)offset, (u64)length);
> +	s = max((sector_t) 3,
> +		2 * (marks->im_block_size / (PAGE_CACHE_SECTORS)));
> +	dprintk("%s set max=%llu\n", __func__, (u64)s);
> +	if (pages) {
> +		array = kmalloc(s * sizeof(sector_t), GFP_KERNEL);
> +		if (!array)
> +			goto outerr;
> +		array[0] = ~0;
> +	}
> +
> +	start = normalize(offset, marks->im_block_size);
> +	end = normalize_up(offset + length, marks->im_block_size);
> +	if (_preload_range(&marks->im_tree, start, end - start))
> +		goto outerr;
> +
> +	spin_lock(&marks->im_lock);
> +
> +	for (s = normalize_up(start, PAGE_CACHE_SECTORS);
> +	     s < offset; s += PAGE_CACHE_SECTORS) {
> +		dprintk("%s pre-area pages\n", __func__);
> +		/* Portion of used block is not initialized */
> +		if (!_has_tag(&marks->im_tree, s, EXTENT_INITIALIZED))
> +			set_needs_init(array, s);
> +	}
> +	if (_set_range(&marks->im_tree, EXTENT_INITIALIZED, offset, length))
> +		goto out_unlock;
> +	for (s = normalize_up(offset + length, PAGE_CACHE_SECTORS);
> +	     s < end; s += PAGE_CACHE_SECTORS) {
> +		dprintk("%s post-area pages\n", __func__);
> +		if (!_has_tag(&marks->im_tree, s, EXTENT_INITIALIZED))
> +			set_needs_init(array, s);
> +	}
> +
> +	spin_unlock(&marks->im_lock);
> +
> +	if (pages) {
> +		if (array[0] == ~0) {
> +			kfree(array);
> +			*pages = NULL;
> +		} else
> +			*pages = array;
> +	}
> +	return 0;
> +
> + out_unlock:
> +	spin_unlock(&marks->im_lock);
> + outerr:
> +	if (pages) {
> +		kfree(array);
> +		*pages = NULL;
> +	}
> +	return -ENOMEM;
> +}
> +
>  static void print_bl_extent(struct pnfs_block_extent *be)
>  {
>  	dprintk("PRINT EXTENT extent %p\n", be);

  reply	other threads:[~2011-06-14 15:40 UTC|newest]

Thread overview: 58+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-06-12 23:43 [PATCH 00/34] pnfs block layout driver based on v3.0-rc2 Jim Rees
2011-06-12 23:43 ` [PATCH 01/34] pnfs: GETDEVICELIST Jim Rees
2011-06-12 23:43 ` [PATCH 02/34] pnfs: add set-clear layoutdriver interface Jim Rees
2011-06-12 23:43 ` [PATCH 03/34] pnfs: let layoutcommit code handle multiple segments Jim Rees
2011-06-13 14:36   ` Fred Isaman
2011-06-14 10:40     ` tao.peng
2011-06-14 13:58       ` Fred Isaman
2011-06-14 14:28       ` Benny Halevy
2011-06-12 23:43 ` [PATCH 04/34] pnfs: hook nfs_write_begin/end to allow layout driver manipulation Jim Rees
2011-06-13 14:44   ` Fred Isaman
2011-06-14 11:01     ` tao.peng
2011-06-14 14:05       ` Fred Isaman
2011-06-14 15:53         ` Peng Tao
2011-06-14 16:02           ` Fred Isaman
2011-06-12 23:43 ` [PATCH 05/34] pnfs: ask for layout_blksize and save it in nfs_server Jim Rees
2011-06-14 15:01   ` Benny Halevy
2011-06-14 15:08     ` Peng Tao
2011-06-12 23:44 ` [PATCH 06/34] pnfs: cleanup_layoutcommit Jim Rees
2011-06-13 21:19   ` Benny Halevy
2011-06-14 15:16     ` Peng Tao
2011-06-14 15:10   ` Benny Halevy
2011-06-14 15:21     ` Peng Tao
2011-06-14 15:19   ` Benny Halevy
2011-06-12 23:44 ` [PATCH 07/34] pnfsblock: define PNFS_BLOCK Kconfig option Jim Rees
2011-06-14 15:13   ` Benny Halevy
2011-06-12 23:44 ` [PATCH 08/34] pnfsblock: blocklayout stub Jim Rees
2011-06-12 23:44 ` [PATCH 09/34] pnfsblock: layout alloc and free Jim Rees
2011-06-12 23:44 ` [PATCH 10/34] Add support for simple rpc pipefs Jim Rees
2011-06-12 23:44 ` [PATCH 11/34] pnfs-block: Add block device discovery pipe Jim Rees
2011-06-12 23:44 ` [PATCH 12/34] pnfsblock: basic extent code Jim Rees
2011-06-12 23:44 ` [PATCH 13/34] pnfsblock: add device operations Jim Rees
2011-06-12 23:44 ` [PATCH 14/34] pnfsblock: remove " Jim Rees
2011-06-12 23:44 ` [PATCH 15/34] pnfsblock: lseg alloc and free Jim Rees
2011-06-12 23:44 ` [PATCH 16/34] pnfsblock: merge extents Jim Rees
2011-06-12 23:44 ` [PATCH 17/34] pnfsblock: call and parse getdevicelist Jim Rees
2011-06-14 15:36   ` Benny Halevy
2011-06-12 23:44 ` [PATCH 18/34] pnfsblock: allow use of PG_owner_priv_1 flag Jim Rees
2011-06-13 15:56   ` Fred Isaman
2011-06-12 23:44 ` [PATCH 19/34] pnfsblock: xdr decode pnfs_block_layout4 Jim Rees
2011-06-12 23:44 ` [PATCH 20/34] pnfsblock: find_get_extent Jim Rees
2011-06-12 23:44 ` [PATCH 21/34] pnfsblock: SPLITME: add extent manipulation functions Jim Rees
2011-06-14 15:40   ` Benny Halevy [this message]
2011-06-12 23:44 ` [PATCH 22/34] pnfsblock: merge rw extents Jim Rees
2011-06-12 23:44 ` [PATCH 23/34] pnfsblock: encode_layoutcommit Jim Rees
2011-06-14 15:44   ` Benny Halevy
2011-06-12 23:44 ` [PATCH 24/34] pnfsblock: cleanup_layoutcommit Jim Rees
2011-06-12 23:44 ` [PATCH 25/34] pnfsblock: bl_read_pagelist Jim Rees
2011-06-12 23:44 ` [PATCH 26/34] pnfsblock: write_begin Jim Rees
2011-06-12 23:44 ` [PATCH 27/34] pnfsblock: write_end Jim Rees
2011-06-12 23:44 ` [PATCH 28/34] pnfsblock: write_end_cleanup Jim Rees
2011-06-12 23:45 ` [PATCH 29/34] pnfsblock: bl_write_pagelist support functions Jim Rees
2011-06-12 23:45 ` [PATCH 30/34] pnfsblock: bl_write_pagelist Jim Rees
2011-06-12 23:45 ` [PATCH 31/34] pnfsblock: note written INVAL areas for layoutcommit Jim Rees
2011-06-12 23:45 ` [PATCH 32/34] pnfsblock: Implement release_inval_marks Jim Rees
2011-06-12 23:45 ` [PATCH 33/34] Add configurable prefetch size for layoutget Jim Rees
2011-06-12 23:45 ` [PATCH 34/34] NFS41: do not update isize if inode needs layoutcommit Jim Rees
2011-06-14 16:15   ` Benny Halevy
2011-06-14 16:22     ` Fred Isaman

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4DF780E8.8060300@gmail.com \
    --to=bhalevy.lists@gmail.com \
    --cc=honey@citi.umich.edu \
    --cc=linux-nfs@vger.kernel.org \
    --cc=rees@umich.edu \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).