From: Benny Halevy <bhalevy@tonian.com>
To: Boaz Harrosh <bharrosh@panasas.com>
Cc: Trond Myklebust <Trond.Myklebust@netapp.com>,
Benny Halevy <bhalevy@panasas.com>,
Brent Welch <welch@panasas.com>,
NFS list <linux-nfs@vger.kernel.org>,
open-osd <osd-dev@open-osd.org>
Subject: Re: [PATCH 18/19] pnfs-obj: move to ore 02: move to ORE
Date: Fri, 07 Oct 2011 13:26:59 -0400 [thread overview]
Message-ID: <4E8F3663.3000602@tonian.com> (raw)
In-Reply-To: <1317724610-27857-1-git-send-email-bharrosh@panasas.com>
On 2011-10-04 06:36, Boaz Harrosh wrote:
> In this patch we are actually moving to the ORE
> (Object Raid Engine).
>
> objio_state holds a pointer to an ore_io_state. Once
> we have an ore_io_state at hand we can call the ore
> for reading/writing. We register on the done path
> to kick off the nfs io_done mechanism.
>
> Again, for ease of reviewing, the old code is put under "#if 0"
> but is not removed, so the diff command works better.
> The old code will be removed in the next patch.
>
> Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
Acked-by: Benny Halevy <bhalevy@tonian.com>
Thanks!
> ---
> fs/nfs/objlayout/objio_osd.c | 133 +++++++++++++++++++-----------------------
> 1 files changed, 59 insertions(+), 74 deletions(-)
>
> diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
> index bd7ec26..00b3849 100644
> --- a/fs/nfs/objlayout/objio_osd.c
> +++ b/fs/nfs/objlayout/objio_osd.c
> @@ -44,12 +44,6 @@
>
> #define NFSDBG_FACILITY NFSDBG_PNFS_LD
>
> -#define _LLU(x) ((unsigned long long)x)
> -
> -enum { BIO_MAX_PAGES_KMALLOC =
> - (PAGE_SIZE - sizeof(struct bio)) / sizeof(struct bio_vec),
> -};
> -
> struct objio_dev_ent {
> struct nfs4_deviceid_node id_node;
> struct ore_dev od;
> @@ -124,37 +118,13 @@ OBJIO_LSEG(struct pnfs_layout_segment *lseg)
> return container_of(lseg, struct objio_segment, lseg);
> }
>
> -struct objio_state;
> -typedef int (*objio_done_fn)(struct objio_state *ios);
> -
> struct objio_state {
> /* Generic layer */
> struct objlayout_io_res oir;
>
> - struct page **pages;
> - unsigned pgbase;
> - unsigned nr_pages;
> - unsigned long count;
> - loff_t offset;
> bool sync;
> -
> - struct ore_layout *layout;
> - struct ore_components *oc;
> -
> - struct kref kref;
> - objio_done_fn done;
> - void *private;
> -
> - unsigned long length;
> - unsigned numdevs; /* Actually used devs in this IO */
> - /* A per-device variable array of size numdevs */
> - struct _objio_per_comp {
> - struct bio *bio;
> - struct osd_request *or;
> - unsigned long length;
> - u64 offset;
> - unsigned dev;
> - } per_dev[];
> + /*FIXME: Support for extra_bytes at ore_get_rw_state() */
> + struct ore_io_state *ios;
> };
>
> /* Send and wait for a get_device_info of devices in the layout,
> @@ -374,16 +344,16 @@ void objio_free_lseg(struct pnfs_layout_segment *lseg)
> }
>
> static int
> -objio_alloc_io_state(struct pnfs_layout_hdr *pnfs_layout_type,
> +objio_alloc_io_state(struct pnfs_layout_hdr *pnfs_layout_type, bool is_reading,
> struct pnfs_layout_segment *lseg, struct page **pages, unsigned pgbase,
> loff_t offset, size_t count, void *rpcdata, gfp_t gfp_flags,
> struct objio_state **outp)
> {
> struct objio_segment *objio_seg = OBJIO_LSEG(lseg);
> - struct objio_state *ios;
> + struct ore_io_state *ios;
> + int ret;
> struct __alloc_objio_state {
> struct objio_state objios;
> - struct _objio_per_comp per_dev[objio_seg->oc.numdevs];
> struct pnfs_osd_ioerr ioerrs[objio_seg->oc.numdevs];
> } *aos;
>
> @@ -391,30 +361,33 @@ objio_alloc_io_state(struct pnfs_layout_hdr *pnfs_layout_type,
> if (unlikely(!aos))
> return -ENOMEM;
>
> - ios = &aos->objios;
> -
> - ios->layout = &objio_seg->layout;
> - ios->oc = &objio_seg->oc;
> objlayout_init_ioerrs(&aos->objios.oir, objio_seg->oc.numdevs,
> aos->ioerrs, rpcdata, pnfs_layout_type);
>
> + ret = ore_get_rw_state(&objio_seg->layout, &objio_seg->oc, is_reading,
> + offset, count, &ios);
> + if (unlikely(ret)) {
> + kfree(aos);
> + return ret;
> + }
> +
> ios->pages = pages;
> ios->pgbase = pgbase;
> - ios->nr_pages = (pgbase + count + PAGE_SIZE - 1) >> PAGE_SHIFT;
> - ios->offset = offset;
> - ios->count = count;
> - ios->sync = 0;
> + ios->private = aos;
> BUG_ON(ios->nr_pages > (pgbase + count + PAGE_SIZE - 1) >> PAGE_SHIFT);
>
> - *outp = ios;
> + aos->objios.sync = 0;
> + aos->objios.ios = ios;
> + *outp = &aos->objios;
> return 0;
> }
>
> void objio_free_result(struct objlayout_io_res *oir)
> {
> - struct objio_state *ios = container_of(oir, struct objio_state, oir);
> + struct objio_state *objios = container_of(oir, struct objio_state, oir);
>
> - kfree(ios);
> + ore_put_io_state(objios->ios);
> + kfree(objios);
> }
>
> enum pnfs_osd_errno osd_pri_2_pnfs_err(enum osd_err_priority oep)
> @@ -447,7 +420,7 @@ enum pnfs_osd_errno osd_pri_2_pnfs_err(enum osd_err_priority oep)
> }
> }
>
> -static void __on_dev_error(struct objio_state *ios, bool is_write,
> +static void __on_dev_error(struct ore_io_state *ios,
> struct ore_dev *od, unsigned dev_index, enum osd_err_priority oep,
> u64 dev_offset, u64 dev_len)
> {
> @@ -465,9 +438,10 @@ static void __on_dev_error(struct objio_state *ios, bool is_write,
>
> objlayout_io_set_result(&objios->oir, comp,
> &pooid, osd_pri_2_pnfs_err(oep),
> - dev_offset, dev_len, is_write);
> + dev_offset, dev_len, !ios->reading);
> }
>
> +#if 0
> static void _clear_bio(struct bio *bio)
> {
> struct bio_vec *bv;
> @@ -786,26 +760,28 @@ static int _io_exec(struct objio_state *ios)
>
> return ret;
> }
> +#endif
>
> /*
> * read
> */
> -static int _read_done(struct objio_state *ios)
> +static void _read_done(struct ore_io_state *ios, void *private)
> {
> + struct objio_state *objios = private;
> ssize_t status;
> - int ret = _io_check(ios, false);
> + int ret = ore_check_io(ios, &__on_dev_error);
>
> - _io_free(ios);
> + /* FIXME: _io_free(ios) can we dealocate the libosd resources; */
>
> if (likely(!ret))
> status = ios->length;
> else
> status = ret;
>
> - objlayout_read_done(&ios->oir, status, ios->sync);
> - return ret;
> + objlayout_read_done(&objios->oir, status, objios->sync);
> }
>
> +#if 0
> static int _read_mirrors(struct objio_state *ios, unsigned cur_comp)
> {
> struct osd_request *or = NULL;
> @@ -860,49 +836,50 @@ err:
> _io_free(ios);
> return ret;
> }
> +#endif
>
> int objio_read_pagelist(struct nfs_read_data *rdata)
> {
> - struct objio_state *ios;
> + struct objio_state *objios;
> int ret;
>
> - ret = objio_alloc_io_state(NFS_I(rdata->inode)->layout,
> + ret = objio_alloc_io_state(NFS_I(rdata->inode)->layout, true,
> rdata->lseg, rdata->args.pages, rdata->args.pgbase,
> rdata->args.offset, rdata->args.count, rdata,
> - GFP_KERNEL, &ios);
> - if (unlikely(ret))
> - return ret;
> -
> - ret = _io_rw_pagelist(ios, GFP_KERNEL);
> + GFP_KERNEL, &objios);
> if (unlikely(ret))
> return ret;
>
> - return _read_exec(ios);
> + objios->ios->done = _read_done;
> + dprintk("%s: offset=0x%llx length=0x%x\n", __func__,
> + rdata->args.offset, rdata->args.count);
> + return ore_read(objios->ios);
> }
>
> /*
> * write
> */
> -static int _write_done(struct objio_state *ios)
> +static void _write_done(struct ore_io_state *ios, void *private)
> {
> + struct objio_state *objios = private;
> ssize_t status;
> - int ret = _io_check(ios, true);
> + int ret = ore_check_io(ios, &__on_dev_error);
>
> - _io_free(ios);
> + /* FIXME: _io_free(ios) can we dealocate the libosd resources; */
>
> if (likely(!ret)) {
> /* FIXME: should be based on the OSD's persistence model
> * See OSD2r05 Section 4.13 Data persistence model */
> - ios->oir.committed = NFS_FILE_SYNC;
> + objios->oir.committed = NFS_FILE_SYNC;
> status = ios->length;
> } else {
> status = ret;
> }
>
> - objlayout_write_done(&ios->oir, status, ios->sync);
> - return ret;
> + objlayout_write_done(&objios->oir, status, objios->sync);
> }
>
> +#if 0
> static int _write_mirrors(struct objio_state *ios, unsigned cur_comp)
> {
> struct _objio_per_comp *master_dev = &ios->per_dev[cur_comp];
> @@ -984,27 +961,35 @@ err:
> _io_free(ios);
> return ret;
> }
> +#endif
>
> int objio_write_pagelist(struct nfs_write_data *wdata, int how)
> {
> - struct objio_state *ios;
> + struct objio_state *objios;
> int ret;
>
> - ret = objio_alloc_io_state(NFS_I(wdata->inode)->layout,
> + ret = objio_alloc_io_state(NFS_I(wdata->inode)->layout, false,
> wdata->lseg, wdata->args.pages, wdata->args.pgbase,
> wdata->args.offset, wdata->args.count, wdata, GFP_NOFS,
> - &ios);
> + &objios);
> if (unlikely(ret))
> return ret;
>
> - ios->sync = 0 != (how & FLUSH_SYNC);
> + objios->sync = 0 != (how & FLUSH_SYNC);
>
> - /* TODO: ios->stable = stable; */
> - ret = _io_rw_pagelist(ios, GFP_NOFS);
> + if (!objios->sync)
> + objios->ios->done = _write_done;
> +
> + dprintk("%s: offset=0x%llx length=0x%x\n", __func__,
> + wdata->args.offset, wdata->args.count);
> + ret = ore_write(objios->ios);
> if (unlikely(ret))
> return ret;
>
> - return _write_exec(ios);
> + if (objios->sync)
> + _write_done(objios->ios, objios);
> +
> + return 0;
> }
>
> static bool objio_pg_test(struct nfs_pageio_descriptor *pgio,
next prev parent reply other threads:[~2011-10-11 2:34 UTC|newest]
Thread overview: 30+ messages / expand[flat|nested] mbox.gz Atom feed top
2011-10-04 10:24 [PATCHSET 00/19] objlayout: Move to ORE Boaz Harrosh
2011-10-04 10:28 ` [PATCH 01/19] exofs: Rename struct ore_components comps => oc Boaz Harrosh
2011-10-04 10:28 ` [PATCH 02/19] exofs: Remove unused data_map member from exofs_sb_info Boaz Harrosh
2011-10-04 10:29 ` [PATCH 03/19] ore: Make ore_striping_info and ore_calc_stripe_info public Boaz Harrosh
2011-10-04 10:29 ` [PATCH 04/19] ore/exofs: Change the type of the devices array (API change) Boaz Harrosh
2011-10-04 10:30 ` [PATCH 05/19] ore: Only IO one group at a time " Boaz Harrosh
2011-10-04 10:30 ` [PATCH 06/19] ore: cleanup: Embed an ore_striping_info inside ore_io_state Boaz Harrosh
2011-10-04 10:31 ` [PATCH 07/19] ore: Remove check for ios->kern_buff in _prepare_for_striping to later Boaz Harrosh
2011-10-04 10:32 ` [PATCH 08/19] exofs: Support for short read/writes Boaz Harrosh
2011-10-04 10:32 ` [PATCH 09/19] ore: " Boaz Harrosh
2011-10-04 10:33 ` [PATCH 10/19] ore: Support for partial component table Boaz Harrosh
2011-10-04 10:34 ` [PATCH 11/19] ore/exofs: Define new ore_verify_layout Boaz Harrosh
2011-10-04 10:34 ` [PATCH 12/19] ore/exofs: Change ore_check_io API Boaz Harrosh
2011-10-04 10:34 ` [PATCH 13/19] pnfs-obj: Remove redundant EOF from objlayout_io_state Boaz Harrosh
2011-10-07 16:58 ` Benny Halevy
2011-10-04 10:35 ` [PATCH 14/19] pnfs-obj: Return PNFS_NOT_ATTEMPTED in case of read/write_pagelist Boaz Harrosh
2011-10-07 17:06 ` Benny Halevy
2011-10-04 10:35 ` [PATCH 15/19] pnfs-obj: Get rid of objlayout_{alloc,free}_io_state Boaz Harrosh
2011-10-07 17:17 ` Benny Halevy
2011-10-04 10:36 ` [PATCH 16/19] pnfs-obj: Rename objlayout_io_state => objlayout_io_res Boaz Harrosh
2011-10-04 12:20 ` Jim Rees
2011-10-04 12:27 ` Boaz Harrosh
2011-10-04 10:36 ` [PATCH 17/19] pnfs-obj: move to ore 01: ore_layout & ore_components Boaz Harrosh
2011-10-07 17:26 ` Benny Halevy
2011-10-04 10:36 ` [PATCH 18/19] pnfs-obj: move to ore 02: move to ORE Boaz Harrosh
2011-10-07 17:26 ` Benny Halevy [this message]
2011-10-04 10:37 ` [PATCH 19/19] pnfs-obj: move to ore 03: Remove old raid engine Boaz Harrosh
2011-10-07 17:27 ` Benny Halevy
2011-10-04 12:04 ` [PATCHSET 00/19] objlayout: Move to ORE Benny Halevy
2011-10-04 12:24 ` Boaz Harrosh
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4E8F3663.3000602@tonian.com \
--to=bhalevy@tonian.com \
--cc=Trond.Myklebust@netapp.com \
--cc=bhalevy@panasas.com \
--cc=bharrosh@panasas.com \
--cc=linux-nfs@vger.kernel.org \
--cc=osd-dev@open-osd.org \
--cc=welch@panasas.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).