* [PATCH 1/4] NFSv4.1 mdsthreshold attribute xdr @ 2012-05-23 9:02 andros 2012-05-23 9:02 ` [PATCH 2/4] NFSv4.1 cache mdsthreshold values on OPEN andros ` (2 more replies) 0 siblings, 3 replies; 7+ messages in thread From: andros @ 2012-05-23 9:02 UTC (permalink / raw) To: trond.myklebust; +Cc: linux-nfs, Andy Adamson From: Andy Adamson <andros@netapp.com> We only support one layout type per file system, so one threshold_item4 per mdsthreshold4. Signed-off-by: Andy Adamson <andros@netapp.com> --- fs/nfs/nfs4xdr.c | 125 ++++++++++++++++++++++++++++++++++++++++++++++- include/linux/nfs4.h | 7 +++ include/linux/nfs_xdr.h | 10 ++++ 3 files changed, 140 insertions(+), 2 deletions(-) diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index db040e9..db199f8 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -99,9 +99,12 @@ static int nfs4_stat_to_errno(int); #define nfs4_path_maxsz (1 + ((3 + NFS4_MAXPATHLEN) >> 2)) #define nfs4_owner_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ)) #define nfs4_group_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ)) +/* We support only one layout type per file system */ +#define decode_mdsthreshold_maxsz (1 + 1 + nfs4_fattr_bitmap_maxsz + 1 + 8) /* This is based on getfattr, which uses the most attributes: */ #define nfs4_fattr_value_maxsz (1 + (1 + 2 + 2 + 4 + 2 + 1 + 1 + 2 + 2 + \ - 3 + 3 + 3 + nfs4_owner_maxsz + nfs4_group_maxsz)) + 3 + 3 + 3 + nfs4_owner_maxsz + \ + nfs4_group_maxsz + decode_mdsthreshold_maxsz)) #define nfs4_fattr_maxsz (nfs4_fattr_bitmap_maxsz + \ nfs4_fattr_value_maxsz) #define decode_getattr_maxsz (op_decode_hdr_maxsz + nfs4_fattr_maxsz) @@ -1170,6 +1173,16 @@ static void encode_getfattr(struct xdr_stream *xdr, const u32* bitmask, struct c bitmask[1] & nfs4_fattr_bitmap[1], hdr); } +static void encode_getfattr_open(struct xdr_stream *xdr, const u32 *bitmask, + struct compound_hdr *hdr) +{ + encode_getattr_three(xdr, + bitmask[0] & nfs4_fattr_bitmap[0], + bitmask[1] & nfs4_fattr_bitmap[1], + bitmask[2] & FATTR4_WORD2_MDSTHRESHOLD, + 
hdr); +} + static void encode_fsinfo(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr) { encode_getattr_three(xdr, @@ -2161,7 +2174,7 @@ static void nfs4_xdr_enc_open(struct rpc_rqst *req, struct xdr_stream *xdr, encode_putfh(xdr, args->fh, &hdr); encode_open(xdr, args, &hdr); encode_getfh(xdr, &hdr); - encode_getfattr(xdr, args->bitmask, &hdr); + encode_getfattr_open(xdr, args->bitmask, &hdr); encode_nops(&hdr); } @@ -4183,6 +4196,110 @@ xdr_error: return status; } +static int decode_threshold_hint(struct xdr_stream *xdr, + uint32_t *bitmap, + uint64_t *res, + uint32_t hint_bit) +{ + __be32 *p; + + *res = 0; + if (likely(bitmap[0] & hint_bit)) { + p = xdr_inline_decode(xdr, 8); + if (unlikely(!p)) + goto out_overflow; + xdr_decode_hyper(p, res); + } + return 0; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} + +static int decode_first_threshold_item4(struct xdr_stream *xdr, + struct nfs4_threshold *res) +{ + __be32 *p, *savep; + uint32_t bitmap[3] = {0,}, attrlen; + int status; + + /* layout type */ + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) { + print_overflow_msg(__func__, xdr); + return -EIO; + } + res->l_type = be32_to_cpup(p); + + /* thi_hintset bitmap */ + status = decode_attr_bitmap(xdr, bitmap); + if (status < 0) + goto xdr_error; + + /* thi_hintlist length */ + status = decode_attr_length(xdr, &attrlen, &savep); + if (status < 0) + goto xdr_error; + /* thi_hintlist */ + status = decode_threshold_hint(xdr, bitmap, &res->rd_sz, THRESHOLD_RD); + if (status < 0) + goto xdr_error; + status = decode_threshold_hint(xdr, bitmap, &res->wr_sz, THRESHOLD_WR); + if (status < 0) + goto xdr_error; + status = decode_threshold_hint(xdr, bitmap, &res->rd_io_sz, + THRESHOLD_RD_IO); + if (status < 0) + goto xdr_error; + status = decode_threshold_hint(xdr, bitmap, &res->wr_io_sz, + THRESHOLD_WR_IO); + if (status < 0) + goto xdr_error; + + status = verify_attr_len(xdr, savep, attrlen); + res->bm = bitmap[0]; + + dprintk("%s 
bm=0x%x rd_sz=%llu wr_sz=%llu rd_io=%llu wr_io=%llu\n", + __func__, res->bm, res->rd_sz, res->wr_sz, res->rd_io_sz, + res->wr_io_sz); +xdr_error: + dprintk("%s ret=%d!\n", __func__, status); + return status; +} + +/* + * Thresholds on pNFS direct I/O vrs MDS I/O + */ +static int decode_attr_mdsthreshold(struct xdr_stream *xdr, + uint32_t *bitmap, + struct nfs4_threshold *res) +{ + __be32 *p; + int status = 0; + uint32_t num; + + if (unlikely(bitmap[2] & (FATTR4_WORD2_MDSTHRESHOLD - 1U))) + return -EIO; + if (likely(bitmap[2] & FATTR4_WORD2_MDSTHRESHOLD)) { + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + goto out_overflow; + num = be32_to_cpup(p); + if (num == 0) + return 0; + if (num > 1) + printk(KERN_INFO "%s: Warning: Multiple pNFS layout " + "drivers per filesystem not supported\n", + __func__); + + status = decode_first_threshold_item4(xdr, res); + } + return status; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} + static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_fattr *fattr, struct nfs_fh *fh, struct nfs4_fs_locations *fs_loc, @@ -4289,6 +4406,10 @@ static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap, goto xdr_error; fattr->valid |= status; + status = decode_attr_mdsthreshold(xdr, bitmap, fattr->mdsthreshold); + if (status < 0) + goto xdr_error; + xdr_error: dprintk("%s: xdr returned %d\n", __func__, -status); return status; diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 0987146..72b6bad 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -526,6 +526,13 @@ enum lock_type4 { #define FATTR4_WORD1_MOUNTED_ON_FILEID (1UL << 23) #define FATTR4_WORD1_FS_LAYOUT_TYPES (1UL << 30) #define FATTR4_WORD2_LAYOUT_BLKSIZE (1UL << 1) +#define FATTR4_WORD2_MDSTHRESHOLD (1UL << 4) + +/* MDS threshold bitmap bits */ +#define THRESHOLD_RD (1UL << 0) +#define THRESHOLD_WR (1UL << 1) +#define THRESHOLD_RD_IO (1UL << 2) +#define THRESHOLD_WR_IO (1UL << 3) #define 
NFSPROC4_NULL 0 #define NFSPROC4_COMPOUND 1 diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 2e53a3f..5b8e42e 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -35,6 +35,15 @@ static inline int nfs_fsid_equal(const struct nfs_fsid *a, const struct nfs_fsid return a->major == b->major && a->minor == b->minor; } +struct nfs4_threshold { + __u32 bm; + __u32 l_type; + __u64 rd_sz; + __u64 wr_sz; + __u64 rd_io_sz; + __u64 wr_io_sz; +}; + struct nfs_fattr { unsigned int valid; /* which fields are valid */ umode_t mode; @@ -67,6 +76,7 @@ struct nfs_fattr { unsigned long gencount; struct nfs4_string *owner_name; struct nfs4_string *group_name; + struct nfs4_threshold *mdsthreshold; /* pNFS threshold hints */ }; #define NFS_ATTR_FATTR_TYPE (1U << 0) -- 1.7.7.6 ^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 2/4] NFSv4.1 cache mdsthreshold values on OPEN 2012-05-23 9:02 [PATCH 1/4] NFSv4.1 mdsthreshold attribute xdr andros @ 2012-05-23 9:02 ` andros 2012-05-23 9:02 ` [PATCH 3/4] NFSv4.1 add nfs_inode book keeping for mdsthreshold andros 2012-05-23 9:02 ` [PATCH 4/4] NFSv4.1 test the mdsthreshold hint parameters andros 2 siblings, 0 replies; 7+ messages in thread From: andros @ 2012-05-23 9:02 UTC (permalink / raw) To: trond.myklebust; +Cc: linux-nfs, Andy Adamson From: Andy Adamson <andros@netapp.com> Signed-off-by: Andy Adamson <andros@netapp.com> --- fs/nfs/inode.c | 2 ++ fs/nfs/nfs4proc.c | 38 +++++++++++++++++++++++++++++++++----- fs/nfs/pnfs.c | 12 ++++++++++++ fs/nfs/pnfs.h | 21 +++++++++++++++++++++ include/linux/nfs_fs.h | 1 + 5 files changed, 69 insertions(+), 5 deletions(-) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 9ad81ce..889f7e5 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -641,6 +641,7 @@ struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, fmode_t f nfs_init_lock_context(&ctx->lock_context); ctx->lock_context.open_context = ctx; INIT_LIST_HEAD(&ctx->list); + ctx->mdsthreshold = NULL; return ctx; } @@ -669,6 +670,7 @@ static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync) put_rpccred(ctx->cred); dput(ctx->dentry); nfs_sb_deactive(sb); + kfree(ctx->mdsthreshold); kfree(ctx); } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 78784e5..d84c633 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1781,7 +1781,14 @@ static inline void nfs4_exclusive_attrset(struct nfs4_opendata *opendata, struct /* * Returns a referenced nfs4_state */ -static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, fmode_t fmode, int flags, struct iattr *sattr, struct rpc_cred *cred, struct nfs4_state **res) +static int _nfs4_do_open(struct inode *dir, + struct dentry *dentry, + fmode_t fmode, + int flags, + struct iattr *sattr, + struct rpc_cred *cred, + struct nfs4_state **res, + struct 
nfs4_threshold **ctx_th) { struct nfs4_state_owner *sp; struct nfs4_state *state = NULL; @@ -1806,6 +1813,11 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, fmode_t fmode if (opendata == NULL) goto err_put_state_owner; + if (ctx_th && server->attr_bitmask[2] & FATTR4_WORD2_MDSTHRESHOLD) { + opendata->f_attr.mdsthreshold = pnfs_mdsthreshold_alloc(); + if (!opendata->f_attr.mdsthreshold) + goto err_opendata_put; + } if (dentry->d_inode != NULL) opendata->state = nfs4_get_open_state(dentry->d_inode, sp); @@ -1831,11 +1843,19 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, fmode_t fmode nfs_setattr_update_inode(state->inode, sattr); nfs_post_op_update_inode(state->inode, opendata->o_res.f_attr); } + + if (pnfs_use_threshold(ctx_th, opendata->f_attr.mdsthreshold, server)) + *ctx_th = opendata->f_attr.mdsthreshold; + else + kfree(opendata->f_attr.mdsthreshold); + opendata->f_attr.mdsthreshold = NULL; + nfs4_opendata_put(opendata); nfs4_put_state_owner(sp); *res = state; return 0; err_opendata_put: + kfree(opendata->f_attr.mdsthreshold); nfs4_opendata_put(opendata); err_put_state_owner: nfs4_put_state_owner(sp); @@ -1845,14 +1865,21 @@ out_err: } -static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry, fmode_t fmode, int flags, struct iattr *sattr, struct rpc_cred *cred) +static struct nfs4_state *nfs4_do_open(struct inode *dir, + struct dentry *dentry, + fmode_t fmode, + int flags, + struct iattr *sattr, + struct rpc_cred *cred, + struct nfs4_threshold **ctx_th) { struct nfs4_exception exception = { }; struct nfs4_state *res; int status; do { - status = _nfs4_do_open(dir, dentry, fmode, flags, sattr, cred, &res); + status = _nfs4_do_open(dir, dentry, fmode, flags, sattr, cred, + &res, ctx_th); if (status == 0) break; /* NOTE: BAD_SEQID means the server and client disagree about the @@ -2176,7 +2203,8 @@ nfs4_atomic_open(struct inode *dir, struct nfs_open_context *ctx, int open_flags struct nfs4_state 
*state; /* Protect against concurrent sillydeletes */ - state = nfs4_do_open(dir, ctx->dentry, ctx->mode, open_flags, attr, ctx->cred); + state = nfs4_do_open(dir, ctx->dentry, ctx->mode, open_flags, attr, + ctx->cred, &ctx->mdsthreshold); if (IS_ERR(state)) return ERR_CAST(state); ctx->state = state; @@ -2778,7 +2806,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, fmode = ctx->mode; } sattr->ia_mode &= ~current_umask(); - state = nfs4_do_open(dir, de, fmode, flags, sattr, cred); + state = nfs4_do_open(dir, de, fmode, flags, sattr, cred, NULL); d_drop(dentry); if (IS_ERR(state)) { status = PTR_ERR(state); diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 5d09a36..cbcb6ae 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1630,3 +1630,15 @@ out_free: kfree(data); goto out; } + +struct nfs4_threshold *pnfs_mdsthreshold_alloc(void) +{ + struct nfs4_threshold *thp; + + thp = kzalloc(sizeof(*thp), GFP_NOFS); + if (!thp) { + dprintk("%s mdsthreshold allocation failed\n", __func__); + return NULL; + } + return thp; +} diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 7980756..29fd23c 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -227,6 +227,7 @@ int pnfs_read_done_resend_to_mds(struct inode *inode, struct list_head *head, const struct nfs_pgio_completion_ops *compl_ops); int pnfs_write_done_resend_to_mds(struct inode *inode, struct list_head *head, const struct nfs_pgio_completion_ops *compl_ops); +struct nfs4_threshold *pnfs_mdsthreshold_alloc(void); /* nfs4_deviceid_flags */ enum { @@ -360,6 +361,14 @@ static inline int pnfs_return_layout(struct inode *ino) return 0; } +static inline bool +pnfs_use_threshold(struct nfs4_threshold **dst, struct nfs4_threshold *src, + struct nfs_server *nfss) +{ + return (dst && src && src->bm != 0 && + nfss->pnfs_curr_ld->id == src->l_type); +} + #ifdef NFS_DEBUG void nfs4_print_deviceid(const struct nfs4_deviceid *dev_id); #else @@ -485,6 +494,18 @@ static inline int pnfs_layoutcommit_inode(struct 
inode *inode, bool sync) return 0; } +static inline bool +pnfs_use_threshold(struct nfs4_threshold **dst, struct nfs4_threshold *src, + struct nfs_server *nfss) +{ + return false; +} + +static inline struct nfs4_threshold *pnfs_mdsthreshold_alloc(void) +{ + return NULL; +} + #endif /* CONFIG_NFS_V4_1 */ #endif /* FS_NFS_PNFS_H */ diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 6cc7dba..ca4a707 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -102,6 +102,7 @@ struct nfs_open_context { int error; struct list_head list; + struct nfs4_threshold *mdsthreshold; }; struct nfs_open_dir_context { -- 1.7.7.6 ^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 3/4] NFSv4.1 add nfs_inode book keeping for mdsthreshold 2012-05-23 9:02 [PATCH 1/4] NFSv4.1 mdsthreshold attribute xdr andros 2012-05-23 9:02 ` [PATCH 2/4] NFSv4.1 cache mdsthreshold values on OPEN andros @ 2012-05-23 9:02 ` andros 2012-05-23 18:19 ` Myklebust, Trond 2012-05-23 9:02 ` [PATCH 4/4] NFSv4.1 test the mdsthreshold hint parameters andros 2 siblings, 1 reply; 7+ messages in thread From: andros @ 2012-05-23 9:02 UTC (permalink / raw) To: trond.myklebust; +Cc: linux-nfs, Andy Adamson From: Andy Adamson <andros@netapp.com> Keep track of the number of bytes read or written, including those queued up to be flushed. For use by mdsthreshold i/o size hints. No locking needed as this is used as hint information. Signed-off-by: Andy Adamson <andros@netapp.com> --- fs/nfs/file.c | 8 ++++++-- fs/nfs/inode.c | 2 ++ fs/nfs/pnfs.c | 3 +++ include/linux/nfs_fs.h | 3 +++ 4 files changed, 14 insertions(+), 2 deletions(-) diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 8eda8a6..c4cc096 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -203,8 +203,10 @@ nfs_file_read(struct kiocb *iocb, const struct iovec *iov, result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping); if (!result) { result = generic_file_aio_read(iocb, iov, nr_segs, pos); - if (result > 0) + if (result > 0) { + NFS_I(inode)->read_io += result; nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, result); + } } return result; } @@ -613,8 +615,10 @@ static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov, if (err < 0) result = err; } - if (result > 0) + if (result > 0) { + NFS_I(inode)->write_io += written; nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, written); + } out: return result; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 889f7e5..a6f5fbb 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -323,6 +323,8 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) inode->i_gid = -2; inode->i_blocks = 0; memset(nfsi->cookieverf, 0, 
sizeof(nfsi->cookieverf)); + nfsi->write_io = 0; + nfsi->read_io = 0; nfsi->read_cache_jiffies = fattr->time_start; nfsi->attr_gencount = fattr->gencount; diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index cbcb6ae..6620606 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -395,6 +395,9 @@ mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, dprintk("%s:Begin lo %p\n", __func__, lo); if (list_empty(&lo->plh_segs)) { + /* Reset MDS Threshold I/O counters */ + NFS_I(lo->plh_inode)->write_io = 0; + NFS_I(lo->plh_inode)->read_io = 0; if (!test_and_set_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags)) put_layout_hdr_locked(lo); return 0; diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index ca4a707..c6954ac 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -201,6 +201,9 @@ struct nfs_inode { /* pNFS layout information */ struct pnfs_layout_hdr *layout; + /* how many bytes have been written/read and how many bytes queued up */ + __u64 write_io; + __u64 read_io; #endif /* CONFIG_NFS_V4*/ #ifdef CONFIG_NFS_FSCACHE struct fscache_cookie *fscache; -- 1.7.7.6 ^ permalink raw reply related [flat|nested] 7+ messages in thread
* Re: [PATCH 3/4] NFSv4.1 add nfs_inode book keeping for mdsthreshold 2012-05-23 9:02 ` [PATCH 3/4] NFSv4.1 add nfs_inode book keeping for mdsthreshold andros @ 2012-05-23 18:19 ` Myklebust, Trond 2012-05-23 18:41 ` Adamson, Andy 0 siblings, 1 reply; 7+ messages in thread From: Myklebust, Trond @ 2012-05-23 18:19 UTC (permalink / raw) To: Adamson, Andy; +Cc: linux-nfs@vger.kernel.org On Wed, 2012-05-23 at 05:02 -0400, andros@netapp.com wrote: > From: Andy Adamson <andros@netapp.com> > > Keep track of the number of bytes read or written, including those queued > up to be flushed. For use by mdsthreshold i/o size hints. > > No locking needed as this is used as hint information. > > Signed-off-by: Andy Adamson <andros@netapp.com> > --- > fs/nfs/file.c | 8 ++++++-- > fs/nfs/inode.c | 2 ++ > fs/nfs/pnfs.c | 3 +++ > include/linux/nfs_fs.h | 3 +++ > 4 files changed, 14 insertions(+), 2 deletions(-) > > diff --git a/fs/nfs/file.c b/fs/nfs/file.c > index 8eda8a6..c4cc096 100644 > --- a/fs/nfs/file.c > +++ b/fs/nfs/file.c > @@ -203,8 +203,10 @@ nfs_file_read(struct kiocb *iocb, const struct iovec *iov, > result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping); > if (!result) { > result = generic_file_aio_read(iocb, iov, nr_segs, pos); > - if (result > 0) > + if (result > 0) { > + NFS_I(inode)->read_io += result; Should we perhaps rather do this from nfs_readpages(), nfs_readpage() and nfs_direct_read()? If we do it here in nfs_file_read, we miss mmaped reads, O_DIRECT reads, as well as splice reads. We also count read cache hits where we don't have to actually access the server. > nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, result); > + } > } > return result; > } > @@ -613,8 +615,10 @@ static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov, > if (err < 0) > result = err; > } > - if (result > 0) > + if (result > 0) { > + NFS_I(inode)->write_io += written; For the same reason, perhaps we should move this to nfs_direct_write_schedule_iovec(), and nfs_write_end(). > nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, written); > + } > out: > return result; > > diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c > index 889f7e5..a6f5fbb 100644 > --- a/fs/nfs/inode.c > +++ b/fs/nfs/inode.c > @@ -323,6 +323,8 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) > inode->i_gid = -2; > inode->i_blocks = 0; > memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf)); > + nfsi->write_io = 0; > + nfsi->read_io = 0; > > nfsi->read_cache_jiffies = fattr->time_start; > nfsi->attr_gencount = fattr->gencount; > diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c > index cbcb6ae..6620606 100644 > --- a/fs/nfs/pnfs.c > +++ b/fs/nfs/pnfs.c > @@ -395,6 +395,9 @@ mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, > dprintk("%s:Begin lo %p\n", __func__, lo); > > if (list_empty(&lo->plh_segs)) { > + /* Reset MDS Threshold I/O counters */ > + NFS_I(lo->plh_inode)->write_io = 0; > + NFS_I(lo->plh_inode)->read_io = 0; > if (!test_and_set_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags)) > put_layout_hdr_locked(lo); > return 0; > diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h > index ca4a707..c6954ac 100644 > --- a/include/linux/nfs_fs.h > +++ b/include/linux/nfs_fs.h > @@ -201,6 +201,9 @@ struct nfs_inode { > > /* pNFS layout information */ > struct pnfs_layout_hdr *layout; > + /* how many bytes have been written/read and how many bytes queued up */ > + __u64 write_io; > + __u64 read_io; > #endif /* CONFIG_NFS_V4*/ ^^^^ This doesn't look as if it will compile without CONFIG_NFS_V4. > #ifdef CONFIG_NFS_FSCACHE > struct fscache_cookie *fscache; -- Trond Myklebust Linux NFS client maintainer NetApp Trond.Myklebust@netapp.com www.netapp.com ^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH 3/4] NFSv4.1 add nfs_inode book keeping for mdsthreshold 2012-05-23 18:19 ` Myklebust, Trond @ 2012-05-23 18:41 ` Adamson, Andy 0 siblings, 0 replies; 7+ messages in thread From: Adamson, Andy @ 2012-05-23 18:41 UTC (permalink / raw) To: Myklebust, Trond; +Cc: Adamson, Andy, linux-nfs@vger.kernel.org On May 23, 2012, at 2:19 PM, Myklebust, Trond wrote: > On Wed, 2012-05-23 at 05:02 -0400, andros@netapp.com wrote: >> From: Andy Adamson <andros@netapp.com> >> >> Keep track of the number of bytes read or written, including those queued >> up to be flushed. For use by mdsthreshold i/o size hints. >> >> No locking needed as this is used as hint information. >> >> Signed-off-by: Andy Adamson <andros@netapp.com> >> --- >> fs/nfs/file.c | 8 ++++++-- >> fs/nfs/inode.c | 2 ++ >> fs/nfs/pnfs.c | 3 +++ >> include/linux/nfs_fs.h | 3 +++ >> 4 files changed, 14 insertions(+), 2 deletions(-) >> >> diff --git a/fs/nfs/file.c b/fs/nfs/file.c >> index 8eda8a6..c4cc096 100644 >> --- a/fs/nfs/file.c >> +++ b/fs/nfs/file.c >> @@ -203,8 +203,10 @@ nfs_file_read(struct kiocb *iocb, const struct iovec *iov, >> result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping); >> if (!result) { >> result = generic_file_aio_read(iocb, iov, nr_segs, pos); >> - if (result > 0) >> + if (result > 0) { >> + NFS_I(inode)->read_io += result; > > Should we perhaps rather do this from nfs_readpages(), nfs_readpage() > and nfs_direct_read()? > > If we do it here in nfs_file_read, we miss mmaped reads, O_DIRECT reads, > as well as splice reads. Well that's not good. > We also count read cache hits where we don't > have to actually access the server. OK. 
> >> nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, result); >> + } >> } >> return result; >> } >> @@ -613,8 +615,10 @@ static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov, >> if (err < 0) >> result = err; >> } >> - if (result > 0) >> + if (result > 0) { >> + NFS_I(inode)->write_io += written; > > For the same reason, perhaps we should move this to > nfs_direct_write_schedule_iovec(), and nfs_write_end(). > >> nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, written); >> + } >> out: >> return result; >> >> diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c >> index 889f7e5..a6f5fbb 100644 >> --- a/fs/nfs/inode.c >> +++ b/fs/nfs/inode.c >> @@ -323,6 +323,8 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) >> inode->i_gid = -2; >> inode->i_blocks = 0; >> memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf)); >> + nfsi->write_io = 0; >> + nfsi->read_io = 0; >> >> nfsi->read_cache_jiffies = fattr->time_start; >> nfsi->attr_gencount = fattr->gencount; >> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c >> index cbcb6ae..6620606 100644 >> --- a/fs/nfs/pnfs.c >> +++ b/fs/nfs/pnfs.c >> @@ -395,6 +395,9 @@ mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, >> dprintk("%s:Begin lo %p\n", __func__, lo); >> >> if (list_empty(&lo->plh_segs)) { >> + /* Reset MDS Threshold I/O counters */ >> + NFS_I(lo->plh_inode)->write_io = 0; >> + NFS_I(lo->plh_inode)->read_io = 0; >> if (!test_and_set_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags)) >> put_layout_hdr_locked(lo); >> return 0; >> diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h >> index ca4a707..c6954ac 100644 >> --- a/include/linux/nfs_fs.h >> +++ b/include/linux/nfs_fs.h >> @@ -201,6 +201,9 @@ struct nfs_inode { >> >> /* pNFS layout information */ >> struct pnfs_layout_hdr *layout; >> + /* how many bytes have been written/read and how many bytes queued up */ >> + __u64 write_io; >> + __u64 read_io; >> #endif /* CONFIG_NFS_V4*/ > > ^^^^ This doesn't look as if it will compile 
without CONFIG_NFS_V4. I'll fix and resend. Thanks for the review :) -->Andy > >> #ifdef CONFIG_NFS_FSCACHE >> struct fscache_cookie *fscache; > > -- > Trond Myklebust > Linux NFS client maintainer > > NetApp > Trond.Myklebust@netapp.com > www.netapp.com > ^ permalink raw reply [flat|nested] 7+ messages in thread
* [PATCH 4/4] NFSv4.1 test the mdsthreshold hint parameters 2012-05-23 9:02 [PATCH 1/4] NFSv4.1 mdsthreshold attribute xdr andros 2012-05-23 9:02 ` [PATCH 2/4] NFSv4.1 cache mdsthreshold values on OPEN andros 2012-05-23 9:02 ` [PATCH 3/4] NFSv4.1 add nfs_inode book keeping for mdsthreshold andros @ 2012-05-23 9:02 ` andros 2012-05-23 13:25 ` Boaz Harrosh 2 siblings, 1 reply; 7+ messages in thread From: andros @ 2012-05-23 9:02 UTC (permalink / raw) To: trond.myklebust; +Cc: linux-nfs, Andy Adamson From: Andy Adamson <andros@netapp.com> Signed-off-by: Andy Adamson <andros@netapp.com> --- fs/nfs/pnfs.c | 79 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 files changed, 79 insertions(+), 0 deletions(-) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 6620606..b8323aa 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -936,6 +936,81 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo, } /* + * Use mdsthreshold hints set at each OPEN to determine if I/O should go + * to the MDS or over pNFS + * + * The nfs_inode read_io and write_io fields are cumulative counters reset + * when there are no layout segments. Note that in pnfs_update_layout iomode + * is set to IOMODE_READ for a READ request, and set to IOMODE_RW for a + * WRITE request. + * + * A return of true means use MDS I/O. + * + * From rfc 5661: + * If a file's size is smaller than the file size threshold, data accesses + * SHOULD be sent to the metadata server. If an I/O request has a length that + * is below the I/O size threshold, the I/O SHOULD be sent to the metadata + * server. If both file size and I/O size are provided, the client SHOULD + * reach or exceed both thresholds before sending its read or write + * requests to the data server. 
+ */ +static bool pnfs_within_mdsthreshold(struct nfs_open_context *ctx, + struct inode *ino, int iomode) +{ + struct nfs4_threshold *t = ctx->mdsthreshold; + struct nfs_inode *nfsi = NFS_I(ino); + loff_t fsize = i_size_read(ino); + bool size = false, size_set = false, io = false, io_set = false, ret = false; + + if (t == NULL) + return ret; + + dprintk("%s bm=0x%x rd_sz=%llu wr_sz=%llu rd_io=%llu wr_io=%llu\n", + __func__, t->bm, t->rd_sz, t->wr_sz, t->rd_io_sz, t->wr_io_sz); + + switch (iomode) { + case IOMODE_READ: + if (t->bm & THRESHOLD_RD) { + dprintk("%s fsize %llu\n", __func__, fsize); + size_set = true; + if (fsize < t->rd_sz) + size = true; + } + if (t->bm & THRESHOLD_RD_IO) { + dprintk("%s nfsi->read_io %llu\n", __func__, + nfsi->read_io); + io_set = true; + if (nfsi->read_io < t->rd_io_sz) + io = true; + } + break; + case IOMODE_RW: + if (t->bm & THRESHOLD_WR) { + dprintk("%s fsize %llu\n", __func__, fsize); + size_set = true; + if (fsize < t->wr_sz) + size = true; + } + if (t->bm & THRESHOLD_WR_IO) { + dprintk("%s nfsi->write_io %llu\n", __func__, + nfsi->write_io); + io_set = true; + if (nfsi->write_io < t->wr_io_sz) + io = true; + } + break; + } + if (size_set && io_set) { + if (size && io) + ret = true; + } else if (size || io) + ret = true; + + dprintk("<-- %s size %d io %d ret %d\n", __func__, size, io, ret); + return ret; +} + +/* * Layout segment is retreived from the server if not cached. * The appropriate layout segment is referenced and returned to the caller. */ @@ -962,6 +1037,10 @@ pnfs_update_layout(struct inode *ino, if (!pnfs_enabled_sb(NFS_SERVER(ino))) return NULL; + + if (pnfs_within_mdsthreshold(ctx, ino, iomode)) + return NULL; + spin_lock(&ino->i_lock); lo = pnfs_find_alloc_layout(ino, ctx, gfp_flags); if (lo == NULL) { -- 1.7.7.6 ^ permalink raw reply related [flat|nested] 7+ messages in thread
* Re: [PATCH 4/4] NFSv4.1 test the mdsthreshold hint parameters 2012-05-23 9:02 ` [PATCH 4/4] NFSv4.1 test the mdsthreshold hint parameters andros @ 2012-05-23 13:25 ` Boaz Harrosh 0 siblings, 0 replies; 7+ messages in thread From: Boaz Harrosh @ 2012-05-23 13:25 UTC (permalink / raw) To: andros; +Cc: trond.myklebust, linux-nfs On 05/23/2012 12:02 PM, andros@netapp.com wrote: > From: Andy Adamson <andros@netapp.com> > > Signed-off-by: Andy Adamson <andros@netapp.com> > --- > fs/nfs/pnfs.c | 79 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ > 1 files changed, 79 insertions(+), 0 deletions(-) > > diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c > index 6620606..b8323aa 100644 > --- a/fs/nfs/pnfs.c > +++ b/fs/nfs/pnfs.c > @@ -936,6 +936,81 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo, > } > > /* > + * Use mdsthreshold hints set at each OPEN to determine if I/O should go > + * to the MDS or over pNFS > + * > + * The nfs_inode read_io and write_io fields are cumulative counters reset > + * when there are no layout segments. Note that in pnfs_update_layout iomode > + * is set to IOMODE_READ for a READ request, and set to IOMODE_RW for a > + * WRITE request. > + * > + * A return of true means use MDS I/O. > + * > + * From rfc 5661: > + * If a file's size is smaller than the file size threshold, data accesses > + * SHOULD be sent to the metadata server. If an I/O request has a length that > + * is below the I/O size threshold, the I/O SHOULD be sent to the metadata > + * server. If both file size and I/O size are provided, the client SHOULD > + * reach or exceed both thresholds before sending its read or write > + * requests to the data server. 
> + */ > +static bool pnfs_within_mdsthreshold(struct nfs_open_context *ctx, > + struct inode *ino, int iomode) > +{ > + struct nfs4_threshold *t = ctx->mdsthreshold; > + struct nfs_inode *nfsi = NFS_I(ino); > + loff_t fsize = i_size_read(ino); > + bool size = false, size_set = false, io = false, io_set = false, ret = false; > + > + if (t == NULL) > + return ret; > + > + dprintk("%s bm=0x%x rd_sz=%llu wr_sz=%llu rd_io=%llu wr_io=%llu\n", > + __func__, t->bm, t->rd_sz, t->wr_sz, t->rd_io_sz, t->wr_io_sz); > + > + switch (iomode) { > + case IOMODE_READ: > + if (t->bm & THRESHOLD_RD) { > + dprintk("%s fsize %llu\n", __func__, fsize); > + size_set = true; > + if (fsize < t->rd_sz) > + size = true; > + } > + if (t->bm & THRESHOLD_RD_IO) { > + dprintk("%s nfsi->read_io %llu\n", __func__, > + nfsi->read_io); > + io_set = true; > + if (nfsi->read_io < t->rd_io_sz) > + io = true; > + } > + break; > + case IOMODE_RW: > + if (t->bm & THRESHOLD_WR) { > + dprintk("%s fsize %llu\n", __func__, fsize); > + size_set = true; > + if (fsize < t->wr_sz) > + size = true; > + } > + if (t->bm & THRESHOLD_WR_IO) { > + dprintk("%s nfsi->write_io %llu\n", __func__, > + nfsi->write_io); > + io_set = true; > + if (nfsi->write_io < t->wr_io_sz) > + io = true; > + } > + break; > + } > + if (size_set && io_set) { > + if (size && io) > + ret = true; > + } else if (size || io) > + ret = true; > + > + dprintk("<-- %s size %d io %d ret %d\n", __func__, size, io, ret); > + return ret; > +} > + > +/* > * Layout segment is retreived from the server if not cached. > * The appropriate layout segment is referenced and returned to the caller. > */ > @@ -962,6 +1037,10 @@ pnfs_update_layout(struct inode *ino, > > if (!pnfs_enabled_sb(NFS_SERVER(ino))) > return NULL; > + > + if (pnfs_within_mdsthreshold(ctx, ino, iomode)) > + return NULL; > + Would we want to use these counters as the recommended layout_size in read and write, instead of current's PAGE_SIZE? 
Boaz > spin_lock(&ino->i_lock); > lo = pnfs_find_alloc_layout(ino, ctx, gfp_flags); > if (lo == NULL) { ^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2012-05-23 18:41 UTC | newest] Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2012-05-23 9:02 [PATCH 1/4] NFSv4.1 mdsthreshold attribute xdr andros 2012-05-23 9:02 ` [PATCH 2/4] NFSv4.1 cache mdsthreshold values on OPEN andros 2012-05-23 9:02 ` [PATCH 3/4] NFSv4.1 add nfs_inode book keeping for mdsthreshold andros 2012-05-23 18:19 ` Myklebust, Trond 2012-05-23 18:41 ` Adamson, Andy 2012-05-23 9:02 ` [PATCH 4/4] NFSv4.1 test the mdsthreshold hint parameters andros 2012-05-23 13:25 ` Boaz Harrosh
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).