linux-nfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: andros@netapp.com
To: trond.myklebust@netapp.com
Cc: linux-nfs@vger.kernel.org, Andy Adamson <andros@netapp.com>
Subject: [PATCH 16/16] pnfs: wave 3: turn off pNFS on ds connection failure
Date: Mon, 14 Feb 2011 14:18:36 -0500	[thread overview]
Message-ID: <1297711116-3139-17-git-send-email-andros@netapp.com> (raw)
In-Reply-To: <1297711116-3139-1-git-send-email-andros@netapp.com>

From: Andy Adamson <andros@netapp.com>

If a data server is unavailable, go through MDS.

Mark the deviceid containing the data server as a negative cache entry.
Do not try to connect to any data server on a deviceid marked as a negative
cache entry. Mark any layout that tries to use the marked deviceid as failed.

Inodes with a layout marked as failed will not use the layout for I/O, and
will not perform any more layoutgets.
Inodes without a layout will still do layoutget, but the layout will get
marked as failed immediately.

Signed-off-by: Andy Adamson <andros@netapp.com>
---
 fs/nfs/nfs4filelayout.c    |    4 +++-
 fs/nfs/nfs4filelayout.h    |    3 +++
 fs/nfs/nfs4filelayoutdev.c |   27 +++++++++++++++++++++++----
 fs/nfs/pnfs.c              |   18 ++++++++++++++----
 fs/nfs/pnfs.h              |    4 ++++
 5 files changed, 47 insertions(+), 9 deletions(-)

diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index c818042..3768377 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -232,7 +232,9 @@ filelayout_read_pagelist(struct nfs_read_data *data)
 	idx = nfs4_fl_calc_ds_index(lseg, j);
 	ds = nfs4_fl_prepare_ds(lseg, idx);
 	if (!ds) {
-		printk(KERN_ERR "%s: prepare_ds failed, use MDS\n", __func__);
+		/* Either layout fh index faulty, or ds connect failed */
+		set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags);
+		set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
 		return PNFS_NOT_ATTEMPTED;
 	}
 	dprintk("%s USE DS:ip %x %hu\n", __func__,
diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h
index 9fef76e..1809aa6 100644
--- a/fs/nfs/nfs4filelayout.h
+++ b/fs/nfs/nfs4filelayout.h
@@ -97,5 +97,8 @@ extern struct nfs4_file_layout_dsaddr *
 nfs4_fl_find_get_deviceid(struct nfs_client *, struct nfs4_deviceid *dev_id);
 struct nfs4_file_layout_dsaddr *
 get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id);
+void filelayout_mark_devid_negative(struct nfs_client *clp,
+				    struct pnfs_deviceid_node *devid,
+				    int err, u32 ds_ipaddr);
 
 #endif /* FS_NFS_NFS4FILELAYOUT_H */
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c
index e8496f3..b8b3dbb 100644
--- a/fs/nfs/nfs4filelayoutdev.c
+++ b/fs/nfs/nfs4filelayoutdev.c
@@ -553,6 +553,19 @@ nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j)
 		i = j;
 	return flseg->fh_array[i];
 }
+void
+filelayout_mark_devid_negative(struct nfs_client *mds_clp,
+			       struct pnfs_deviceid_node *devid,
+			       int err, u32 ds_addr)
+{
+	u32 *p = (u32 *)&devid->de_id;
+
+	printk(KERN_ERR "NFS: data server %x connection error %d."
+			" Deviceid [%x%x%x%x] marked out of use.\n",
+			ds_addr, err, p[0], p[1], p[2], p[3]);
+
+	pnfs_mark_devid_negative(mds_clp, devid);
+}
 
 struct nfs4_pnfs_ds *
 nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx)
@@ -567,13 +580,19 @@ nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx)
 	}
 
 	if (!ds->ds_clp) {
+		struct nfs_server *s = NFS_SERVER(lseg->pls_layout->plh_inode);
 		int err;
 
-		err = nfs4_ds_connect(NFS_SERVER(lseg->pls_layout->plh_inode),
-					  dsaddr->ds_list[ds_idx]);
+		if (dsaddr->deviceid.de_flags & NFS4_DEVICE_ID_NEG_ENTRY) {
+			/* Already tried to connect, don't try again */
+			dprintk("%s Deviceid marked out of use\n", __func__);
+			return NULL;
+		}
+		err = nfs4_ds_connect(s, ds);
 		if (err) {
-			printk(KERN_ERR "%s nfs4_ds_connect error %d\n",
-			       __func__, err);
+			filelayout_mark_devid_negative(s->nfs_client,
+						       &dsaddr->deviceid, err,
+						       ntohl(ds->ds_ip_addr));
 			return NULL;
 		}
 	}
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 6f4a5ab..912b1ff 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -761,15 +761,16 @@ pnfs_update_layout(struct inode *ino,
 		dprintk("%s matches recall, use MDS\n", __func__);
 		goto out_unlock;
 	}
+
+	/* If LAYOUTGET or pNFS I/O already failed once we don't try again */
+	if (test_bit(lo_fail_bit(iomode), &nfsi->layout->plh_flags))
+		goto out_unlock;
+
 	/* Check to see if the layout for the given range already exists */
 	lseg = pnfs_find_lseg(lo, iomode);
 	if (lseg)
 		goto out_unlock;
 
-	/* if LAYOUTGET already failed once we don't try again */
-	if (test_bit(lo_fail_bit(iomode), &nfsi->layout->plh_flags))
-		goto out_unlock;
-
 	if (pnfs_layoutgets_blocked(lo, NULL, 0))
 		goto out_unlock;
 	atomic_inc(&lo->plh_outstanding);
@@ -1052,3 +1053,12 @@ pnfs_put_deviceid_cache(struct nfs_client *clp)
 	}
 }
 EXPORT_SYMBOL_GPL(pnfs_put_deviceid_cache);
+
+void
+pnfs_mark_devid_negative(struct nfs_client *clp, struct pnfs_deviceid_node *d)
+{
+	spin_lock(&clp->cl_devid_cache->dc_lock);
+	d->de_flags |= NFS4_DEVICE_ID_NEG_ENTRY;
+	spin_unlock(&clp->cl_devid_cache->dc_lock);
+}
+EXPORT_SYMBOL_GPL(pnfs_mark_devid_negative);
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 585023f..a760363 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -131,6 +131,8 @@ struct pnfs_deviceid_node {
 	struct hlist_node	de_node;
 	struct nfs4_deviceid	de_id;
 	atomic_t		de_ref;
+	unsigned long		de_flags;
+#define NFS4_DEVICE_ID_NEG_ENTRY		1
 };
 
 struct pnfs_deviceid_cache {
@@ -151,6 +153,8 @@ extern struct pnfs_deviceid_node *pnfs_add_deviceid(
 				struct pnfs_deviceid_node *);
 extern void pnfs_put_deviceid(struct pnfs_deviceid_cache *c,
 			      struct pnfs_deviceid_node *devid);
+extern void pnfs_mark_devid_negative(struct nfs_client *clp,
+				     struct pnfs_deviceid_node *d);
 
 extern int pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *);
 extern void pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *);
-- 
1.7.2.3


  parent reply	other threads:[~2011-02-14 19:19 UTC|newest]

Thread overview: 54+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-02-14 19:18 [PATCH 0/16] pnfs wave 3 submission andros
2011-02-14 19:18 ` [PATCH 01/16] NFS remove unnecessary CONFIG_NFS_V4 from nfs_read_data andros
2011-02-15  9:16   ` Christoph Hellwig
2011-02-15  9:24     ` Taousif_Ansari-G5Y5guI6XLZWk0Htik3J/w
2011-02-15 14:51     ` Andy Adamson
2011-02-14 19:18 ` [PATCH 02/16] NFS put_layout_hdr can remove nfsi->layout andros
2011-02-14 19:18 ` [PATCH 03/16] NFS move nfs_client initialization into nfs_get_client andros
2011-02-16  2:58   ` Benny Halevy
2011-02-16 16:00     ` Andy Adamson
2011-02-14 19:18 ` [PATCH 04/16] pnfs: wave 3: send zero stateid seqid on v4.1 i/o andros
2011-02-14 19:18 ` [PATCH 05/16] pnfs: wave 3: new flag for state renewal check andros
2011-02-14 19:18 ` [PATCH 06/16] pnfs: wave 3: new flag for lease time check andros
2011-02-14 19:18 ` [PATCH 07/16] pnfs: wave 3: add MDS mount DS only check andros
2011-02-14 19:18 ` [PATCH 08/16] pnfs: wave 3: lseg refcounting andros
2011-02-15  9:25   ` Christoph Hellwig
2011-02-15 14:48     ` Fred Isaman
2011-02-15 14:58       ` Christoph Hellwig
2011-02-15 14:59         ` Benny Halevy
2011-02-15 15:06           ` Christoph Hellwig
2011-02-15 15:11             ` Fred Isaman
2011-02-15 16:02             ` Christoph Hellwig
2011-02-15 16:37               ` William A. (Andy) Adamson
2011-02-15 19:17                 ` Andy Adamson
2011-02-15 19:29                   ` Benny Halevy
2011-02-15 19:30                     ` Andy Adamson
2011-02-15 15:07         ` Fred Isaman
2011-02-14 19:18 ` [PATCH 09/16] pnfs: wave 3: shift pnfs_update_layout locations andros
2011-02-14 23:14   ` Trond Myklebust
2011-02-15 14:41     ` Fred Isaman
2011-02-15 15:00       ` Trond Myklebust
2011-02-16  3:11       ` Benny Halevy
2011-02-14 19:18 ` [PATCH 10/16] pnfs: wave 3: coelesce across layout stripes andros
2011-02-14 23:42   ` Trond Myklebust
2011-02-15 14:43     ` William A. (Andy) Adamson
2011-02-15 15:03       ` Trond Myklebust
     [not found]         ` <1297782220.10103.13.camel-rJ7iovZKK19ZJLDQqaL3InhyD016LWXt@public.gmane.org>
2011-02-15 15:10           ` Andy Adamson
2011-02-14 19:18 ` [PATCH 11/16] pnfs: wave 3: generic read andros
2011-02-14 23:36   ` Trond Myklebust
2011-02-15 14:47     ` Andy Adamson
2011-02-16  3:16   ` Benny Halevy
2011-02-16 14:53     ` Andy Adamson
2011-02-16 15:09       ` Trond Myklebust
2011-02-16 15:52         ` Benny Halevy
2011-02-16 15:56           ` Andy Adamson
2011-02-16 15:57           ` Sager, Mike
2011-02-14 19:18 ` [PATCH 12/16] pnfs: wave 3: data server connection andros
2011-02-14 19:18 ` [PATCH 13/16] pnfs: wave 3: filelayout i/o helpers andros
2011-02-15  9:31   ` Christoph Hellwig
2011-02-15 15:12     ` Andy Adamson
2011-02-14 19:18 ` [PATCH 14/16] pnfs: wave 3: filelayout read andros
2011-02-14 19:18 ` [PATCH 15/16] pnfs: wave 3: filelayout async error handler andros
2011-02-14 19:18 ` andros [this message]
2011-02-14 22:39 ` [PATCH 0/16] pnfs wave 3 submission Trond Myklebust
2011-02-15 14:44   ` William A. (Andy) Adamson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1297711116-3139-17-git-send-email-andros@netapp.com \
    --to=andros@netapp.com \
    --cc=linux-nfs@vger.kernel.org \
    --cc=trond.myklebust@netapp.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).