From: Wendy Cheng <wcheng@redhat.com>
To: nfs@lists.sourceforge.net, cluster-devel@redhat.com
Cc: Lon Hohberger <lhh@redhat.com>
Subject: [PATCH 3/4 Revised] NLM - kernel lockd-statd changes
Date: Thu, 05 Apr 2007 17:52:32 -0400 [thread overview]
Message-ID: <46156FA0.4030506@redhat.com> (raw)
[-- Attachment #1: Type: text/plain, Size: 780 bytes --]
This kernel patch (based on 2.6.21-rc4) is meant to be paired with the
optional nfs-utils user mode changes (patch 4-4, based on
nfs-utils-1.1.0-rc1). If the changes made in patch 4-4 are not present in
nfs-utils, rpc.statd will ignore whatever this kernel patch does.
The changes record the ip interface that accepts the lock requests and
pass the correct "my_name" (in standard IPv4 dot notation) to user
mode statd (instead of system_utsname.nodename). This enables rpc.statd
to add the correct taken-over IPv4 address as the 3rd parameter of the
ha_callout program. Current nfs-utils always resets "my_name" to the
loopback address (127.0.0.1), regardless of the statement made in the
rpc.statd man page. Check out "man rpc.statd" and "man sm-notify" for details.
-- Wendy
[-- Attachment #2: 003_nlm_statd.patch --]
[-- Type: text/x-patch, Size: 14486 bytes --]
Signed-off-by: S. Wendy Cheng <wcheng@redhat.com>
Signed-off-by: Lon Hohberger <lhh@redhat.com>
fs/lockd/clntproc.c | 2
fs/lockd/host.c | 61 +++++++++++++++++++-----
fs/lockd/mon.c | 104 +++++++++++++++++++++++++++++++++++------
include/linux/lockd/lockd.h | 11 +++-
include/linux/lockd/sm_inter.h | 3 -
net/sunrpc/svcsock.c | 40 +++++++++++++++
6 files changed, 191 insertions(+), 30 deletions(-)
--- linux-nlm-2/include/linux/lockd/sm_inter.h 2007-03-26 18:25:38.000000000 -0400
+++ linux/include/linux/lockd/sm_inter.h 2007-04-03 21:55:42.000000000 -0400
@@ -25,6 +25,7 @@
*/
struct nsm_args {
__be32 addr; /* remote address */
+ __be32 serv; /* server ip address */
u32 prog; /* RPC callback info */
u32 vers;
u32 proc;
@@ -40,7 +41,7 @@ struct nsm_res {
u32 state;
};
-int nsm_monitor(struct nlm_host *);
+int nsm_monitor(struct nlm_host *, __be32 ip);
int nsm_unmonitor(struct nlm_host *);
extern int nsm_local_state;
--- linux-nlm-2/include/linux/lockd/lockd.h 2007-03-26 18:25:38.000000000 -0400
+++ linux/include/linux/lockd/lockd.h 2007-04-04 10:45:14.000000000 -0400
@@ -39,12 +39,12 @@
struct nlm_host {
struct hlist_node h_hash; /* doubly linked list */
struct sockaddr_in h_addr; /* peer address */
+ __be32 h_server; /* server ip for NLM failover */
struct rpc_clnt * h_rpcclnt; /* RPC client to talk to peer */
char * h_name; /* remote hostname */
u32 h_version; /* interface version */
unsigned short h_proto; /* transport proto */
unsigned short h_reclaiming : 1,
- h_server : 1, /* server side, not client side */
h_inuse : 1;
wait_queue_head_t h_gracewait; /* wait while reclaiming */
struct rw_semaphore h_rwsem; /* Reboot recovery lock */
@@ -62,11 +62,18 @@ struct nlm_host {
struct nsm_handle * h_nsmhandle; /* NSM status handle */
};
+struct nsm_fo_monitored { /* NLM failover: one server IP tracked for an nsm_handle */
+ struct list_head list; /* linkage; entries are chained on nsm_handle.sm_serverip.list */
+ __be32 addr; /* server IPv4 address this entry stands for */
+};
+
struct nsm_handle {
struct list_head sm_link;
atomic_t sm_count;
char * sm_name;
struct sockaddr_in sm_addr;
+ struct mutex sm_mutex;
+ struct nsm_fo_monitored sm_serverip;
unsigned int sm_monitored : 1,
sm_sticky : 1; /* don't unmonitor */
};
@@ -254,7 +261,7 @@ static inline int
nlmsvc_check_grace_period(struct nlm_args *argp)
{
/* check for system wide grace period */
- if (nlmsvc_grace_period)
+ if (nlmsvc_grace_period)
return 1;
/* check for per exported fsid grace period */
--- linux-nlm-2/net/sunrpc/svcsock.c 2007-03-26 18:26:06.000000000 -0400
+++ linux/net/sunrpc/svcsock.c 2007-04-04 17:09:15.000000000 -0400
@@ -1111,6 +1111,44 @@ failed:
return;
}
+/* NLM failover: copy the address reported by the socket's getname() op into rqstp->rq_daddr (IPv4 or IPv6, chosen by sk_family) */
+static inline void svc_tcp_get_server_address(struct svc_rqst *rqstp)
+{
+ struct socket *sock = rqstp->rq_sock->sk_sock;
+ struct sockaddr_in6 sin6;
+ struct sockaddr_in *sin = (struct sockaddr_in *) &sin6;
+ int len, err;
+
+ /* sin aliases sin6, so one buffer serves both families; ref: inet_getname, inet6_getname, and sys_getsockname */
+ err = sock->ops->getname(sock, (struct sockaddr *) sin, &len, 0);
+ if (err) {
+ dprintk("svc_tcp_get_server_address: getname err=%d\n", err);
+ return;
+ }
+
+ switch (rqstp->rq_sock->sk_sk->sk_family) {
+ case AF_INET:
+ /* sanity check: a family mismatch is only reported; the address is copied regardless */
+ if (sin->sin_family != AF_INET)
+ printk("sunrpc: inet address family mismatch %d\n",
+ (int) sin->sin_family);
+ rqstp->rq_daddr.addr = sin->sin_addr;
+ break;
+ case AF_INET6:
+ /* sanity check: a family mismatch is only reported; the address is copied regardless */
+ if (sin6.sin6_family != AF_INET6)
+ printk("sunrpc: inet6 address family mismatch %d\n",
+ (int) sin6.sin6_family);
+ ipv6_addr_copy(&rqstp->rq_daddr.addr6, &sin6.sin6_addr);
+ break;
+ default:
+ break;
+ }
+
+ /* no error return: problems above are only logged, and other families leave rq_daddr untouched */
+ return;
+}
+
/*
* Receive data from a TCP socket.
*/
@@ -1260,6 +1298,8 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp)
if (serv->sv_stats)
serv->sv_stats->nettcpcnt++;
+ svc_tcp_get_server_address(rqstp);
+
return len;
err_delete:
--- linux-nlm-2/fs/lockd/host.c 2007-03-26 18:19:11.000000000 -0400
+++ linux/fs/lockd/host.c 2007-04-04 12:02:50.000000000 -0400
@@ -34,16 +34,16 @@ static DEFINE_MUTEX(nlm_host_mutex);
static void nlm_gc_hosts(void);
static struct nsm_handle * __nsm_find(const struct sockaddr_in *,
- const char *, int, int);
+ const char *, int, int, __be32);
static struct nsm_handle * nsm_find(const struct sockaddr_in *sin,
const char *hostname,
- int hostname_len);
+ int hostname_len, __be32 ip);
/*
* Common host lookup routine for server & client
*/
static struct nlm_host *
-nlm_lookup_host(int server, const struct sockaddr_in *sin,
+nlm_lookup_host(union svc_addr_u *server, const struct sockaddr_in *sin,
int proto, int version,
const char *hostname,
int hostname_len)
@@ -53,6 +53,7 @@ nlm_lookup_host(int server, const struct
struct nlm_host *host;
struct nsm_handle *nsm = NULL;
int hash;
+ __be32 server_ip;
dprintk("lockd: nlm_lookup_host(%u.%u.%u.%u, p=%d, v=%d, my role=%s, name=%.*s)\n",
NIPQUAD(sin->sin_addr.s_addr), proto, version,
@@ -60,6 +61,13 @@ nlm_lookup_host(int server, const struct
hostname_len,
hostname? hostname : "<none>");
+ /* NLM failover: ipv4 for now */
+ if (server)
+ server_ip = server->addr.s_addr;
+ else
+ server_ip = 0;
+
+ dprintk("lockd: server_ip = %u.%u.%u.%u\n", NIPQUAD(server_ip));
hash = NLM_ADDRHASH(sin->sin_addr.s_addr);
@@ -89,7 +97,7 @@ nlm_lookup_host(int server, const struct
continue;
if (host->h_version != version)
continue;
- if (host->h_server != server)
+ if (host->h_server != server_ip)
continue;
/* Move to head of hash chain. */
@@ -107,7 +115,7 @@ nlm_lookup_host(int server, const struct
/* Sadly, the host isn't in our hash table yet. See if
* we have an NSM handle for it. If not, create one.
*/
- if (!nsm && !(nsm = nsm_find(sin, hostname, hostname_len)))
+ if (!nsm && !(nsm = nsm_find(sin, hostname, hostname_len, server_ip)))
goto out;
host = kzalloc(sizeof(*host), GFP_KERNEL);
@@ -130,7 +138,10 @@ nlm_lookup_host(int server, const struct
host->h_state = 0; /* pseudo NSM state */
host->h_nsmstate = 0; /* real NSM state */
host->h_nsmhandle = nsm;
- host->h_server = server;
+
+ /* NLM failover: only ipv4 for now */
+ host->h_server = server_ip;
+
hlist_add_head(&host->h_hash, chain);
INIT_LIST_HEAD(&host->h_lockowners);
spin_lock_init(&host->h_lock);
@@ -180,7 +191,7 @@ struct nlm_host *
nlmclnt_lookup_host(const struct sockaddr_in *sin, int proto, int version,
const char *hostname, int hostname_len)
{
- return nlm_lookup_host(0, sin, proto, version,
+ return nlm_lookup_host(NULL, sin, proto, version,
hostname, hostname_len);
}
@@ -191,7 +202,7 @@ struct nlm_host *
nlmsvc_lookup_host(struct svc_rqst *rqstp,
const char *hostname, int hostname_len)
{
- return nlm_lookup_host(1, svc_addr_in(rqstp),
+ return nlm_lookup_host(&rqstp->rq_daddr, svc_addr_in(rqstp),
rqstp->rq_prot, rqstp->rq_vers,
hostname, hostname_len);
}
@@ -314,7 +325,7 @@ void nlm_host_rebooted(const struct sock
hostname, NIPQUAD(sin->sin_addr));
/* Find the NSM handle for this peer */
- if (!(nsm = __nsm_find(sin, hostname, hostname_len, 0)))
+ if (!(nsm = __nsm_find(sin, hostname, hostname_len, 0, 0)))
return;
/* When reclaiming locks on this peer, make sure that
@@ -445,7 +456,7 @@ static DEFINE_MUTEX(nsm_mutex);
static struct nsm_handle *
__nsm_find(const struct sockaddr_in *sin,
const char *hostname, int hostname_len,
- int create)
+ int create, __be32 server_ip)
{
struct nsm_handle *nsm = NULL;
struct list_head *pos;
@@ -490,6 +501,11 @@ __nsm_find(const struct sockaddr_in *sin
atomic_set(&nsm->sm_count, 1);
list_add(&nsm->sm_link, &nsm_handles);
+
+ /* NLM failover */
+ mutex_init(&nsm->sm_mutex);
+ INIT_LIST_HEAD(&nsm->sm_serverip.list);
+ nsm->sm_serverip.addr = server_ip;
}
out:
@@ -498,9 +514,28 @@ out:
}
static struct nsm_handle *
-nsm_find(const struct sockaddr_in *sin, const char *hostname, int hostname_len)
+nsm_find(const struct sockaddr_in *sin, const char *hostname, int hostname_len,
+ __be32 server_ip)
+{
+ return __nsm_find(sin, hostname, hostname_len, 1, server_ip);
+}
+
+/*
+ * NLM failover:
+ * nsm_mutex should be obtained before entry
+ * fo_ip not NULL
+ */
+void
+nsm_release_fo_ip(struct nsm_fo_monitored *fo_ip)
{
- return __nsm_find(sin, hostname, hostname_len, 1);
+ struct list_head *pos, *n, *head=&fo_ip->list;
+ struct nsm_fo_monitored *server_ip;
+
+ list_for_each_safe(pos, n, head) {
+ server_ip = list_entry(pos, struct nsm_fo_monitored, list);
+ kfree(server_ip);
+ }
+ return;
}
/*
@@ -515,6 +550,8 @@ nsm_release(struct nsm_handle *nsm)
mutex_lock(&nsm_mutex);
if (atomic_read(&nsm->sm_count) == 0) {
list_del(&nsm->sm_link);
+ if (!list_empty(&nsm->sm_serverip.list))
+ nsm_release_fo_ip(&nsm->sm_serverip);
kfree(nsm);
}
mutex_unlock(&nsm_mutex);
--- linux-nlm-2/fs/lockd/mon.c 2007-03-26 18:19:10.000000000 -0400
+++ linux/fs/lockd/mon.c 2007-04-04 16:11:05.000000000 -0400
@@ -30,7 +30,7 @@ int nsm_local_state;
* Common procedure for SM_MON/SM_UNMON calls
*/
static int
-nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res)
+nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res, __be32 server_ip)
{
struct rpc_clnt *clnt;
int status;
@@ -48,6 +48,12 @@ nsm_mon_unmon(struct nsm_handle *nsm, u3
memset(&args, 0, sizeof(args));
args.mon_name = nsm->sm_name;
+
+ /* NLM failover:
+ * only IPV4 is supported at this moment
+ */
+ args.serv = server_ip;
+
args.addr = nsm->sm_addr.sin_addr.s_addr;
args.prog = NLM_PROGRAM;
args.vers = 3;
@@ -65,28 +71,71 @@ nsm_mon_unmon(struct nsm_handle *nsm, u3
return status;
}
+static inline
+int nsm_is_monitored(struct nlm_host *host, __be32 server)
+{
+ struct nsm_handle *nsm = host->h_nsmhandle;
+ struct list_head *pos, *head;
+ struct nsm_fo_monitored *fo_entry;
+
+ /* client: callers pass server == 0, so only the handle-wide sm_monitored flag is consulted */
+ if (!server)
+ return nsm->sm_monitored;
+
+ /* server: if the handle is not monitored at all, this server IP cannot be either */
+ if (!nsm->sm_monitored)
+ return 0;
+
+ /* walk the handle's list of monitored server IPs under sm_mutex; return 1 on the first match */
+ mutex_lock(&nsm->sm_mutex);
+ head = &nsm->sm_serverip.list;
+ list_for_each(pos, head) {
+ fo_entry = list_entry(pos, struct nsm_fo_monitored, list);
+ if (fo_entry->addr == server) {
+ mutex_unlock(&nsm->sm_mutex);
+ return 1;
+ }
+ }
+ mutex_unlock(&nsm->sm_mutex);
+
+ return 0;
+}
+
/*
* Set up monitoring of a remote host
*/
int
-nsm_monitor(struct nlm_host *host)
+nsm_monitor(struct nlm_host *host, __be32 server)
{
struct nsm_handle *nsm = host->h_nsmhandle;
struct nsm_res res;
int status;
+ struct nsm_fo_monitored *fo_entry;
dprintk("lockd: nsm_monitor(%s)\n", host->h_name);
BUG_ON(nsm == NULL);
- if (nsm->sm_monitored)
+ if (nsm_is_monitored(host, server)) {
+ dprintk("nsm_monitor: sm_monitored is true - returning 0\n");
return 0;
+ }
- status = nsm_mon_unmon(nsm, SM_MON, &res);
+ status = nsm_mon_unmon(nsm, SM_MON, &res, server);
if (status < 0 || res.status != 0)
printk(KERN_NOTICE "lockd: cannot monitor %s\n", host->h_name);
- else
+ else if (nsm->sm_monitored) {
+ fo_entry = kzalloc(sizeof(struct nsm_fo_monitored), GFP_KERNEL);
+ if (!fo_entry) {
+ printk("lockd: out of memory, can't add fo_entry\n");
+ return -ENOMEM;
+ }
+ fo_entry->addr = server;
+ INIT_LIST_HEAD(&fo_entry->list);
+ list_add(&fo_entry->list, &nsm->sm_serverip.list);
+ } else
nsm->sm_monitored = 1;
+
return status;
}
@@ -98,7 +147,9 @@ nsm_unmonitor(struct nlm_host *host)
{
struct nsm_handle *nsm = host->h_nsmhandle;
struct nsm_res res;
- int status = 0;
+ int status = 0, error=0;
+ struct list_head *pos, *head=&nsm->sm_serverip.list;
+ struct nsm_fo_monitored *fo_entry;
if (nsm == NULL)
return 0;
@@ -108,11 +159,21 @@ nsm_unmonitor(struct nlm_host *host)
&& nsm->sm_monitored && !nsm->sm_sticky) {
dprintk("lockd: nsm_unmonitor(%s)\n", host->h_name);
- status = nsm_mon_unmon(nsm, SM_UNMON, &res);
- if (status < 0)
- printk(KERN_NOTICE "lockd: cannot unmonitor %s\n",
+ /* Unmonitor each server IP
+ * todo: need to re-think error handling
+ */
+ mutex_lock(&nsm->sm_mutex);
+ list_for_each(pos, head) {
+ fo_entry = list_entry(pos, struct nsm_fo_monitored, list);
+ status = nsm_mon_unmon(nsm, SM_UNMON, &res, fo_entry->addr);
+ if (status < 0) {
+ error++;
+ printk(KERN_NOTICE "lockd: cannot unmonitor %s\n",
host->h_name);
- else
+ }
+ }
+ mutex_unlock(&nsm->sm_mutex);
+ if (!error)
nsm->sm_monitored = 0;
}
nsm_release(nsm);
@@ -144,6 +205,13 @@ nsm_create(void)
return rpc_create(&args);
}
+/* "buffer" in xdr_encode_common() must hold either the
+ * utsname()->nodename string (__NEW_UTS_LEN+1) or an IPv4 dotted quad
+ * (at most 15 bytes+1). Parenthesized so it expands safely in expressions.
+ */
+
+#define XDR_ENCODE_BUF_LEN (__NEW_UTS_LEN+1)
+
/*
* XDR functions for NSM.
*/
@@ -151,7 +219,8 @@ nsm_create(void)
static __be32 *
xdr_encode_common(struct rpc_rqst *rqstp, __be32 *p, struct nsm_args *argp)
{
- char buffer[20], *name;
+ char *name;
+ char buffer[XDR_ENCODE_BUF_LEN];
/*
* Use the dotted-quad IP address of the remote host as
@@ -161,13 +230,20 @@ xdr_encode_common(struct rpc_rqst *rqstp
*/
if (nsm_use_hostnames) {
name = argp->mon_name;
- } else {
+ } else {
sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(argp->addr));
name = buffer;
}
- if (!(p = xdr_encode_string(p, name))
- || !(p = xdr_encode_string(p, utsname()->nodename)))
+ if (!(p = xdr_encode_string(p, name)))
+ return ERR_PTR(-EIO);
+
+ if (argp->serv)
+ sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(argp->serv));
+ else
+ sprintf(buffer, "%s", utsname()->nodename);
+ if (!(p = xdr_encode_string(p, buffer)))
return ERR_PTR(-EIO);
+
*p++ = htonl(argp->prog);
*p++ = htonl(argp->vers);
*p++ = htonl(argp->proc);
--- linux-nlm-2/fs/lockd/clntproc.c 2007-03-26 18:19:10.000000000 -0400
+++ linux/fs/lockd/clntproc.c 2007-04-03 21:49:04.000000000 -0400
@@ -500,7 +500,7 @@ nlmclnt_lock(struct nlm_rqst *req, struc
unsigned char fl_flags = fl->fl_flags;
int status = -ENOLCK;
- if (nsm_monitor(host) < 0) {
+ if (nsm_monitor(host, 0) < 0) {
printk(KERN_NOTICE "lockd: failed to monitor %s\n",
host->h_name);
goto out;
[-- Attachment #3: Type: text/plain, Size: 345 bytes --]
-------------------------------------------------------------------------
Take Surveys. Earn Cash. Influence the Future of IT
Join SourceForge.net's Techsay panel and you'll get the chance to share your
opinions on IT & business topics through brief surveys-and earn cash
http://www.techsay.com/default.php?page=join.php&p=sourceforge&CID=DEVDEV
[-- Attachment #4: Type: text/plain, Size: 140 bytes --]
_______________________________________________
NFS maillist - NFS@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/nfs
next reply other threads:[~2007-04-05 22:13 UTC|newest]
Thread overview: 12+ messages / expand[flat|nested] mbox.gz Atom feed top
2007-04-05 21:52 Wendy Cheng [this message]
2007-04-10 9:09 ` [PATCH 3/4 Revised] NLM - kernel lockd-statd changes Olaf Kirch
2007-04-10 14:41 ` Lon Hohberger
2007-04-10 15:00 ` Wendy Cheng
2007-04-10 18:16 ` Wendy Cheng
2007-04-11 4:50 ` Neil Brown
2007-04-13 19:16 ` Lon Hohberger
2007-04-13 19:31 ` Wendy Cheng
2007-04-17 11:52 ` Olaf Kirch
2007-04-17 13:24 ` Wendy Cheng
2007-04-17 14:51 ` Olaf Kirch
2007-04-17 15:09 ` Wendy Cheng
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=46156FA0.4030506@redhat.com \
--to=wcheng@redhat.com \
--cc=cluster-devel@redhat.com \
--cc=lhh@redhat.com \
--cc=nfs@lists.sourceforge.net \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox