From: Wendy Cheng <wcheng@redhat.com>
To: cluster-devel.redhat.com
Subject: [Cluster-devel] [PATCH 3/4 Revised] NLM - kernel lockd-statd changes
Date: Thu, 05 Apr 2007 17:52:32 -0400 [thread overview]
Message-ID: <46156FA0.4030506@redhat.com> (raw)
This kernel patch (based on 2.6.21-rc4) should be paired with the nfs-utils
user-mode changes (patch 4-4, based on nfs-utils-1.1.0-rc1), which are
optional. If the changes made in patch 4-4 are not present in nfs-utils,
rpc.statd will ignore whatever this kernel patch does.
The changes record the IP interface that accepts the lock requests and
pass the correct "my_name" (in standard IPv4 dotted notation) to the user
mode statd (instead of system_utsname.nodename). This enables rpc.statd
to pass the correct taken-over IPv4 address as the 3rd parameter of the
ha_callout program. Current nfs-utils always resets "my_name" to the
loopback address (127.0.0.1), regardless of the statement made in the rpc.statd
man page. See "man rpc.statd" and "man sm-notify" for details.
-- Wendy
-------------- next part --------------
A non-text attachment was scrubbed...
Name: 003_nlm_statd.patch
Type: text/x-patch
Size: 14486 bytes
Desc: not available
URL: <http://listman.redhat.com/archives/cluster-devel/attachments/20070405/7f776025/attachment.bin>
WARNING: multiple messages have this Message-ID (diff)
From: Wendy Cheng <wcheng@redhat.com>
To: nfs@lists.sourceforge.net, cluster-devel@redhat.com
Cc: Lon Hohberger <lhh@redhat.com>
Subject: [PATCH 3/4 Revised] NLM - kernel lockd-statd changes
Date: Thu, 05 Apr 2007 17:52:32 -0400 [thread overview]
Message-ID: <46156FA0.4030506@redhat.com> (raw)
[-- Attachment #1: Type: text/plain, Size: 780 bytes --]
This kernel patch (based on 2.6.21-rc4) should be paired with the nfs-utils
user-mode changes (patch 4-4, based on nfs-utils-1.1.0-rc1), which are
optional. If the changes made in patch 4-4 are not present in nfs-utils,
rpc.statd will ignore whatever this kernel patch does.
The changes record the IP interface that accepts the lock requests and
pass the correct "my_name" (in standard IPv4 dotted notation) to the user
mode statd (instead of system_utsname.nodename). This enables rpc.statd
to pass the correct taken-over IPv4 address as the 3rd parameter of the
ha_callout program. Current nfs-utils always resets "my_name" to the
loopback address (127.0.0.1), regardless of the statement made in the rpc.statd
man page. See "man rpc.statd" and "man sm-notify" for details.
-- Wendy
[-- Attachment #2: 003_nlm_statd.patch --]
[-- Type: text/x-patch, Size: 14486 bytes --]
Signed-off-by: S. Wendy Cheng <wcheng@redhat.com>
Signed-off-by: Lon Hohberger <lhh@redhat.com>
fs/lockd/clntproc.c | 2
fs/lockd/host.c | 61 +++++++++++++++++++-----
fs/lockd/mon.c | 104 +++++++++++++++++++++++++++++++++++------
include/linux/lockd/lockd.h | 11 +++-
include/linux/lockd/sm_inter.h | 3 -
net/sunrpc/svcsock.c | 40 +++++++++++++++
6 files changed, 191 insertions(+), 30 deletions(-)
--- linux-nlm-2/include/linux/lockd/sm_inter.h 2007-03-26 18:25:38.000000000 -0400
+++ linux/include/linux/lockd/sm_inter.h 2007-04-03 21:55:42.000000000 -0400
@@ -25,6 +25,7 @@
*/
struct nsm_args {
__be32 addr; /* remote address */
+ __be32 serv; /* server ip address */
u32 prog; /* RPC callback info */
u32 vers;
u32 proc;
@@ -40,7 +41,7 @@ struct nsm_res {
u32 state;
};
-int nsm_monitor(struct nlm_host *);
+int nsm_monitor(struct nlm_host *, __be32 ip);
int nsm_unmonitor(struct nlm_host *);
extern int nsm_local_state;
--- linux-nlm-2/include/linux/lockd/lockd.h 2007-03-26 18:25:38.000000000 -0400
+++ linux/include/linux/lockd/lockd.h 2007-04-04 10:45:14.000000000 -0400
@@ -39,12 +39,12 @@
struct nlm_host {
struct hlist_node h_hash; /* doubly linked list */
struct sockaddr_in h_addr; /* peer address */
+ __be32 h_server; /* server ip for NLM failover */
struct rpc_clnt * h_rpcclnt; /* RPC client to talk to peer */
char * h_name; /* remote hostname */
u32 h_version; /* interface version */
unsigned short h_proto; /* transport proto */
unsigned short h_reclaiming : 1,
- h_server : 1, /* server side, not client side */
h_inuse : 1;
wait_queue_head_t h_gracewait; /* wait while reclaiming */
struct rw_semaphore h_rwsem; /* Reboot recovery lock */
@@ -62,11 +62,18 @@ struct nlm_host {
struct nsm_handle * h_nsmhandle; /* NSM status handle */
};
+struct nsm_fo_monitored {
+ struct list_head list;
+ __be32 addr;
+};
+
struct nsm_handle {
struct list_head sm_link;
atomic_t sm_count;
char * sm_name;
struct sockaddr_in sm_addr;
+ struct mutex sm_mutex;
+ struct nsm_fo_monitored sm_serverip;
unsigned int sm_monitored : 1,
sm_sticky : 1; /* don't unmonitor */
};
@@ -254,7 +261,7 @@ static inline int
nlmsvc_check_grace_period(struct nlm_args *argp)
{
/* check for system wide grace period */
- if (nlmsvc_grace_period)
+ if (nlmsvc_grace_period)
return 1;
/* check for per exported fsid grace period */
--- linux-nlm-2/net/sunrpc/svcsock.c 2007-03-26 18:26:06.000000000 -0400
+++ linux/net/sunrpc/svcsock.c 2007-04-04 17:09:15.000000000 -0400
@@ -1111,6 +1111,44 @@ failed:
return;
}
+/* NLM failover: store the address reported by the socket's getname op in rqstp->rq_daddr */
+static inline void svc_tcp_get_server_address(struct svc_rqst *rqstp)
+{
+ struct socket *sock = rqstp->rq_sock->sk_sock;
+ struct sockaddr_in6 sin6;
+ struct sockaddr_in *sin = (struct sockaddr_in *) &sin6;
+ int len, err;
+
+ /* ref: inet_getname, inet6_getname, and sys_getsockname; sin points at sin6's storage */
+ err = sock->ops->getname(sock, (struct sockaddr *) sin, &len, 0);
+ if (err) {
+ dprintk("svc_tcp_get_server_address: getname err=%d\n", err);
+ return;
+ }
+
+ switch (rqstp->rq_sock->sk_sk->sk_family) {
+ case AF_INET:
+ /* sanity check: a mismatch is only logged, the address is still copied */
+ if (sin->sin_family != AF_INET)
+ printk("sunrpc: inet address family mismatch %d\n",
+ (int) sin->sin_family);
+ rqstp->rq_daddr.addr = sin->sin_addr;
+ break;
+ case AF_INET6:
+ /* sanity check: a mismatch is only logged, the address is still copied */
+ if (sin6.sin6_family != AF_INET6)
+ printk("sunrpc: inet6 address family mismatch %d\n",
+ (int) sin6.sin6_family);
+ ipv6_addr_copy(&rqstp->rq_daddr.addr6, &sin6.sin6_addr);
+ break;
+ default:
+ break;
+ }
+
+ /* no error return: on getname failure or an unknown family rq_daddr is left untouched */
+ return;
+}
+
/*
* Receive data from a TCP socket.
*/
@@ -1260,6 +1298,8 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp)
if (serv->sv_stats)
serv->sv_stats->nettcpcnt++;
+ svc_tcp_get_server_address(rqstp);
+
return len;
err_delete:
--- linux-nlm-2/fs/lockd/host.c 2007-03-26 18:19:11.000000000 -0400
+++ linux/fs/lockd/host.c 2007-04-04 12:02:50.000000000 -0400
@@ -34,16 +34,16 @@ static DEFINE_MUTEX(nlm_host_mutex);
static void nlm_gc_hosts(void);
static struct nsm_handle * __nsm_find(const struct sockaddr_in *,
- const char *, int, int);
+ const char *, int, int, __be32);
static struct nsm_handle * nsm_find(const struct sockaddr_in *sin,
const char *hostname,
- int hostname_len);
+ int hostname_len, __be32 ip);
/*
* Common host lookup routine for server & client
*/
static struct nlm_host *
-nlm_lookup_host(int server, const struct sockaddr_in *sin,
+nlm_lookup_host(union svc_addr_u *server, const struct sockaddr_in *sin,
int proto, int version,
const char *hostname,
int hostname_len)
@@ -53,6 +53,7 @@ nlm_lookup_host(int server, const struct
struct nlm_host *host;
struct nsm_handle *nsm = NULL;
int hash;
+ __be32 server_ip;
dprintk("lockd: nlm_lookup_host(%u.%u.%u.%u, p=%d, v=%d, my role=%s, name=%.*s)\n",
NIPQUAD(sin->sin_addr.s_addr), proto, version,
@@ -60,6 +61,13 @@ nlm_lookup_host(int server, const struct
hostname_len,
hostname? hostname : "<none>");
+ /* NLM failover: ipv4 for now */
+ if (server)
+ server_ip = server->addr.s_addr;
+ else
+ server_ip = 0;
+
+ dprintk("lockd: server_ip = %u.%u.%u.%u\n", NIPQUAD(server_ip));
hash = NLM_ADDRHASH(sin->sin_addr.s_addr);
@@ -89,7 +97,7 @@ nlm_lookup_host(int server, const struct
continue;
if (host->h_version != version)
continue;
- if (host->h_server != server)
+ if (host->h_server != server_ip)
continue;
/* Move to head of hash chain. */
@@ -107,7 +115,7 @@ nlm_lookup_host(int server, const struct
/* Sadly, the host isn't in our hash table yet. See if
* we have an NSM handle for it. If not, create one.
*/
- if (!nsm && !(nsm = nsm_find(sin, hostname, hostname_len)))
+ if (!nsm && !(nsm = nsm_find(sin, hostname, hostname_len, server_ip)))
goto out;
host = kzalloc(sizeof(*host), GFP_KERNEL);
@@ -130,7 +138,10 @@ nlm_lookup_host(int server, const struct
host->h_state = 0; /* pseudo NSM state */
host->h_nsmstate = 0; /* real NSM state */
host->h_nsmhandle = nsm;
- host->h_server = server;
+
+ /* NLM failover: only ipv4 for now */
+ host->h_server = server_ip;
+
hlist_add_head(&host->h_hash, chain);
INIT_LIST_HEAD(&host->h_lockowners);
spin_lock_init(&host->h_lock);
@@ -180,7 +191,7 @@ struct nlm_host *
nlmclnt_lookup_host(const struct sockaddr_in *sin, int proto, int version,
const char *hostname, int hostname_len)
{
- return nlm_lookup_host(0, sin, proto, version,
+ return nlm_lookup_host(NULL, sin, proto, version,
hostname, hostname_len);
}
@@ -191,7 +202,7 @@ struct nlm_host *
nlmsvc_lookup_host(struct svc_rqst *rqstp,
const char *hostname, int hostname_len)
{
- return nlm_lookup_host(1, svc_addr_in(rqstp),
+ return nlm_lookup_host(&rqstp->rq_daddr, svc_addr_in(rqstp),
rqstp->rq_prot, rqstp->rq_vers,
hostname, hostname_len);
}
@@ -314,7 +325,7 @@ void nlm_host_rebooted(const struct sock
hostname, NIPQUAD(sin->sin_addr));
/* Find the NSM handle for this peer */
- if (!(nsm = __nsm_find(sin, hostname, hostname_len, 0)))
+ if (!(nsm = __nsm_find(sin, hostname, hostname_len, 0, 0)))
return;
/* When reclaiming locks on this peer, make sure that
@@ -445,7 +456,7 @@ static DEFINE_MUTEX(nsm_mutex);
static struct nsm_handle *
__nsm_find(const struct sockaddr_in *sin,
const char *hostname, int hostname_len,
- int create)
+ int create, __be32 server_ip)
{
struct nsm_handle *nsm = NULL;
struct list_head *pos;
@@ -490,6 +501,11 @@ __nsm_find(const struct sockaddr_in *sin
atomic_set(&nsm->sm_count, 1);
list_add(&nsm->sm_link, &nsm_handles);
+
+ /* NLM failover */
+ mutex_init(&nsm->sm_mutex);
+ INIT_LIST_HEAD(&nsm->sm_serverip.list);
+ nsm->sm_serverip.addr = server_ip;
}
out:
@@ -498,9 +514,28 @@ out:
}
static struct nsm_handle *
-nsm_find(const struct sockaddr_in *sin, const char *hostname, int hostname_len)
+nsm_find(const struct sockaddr_in *sin, const char *hostname, int hostname_len,
+ __be32 server_ip)
+{
+ return __nsm_find(sin, hostname, hostname_len, 1, server_ip);
+}
+
+/*
+ * NLM failover:
+ * nsm_mutex should be obtained before entry
+ * fo_ip not NULL
+ */
+void
+nsm_release_fo_ip(struct nsm_fo_monitored *fo_ip)
{
- return __nsm_find(sin, hostname, hostname_len, 1);
+ struct list_head *pos, *n, *head=&fo_ip->list;
+ struct nsm_fo_monitored *server_ip;
+
+ list_for_each_safe(pos, n, head) {
+ server_ip = list_entry(pos, struct nsm_fo_monitored, list);
+ kfree(server_ip);
+ }
+ return;
}
/*
@@ -515,6 +550,8 @@ nsm_release(struct nsm_handle *nsm)
mutex_lock(&nsm_mutex);
if (atomic_read(&nsm->sm_count) == 0) {
list_del(&nsm->sm_link);
+ if (!list_empty(&nsm->sm_serverip.list))
+ nsm_release_fo_ip(&nsm->sm_serverip);
kfree(nsm);
}
mutex_unlock(&nsm_mutex);
--- linux-nlm-2/fs/lockd/mon.c 2007-03-26 18:19:10.000000000 -0400
+++ linux/fs/lockd/mon.c 2007-04-04 16:11:05.000000000 -0400
@@ -30,7 +30,7 @@ int nsm_local_state;
* Common procedure for SM_MON/SM_UNMON calls
*/
static int
-nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res)
+nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res, __be32 server_ip)
{
struct rpc_clnt *clnt;
int status;
@@ -48,6 +48,12 @@ nsm_mon_unmon(struct nsm_handle *nsm, u3
memset(&args, 0, sizeof(args));
args.mon_name = nsm->sm_name;
+
+ /* NLM failover:
+ * only IPV4 is supported at this moment
+ */
+ args.serv = server_ip;
+
args.addr = nsm->sm_addr.sin_addr.s_addr;
args.prog = NLM_PROGRAM;
args.vers = 3;
@@ -65,28 +71,71 @@ nsm_mon_unmon(struct nsm_handle *nsm, u3
return status;
}
+static inline
+int nsm_is_monitored(struct nlm_host *host, __be32 server)
+{
+ struct nsm_handle *nsm = host->h_nsmhandle;
+ struct list_head *pos, *head;
+ struct nsm_fo_monitored *fo_entry;
+
+ /* client (server == 0): only the handle-wide sm_monitored flag is consulted */
+ if (!server)
+ return nsm->sm_monitored;
+
+ /* server: an unmonitored handle is reported as not monitored for any server ip */
+ if (!nsm->sm_monitored)
+ return 0;
+
+ /* walk the per-handle server-ip list under sm_mutex; return 1 on an address match */
+ mutex_lock(&nsm->sm_mutex);
+ head = &nsm->sm_serverip.list;
+ list_for_each(pos, head) {
+ fo_entry = list_entry(pos, struct nsm_fo_monitored, list);
+ if (fo_entry->addr == server) {
+ mutex_unlock(&nsm->sm_mutex);
+ return 1;
+ }
+ }
+ mutex_unlock(&nsm->sm_mutex);
+
+ return 0;
+}
+
/*
* Set up monitoring of a remote host
*/
int
-nsm_monitor(struct nlm_host *host)
+nsm_monitor(struct nlm_host *host, __be32 server)
{
struct nsm_handle *nsm = host->h_nsmhandle;
struct nsm_res res;
int status;
+ struct nsm_fo_monitored *fo_entry;
dprintk("lockd: nsm_monitor(%s)\n", host->h_name);
BUG_ON(nsm == NULL);
- if (nsm->sm_monitored)
+ if (nsm_is_monitored(host, server)) {
+ dprintk("nsm_monitor: sm_monitored is true - returning 0\n");
return 0;
+ }
- status = nsm_mon_unmon(nsm, SM_MON, &res);
+ status = nsm_mon_unmon(nsm, SM_MON, &res, server);
if (status < 0 || res.status != 0)
printk(KERN_NOTICE "lockd: cannot monitor %s\n", host->h_name);
- else
+ else if (nsm->sm_monitored) {
+ fo_entry = kzalloc(sizeof(struct nsm_fo_monitored), GFP_KERNEL);
+ if (!fo_entry) {
+ printk("lockd: out of memory, can't add fo_entry\n");
+ return -ENOMEM;
+ }
+ fo_entry->addr = server;
+ INIT_LIST_HEAD(&fo_entry->list);
+ list_add(&fo_entry->list, &nsm->sm_serverip.list);
+ } else
nsm->sm_monitored = 1;
+
return status;
}
@@ -98,7 +147,9 @@ nsm_unmonitor(struct nlm_host *host)
{
struct nsm_handle *nsm = host->h_nsmhandle;
struct nsm_res res;
- int status = 0;
+ int status = 0, error=0;
+ struct list_head *pos, *head=&nsm->sm_serverip.list;
+ struct nsm_fo_monitored *fo_entry;
if (nsm == NULL)
return 0;
@@ -108,11 +159,21 @@ nsm_unmonitor(struct nlm_host *host)
&& nsm->sm_monitored && !nsm->sm_sticky) {
dprintk("lockd: nsm_unmonitor(%s)\n", host->h_name);
- status = nsm_mon_unmon(nsm, SM_UNMON, &res);
- if (status < 0)
- printk(KERN_NOTICE "lockd: cannot unmonitor %s\n",
+ /* Unmonitor each server IP
+ * todo: need to re-think error handling
+ */
+ mutex_lock(&nsm->sm_mutex);
+ list_for_each(pos, head) {
+ fo_entry = list_entry(pos, struct nsm_fo_monitored, list);
+ status = nsm_mon_unmon(nsm, SM_UNMON, &res, fo_entry->addr);
+ if (status < 0) {
+ error++;
+ printk(KERN_NOTICE "lockd: cannot unmonitor %s\n",
host->h_name);
- else
+ }
+ }
+ mutex_unlock(&nsm->sm_mutex);
+ if (!error)
nsm->sm_monitored = 0;
}
nsm_release(nsm);
@@ -144,6 +205,13 @@ nsm_create(void)
return rpc_create(&args);
}
+/* "buffer" in xdr_encode_common() must hold either the nodename string
+ * (__NEW_UTS_LEN+1 bytes) or an IPv4 dotted quad (at most 15 chars + NUL).
+ * Parenthesized so the expansion stays one value inside larger expressions.
+ */
+
+#define XDR_ENCODE_BUF_LEN (__NEW_UTS_LEN+1)
+
/*
* XDR functions for NSM.
*/
@@ -151,7 +219,8 @@ nsm_create(void)
static __be32 *
xdr_encode_common(struct rpc_rqst *rqstp, __be32 *p, struct nsm_args *argp)
{
- char buffer[20], *name;
+ char *name;
+ char buffer[XDR_ENCODE_BUF_LEN];
/*
* Use the dotted-quad IP address of the remote host as
@@ -161,13 +230,20 @@ xdr_encode_common(struct rpc_rqst *rqstp
*/
if (nsm_use_hostnames) {
name = argp->mon_name;
- } else {
+ } else {
sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(argp->addr));
name = buffer;
}
- if (!(p = xdr_encode_string(p, name))
- || !(p = xdr_encode_string(p, utsname()->nodename)))
+ if (!(p = xdr_encode_string(p, name)))
+ return ERR_PTR(-EIO);
+
+ if (argp->serv)
+ sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(argp->serv));
+ else
+ sprintf(buffer, "%s", utsname()->nodename);
+ if (!(p = xdr_encode_string(p, buffer)))
return ERR_PTR(-EIO);
+
*p++ = htonl(argp->prog);
*p++ = htonl(argp->vers);
*p++ = htonl(argp->proc);
--- linux-nlm-2/fs/lockd/clntproc.c 2007-03-26 18:19:10.000000000 -0400
+++ linux/fs/lockd/clntproc.c 2007-04-03 21:49:04.000000000 -0400
@@ -500,7 +500,7 @@ nlmclnt_lock(struct nlm_rqst *req, struc
unsigned char fl_flags = fl->fl_flags;
int status = -ENOLCK;
- if (nsm_monitor(host) < 0) {
+ if (nsm_monitor(host, 0) < 0) {
printk(KERN_NOTICE "lockd: failed to monitor %s\n",
host->h_name);
goto out;
[-- Attachment #3: Type: text/plain, Size: 345 bytes --]
-------------------------------------------------------------------------
Take Surveys. Earn Cash. Influence the Future of IT
Join SourceForge.net's Techsay panel and you'll get the chance to share your
opinions on IT & business topics through brief surveys-and earn cash
http://www.techsay.com/default.php?page=join.php&p=sourceforge&CID=DEVDEV
[-- Attachment #4: Type: text/plain, Size: 140 bytes --]
_______________________________________________
NFS maillist - NFS@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/nfs
next reply other threads:[~2007-04-05 21:52 UTC|newest]
Thread overview: 24+ messages / expand[flat|nested] mbox.gz Atom feed top
2007-04-05 21:52 Wendy Cheng [this message]
2007-04-05 21:52 ` [PATCH 3/4 Revised] NLM - kernel lockd-statd changes Wendy Cheng
2007-04-10 9:09 ` Olaf Kirch
2007-04-10 9:10 ` [Cluster-devel] Re: [NFS] " Olaf Kirch
2007-04-10 14:41 ` Lon Hohberger
2007-04-10 14:41 ` Lon Hohberger
2007-04-10 15:00 ` [Cluster-devel] Re: [NFS] " Wendy Cheng
2007-04-10 15:00 ` Wendy Cheng
2007-04-10 18:16 ` [Cluster-devel] Re: [NFS] " Wendy Cheng
2007-04-10 18:16 ` Wendy Cheng
[not found] ` <message from Olaf Kirch on Tuesday April 10>
2007-04-11 4:50 ` [Cluster-devel] Re: [NFS] " Neil Brown
2007-04-11 4:50 ` Neil Brown
2007-04-13 19:16 ` [Cluster-devel] Re: [NFS] " Lon Hohberger
2007-04-13 19:16 ` Lon Hohberger
2007-04-13 19:31 ` [Cluster-devel] Re: [NFS] " Wendy Cheng
2007-04-13 19:31 ` Wendy Cheng
2007-04-17 11:52 ` [Cluster-devel] Re: [NFS] " Olaf Kirch
2007-04-17 11:52 ` Olaf Kirch
2007-04-17 13:24 ` [Cluster-devel] Re: [NFS] " Wendy Cheng
2007-04-17 13:24 ` Wendy Cheng
2007-04-17 14:51 ` [Cluster-devel] Re: [NFS] " Olaf Kirch
2007-04-17 14:51 ` Olaf Kirch
2007-04-17 15:09 ` [Cluster-devel] Re: [NFS] " Wendy Cheng
2007-04-17 15:09 ` Wendy Cheng
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=46156FA0.4030506@redhat.com \
--to=wcheng@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.