From: Stanislav Kinsbursky <skinsbursky@parallels.com>
To: Jeff Layton <jlayton@redhat.com>
Cc: "bfields@fieldses.org" <bfields@fieldses.org>,
"linux-nfs@vger.kernel.org" <linux-nfs@vger.kernel.org>
Subject: Re: [PATCH v8 5/6] nfsd: add the infrastructure to handle the cld upcall
Date: Mon, 05 Mar 2012 12:48:49 +0400 [thread overview]
Message-ID: <4F547DF1.8090003@parallels.com> (raw)
In-Reply-To: <1330720950-5872-6-git-send-email-jlayton@redhat.com>
This variant is almost what was desired.
If you could prepare one more version, please take a look at my comments below.
On 03.03.2012 00:42, Jeff Layton wrote:
> ...and add a mechanism for switching between the "legacy" tracker and
> the new one. The decision is made by looking to see whether the
> v4recoverydir exists. If it does, then the legacy client tracker is
> used.
>
> If it's not, then the kernel will create a "cld" pipe in rpc_pipefs.
> That pipe is used to talk to a daemon for handling the upcall.
>
> Signed-off-by: Jeff Layton<jlayton@redhat.com>
> ---
> fs/nfsd/nfs4recover.c | 411 ++++++++++++++++++++++++++++++++++++++++++++++++-
> 1 files changed, 410 insertions(+), 1 deletions(-)
>
> diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
> index ccd9b97..d928c58 100644
> --- a/fs/nfsd/nfs4recover.c
> +++ b/fs/nfsd/nfs4recover.c
> @@ -1,5 +1,6 @@
> /*
> * Copyright (c) 2004 The Regents of the University of Michigan.
> +* Copyright (c) 2012 Jeff Layton<jlayton@redhat.com>
> * All rights reserved.
> *
> * Andy Adamson<andros@citi.umich.edu>
> @@ -36,6 +37,11 @@
> #include<linux/namei.h>
> #include<linux/crypto.h>
> #include<linux/sched.h>
> +#include<linux/fs.h>
> +#include<net/net_namespace.h>
> +#include<linux/sunrpc/rpc_pipe_fs.h>
> +#include<linux/sunrpc/clnt.h>
> +#include<linux/nfsd/cld.h>
>
> #include "nfsd.h"
> #include "state.h"
> @@ -478,12 +484,415 @@ static struct nfsd4_client_tracking_ops nfsd4_legacy_tracking_ops = {
> .grace_done = nfsd4_recdir_purge_old,
> };
>
> +/* Globals */
> +#define NFSD_PIPE_DIR "nfsd"
> +#define NFSD_CLD_PIPE "cld"
> +
> +static struct rpc_pipe *cld_pipe;
> +
> +/* list of cld_msg's that are currently in use */
> +static DEFINE_SPINLOCK(cld_lock);
> +static LIST_HEAD(cld_list);
> +static unsigned int cld_xid;
> +
> +struct cld_upcall {
> + struct list_head cu_list;
> + struct task_struct *cu_task;
> + struct cld_msg cu_msg;
> +};
> +
> +static int
> +__cld_pipe_upcall(struct cld_msg *cmsg)
> +{
> + int ret;
> + struct rpc_pipe_msg msg;
> +
> + memset(&msg, 0, sizeof(msg));
> + msg.data = cmsg;
> + msg.len = sizeof(*cmsg);
> +
> + /*
> + * Set task state before we queue the upcall. That prevents
> + * wake_up_process in the downcall from racing with schedule.
> + */
> + set_current_state(TASK_UNINTERRUPTIBLE);
> + ret = rpc_queue_upcall(cld_pipe,&msg);
It would be great to replace the hard-coded cld_pipe here with a passed one.
For example, put this cld_pipe pointer in the cld_msg structure,
and initialize it (with the hard-coded value for now) below in
nfsd4_cld_create() and all the other places where alloc_cld_upcall() is called.
BTW, maybe allocate this struct cld_upcall per NFS client somehow, and thus
get rid of the alloc/free calls?
> + if (ret< 0) {
> + set_current_state(TASK_RUNNING);
> + goto out;
> + }
> +
> + schedule();
> + set_current_state(TASK_RUNNING);
> +
> + if (msg.errno< 0)
> + ret = msg.errno;
> +out:
> + return ret;
> +}
> +
> +static int
> +cld_pipe_upcall(struct cld_msg *cmsg)
> +{
> + int ret;
> +
> + /*
> + * -EAGAIN occurs when pipe is closed and reopened while there are
> + * upcalls queued.
> + */
> + do {
> + ret = __cld_pipe_upcall(cmsg);
> + } while (ret == -EAGAIN);
> +
> + return ret;
> +}
> +
> +static ssize_t
> +cld_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
> +{
> + struct cld_upcall *tmp, *cup;
> + struct cld_msg *cmsg = (struct cld_msg *)src;
> + uint32_t xid;
> +
> + if (mlen != sizeof(*cmsg)) {
> + dprintk("%s: got %lu bytes, expected %lu\n", __func__, mlen,
> + sizeof(*cmsg));
> + return -EINVAL;
> + }
> +
> + /* copy just the xid so we can try to find that */
> + if (copy_from_user(&xid,&cmsg->cm_xid, sizeof(xid)) != 0) {
> + dprintk("%s: error when copying xid from userspace", __func__);
> + return -EFAULT;
> + }
> +
> + /* walk the list and find corresponding xid */
> + cup = NULL;
> + spin_lock(&cld_lock);
> + list_for_each_entry(tmp,&cld_list, cu_list) {
> + if (get_unaligned(&tmp->cu_msg.cm_xid) == xid) {
> + cup = tmp;
> + list_del_init(&cup->cu_list);
> + break;
> + }
> + }
> + spin_unlock(&cld_lock);
> +
> + /* couldn't find upcall? */
> + if (!cup) {
> + dprintk("%s: couldn't find upcall -- xid=%u\n", __func__,
> + cup->cu_msg.cm_xid);
> + return -EINVAL;
> + }
> +
> + if (copy_from_user(&cup->cu_msg, src, mlen) != 0)
> + return -EFAULT;
> +
> + wake_up_process(cup->cu_task);
> + return mlen;
> +}
> +
> +static void
> +cld_pipe_destroy_msg(struct rpc_pipe_msg *msg)
> +{
> + struct cld_msg *cmsg = msg->data;
> + struct cld_upcall *cup = container_of(cmsg, struct cld_upcall,
> + cu_msg);
> +
> + /* errno>= 0 means we got a downcall */
> + if (msg->errno>= 0)
> + return;
> +
> + wake_up_process(cup->cu_task);
> +}
> +
> +static const struct rpc_pipe_ops cld_upcall_ops = {
> + .upcall = rpc_pipe_generic_upcall,
> + .downcall = cld_pipe_downcall,
> + .destroy_msg = cld_pipe_destroy_msg,
> +};
> +
> +static struct dentry *
> +nfsd4_cld_register_sb(struct super_block *sb, struct rpc_pipe *pipe)
> +{
> + struct dentry *dir, *dentry;
> +
> + dir = rpc_d_lookup_sb(sb, NFSD_PIPE_DIR);
> + if (dir == NULL)
> + return ERR_PTR(-ENOENT);
> + dentry = rpc_mkpipe_dentry(dir, NFSD_CLD_PIPE, NULL, pipe);
> + dput(dir);
> + return dentry;
> +}
> +
> +static void
> +nfsd4_cld_unregister_sb(struct rpc_pipe *pipe)
> +{
> + if (pipe->dentry)
> + rpc_unlink(pipe->dentry);
> +}
> +
> +static struct dentry *
> +nfsd4_cld_register_net(struct net *net, struct rpc_pipe *pipe)
> +{
> + struct super_block *sb;
> + struct dentry *dentry;
> +
> + sb = rpc_get_sb_net(net);
> + if (!sb)
> + return NULL;
> + dentry = nfsd4_cld_register_sb(sb, pipe);
> + rpc_put_sb_net(net);
> + return dentry;
> +}
> +
> +static void
> +nfsd4_cld_unregister_net(struct net *net, struct rpc_pipe *pipe)
> +{
> + struct super_block *sb;
> +
> + sb = rpc_get_sb_net(net);
> + if (sb) {
> + nfsd4_cld_unregister_sb(pipe);
> + rpc_put_sb_net(net);
> + }
> +}
> +
> +/* Initialize rpc_pipefs pipe for communication with client tracking daemon */
> +static int
> +nfsd4_init_cld_pipe(void)
This function will be called once per network namespace context.
Could you please parametrize it by net instead of using the hard-coded init_net
a few lines below?
> +{
> + int ret;
> + struct dentry *dentry;
> +
> + /* FIXME: containerize this code */
> + if (cld_pipe)
> + return 0;
> +
> + cld_pipe = rpc_mkpipe_data(&cld_upcall_ops, RPC_PIPE_WAIT_FOR_OPEN);
> + if (IS_ERR(cld_pipe)) {
> + ret = PTR_ERR(cld_pipe);
> + goto err;
> + }
> +
> + dentry = nfsd4_cld_register_net(&init_net, cld_pipe);
> + if (IS_ERR(dentry)) {
> + ret = PTR_ERR(dentry);
> + goto err_destroy_data;
> + }
> +
> + cld_pipe->dentry = dentry;
> + return 0;
> +
> +err_destroy_data:
> + rpc_destroy_pipe_data(cld_pipe);
> +err:
> + cld_pipe = NULL;
> + printk(KERN_ERR "NFSD: unable to create nfsdcld upcall pipe (%d)\n",
> + ret);
> + return ret;
> +}
> +
> +static void
> +nfsd4_remove_cld_pipe(void)
> +{
Ditto.
> + /* FIXME: containerize... */
> + nfsd4_cld_unregister_net(&init_net, cld_pipe);
> + rpc_destroy_pipe_data(cld_pipe);
> + cld_pipe = NULL;
> +}
> +
> +static struct cld_upcall *
> +alloc_cld_upcall(void)
> +{
> + struct cld_upcall *new, *tmp;
> +
> + new = kzalloc(sizeof(*new), GFP_KERNEL);
> + if (!new)
> + return new;
> +
> + /* FIXME: hard cap on number in flight? */
> +restart_search:
> + spin_lock(&cld_lock);
> + list_for_each_entry(tmp,&cld_list, cu_list) {
> + if (tmp->cu_msg.cm_xid == cld_xid) {
> + cld_xid++;
> + spin_unlock(&cld_lock);
> + goto restart_search;
> + }
> + }
> + new->cu_task = current;
> + new->cu_msg.cm_vers = CLD_UPCALL_VERSION;
> + put_unaligned(cld_xid++,&new->cu_msg.cm_xid);
> + list_add(&new->cu_list,&cld_list);
> + spin_unlock(&cld_lock);
> +
> + dprintk("%s: allocated xid %u\n", __func__, new->cu_msg.cm_xid);
> +
> + return new;
> +}
> +
> +static void
> +free_cld_upcall(struct cld_upcall *victim)
> +{
> + spin_lock(&cld_lock);
> + list_del(&victim->cu_list);
> + spin_unlock(&cld_lock);
> + kfree(victim);
> +}
> +
> +/* Ask daemon to create a new record */
> +static void
> +nfsd4_cld_create(struct nfs4_client *clp)
> +{
> + int ret;
> + struct cld_upcall *cup;
> +
> + /* Don't upcall if it's already stored */
> + if (test_bit(NFSD4_CLIENT_STABLE,&clp->cl_flags))
> + return;
> +
> + cup = alloc_cld_upcall();
> + if (!cup) {
> + ret = -ENOMEM;
> + goto out_err;
> + }
> +
> + cup->cu_msg.cm_cmd = Cld_Create;
> + cup->cu_msg.cm_u.cm_name.cn_len = clp->cl_name.len;
> + memcpy(cup->cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data,
> + clp->cl_name.len);
> +
> + ret = cld_pipe_upcall(&cup->cu_msg);
> + if (!ret) {
> + ret = cup->cu_msg.cm_status;
> + set_bit(NFSD4_CLIENT_STABLE,&clp->cl_flags);
> + }
> +
> + free_cld_upcall(cup);
> +out_err:
> + if (ret)
> + printk(KERN_ERR "NFSD: Unable to create client "
> + "record on stable storage: %d\n", ret);
> +}
> +
> +/* Ask daemon to create a new record */
> +static void
> +nfsd4_cld_remove(struct nfs4_client *clp)
> +{
> + int ret;
> + struct cld_upcall *cup;
> +
> + /* Don't upcall if it's already removed */
> + if (!test_bit(NFSD4_CLIENT_STABLE,&clp->cl_flags))
> + return;
> +
> + cup = alloc_cld_upcall();
> + if (!cup) {
> + ret = -ENOMEM;
> + goto out_err;
> + }
> +
> + cup->cu_msg.cm_cmd = Cld_Remove;
> + cup->cu_msg.cm_u.cm_name.cn_len = clp->cl_name.len;
> + memcpy(cup->cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data,
> + clp->cl_name.len);
> +
> + ret = cld_pipe_upcall(&cup->cu_msg);
> + if (!ret) {
> + ret = cup->cu_msg.cm_status;
> + clear_bit(NFSD4_CLIENT_STABLE,&clp->cl_flags);
> + }
> +
> + free_cld_upcall(cup);
> +out_err:
> + if (ret)
> + printk(KERN_ERR "NFSD: Unable to remove client "
> + "record from stable storage: %d\n", ret);
> +}
> +
> +/* Check for presence of a record, and update its timestamp */
> +static int
> +nfsd4_cld_check(struct nfs4_client *clp)
> +{
> + int ret;
> + struct cld_upcall *cup;
> +
> + /* Don't upcall if one was already stored during this grace pd */
> + if (test_bit(NFSD4_CLIENT_STABLE,&clp->cl_flags))
> + return 0;
> +
> + cup = alloc_cld_upcall();
> + if (!cup) {
> + printk(KERN_ERR "NFSD: Unable to check client record on "
> + "stable storage: %d\n", -ENOMEM);
> + return -ENOMEM;
> + }
> +
> + cup->cu_msg.cm_cmd = Cld_Check;
> + cup->cu_msg.cm_u.cm_name.cn_len = clp->cl_name.len;
> + memcpy(cup->cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data,
> + clp->cl_name.len);
> +
> + ret = cld_pipe_upcall(&cup->cu_msg);
> + if (!ret) {
> + ret = cup->cu_msg.cm_status;
> + set_bit(NFSD4_CLIENT_STABLE,&clp->cl_flags);
> + }
> +
> + free_cld_upcall(cup);
> + return ret;
> +}
> +
> +static void
> +nfsd4_cld_grace_done(time_t boot_time)
> +{
> + int ret;
> + struct cld_upcall *cup;
> +
> + cup = alloc_cld_upcall();
> + if (!cup) {
> + ret = -ENOMEM;
> + goto out_err;
> + }
> +
> + cup->cu_msg.cm_cmd = Cld_GraceDone;
> + cup->cu_msg.cm_u.cm_gracetime = (int64_t)boot_time;
> + ret = cld_pipe_upcall(&cup->cu_msg);
> + if (!ret)
> + ret = cup->cu_msg.cm_status;
> +
> + free_cld_upcall(cup);
> +out_err:
> + if (ret)
> + printk(KERN_ERR "NFSD: Unable to end grace period: %d\n", ret);
> +}
> +
> +static struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops = {
> + .init = nfsd4_init_cld_pipe,
> + .exit = nfsd4_remove_cld_pipe,
> + .create = nfsd4_cld_create,
> + .remove = nfsd4_cld_remove,
> + .check = nfsd4_cld_check,
> + .grace_done = nfsd4_cld_grace_done,
> +};
> +
> int
> nfsd4_client_tracking_init(void)
> {
> int status;
> + struct path path;
>
> - client_tracking_ops =&nfsd4_legacy_tracking_ops;
> + if (!client_tracking_ops) {
> + client_tracking_ops =&nfsd4_cld_tracking_ops;
> + status = kern_path(nfs4_recoverydir(), LOOKUP_FOLLOW,&path);
> + if (!status) {
> + if (S_ISDIR(path.dentry->d_inode->i_mode))
> + client_tracking_ops =
> + &nfsd4_legacy_tracking_ops;
> + path_put(&path);
> + }
> + }
>
> status = client_tracking_ops->init();
> if (status) {
--
Best regards,
Stanislav Kinsbursky
next prev parent reply other threads:[~2012-03-05 8:49 UTC|newest]
Thread overview: 15+ messages / expand[flat|nested] mbox.gz Atom feed top
2012-03-02 20:42 [PATCH v8 0/6] nfsd: overhaul the client name tracking code Jeff Layton
2012-03-02 20:42 ` [PATCH v8 1/6] nfsd: add nfsd4_client_tracking_ops struct and a way to set it Jeff Layton
2012-03-05 8:34 ` Stanislav Kinsbursky
2012-03-02 20:42 ` [PATCH v8 2/6] sunrpc: create nfsd dir in rpc_pipefs Jeff Layton
2012-03-02 20:42 ` [PATCH v8 3/6] nfsd: convert nfs4_client->cl_cb_flags to a generic flags field Jeff Layton
2012-03-02 20:42 ` [PATCH v8 4/6] nfsd: add a header describing upcall to nfsdcld Jeff Layton
2012-03-02 20:42 ` [PATCH v8 5/6] nfsd: add the infrastructure to handle the cld upcall Jeff Layton
2012-03-05 8:48 ` Stanislav Kinsbursky [this message]
2012-03-05 12:42 ` Jeff Layton
2012-03-05 12:51 ` Stanislav Kinsbursky
2012-03-05 14:39 ` Jeff Layton
2012-03-05 14:53 ` Stanislav Kinsbursky
2012-03-05 15:16 ` Jeff Layton
2012-03-05 15:48 ` Stanislav Kinsbursky
2012-03-02 20:42 ` [PATCH v8 6/6] nfsd: add notifier to handle mount/unmount of rpc_pipefs sb Jeff Layton
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4F547DF1.8090003@parallels.com \
--to=skinsbursky@parallels.com \
--cc=bfields@fieldses.org \
--cc=jlayton@redhat.com \
--cc=linux-nfs@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).