[PATCH 0/2] pnfs-block: move device mapping from kernel to user daemon

linux-nfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

* [PATCH 0/2] pnfs-block: move device mapping from kernel to user daemon
@ 2010-07-15 19:40 Jim Rees
  2010-07-15 19:41 ` [PATCH 1/2] pnfs-block: Add support for simple rpc pipefs Jim Rees
                   ` (2 more replies)
  0 siblings, 3 replies; 6+ messages in thread
From: Jim Rees @ 2010-07-15 19:40 UTC (permalink / raw)
  To: bhalevy; +Cc: linux-nfs

These two patches move the complex block layout device mapping from the
kernel to a user space daemon.  The first patch adds a simple upcall
mechanism via pipefs for the kernel piece to communicate with the daemon.
The second patch removes the kernel device mapping and replaces it with
calls to the daemon.

Passes Connectathon tests to both EMC and spnfs servers.

The user daemon will be sent separately as a patch to nfs-utils.

Haiying Tang (2):
  pnfs-block: Add support for simple rpc pipefs
  pnfs-block: Remove device creation from kernel

 fs/nfs/blocklayout/Makefile                      |    2 +-
 fs/nfs/blocklayout/block-device-discovery-pipe.c |   66 +++
 fs/nfs/blocklayout/blocklayout.c                 |   15 +-
 fs/nfs/blocklayout/blocklayout.h                 |   18 +-
 fs/nfs/blocklayout/blocklayoutdev.c              |  494 +++-------------------
 fs/nfs/blocklayout/blocklayoutdm.c               |  297 ++-----------
 include/linux/sunrpc/rpc_pipe_fs.h               |    4 +
 include/linux/sunrpc/simple_rpc_pipefs.h         |  112 +++++
 net/sunrpc/Makefile                              |    2 +-
 net/sunrpc/simple_rpc_pipefs.c                   |  422 ++++++++++++++++++
 10 files changed, 720 insertions(+), 712 deletions(-)
 create mode 100644 fs/nfs/blocklayout/block-device-discovery-pipe.c
 create mode 100644 include/linux/sunrpc/simple_rpc_pipefs.h
 create mode 100644 net/sunrpc/simple_rpc_pipefs.c


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH 1/2] pnfs-block: Add support for simple rpc pipefs
  2010-07-15 19:40 [PATCH 0/2] pnfs-block: move device mapping from kernel to user daemon Jim Rees
@ 2010-07-15 19:41 ` Jim Rees
  2010-07-18  9:09   ` Christoph Hellwig
  2010-07-15 19:41 ` [PATCH 2/2] pnfs-block: Remove device creation from kernel Jim Rees
  2010-07-18  9:06 ` [PATCH 0/2] pnfs-block: move device mapping from kernel to user daemon Christoph Hellwig
  2 siblings, 1 reply; 6+ messages in thread
From: Jim Rees @ 2010-07-15 19:41 UTC (permalink / raw)
  To: bhalevy; +Cc: linux-nfs

From: Haiying Tang <Tang_Haiying@emc.com>

pnfs-block: Add support for simple rpc pipefs

Signed-off-by: Eric Anderle <eanderle@umich.edu>
Signed-off-by: Jim Rees <rees@umich.edu>
---
 include/linux/sunrpc/rpc_pipe_fs.h       |    4 +
 include/linux/sunrpc/simple_rpc_pipefs.h |  112 ++++++++
 net/sunrpc/Makefile                      |    2 +-
 net/sunrpc/simple_rpc_pipefs.c           |  422 ++++++++++++++++++++++++++++++
 4 files changed, 539 insertions(+), 1 deletions(-)
 create mode 100644 include/linux/sunrpc/simple_rpc_pipefs.h
 create mode 100644 net/sunrpc/simple_rpc_pipefs.c

diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h
index 6f942c9..2177d50 100644
--- a/include/linux/sunrpc/rpc_pipe_fs.h
+++ b/include/linux/sunrpc/rpc_pipe_fs.h
@@ -12,6 +12,10 @@ struct rpc_pipe_msg {
 	size_t len;
 	size_t copied;
 	int errno;
+#define PIPEFS_AUTOFREE_RPCMSG       0x01 /* frees rpc_pipe_msg */
+#define PIPEFS_AUTOFREE_RPCMSG_DATA  0x02 /* frees rpc_pipe_msg->data */
+#define PIPEFS_AUTOFREE_UPCALL_MSG   PIPEFS_AUTOFREE_RPCMSG_DATA
+	u8 flags;
 };
 
 struct rpc_pipe_ops {
diff --git a/include/linux/sunrpc/simple_rpc_pipefs.h b/include/linux/sunrpc/simple_rpc_pipefs.h
new file mode 100644
index 0000000..dd02206
--- /dev/null
+++ b/include/linux/sunrpc/simple_rpc_pipefs.h
@@ -0,0 +1,112 @@
+/*
+ *  linux/fs/gfs2/simple_rpc_pipefs.h
+ *
+ *  Copyright (c) 2008 The Regents of the University of Michigan.
+ *  All rights reserved.
+ *
+ *  David M. Richter <richterd@citi.umich.edu>
+ *
+ *  Drawing on work done by Andy Adamson <andros@citi.umich.edu> and
+ *  Marius Eriksen <marius@monkey.org>.  Thanks for the help over the
+ *  years, guys.
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions
+ *  are met:
+ *
+ *  1. Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *  2. Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *  3. Neither the name of the University nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *  With thanks to CITI's project sponsor and partner, IBM.
+ */
+
+#ifndef _SIMPLE_RPC_PIPEFS_H_
+#define _SIMPLE_RPC_PIPEFS_H_
+
+#include <linux/fs.h>
+#include <linux/list.h>
+#include <linux/mount.h>
+#include <linux/sched.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/rpc_pipe_fs.h>
+
+
+#define payload_of(headerp)  ((void *)((headerp) + 1)) /* data after hdr */
+
+/*
+ * pipefs_hdr_t -- the generic message format for simple_rpc_pipefs.  Messages
+ * may simply be the header itself, although having an optional data payload
+ * follow the header allows much more flexibility.
+ *
+ * Messages are created using pipefs_alloc_init_msg() and
+ * pipefs_alloc_init_msg_padded(), both of which accept a pointer to an
+ * (optional) data payload.
+ *
+ * Given a pipefs_hdr_t *msg that has a struct foo payload, the data can be
+ * accessed using: struct foo *foop = payload_of(msg)
+ */
+typedef struct pipefs_hdr {
+	u32 msgid;    /* a reply carries the msgid of the upcall it answers */
+	u8  type;     /* caller-defined; passed through the alloc helpers */
+	u8  flags;    /* caller-defined; passed through the alloc helpers */
+	u16 totallen; /* length of entire message, including hdr itself */
+	u32 status;   /* not set by simple_rpc_pipefs (zeroed at allocation) */
+} pipefs_hdr_t;
+
+/*
+ * pipefs_list_t -- a type of list used for tracking callers who've made an
+ * upcall and are blocked waiting for a reply.
+ *
+ * See pipefs_queue_upcall_waitreply() and pipefs_assign_upcall_reply().
+ */
+typedef struct pipefs_list {
+	struct list_head list;	/* pipefs_upcall_t entries awaiting a reply */
+	spinlock_t list_lock;	/* guards list */
+} pipefs_list_t;
+
+
+/* See net/sunrpc/simple_rpc_pipefs.c for more info on using these functions. */
+extern struct dentry *pipefs_mkpipe(const char *name,
+				    struct rpc_pipe_ops *ops,
+				    int wait_for_open);
+extern void pipefs_closepipe(struct dentry *pipe);
+extern void pipefs_init_list(pipefs_list_t *list);
+extern pipefs_hdr_t *pipefs_alloc_init_msg(u32 msgid, u8 type, u8 flags,
+					   void *data, u16 datalen);
+extern pipefs_hdr_t *pipefs_alloc_init_msg_padded(u32 msgid, u8 type,
+						  u8 flags, void *data,
+						  u16 datalen, u16 padlen);
+extern pipefs_hdr_t *pipefs_queue_upcall_waitreply(struct dentry *pipe,
+						   pipefs_hdr_t *msg,
+						   pipefs_list_t *uplist,
+						   u8 upflags, u32 timeout);
+extern int pipefs_queue_upcall_noreply(struct dentry *pipe, pipefs_hdr_t *msg,
+				       u8 upflags);
+extern int pipefs_assign_upcall_reply(pipefs_hdr_t *reply,
+				      pipefs_list_t *uplist);
+extern pipefs_hdr_t *pipefs_readmsg(struct file *filp, const char __user *src,
+				    size_t len);
+extern ssize_t pipefs_generic_upcall(struct file *filp,
+				     struct rpc_pipe_msg *rpcmsg,
+				     char __user *dst, size_t buflen);
+extern void pipefs_generic_destroy_msg(struct rpc_pipe_msg *rpcmsg);
+
+#endif /* _SIMPLE_RPC_PIPEFS_H_ */
diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile
index 9d2fca5..e102040 100644
--- a/net/sunrpc/Makefile
+++ b/net/sunrpc/Makefile
@@ -12,7 +12,7 @@ sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \
 	    svc.o svcsock.o svcauth.o svcauth_unix.o \
 	    addr.o rpcb_clnt.o timer.o xdr.o \
 	    sunrpc_syms.o cache.o rpc_pipe.o \
-	    svc_xprt.o
+	    svc_xprt.o simple_rpc_pipefs.o
 sunrpc-$(CONFIG_NFS_V4_1) += backchannel_rqst.o bc_svc.o
 sunrpc-$(CONFIG_PROC_FS) += stats.o
 sunrpc-$(CONFIG_SYSCTL) += sysctl.o
diff --git a/net/sunrpc/simple_rpc_pipefs.c b/net/sunrpc/simple_rpc_pipefs.c
new file mode 100644
index 0000000..e63f1b2
--- /dev/null
+++ b/net/sunrpc/simple_rpc_pipefs.c
@@ -0,0 +1,422 @@
+/*
+ *  net/sunrpc/simple_rpc_pipefs.c
+ *
+ *  Copyright (c) 2008 The Regents of the University of Michigan.
+ *  All rights reserved.
+ *
+ *  David M. Richter <richterd@citi.umich.edu>
+ *
+ *  Drawing on work done by Andy Adamson <andros@citi.umich.edu> and
+ *  Marius Eriksen <marius@monkey.org>.  Thanks for the help over the
+ *  years, guys.
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions
+ *  are met:
+ *
+ *  1. Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *  2. Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *  3. Neither the name of the University nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *  With thanks to CITI's project sponsor and partner, IBM.
+ */
+
+#include <linux/completion.h>
+#include <linux/uaccess.h>
+#include <linux/module.h>
+#include <linux/sunrpc/simple_rpc_pipefs.h>
+
+
+/*
+ * Make an rpc_pipefs pipe named @name at the root of the mounted rpc_pipefs
+ * filesystem.  Returns the pipe's dentry, or an ERR_PTR() on failure.
+ *
+ * If @wait_for_open is non-zero and an upcall is later queued but the userland
+ * end of the pipe has not yet been opened, the upcall will remain queued until
+ * the pipe is opened; otherwise, the upcall queueing will return with -EPIPE.
+ *
+ * The rpc_pipefs mount reference taken here is dropped by pipefs_closepipe(),
+ * or right away if the pipe cannot be created.
+ */
+struct dentry *pipefs_mkpipe(const char *name, struct rpc_pipe_ops *ops,
+			     int wait_for_open)
+{
+	struct dentry *pipe;
+	struct vfsmount *mnt;
+
+	mnt = rpc_get_mount();
+	if (IS_ERR(mnt))
+		return ERR_CAST(mnt);
+	if (!mnt->mnt_root)
+		pipe = ERR_PTR(-ENOENT);
+	else
+		pipe = rpc_mkpipe(mnt->mnt_root, name, NULL, ops,
+				  wait_for_open ? RPC_PIPE_WAIT_FOR_OPEN : 0);
+	if (IS_ERR(pipe))
+		rpc_put_mount();	/* don't leak the pin on failure */
+	return pipe;
+}
+EXPORT_SYMBOL(pipefs_mkpipe);
+
+/*
+ * Shut down a pipe made by pipefs_mkpipe(): unlink it, then rpc_put_mount().
+ * XXX: do we need to retain an extra reference on the mount?
+ */
+void pipefs_closepipe(struct dentry *pipe)
+{
+	rpc_unlink(pipe);
+	rpc_put_mount();
+}
+EXPORT_SYMBOL(pipefs_closepipe);
+
+/*
+ * Initialize a pipefs_list_t -- a list that keeps track of callers who're
+ * blocked having made an upcall and are awaiting a reply.
+ *
+ * See pipefs_queue_upcall_waitreply() and pipefs_assign_upcall_reply() for
+ * how to use them.
+ */
+void pipefs_init_list(pipefs_list_t *list)
+{
+	INIT_LIST_HEAD(&list->list);
+	spin_lock_init(&list->list_lock);
+}
+EXPORT_SYMBOL(pipefs_init_list);
+
+/*
+ * Alloc/init a generic pipefs message header and copy into its message body
+ * an arbitrary data payload.  @data may be NULL: the body then stays zeroed.
+ *
+ * Returns the message, ERR_PTR(-E2BIG) if header + @datalen + @padlen does
+ * not fit in PAGE_SIZE and in the u16 totallen field, or ERR_PTR(-ENOMEM).
+ *
+ * pipefs_hdr_t's are meant to serve as generic, general-purpose message
+ * headers for easy rpc_pipefs I/O.  When an upcall is made, the
+ * pipefs_hdr_t is assigned to a struct rpc_pipe_msg and delivered
+ * therein.  --And yes, the naming can seem a little confusing at first:
+ *
+ * When one thinks of an upcall "message", in simple_rpc_pipefs that's a
+ * pipefs_hdr_t (possibly with an attached message body).  A
+ * struct rpc_pipe_msg is actually only the -vehicle- by which the "real"
+ * message is delivered and processed.
+ */
+pipefs_hdr_t *pipefs_alloc_init_msg_padded(u32 msgid, u8 type, u8 flags,
+					   void *data, u16 datalen, u16 padlen)
+{
+	size_t totallen;
+	pipefs_hdr_t *msg;
+
+	/* size_t: a u16 sum could wrap and defeat the bounds check below */
+	totallen = sizeof(*msg) + (size_t)datalen + padlen;
+	if (totallen > PAGE_SIZE || totallen > (u16)~0)
+		return ERR_PTR(-E2BIG);
+
+	msg = kzalloc(totallen, GFP_KERNEL);
+	if (!msg)
+		return ERR_PTR(-ENOMEM);
+
+	msg->msgid = msgid;
+	msg->type = type;
+	msg->flags = flags;
+	msg->totallen = totallen;
+	if (data && datalen)
+		memcpy(payload_of(msg), data, datalen);
+	return msg;
+}
+EXPORT_SYMBOL(pipefs_alloc_init_msg_padded);
+
+/*
+ * Unpadded variant of pipefs_alloc_init_msg_padded(): same, with padlen 0.
+ */
+pipefs_hdr_t *pipefs_alloc_init_msg(u32 msgid, u8 type, u8 flags,
+				    void *data, u16 datalen)
+{
+	return pipefs_alloc_init_msg_padded(msgid, type, flags, data,
+					    datalen, 0);
+}
+EXPORT_SYMBOL(pipefs_alloc_init_msg);
+
+
+static void pipefs_init_rpcmsg(struct rpc_pipe_msg *rpcmsg, pipefs_hdr_t *msg,
+			       u8 upflags)
+{
+	memset(rpcmsg, 0, sizeof(*rpcmsg));	/* also clears copied/errno */
+	rpcmsg->data = msg;
+	rpcmsg->len = msg->totallen;		/* header + payload + pad */
+	rpcmsg->flags = upflags;		/* PIPEFS_AUTOFREE_* bits */
+}
+
+static struct rpc_pipe_msg *pipefs_alloc_init_rpcmsg(pipefs_hdr_t *msg,
+						     u8 upflags)
+{
+	struct rpc_pipe_msg *rpcmsg;
+
+	rpcmsg = kmalloc(sizeof(*rpcmsg), GFP_KERNEL);
+	if (!rpcmsg)
+		return ERR_PTR(-ENOMEM);
+
+	pipefs_init_rpcmsg(rpcmsg, msg, upflags);	/* zeroes it first */
+	return rpcmsg;
+}
+
+
+/* represents an upcall that'll block and wait for a reply */
+typedef struct pipefs_upcall {
+	u32 msgid;			/* copied from the upcall message */
+	struct rpc_pipe_msg rpcmsg;	/* handed to rpc_queue_upcall() */
+	struct list_head list;		/* entry on the caller's pipefs_list_t */
+	wait_queue_head_t waitq;	/* the blocked caller sleeps here */
+	struct pipefs_hdr *reply;	/* NULL until a reply is assigned */
+} pipefs_upcall_t;
+
+
+static void pipefs_init_upcall_waitreply(pipefs_upcall_t *upcall,
+					 pipefs_hdr_t *msg, u8 upflags)
+{
+	upcall->reply = NULL;	/* set by pipefs_assign_upcall_reply() */
+	upcall->msgid = msg->msgid;	/* key used to match the reply */
+	INIT_LIST_HEAD(&upcall->list);
+	init_waitqueue_head(&upcall->waitq);
+	pipefs_init_rpcmsg(&upcall->rpcmsg, msg, upflags);
+}
+
+/*
+ * Put @upcall on @uplist, queue its rpcmsg on @pipe and sleep until a reply
+ * is assigned or @timeout jiffies pass (0 means wait forever).  Returns 0
+ * with upcall->reply set, -ETIMEDOUT, or the rpc_queue_upcall() error.
+ *
+ * NOTE(review): after -ETIMEDOUT the rpcmsg embedded in @upcall may still be
+ * queued on the pipe while @upcall lives on the caller's stack -- confirm.
+ */
+static int __pipefs_queue_upcall_waitreply(struct dentry *pipe,
+					   pipefs_upcall_t *upcall,
+					   pipefs_list_t *uplist, u32 timeout)
+{
+	int err;
+
+	spin_lock(&uplist->list_lock);
+	list_add(&upcall->list, &uplist->list);
+	spin_unlock(&uplist->list_lock);
+
+	err = rpc_queue_upcall(pipe->d_inode, &upcall->rpcmsg);
+	if (err < 0)
+		goto out;
+
+	/* wait_event*() re-check the condition, so an early reply isn't lost */
+	if (!timeout)
+		wait_event(upcall->waitq, upcall->reply != NULL);
+	else if (!wait_event_timeout(upcall->waitq, upcall->reply != NULL,
+				     timeout) && upcall->reply == NULL)
+		err = -ETIMEDOUT;
+out:
+	spin_lock(&uplist->list_lock);
+	list_del_init(&upcall->list);
+	spin_unlock(&uplist->list_lock);
+	return err;
+}
+
+/*
+ * Queue a pipefs msg for an upcall to userspace, place the calling thread
+ * on @uplist, and block the thread to wait for a reply.  If @timeout is
+ * nonzero, the thread will be blocked for at most @timeout jiffies.
+ *
+ * (To convert time units into jiffies, consider the functions
+ *  msecs_to_jiffies(), usecs_to_jiffies(), timeval_to_jiffies(), and
+ *  timespec_to_jiffies().)
+ *
+ * Once a reply is received by your downcall handler, call
+ * pipefs_assign_upcall_reply() with @uplist to find the corresponding upcall,
+ * assign the reply, and wake the waiting thread.
+ *
+ * The returned pointer may be an error (e.g. ERR_PTR(-ETIMEDOUT)) and must be
+ * checked with IS_ERR() before attempting to access the reply message.
+ *
+ * Callers are responsible for freeing @msg, unless pipefs_generic_destroy_msg()
+ * is used as the ->destroy_msg() callback and the PIPEFS_AUTOFREE_UPCALL_MSG
+ * flag is set in @upflags.  See also rpc_pipe_fs.h.
+ */
+pipefs_hdr_t *pipefs_queue_upcall_waitreply(struct dentry *pipe,
+					    pipefs_hdr_t *msg,
+					    pipefs_list_t *uplist,
+					    u8 upflags, u32 timeout)
+{
+	int err = 0;
+	pipefs_upcall_t upcall;
+
+	pipefs_init_upcall_waitreply(&upcall, msg, upflags);
+	err = __pipefs_queue_upcall_waitreply(pipe, &upcall, uplist, timeout);
+	if (err < 0) {
+		kfree(upcall.reply);
+		upcall.reply = ERR_PTR(err);
+	}
+
+	return upcall.reply;
+}
+EXPORT_SYMBOL(pipefs_queue_upcall_waitreply);
+
+/*
+ * Queue a pipefs msg for an upcall to userspace and immediately return (i.e.,
+ * no reply is expected).  Returns 0 or a negative errno.
+ *
+ * Callers are responsible for freeing @msg, unless pipefs_generic_destroy_msg()
+ * is used as the ->destroy_msg() callback and the PIPEFS_AUTOFREE_UPCALL_MSG
+ * flag is set in @upflags.  See also rpc_pipe_fs.h.  If queueing fails, @msg
+ * is left untouched and stays owned by the caller.
+ */
+int pipefs_queue_upcall_noreply(struct dentry *pipe, pipefs_hdr_t *msg,
+				u8 upflags)
+{
+	int err;
+	struct rpc_pipe_msg *rpcmsg;
+
+	upflags |= PIPEFS_AUTOFREE_RPCMSG;
+	rpcmsg = pipefs_alloc_init_rpcmsg(msg, upflags);
+	if (IS_ERR(rpcmsg))
+		return PTR_ERR(rpcmsg);
+	err = rpc_queue_upcall(pipe->d_inode, rpcmsg);
+	if (err < 0)
+		kfree(rpcmsg);	/* never queued: ->destroy_msg() won't run */
+	return err;
+}
+EXPORT_SYMBOL(pipefs_queue_upcall_noreply);
+
+
+/* Find the waiter for @msgid.  Caller must hold uplist->list_lock. */
+static pipefs_upcall_t *pipefs_find_upcall_msgid(u32 msgid,
+						 pipefs_list_t *uplist)
+{
+	pipefs_upcall_t *upcall;
+
+	list_for_each_entry(upcall, &uplist->list, list)
+		if (upcall->msgid == msgid)
+			return upcall;
+	return NULL;
+}
+
+/*
+ * In your rpc_pipe_ops->downcall() handler, once you've read in a downcall
+ * message and have determined that it is a reply to a waiting upcall,
+ * you can use this function to find the appropriate upcall, assign the result,
+ * and wake the upcall thread.
+ *
+ * The reply message must have the same msgid as the original upcall message's.
+ * Returns 0, or -ENOENT if no waiter has that msgid (caller keeps @reply).
+ *
+ * See also pipefs_queue_upcall_waitreply() and pipefs_readmsg().
+ */
+int pipefs_assign_upcall_reply(pipefs_hdr_t *reply, pipefs_list_t *uplist)
+{
+	int err = 0;
+	pipefs_upcall_t *upcall;
+
+	/* lock held throughout: the waiter needs it to unlink and leave */
+	spin_lock(&uplist->list_lock);
+	upcall = pipefs_find_upcall_msgid(reply->msgid, uplist);
+	if (upcall) {
+		upcall->reply = reply;
+		wake_up(&upcall->waitq);
+	} else {
+		printk(KERN_ERR "%s: ERROR: have reply but no matching upcall "
+			"for msgid %u\n", __func__, reply->msgid);
+		err = -ENOENT;
+	}
+	spin_unlock(&uplist->list_lock);
+	return err;
+}
+EXPORT_SYMBOL(pipefs_assign_upcall_reply);
+
+/*
+ * Generic method to read-in and return a newly-allocated message which begins
+ * with a pipefs_hdr_t.  Returns the message (kfree() it) or an ERR_PTR().
+ *
+ * @len is the size of the userland write; at least a full header is required
+ * and anything larger than a u16 totallen can describe is rejected.  The
+ * header fields themselves are not validated here.
+ */
+pipefs_hdr_t *pipefs_readmsg(struct file *filp, const char __user *src,
+			     size_t len)
+{
+	pipefs_hdr_t *msg;
+
+	if (len < sizeof(*msg)) {
+		printk(KERN_ERR "%s: ERROR: header is too short (%zu vs %zu)\n",
+		       __func__, len, sizeof(*msg));
+		return ERR_PTR(-EINVAL);
+	}
+	/* totallen is a u16, so no valid message can be longer than this */
+	if (len > (u16)~0)
+		return ERR_PTR(-E2BIG);
+
+	msg = kzalloc(len, GFP_KERNEL);
+	if (!msg)
+		return ERR_PTR(-ENOMEM);
+
+	if (copy_from_user(msg, src, len)) {
+		kfree(msg);
+		return ERR_PTR(-EFAULT);
+	}
+	return msg;
+}
+EXPORT_SYMBOL(pipefs_readmsg);
+
+/*
+ * Generic rpc_pipe_ops->upcall() handler: copies the unread part of @rpcmsg
+ * to @dst; returns the byte count, or -EFAULT if nothing could be copied.
+ * Don't call this directly: to make an upcall, use
+ * pipefs_queue_upcall_waitreply() or pipefs_queue_upcall_noreply().
+ */
+ssize_t pipefs_generic_upcall(struct file *filp, struct rpc_pipe_msg *rpcmsg,
+			      char __user *dst, size_t buflen)
+{
+	char *data = (char *)rpcmsg->data + rpcmsg->copied;
+	size_t len = rpcmsg->len - rpcmsg->copied;
+	unsigned long left;
+
+	if (len > buflen)
+		len = buflen;
+
+	/* copy_to_user() returns the number of bytes NOT copied, never < 0 */
+	left = copy_to_user(dst, data, len);
+	if (len && left == len) {
+		rpcmsg->errno = -EFAULT;
+		return -EFAULT;
+	}
+
+	len -= left;
+	rpcmsg->copied += len;
+	rpcmsg->errno = 0;
+	return len;
+}
+EXPORT_SYMBOL(pipefs_generic_upcall);
+
+/*
+ * Generic rpc_pipe_ops->destroy_msg() handler implementation.
+ *
+ * Items are only freed if @rpcmsg->flags has been set appropriately.
+ * See pipefs_queue_upcall_noreply() and rpc_pipe_fs.h.
+ */
+void pipefs_generic_destroy_msg(struct rpc_pipe_msg *rpcmsg)
+{
+	if (rpcmsg->flags & PIPEFS_AUTOFREE_UPCALL_MSG)
+		kfree(rpcmsg->data);	/* the pipefs_hdr_t message */
+	if (rpcmsg->flags & PIPEFS_AUTOFREE_RPCMSG)
+		kfree(rpcmsg);		/* last: flags live inside it */
+}
+EXPORT_SYMBOL(pipefs_generic_destroy_msg);
-- 
1.7.0.4


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH 1/2] pnfs-block: Add support for simple rpc pipefs
  2010-07-15 19:41 ` [PATCH 1/2] pnfs-block: Add support for simple rpc pipefs Jim Rees
@ 2010-07-18  9:09   ` Christoph Hellwig
  0 siblings, 0 replies; 6+ messages in thread
From: Christoph Hellwig @ 2010-07-18  9:09 UTC (permalink / raw)
  To: Jim Rees; +Cc: bhalevy, linux-nfs

> --- /dev/null
> +++ b/include/linux/sunrpc/simple_rpc_pipefs.h
> @@ -0,0 +1,112 @@
> +/*
> + *  linux/fs/gfs2/simple_rpc_pipefs.h

There's a reason why these kind of filename comments are useless, and
you have just proven it.

> +#include <linux/fs.h>
> +#include <linux/list.h>
> +#include <linux/mount.h>
> +#include <linux/sched.h>
> +#include <linux/sunrpc/clnt.h>
> +#include <linux/sunrpc/rpc_pipe_fs.h>

Do you actually need all these in a header?  I suspect many could go
away in favour of forward declarations.

> +typedef struct pipefs_hdr {
> +	u32 msgid;
> +	u8  type;
> +	u8  flags;
> +	u16 totallen; /* length of entire message, including hdr itself */
> +	u32 status;
> +} pipefs_hdr_t;

> +typedef struct pipefs_list {
> +	struct list_head list;
> +	spinlock_t list_lock;
> +} pipefs_list_t;

Please do not add typedefs for your structures.


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH 2/2] pnfs-block: Remove device creation from kernel
  2010-07-15 19:40 [PATCH 0/2] pnfs-block: move device mapping from kernel to user daemon Jim Rees
  2010-07-15 19:41 ` [PATCH 1/2] pnfs-block: Add support for simple rpc pipefs Jim Rees
@ 2010-07-15 19:41 ` Jim Rees
  2010-07-18  9:06 ` [PATCH 0/2] pnfs-block: move device mapping from kernel to user daemon Christoph Hellwig
  2 siblings, 0 replies; 6+ messages in thread
From: Jim Rees @ 2010-07-15 19:41 UTC (permalink / raw)
  To: bhalevy; +Cc: linux-nfs

From: Haiying Tang <Tang_Haiying@emc.com>

pnfs-block: Remove device creation from kernel

Signed-off-by: Eric Anderle <eanderle@umich.edu>
Signed-off-by: Jim Rees <rees@umich.edu>
---
 fs/nfs/blocklayout/Makefile                      |    2 +-
 fs/nfs/blocklayout/block-device-discovery-pipe.c |   66 +++
 fs/nfs/blocklayout/blocklayout.c                 |   15 +-
 fs/nfs/blocklayout/blocklayout.h                 |   18 +-
 fs/nfs/blocklayout/blocklayoutdev.c              |  494 +++-------------------
 fs/nfs/blocklayout/blocklayoutdm.c               |  297 ++-----------
 6 files changed, 181 insertions(+), 711 deletions(-)
 create mode 100644 fs/nfs/blocklayout/block-device-discovery-pipe.c

diff --git a/fs/nfs/blocklayout/Makefile b/fs/nfs/blocklayout/Makefile
index 1e7619f..5a4bf3d 100644
--- a/fs/nfs/blocklayout/Makefile
+++ b/fs/nfs/blocklayout/Makefile
@@ -3,4 +3,4 @@
 #
 obj-$(CONFIG_PNFS_BLOCK) += blocklayoutdriver.o
 blocklayoutdriver-objs := blocklayout.o blocklayoutdev.o blocklayoutdm.o \
-			extents.o
+			extents.o block-device-discovery-pipe.o
diff --git a/fs/nfs/blocklayout/block-device-discovery-pipe.c b/fs/nfs/blocklayout/block-device-discovery-pipe.c
new file mode 100644
index 0000000..069c0a4
--- /dev/null
+++ b/fs/nfs/blocklayout/block-device-discovery-pipe.c
@@ -0,0 +1,66 @@
+#include <linux/module.h>
+#include <linux/uaccess.h>
+#include <linux/proc_fs.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/ctype.h>
+#include <linux/sched.h>
+#include "blocklayout.h"
+
+#define NFSDBG_FACILITY NFSDBG_PNFS_LD
+
+pipefs_list_t bl_device_list;
+struct dentry *bl_device_pipe;
+
+ssize_t bl_pipe_downcall(struct file *filp, const char __user *src, size_t len)
+{
+	int err;
+	pipefs_hdr_t *msg;
+
+	dprintk("Entering %s...\n", __func__);
+
+	msg = pipefs_readmsg(filp, src, len);
+	if (IS_ERR(msg)) {
+		dprintk("ERROR: unable to read pipefs message.\n");
+		return PTR_ERR(msg);
+	}
+
+	/* now assign the result, which wakes the blocked thread */
+	err = pipefs_assign_upcall_reply(msg, &bl_device_list);
+	if (err) {
+		dprintk("ERROR: failed to assign upcall with id %u\n",
+			msg->msgid);
+		kfree(msg);	/* no waiter took the reply, so drop it here */
+	}
+	return len;	/* the write is reported as consumed either way */
+}
+
+static struct rpc_pipe_ops bl_pipe_ops = {
+	.upcall         = pipefs_generic_upcall,
+	.downcall       = bl_pipe_downcall,
+	.destroy_msg    = pipefs_generic_destroy_msg,
+};
+
+int bl_pipe_init(void)
+{
+	dprintk("%s: block_device pipefs registering...\n", __func__);
+	/* the list must be usable before a downcall can reach it */
+	pipefs_init_list(&bl_device_list);
+	bl_device_pipe = pipefs_mkpipe("bl_device_pipe", &bl_pipe_ops, 1);
+	if (IS_ERR(bl_device_pipe)) {
+		dprintk("ERROR, unable to make block_device pipe\n");
+		return PTR_ERR(bl_device_pipe);
+	}
+
+	dprintk("bl_device_pipe created!\n");
+	return 0;
+}
+
+/* Close the upcall pipe, unless bl_device_pipe only holds an ERR_PTR(). */
+void bl_pipe_exit(void)
+{
+	dprintk("%s: block_device pipefs unregistering...\n", __func__);
+
+	if (!IS_ERR(bl_device_pipe))
+		pipefs_closepipe(bl_device_pipe);
+}
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 63d3b5a..8dfd967 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -737,6 +737,7 @@ nfs4_blk_get_deviceinfo(struct nfs_server *server, const struct nfs_fh *fh,
 	dev->pglen = PAGE_SIZE * max_pages;
 	dev->mincount = 0;
 
+	dprintk("%s: dev_id: %s\n", __func__, dev->dev_id.data);
 	rc = pnfs_block_callback_ops->nfs_getdeviceinfo(server, dev);
 	dprintk("%s getdevice info returns %d\n", __func__, rc);
 	if (rc)
@@ -765,7 +766,7 @@ bl_initialize_mountpoint(struct nfs_server *server, const struct nfs_fh *fh)
 	struct pnfs_devicelist *dlist = NULL;
 	struct pnfs_block_dev *bdev;
 	LIST_HEAD(block_disklist);
-	int status, i;
+	int status = 0, i;
 
 	dprintk("%s enter\n", __func__);
 
@@ -782,13 +783,6 @@ bl_initialize_mountpoint(struct nfs_server *server, const struct nfs_fh *fh)
 	spin_lock_init(&b_mt_id->bm_lock);
 	INIT_LIST_HEAD(&b_mt_id->bm_devlist);
 
-	/* Construct a list of all visible block disks that have not been
-	 * claimed.
-	 */
-	status =  nfs4_blk_create_block_disk_list(&block_disklist);
-	if (status < 0)
-		goto out_error;
-
 	dlist = kmalloc(sizeof(struct pnfs_devicelist), GFP_KERNEL);
 	if (!dlist)
 		goto out_error;
@@ -819,10 +813,9 @@ bl_initialize_mountpoint(struct nfs_server *server, const struct nfs_fh *fh)
 	}
 	dprintk("%s SUCCESS\n", __func__);
 	server->pnfs_ld_data = b_mt_id;
-	status = 0;
+
  out_return:
 	kfree(dlist);
-	nfs4_blk_destroy_disk_list(&block_disklist);
 	return status;
 
  out_error:
@@ -1155,6 +1148,7 @@ static int __init nfs4blocklayout_init(void)
 	dprintk("%s: NFSv4 Block Layout Driver Registering...\n", __func__);
 
 	pnfs_block_callback_ops = pnfs_register_layoutdriver(&blocklayout_type);
+	bl_pipe_init();
 	return 0;
 }
 
@@ -1164,6 +1158,7 @@ static void __exit nfs4blocklayout_exit(void)
 	       __func__);
 
 	pnfs_unregister_layoutdriver(&blocklayout_type);
+	bl_pipe_exit();
 }
 
 module_init(nfs4blocklayout_init);
diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h
index d316b7f..12b366b 100644
--- a/fs/nfs/blocklayout/blocklayout.h
+++ b/fs/nfs/blocklayout/blocklayout.h
@@ -56,7 +56,6 @@ struct block_mount_id {
 
 struct pnfs_block_dev {
 	struct list_head		bm_node;
-	char				*bm_mdevname; /* meta device name */
 	struct pnfs_deviceid		bm_mdevid;    /* associated devid */
 	struct block_device		*bm_mdev;     /* meta device itself */
 };
@@ -263,8 +262,6 @@ int nfs4_blk_process_layoutget(struct pnfs_layout_type *lo,
 int nfs4_blk_create_block_disk_list(struct list_head *);
 void nfs4_blk_destroy_disk_list(struct list_head *);
 /* blocklayoutdm.c */
-struct pnfs_block_dev *nfs4_blk_init_metadev(struct nfs_server *server,
-					     struct pnfs_device *dev);
 int nfs4_blk_flatten(struct pnfs_blk_volume *, int, struct pnfs_block_dev *);
 void free_block_dev(struct pnfs_block_dev *bdev);
 /* extents.c */
@@ -288,4 +285,19 @@ int add_and_merge_extent(struct pnfs_block_layout *bl,
 			 struct pnfs_block_extent *new);
 int mark_for_commit(struct pnfs_block_extent *be,
 		    sector_t offset, sector_t length);
+
+#include <linux/sunrpc/simple_rpc_pipefs.h>
+
+extern pipefs_list_t bl_device_list;
+extern struct dentry *bl_device_pipe;
+
+int bl_pipe_init(void);
+void bl_pipe_exit(void);
+
+#define BL_DEVICE_UMOUNT               0x0 /* Umount--delete devices */
+#define BL_DEVICE_MOUNT                0x1 /* Mount--create devices*/
+#define BL_DEVICE_REQUEST_INIT         0x0 /* Start request */
+#define BL_DEVICE_REQUEST_PROC         0x1 /* User level process succeeds */
+#define BL_DEVICE_REQUEST_ERR          0x2 /* User level process fails */
+
 #endif /* FS_NFS_NFS4BLOCKLAYOUT_H */
diff --git a/fs/nfs/blocklayout/blocklayoutdev.c b/fs/nfs/blocklayout/blocklayoutdev.c
index 7285d5e..69c74fd 100644
--- a/fs/nfs/blocklayout/blocklayoutdev.c
+++ b/fs/nfs/blocklayout/blocklayoutdev.c
@@ -34,13 +34,12 @@
 
 #include <linux/genhd.h>
 #include <linux/blkdev.h>
+#include <linux/hash.h>
 
 #include "blocklayout.h"
 
 #define NFSDBG_FACILITY         NFSDBG_PNFS_LD
 
-#define MAX_VOLS  256  /* Maximum number of block disks.  Totally arbitrary */
-
 uint32_t *blk_overflow(uint32_t *p, uint32_t *end, size_t nbytes)
 {
 	uint32_t *q = p + XDR_QUADLEN(nbytes);
@@ -77,397 +76,6 @@ int nfs4_blkdev_put(struct block_device *bdev)
 	return blkdev_put(bdev, FMODE_READ);
 }
 
-/* Add a visible, claimed (by us!) block disk to the device list */
-static int alloc_add_disk(struct block_device *blk_dev, struct list_head *dlist)
-{
-	struct visible_block_device *vis_dev;
-
-	dprintk("%s enter\n", __func__);
-	vis_dev = kmalloc(sizeof(struct visible_block_device), GFP_KERNEL);
-	if (!vis_dev) {
-		dprintk("%s nfs4_get_sig failed\n", __func__);
-		return -ENOMEM;
-	}
-	vis_dev->vi_bdev = blk_dev;
-	vis_dev->vi_mapped = 0;
-	vis_dev->vi_put_done = 0;
-	list_add(&vis_dev->vi_node, dlist);
-	return 0;
-}
-
-/* Walk the list of block_devices. Add disks that can be opened and claimed
- * to the device list
- */
-static int
-nfs4_blk_add_block_disk(struct device *cdev,
-		       int index, struct list_head *dlist)
-{
-	static char *claim_ptr = "I belong to pnfs block driver";
-	struct block_device *bdev;
-	struct gendisk *gd;
-	unsigned int major, minor;
-	int ret;
-	dev_t dev;
-
-	dprintk("%s enter \n", __func__);
-	if (index >= MAX_VOLS) {
-		dprintk("%s MAX_VOLS hit\n", __func__);
-		return -ENOSPC;
-	}
-	gd = dev_to_disk(cdev);
-	if (gd == NULL || get_capacity(gd) == 0 ||
-	    (gd->flags & GENHD_FL_SUPPRESS_PARTITION_INFO)) /* Skip ramdisks */
-		goto out;
-
-	dev = cdev->devt;
-	major = MAJOR(dev);
-	minor = MINOR(dev);
-	bdev = nfs4_blkdev_get(dev);
-	if (!bdev) {
-		dprintk("%s: failed to open device %d:%d\n",
-			__func__, major, minor);
-		goto out;
-	}
-
-	if (bd_claim(bdev, claim_ptr)) {
-		dprintk("%s: failed to claim device %d:%d\n",
-			 __func__, major, minor);
-		blkdev_put(bdev, FMODE_READ);
-		goto out;
-	}
-
-	ret = alloc_add_disk(bdev, dlist);
-	if (ret < 0)
-		goto out_err;
-	index++;
-	dprintk("%s ADDED DEVICE %d:%d capacity %ld, bd_block_size %d\n",
-		__func__, major, minor,
-		(unsigned long)get_capacity(gd),
-		bdev->bd_block_size);
-
-out:
-	dprintk("%s returns index %d \n", __func__, index);
-	return index;
-
-out_err:
-	dprintk("%s Can't add disk %d:%d to list. ERROR: %d\n",
-			__func__, major, minor, ret);
-	nfs4_blkdev_put(bdev);
-	return ret;
-}
-
-/* Destroy the temporary block disk list */
-void nfs4_blk_destroy_disk_list(struct list_head *dlist)
-{
-	struct visible_block_device *vis_dev;
-
-	dprintk("%s enter\n", __func__);
-	while (!list_empty(dlist)) {
-		vis_dev = list_first_entry(dlist, struct visible_block_device,
-					   vi_node);
-		dprintk("%s removing device %d:%d\n", __func__,
-				MAJOR(vis_dev->vi_bdev->bd_dev),
-				MINOR(vis_dev->vi_bdev->bd_dev));
-		list_del(&vis_dev->vi_node);
-		if (!vis_dev->vi_put_done)
-			nfs4_blkdev_put(vis_dev->vi_bdev);
-		kfree(vis_dev);
-	}
-}
-
-struct nfs4_blk_block_disk_list_ctl {
-	struct list_head *dlist;
-	int index;
-};
-
-static int nfs4_blk_iter_block_disk_list(struct device *cdev, void *data)
-{
-	struct nfs4_blk_block_disk_list_ctl *lc = data;
-	int ret;
-
-	dprintk("%s enter\n", __func__);
-	ret = nfs4_blk_add_block_disk(cdev, lc->index, lc->dlist);
-	dprintk("%s 1 ret %d\n", __func__, ret);
-	if (ret >= 0) {
-		lc->index = ret;
-		ret = 0;
-	}
-	return ret;
-}
-
-/*
- * Create a temporary list of all block disks host can see, and that have not
- * yet been claimed.
- * block_class: list of all registered block disks.
- * returns -errno on error, and #of devices found on success.
-*/
-int nfs4_blk_create_block_disk_list(struct list_head *dlist)
-{
-	struct nfs4_blk_block_disk_list_ctl lc = {
-		.dlist = dlist,
-		.index = 0,
-	};
-
-	dprintk("%s enter\n", __func__);
-	return class_for_each_device(&block_class, NULL,
-				     &lc, nfs4_blk_iter_block_disk_list);
-}
-/* We are given an array of XDR encoded array indices, each of which should
- * refer to a previously decoded device.  Translate into a list of pointers
- * to the appropriate pnfs_blk_volume's.
- */
-static int set_vol_array(uint32_t **pp, uint32_t *end,
-			 struct pnfs_blk_volume *vols, int working)
-{
-	int i, index;
-	uint32_t *p = *pp;
-	struct pnfs_blk_volume **array = vols[working].bv_vols;
-	for (i = 0; i < vols[working].bv_vol_n; i++) {
-		BLK_READBUF(p, end, 4);
-		READ32(index);
-		if ((index < 0) || (index >= working)) {
-			dprintk("%s Index %i out of expected range\n",
-				__func__, index);
-			goto out_err;
-		}
-		array[i] = &vols[index];
-	}
-	*pp = p;
-	return 0;
- out_err:
-	return -EIO;
-}
-
-static uint64_t sum_subvolume_sizes(struct pnfs_blk_volume *vol)
-{
-	int i;
-	uint64_t sum = 0;
-	for (i = 0; i < vol->bv_vol_n; i++)
-		sum += vol->bv_vols[i]->bv_size;
-	return sum;
-}
-
-static int decode_blk_signature(uint32_t **pp, uint32_t *end,
-				struct pnfs_blk_sig *sig)
-{
-	int i, tmp;
-	uint32_t *p = *pp;
-
-	BLK_READBUF(p, end, 4);
-	READ32(sig->si_num_comps);
-	if (sig->si_num_comps == 0) {
-		dprintk("%s 0 components in sig\n", __func__);
-		goto out_err;
-	}
-	if (sig->si_num_comps >= PNFS_BLOCK_MAX_SIG_COMP) {
-		dprintk("number of sig comps %i >= PNFS_BLOCK_MAX_SIG_COMP\n",
-		       sig->si_num_comps);
-		goto out_err;
-	}
-	for (i = 0; i < sig->si_num_comps; i++) {
-		BLK_READBUF(p, end, 12);
-		READ64(sig->si_comps[i].bs_offset);
-		READ32(tmp);
-		sig->si_comps[i].bs_length = tmp;
-		BLK_READBUF(p, end, tmp);
-		/* Note we rely here on fact that sig is used immediately
-		 * for mapping, then thrown away.
-		 */
-		sig->si_comps[i].bs_string = (char *)p;
-		p += XDR_QUADLEN(tmp);
-	}
-	*pp = p;
-	return 0;
- out_err:
-	return -EIO;
-}
-
-/* Translate a signature component into a block and offset. */
-static void get_sector(struct block_device *bdev,
-		       struct pnfs_blk_sig_comp *comp,
-		       sector_t *block,
-		       uint32_t *offset_in_block)
-{
-	int64_t use_offset = comp->bs_offset;
-	unsigned int blkshift = blksize_bits(block_size(bdev));
-
-	dprintk("%s enter\n", __func__);
-	if (use_offset < 0)
-		use_offset += (get_capacity(bdev->bd_disk) << 9);
-	*block = use_offset >> blkshift;
-	*offset_in_block = use_offset - (*block << blkshift);
-
-	dprintk("%s block %llu offset_in_block %u\n",
-			__func__, (u64)*block, *offset_in_block);
-	return;
-}
-
-/*
- * All signatures in sig must be found on bdev for verification.
- * Returns True if sig matches, False otherwise.
- *
- * STUB - signature crossing a block boundary will cause problems.
- */
-static int verify_sig(struct block_device *bdev, struct pnfs_blk_sig *sig)
-{
-	sector_t block = 0;
-	struct pnfs_blk_sig_comp *comp;
-	struct buffer_head *bh = NULL;
-	uint32_t offset_in_block = 0;
-	char *ptr;
-	int i;
-
-	dprintk("%s enter. bd_disk->capacity %ld, bd_block_size %d\n",
-			__func__, (unsigned long)get_capacity(bdev->bd_disk),
-			bdev->bd_block_size);
-	for (i = 0; i < sig->si_num_comps; i++) {
-		comp = &sig->si_comps[i];
-		dprintk("%s comp->bs_offset %lld, length=%d\n", __func__,
-			comp->bs_offset, comp->bs_length);
-		get_sector(bdev, comp, &block, &offset_in_block);
-		bh = __bread(bdev, block, bdev->bd_block_size);
-		if (!bh)
-			goto out_err;
-		ptr = (char *)bh->b_data + offset_in_block;
-		if (memcmp(ptr, comp->bs_string, comp->bs_length))
-			goto out_err;
-		brelse(bh);
-	}
-	dprintk("%s Complete Match Found\n", __func__);
-	return 1;
-
-out_err:
-	brelse(bh);
-	dprintk("%s  No Match\n", __func__);
-	return 0;
-}
-
-/*
- * map_sig_to_device()
- * Given a signature, walk the list of visible block disks searching for
- * a match. Returns True if mapping was done, False otherwise.
- *
- * While we're at it, fill in the vol->bv_size.
- */
-/* XXX FRED - use normal 0=success status */
-static int map_sig_to_device(struct pnfs_blk_sig *sig,
-			     struct pnfs_blk_volume *vol,
-			     struct list_head *sdlist)
-{
-	int mapped = 0;
-	struct visible_block_device *vis_dev;
-
-	list_for_each_entry(vis_dev, sdlist, vi_node) {
-		if (vis_dev->vi_mapped || !vis_dev->vi_bdev->bd_disk)
-			continue;
-		mapped = verify_sig(vis_dev->vi_bdev, sig);
-		if (mapped) {
-			vol->bv_dev = vis_dev->vi_bdev->bd_dev;
-			vol->bv_size = get_capacity(vis_dev->vi_bdev->bd_disk);
-			vis_dev->vi_mapped = 1;
-			/* XXX FRED check this */
-			/* We no longer need to scan this device, and
-			 * we need to "put" it before creating metadevice.
-			 */
-			if (!vis_dev->vi_put_done) {
-				vis_dev->vi_put_done = 1;
-				nfs4_blkdev_put(vis_dev->vi_bdev);
-			}
-			break;
-		}
-	}
-	return mapped;
-}
-
-/* XDR decodes pnfs_block_volume4 structure */
-static int decode_blk_volume(uint32_t **pp, uint32_t *end,
-			     struct pnfs_blk_volume *vols, int i,
-			     struct list_head *sdlist, int *array_cnt)
-{
-	int status = 0;
-	struct pnfs_blk_sig sig;
-	uint32_t *p = *pp;
-	uint64_t tmp; /* Used by READ_SECTOR */
-	struct pnfs_blk_volume *vol = &vols[i];
-	int j;
-	u64 tmp_size;
-
-	BLK_READBUF(p, end, 4);
-	READ32(vol->bv_type);
-	dprintk("%s vol->bv_type = %i\n", __func__, vol->bv_type);
-	switch (vol->bv_type) {
-	case PNFS_BLOCK_VOLUME_SIMPLE:
-		*array_cnt = 0;
-		status = decode_blk_signature(&p, end, &sig);
-		if (status)
-			return status;
-		status = map_sig_to_device(&sig, vol, sdlist);
-		if (!status) {
-			dprintk("Could not find disk for device\n");
-			return -EIO;
-		}
-		status = 0;
-		dprintk("%s Set Simple vol to dev %d:%d, size %llu\n",
-				__func__,
-				MAJOR(vol->bv_dev),
-				MINOR(vol->bv_dev),
-				(u64)vol->bv_size);
-		break;
-	case PNFS_BLOCK_VOLUME_SLICE:
-		BLK_READBUF(p, end, 16);
-		READ_SECTOR(vol->bv_offset);
-		READ_SECTOR(vol->bv_size);
-		*array_cnt = vol->bv_vol_n = 1;
-		status = set_vol_array(&p, end, vols, i);
-		break;
-	case PNFS_BLOCK_VOLUME_STRIPE:
-		BLK_READBUF(p, end, 8);
-		READ_SECTOR(vol->bv_stripe_unit);
-		BLK_READBUF(p, end, 4);
-		READ32(vol->bv_vol_n);
-		if (!vol->bv_vol_n)
-			return -EIO;
-		*array_cnt = vol->bv_vol_n;
-		status = set_vol_array(&p, end, vols, i);
-		if (status)
-			return status;
-		/* Ensure all subvolumes are the same size */
-		for (j = 1; j < vol->bv_vol_n; j++) {
-			if (vol->bv_vols[j]->bv_size !=
-			    vol->bv_vols[0]->bv_size) {
-				dprintk("%s varying subvol size\n", __func__);
-				return -EIO;
-			}
-		}
-		/* Make sure total size only includes addressable areas */
-		tmp_size = vol->bv_vols[0]->bv_size;
-		do_div(tmp_size, (u32)vol->bv_stripe_unit);
-		vol->bv_size = vol->bv_vol_n * tmp_size * vol->bv_stripe_unit;
-		dprintk("%s Set Stripe vol to size %llu\n",
-				__func__, (u64)vol->bv_size);
-		break;
-	case PNFS_BLOCK_VOLUME_CONCAT:
-		BLK_READBUF(p, end, 4);
-		READ32(vol->bv_vol_n);
-		if (!vol->bv_vol_n)
-			return -EIO;
-		*array_cnt = vol->bv_vol_n;
-		status = set_vol_array(&p, end, vols, i);
-		if (status)
-			return status;
-		vol->bv_size = sum_subvolume_sizes(vol);
-		dprintk("%s Set Concat vol to size %llu\n",
-				__func__, (u64)vol->bv_size);
-		break;
-	default:
-		dprintk("Unknown volume type %i\n", vol->bv_type);
- out_err:
-		return -EIO;
-	}
-	*pp = p;
-	return status;
-}
-
 /* Decodes pnfs_block_deviceaddr4 (draft-8) which is XDR encoded
  * in dev->dev_addr_buf.
  */
@@ -476,65 +84,71 @@ nfs4_blk_decode_device(struct nfs_server *server,
 		       struct pnfs_device *dev,
 		       struct list_head *sdlist)
 {
-	int num_vols, i, status, count;
-	struct pnfs_blk_volume *vols, **arrays, **arrays_ptr;
-	uint32_t *p = dev->area;
-	uint32_t *end = (uint32_t *) ((char *) p + dev->mincount);
 	struct pnfs_block_dev *rv = NULL;
-	struct visible_block_device *vis_dev;
+	struct block_device *bd = NULL;
+	pipefs_hdr_t *msg = NULL, *reply = NULL;
+	uint32_t major, minor;
 
 	dprintk("%s enter\n", __func__);
 
-	READ32(num_vols);
-	dprintk("%s num_vols = %i\n", __func__, num_vols);
-
-	vols = kmalloc(sizeof(struct pnfs_blk_volume) * num_vols, GFP_KERNEL);
-	if (!vols)
+	if (IS_ERR(bl_device_pipe))
 		return NULL;
-	/* Each volume in vols array needs its own array.  Save time by
-	 * allocating them all in one large hunk.  Because each volume
-	 * array can only reference previous volumes, and because once
-	 * a concat or stripe references a volume, it may never be
-	 * referenced again, the volume arrays are guaranteed to fit
-	 * in the suprisingly small space allocated.
-	 */
-	arrays = kmalloc(sizeof(struct pnfs_blk_volume *) * num_vols * 2,
-			 GFP_KERNEL);
-	if (!arrays)
-		goto out;
-	arrays_ptr = arrays;
+	dprintk("%s CREATING PIPEFS MESSAGE\n", __func__);
+	dprintk("%s: deviceid: %s, mincount: %d\n", __func__, dev->dev_id.data,
+		dev->mincount);
+	msg = pipefs_alloc_init_msg(0, BL_DEVICE_MOUNT, 0, dev->area,
+				    dev->mincount);
+	if (IS_ERR(msg)) {
+		dprintk("ERROR: couldn't make pipefs message.\n");
+		goto out_err;
+	}
+	msg->msgid = hash_ptr(&msg, sizeof(msg->msgid) * 8);
+	msg->status = BL_DEVICE_REQUEST_INIT;
+
+	dprintk("%s CALLING USERSPACE DAEMON\n", __func__);
+	reply = pipefs_queue_upcall_waitreply(bl_device_pipe, msg,
+					      &bl_device_list, 0, 0);
 
-	list_for_each_entry(vis_dev, sdlist, vi_node) {
-		/* Wipe crud left from parsing previous device */
-		vis_dev->vi_mapped = 0;
+	if (IS_ERR(reply)) {
+		dprintk("ERROR: upcall_waitreply failed\n");
+		goto out_err;
 	}
-	for (i = 0; i < num_vols; i++) {
-		vols[i].bv_vols = arrays_ptr;
-		status = decode_blk_volume(&p, end, vols, i, sdlist, &count);
-		if (status)
-			goto out;
-		arrays_ptr += count;
+	if (reply->status != BL_DEVICE_REQUEST_PROC) {
+		dprintk("%s failed to open device: %ld\n",
+			__func__, PTR_ERR(bd));
+		goto out_err;
 	}
-
-	/* Check that we have used up opaque */
-	if (p != end) {
-		dprintk("Undecoded cruft at end of opaque\n");
-		goto out;
+	memcpy(&major, (uint32_t *)(payload_of(reply)), sizeof(uint32_t));
+	memcpy(&minor, (uint32_t *)(payload_of(reply) + sizeof(uint32_t)),
+		sizeof(uint32_t));
+	bd = nfs4_blkdev_get(MKDEV(major, minor));
+	if (IS_ERR(bd)) {
+		dprintk("%s failed to open device : %ld\n",
+			__func__, PTR_ERR(bd));
+		goto out_err;
 	}
 
-	/* Now use info in vols to create the meta device */
-	rv = nfs4_blk_init_metadev(server, dev);
+	rv = kzalloc(sizeof(*rv), GFP_KERNEL);
 	if (!rv)
-		goto out;
-	status = nfs4_blk_flatten(vols, num_vols, rv);
-	if (status) {
-		free_block_dev(rv);
-		rv = NULL;
-	}
- out:
-	kfree(arrays);
-	kfree(vols);
+		goto out_err;
+
+	rv->bm_mdev = bd;
+	memcpy(&rv->bm_mdevid, &dev->dev_id, sizeof(struct pnfs_deviceid));
+	dprintk("%s Created device %s with bd_block_size %u\n",
+		__func__,
+		bd->bd_disk->disk_name,
+		bd->bd_block_size);
+	kfree(reply);
+	kfree(msg);
 	return rv;
+
+out_err:
+	kfree(rv);
+	if (!IS_ERR(reply))
+		kfree(reply);
+	if (!IS_ERR(msg))
+		kfree(msg);
+	return NULL;
 }
 
 /* Map deviceid returned by the server to constructed block_device */
diff --git a/fs/nfs/blocklayout/blocklayoutdm.c b/fs/nfs/blocklayout/blocklayoutdm.c
index 3d15de0..2c1b7a4 100644
--- a/fs/nfs/blocklayout/blocklayoutdm.c
+++ b/fs/nfs/blocklayout/blocklayoutdm.c
@@ -31,6 +31,8 @@
  */
 
 #include <linux/genhd.h> /* gendisk - used in a dprintk*/
+#include <linux/sched.h>
+#include <linux/hash.h>
 
 #include "blocklayout.h"
 
@@ -45,52 +47,44 @@
 #define roundup8(x) (((x)+7) & ~7)
 #define sizeof8(x) roundup8(sizeof(x))
 
-/* Given x>=1, return smallest n such that 2**n >= x */
-static unsigned long find_order(int x)
+static int dev_remove(dev_t dev)
 {
-	unsigned long rv = 0;
-	for (x--; x; x >>= 1)
-		rv++;
-	return rv;
-}
-
-/* Debugging aid */
-static void print_extent(u64 meta_offset, dev_t disk,
-			 u64 disk_offset, u64 length)
-{
-	dprintk("%lli:, %d:%d %lli, %lli\n", meta_offset, MAJOR(disk),
-			MINOR(disk), disk_offset, length);
-}
-static int dev_create(const char *name, dev_t *dev)
-{
-	struct dm_ioctl ctrl;
-	int rv;
-
-	memset(&ctrl, 0, sizeof(ctrl));
-	strncpy(ctrl.name, name, DM_NAME_LEN-1);
-	rv = dm_dev_create(&ctrl); /* XXX - need to pull data out of ctrl */
-	dprintk("Tried to create %s, got %i\n", name, rv);
-	if (!rv) {
-		*dev = huge_decode_dev(ctrl.dev);
-		dprintk("dev = (%i, %i)\n", MAJOR(*dev), MINOR(*dev));
+	int ret = 1;
+	pipefs_hdr_t *msg = NULL, *reply = NULL;
+	uint64_t bl_dev;
+	uint32_t major = MAJOR(dev), minor = MINOR(dev);
+
+	dprintk("Entering %s\n", __func__);
+
+	if (IS_ERR(bl_device_pipe))
+		return ret;
+
+	memcpy((void *)&bl_dev, &major, sizeof(uint32_t));
+	memcpy((void *)&bl_dev + sizeof(uint32_t), &minor, sizeof(uint32_t));
+	msg = pipefs_alloc_init_msg(0, BL_DEVICE_UMOUNT, 0, (void *)&bl_dev,
+				    sizeof(uint64_t));
+	if (IS_ERR(msg)) {
+		dprintk("ERROR: couldn't make pipefs message.\n");
+		goto out;
+	}
+	msg->msgid = hash_ptr(&msg, sizeof(msg->msgid) * 8);
+	msg->status = BL_DEVICE_REQUEST_INIT;
+
+	reply = pipefs_queue_upcall_waitreply(bl_device_pipe, msg,
+					      &bl_device_list, 0, 0);
+	if (IS_ERR(reply)) {
+		dprintk("ERROR: upcall_waitreply failed\n");
+		goto out;
 	}
-	return rv;
-}
-
-static int dev_remove(const char *name)
-{
-	struct dm_ioctl ctrl;
-	memset(&ctrl, 0, sizeof(ctrl));
-	strncpy(ctrl.name, name, DM_NAME_LEN-1);
-	return dm_dev_remove(&ctrl);
-}
 
-static int dev_resume(const char *name)
-{
-	struct dm_ioctl ctrl;
-	memset(&ctrl, 0, sizeof(ctrl));
-	strncpy(ctrl.name, name, DM_NAME_LEN-1);
-	return dm_do_resume(&ctrl);
+	if (reply->status == BL_DEVICE_REQUEST_PROC)
+		ret = 0; /*TODO: what to return*/
+out:
+	if (!IS_ERR(reply))
+		kfree(reply);
+	if (!IS_ERR(msg))
+		kfree(msg);
+	return ret;
 }
 
 /*
@@ -100,12 +94,12 @@ static int nfs4_blk_metadev_release(struct pnfs_block_dev *bdev)
 {
 	int rv;
 
-	dprintk("%s Releasing %s\n", __func__, bdev->bm_mdevname);
+	dprintk("%s Releasing\n", __func__);
 	/* XXX Check return? */
 	rv = nfs4_blkdev_put(bdev->bm_mdev);
 	dprintk("%s nfs4_blkdev_put returns %d\n", __func__, rv);
 
-	rv = dev_remove(bdev->bm_mdevname);
+	rv = dev_remove(bdev->bm_mdev->bd_dev);
 	dprintk("%s Returns %d\n", __func__, rv);
 	return rv;
 }
@@ -114,9 +108,8 @@ void free_block_dev(struct pnfs_block_dev *bdev)
 {
 	if (bdev) {
 		if (bdev->bm_mdev) {
-			dprintk("%s Removing DM device: %s %d:%d\n",
+			dprintk("%s Removing DM device: %d:%d\n",
 				__func__,
-				bdev->bm_mdevname,
 				MAJOR(bdev->bm_mdev->bd_dev),
 				MINOR(bdev->bm_mdev->bd_dev));
 			/* XXX Check status ?? */
@@ -125,213 +118,3 @@ void free_block_dev(struct pnfs_block_dev *bdev)
 		kfree(bdev);
 	}
 }
-
-/*
- *  Create meta device. Keep it open to use for I/O.
- */
-struct pnfs_block_dev *nfs4_blk_init_metadev(struct nfs_server *server,
-					     struct pnfs_device *dev)
-{
-	static uint64_t dev_count; /* STUB used for device names */
-	struct block_device *bd;
-	dev_t meta_dev;
-	struct pnfs_block_dev *rv;
-	int status;
-
-	dprintk("%s enter\n", __func__);
-
-	rv = kmalloc(sizeof(*rv) + 32, GFP_KERNEL);
-	if (!rv)
-		return NULL;
-	rv->bm_mdevname = (char *)rv + sizeof(*rv);
-	sprintf(rv->bm_mdevname, "FRED_%llu", dev_count++);
-	status = dev_create(rv->bm_mdevname, &meta_dev);
-	if (status)
-		goto out_err;
-	bd = nfs4_blkdev_get(meta_dev);
-	if (!bd)
-		goto out_err;
-	if (bd_claim(bd, server)) {
-		dprintk("%s: failed to claim device %d:%d\n",
-					__func__,
-					MAJOR(meta_dev),
-					MINOR(meta_dev));
-		blkdev_put(bd, FMODE_READ);
-		goto out_err;
-	}
-
-	rv->bm_mdev = bd;
-	memcpy(&rv->bm_mdevid, &dev->dev_id, sizeof(struct pnfs_deviceid));
-	dprintk("%s Created device %s named %s with bd_block_size %u\n",
-				__func__,
-				bd->bd_disk->disk_name,
-				rv->bm_mdevname,
-				bd->bd_block_size);
-	return rv;
-
- out_err:
-	kfree(rv);
-	return NULL;
-}
-
-/*
- * Given a vol_offset into root, returns the disk and disk_offset it
- * corresponds to, as well as the length of the contiguous segment thereafter.
- * All offsets/lengths are in 512-byte sectors.
- */
-static int nfs4_blk_resolve(int root, struct pnfs_blk_volume *vols,
-			    u64 vol_offset, dev_t *disk, u64 *disk_offset,
-			    u64 *length)
-{
-	struct pnfs_blk_volume *node;
-	u64 node_offset;
-
-	/* Walk down device tree until we hit a leaf node (VOLUME_SIMPLE) */
-	node = &vols[root];
-	node_offset = vol_offset;
-	*length = node->bv_size;
-	while (1) {
-		dprintk("offset=%lli, length=%lli\n",
-			node_offset, *length);
-		if (node_offset > node->bv_size)
-			return -EIO;
-		switch (node->bv_type) {
-		case PNFS_BLOCK_VOLUME_SIMPLE:
-			*disk = node->bv_dev;
-			dprintk("%s VOLUME_SIMPLE: node->bv_dev %d:%d\n",
-			       __func__,
-			       MAJOR(node->bv_dev),
-			       MINOR(node->bv_dev));
-			*disk_offset = node_offset;
-			*length = min(*length, node->bv_size - node_offset);
-			return 0;
-		case PNFS_BLOCK_VOLUME_SLICE:
-			dprintk("%s VOLUME_SLICE:\n", __func__);
-			*length = min(*length, node->bv_size - node_offset);
-			node_offset += node->bv_offset;
-			node = node->bv_vols[0];
-			break;
-		case PNFS_BLOCK_VOLUME_CONCAT: {
-			u64 next = 0, sum = 0;
-			int i;
-			dprintk("%s VOLUME_CONCAT:\n", __func__);
-			for (i = 0; i < node->bv_vol_n; i++) {
-				next = sum + node->bv_vols[i]->bv_size;
-				if (node_offset < next)
-					break;
-				sum = next;
-			}
-			*length = min(*length, next - node_offset);
-			node_offset -= sum;
-			node = node->bv_vols[i];
-			}
-			break;
-		case PNFS_BLOCK_VOLUME_STRIPE: {
-			u64 global_s_no;
-			u64 stripe_pos;
-			u64 local_s_no;
-			u64 disk_number;
-
-			dprintk("%s VOLUME_STRIPE:\n", __func__);
-			global_s_no = node_offset;
-			/* BUG - note this assumes stripe_unit <= 2**32 */
-			stripe_pos = (u64) do_div(global_s_no,
-						  (u32)node->bv_stripe_unit);
-			local_s_no = global_s_no;
-			disk_number = (u64) do_div(local_s_no,
-						   (u32) node->bv_vol_n);
-			*length = min(*length,
-				      node->bv_stripe_unit - stripe_pos);
-			node_offset = local_s_no * node->bv_stripe_unit +
-					stripe_pos;
-			node = node->bv_vols[disk_number];
-			}
-			break;
-		default:
-			return -EIO;
-		}
-	}
-}
-
-/*
- * Create an LVM dm device table that represents the volume topology returned
- * by GETDEVICELIST or GETDEVICEINFO.
- *
- * vols:  topology with VOLUME_SIMPLEs mapped to visable block disks.
- * size:  number of volumes in vols.
- */
-int nfs4_blk_flatten(struct pnfs_blk_volume *vols, int size,
-		     struct pnfs_block_dev *bdev)
-{
-	u64 meta_offset = 0;
-	u64 meta_size = vols[size-1].bv_size;
-	dev_t disk;
-	u64 disk_offset, len;
-	int status = 0, count = 0, pages_needed;
-	struct dm_ioctl *ctl;
-	struct dm_target_spec *spec;
-	char *args = NULL;
-	unsigned long p;
-
-	dprintk("%s enter. mdevname %s number of volumes %d\n", __func__,
-			bdev->bm_mdevname, size);
-
-	/* We need to reserve memory to store segments, so need to count
-	 * segments.  This means we resolve twice, basically throwing away
-	 * all info from first run apart from the count.  Seems like
-	 * there should be a better way.
-	 */
-	for (meta_offset = 0; meta_offset < meta_size; meta_offset += len) {
-		status = nfs4_blk_resolve(size-1, vols, meta_offset, &disk,
-						&disk_offset, &len);
-		/* TODO Check status */
-		count += 1;
-	}
-
-	dprintk("%s: Have %i segments\n", __func__, count);
-	pages_needed = ((count + SPEC_HEADER_ADJUST) / SPECS_PER_PAGE) + 1;
-	dprintk("%s: Need %i pages\n", __func__, pages_needed);
-	p = __get_free_pages(GFP_KERNEL, find_order(pages_needed));
-	if (!p)
-		return -ENOMEM;
-	/* A dm_ioctl is placed at the beginning, followed by a series of
-	 * (dm_target_spec, argument string) pairs.
-	 */
-	ctl = (struct dm_ioctl *) p;
-	spec = (struct dm_target_spec *) (p + sizeof8(*ctl));
-	memset(ctl, 0, sizeof(*ctl));
-	ctl->data_start = (char *) spec - (char *) ctl;
-	ctl->target_count = count;
-	strncpy(ctl->name, bdev->bm_mdevname, DM_NAME_LEN);
-
-	dprintk("%s ctl->name %s\n", __func__, ctl->name);
-	for (meta_offset = 0; meta_offset < meta_size; meta_offset += len) {
-		status = nfs4_blk_resolve(size-1, vols, meta_offset, &disk,
-							&disk_offset, &len);
-		if (!len)
-			break;
-		/* TODO Check status */
-		print_extent(meta_offset, disk, disk_offset, len);
-		spec->sector_start = meta_offset;
-		spec->length = len;
-		spec->status = 0;
-		strcpy(spec->target_type, "linear");
-		args = (char *) (spec + 1);
-		sprintf(args, "%i:%i %lli",
-			MAJOR(disk), MINOR(disk), disk_offset);
-		dprintk("%s args %s\n", __func__, args);
-		spec->next = roundup8(sizeof(*spec) + strlen(args) + 1);
-		spec = (struct dm_target_spec *) (((char *) spec) + spec->next);
-	}
-	ctl->data_size = (char *) spec - (char *) ctl;
-
-	status = dm_table_load(ctl, ctl->data_size);
-	dprintk("%s dm_table_load returns %d\n", __func__, status);
-
-	dev_resume(bdev->bm_mdevname);
-
-	free_pages(p, find_order(pages_needed));
-	dprintk("%s returns %d\n", __func__, status);
-	return status;
-}
-
-- 
1.7.0.4


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH 0/2] pnfs-block: move device mapping from kernel to user daemon
  2010-07-15 19:40 [PATCH 0/2] pnfs-block: move device mapping from kernel to user daemon Jim Rees
  2010-07-15 19:41 ` [PATCH 1/2] pnfs-block: Add support for simple rpc pipefs Jim Rees
  2010-07-15 19:41 ` [PATCH 2/2] pnfs-block: Remove device creation from kernel Jim Rees
@ 2010-07-18  9:06 ` Christoph Hellwig
  2 siblings, 0 replies; 6+ messages in thread
From: Christoph Hellwig @ 2010-07-18  9:06 UTC (permalink / raw)
  To: Jim Rees; +Cc: bhalevy, linux-nfs

>  10 files changed, 720 insertions(+), 712 deletions(-)

While it's good that this cruft goes away from the kernel, the diffstat
is a bit disappointing.

Also you can now remove the block_class export that was added for pNFS.


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH 0/2] complex block layout
@ 2010-07-21 22:29 Jim Rees
  2010-07-21 22:30 ` [PATCH 1/2] pnfs-block: Add support for simple rpc pipefs Jim Rees
  0 siblings, 1 reply; 6+ messages in thread
From: Jim Rees @ 2010-07-21 22:29 UTC (permalink / raw)
  To: bhalevy; +Cc: linux-nfs

This is a replacement for the patch set I sent last week, rebased to current
pnfs-all-latest and incorporating suggestions both from reviewers and from
checkpatch.pl.

These two patches move the complex block layout device mapping from the
kernel to a user space daemon.  The first patch adds a simple upcall
mechanism via pipefs for the kernel piece to communicate with the daemon.
The second patch removes the kernel device mapping and replaces it with
calls to the daemon.

Passes Connectathon tests to both EMC and spnfs servers.

The user daemon will be sent separately as a patch to nfs-utils.

Haiying Tang (2):
  pnfs-block: Add support for simple rpc pipefs
  pnfs-block: Remove device creation from kernel

 fs/nfs/blocklayout/Makefile                      |    2 +-
 fs/nfs/blocklayout/block-device-discovery-pipe.c |   66 +++
 fs/nfs/blocklayout/blocklayout.c                 |   15 +-
 fs/nfs/blocklayout/blocklayout.h                 |   18 +-
 fs/nfs/blocklayout/blocklayoutdev.c              |  494 +++-------------------
 fs/nfs/blocklayout/blocklayoutdm.c               |  297 ++-----------
 include/linux/sunrpc/rpc_pipe_fs.h               |    4 +
 include/linux/sunrpc/simple_rpc_pipefs.h         |  111 +++++
 net/sunrpc/Makefile                              |    2 +-
 net/sunrpc/simple_rpc_pipefs.c                   |  424 +++++++++++++++++++
 10 files changed, 721 insertions(+), 712 deletions(-)
 create mode 100644 fs/nfs/blocklayout/block-device-discovery-pipe.c
 create mode 100644 include/linux/sunrpc/simple_rpc_pipefs.h
 create mode 100644 net/sunrpc/simple_rpc_pipefs.c

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH 1/2] pnfs-block: Add support for simple rpc pipefs
  2010-07-21 22:29 [PATCH 0/2] complex block layout Jim Rees
@ 2010-07-21 22:30 ` Jim Rees
  0 siblings, 0 replies; 6+ messages in thread
From: Jim Rees @ 2010-07-21 22:30 UTC (permalink / raw)
  To: bhalevy; +Cc: linux-nfs

Signed-off-by: Eric Anderle <eanderle@umich.edu>
Signed-off-by: Jim Rees <rees@umich.edu>
---
 include/linux/sunrpc/rpc_pipe_fs.h       |    4 +
 include/linux/sunrpc/simple_rpc_pipefs.h |  111 ++++++++
 net/sunrpc/Makefile                      |    2 +-
 net/sunrpc/simple_rpc_pipefs.c           |  424 ++++++++++++++++++++++++++++++
 4 files changed, 540 insertions(+), 1 deletions(-)
 create mode 100644 include/linux/sunrpc/simple_rpc_pipefs.h
 create mode 100644 net/sunrpc/simple_rpc_pipefs.c

diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h
index 6f942c9..2177d50 100644
--- a/include/linux/sunrpc/rpc_pipe_fs.h
+++ b/include/linux/sunrpc/rpc_pipe_fs.h
@@ -12,6 +12,10 @@ struct rpc_pipe_msg {
 	size_t len;
 	size_t copied;
 	int errno;
+#define PIPEFS_AUTOFREE_RPCMSG       0x01 /* frees rpc_pipe_msg */
+#define PIPEFS_AUTOFREE_RPCMSG_DATA  0x02 /* frees rpc_pipe_msg->data */
+#define PIPEFS_AUTOFREE_UPCALL_MSG   PIPEFS_AUTOFREE_RPCMSG_DATA
+	u8 flags;
 };
 
 struct rpc_pipe_ops {
diff --git a/include/linux/sunrpc/simple_rpc_pipefs.h b/include/linux/sunrpc/simple_rpc_pipefs.h
new file mode 100644
index 0000000..02e8147
--- /dev/null
+++ b/include/linux/sunrpc/simple_rpc_pipefs.h
@@ -0,0 +1,111 @@
+/*
+ *  Copyright (c) 2008 The Regents of the University of Michigan.
+ *  All rights reserved.
+ *
+ *  David M. Richter <richterd@citi.umich.edu>
+ *
+ *  Drawing on work done by Andy Adamson <andros@citi.umich.edu> and
+ *  Marius Eriksen <marius@monkey.org>.  Thanks for the help over the
+ *  years, guys.
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions
+ *  are met:
+ *
+ *  1. Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *  2. Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *  3. Neither the name of the University nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *  With thanks to CITI's project sponsor and partner, IBM.
+ */
+
+#ifndef _SIMPLE_RPC_PIPEFS_H_
+#define _SIMPLE_RPC_PIPEFS_H_
+
+#include <linux/fs.h>
+#include <linux/list.h>
+#include <linux/mount.h>
+#include <linux/sched.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/rpc_pipe_fs.h>
+
+
+#define payload_of(headerp)  ((void *)(headerp + 1))
+
+/*
+ * struct pipefs_hdr -- the generic message format for simple_rpc_pipefs.
+ * Messages may simply be the header itself, although having an optional
+ * data payload follow the header allows much more flexibility.
+ *
+ * Messages are created using pipefs_alloc_init_msg() and
+ * pipefs_alloc_init_msg_padded(), both of which accept a pointer to an
+ * (optional) data payload.
+ *
+ * Given a struct pipefs_hdr *msg that has a struct foo payload, the data
+ * can be accessed using: struct foo *foop = payload_of(msg)
+ */
+struct pipefs_hdr {
+	u32 msgid;
+	u8  type;
+	u8  flags;
+	u16 totallen; /* length of entire message, including hdr itself */
+	u32 status;
+};
+
+/*
+ * struct pipefs_list -- a type of list used for tracking callers who've made an
+ * upcall and are blocked waiting for a reply.
+ *
+ * See pipefs_queue_upcall_waitreply() and pipefs_assign_upcall_reply().
+ */
+struct pipefs_list {
+	struct list_head list;
+	spinlock_t list_lock;
+};
+
+
+/* See net/sunrpc/simple_rpc_pipefs.c for more info on using these functions. */
+extern struct dentry *pipefs_mkpipe(const char *name,
+				    const struct rpc_pipe_ops *ops,
+				    int wait_for_open);
+extern void pipefs_closepipe(struct dentry *pipe);
+extern void pipefs_init_list(struct pipefs_list *list);
+extern struct pipefs_hdr *pipefs_alloc_init_msg(u32 msgid, u8 type, u8 flags,
+						void *data, u16 datalen);
+extern struct pipefs_hdr *pipefs_alloc_init_msg_padded(u32 msgid, u8 type,
+						       u8 flags, void *data,
+						       u16 datalen, u16 padlen);
+extern struct pipefs_hdr *pipefs_queue_upcall_waitreply(struct dentry *pipe,
+							struct pipefs_hdr *msg,
+							struct pipefs_list
+							*uplist, u8 upflags,
+							u32 timeout);
+extern int pipefs_queue_upcall_noreply(struct dentry *pipe,
+				       struct pipefs_hdr *msg, u8 upflags);
+extern int pipefs_assign_upcall_reply(struct pipefs_hdr *reply,
+				      struct pipefs_list *uplist);
+extern struct pipefs_hdr *pipefs_readmsg(struct file *filp,
+					 const char __user *src, size_t len);
+extern ssize_t pipefs_generic_upcall(struct file *filp,
+				     struct rpc_pipe_msg *rpcmsg,
+				     char __user *dst, size_t buflen);
+extern void pipefs_generic_destroy_msg(struct rpc_pipe_msg *rpcmsg);
+
+#endif /* _SIMPLE_RPC_PIPEFS_H_ */
diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile
index 9d2fca5..e102040 100644
--- a/net/sunrpc/Makefile
+++ b/net/sunrpc/Makefile
@@ -12,7 +12,7 @@ sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \
 	    svc.o svcsock.o svcauth.o svcauth_unix.o \
 	    addr.o rpcb_clnt.o timer.o xdr.o \
 	    sunrpc_syms.o cache.o rpc_pipe.o \
-	    svc_xprt.o
+	    svc_xprt.o simple_rpc_pipefs.o
 sunrpc-$(CONFIG_NFS_V4_1) += backchannel_rqst.o bc_svc.o
 sunrpc-$(CONFIG_PROC_FS) += stats.o
 sunrpc-$(CONFIG_SYSCTL) += sysctl.o
diff --git a/net/sunrpc/simple_rpc_pipefs.c b/net/sunrpc/simple_rpc_pipefs.c
new file mode 100644
index 0000000..c9306aa
--- /dev/null
+++ b/net/sunrpc/simple_rpc_pipefs.c
@@ -0,0 +1,424 @@
+/*
+ *  net/sunrpc/simple_rpc_pipefs.c
+ *
+ *  Copyright (c) 2008 The Regents of the University of Michigan.
+ *  All rights reserved.
+ *
+ *  David M. Richter <richterd@citi.umich.edu>
+ *
+ *  Drawing on work done by Andy Adamson <andros@citi.umich.edu> and
+ *  Marius Eriksen <marius@monkey.org>.  Thanks for the help over the
+ *  years, guys.
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions
+ *  are met:
+ *
+ *  1. Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *  2. Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *  3. Neither the name of the University nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *  With thanks to CITI's project sponsor and partner, IBM.
+ */
+
+#include <linux/completion.h>
+#include <linux/uaccess.h>
+#include <linux/module.h>
+#include <linux/sunrpc/simple_rpc_pipefs.h>
+
+
+/*
+ * Make an rpc_pipefs pipe named @name at the root of the mounted rpc_pipefs
+ * filesystem.
+ *
+ * If @wait_for_open is non-zero and an upcall is later queued but the userland
+ * end of the pipe has not yet been opened, the upcall will remain queued until
+ * the pipe is opened; otherwise, the upcall queueing will return with -EPIPE.
+ *
+ * Returns the new pipe's dentry, or an ERR_PTR() on failure.
+ *
+ * On success the reference taken with rpc_get_mount() stays held until
+ * pipefs_closepipe() drops it.  On every failure path it is dropped here,
+ * because a caller that never got a pipe has nothing to close.
+ */
+struct dentry *pipefs_mkpipe(const char *name, const struct rpc_pipe_ops *ops,
+			     int wait_for_open)
+{
+	struct dentry *dir, *pipe;
+	struct vfsmount *mnt;
+
+	mnt = rpc_get_mount();
+	if (IS_ERR(mnt))
+		return ERR_CAST(mnt);
+
+	dir = mnt->mnt_root;
+	if (!dir) {
+		pipe = ERR_PTR(-ENOENT);
+		goto out_put;
+	}
+
+	pipe = rpc_mkpipe(dir, name, NULL, ops,
+			  wait_for_open ? RPC_PIPE_WAIT_FOR_OPEN : 0);
+	if (!IS_ERR(pipe))
+		return pipe;
+
+out_put:
+	/* no pipe was created, so don't leave the mount pinned */
+	rpc_put_mount();
+	return pipe;
+}
+EXPORT_SYMBOL(pipefs_mkpipe);
+
+/*
+ * Shutdown a pipe made by pipefs_mkpipe(): unlink it and drop the reference
+ * on the rpc_pipefs mount.
+ *
+ * A NULL or ERR_PTR() @pipe (e.g. the result of a failed pipefs_mkpipe()) is
+ * ignored, so teardown paths need not special-case a pipe that was never
+ * created.
+ *
+ * XXX: do we need to retain an extra reference on the mount?
+ */
+void pipefs_closepipe(struct dentry *pipe)
+{
+	if (!pipe || IS_ERR(pipe))
+		return;
+
+	rpc_unlink(pipe);
+	rpc_put_mount();
+}
+EXPORT_SYMBOL(pipefs_closepipe);
+
+/*
+ * Initialize a struct pipefs_list -- which are a way to keep track of callers
+ * who're blocked having made an upcall and are awaiting a reply.
+ *
+ * Must be called once before @list is handed to any other function here.
+ *
+ * See pipefs_queue_upcall_waitreply() and pipefs_assign_upcall_reply() for how
+ * to use them.
+ *
+ * Deliberately not "inline": this is an exported symbol that the header
+ * declares as an ordinary extern function.
+ */
+void pipefs_init_list(struct pipefs_list *list)
+{
+	INIT_LIST_HEAD(&list->list);
+	spin_lock_init(&list->list_lock);
+}
+EXPORT_SYMBOL(pipefs_init_list);
+
+/*
+ * Alloc/init a generic pipefs message header and copy into its message body
+ * an arbitrary data payload.
+ *
+ * struct pipefs_hdr's are meant to serve as generic, general-purpose message
+ * headers for easy rpc_pipefs I/O.  When an upcall is made, the
+ * struct pipefs_hdr is assigned to a struct rpc_pipe_msg and delivered
+ * therein.  --And yes, the naming can seem a little confusing at first:
+ *
+ * When one thinks of an upcall "message", in simple_rpc_pipefs that's a
+ * struct pipefs_hdr (possibly with an attached message body).  A
+ * struct rpc_pipe_msg is actually only the -vehicle- by which the "real"
+ * message is delivered and processed.
+ *
+ * @data is copied (@datalen bytes) right after the header; @padlen further
+ * zeroed bytes are allocated after that.  @data may be NULL only when
+ * @datalen is 0.
+ *
+ * Returns the new message, or ERR_PTR(-E2BIG) if header + data + pad exceeds
+ * PAGE_SIZE or cannot be represented in the u16 totallen field,
+ * ERR_PTR(-EINVAL) for a NULL @data with nonzero @datalen, or
+ * ERR_PTR(-ENOMEM).
+ */
+struct pipefs_hdr *pipefs_alloc_init_msg_padded(u32 msgid, u8 type, u8 flags,
+					   void *data, u16 datalen, u16 padlen)
+{
+	size_t totallen;
+	struct pipefs_hdr *msg;
+
+	if (datalen && !data)
+		return ERR_PTR(-EINVAL);
+
+	/*
+	 * Sum in size_t: adding two u16 lengths and the header size in a u16
+	 * could wrap to a small value, pass the size check, and then let the
+	 * memcpy() below run off the end of the allocation.
+	 */
+	totallen = sizeof(*msg) + datalen + padlen;
+	if (totallen > PAGE_SIZE || totallen != (u16)totallen)
+		return ERR_PTR(-E2BIG);
+
+	msg = kzalloc(totallen, GFP_KERNEL);
+	if (!msg)
+		return ERR_PTR(-ENOMEM);
+
+	msg->msgid = msgid;
+	msg->type = type;
+	msg->flags = flags;
+	msg->totallen = totallen;
+	if (datalen)
+		memcpy(payload_of(msg), data, datalen);
+	return msg;
+}
+EXPORT_SYMBOL(pipefs_alloc_init_msg_padded);
+
+/*
+ * Unpadded convenience form of pipefs_alloc_init_msg_padded(): builds the
+ * same message, with no pad bytes following the payload.  Return value is
+ * whatever pipefs_alloc_init_msg_padded() returns.
+ */
+struct pipefs_hdr *pipefs_alloc_init_msg(u32 msgid, u8 type, u8 flags,
+				    void *data, u16 datalen)
+{
+	const u16 nopad = 0;
+
+	return pipefs_alloc_init_msg_padded(msgid, type, flags,
+					    data, datalen, nopad);
+}
+EXPORT_SYMBOL(pipefs_alloc_init_msg);
+
+
+/*
+ * Reset @rpcmsg and point it at @msg: the rpc_pipe_msg carries the whole
+ * pipefs message (msg->totallen bytes) plus the caller's @upflags.  Every
+ * other field starts out zeroed.
+ */
+static void pipefs_init_rpcmsg(struct rpc_pipe_msg *rpcmsg,
+			       struct pipefs_hdr *msg, u8 upflags)
+{
+	*rpcmsg = (struct rpc_pipe_msg) {
+		.data	= msg,
+		.len	= msg->totallen,
+		.flags	= upflags,
+	};
+}
+
+/*
+ * Heap-allocate a struct rpc_pipe_msg and initialize it to carry @msg with
+ * @upflags.  Returns the new rpc_pipe_msg, or ERR_PTR(-ENOMEM).
+ */
+static struct rpc_pipe_msg *pipefs_alloc_init_rpcmsg(struct pipefs_hdr *msg,
+						     u8 upflags)
+{
+	struct rpc_pipe_msg *vehicle = kmalloc(sizeof(*vehicle), GFP_KERNEL);
+
+	if (vehicle == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	pipefs_init_rpcmsg(vehicle, msg, upflags);
+	return vehicle;
+}
+
+
+/*
+ * represents an upcall that'll block and wait for a reply
+ *
+ * pipefs_queue_upcall_waitreply() keeps one of these on its own stack for
+ * the duration of the call.
+ */
+struct pipefs_upcall {
+	/* copied from the upcall message; replies are matched against it */
+	u32 msgid;
+	/* embedded (not separately allocated) vehicle queued on the pipe */
+	struct rpc_pipe_msg rpcmsg;
+	/* links this upcall into a struct pipefs_list */
+	struct list_head list;
+	/* the blocked caller sleeps here until a reply is assigned */
+	wait_queue_head_t waitq;
+	/* NULL until pipefs_assign_upcall_reply() stores the reply */
+	struct pipefs_hdr *reply;
+};
+
+
+/*
+ * Get @upcall ready to be queued: wrap @msg (with @upflags) in the embedded
+ * rpc_pipe_msg, set up the wait queue and list linkage, remember the msgid
+ * that a reply must carry, and start with no reply.
+ */
+static void pipefs_init_upcall_waitreply(struct pipefs_upcall *upcall,
+					 struct pipefs_hdr *msg, u8 upflags)
+{
+	pipefs_init_rpcmsg(&upcall->rpcmsg, msg, upflags);
+	init_waitqueue_head(&upcall->waitq);
+	INIT_LIST_HEAD(&upcall->list);
+	upcall->msgid = msg->msgid;
+	upcall->reply = NULL;
+}
+
+/*
+ * Put @upcall on @uplist, queue its rpc_pipe_msg on @pipe, and sleep
+ * (uninterruptibly) until a reply has been stored in upcall->reply or, when
+ * @timeout is nonzero, until @timeout jiffies have passed.
+ *
+ * Returns 0 once the wait is over with no error, -ETIMEDOUT if the timer
+ * expired with no reply, or the negative error from rpc_queue_upcall().
+ * @upcall is always off @uplist again by the time this returns.
+ *
+ * NOTE(review): nothing here takes upcall->rpcmsg back off the pipe after a
+ * timeout, yet the only caller keeps @upcall on its stack -- confirm how the
+ * pipe is kept from touching it later.
+ */
+static int __pipefs_queue_upcall_waitreply(struct dentry *pipe,
+					   struct pipefs_upcall *upcall,
+					   struct pipefs_list *uplist,
+					   u32 timeout)
+{
+	int err;
+
+	/* be findable before the message can possibly be answered */
+	spin_lock(&uplist->list_lock);
+	list_add(&upcall->list, &uplist->list);
+	spin_unlock(&uplist->list_lock);
+
+	err = rpc_queue_upcall(pipe->d_inode, &upcall->rpcmsg);
+	if (err < 0)
+		goto out;
+
+	/*
+	 * wait_event*() re-test the condition after going onto the wait
+	 * queue, so a reply that arrives between queueing the upcall and
+	 * going to sleep is not a lost wakeup.
+	 */
+	if (timeout) {
+		long left;
+
+		left = wait_event_timeout(upcall->waitq,
+					  upcall->reply != NULL, timeout);
+		/* a reply landing just as the timer expires still counts */
+		if (left == 0 && upcall->reply == NULL)
+			err = -ETIMEDOUT;
+	} else {
+		wait_event(upcall->waitq, upcall->reply != NULL);
+	}
+
+out:
+	spin_lock(&uplist->list_lock);
+	list_del_init(&upcall->list);
+	spin_unlock(&uplist->list_lock);
+	return err;
+}
+
+/*
+ * Queue a pipefs msg for an upcall to userspace, place the calling thread
+ * on @uplist, and block the thread to wait for a reply.  If @timeout is
+ * nonzero, the thread will be blocked for at most @timeout jiffies.
+ *
+ * (To convert time units into jiffies, consider the functions
+ *  msecs_to_jiffies(), usecs_to_jiffies(), timeval_to_jiffies(), and
+ *  timespec_to_jiffies().)
+ *
+ * Once a reply is received by your downcall handler, call
+ * pipefs_assign_upcall_reply() with @uplist to find the corresponding upcall,
+ * assign the reply, and wake the waiting thread.
+ *
+ * This function's return value pointer may be an error and should be checked
+ * with IS_ERR() before attempting to access the reply message.
+ *
+ * Callers are responsible for freeing @msg, unless pipefs_generic_destroy_msg()
+ * is used as the ->destroy_msg() callback and the PIPEFS_AUTOFREE_UPCALL_MSG
+ * flag is set in @upflags.  See also rpc_pipe_fs.h.
+ *
+ * PIPEFS_AUTOFREE_RPCMSG is ignored if set in @upflags: the rpc_pipe_msg used
+ * here is embedded in a stack variable and must never be passed to kfree().
+ *
+ * NOTE(review): for the same reason, the rpc_pipe_msg must not still be
+ * referenced by the pipe once this function returns; nothing visible here
+ * dequeues it after a timeout -- confirm.
+ */
+struct pipefs_hdr *pipefs_queue_upcall_waitreply(struct dentry *pipe,
+					    struct pipefs_hdr *msg,
+					    struct pipefs_list *uplist,
+					    u8 upflags, u32 timeout)
+{
+	int err;
+	struct pipefs_upcall upcall;
+
+	/* upcall.rpcmsg lives in this stack frame; see above */
+	upflags &= ~PIPEFS_AUTOFREE_RPCMSG;
+
+	pipefs_init_upcall_waitreply(&upcall, msg, upflags);
+	err = __pipefs_queue_upcall_waitreply(pipe, &upcall, uplist, timeout);
+	if (err < 0) {
+		/* a reply may have raced in alongside the failure; drop it */
+		kfree(upcall.reply);
+		upcall.reply = ERR_PTR(err);
+	}
+
+	return upcall.reply;
+}
+EXPORT_SYMBOL(pipefs_queue_upcall_waitreply);
+
+/*
+ * Queue a pipefs msg for an upcall to userspace and immediately return (i.e.,
+ * no reply is expected).
+ *
+ * Callers are responsible for freeing @msg, unless pipefs_generic_destroy_msg()
+ * is used as the ->destroy_msg() callback and the PIPEFS_AUTOFREE_UPCALL_MSG
+ * flag is set in @upflags.  See also rpc_pipe_fs.h.
+ *
+ * Returns 0 on success or a negative errno.  This function never frees @msg
+ * itself, whether it succeeds or fails.
+ */
+int pipefs_queue_upcall_noreply(struct dentry *pipe, struct pipefs_hdr *msg,
+				u8 upflags)
+{
+	int err;
+	struct rpc_pipe_msg *rpcmsg;
+
+	/* the vehicle is allocated here, so have ->destroy_msg() free it */
+	upflags |= PIPEFS_AUTOFREE_RPCMSG;
+	rpcmsg = pipefs_alloc_init_rpcmsg(msg, upflags);
+	if (IS_ERR(rpcmsg))
+		return PTR_ERR(rpcmsg);
+
+	err = rpc_queue_upcall(pipe->d_inode, rpcmsg);
+	if (err < 0)
+		/* it was never queued, so nobody else will free it */
+		kfree(rpcmsg);
+	return err;
+}
+EXPORT_SYMBOL(pipefs_queue_upcall_noreply);
+
+
+/*
+ * Return the upcall on @uplist whose msgid is @msgid, or NULL if none.
+ *
+ * The caller must hold @uplist->list_lock, and must go on holding it for as
+ * long as it uses the result: the waiter removes its upcall from the list
+ * under that lock and its upcall may then go away.
+ */
+static struct pipefs_upcall *pipefs_find_upcall_msgid(u32 msgid,
+						 struct pipefs_list *uplist)
+{
+	struct pipefs_upcall *upcall;
+
+	list_for_each_entry(upcall, &uplist->list, list)
+		if (upcall->msgid == msgid)
+			return upcall;
+	return NULL;
+}
+
+/*
+ * In your rpc_pipe_ops->downcall() handler, once you've read in a downcall
+ * message and have determined that it is a reply to a waiting upcall,
+ * you can use this function to find the appropriate upcall, assign the result,
+ * and wake the upcall thread.
+ *
+ * The reply message must have the same msgid as the original upcall message's.
+ *
+ * Returns 0 if @reply was handed to a waiter, or -ENOENT if no upcall with
+ * that msgid is on @uplist (in which case @reply is left untouched).
+ *
+ * See also pipefs_queue_upcall_waitreply() and pipefs_readmsg().
+ */
+int pipefs_assign_upcall_reply(struct pipefs_hdr *reply,
+			       struct pipefs_list *uplist)
+{
+	int err = 0;
+	struct pipefs_upcall *upcall;
+
+	/*
+	 * Lookup, assignment and wakeup all happen under the list lock, so
+	 * the waiter cannot leave the list (and let its upcall go away)
+	 * while we still hold a pointer to it.
+	 */
+	spin_lock(&uplist->list_lock);
+	upcall = pipefs_find_upcall_msgid(reply->msgid, uplist);
+	if (upcall) {
+		upcall->reply = reply;
+		wake_up(&upcall->waitq);
+	} else {
+		err = -ENOENT;
+	}
+	spin_unlock(&uplist->list_lock);
+
+	if (err)
+		printk(KERN_ERR "%s: ERROR: have reply but no matching upcall "
+			"for msgid %u\n", __func__, reply->msgid);
+	return err;
+}
+EXPORT_SYMBOL(pipefs_assign_upcall_reply);
+
+/*
+ * Generic method to read-in and return a newly-allocated message which begins
+ * with a struct pipefs_hdr.
+ *
+ * @src/@len describe the userspace buffer handed to the downcall; @filp is
+ * not used.  The caller owns the returned message and frees it with kfree().
+ *
+ * Returns the message, or an ERR_PTR():
+ *   -EINVAL  @len is shorter than a header, or the header's totallen claims
+ *            more bytes than were actually supplied
+ *   -E2BIG   @len is larger than the u16 totallen field could ever describe
+ *   -ENOMEM  allocation failure
+ *   -EFAULT  @src could not be read
+ */
+struct pipefs_hdr *pipefs_readmsg(struct file *filp, const char __user *src,
+			     size_t len)
+{
+	int err;
+	const int hdrsize = sizeof(struct pipefs_hdr);
+	struct pipefs_hdr *msg = NULL;
+
+	if (len < hdrsize) {
+		printk(KERN_ERR "%s: ERROR: header is too short (%d vs %d)\n",
+		       __func__, (int) len, hdrsize);
+		err = -EINVAL;
+		goto out_err;
+	}
+	/* @len comes from userspace: bound it before allocating */
+	if (len != (u16)len) {
+		err = -E2BIG;
+		goto out_err;
+	}
+
+	msg = kzalloc(len, GFP_KERNEL);
+	if (!msg) {
+		err = -ENOMEM;
+		goto out_err;
+	}
+	if (copy_from_user(msg, src, len)) {
+		err = -EFAULT;
+		goto out_err;
+	}
+	/* don't let consumers trust a length that overruns the buffer */
+	if (msg->totallen > len) {
+		err = -EINVAL;
+		goto out_err;
+	}
+	return msg;
+
+out_err:
+	kfree(msg);
+	return ERR_PTR(err);
+}
+EXPORT_SYMBOL(pipefs_readmsg);
+
+/*
+ * Generic rpc_pipe_ops->upcall() handler implementation.
+ *
+ * Copies as much of the not-yet-delivered part of @rpcmsg as fits in
+ * @buflen to @dst and advances rpcmsg->copied by the amount delivered.
+ *
+ * Returns the number of bytes copied, or -EFAULT (also recorded in
+ * rpcmsg->errno) if there was data to deliver and none of it could be
+ * written to @dst.
+ *
+ * Don't call this directly: to make an upcall, use
+ * pipefs_queue_upcall_waitreply() or pipefs_queue_upcall_noreply().
+ */
+ssize_t pipefs_generic_upcall(struct file *filp, struct rpc_pipe_msg *rpcmsg,
+			      char __user *dst, size_t buflen)
+{
+	char *data = (char *)rpcmsg->data + rpcmsg->copied;
+	size_t len = rpcmsg->len - rpcmsg->copied;
+	unsigned long left;
+
+	if (len > buflen)
+		len = buflen;
+
+	/*
+	 * copy_to_user() returns the number of bytes it could NOT copy and
+	 * is never negative; "nothing copied at all" is the fault case.
+	 */
+	left = copy_to_user(dst, data, len);
+	if (len && left == len) {
+		rpcmsg->errno = -EFAULT;
+		return -EFAULT;
+	}
+
+	len -= left;
+	rpcmsg->copied += len;
+	rpcmsg->errno = 0;
+	return len;
+}
+EXPORT_SYMBOL(pipefs_generic_upcall);
+
+/*
+ * Generic rpc_pipe_ops->destroy_msg() handler implementation.
+ *
+ * Nothing is freed unless @rpcmsg->flags asks for it:
+ * PIPEFS_AUTOFREE_UPCALL_MSG frees the carried message (rpcmsg->data) and
+ * PIPEFS_AUTOFREE_RPCMSG frees @rpcmsg itself.
+ * See pipefs_queue_upcall_noreply() and rpc_pipe_fs.h.
+ */
+void pipefs_generic_destroy_msg(struct rpc_pipe_msg *rpcmsg)
+{
+	bool free_vehicle = rpcmsg->flags & PIPEFS_AUTOFREE_RPCMSG;
+	void *payload = NULL;
+
+	/* pick up the payload pointer before @rpcmsg itself can go away */
+	if (rpcmsg->flags & PIPEFS_AUTOFREE_UPCALL_MSG)
+		payload = rpcmsg->data;
+
+	kfree(payload);		/* kfree(NULL) is a no-op */
+	if (free_vehicle)
+		kfree(rpcmsg);
+}
+EXPORT_SYMBOL(pipefs_generic_destroy_msg);
-- 
1.7.0.4


^ permalink raw reply related	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2010-07-21 22:30 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-07-15 19:40 [PATCH 0/2] pnfs-block: move device mapping from kernel to user daemon Jim Rees
2010-07-15 19:41 ` [PATCH 1/2] pnfs-block: Add support for simple rpc pipefs Jim Rees
2010-07-18  9:09   ` Christoph Hellwig
2010-07-15 19:41 ` [PATCH 2/2] pnfs-block: Remove device creation from kernel Jim Rees
2010-07-18  9:06 ` [PATCH 0/2] pnfs-block: move device mapping from kernel to user daemon Christoph Hellwig
  -- strict thread matches above, loose matches on Subject: below --
2010-07-21 22:29 [PATCH 0/2] complex block layout Jim Rees
2010-07-21 22:30 ` [PATCH 1/2] pnfs-block: Add support for simple rpc pipefs Jim Rees

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).