* [RFC] FC Transport : Async Events via netlink interface
@ 2006-04-17 20:44 James Smart
  2006-04-18 16:01 ` Mike Anderson
  2006-04-19 14:59 ` [RFC] FC Transport : Async Events via netlink interface Matthew Wilcox
  0 siblings, 2 replies; 24+ messages in thread
From: James Smart @ 2006-04-17 20:44 UTC (permalink / raw)
  To: linux-scsi

[netdev folks copied to review the use of the netlink interface]

This patch adds HBAAPI async event support to the FC transport.

Events are pushed to userspace via Netlink. This is per the previous
RFC comments given in:
  http://marc.theaimsgroup.com/?l=linux-scsi&m=114062896729418&w=2

This patch contains the following changes:
  - Add Netlink support to the FC transport
  - Create a new file "include/scsi/scsi_netlink_fc.h", which
    contains the user-space visible portion of the FC transport
    netlink messaging
  - Allow user apps to register to receive async events
  - Add the fc_host_event_post() interface to post async events
  - A couple of misc fixes:
     - From the prior event post: small dev_loss_tmo mods, with the
       main fix to validate the module parameter on load.
     - Fix fc_user_scan() so it safely walks the rport list

Using netlink worked out very well, and it solves the multiple-receiver issue.
It also looks very extensible for additional HBAAPI function support.
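
For reviewers, here's a rough sketch of what a user-space listener could
look like against this interface (untested, error checking omitted, based
on the definitions in scsi_netlink_fc.h below; an LLDD posts events via
e.g. fc_host_event_post(shost, FCH_EVT_LIP, 0)):

#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/netlink.h>		/* NETLINK_FCTRANSPORT, nlmsghdr */
#include <scsi/scsi_netlink_fc.h>	/* added by this patch */

int main(void)
{
	struct sockaddr_nl src = { .nl_family = AF_NETLINK };
	struct sockaddr_nl kern = { .nl_family = AF_NETLINK };	/* pid 0 */
	struct {
		struct nlmsghdr nlh;
		struct fc_nl_hdr fch;
	} req;
	char rbuf[1024];
	struct nlmsghdr *nlh = (struct nlmsghdr *) rbuf;
	struct fc_nl_event *evt;
	int fd, len;

	fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_FCTRANSPORT);
	src.nl_pid = getpid();
	bind(fd, (struct sockaddr *) &src, sizeof(src));

	/* register this pid to receive async events */
	memset(&req, 0, sizeof(req));
	req.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(req.fch));
	req.nlh.nlmsg_type = FC_TRANSPORT_MSG;
	req.nlh.nlmsg_pid = getpid();
	req.fch.msgtype = FC_NL_EVENTS_REG;
	req.fch.version = FC_NETLINK_API_VERSION;
	sendto(fd, &req, req.nlh.nlmsg_len, 0,
	       (struct sockaddr *) &kern, sizeof(kern));

	/* then just read events as they arrive */
	while ((len = recv(fd, rbuf, sizeof(rbuf), 0)) > 0) {
		if (!NLMSG_OK(nlh, len) ||
		    nlh->nlmsg_type != FC_TRANSPORT_MSG)
			continue;
		evt = NLMSG_DATA(nlh);
		printf("host%u: event 0x%x, data 0x%08x\n",
			evt->host_no, evt->event_code, evt->event_data);
	}
	return 0;
}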

-- james s

PS: Comments on the Kconfig change are appreciated. I don't have much
  experience with changing the kernel config and build process.



Signed-off-by: James Smart <James.Smart@emulex.com>


diff -upNr a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
--- a/drivers/scsi/Kconfig	2006-03-29 11:53:24.000000000 -0500
+++ b/drivers/scsi/Kconfig	2006-04-17 12:03:31.000000000 -0400
@@ -221,7 +221,7 @@ config SCSI_SPI_ATTRS
 
 config SCSI_FC_ATTRS
 	tristate "FiberChannel Transport Attributes"
-	depends on SCSI
+	depends on SCSI && NET && NETFILTER && NETFILTER_NETLINK
 	help
 	  If you wish to export transport-specific information about
 	  each attached FiberChannel device to sysfs, say Y.
diff -upNr a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c
--- a/drivers/scsi/scsi_transport_fc.c	2006-04-10 09:02:15.000000000 -0400
+++ b/drivers/scsi/scsi_transport_fc.c	2006-04-17 11:24:53.000000000 -0400
@@ -33,9 +33,17 @@
 #include <scsi/scsi_transport_fc.h>
 #include <scsi/scsi_cmnd.h>
 #include "scsi_priv.h"
+#include <linux/time.h>
+#include <linux/jiffies.h>
+#include <linux/security.h>
+#include <net/sock.h>
+#include <net/netlink.h>
 
 static int fc_queue_work(struct Scsi_Host *, struct work_struct *);
 
+#define get_list_head_entry(pos, head, member) 		\
+	pos = list_entry((head)->next, typeof(*pos), member)
+
 /*
  * Redefine so that we can have same named attributes in the
  * sdev/starget/host objects.
@@ -132,6 +140,29 @@ fc_enum_name_match(tgtid_bind_type, fc_t
 #define FC_BINDTYPE_MAX_NAMELEN	30
 

+/* Convert fc_host_event_code values to ascii string name */
+static const struct {
+	enum fc_host_event_code		value;
+	char				*name;
+} fc_host_event_code_names[] = {
+	{ FCH_EVT_LIP,			"lip" },
+	{ FCH_EVT_LINKUP,		"link_up" },
+	{ FCH_EVT_LINKDOWN,		"link_down" },
+	{ FCH_EVT_LIPRESET,		"lip_reset" },
+	{ FCH_EVT_RSCN,			"rscn" },
+	{ FCH_EVT_ADAPTER_CHANGE,	"adapter_chg" },
+	{ FCH_EVT_PORT_UNKNOWN,		"port_unknown" },
+	{ FCH_EVT_PORT_ONLINE,		"port_online" },
+	{ FCH_EVT_PORT_OFFLINE,		"port_offline" },
+	{ FCH_EVT_PORT_FABRIC,		"port_fabric" },
+	{ FCH_EVT_LINK_UNKNOWN,		"link_unknown" },
+	{ FCH_EVT_VENDOR_UNIQUE,	"vendor_unique" },
+};
+fc_enum_name_search(host_event_code, fc_host_event_code,
+		fc_host_event_code_names)
+#define FC_HOST_EVENT_CODE_MAX_NAMELEN	30
+
+
 #define fc_bitfield_name_search(title, table)			\
 static ssize_t							\
 get_fc_##title##_names(u32 table_key, char *buf)		\
@@ -368,9 +399,9 @@ static DECLARE_TRANSPORT_CLASS(fc_rport_
  *   should insulate the loss of a remote port.
  *   The maximum will be capped by the value of SCSI_DEVICE_BLOCK_MAX_TIMEOUT.
  */
-static unsigned int fc_dev_loss_tmo = SCSI_DEVICE_BLOCK_MAX_TIMEOUT;
+static unsigned int fc_modp_dev_loss_tmo = SCSI_DEVICE_BLOCK_MAX_TIMEOUT;
 
-module_param_named(dev_loss_tmo, fc_dev_loss_tmo, int, S_IRUGO|S_IWUSR);
+module_param_named(dev_loss_tmo, fc_modp_dev_loss_tmo, int, S_IRUGO|S_IWUSR);
 MODULE_PARM_DESC(dev_loss_tmo,
 		 "Maximum number of seconds that the FC transport should"
 		 " insulate the loss of a remote port. Once this value is"
@@ -378,19 +409,366 @@ MODULE_PARM_DESC(dev_loss_tmo,
 		 " between 1 and SCSI_DEVICE_BLOCK_MAX_TIMEOUT.");
 

+/**
+ * Netlink Infrastructure
+ **/
+
+#include <scsi/scsi_netlink_fc.h>
+
+struct fc_nl_user {
+	struct list_head ulist;
+	int pid;
+	u32 flags;
+};
+
+/* fc_nl_user flags values */
+#define FC_NL_UF_EVENTS		0x01
+
+static struct sock *fc_nl_sock;
+static DEFINE_SPINLOCK(fc_nl_lock);
+static u32 fc_event_seq;
+static struct list_head fc_nl_user_list;
+
+#define FC_NL_GROUP_CNT		0
+
+static inline struct fc_nl_user *
+fc_find_user(int pid)
+{
+	struct fc_nl_user *nluser;
+
+	list_for_each_entry(nluser, &fc_nl_user_list, ulist)
+		if (nluser->pid == pid)
+			return nluser;
+	return NULL;
+}
+
+static struct fc_nl_user *
+fc_add_user(int pid, int uflag)
+{
+	struct fc_nl_user *nluser, *newuser;
+	unsigned long flags;
+
+	/* pre-guess we need to add a user struct */
+	newuser = kzalloc(sizeof(struct fc_nl_user), GFP_KERNEL);
+
+	spin_lock_irqsave(&fc_nl_lock, flags);
+
+	nluser = fc_find_user(pid);
+	if (!nluser) {
+		if (newuser) {
+			newuser->pid = pid;
+			newuser->flags = uflag;
+			list_add_tail(&newuser->ulist, &fc_nl_user_list);
+		} 
+	} else
+		nluser->flags |= uflag;
+
+	spin_unlock_irqrestore(&fc_nl_lock, flags);
+
+	if (nluser) {
+		kfree(newuser);
+		return nluser;
+	}
+
+	return newuser;
+}
+
+static void
+fc_del_user(int pid, int uflag)
+{
+	struct fc_nl_user *nluser;
+	unsigned long flags;
+
+	spin_lock_irqsave(&fc_nl_lock, flags);
+
+	nluser = fc_find_user(pid);
+	if (nluser) {
+		nluser->flags &= ~uflag;
+		if (!nluser->flags)
+			list_del(&nluser->ulist);
+	}
+
+	spin_unlock_irqrestore(&fc_nl_lock, flags);
+
+	if (nluser && !nluser->flags)
+		kfree(nluser);
+}
+
+static int
+fc_handle_nl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int rcvlen)
+{
+	struct fc_nl_hdr *fch = NLMSG_DATA(nlh);
+	struct fc_nl_user *nluser;
+	int err = 0, pid;
+
+	pid = nlh->nlmsg_pid;
+
+	switch (fch->msgtype) {
+	case FC_NL_EVENTS_REG:
+		nluser = fc_add_user(pid, FC_NL_UF_EVENTS);
+		if (!nluser) {
+			printk(KERN_WARNING "%s: EVT REG failed\n",
+				__FUNCTION__);
+			err = -ENOMEM;
+		}
+		break;
+
+	case FC_NL_EVENTS_DEREG:
+		fc_del_user(pid, FC_NL_UF_EVENTS);
+		break;
+
+	default:
+		printk(KERN_WARNING "%s: unknown msg type 0x%x len %d\n",
+			 __FUNCTION__, fch->msgtype, rcvlen);
+		err = -EBADR;
+		break;
+	}
+
+	return err;
+}
+
+static void
+fc_nl_rcv_msg(struct sk_buff *skb)
+{
+	struct nlmsghdr *nlh;
+	struct fc_nl_hdr *fch;
+	uint32_t rlen;
+	int err;
+
+	while (skb->len >= NLMSG_SPACE(0)) {
+		err = 0;
+
+		nlh = (struct nlmsghdr *) skb->data;
+		if ((nlh->nlmsg_len < (sizeof(*nlh) + sizeof(*fch))) ||
+		    (skb->len < nlh->nlmsg_len)) {
+			printk(KERN_WARNING "%s: discarding partial skb\n",
+				 __FUNCTION__);
+			return;
+		}
+
+		rlen = NLMSG_ALIGN(nlh->nlmsg_len);
+		if (rlen > skb->len)
+			rlen = skb->len;
+
+		if (nlh->nlmsg_type != FC_TRANSPORT_MSG) {
+			err = -EBADMSG;
+			goto next_msg;
+		}
+
+		fch = NLMSG_DATA(nlh);
+		if (fch->version != FC_NETLINK_API_VERSION) {
+			err = -EPROTOTYPE;
+			goto next_msg;
+		}
+
+		if (security_netlink_recv(skb)) {
+			err = -EPERM;
+			goto next_msg;
+		}
+
+		err = fc_handle_nl_rcv_msg(skb, nlh, rlen);
+
+next_msg:
+		if ((err) || (nlh->nlmsg_flags & NLM_F_ACK))
+			netlink_ack(skb, nlh, err);
+
+		skb_pull(skb, rlen);
+	}
+}
+
+static void
+fc_nl_rcv(struct sock *sk, int len)
+{
+	struct sk_buff *skb;
+
+	while ((skb = skb_dequeue(&sk->sk_receive_queue))) {
+		fc_nl_rcv_msg(skb);
+		kfree_skb(skb);
+	}
+}
+
+static int
+fc_nl_rcv_nl_event(struct notifier_block *this, unsigned long event, void *ptr)
+{
+	struct netlink_notify *n = ptr;
+
+	if ((event == NETLINK_URELEASE) &&
+	    (n->protocol == NETLINK_FCTRANSPORT) && (n->pid))
+		fc_del_user(n->pid, 0xFFFFFFFF);
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block fc_netlink_notifier = {
+	.notifier_call  = fc_nl_rcv_nl_event,
+};
+
+
+static void
+fc_send_event(struct fc_nl_user *nluser, struct fc_nl_event *event)
+{
+	struct sk_buff *skb;
+	struct nlmsghdr	*nlh;
+	struct fc_nl_event *evt;
+	const char *name, *fn;
+	u32 len = NLMSG_SPACE(sizeof(*event));
+	int err;
+
+	skb = alloc_skb(len, GFP_KERNEL);
+	if (!skb) {
+		err = -ENOBUFS;
+		fn = "alloc_skb";
+		goto send_fail;
+	}
+
+	nlh = nlmsg_put(skb, nluser->pid, 0, FC_TRANSPORT_MSG,
+			len - sizeof(*nlh), 0);
+	if (!nlh) {
+		err = -ENOBUFS;
+		fn = "nlmsg_put";
+		goto send_fail;
+	}
+	evt = NLMSG_DATA(nlh);
+	memcpy(evt, event, sizeof(*event));
+
+	err = nlmsg_unicast(fc_nl_sock, skb, nluser->pid);
+	if (err < 0) {
+		fn = "nlmsg_unicast";
+		goto send_fail;
+	}
+
+	return;
+
+send_fail:
+	name = get_fc_host_event_code_name(event->event_code);
+	printk(KERN_WARNING
+		"%s: Dropped Event to PID %d : %s data 0x%08x : %s : err %d\n",
+		__FUNCTION__, nluser->pid, (name) ? name : "<unknown>",
+		event->event_data, fn, err);
+	return;
+}
+
+/**
+ * fc_host_event_post - called to post an event on an fc_host.
+ *
+ * @shost:	host the event occurred on
+ * @event:	event being posted
+ *
+ * Notes:
+ *	This routine assumes no locks are held on entry.
+ *
+ *      The event is sent (one unicast per registered pid) to every
+ *      user process that has registered to receive FC events.
+ **/
+void
+fc_host_event_post(struct Scsi_Host *shost,
+		enum fc_host_event_code event_code, u32 event_data)
+{
+	struct fc_nl_user *nluser, *next_nluser;
+	struct fc_nl_event *event;
+	struct timeval tv;
+	unsigned long flags;
+	u32 seq;
+
+	if (!fc_nl_sock || list_empty(&fc_nl_user_list))
+		return;
+
+	event = kzalloc(sizeof(*event), GFP_KERNEL);
+	if (!event) {
+		const char *name = get_fc_host_event_code_name(event_code);
+		printk(KERN_WARNING
+			"%s: Dropped Event : %s data 0x%08x - ENOMEM\n",
+			__FUNCTION__, (name) ? name : "<unknown>", event_data);
+		return;
+	}
+
+	spin_lock_irqsave(&fc_nl_lock, flags);
+	seq = fc_event_seq++;
+	spin_unlock_irqrestore(&fc_nl_lock, flags);
+	do_gettimeofday(&tv);
+
+	event->fcnlh.msgtype = FC_NL_ASYNC_EVENT;
+	event->fcnlh.version = FC_NETLINK_API_VERSION;
+	event->fcnlh.reserved1 = 0;
+	event->fcnlh.reserved2 = 0;
+	event->seq_num = seq;
+	event->host_no = shost->host_no;
+	event->event_code = event_code;
+	event->event_data = event_data;
+	event->tv_sec = tv.tv_sec;
+	event->tv_usec = tv.tv_usec;
+
+	list_for_each_entry_safe(nluser, next_nluser, &fc_nl_user_list, ulist) {
+		if (nluser->flags & FC_NL_UF_EVENTS)
+			fc_send_event(nluser, event);
+	}
+	
+	kfree(event);
+}
+EXPORT_SYMBOL(fc_host_event_post);
+
+
 static __init int fc_transport_init(void)
 {
-	int error = transport_class_register(&fc_host_class);
+	int error;
+
+	/* fix any module parameters */
+
+	if ((fc_modp_dev_loss_tmo < 1) ||
+	    (fc_modp_dev_loss_tmo > SCSI_DEVICE_BLOCK_MAX_TIMEOUT)) {
+		printk(KERN_WARNING
+			"%s: dev_loss_tmo out of range, setting to max (%d)\n",
+			__FUNCTION__, SCSI_DEVICE_BLOCK_MAX_TIMEOUT);
+		fc_modp_dev_loss_tmo = SCSI_DEVICE_BLOCK_MAX_TIMEOUT;
+	}
+
+	INIT_LIST_HEAD(&fc_nl_user_list);
+
+	/* register the transport classes */
+
+	error = transport_class_register(&fc_host_class);
 	if (error)
 		return error;
 	error = transport_class_register(&fc_rport_class);
 	if (error)
-		return error;
-	return transport_class_register(&fc_transport_class);
+		goto rport_class_out;
+	error = transport_class_register(&fc_transport_class);
+	if (error)
+		goto transport_class_out;
+
+	error = netlink_register_notifier(&fc_netlink_notifier);
+	if (error)
+		goto register_out;
+
+	fc_nl_sock = netlink_kernel_create(NETLINK_FCTRANSPORT, FC_NL_GROUP_CNT,
+					fc_nl_rcv, THIS_MODULE);
+	if (!fc_nl_sock) {
+		error = -ENOBUFS;
+	} else
+		return error;		/* successful return */
+
+	netlink_unregister_notifier(&fc_netlink_notifier);
+register_out:
+	transport_class_unregister(&fc_transport_class);
+transport_class_out:
+	transport_class_unregister(&fc_rport_class);
+rport_class_out:
+	transport_class_unregister(&fc_host_class);
+
+	return error;
 }
 
 static void __exit fc_transport_exit(void)
 {
+	struct fc_nl_user *nluser;
+
+	sock_release(fc_nl_sock->sk_socket);
+	netlink_unregister_notifier(&fc_netlink_notifier);
+	while (!list_empty(&fc_nl_user_list)) {
+		get_list_head_entry(nluser, &fc_nl_user_list, ulist);
+		list_del(&nluser->ulist);
+		kfree(nluser);
+	}
 	transport_class_unregister(&fc_transport_class);
 	transport_class_unregister(&fc_rport_class);
 	transport_class_unregister(&fc_host_class);
@@ -874,9 +1252,6 @@ show_fc_private_host_tgtid_bind_type(str
 	return snprintf(buf, FC_BINDTYPE_MAX_NAMELEN, "%s\n", name);
 }
 
-#define get_list_head_entry(pos, head, member) 		\
-	pos = list_entry((head)->next, typeof(*pos), member)
-
 static ssize_t
 store_fc_private_host_tgtid_bind_type(struct class_device *cdev,
 	const char *buf, size_t count)
@@ -1142,14 +1517,24 @@ fc_timed_out(struct scsi_cmnd *scmd)
 }
 
 /*
- * Must be called with shost->host_lock held
+ * fc_user_scan - Sysfs interface to scan
+ *
+ * @shost:	The scsi host scan to occur on
+ * @channel:	Channel # to scan
+ * @id:		Target ID # to scan
+ * @lun:	Lun # to scan
+ *
+ * Notes:
+ *	This routine assumes no locks are held on entry.
  */
-static int fc_user_scan(struct Scsi_Host *shost, uint channel,
+static int
+fc_user_scan(struct Scsi_Host *shost, uint channel,
 		uint id, uint lun)
 {
-	struct fc_rport *rport;
+	struct fc_rport *rport, *next_rport;
 
-	list_for_each_entry(rport, &fc_host_rports(shost), peers) {
+	list_for_each_entry_safe(rport, next_rport,
+			&fc_host_rports(shost), peers) {
 		if (rport->scsi_target_id == -1)
 			continue;
 
@@ -1278,6 +1663,7 @@ void fc_release_transport(struct scsi_tr
 }
 EXPORT_SYMBOL(fc_release_transport);
 
+
 /**
  * fc_queue_work - Queue work to the fc_host workqueue.
  * @shost:	Pointer to Scsi_Host bound to fc_host.
@@ -1514,7 +1900,7 @@ fc_rport_create(struct Scsi_Host *shost,
 
 	rport->maxframe_size = -1;
 	rport->supported_classes = FC_COS_UNSPECIFIED;
-	rport->dev_loss_tmo = fc_dev_loss_tmo;
+	rport->dev_loss_tmo = fc_modp_dev_loss_tmo;
 	memcpy(&rport->node_name, &ids->node_name, sizeof(rport->node_name));
 	memcpy(&rport->port_name, &ids->port_name, sizeof(rport->port_name));
 	rport->port_id = ids->port_id;
diff -upNr a/include/linux/netlink.h b/include/linux/netlink.h
--- a/include/linux/netlink.h	2006-04-12 12:52:37.000000000 -0400
+++ b/include/linux/netlink.h	2006-04-12 12:53:01.000000000 -0400
@@ -21,6 +21,8 @@
 #define NETLINK_DNRTMSG		14	/* DECnet routing messages */
 #define NETLINK_KOBJECT_UEVENT	15	/* Kernel messages to userspace */
 #define NETLINK_GENERIC		16
+/* leave room for NETLINK_DM (DM Events) and NETLINK_TGT (SCSI Target) */
+#define NETLINK_FCTRANSPORT	19	/* SCSI FC Transport */
 
 #define MAX_LINKS 32		
 
diff -upNr a/include/scsi/scsi_netlink_fc.h b/include/scsi/scsi_netlink_fc.h
--- a/include/scsi/scsi_netlink_fc.h	1969-12-31 19:00:00.000000000 -0500
+++ b/include/scsi/scsi_netlink_fc.h	2006-04-17 09:51:08.000000000 -0400
@@ -0,0 +1,57 @@
+/* 
+ *  FiberChannel transport Netlink Interface
+ *
+ *  Copyright (C) 2006   James Smart, Emulex Corporation
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+#ifndef SCSI_NETLINK_FC_H
+#define SCSI_NETLINK_FC_H
+
+#define FC_NETLINK_API_VERSION		1
+
+/* Single Netlink Message type to send all FC Transport messages */
+#define FC_TRANSPORT_MSG		NLMSG_MIN_TYPE + 1
+
+/* FC transport header - found at the front of all FC_TRANSPORT_MSG messages */
+struct fc_nl_hdr {
+	uint16_t msgtype;
+	uint16_t version;
+	uint16_t reserved1;
+	uint16_t reserved2;
+} __attribute__((aligned(sizeof(uint64_t))));
+
+/* FC Transport Message Types */
+	/* user -> kernel */
+#define FC_NL_EVENTS_REG		0x0001
+#define FC_NL_EVENTS_DEREG		0x0002
+	/* kernel -> user */
+#define FC_NL_ASYNC_EVENT		0x0100
+
+/* Asynchronous Event Message */
+struct fc_nl_event {
+	struct fc_nl_hdr fcnlh;
+	uint32_t seq_num;
+	uint32_t host_no;
+	uint32_t event_code;
+	uint32_t event_data;
+	uint64_t tv_sec;
+	uint64_t tv_usec;
+} __attribute__((aligned(sizeof(uint64_t))));
+
+
+#endif /* SCSI_NETLINK_FC_H */
+
diff -upNr a/include/scsi/scsi_transport_fc.h b/include/scsi/scsi_transport_fc.h
--- a/include/scsi/scsi_transport_fc.h	2006-04-10 08:46:47.000000000 -0400
+++ b/include/scsi/scsi_transport_fc.h	2006-04-17 11:42:20.000000000 -0400
@@ -285,6 +285,30 @@ struct fc_host_statistics {
 

 /*
+ * FC Event Codes - Polled and Async, following FC HBAAPI v2.0 guidelines
+ */
+
+/*
+ * fc_host_event_code: If you alter this, you also need to alter
+ * scsi_transport_fc.c (for the ascii descriptions).
+ */
+enum fc_host_event_code  {
+	FCH_EVT_LIP			= 0x1,
+	FCH_EVT_LINKUP			= 0x2,
+	FCH_EVT_LINKDOWN		= 0x3,
+	FCH_EVT_LIPRESET		= 0x4,
+	FCH_EVT_RSCN			= 0x5,
+	FCH_EVT_ADAPTER_CHANGE		= 0x103,
+	FCH_EVT_PORT_UNKNOWN		= 0x200,
+	FCH_EVT_PORT_OFFLINE		= 0x201,
+	FCH_EVT_PORT_ONLINE		= 0x202,
+	FCH_EVT_PORT_FABRIC		= 0x204,
+	FCH_EVT_LINK_UNKNOWN		= 0x500,
+	FCH_EVT_VENDOR_UNIQUE		= 0xffff,
+};
+
+
+/*
  * FC Local Port (Host) Attributes
  *
  * Attributes are based on HBAAPI V2.0 definitions.
@@ -493,6 +517,15 @@ fc_remote_port_chkready(struct fc_rport 
 }
 

+static inline u64 wwn_to_u64(u8 *wwn)
+{
+	return (u64)wwn[0] << 56 | (u64)wwn[1] << 48 |
+	    (u64)wwn[2] << 40 | (u64)wwn[3] << 32 |
+	    (u64)wwn[4] << 24 | (u64)wwn[5] << 16 |
+	    (u64)wwn[6] <<  8 | (u64)wwn[7];
+}
+
+
 struct scsi_transport_template *fc_attach_transport(
 			struct fc_function_template *);
 void fc_release_transport(struct scsi_transport_template *);
@@ -502,13 +535,8 @@ struct fc_rport *fc_remote_port_add(stru
 void fc_remote_port_delete(struct fc_rport  *rport);
 void fc_remote_port_rolechg(struct fc_rport  *rport, u32 roles);
 int scsi_is_fc_rport(const struct device *);
+void fc_host_event_post(struct Scsi_Host *shost,
+		enum fc_host_event_code event_code, u32 event_data);
 
-static inline u64 wwn_to_u64(u8 *wwn)
-{
-	return (u64)wwn[0] << 56 | (u64)wwn[1] << 48 |
-	    (u64)wwn[2] << 40 | (u64)wwn[3] << 32 |
-	    (u64)wwn[4] << 24 | (u64)wwn[5] << 16 |
-	    (u64)wwn[6] <<  8 | (u64)wwn[7];
-}
 
 #endif /* SCSI_TRANSPORT_FC_H */





* Re: [RFC] FC Transport : Async Events via netlink interface
  2006-04-17 20:44 [RFC] FC Transport : Async Events via netlink interface James Smart
@ 2006-04-18 16:01 ` Mike Anderson
  2006-04-19 12:52   ` James Smart
  2006-04-19 12:57   ` [RFC] Netlink and user-space buffer pointers James Smart
  2006-04-19 14:59 ` [RFC] FC Transport : Async Events via netlink interface Matthew Wilcox
  1 sibling, 2 replies; 24+ messages in thread
From: Mike Anderson @ 2006-04-18 16:01 UTC (permalink / raw)
  To: James Smart; +Cc: linux-scsi

Looks good, comments below.

James Smart <James.Smart@Emulex.Com> wrote:
> +static void
> +fc_send_event(struct fc_nl_user *nluser, struct fc_nl_event *event)
> +{
> +	struct sk_buff *skb;
> +	struct nlmsghdr	*nlh;
> +	struct fc_nl_event *evt;
> +	const char *name, *fn;
> +	u32 len = NLMSG_SPACE(sizeof(*event));
> +	int err;
> +
> +	skb = alloc_skb(len, GFP_KERNEL);
> +	if (!skb) {
> +		err = -ENOBUFS;
> +		fn = "alloc_skb";
> +		goto send_fail;
> +	}
> +
> +	nlh = nlmsg_put(skb, nluser->pid, 0, FC_TRANSPORT_MSG,
> +			len - sizeof(*nlh), 0);
> +	if (!nlh) {
> +		err = -ENOBUFS;
> +		fn = "nlmsg_put";
> +		goto send_fail;
> +	}
> +	evt = NLMSG_DATA(nlh);
> +	memcpy(evt, event, sizeof(*event));
> +
> +	err = nlmsg_unicast(fc_nl_sock, skb, nluser->pid);
> +	if (err < 0) {
> +		fn = "nlmsg_unicast";
> +		goto send_fail;
> +	}

Is there some reason that you are not using nlmsg_multicast? The caller of
this function is somewhat simulating the function of multicast.
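
I.e. something like the below (the group number is made up):

	/* kernel side: one multicast send replaces the per-pid loop */
	#define FC_NL_EVENT_GROUP	1
	nlmsg_multicast(fc_nl_sock, skb, 0, FC_NL_EVENT_GROUP);

and user space would join the group at bind() time, by setting
nladdr.nl_groups = 1, instead of sending an explicit REG message.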

> +
> +	return;
> +
> +send_fail:
> +	name = get_fc_host_event_code_name(event->event_code);
> +	printk(KERN_WARNING
> +		"%s: Dropped Event to PID %d : %s data 0x%08x : %s : err %d\n",
> +		__FUNCTION__, nluser->pid, (name) ? name : "<unknown>",
> +		event->event_data, fn, err);
> +	return;
> +}

In the send_fail case it looks like you leak skbs. Do you need to add a
call to nlmsg_free or kfree_skb?


-andmike
--
Michael Anderson
andmike@us.ibm.com


* Re: [RFC] FC Transport : Async Events via netlink interface
  2006-04-18 16:01 ` Mike Anderson
@ 2006-04-19 12:52   ` James Smart
  2006-04-19 12:57   ` [RFC] Netlink and user-space buffer pointers James Smart
  1 sibling, 0 replies; 24+ messages in thread
From: James Smart @ 2006-04-19 12:52 UTC (permalink / raw)
  To: Mike Anderson; +Cc: linux-scsi

Mike Anderson wrote:
> Is there some reason that you are not using nlmsg_multicast? The caller of
> this function is somewhat simulating the function of multicast.

Only that I haven't looked into using groups yet. It certainly makes sense.

> In the send_fail case it looks like you leak skbs. Do you need to add a
> call to nlmsg_free or kfree_skb?

Yep.

I'll include these comments in the revised post. I'll wait a little longer
for any further comments.

-- james



* [RFC] Netlink and user-space buffer pointers
  2006-04-18 16:01 ` Mike Anderson
  2006-04-19 12:52   ` James Smart
@ 2006-04-19 12:57   ` James Smart
  2006-04-19 16:22     ` Patrick McHardy
                       ` (2 more replies)
  1 sibling, 3 replies; 24+ messages in thread
From: James Smart @ 2006-04-19 12:57 UTC (permalink / raw)
  To: linux-scsi, netdev, linux-kernel

Folks,

To take netlink to where we want to use it within the SCSI subsystem (as
the mechanism of choice to replace ioctls), we're going to need to pass
user-space buffer pointers.

What is the best, portable manner to pass a pointer between user and kernel
space within a netlink message? The example I've seen is in the iscsi
target code - and it's passed between user-kernel space as a u64, then
typecast to a void *, and later within the bio_map_xxx functions, as an
unsigned long. I assume we are going to continue with this method ?

-- james s


* Re: [RFC] FC Transport : Async Events via netlink interface
  2006-04-17 20:44 [RFC] FC Transport : Async Events via netlink interface James Smart
  2006-04-18 16:01 ` Mike Anderson
@ 2006-04-19 14:59 ` Matthew Wilcox
  2006-04-19 16:11   ` James Smart
  1 sibling, 1 reply; 24+ messages in thread
From: Matthew Wilcox @ 2006-04-19 14:59 UTC (permalink / raw)
  To: James Smart; +Cc: linux-scsi

On Mon, Apr 17, 2006 at 04:44:21PM -0400, James Smart wrote:
> PS: Comments on the Kconfig change are appreciated. I don't have much
>   experience with changing the kernel config and build process.
> 
> diff -upNr a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
> --- a/drivers/scsi/Kconfig	2006-03-29 11:53:24.000000000 -0500
> +++ b/drivers/scsi/Kconfig	2006-04-17 12:03:31.000000000 -0400
> @@ -221,7 +221,7 @@ config SCSI_SPI_ATTRS
>  
>  config SCSI_FC_ATTRS
>  	tristate "FiberChannel Transport Attributes"
> -	depends on SCSI
> +	depends on SCSI && NET && NETFILTER && NETFILTER_NETLINK
>  	help
>  	  If you wish to export transport-specific information about
>  	  each attached FiberChannel device to sysfs, say Y.

I would use a select here rather than a depends.  So enabling a driver
that uses FC_ATTRS will force netlink to be added.  However, I think you
have the wrong symbols here.  NETFILTER_NETLINK is the netlink interface
for netfilter -- entirely unrelated.  It looks like you only need to
enable NET in order to force netlink to be built.  So something like
this would be enough:

 config SCSI_FC_ATTRS
 	tristate "FiberChannel Transport Attributes"
 	depends on SCSI
+	select NET
 	help
 	  If you wish to export transport-specific information about
 	  each attached FiberChannel device to sysfs, say Y.


> +#define get_list_head_entry(pos, head, member) 		\
> +	pos = list_entry((head)->next, typeof(*pos), member)
> +

This one sets alarm bells ringing ...

>  static void __exit fc_transport_exit(void)
>  {
> +	struct fc_nl_user *nluser;
> +
> +	sock_release(fc_nl_sock->sk_socket);
> +	netlink_unregister_notifier(&fc_netlink_notifier);
> +	while (!list_empty(&fc_nl_user_list)) {
> +		get_list_head_entry(nluser, &fc_nl_user_list, ulist);
> +		list_del(&nluser->ulist);
> +		kfree(nluser);
> +	}
>  	transport_class_unregister(&fc_transport_class);
>  	transport_class_unregister(&fc_rport_class);
>  	transport_class_unregister(&fc_host_class);

I would write this as:

	struct fc_nl_user *nluser, *tmp;
	list_for_each_entry_safe(nluser, tmp, &fc_nl_user_list, ulist) {
		kfree(nluser);
	}



* Re: [RFC] FC Transport : Async Events via netlink interface
  2006-04-19 14:59 ` [RFC] FC Transport : Async Events via netlink interface Matthew Wilcox
@ 2006-04-19 16:11   ` James Smart
  0 siblings, 0 replies; 24+ messages in thread
From: James Smart @ 2006-04-19 16:11 UTC (permalink / raw)
  To: Matthew Wilcox; +Cc: linux-scsi



Matthew Wilcox wrote:
> enable NET in order to force netlink to be built.  So something like
> this would be enough:
> 
>  config SCSI_FC_ATTRS
>  	tristate "FiberChannel Transport Attributes"
>  	depends on SCSI
> +	select NET
>  	help
>  	  If you wish to export transport-specific information about
>  	  each attached FiberChannel device to sysfs, say Y.

Thanks....

>> +#define get_list_head_entry(pos, head, member) 		\
>> +	pos = list_entry((head)->next, typeof(*pos), member)
>> +
> 
> This one sets alarm bells ringing ...

Please explain why. I've always wondered why the list macros never
let you look at the head of the list without dequeuing it. They will
let you look - as long as you use the functions that make them think
you're going to walk the list.


> I would write this as:
> 
> 	struct fc_nl_user *nluser, *tmp;
> 	list_for_each_entry_safe(nluser, tmp, &fc_nl_user_list, ulist) {
> 		kfree(nluser);
> 	}

ok.. Subtle, though, as you have to know you are consuming the entire list.

-- james




* Re: [RFC] Netlink and user-space buffer pointers
  2006-04-19 12:57   ` [RFC] Netlink and user-space buffer pointers James Smart
@ 2006-04-19 16:22     ` Patrick McHardy
  2006-04-19 17:08       ` James Smart
  2006-04-19 16:26     ` Stephen Hemminger
  2006-04-19 21:32     ` Mike Christie
  2 siblings, 1 reply; 24+ messages in thread
From: Patrick McHardy @ 2006-04-19 16:22 UTC (permalink / raw)
  To: James.Smart; +Cc: linux-scsi, netdev, linux-kernel

James Smart wrote:
> To take netlink to where we want to use it within the SCSI subsystem (as
> the mechanism of choice to replace ioctls), we're going to need to pass
> user-space buffer pointers.
> 
> What is the best, portable manner to pass a pointer between user and kernel
> space within a netlink message? The example I've seen is in the iscsi
> target code - and it's passed between user-kernel space as a u64, then
> typecast to a void *, and later within the bio_map_xxx functions, as an
> unsigned long. I assume we are going to continue with this method ?

This might be problematic: since there is a shared receive queue in
the kernel, a netlink message might get processed in the context of
a different process. I didn't find any spots where ISCSI passes
pointers over netlink, can you point me to it?

Besides that, netlink protocols should use fixed-size,
architecture-independent types, so u64 would be the best choice for pointers.
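
I.e. something like this (the "buf_addr" field name is made up, not
from the iscsi code):

	/* user space: stash the buffer pointer in a fixed-size field */
	ev->buf_addr = (uint64_t)(unsigned long) buf;

	/* kernel: recover a user pointer from the u64 */
	void __user *ubuf = (void __user *)(unsigned long) ev->buf_addr;
	if (copy_from_user(kbuf, ubuf, len))
		return -EFAULT;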


* Re: [RFC] Netlink and user-space buffer pointers
  2006-04-19 12:57   ` [RFC] Netlink and user-space buffer pointers James Smart
  2006-04-19 16:22     ` Patrick McHardy
@ 2006-04-19 16:26     ` Stephen Hemminger
  2006-04-19 17:05       ` James Smart
  2006-04-19 21:32     ` Mike Christie
  2 siblings, 1 reply; 24+ messages in thread
From: Stephen Hemminger @ 2006-04-19 16:26 UTC (permalink / raw)
  To: James.Smart; +Cc: linux-scsi, netdev, linux-kernel

On Wed, 19 Apr 2006 08:57:25 -0400
James Smart <James.Smart@Emulex.Com> wrote:

> Folks,
> 
> To take netlink to where we want to use it within the SCSI subsystem (as
> the mechanism of choice to replace ioctls), we're going to need to pass
> user-space buffer pointers.

This changes the design of netlink. It is desired that netlink
can be done remotely over the network as well as queued.
The current design is message based, not RPC based. By including a
user-space pointer, you are making the message dependent on the
context in which it is processed.

Please rethink your design.

> What is the best, portable manner to pass a pointer between user and kernel
> space within a netlink message? The example I've seen is in the iscsi
> target code - and it's passed between user-kernel space as a u64, then
> typecast to a void *, and later within the bio_map_xxx functions, as an
> unsigned long. I assume we are going to continue with this method ?
> 
> -- james s


* Re: [RFC] Netlink and user-space buffer pointers
  2006-04-19 16:26     ` Stephen Hemminger
@ 2006-04-19 17:05       ` James Smart
  0 siblings, 0 replies; 24+ messages in thread
From: James Smart @ 2006-04-19 17:05 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: linux-scsi, netdev, linux-kernel



Stephen Hemminger wrote:
> On Wed, 19 Apr 2006 08:57:25 -0400
> James Smart <James.Smart@Emulex.Com> wrote:
> 
>> Folks,
>>
>> To take netlink to where we want to use it within the SCSI subsystem (as
>> the mechanism of choice to replace ioctls), we're going to need to pass
>> user-space buffer pointers.
> 
> This changes the design of netlink. It is desired that netlink
> can be done remotely over the network as well as queued.
> The current design is message based, not RPC based. By including a
> user-space pointer, you are making the message dependent on the
> context in which it is processed.
> 
> Please rethink your design.

I assume that the message receiver has some way to determine where the
message originated (via the sk_buff), and thus could reject it if it
didn't meet the right criteria. True? You just have to be cognizant
that it is usable from a remote entity - which is a very good thing.


* Re: [RFC] Netlink and user-space buffer pointers
  2006-04-19 16:22     ` Patrick McHardy
@ 2006-04-19 17:08       ` James Smart
  2006-04-19 17:16         ` Patrick McHardy
  0 siblings, 1 reply; 24+ messages in thread
From: James Smart @ 2006-04-19 17:08 UTC (permalink / raw)
  To: Patrick McHardy; +Cc: linux-scsi, netdev, linux-kernel



Patrick McHardy wrote:
> This might be problematic: since there is a shared receive queue in
> the kernel, a netlink message might get processed in the context of
> a different process. I didn't find any spots where ISCSI passes
> pointers over netlink, can you point me to it?

Please explain... Would the pid be set erroneously as well? Ignoring
the kernel-user space pointer issue, we're going to have a tight
pid + request_id relationship being maintained across multiple messages.
We'll also be depending on the pid events for clean up if an app dies.
So I hope pid is consistent.

-- james s


* Re: [RFC] Netlink and user-space buffer pointers
  2006-04-19 17:08       ` James Smart
@ 2006-04-19 17:16         ` Patrick McHardy
  0 siblings, 0 replies; 24+ messages in thread
From: Patrick McHardy @ 2006-04-19 17:16 UTC (permalink / raw)
  To: James.Smart; +Cc: linux-scsi, netdev, linux-kernel

James Smart wrote:
> 
> 
> Patrick McHardy wrote:
> 
>> This might be problematic: since there is a shared receive queue in
>> the kernel, a netlink message might get processed in the context of
>> a different process. I didn't find any spots where ISCSI passes
>> pointers over netlink, can you point me to it?
> 
> 
> Please explain... Would the pid be set erroneously as well ?  Ignoring
> the kernel-user space pointer issue, we're going to have a tight
> pid + request_id relationship being maintained across multiple messages.
> We'll also be depending on the pid events for clean up if an app dies.
> So I hope pid is consistent.

The PID contained in the netlink message itself is correct; current->pid
might not be.
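
I.e. in the kernel receive path, identify the sender from the message
header (as the patch already does):

	struct nlmsghdr *nlh = (struct nlmsghdr *) skb->data;
	u32 pid = nlh->nlmsg_pid;	/* reliable, unlike current->pid */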


* Re: [RFC] Netlink and user-space buffer pointers
  2006-04-19 12:57   ` [RFC] Netlink and user-space buffer pointers James Smart
  2006-04-19 16:22     ` Patrick McHardy
  2006-04-19 16:26     ` Stephen Hemminger
@ 2006-04-19 21:32     ` Mike Christie
  2006-04-20 14:33       ` James Smart
  2 siblings, 1 reply; 24+ messages in thread
From: Mike Christie @ 2006-04-19 21:32 UTC (permalink / raw)
  To: James.Smart; +Cc: linux-scsi, netdev, linux-kernel

James Smart wrote:
> Folks,
> 
> To take netlink to where we want to use it within the SCSI subsystem (as
> the mechanism of choice to replace ioctls), we're going to need to pass
> user-space buffer pointers.
> 
> What is the best, portable manner to pass a pointer between user and kernel
> space within a netlink message? The example I've seen is in the iscsi
> target code - and it's passed between user-kernel space as a u64, then
> typecast to a void *, and later within the bio_map_xxx functions, as an
> unsigned long. I assume we are going to continue with this method ?
> 

I do not know if it is needed. For the target code, we use the
bio_map_xxx functions to avoid having to copy the command data, which is
needed for decent performance. We have also been trying to figure out
ways of getting out of using netlink to send the command info (cdb, tag
info, etc) around, because in some of Tomo's tests using mmapped packet
sockets he was able to improve performance by removing that copy from
the kernel to userspace. We had problems with that, though, and other
nice interfaces like relayfs only allowed us to pass data from the
kernel to userspace, so we still need another interface to pass things
from userspace to the kernel. Still working on this though. If someone
knows an interface please let us know.

For the tasks you want to do for the fc class, is performance critical?
If not, you could do what the iscsi class (for the netdev people this is
drivers/scsi/scsi_transport_iscsi.c) does and just suffer a couple
copies. For iscsi we do this in userspace to send down a login pdu:

	/*
	 * xmitbuf is a buffer that is large enough for the iscsi_event,
	 * iscsi pdu (hdr_size) and iscsi pdu data (data_size)
	 */
        memset(xmitbuf, 0, sizeof(*ev) + hdr_size + data_size);
        xmitlen = sizeof(*ev);
        ev = xmitbuf;
        ev->type = ISCSI_UEVENT_SEND_PDU;
        ev->transport_handle = transport_handle;
        ev->u.send_pdu.sid = sid;
        ev->u.send_pdu.cid = cid;
        ev->u.send_pdu.hdr_size = hdr_size;
        ev->u.send_pdu.data_size = data_size;

then later we do sendmsg() to send down the xmitbuf to the kernel iscsi
driver. I think there may be issues with packing structs or 32 bit
userspace and 64 bit kernels and other fun things like this so the iscsi
pdu and iscsi event have to be defined correctly and I guess we are back
to some of the problems with ioctls :(
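
The sendmsg() step is then roughly (a sketch, not the exact open-iscsi
code; fd is the NETLINK_ISCSI socket, and nlmsg_type would be whatever
the iscsi netlink protocol defines):

	struct sockaddr_nl dst = { .nl_family = AF_NETLINK };	/* the kernel */
	struct nlmsghdr nlh;
	struct iovec iov[2];
	struct msghdr msg;

	memset(&nlh, 0, sizeof(nlh));
	nlh.nlmsg_len = NLMSG_LENGTH(sizeof(*ev) + hdr_size + data_size);
	nlh.nlmsg_pid = getpid();

	iov[0].iov_base = &nlh;
	iov[0].iov_len = sizeof(nlh);
	iov[1].iov_base = xmitbuf;
	iov[1].iov_len = sizeof(*ev) + hdr_size + data_size;

	memset(&msg, 0, sizeof(msg));
	msg.msg_name = &dst;
	msg.msg_namelen = sizeof(dst);
	msg.msg_iov = iov;
	msg.msg_iovlen = 2;
	sendmsg(fd, &msg, 0);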


* Re: [RFC] Netlink and user-space buffer pointers
  2006-04-19 21:32     ` Mike Christie
@ 2006-04-20 14:33       ` James Smart
  2006-04-20 17:45         ` Mike Christie
  0 siblings, 1 reply; 24+ messages in thread
From: James Smart @ 2006-04-20 14:33 UTC (permalink / raw)
  To: Mike Christie; +Cc: linux-scsi, netdev, linux-kernel


Mike Christie wrote:
> For the tasks you want to do for the fc class, is performance critical?

No, it should not be.

> If not, you could do what the iscsi class (for the netdev people this is
> drivers/scsi/scsi_transport_iscsi.c) does and just suffer a couple
> copies. For iscsi we do this in userspace to send down a login pdu:
> 
> 	/*
> 	 * xmitbuf is a buffer that is large enough for the iscsi_event,
> 	 * iscsi pdu (hdr_size) and iscsi pdu data (data_size)
> 	 */

Well, the real difference is that the payload of the "message" is actually
the payload of the SCSI command or ELS/CT Request. Thus, the payload may
range in size from a few hundred bytes to several kbytes (> 1 page) to
Mbytes in size. Rather than buffer all of this, and push it over the socket,
thus incurring extra copies - it would be best to have the LLDD simply DMA the
payload like on a typical SCSI command.  Additionally, there will be
response data that can be several kbytes in length.

> ... I think there may be issues with packing structs or 32 bit
> userspace and 64 bit kernels and other fun things like this so the iscsi
> pdu and iscsi event have to be defined correctly and I guess we are back
> to some of the problems with ioctls :(

Agreed. In this use of netlink, there's not a lot of wins for netlink over
ioctls. It all comes down to 2 things: a) proper portable message definition;
and b) what do you do with that non-portable user space buffer pointer ?

-- james s


* Re: [RFC] Netlink and user-space buffer pointers
  2006-04-20 14:33       ` James Smart
@ 2006-04-20 17:45         ` Mike Christie
  2006-04-20 17:52           ` Mike Christie
                             ` (3 more replies)
  0 siblings, 4 replies; 24+ messages in thread
From: Mike Christie @ 2006-04-20 17:45 UTC (permalink / raw)
  To: James.Smart; +Cc: linux-scsi, netdev, linux-kernel

James Smart wrote:
> 
> Mike Christie wrote:
>> For the tasks you want to do for the fc class, is performance critical?
> 
> No, it should not be.
> 
>> If not, you could do what the iscsi class (for the netdev people this is
>> drivers/scsi/scsi_transport_iscsi.c) does and just suffer a couple
>> copies. For iscsi we do this in userspace to send down a login pdu:
>>
>>     /*
>>      * xmitbuf is a buffer that is large enough for the iscsi_event,
>>      * iscsi pdu (hdr_size) and iscsi pdu data (data_size)
>>      */
> 
> Well, the real difference is that the payload of the "message" is actually
> the payload of the SCSI command or ELS/CT Request. Thus, the payload may

I am not sure I follow. For iscsi, everything after the iscsi_event
struct can be the iscsi request that is to be transmitted. The payload
will not normally be Mbytes but it is not a couple of bytes.

> range in size from a few hundred bytes to several kbytes (> 1 page) to
> Mbytes in size. Rather than buffer all of this, and push it over the
> socket,
> thus incurring extra copies - it would be best to have the LLDD simply DMA the
> payload like on a typical SCSI command.  Additionally, there will be
> response data that can be several kbytes in length.
> 

Once you have got the buffer to the class, the class can create a
scatterlist to DMA from for the LLD, I thought. iscsi does not do this
just because it is software right now. For qla4xxx we do not need
something like what you are talking about (see below for what I was
thinking about for the initiators). If you are saying the extra step of
the copy is plain dumb, I agree, but this happens (you have to suffer
some copy and cannot do dio) for sg io as well in some cases. I think
for the sg driver the copy_*_user is the default.

Instead of netlink for scsi commands and transport requests....

For scsi commands could we just use sg io, or is there something special
about the command you want to send? If you can use sg io for scsi
commands, maybe for transport level requests (in my example iscsi pdu)
we could modify something like sg/bsg/block layer scsi_ioctl.c to send
down transport requests to the classes and encapsulate them in some new
struct transport_requests or use the existing struct request but do that
thing people keep talking about, using the request/request_queue for
message passing.


* Re: [RFC] Netlink and user-space buffer pointers
  2006-04-20 17:45         ` Mike Christie
@ 2006-04-20 17:52           ` Mike Christie
  2006-04-20 17:58           ` Mike Christie
                             ` (2 subsequent siblings)
  3 siblings, 0 replies; 24+ messages in thread
From: Mike Christie @ 2006-04-20 17:52 UTC (permalink / raw)
  To: James.Smart; +Cc: linux-scsi, netdev, linux-kernel

Mike Christie wrote:
> James Smart wrote:
>> Mike Christie wrote:
>>> For the tasks you want to do for the fc class, is performance critical?
>> No, it should not be.
>>
>>> If not, you could do what the iscsi class (for the netdev people this is
>>> drivers/scsi/scsi_transport_iscsi.c) does and just suffer a couple
>>> copies. For iscsi we do this in userspace to send down a login pdu:
>>>
>>>     /*
>>>      * xmitbuf is a buffer that is large enough for the iscsi_event,
>>>      * iscsi pdu (hdr_size) and iscsi pdu data (data_size)
>>>      */
>> Well, the real difference is that the payload of the "message" is actually
>> the payload of the SCSI command or ELS/CT Request. Thus, the payload may
> 
> I am not sure I follow. For iscsi, everything after the iscsi_event
> struct can be the iscsi request that is to be transmitted. The payload
> will not normally be Mbytes but it is not a couple of bytes.
> 
>> range in size from a few hundred bytes to several kbytes (> 1 page) to
>> Mbytes in size. Rather than buffer all of this, and push it over the
>> socket,
>> thus incurring extra copies - it would be best to have the LLDD simply DMA the
>> payload like on a typical SCSI command.  Additionally, there will be
>> response data that can be several kbytes in length.
>>
> 
> Once you have got the buffer to the class, the class can create a
> scatterlist to DMA from for the LLD, I thought. iscsi does not do this
> just because it is software right now. For qla4xxx we do not need

That should be, we do need.


* Re: [RFC] Netlink and user-space buffer pointers
  2006-04-20 17:45         ` Mike Christie
  2006-04-20 17:52           ` Mike Christie
@ 2006-04-20 17:58           ` Mike Christie
  2006-04-20 20:03           ` James Smart
  2006-04-20 20:18           ` Douglas Gilbert
  3 siblings, 0 replies; 24+ messages in thread
From: Mike Christie @ 2006-04-20 17:58 UTC (permalink / raw)
  To: James.Smart; +Cc: linux-scsi, netdev, linux-kernel

Mike Christie wrote:
> James Smart wrote:
>> Mike Christie wrote:
>>> For the tasks you want to do for the fc class, is performance critical?
>> No, it should not be.
>>
>>> If not, you could do what the iscsi class (for the netdev people this is
>>> drivers/scsi/scsi_transport_iscsi.c) does and just suffer a couple
>>> copies. For iscsi we do this in userspace to send down a login pdu:
>>>
>>>     /*
>>>      * xmitbuf is a buffer that is large enough for the iscsi_event,
>>>      * iscsi pdu (hdr_size) and iscsi pdu data (data_size)
>>>      */
>> Well, the real difference is that the payload of the "message" is actually
>> the payload of the SCSI command or ELS/CT Request. Thus, the payload may
> 
> I am not sure I follow. For iscsi, everything after the iscsi_event
> struct can be the iscsi request that is to be transmitted. The payload
> will not normally be Mbytes but it is not a couple of bytes.
> 
>> range in size from a few hundred bytes to several kbytes (> 1 page) to
>> Mbytes in size. Rather than buffer all of this, and push it over the
>> socket,
>> thus incurring extra copies - it would be best to have the LLDD simply DMA the
>> payload like on a typical SCSI command.  Additionally, there will be
>> response data that can be several kbytes in length.
>>
> 
> Once you have got the buffer to the class, the class can create a
> scatterlist to DMA from for the LLD, I thought. iscsi does not do this
> just because it is software right now. For qla4xxx we do not need
> something like what you are talking about (see below for what I was
> thinking about for the initiators). If you are saying the extra step of
> the copy is plain dumb, I agree, but this happens (you have to suffer
> some copy and cannot do dio) for sg io as well in some cases. I think
> for the sg driver the copy_*_user is the default.
> 
> Instead of netlink for scsi commands and transport requests....
> 
> For scsi commands could we just use sg io, or is there something special
> about the command you want to send? If you can use sg io for scsi
> commands, maybe for transport level requests (in my example iscsi pdu)
> we could modify something like sg/bsg/block layer scsi_ioctl.c to send
> down transport requests to the classes and encapsulate them in some new
> struct transport_requests or use the existing struct request but do that
> thing people keep taling about using the request/request_queue for
> message passing.

And just to be complete, the problem with this is that it is tied to the
request queue and so you cannot just send a transport level request
unless it is tied to the device. But for the target stuff we added a
request queue to the host so we could inject requests (the idea was to
send down those magic message requests) at a higher level.  To be able
to use that for sg io though it would require some more code and magic
as you know.


* Re: [RFC] Netlink and user-space buffer pointers
  2006-04-20 17:45         ` Mike Christie
  2006-04-20 17:52           ` Mike Christie
  2006-04-20 17:58           ` Mike Christie
@ 2006-04-20 20:03           ` James Smart
  2006-04-20 20:35             ` Mike Christie
                               ` (2 more replies)
  2006-04-20 20:18           ` Douglas Gilbert
  3 siblings, 3 replies; 24+ messages in thread
From: James Smart @ 2006-04-20 20:03 UTC (permalink / raw)
  To: Mike Christie; +Cc: linux-scsi, netdev, linux-kernel

Note: We've transitioned off topic. If what this means is "there isn't a good
way except by ioctls (which still isn't easily portable) or system calls",
then that's ok. Then at least we know the limits and can look at other
implementation alternatives.

Mike Christie wrote:
> James Smart wrote:
>> Mike Christie wrote:
>>> For the tasks you want to do for the fc class, is performance critical?
>> No, it should not be.
>>
>>> If not, you could do what the iscsi class (for the netdev people this is
>>> drivers/scsi/scsi_transport_iscsi.c) does and just suffer a couple
>>> copies. For iscsi we do this in userspace to send down a login pdu:
>>>
>>>     /*
>>>      * xmitbuf is a buffer that is large enough for the iscsi_event,
>>>      * iscsi pdu (hdr_size) and iscsi pdu data (data_size)
>>>      */
>> Well, the real difference is that the payload of the "message" is actually
>> the payload of the SCSI command or ELS/CT Request. Thus, the payload may
> 
> I am not sure I follow. For iscsi, everything after the iscsi_event
> struct can be the iscsi request that is to be transmitted. The payload
> will not normally be Mbytes but it is not a couple of bytes.

True... For a large read/write - it will eventually total what the i/o
request size was, and you did have to push it through the socket.
What this discussion really comes down to is the difference between initiator
offload and what a target does.

The initiator offloads the "full" i/o from the users - e.g. send command,
get response. In the initiator case, the user isn't aware of each and
every IU that makes up the i/o. As it's on an i/o basis, the LLDD doing
the offload needs the full buffer sitting and ready. DMA is preferred so
the buffer doesn't have to be consuming socket/kernel/driver buffers while
it's pending - plus speed.

In the target case, the target controls each IU and its size, thus it
only has to have access to as much buffer space as it wants to push the next
IU. The i/o can be "paced" by the target. Unfortunately, this is an entirely
different use model than users of a scsi initiator expect, and it won't map
well into replacing things like our sg_io ioctls.

> Instead of netlink for scsi commands and transport requests....
> 
> For scsi commands could we just use sg io, or is there something special
> about the command you want to send? If you can use sg io for scsi
> commands, maybe for transport level requests (in my example iscsi pdu)
> we could modify something like sg/bsg/block layer scsi_ioctl.c to send
> down transport requests to the classes and encapsulate them in some new
> struct transport_requests or use the existing struct request but do that
> thing people keep talking about, using the request/request_queue for
> message passing.

Well - there's 2 parts to this answer:

First : IOCTL's are considered dangerous/bad practice and therefore it would
   be nice to find a replacement mechanism that eliminates them. If that
   mechanism has some of the cool features that netlink does, even better.
   Using sg io, in the manner you indicate, wouldn't remove the ioctl use.
   Note: I have OEMs/users that are very confused about the community's statement
   about ioctls. They've heard they are bad, should never be allowed, will no
   longer be supported, but yet they are at the heart of DM and sg io and other
   subsystems. Other than a "grandfathered" explanation, they don't understand
   why the rules bend for one piece of code but not for another. To them, all
   the features are just as critical regardless of who's providing them.

Second: transport level i/o could be done like you suggest, and we've
   prototyped some of this as well. However, there's something very wrong
   about putting "block device" wrappers and settings around something that
   is not a block device.  In general, it's a heck of a lot of overhead and
   still doesn't solve the real issue - how to portably pass that user buffer
   in to/out of the kernel.


-- james s


* Re: [RFC] Netlink and user-space buffer pointers
  2006-04-20 17:45         ` Mike Christie
                             ` (2 preceding siblings ...)
  2006-04-20 20:03           ` James Smart
@ 2006-04-20 20:18           ` Douglas Gilbert
  3 siblings, 0 replies; 24+ messages in thread
From: Douglas Gilbert @ 2006-04-20 20:18 UTC (permalink / raw)
  To: Mike Christie; +Cc: James.Smart, linux-scsi, netdev, linux-kernel

Mike Christie wrote:
> James Smart wrote:
> 
>>Mike Christie wrote:
>>
>>>For the tasks you want to do for the fc class, is performance critical?
>>
>>No, it should not be.
>>
>>
>>>If not, you could do what the iscsi class (for the netdev people this is
>>>drivers/scsi/scsi_transport_iscsi.c) does and just suffer a couple
>>>copies. For iscsi we do this in userspace to send down a login pdu:
>>>
>>>    /*
>>>     * xmitbuf is a buffer that is large enough for the iscsi_event,
>>>     * iscsi pdu (hdr_size) and iscsi pdu data (data_size)
>>>     */
>>
>>Well, the real difference is that the payload of the "message" is actually
>>the payload of the SCSI command or ELS/CT Request. Thus, the payload may
> 
> 
> I am not sure I follow. For iscsi, everything after the iscsi_event
> struct can be the iscsi request that is to be transmitted. The payload
> will not normally be Mbytes but it is not a couple of bytes.
> 
> 
>>range in size from a few hundred bytes to several kbytes (> 1 page) to
>>Mbytes in size. Rather than buffer all of this, and push it over the
>>socket,
>>thus incurring extra copies - it would be best to have the LLDD simply DMA the
>>payload like on a typical SCSI command.  Additionally, there will be
>>response data that can be several kbytes in length.
>>
> 
> 
> Once you have got the buffer to the class, the class can create a
> scatterlist to DMA from for the LLD, I thought. iscsi does not do this
> just because it is software right now. For qla4xxx we do not need
> something like what you are talking about (see below for what I was
> thinking about for the initiators). If you are saying the extra step of
> the copy is plain dumb, I agree, but this happens (you have to suffer
> some copy and cannot do dio) for sg io as well in some cases. I think
> for the sg driver the copy_*_user is the default.

Mike,
Indirect IO is the default in the sg driver because:
  - it has always been thus
  - the sg driver is less constrained (e.g. max number
    of scatg elements is a bigger issue with dio)
  - the only alignment to worry about is byte
    alignment (some folks would like bit alignment
    but you can't please everybody)
  - there is no need for the sg driver to pin user
    pages in memory (as there is with direct IO and
    mmaped-IO)

> Instead of netlink for scsi commands and transport requests....

With a netlink based pass through one might:
  - improve on the SG_IO ioctl and add things like
    tags that are currently missing
  - introduce a proper SCSI task management function
    pass through (no request queue please)
  - make other pass throughs for SAS: SMP and STP
  - have an alternative to sysfs for various control
    functions in a HBA (e.g. in SAS: link and hard
    reset) and fetching performance data from a HBA

Apart from how to get data efficiently between the HBA
and the user space, another major issue is the flexibility
of the bind() in s_netlink (storage netlink??).

> For scsi commands could we just use sg io, or is there something special
> about the command you want to send? If you can use sg io for scsi
> commands, maybe for transport level requests (in my example iscsi pdu)
> we could modify something like sg/bsg/block layer scsi_ioctl.c to send
> down transport requests to the classes and encapsulate them in some new
> struct transport_requests or use the existing struct request but do that
> thing people keep taling about using the request/request_queue for
> message passing.

Some SG_IO ioctl users want up to 32 MB in one transaction
and others want their data fast. Many pass through users
view the kernel as an impediment (not so much as "the way"
as "in the way").

Doug Gilbert


* Re: [RFC] Netlink and user-space buffer pointers
  2006-04-20 20:03           ` James Smart
@ 2006-04-20 20:35             ` Mike Christie
  2006-04-20 20:40               ` Mike Christie
  2006-04-20 21:41               ` Mike Christie
  2006-04-20 21:51             ` Mike Christie
  2006-04-20 23:44             ` Andrew Vasquez
  2 siblings, 2 replies; 24+ messages in thread
From: Mike Christie @ 2006-04-20 20:35 UTC (permalink / raw)
  To: James.Smart; +Cc: linux-scsi, netdev, linux-kernel

James Smart wrote:
> Note: We've transitioned off topic. If what this means is "there isn't a
> good
> way except by ioctls (which still isn't easily portable) or system calls",
> then that's ok. Then at least we know the limits and can look at other
> implementation alternatives.
> 
> Mike Christie wrote:
>> James Smart wrote:
>>> Mike Christie wrote:
>>>> For the tasks you want to do for the fc class, is performance critical?
>>> No, it should not be.
>>>
>>>> If not, you could do what the iscsi class (for the netdev people
>>>> this is
>>>> drivers/scsi/scsi_transport_iscsi.c) does and just suffer a couple
>>>> copies. For iscsi we do this in userspace to send down a login pdu:
>>>>
>>>>     /*
>>>>      * xmitbuf is a buffer that is large enough for the iscsi_event,
>>>>      * iscsi pdu (hdr_size) and iscsi pdu data (data_size)
>>>>      */
>>> Well, the real difference is that the payload of the "message" is
>>> actually the payload of the SCSI command or ELS/CT Request. Thus, the
>>> payload may
>>
>> I am not sure I follow. For iscsi, everything after the iscsi_event
>> struct can be the iscsi request that is to be transmitted. The payload
>> will not normally be Mbytes but it is not a couple of bytes.
> 
> True... For a large read/write - it will eventually total what the i/o
> request size was, and you did have to push it through the socket.
> What this discussion really comes down to is the difference between
> initiator offload and what a target does.
> 
> The initiator offloads the "full" i/o from the users - e.g. send command,
> get response. In the initiator case, the user isn't aware of each and
> every IU that makes up the i/o. As it's on an i/o basis, the LLDD doing
> the offload needs the full buffer sitting and ready. DMA is preferred so
> the buffer doesn't have to be consuming socket/kernel/driver buffers while
> it's pending - plus speed.
> 
> In the target case, the target controls each IU and its size, thus it
> only has to have access to as much buffer space as it wants to push the
> next IU. The i/o can be "paced" by the target. Unfortunately, this is an
> entirely different use model than users of a scsi initiator expect, and
> it won't map well into replacing things like our sg_io ioctls.


I am not talking about the target here. For the open-iscsi initiator
that is in mainline, which I referenced in the example, we send pdus
from userspace to the LLD. In the future, for initiators that offload
some iscsi processing and will log in from userspace, or have userspace
monitor the transport by doing iscsi pings, we need to be able to send
these pdus. And the iscsi pdu cannot be broken up at the iscsi level
(it can at the interconnect level though). From the iscsi host level
they have to go out like a scsi command would, in that the LLD cannot
decide to send out multiple pdus for the pdu that userspace sends down.

I do agree with you that targets can break down a scsi command into
multiple transport level packets as they see fit.
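
For the netdev folks, the userspace side of that is plain netlink
socket code. A rough sketch (the event struct here is a simplified
stand-in for what open-iscsi really sends, and the 8k payload ceiling
is just an assumption):

    #include <stdint.h>
    #include <string.h>
    #include <sys/socket.h>
    #include <linux/netlink.h>

    /* simplified stand-in for the real iscsi event struct */
    struct xmit_event { uint32_t type, hdr_size, data_size; };

    /* send one pdu (header + data) to the LLD in one message */
    static int send_pdu(int fd, void *hdr, uint32_t hdr_size,
                        void *data, uint32_t data_size)
    {
        char xmitbuf[NLMSG_SPACE(sizeof(struct xmit_event) + 8192)];
        struct nlmsghdr *nlh = (struct nlmsghdr *)xmitbuf;
        struct xmit_event *ev;
        struct sockaddr_nl dst = { .nl_family = AF_NETLINK };
        uint32_t len = sizeof(*ev) + hdr_size + data_size;

        memset(xmitbuf, 0, sizeof(xmitbuf));
        nlh->nlmsg_len = NLMSG_SPACE(len);
        ev = NLMSG_DATA(nlh);
        ev->hdr_size = hdr_size;
        ev->data_size = data_size;
        memcpy(ev + 1, hdr, hdr_size);
        memcpy((char *)(ev + 1) + hdr_size, data, data_size);
        /* dst.nl_pid == 0, so this goes to the kernel */
        return sendto(fd, xmitbuf, nlh->nlmsg_len, 0,
                      (struct sockaddr *)&dst, sizeof(dst));
    }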


> 
>> Instead of netlink for scsi commands and transport requests....
>>
>> For scsi commands could we just use sg io, or is there something special
>> about the command you want to send? If you can use sg io for scsi
>> commands, maybe for transport level requests (in my example iscsi pdu)
>> we could modify something like sg/bsg/block layer scsi_ioctl.c to send
>> down transport requests to the classes and encapsulate them in some new
>> struct transport_requests or use the existing struct request but do that
>> thing people keep talking about using the request/request_queue for
>> message passing.
> 
> Well - there's 2 parts to this answer:
> 
> First : IOCTLs are considered dangerous/bad practice and therefore it
> would

Yeah, I am not trying to kill ioctls. I go where the community goes.
What I am trying to do is just reuse the sg io mapping code so that we do
not end up with sg, st, target, blk scsi_ioctl.c and bsg all doing
similar things.


>   be nice to find a replacement mechanism that eliminates them. If that
>   mechanism has some of the cool features that netlink does, even better.
>   Using sg io, in the manner you indicate, wouldn't remove the ioctl use.
>   Note: I have OEMs/users that are very confused about the community's
>   statement about ioctls. They've heard they are bad, should never be
>   allowed, will no longer be supported, but yet they are at the heart of
>   DM and sg io and other subsystems. Other than a "grandfathered"
>   explanation, they don't understand why the rules bend for one piece of
>   code but not for another. To them, all the features are just as
>   critical regardless of who's providing them.
> 
> Second: transport level i/o could be done like you suggest, and we've
>   prototyped some of this as well. However, there's something very wrong
>   about putting "block device" wrappers and settings around something that
>   is not a block device.  In general, it's a heck of a lot of overhead and
>   still doesn't solve the real issue - how to portably pass that user
>   buffer


I am not talking about putting block device wrappers on it. This is
where the magic part and the message passing come in. A while back I
made the request queue a class (only sent the patch to Jens and did not
follow up). The original reason was for the io sched swap and some
multipath stuff, but with the request queue as a class it would not
need a block device to be exposed to userspace through sysfs, and would
not need a block device to send messages through. You just need a way
to communicate between userspace and the kernel, but it does not have
to be through a block device. I think this path has other benefits, in
that you could do userspace level scanning as well, since you do not
need the block device and ULD like we do today.



>   in to/out of the kernel.
> 
> 
> -- james s



* Re: [RFC] Netlink and user-space buffer pointers
  2006-04-20 20:35             ` Mike Christie
@ 2006-04-20 20:40               ` Mike Christie
  2006-04-20 21:41               ` Mike Christie
  1 sibling, 0 replies; 24+ messages in thread
From: Mike Christie @ 2006-04-20 20:40 UTC (permalink / raw)
  To: James.Smart; +Cc: linux-scsi, netdev, linux-kernel

Mike Christie wrote:
> James Smart wrote:
>> Note: We've transitioned off topic. If what this means is "there isn't a
>> good way except by ioctls (which still aren't easily portable) or system
>> calls", then that's ok. Then at least we know the limits and can look at
>> other implementation alternatives.
>>
>> Mike Christie wrote:
>>> James Smart wrote:
>>>> Mike Christie wrote:
>>>>> For the tasks you want to do for the fc class, is performance critical?
>>>> No, it should not be.
>>>>
>>>>> If not, you could do what the iscsi class (for the netdev people this
>>>>> is drivers/scsi/scsi_transport_iscsi.c) does and just suffer a couple
>>>>> copies. For iscsi we do this in userspace to send down a login pdu:
>>>>>
>>>>>     /*
>>>>>      * xmitbuf is a buffer that is large enough for the iscsi_event,
>>>>>      * iscsi pdu (hdr_size) and iscsi pdu data (data_size)
>>>>>      */
>>>> Well, the real difference is that the payload of the "message" is
>>>> actually the payload of the SCSI command or ELS/CT Request. Thus, the
>>>> payload may
>>> I am not sure I follow. For iscsi, everything after the iscsi_event
>>> struct can be the iscsi request that is to be transmitted. The payload
>>> will not normally be Mbytes but it is not a couple of bytes.
>> True... For a large read/write - it will eventually total what the i/o
>> request size was, and you did have to push it through the socket.
>> What this discussion really comes down to is the difference between
>> initiator offload and what a target does.
>>
>> The initiator offloads the "full" i/o from the users - e.g. send command,
>> get response. In the initiator case, the user isn't aware of each and
>> every IU that makes up the i/o. As it's on an i/o basis, the LLDD doing
>> the offload needs the full buffer sitting and ready. DMA is preferred so
>> the buffer doesn't have to be consuming socket/kernel/driver buffers while
>> it's pending - plus speed.
>>
>> In the target case, the target controls each IU and its size, thus it
>> only has to have access to as much buffer space as it wants to push the
>> next IU. The i/o can be "paced" by the target. Unfortunately, this is an
>> entirely different use model than users of a scsi initiator expect, and
>> it won't map well into replacing things like our sg_io ioctls.
> 
> 
> I am not talking about the target here. For the open-iscsi initiator
> that is in mainline, which I referenced in the example, we send pdus
> from userspace to the LLD. In the future, for initiators that offload
> some iscsi processing and will log in from userspace, or have userspace
> monitor the transport by doing iscsi pings, we need to be able to send
> these pdus. And the iscsi pdu cannot be broken up at the iscsi level
> (it can at the interconnect level though). From the iscsi host level
> they have to go out like a scsi command would, in that the LLD cannot
> decide to send out multiple pdus for the pdu that userspace sends down.
> 
> I do agree with you that targets can break down a scsi command into
> multiple transport level packets as they see fit.
> 

Oh yeah, is

FC IU == iscsi tcp packet
or
FC IU == iscsi pdu
?


* Re: [RFC] Netlink and user-space buffer pointers
  2006-04-20 20:35             ` Mike Christie
  2006-04-20 20:40               ` Mike Christie
@ 2006-04-20 21:41               ` Mike Christie
  1 sibling, 0 replies; 24+ messages in thread
From: Mike Christie @ 2006-04-20 21:41 UTC (permalink / raw)
  To: Mike Christie; +Cc: James.Smart, SCSI Mailing List

Mike Christie wrote:
>> Second: transport level i/o could be done like you suggest, and we've
>>   prototyped some of this as well. However, there's something very wrong
>>   about putting "block device" wrappers and settings around something that
>>   is not a block device.  In general, it's a heck of a lot of overhead and
>>   still doesn't solve the real issue - how to portably pass that user
>>   buffer
> 
> 
> I am not talking about putting block device wrappers on it. This is
> where the magic part and the message passing come in. A while back I
> made the request queue a class (only sent the patch to Jens and did not
> follow up). The original reason was for the io sched swap and some
> multipath stuff, but with the request queue as a class it would not
> need a block device to be exposed to userspace through sysfs, and would
> not need a block device to send messages through. You just need a way
> to communicate between userspace and the kernel, but it does not have
> to be through a block device. I think this path has other benefits, in
> that you could do userspace level scanning as well, since you do not
> need the block device and ULD like we do today.
> 

Again, for a bad example, look at the target code. The original idea
was to use the request_queue for messages, because it did not work so
well for them to send target requests from the target interrupt handler
to the netlink code (we do not need the io scheduler, for example). It
was overkill, because all we really want is a per cpu list, but we used
the request queue because it had the queue limits that we needed. It
was meant as temporary, and I think tomo is working on replacing that
with a per process list and softirq. But the point is that you can use
the request queue to pass around requests and still use netlink for the
interface between the kernel and userspace.


* Re: [RFC] Netlink and user-space buffer pointers
  2006-04-20 20:03           ` James Smart
  2006-04-20 20:35             ` Mike Christie
@ 2006-04-20 21:51             ` Mike Christie
  2006-04-20 23:07               ` Mike Christie
  2006-04-20 23:44             ` Andrew Vasquez
  2 siblings, 1 reply; 24+ messages in thread
From: Mike Christie @ 2006-04-20 21:51 UTC (permalink / raw)
  To: James.Smart; +Cc: linux-scsi

Snipped lkml and netdev. I do not think they care, do they?

James Smart wrote:
> Note: We've transitioned off topic. If what this means is "there isn't a
> good
> way except by ioctls (which still isn't easily portable) or system calls",
> then that's ok. Then at least we know the limits and can look at other
> implementation alternatives.
> 

So we have (a rough sketch of option 5A follows the list):

1. sysfs: probably bad, because to pass down every attr for the command
we need a separate file. This does not seem like it meets the
requirements for a binary sysfs file either.

2. configfs: again bad, because we have to pass down every attr for the
command in a separate file.

3. ioctl: Christoph has not allowed scsi drivers to add ioctls, so I
doubt he would let some other scsi code add them.

4. syscall:

5. netlink: netdev guys do not want us to pass pointers. So we either
have a worst case double copy (copy from userspace to network, and
network to a buffer that is within the LLD's queue limits (do like sg,
where it has a preallocated buffer)), or if we used the bio code like
ll_rw_blk.c does, just fail if the command is too large. In some cases
the io may just be too large and have to fail though.

	5A. modify netlink to copy data to a buffer that is within some sort
of limits we can DMA from.
	5B. debate with netdev guys about passing pointers, in which case we
could have some of the problems 5 has, in that when we map data we do
not have much control over the pages, so we may have to drop down to a
copy-to-a-preallocated-buffer or fail model.

6. char device

7. what else...
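
For 5A, the kernel side might look something like this (a rough
sketch, all names made up; the real limits would come from the HBA's
queue limits):

	#include <linux/skbuff.h>
	#include <linux/netlink.h>
	#include <linux/string.h>
	#include <linux/errno.h>

	/* 5A: bounce the netlink payload into a DMA-able buffer */
	static void *pt_bounce_buf;	/* preallocated within HBA limits */
	static size_t pt_bounce_len;

	static int pt_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
	{
		size_t len = nlh->nlmsg_len - NLMSG_LENGTH(0);

		if (len > pt_bounce_len)
			return -E2BIG;	/* or fail like ll_rw_blk does */
		memcpy(pt_bounce_buf, NLMSG_DATA(nlh), len);
		/* ... hand pt_bounce_buf to the LLD for DMA ... */
		return 0;
	}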


* Re: [RFC] Netlink and user-space buffer pointers
  2006-04-20 21:51             ` Mike Christie
@ 2006-04-20 23:07               ` Mike Christie
  0 siblings, 0 replies; 24+ messages in thread
From: Mike Christie @ 2006-04-20 23:07 UTC (permalink / raw)
  To: Mike Christie; +Cc: James.Smart, linux-scsi

Mike Christie wrote:
> Snipped lkml and netdev. I do not think they care, do they?
> 
> James Smart wrote:
>> Note: We've transitioned off topic. If what this means is "there isn't a
>> good
>> way except by ioctls (which still isn't easily portable) or system calls",
>> then that's ok. Then at least we know the limits and can look at other
>> implementation alternatives.
>>
> 
> So we have

Ok, for those that emailed me off list: all these interfaces have a
similar problem, in that the kernel cannot get enough pages to make a
scatterlist that is within the HBA's limits. block/scsi_ioctl.c handles
this by failing the request, sg.c handles this by preallocating
buffers, and the target code handles this by calling the target
transfer function multiple times, as James described. Using netlink as
described - for a target or initiator, and for sg io or target commands
or transport commands - has similar issues if it just cannot get enough
pages to make a proper scatterlist, but currently it also has the extra
limitation that we are not supposed to pass pointers.

Sorry for the confusion.
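
Schematically, every one of those interfaces ends up making the same
decision somewhere; the field names below are from the 2.6
request_queue, the rest is pseudocode:

	/* the decision every interface has to make */
	if (nr_pages > q->max_phys_segments ||
	    total_len > (q->max_sectors << 9)) {
		/* block/scsi_ioctl.c: fail the request      */
		/* sg.c: copy through a preallocated buffer  */
		/* target code: transfer in multiple chunks  */
	}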

> 
> 1. sysfs: probably bad, because to pass down every attr for the command
> we need a separate file. This does not seem like it meets the
> requirements for a binary sysfs file either.
> 
> 2. configfs: again bad, because we have to pass down every attr for the
> command in a separate file.
> 
> 3. ioctl: Christoph has not allowed scsi drivers to add ioctls, so I
> doubt he would let some other scsi code add them.
> 
> 4. syscall:
> 
> 5. netlink: netdev guys do not want us to pass pointers. So we either
> have a worst case double copy (copy from userspace to network, and
> network to a buffer that is within the LLD's queue limits (do like sg,
> where it has a preallocated buffer)), or if we used the bio code like
> ll_rw_blk.c does, just fail if the command is too large. In some cases
> the io may just be too large and have to fail though.
> 
> 	5A. modify netlink to copy data to a buffer that is within some sort
> of limits we can DMA from.
> 	5B. debate with netdev guys about passing pointers, in which case we
> could have some of the problems 5 has, in that when we map data we do
> not have much control over the pages, so we may have to drop down to a
> copy-to-a-preallocated-buffer or fail model.
> 
> 6. char device
> 
> 7. what else...



* Re: [RFC] Netlink and user-space buffer pointers
  2006-04-20 20:03           ` James Smart
  2006-04-20 20:35             ` Mike Christie
  2006-04-20 21:51             ` Mike Christie
@ 2006-04-20 23:44             ` Andrew Vasquez
  2 siblings, 0 replies; 24+ messages in thread
From: Andrew Vasquez @ 2006-04-20 23:44 UTC (permalink / raw)
  To: James Smart; +Cc: Mike Christie, linux-scsi, netdev, linux-kernel

On Thu, 20 Apr 2006, James Smart wrote:

> Note: We've transitioned off topic. If what this means is "there isn't a
> good way except by ioctls (which still aren't easily portable) or system
> calls", then that's ok. Then at least we know the limits and can look at
> other implementation alternatives.

This topic has been brought up many times in the past, most recently:

	http://thread.gmane.org/gmane.linux.drivers.openib/19525/focus=19525
	http://thread.gmane.org/gmane.linux.kernel/387375/focus=387455

where it was suggested that the pathscale folks use some blend of
sysfs, netlink sockets and debugfs:

	http://kerneltrap.org/node/4394

> >>Mike Christie wrote:
> >Instead of netlink for scsi commands and transport requests....
> >
> >For scsi commands could we just use sg io, or is there something special
> >about the command you want to send? If you can use sg io for scsi
> >commands, maybe for transport level requests (in my example iscsi pdu)
> >we could modify something like sg/bsg/block layer scsi_ioctl.c to send
> >down transport requests to the classes and encapsulate them in some new
> >struct transport_requests or use the existing struct request but do that
> >thing people keep talking about using the request/request_queue for
> >message passing.
> 
> Well - there's 2 parts to this answer:
> 
> First : IOCTLs are considered dangerous/bad practice and therefore it
>   would be nice to find a replacement mechanism that eliminates them.
>   If that mechanism has some of the cool features that netlink does,
>   even better. Using sg io, in the manner you indicate, wouldn't remove
>   the ioctl use.
>   Note: I have OEMs/users that are very confused about the community's
>   statement about ioctls. They've heard they are bad, should never be
>   allowed, will no longer be supported, but yet they are at the heart of
>   DM and sg io and other subsystems. Other than a "grandfathered"
>   explanation, they don't understand why the rules bend for one piece of
>   code but not for another. To them, all the features are just as
>   critical regardless of who's providing them.

I believe it to be the same for most hardware vendors' customers...

> Second: transport level i/o could be done like you suggest, and we've
>   prototyped some of this as well. However, there's something very wrong
>   about putting "block device" wrappers and settings around something that
>   is not a block device.

Eeww...  no wrappers.  Your netlink prototypes certainly get the FC
transport further along, but it would also be nice if there could be
some subsystem consensus on *the* interface.

I honestly don't know which interface is *best*, but from an HBA
vendor's perspective managing per-request locally allocated memory is
undesirable.

Thanks,
av


end of thread

Thread overview: 24+ messages:
2006-04-17 20:44 [RFC] FC Transport : Async Events via netlink interface James Smart
2006-04-18 16:01 ` Mike Anderson
2006-04-19 12:52   ` James Smart
2006-04-19 12:57   ` [RFC] Netlink and user-space buffer pointers James Smart
2006-04-19 16:22     ` Patrick McHardy
2006-04-19 17:08       ` James Smart
2006-04-19 17:16         ` Patrick McHardy
2006-04-19 16:26     ` Stephen Hemminger
2006-04-19 17:05       ` James Smart
2006-04-19 21:32     ` Mike Christie
2006-04-20 14:33       ` James Smart
2006-04-20 17:45         ` Mike Christie
2006-04-20 17:52           ` Mike Christie
2006-04-20 17:58           ` Mike Christie
2006-04-20 20:03           ` James Smart
2006-04-20 20:35             ` Mike Christie
2006-04-20 20:40               ` Mike Christie
2006-04-20 21:41               ` Mike Christie
2006-04-20 21:51             ` Mike Christie
2006-04-20 23:07               ` Mike Christie
2006-04-20 23:44             ` Andrew Vasquez
2006-04-20 20:18           ` Douglas Gilbert
2006-04-19 14:59 ` [RFC] FC Transport : Async Events via netlink interface Matthew Wilcox
2006-04-19 16:11   ` James Smart
