Netdev List

Netdev List
 help / color / mirror / Atom feed

* [RFC 3/5] e100:  Support receiving errored frames.
From: greearb @ 2011-06-18  0:09 UTC (permalink / raw)
  To: netdev; +Cc: Ben Greear
In-Reply-To: <1308355783-22407-1-git-send-email-greearb@candelatech.com>

From: Ben Greear <greearb@candelatech.com>

This can be helpful when sniffing dodgy networks.

Signed-off-by: Ben Greear <greearb@candelatech.com>
---
:100644 100644 647d8c6... aad303d... M	drivers/net/e100.c
 drivers/net/e100.c |   42 ++++++++++++++++++++++++++++++++++++++++--
 1 files changed, 40 insertions(+), 2 deletions(-)

diff --git a/drivers/net/e100.c b/drivers/net/e100.c
index 647d8c6..aad303d 100644
--- a/drivers/net/e100.c
+++ b/drivers/net/e100.c
@@ -588,6 +588,7 @@ struct nic {
 		wol_magic          = (1 << 3),
 		ich_10h_workaround = (1 << 4),
 		save_rxfcs         = (1 << 5),
+		save_rxerr         = (1 << 6),
 	} flags					____cacheline_aligned;
 
 	enum mac mac;
@@ -1126,9 +1127,13 @@ static void e100_configure(struct nic *nic, struct cb *cb, struct sk_buff *skb)
 		config->full_duplex_force = 0x1;	/* 1=force, 0=auto */
 
 	if (nic->flags & promiscuous || nic->loopback) {
+		config->promiscuous_mode = 0x1;		/* 1=on, 0=off */
+	}
+
+	if (nic->flags & save_rxerr) {
+		config->rx_discard_overruns = 0x1;	/* 1=save, 0=discard */
 		config->rx_save_bad_frames = 0x1;	/* 1=save, 0=discard */
 		config->rx_discard_short_frames = 0x0;	/* 1=discard, 0=save */
-		config->promiscuous_mode = 0x1;		/* 1=on, 0=off */
 	}
 
 	if (nic->flags & save_rxfcs)
@@ -1983,7 +1988,18 @@ static int e100_rx_indicate(struct nic *nic, struct rx *rx,
 	skb_put(skb, actual_size);
 	skb->protocol = eth_type_trans(skb, nic->netdev);
 
-	if (unlikely(!(rfd_status & cb_ok))) {
+	if (unlikely(nic->flags & save_rxerr)) {
+		if (!(rfd_status & cb_ok)) {
+			skb->pkt_type = PACKET_INVALID;
+		} else if (actual_size >
+			   ETH_DATA_LEN + VLAN_ETH_HLEN + rxfcs_pad) {
+			nic->rx_over_length_errors++;
+			skb->pkt_type = PACKET_INVALID;
+		}
+		goto process_skb;
+	}
+
+	if (unlikely((nic->flags & save_rxerr) && !(rfd_status & cb_ok))) {
 		/* Don't indicate if hardware indicates errors */
 		dev_kfree_skb_any(skb);
 	} else if (actual_size > ETH_DATA_LEN + VLAN_ETH_HLEN + rxfcs_pad) {
@@ -1991,6 +2007,7 @@ static int e100_rx_indicate(struct nic *nic, struct rx *rx,
 		nic->rx_over_length_errors++;
 		dev_kfree_skb_any(skb);
 	} else {
+process_skb:
 		dev->stats.rx_packets++;
 		dev->stats.rx_bytes += actual_size;
 		netif_receive_skb(skb);
@@ -2397,6 +2414,25 @@ static u32 e100_get_save_rxfcs(struct net_device *netdev)
 	return !!(nic->flags & save_rxfcs);
 }
 
+static int e100_set_save_rxerr(struct net_device *netdev, u32 data)
+{
+	struct nic *nic = netdev_priv(netdev);
+	if (data)
+		nic->flags |= save_rxerr;
+	else
+		nic->flags &= ~save_rxerr;
+
+	e100_exec_cb(nic, NULL, e100_configure);
+
+	return 0;
+}
+
+static u32 e100_get_save_rxerr(struct net_device *netdev)
+{
+	struct nic *nic = netdev_priv(netdev);
+	return !!(nic->flags & save_rxerr);
+}
+
 static void e100_get_drvinfo(struct net_device *netdev,
 	struct ethtool_drvinfo *info)
 {
@@ -2718,6 +2754,8 @@ static const struct ethtool_ops e100_ethtool_ops = {
 	.get_sset_count		= e100_get_sset_count,
 	.set_save_rxfcs		= e100_set_save_rxfcs,
 	.get_save_rxfcs		= e100_get_save_rxfcs,
+	.set_save_rxerr		= e100_set_save_rxerr,
+	.get_save_rxerr		= e100_get_save_rxerr,
 };
 
 static int e100_do_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
-- 
1.7.3.4


^ permalink raw reply related

* [RFC 2/5] net:  Add pkt-type PACKET_INVALID
From: greearb @ 2011-06-18  0:09 UTC (permalink / raw)
  To: netdev; +Cc: Ben Greear
In-Reply-To: <1308355783-22407-1-git-send-email-greearb@candelatech.com>

From: Ben Greear <greearb@candelatech.com>

This will be used for errored frames received from NICs.
It re-uses the un-used PACKET_FASTROUTE value, but leaves that
define in just in case some user-space app requires it.

Signed-off-by: Ben Greear <greearb@candelatech.com>
---
:100644 100644 7b31863... 86f06e4... M	include/linux/if_packet.h
 include/linux/if_packet.h |    7 +++++--
 1 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/include/linux/if_packet.h b/include/linux/if_packet.h
index 7b31863..86f06e4 100644
--- a/include/linux/if_packet.h
+++ b/include/linux/if_packet.h
@@ -26,9 +26,12 @@ struct sockaddr_ll {
 #define PACKET_MULTICAST	2		/* To group		*/
 #define PACKET_OTHERHOST	3		/* To someone else 	*/
 #define PACKET_OUTGOING		4		/* Outgoing of any type */
-/* These ones are invisible by user level */
+/* This one is invisible by user level */
 #define PACKET_LOOPBACK		5		/* MC/BRD frame looped back */
-#define PACKET_FASTROUTE	6		/* Fastrouted frame	*/
+#define PACKET_FASTROUTE	6		/* Fastrouted frame, not used */
+#define PACKET_INVALID		6		/* Packet has errors, perhaps
+						 * received with bad FCS
+						 */
 
 /* Packet socket options */
 
-- 
1.7.3.4


^ permalink raw reply related

* [RFC 1/5] net:  Support ethtool ops for rx of errored frames.
From: greearb @ 2011-06-18  0:09 UTC (permalink / raw)
  To: netdev; +Cc: Ben Greear
In-Reply-To: <1308355783-22407-1-git-send-email-greearb@candelatech.com>

From: Ben Greear <greearb@candelatech.com>

This can be useful when sniffing dodgy networks.

Signed-off-by: Ben Greear <greearb@candelatech.com>
---
:100644 100644 675ddc0... fb0fac9... M	include/linux/ethtool.h
:100644 100644 0e01860... 443a63c... M	net/core/ethtool.c
 include/linux/ethtool.h |    7 +++++++
 net/core/ethtool.c      |    8 ++++++++
 2 files changed, 15 insertions(+), 0 deletions(-)

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 675ddc0..fb0fac9 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -880,6 +880,9 @@ bool ethtool_invalid_flags(struct net_device *dev, u32 data, u32 supported);
  * @set_save_rxfcs:  Set flag to save (1), or discard (0), the Ethernet
  *	Frame Checksum for received packets.
  * @get_save_rxfcs:  Get current value for Save RX-FCS flag.
+ * @set_save_rxerr:  Set flag to save (1), or discard (0), received Ethernet
+ *	frames with errors (bad checksum, etc)
+ * @get_save_rxerr:  Get current value for Save RX-ERR flag.
  *
  * All operations are optional (i.e. the function pointer may be set
  * to %NULL) and callers must take this into account.  Callers must
@@ -960,6 +963,8 @@ struct ethtool_ops {
 	int	(*set_dump)(struct net_device *, struct ethtool_dump *);
 	int	(*set_save_rxfcs)(struct net_device *, u32);
 	u32	(*get_save_rxfcs)(struct net_device *);
+	int	(*set_save_rxerr)(struct net_device *, u32);
+	u32	(*get_save_rxerr)(struct net_device *);
 
 };
 #endif /* __KERNEL__ */
@@ -1036,6 +1041,8 @@ struct ethtool_ops {
 #define ETHTOOL_GET_DUMP_DATA	0x00000040 /* Get dump data */
 #define ETHTOOL_GET_SAVE_RXFCS	0x00000041 /* Get RX Save Frame Checksum */
 #define ETHTOOL_SET_SAVE_RXFCS	0x00000042 /* Set RX Save Frame Checksum */
+#define ETHTOOL_GET_SAVE_RXERR	0x00000043 /* Get RX Save Errored Frames */
+#define ETHTOOL_SET_SAVE_RXERR	0x00000044 /* Set RX Save Errored Frames */
 
 
 /* compatibility with older code */
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 0e01860..443a63c 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -2160,6 +2160,14 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 		rc =  ethtool_get_value(dev, useraddr, ethcmd,
 					dev->ethtool_ops->get_save_rxfcs);
 		break;
+	case ETHTOOL_SET_SAVE_RXERR:
+		rc = ethtool_set_value(dev, useraddr,
+				       dev->ethtool_ops->set_save_rxerr);
+		break;
+	case ETHTOOL_GET_SAVE_RXERR:
+		rc =  ethtool_get_value(dev, useraddr, ethcmd,
+					dev->ethtool_ops->get_save_rxerr);
+		break;
 	default:
 		rc = -EOPNOTSUPP;
 	}
-- 
1.7.3.4


^ permalink raw reply related

* [RFC 0/5] Ethernet low-level frame debugging support.
From: greearb @ 2011-06-18  0:09 UTC (permalink / raw)
  To: netdev; +Cc: Ben Greear

From: Ben Greear <greearb@candelatech.com>

This builds on the previous patches to allow receiving the
Ethernet FCS.

These patches let one receive errored frames up the stack
and generate frames with customized FCS.  This allows one
to generate and receive frames with bad CRC for testing
and sniffing purposes.

I'll finish send/rcv support for e100,e1000, and e1000e
if these patches are acceptable.

Ben Greear (5):
  net:  Support ethtool ops for rx of errored frames.
  net:  Add pkt-type PACKET_INVALID
  e100:  Support receiving errored frames.
  net:  Support sending frame with specified FCS.
  e1000e:  Support sending frame with custom FCS.

 drivers/net/e100.c           |   42 ++++++++++++++++++++++++++++++++++++++++--
 drivers/net/e1000e/netdev.c  |   11 +++++++++++
 include/asm-generic/socket.h |    7 +++++++
 include/linux/ethtool.h      |    7 +++++++
 include/linux/if_packet.h    |    7 +++++--
 include/linux/skbuff.h       |    5 ++++-
 include/net/sock.h           |    6 ++++++
 net/core/ethtool.c           |    8 ++++++++
 net/core/skbuff.c            |    1 +
 net/core/sock.c              |    7 +++++++
 net/packet/af_packet.c       |   22 ++++++++++++++++++++--
 11 files changed, 116 insertions(+), 7 deletions(-)

-- 
1.7.3.4

^ permalink raw reply

* Re: what's causing "ip_rt_bug"?
From: Julian Anastasov @ 2011-06-17 23:56 UTC (permalink / raw)
  To: Tomasz Chmielewski; +Cc: Eric Dumazet, netdev
In-Reply-To: <4DFBC92A.2030901@wpkg.org>


	Hello,

On Fri, 17 Jun 2011, Tomasz Chmielewski wrote:

> > What your routing table looks like ? (ip ro)
> 
> It's just a proxy, no special routing set:

	Is transparent proxy used?

> # ip ro
> 58.185.117.18 via 119.46.110.193 dev eth0 
> 119.46.240.13 via 119.46.110.193 dev eth0 
> 58.185.117.29 via 119.46.110.193 dev eth0 
> 119.46.241.13 via 119.46.110.193 dev eth0 

	Same route for 58.185.117.28 2nd time? Is that possible?:

> 58.185.117.28 via 119.46.110.193 dev eth0 
> 119.46.110.192/26 dev eth0  proto kernel  scope link  src 119.46.110.197 
> 169.254.0.0/16 dev eth0  scope link 
> default via 119.46.110.195 dev eth0 
> 
> 
> The box is also crashing every few days; and I really had no clue why (just connected a serial console to catch any new oops/panic).
> 
> 
> The last time it crashed, I have this entry in syslog:
> 
> Jun 17 16:16:17 TRUE-SC02 kernel: [172488.602629] ip_rt_bug: 124.121.155.197 -> 119.46.110.197, ?

	The ip_rt_bug messages show that skb->dev is
NULL (OUTPUT hook), daddr in IP header is local address,
may be some original received packet. If such packet is
provided to ip_route_me_harder(skb, RTN_UNSPEC) an
ip_route_input call can happen. Calling later dst_output
should lead to this warning. The question is what can
cause received packet to appear in OUTPUT hook where
a change in mark or TOS can can trigger such ip_route_input
call. What kind of netfilter modules are used? nf_queue,
-j REJECT, NAT? Is 124.121.155.197 a local address?

> Jun 17 16:17:00 TRUE-SC02 kernel: [172531.239041] BUG: unable to handle kernel NULL pointer dereference at (null)
> Jun 17 16:17:00 TRUE-SC02 kernel: [172531.239409] IP: [<ffffffff81361cae>] dev_queue_xmit+0x1e3/0x441
> Jun 17 16:17:00 TRUE-SC02 kernel: [172531.239760] PGD 43c30b067 PUD 439e63067 PMD 0
> Jun 17 16:17:00 TRUE-SC02 kernel: [172531.240103] Oops: 0000 [#1] SMP
> Jun 17 16:19:58 TRUE-SC02 syslogd 1.4.1: restart.

Regards

--
Julian Anastasov <ja@ssi.bg>

^ permalink raw reply

* [PATCH 2/2] proc: Usable inode numbers for the namespace file descriptors.
From: Eric W. Biederman @ 2011-06-17 23:33 UTC (permalink / raw)
  To: Linux Containers
  Cc: Alexey Dobriyan, netdev, David Lamparter, linux-kernel,
	Serge E. Hallyn
In-Reply-To: <m1wrgkavbm.fsf_-_@fess.ebiederm.org>


Assign a unique proc inode to each namespace, yielding an
identifier that userspace can use for identifying a namespace.

This has been a long requested feature and only blocked because
a naive implementation would put the id in a global space and
would ultimately require having a namespace for the names of
namespaces, making migration and certain virtualization tricks
impossible.

We still don't have per superblock inode numbers for proc, which
appears necessary for application unaware checkpoint/restart and
migrations (if the application is using namespace filedescriptors)
but that is now allowd by the design if it becomes important.

I have preallocated the ipc and uts initial proc inode numbers so
their structures can be statically initialized.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 fs/proc/namespaces.c          |    1 +
 include/linux/ipc_namespace.h |    2 ++
 include/linux/proc_fs.h       |    4 ++++
 include/linux/utsname.h       |    1 +
 include/net/net_namespace.h   |    2 ++
 init/version.c                |    2 ++
 ipc/msgutil.c                 |    2 ++
 ipc/namespace.c               |   16 ++++++++++++++++
 kernel/utsname.c              |   17 ++++++++++++++++-
 net/core/net_namespace.c      |   24 ++++++++++++++++++++++++
 10 files changed, 70 insertions(+), 1 deletions(-)

diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index be177f7..ddc2bb4 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -54,6 +54,7 @@ static struct dentry *proc_ns_instantiate(struct inode *dir,
 	ei->ns_ops    = ns_ops;
 	ei->ns	      = ns;
 
+	inode->i_ino = ns_ops->inum(ei->ns);
 	dentry->d_op = &pid_dentry_operations;
 	d_add(dentry, inode);
 	/* Close the race of the process dying before we return the dentry */
diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index a6d1655..22a4dc4 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -60,6 +60,8 @@ struct ipc_namespace {
 
 	/* user_ns which owns the ipc ns */
 	struct user_namespace *user_ns;
+
+	unsigned int	proc_inum;
 };
 
 extern struct ipc_namespace init_ipc_ns;
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 3067b44..1aee7f0 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -29,8 +29,11 @@ struct mm_struct;
 
 enum {
 	PROC_ROOT_INO = 1,
+	PROC_IPC_INIT_INO = 2,
+	PROC_UTS_INIT_INO = 3,
 };
 
+
 /*
  * This is not completely implemented yet. The idea is to
  * create an in-memory tree (like the actual /proc filesystem
@@ -257,6 +260,7 @@ struct proc_ns_operations {
 	void *(*get)(struct task_struct *task);
 	void (*put)(void *ns);
 	int (*install)(struct nsproxy *nsproxy, void *ns);
+	unsigned int (*inum)(void *ns);
 };
 extern const struct proc_ns_operations netns_operations;
 extern const struct proc_ns_operations utsns_operations;
diff --git a/include/linux/utsname.h b/include/linux/utsname.h
index 4e5b021..03db764 100644
--- a/include/linux/utsname.h
+++ b/include/linux/utsname.h
@@ -44,6 +44,7 @@ struct uts_namespace {
 	struct kref kref;
 	struct new_utsname name;
 	struct user_namespace *user_ns;
+	unsigned int proc_inum;
 };
 extern struct uts_namespace init_uts_ns;
 
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 2bf9ed9..4b85be2 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -49,6 +49,8 @@ struct net {
 	struct list_head	cleanup_list;	/* namespaces on death row */
 	struct list_head	exit_list;	/* Use only net_mutex */
 
+	unsigned int		proc_inum;
+
 	struct proc_dir_entry 	*proc_net;
 	struct proc_dir_entry 	*proc_net_stat;
 
diff --git a/init/version.c b/init/version.c
index 86fe0cc..58170f1 100644
--- a/init/version.c
+++ b/init/version.c
@@ -12,6 +12,7 @@
 #include <linux/utsname.h>
 #include <generated/utsrelease.h>
 #include <linux/version.h>
+#include <linux/proc_fs.h>
 
 #ifndef CONFIG_KALLSYMS
 #define version(a) Version_ ## a
@@ -34,6 +35,7 @@ struct uts_namespace init_uts_ns = {
 		.domainname	= UTS_DOMAINNAME,
 	},
 	.user_ns = &init_user_ns,
+	.proc_inum = PROC_UTS_INIT_INO,
 };
 EXPORT_SYMBOL_GPL(init_uts_ns);
 
diff --git a/ipc/msgutil.c b/ipc/msgutil.c
index 8b5ce5d..f7da485 100644
--- a/ipc/msgutil.c
+++ b/ipc/msgutil.c
@@ -14,6 +14,7 @@
 #include <linux/slab.h>
 #include <linux/ipc.h>
 #include <linux/ipc_namespace.h>
+#include <linux/proc_fs.h>
 #include <asm/uaccess.h>
 
 #include "util.h"
@@ -33,6 +34,7 @@ struct ipc_namespace init_ipc_ns = {
 	.mq_msgsize_max  = DFLT_MSGSIZEMAX,
 #endif
 	.user_ns = &init_user_ns,
+	.proc_inum = PROC_IPC_INIT_INO,
 };
 
 atomic_t nr_ipc_ns = ATOMIC_INIT(1);
diff --git a/ipc/namespace.c b/ipc/namespace.c
index ce0a647..cd7f733 100644
--- a/ipc/namespace.c
+++ b/ipc/namespace.c
@@ -26,9 +26,16 @@ static struct ipc_namespace *create_ipc_ns(struct task_struct *tsk,
 	if (ns == NULL)
 		return ERR_PTR(-ENOMEM);
 
+	err = proc_alloc_inum(&ns->proc_inum);
+	if (err) {
+		kfree(ns);
+		return ERR_PTR(err);
+	}
+
 	atomic_set(&ns->count, 1);
 	err = mq_init_ns(ns);
 	if (err) {
+		proc_free_inum(ns->proc_inum);
 		kfree(ns);
 		return ERR_PTR(err);
 	}
@@ -113,6 +120,7 @@ static void free_ipc_ns(struct ipc_namespace *ns)
 	 */
 	ipcns_notify(IPCNS_REMOVED);
 	put_user_ns(ns->user_ns);
+	proc_free_inum(ns->proc_inum);
 	kfree(ns);
 }
 
@@ -170,10 +178,18 @@ static int ipcns_install(struct nsproxy *nsproxy, void *ns)
 	return 0;
 }
 
+static unsigned int ipcns_inum(void *vp)
+{
+	struct ipc_namespace *ns = vp;
+
+	return ns->proc_inum;
+}
+
 const struct proc_ns_operations ipcns_operations = {
 	.name		= "ipc",
 	.type		= CLONE_NEWIPC,
 	.get		= ipcns_get,
 	.put		= ipcns_put,
 	.install	= ipcns_install,
+	.inum		= ipcns_inum,
 };
diff --git a/kernel/utsname.c b/kernel/utsname.c
index bff131b..3ab6a08 100644
--- a/kernel/utsname.c
+++ b/kernel/utsname.c
@@ -36,11 +36,18 @@ static struct uts_namespace *clone_uts_ns(struct task_struct *tsk,
 					  struct uts_namespace *old_ns)
 {
 	struct uts_namespace *ns;
+	int err;
 
 	ns = create_uts_ns();
 	if (!ns)
 		return ERR_PTR(-ENOMEM);
 
+	err = proc_alloc_inum(&ns->proc_inum);
+	if (err) {
+		kfree(ns);
+		return ERR_PTR(err);
+	}
+
 	down_read(&uts_sem);
 	memcpy(&ns->name, &old_ns->name, sizeof(ns->name));
 	ns->user_ns = get_user_ns(task_cred_xxx(tsk, user)->user_ns);
@@ -78,6 +85,7 @@ void free_uts_ns(struct kref *kref)
 
 	ns = container_of(kref, struct uts_namespace, kref);
 	put_user_ns(ns->user_ns);
+	proc_free_inum(ns->proc_inum);
 	kfree(ns);
 }
 
@@ -110,11 +118,18 @@ static int utsns_install(struct nsproxy *nsproxy, void *ns)
 	return 0;
 }
 
+static unsigned int utsns_inum(void *vp)
+{
+	struct uts_namespace *ns = vp;
+
+	return ns->proc_inum;
+}
+
 const struct proc_ns_operations utsns_operations = {
 	.name		= "uts",
 	.type		= CLONE_NEWUTS,
 	.get		= utsns_get,
 	.put		= utsns_put,
 	.install	= utsns_install,
+	.inum		= utsns_inum,
 };
-
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index e41e511..6199ec2 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -358,6 +358,21 @@ struct net *get_net_ns_by_pid(pid_t pid)
 }
 EXPORT_SYMBOL_GPL(get_net_ns_by_pid);
 
+static __net_init int net_ns_net_init(struct net *net)
+{
+	return proc_alloc_inum(&net->proc_inum);
+}
+
+static __net_exit void net_ns_net_exit(struct net *net)
+{
+	proc_free_inum(net->proc_inum);
+}
+
+static struct pernet_operations __net_initdata net_ns_ops = {
+	.init = net_ns_net_init,
+	.exit = net_ns_net_exit,		
+};
+
 static int __init net_ns_init(void)
 {
 	struct net_generic *ng;
@@ -389,6 +404,8 @@ static int __init net_ns_init(void)
 
 	mutex_unlock(&net_mutex);
 
+	register_pernet_subsys(&net_ns_ops);
+
 	return 0;
 }
 
@@ -616,11 +633,18 @@ static int netns_install(struct nsproxy *nsproxy, void *ns)
 	return 0;
 }
 
+static unsigned int netns_inum(void *ns)
+{
+	struct net *net = ns;
+	return net->proc_inum;
+}
+
 const struct proc_ns_operations netns_operations = {
 	.name		= "net",
 	.type		= CLONE_NEWNET,
 	.get		= netns_get,
 	.put		= netns_put,
 	.install	= netns_install,
+	.inum		= netns_inum,
 };
 #endif
-- 
1.7.5.1.217.g4e3aa

^ permalink raw reply related

* [PATCH 1/2] proc: Generalize proc inode allocation
From: Eric W. Biederman @ 2011-06-17 23:31 UTC (permalink / raw)
  To: Linux Containers
  Cc: Alexey Dobriyan, netdev, David Lamparter, linux-kernel,
	Serge E. Hallyn
In-Reply-To: <20110523014751.GB2351982@jupiter.n2.diac24.net>


Generalize the proc inode allocation so that it can be
used without having to having to create a proc_dir_entry.

This will allow namespace file descriptors to remain light
weight entitities but still have the same inode number
when the backing namespace is the same.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---

Baring problems in review I plan to merge these patches
via my linux-2.6-nsfd tree.

 fs/proc/generic.c       |   26 +++++++++++++-------------
 include/linux/proc_fs.h |   10 ++++++++++
 2 files changed, 23 insertions(+), 13 deletions(-)

diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index f1637f1..65416a1 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -350,14 +350,14 @@ static DEFINE_SPINLOCK(proc_inum_lock); /* protects the above */
  * Return an inode number between PROC_DYNAMIC_FIRST and
  * 0xffffffff, or zero on failure.
  */
-static unsigned int get_inode_number(void)
+int proc_alloc_inum(unsigned int *inum)
 {
 	unsigned int i;
 	int error;
 
 retry:
-	if (ida_pre_get(&proc_inum_ida, GFP_KERNEL) == 0)
-		return 0;
+	if (!ida_pre_get(&proc_inum_ida, GFP_KERNEL))
+		return -ENOMEM;
 
 	spin_lock(&proc_inum_lock);
 	error = ida_get_new(&proc_inum_ida, &i);
@@ -365,18 +365,19 @@ retry:
 	if (error == -EAGAIN)
 		goto retry;
 	else if (error)
-		return 0;
+		return error;
 
 	if (i > UINT_MAX - PROC_DYNAMIC_FIRST) {
 		spin_lock(&proc_inum_lock);
 		ida_remove(&proc_inum_ida, i);
 		spin_unlock(&proc_inum_lock);
-		return 0;
+		return -ENOSPC;
 	}
-	return PROC_DYNAMIC_FIRST + i;
+	*inum = PROC_DYNAMIC_FIRST + i;
+	return 0;
 }
 
-static void release_inode_number(unsigned int inum)
+void proc_free_inum(unsigned int inum)
 {
 	spin_lock(&proc_inum_lock);
 	ida_remove(&proc_inum_ida, inum - PROC_DYNAMIC_FIRST);
@@ -554,13 +555,12 @@ static const struct inode_operations proc_dir_inode_operations = {
 
 static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp)
 {
-	unsigned int i;
 	struct proc_dir_entry *tmp;
+	int ret;
 	
-	i = get_inode_number();
-	if (i == 0)
-		return -EAGAIN;
-	dp->low_ino = i;
+	ret = proc_alloc_inum(&dp->low_ino);
+	if (ret)
+		return ret;
 
 	if (S_ISDIR(dp->mode)) {
 		if (dp->proc_iops == NULL) {
@@ -766,7 +766,7 @@ EXPORT_SYMBOL(proc_create_data);
 
 static void free_proc_entry(struct proc_dir_entry *de)
 {
-	release_inode_number(de->low_ino);
+	proc_free_inum(de->low_ino);
 
 	if (S_ISLNK(de->mode))
 		kfree(de->data);
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index e7576cf..3067b44 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -175,6 +175,8 @@ extern struct proc_dir_entry *proc_net_mkdir(struct net *net, const char *name,
 
 extern struct file *proc_ns_fget(int fd);
 
+extern int proc_alloc_inum(unsigned int *pino);
+extern void proc_free_inum(unsigned int inum);
 #else
 
 #define proc_net_fops_create(net, name, mode, fops)  ({ (void)(mode), NULL; })
@@ -229,6 +231,14 @@ static inline struct file *proc_ns_fget(int fd)
 	return ERR_PTR(-EINVAL);
 }
 
+static inline int proc_alloc_inum(unsigned int *inum)
+{
+	*inum = 1;
+	return 0;
+}
+static inline void proc_free_inum(unsigned int inum)
+{
+}
 #endif /* CONFIG_PROC_FS */
 
 #if !defined(CONFIG_PROC_KCORE)
-- 
1.7.5.1.217.g4e3aa


^ permalink raw reply related

* [PATCH #2 1/4] r8169: move the firmware down into the device private data.
From: Francois Romieu @ 2011-06-17 22:41 UTC (permalink / raw)
  To: davem; +Cc: netdev, Realtek linux nic maintainers, Hayes Wang, Ben Hutchings
In-Reply-To: <20110617224001.GA2483@electric-eye.fr.zoreil.com>

No functional difference.

Signed-off-by: Francois Romieu <romieu@fr.zoreil.com>
---
 drivers/net/r8169.c |   74 +++++++++++++++++++++++++++++++++------------------
 1 files changed, 48 insertions(+), 26 deletions(-)

diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c
index 7310824..3eeefe4 100644
--- a/drivers/net/r8169.c
+++ b/drivers/net/r8169.c
@@ -667,7 +667,9 @@ struct rtl8169_private {
 	struct rtl8169_counters counters;
 	u32 saved_wolopts;
 
-	const struct firmware *fw;
+	struct rtl_fw {
+		const struct firmware *fw;
+	} *rtl_fw;
 #define RTL_FIRMWARE_UNKNOWN	ERR_PTR(-EAGAIN);
 };
 
@@ -1222,11 +1224,12 @@ static void rtl8169_get_drvinfo(struct net_device *dev,
 				struct ethtool_drvinfo *info)
 {
 	struct rtl8169_private *tp = netdev_priv(dev);
+	struct rtl_fw *rtl_fw = tp->rtl_fw;
 
 	strcpy(info->driver, MODULENAME);
 	strcpy(info->version, RTL8169_VERSION);
 	strcpy(info->bus_info, pci_name(tp->pci_dev));
-	strncpy(info->fw_version, IS_ERR_OR_NULL(tp->fw) ? "N/A" :
+	strncpy(info->fw_version, IS_ERR_OR_NULL(rtl_fw) ? "N/A" :
 		rtl_lookup_firmware_name(tp), sizeof(info->fw_version) - 1);
 }
 
@@ -1741,9 +1744,9 @@ static void rtl_writephy_batch(struct rtl8169_private *tp,
 #define PHY_DELAY_MS		0xe0000000
 #define PHY_WRITE_ERI_WORD	0xf0000000
 
-static void
-rtl_phy_write_fw(struct rtl8169_private *tp, const struct firmware *fw)
+static void rtl_phy_write_fw(struct rtl8169_private *tp, struct rtl_fw *rtl_fw)
 {
+	const struct firmware *fw = rtl_fw->fw;
 	__le32 *phytable = (__le32 *)fw->data;
 	struct net_device *dev = tp->dev;
 	size_t index, fw_size = fw->size / sizeof(*phytable);
@@ -1879,18 +1882,20 @@ rtl_phy_write_fw(struct rtl8169_private *tp, const struct firmware *fw)
 
 static void rtl_release_firmware(struct rtl8169_private *tp)
 {
-	if (!IS_ERR_OR_NULL(tp->fw))
-		release_firmware(tp->fw);
-	tp->fw = RTL_FIRMWARE_UNKNOWN;
+	if (!IS_ERR_OR_NULL(tp->rtl_fw)) {
+		release_firmware(tp->rtl_fw->fw);
+		kfree(tp->rtl_fw);
+	}
+	tp->rtl_fw = RTL_FIRMWARE_UNKNOWN;
 }
 
 static void rtl_apply_firmware(struct rtl8169_private *tp)
 {
-	const struct firmware *fw = tp->fw;
+	struct rtl_fw *rtl_fw = tp->rtl_fw;
 
 	/* TODO: release firmware once rtl_phy_write_fw signals failures. */
-	if (!IS_ERR_OR_NULL(fw))
-		rtl_phy_write_fw(tp, fw);
+	if (!IS_ERR_OR_NULL(rtl_fw))
+		rtl_phy_write_fw(tp, rtl_fw);
 }
 
 static void rtl_apply_firmware_cond(struct rtl8169_private *tp, u8 reg, u16 val)
@@ -3443,7 +3448,7 @@ rtl8169_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	tp->timer.data = (unsigned long) dev;
 	tp->timer.function = rtl8169_phy_timer;
 
-	tp->fw = RTL_FIRMWARE_UNKNOWN;
+	tp->rtl_fw = RTL_FIRMWARE_UNKNOWN;
 
 	rc = register_netdev(dev);
 	if (rc < 0)
@@ -3512,25 +3517,42 @@ static void __devexit rtl8169_remove_one(struct pci_dev *pdev)
 	pci_set_drvdata(pdev, NULL);
 }
 
-static void rtl_request_firmware(struct rtl8169_private *tp)
+static void rtl_request_uncached_firmware(struct rtl8169_private *tp)
 {
-	/* Return early if the firmware is already loaded / cached. */
-	if (IS_ERR(tp->fw)) {
-		const char *name;
+	struct rtl_fw *rtl_fw;
+	const char *name;
+	int rc = -ENOMEM;
 
-		name = rtl_lookup_firmware_name(tp);
-		if (name) {
-			int rc;
+	name = rtl_lookup_firmware_name(tp);
+	if (!name)
+		goto out_no_firmware;
 
-			rc = request_firmware(&tp->fw, name, &tp->pci_dev->dev);
-			if (rc >= 0)
-				return;
+	rtl_fw = kzalloc(sizeof(*rtl_fw), GFP_KERNEL);
+	if (!rtl_fw)
+		goto err_warn;
 
-			netif_warn(tp, ifup, tp->dev, "unable to load "
-				"firmware patch %s (%d)\n", name, rc);
-		}
-		tp->fw = NULL;
-	}
+	rc = request_firmware(&rtl_fw->fw, name, &tp->pci_dev->dev);
+	if (rc < 0)
+		goto err_free;
+
+	tp->rtl_fw = rtl_fw;
+out:
+	return;
+
+err_free:
+	kfree(rtl_fw);
+err_warn:
+	netif_warn(tp, ifup, tp->dev, "unable to load firmware patch %s (%d)\n",
+		   name, rc);
+out_no_firmware:
+	tp->rtl_fw = NULL;
+	goto out;
+}
+
+static void rtl_request_firmware(struct rtl8169_private *tp)
+{
+	if (IS_ERR(tp->rtl_fw))
+		rtl_request_uncached_firmware(tp);
 }
 
 static int rtl8169_open(struct net_device *dev)
-- 
1.7.4.4


^ permalink raw reply related

* [PATCH #2 net-next 0/4] Pull request for 'davem-next.r8169' branch
From: Francois Romieu @ 2011-06-17 22:40 UTC (permalink / raw)
  To: davem; +Cc: netdev, Realtek linux nic maintainers, Hayes Wang, Ben Hutchings

Please pull from branch 'davem-next.r8169.hayes' in repository

git://git.kernel.org/pub/scm/linux/kernel/git/romieu/netdev-2.6.git davem-next.r8169

to get the changes below.

Since changeset #1 :
- fixes and suggestions from Ben Hutchings

ethtool displays no firmware information after patch #2/4 when the
firmware is invalid. Patch #3/4 brings it back to "N/A".

Distance from 'davem-next' (c4dc4d108ace27cc0c594b67bd6bd945deaac8c2)
---------------------------------------------------------------------

fc20b0849d416b981a1d504de01be5e3f7baff53
8914e0bfb789ca9af83cb250236e8b2583568ae4
af59605269f8ef6b5df5a8f8d30e21318ba9333a
b6ffd97f5bcfd12df88ece6bc6df7a761ac5a049

Diffstat
--------

 drivers/net/r8169.c |  217 +++++++++++++++++++++++++++++++++++++++------------
 1 files changed, 167 insertions(+), 50 deletions(-)

Shortlog
--------

Francois Romieu (3):
      r8169: move the firmware down into the device private data.
      r8169: explicit firmware format check.
      r8169: check firmware content sooner.

Hayes Wang (1):
      r8169: support new firmware format.

Patch
-----

diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c
index 7310824..bc5bb37 100644
--- a/drivers/net/r8169.c
+++ b/drivers/net/r8169.c
@@ -667,7 +667,18 @@ struct rtl8169_private {
 	struct rtl8169_counters counters;
 	u32 saved_wolopts;
 
-	const struct firmware *fw;
+	struct rtl_fw {
+		const struct firmware *fw;
+
+#define RTL_VER_SIZE		32
+
+		char version[RTL_VER_SIZE];
+
+		struct rtl_fw_phy_action {
+			__le32 *code;
+			size_t size;
+		} phy_action;
+	} *rtl_fw;
 #define RTL_FIRMWARE_UNKNOWN	ERR_PTR(-EAGAIN);
 };
 
@@ -1222,12 +1233,14 @@ static void rtl8169_get_drvinfo(struct net_device *dev,
 				struct ethtool_drvinfo *info)
 {
 	struct rtl8169_private *tp = netdev_priv(dev);
+	struct rtl_fw *rtl_fw = tp->rtl_fw;
 
 	strcpy(info->driver, MODULENAME);
 	strcpy(info->version, RTL8169_VERSION);
 	strcpy(info->bus_info, pci_name(tp->pci_dev));
-	strncpy(info->fw_version, IS_ERR_OR_NULL(tp->fw) ? "N/A" :
-		rtl_lookup_firmware_name(tp), sizeof(info->fw_version) - 1);
+	BUILD_BUG_ON(sizeof(info->fw_version) < sizeof(rtl_fw->version));
+	strcpy(info->fw_version, IS_ERR_OR_NULL(rtl_fw) ? "N/A" :
+	       rtl_fw->version);
 }
 
 static int rtl8169_get_regs_len(struct net_device *dev)
@@ -1741,21 +1754,75 @@ static void rtl_writephy_batch(struct rtl8169_private *tp,
 #define PHY_DELAY_MS		0xe0000000
 #define PHY_WRITE_ERI_WORD	0xf0000000
 
-static void
-rtl_phy_write_fw(struct rtl8169_private *tp, const struct firmware *fw)
+struct fw_info {
+	u32	magic;
+	char	version[RTL_VER_SIZE];
+	__le32	fw_start;
+	__le32	fw_len;
+	u8	chksum;
+} __packed;
+
+#define FW_OPCODE_SIZE	sizeof(typeof(*((struct rtl_fw_phy_action *)0)->code))
+
+static bool rtl_fw_format_ok(struct rtl8169_private *tp, struct rtl_fw *rtl_fw)
 {
-	__le32 *phytable = (__le32 *)fw->data;
-	struct net_device *dev = tp->dev;
-	size_t index, fw_size = fw->size / sizeof(*phytable);
-	u32 predata, count;
+	const struct firmware *fw = rtl_fw->fw;
+	struct fw_info *fw_info = (struct fw_info *)fw->data;
+	struct rtl_fw_phy_action *pa = &rtl_fw->phy_action;
+	char *version = rtl_fw->version;
+	bool rc = false;
 
-	if (fw->size % sizeof(*phytable)) {
-		netif_err(tp, probe, dev, "odd sized firmware %zd\n", fw->size);
-		return;
+	if (fw->size < FW_OPCODE_SIZE)
+		goto out;
+
+	if (!fw_info->magic) {
+		size_t i, size, start;
+		u8 checksum = 0;
+
+		if (fw->size < sizeof(*fw_info))
+			goto out;
+
+		for (i = 0; i < fw->size; i++)
+			checksum += fw->data[i];
+		if (checksum != 0)
+			goto out;
+
+		start = le32_to_cpu(fw_info->fw_start);
+		if (start > fw->size)
+			goto out;
+
+		size = le32_to_cpu(fw_info->fw_len);
+		if (size > (fw->size - start) / FW_OPCODE_SIZE)
+			goto out;
+
+		memcpy(version, fw_info->version, RTL_VER_SIZE);
+
+		pa->code = (__le32 *)(fw->data + start);
+		pa->size = size;
+	} else {
+		if (fw->size % FW_OPCODE_SIZE)
+			goto out;
+
+		strlcpy(version, rtl_lookup_firmware_name(tp), RTL_VER_SIZE);
+
+		pa->code = (__le32 *)fw->data;
+		pa->size = fw->size / FW_OPCODE_SIZE;
 	}
+	version[RTL_VER_SIZE - 1] = 0;
 
-	for (index = 0; index < fw_size; index++) {
-		u32 action = le32_to_cpu(phytable[index]);
+	rc = true;
+out:
+	return rc;
+}
+
+static bool rtl_fw_data_ok(struct rtl8169_private *tp, struct net_device *dev,
+			   struct rtl_fw_phy_action *pa)
+{
+	bool rc = false;
+	size_t index;
+
+	for (index = 0; index < pa->size; index++) {
+		u32 action = le32_to_cpu(pa->code[index]);
 		u32 regno = (action & 0x0fff0000) >> 16;
 
 		switch(action & 0xf0000000) {
@@ -1771,25 +1838,25 @@ rtl_phy_write_fw(struct rtl8169_private *tp, const struct firmware *fw)
 
 		case PHY_BJMPN:
 			if (regno > index) {
-				netif_err(tp, probe, tp->dev,
+				netif_err(tp, ifup, tp->dev,
 					  "Out of range of firmware\n");
-				return;
+				goto out;
 			}
 			break;
 		case PHY_READCOUNT_EQ_SKIP:
-			if (index + 2 >= fw_size) {
-				netif_err(tp, probe, tp->dev,
+			if (index + 2 >= pa->size) {
+				netif_err(tp, ifup, tp->dev,
 					  "Out of range of firmware\n");
-				return;
+				goto out;
 			}
 			break;
 		case PHY_COMP_EQ_SKIPN:
 		case PHY_COMP_NEQ_SKIPN:
 		case PHY_SKIPN:
-			if (index + 1 + regno >= fw_size) {
-				netif_err(tp, probe, tp->dev,
+			if (index + 1 + regno >= pa->size) {
+				netif_err(tp, ifup, tp->dev,
 					  "Out of range of firmware\n");
-				return;
+				goto out;
 			}
 			break;
 
@@ -1797,17 +1864,42 @@ rtl_phy_write_fw(struct rtl8169_private *tp, const struct firmware *fw)
 		case PHY_WRITE_MAC_BYTE:
 		case PHY_WRITE_ERI_WORD:
 		default:
-			netif_err(tp, probe, tp->dev,
+			netif_err(tp, ifup, tp->dev,
 				  "Invalid action 0x%08x\n", action);
-			return;
+			goto out;
 		}
 	}
+	rc = true;
+out:
+	return rc;
+}
+
+static int rtl_check_firmware(struct rtl8169_private *tp, struct rtl_fw *rtl_fw)
+{
+	struct net_device *dev = tp->dev;
+	int rc = -EINVAL;
+
+	if (!rtl_fw_format_ok(tp, rtl_fw)) {
+		netif_err(tp, ifup, dev, "invalid firwmare\n");
+		goto out;
+	}
+
+	if (rtl_fw_data_ok(tp, dev, &rtl_fw->phy_action))
+		rc = 0;
+out:
+	return rc;
+}
+
+static void rtl_phy_write_fw(struct rtl8169_private *tp, struct rtl_fw *rtl_fw)
+{
+	struct rtl_fw_phy_action *pa = &rtl_fw->phy_action;
+	u32 predata, count;
+	size_t index;
 
-	predata = 0;
-	count = 0;
+	predata = count = 0;
 
-	for (index = 0; index < fw_size; ) {
-		u32 action = le32_to_cpu(phytable[index]);
+	for (index = 0; index < pa->size; ) {
+		u32 action = le32_to_cpu(pa->code[index]);
 		u32 data = action & 0x0000ffff;
 		u32 regno = (action & 0x0fff0000) >> 16;
 
@@ -1879,18 +1971,20 @@ rtl_phy_write_fw(struct rtl8169_private *tp, const struct firmware *fw)
 
 static void rtl_release_firmware(struct rtl8169_private *tp)
 {
-	if (!IS_ERR_OR_NULL(tp->fw))
-		release_firmware(tp->fw);
-	tp->fw = RTL_FIRMWARE_UNKNOWN;
+	if (!IS_ERR_OR_NULL(tp->rtl_fw)) {
+		release_firmware(tp->rtl_fw->fw);
+		kfree(tp->rtl_fw);
+	}
+	tp->rtl_fw = RTL_FIRMWARE_UNKNOWN;
 }
 
 static void rtl_apply_firmware(struct rtl8169_private *tp)
 {
-	const struct firmware *fw = tp->fw;
+	struct rtl_fw *rtl_fw = tp->rtl_fw;
 
 	/* TODO: release firmware once rtl_phy_write_fw signals failures. */
-	if (!IS_ERR_OR_NULL(fw))
-		rtl_phy_write_fw(tp, fw);
+	if (!IS_ERR_OR_NULL(rtl_fw))
+		rtl_phy_write_fw(tp, rtl_fw);
 }
 
 static void rtl_apply_firmware_cond(struct rtl8169_private *tp, u8 reg, u16 val)
@@ -3443,7 +3537,7 @@ rtl8169_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	tp->timer.data = (unsigned long) dev;
 	tp->timer.function = rtl8169_phy_timer;
 
-	tp->fw = RTL_FIRMWARE_UNKNOWN;
+	tp->rtl_fw = RTL_FIRMWARE_UNKNOWN;
 
 	rc = register_netdev(dev);
 	if (rc < 0)
@@ -3512,25 +3606,48 @@ static void __devexit rtl8169_remove_one(struct pci_dev *pdev)
 	pci_set_drvdata(pdev, NULL);
 }
 
-static void rtl_request_firmware(struct rtl8169_private *tp)
+static void rtl_request_uncached_firmware(struct rtl8169_private *tp)
 {
-	/* Return early if the firmware is already loaded / cached. */
-	if (IS_ERR(tp->fw)) {
-		const char *name;
+	struct rtl_fw *rtl_fw;
+	const char *name;
+	int rc = -ENOMEM;
 
-		name = rtl_lookup_firmware_name(tp);
-		if (name) {
-			int rc;
+	name = rtl_lookup_firmware_name(tp);
+	if (!name)
+		goto out_no_firmware;
 
-			rc = request_firmware(&tp->fw, name, &tp->pci_dev->dev);
-			if (rc >= 0)
-				return;
+	rtl_fw = kzalloc(sizeof(*rtl_fw), GFP_KERNEL);
+	if (!rtl_fw)
+		goto err_warn;
 
-			netif_warn(tp, ifup, tp->dev, "unable to load "
-				"firmware patch %s (%d)\n", name, rc);
-		}
-		tp->fw = NULL;
-	}
+	rc = request_firmware(&rtl_fw->fw, name, &tp->pci_dev->dev);
+	if (rc < 0)
+		goto err_free;
+
+	rc = rtl_check_firmware(tp, rtl_fw);
+	if (rc < 0)
+		goto err_release_firmware;
+
+	tp->rtl_fw = rtl_fw;
+out:
+	return;
+
+err_release_firmware:
+	release_firmware(rtl_fw->fw);
+err_free:
+	kfree(rtl_fw);
+err_warn:
+	netif_warn(tp, ifup, tp->dev, "unable to load firmware patch %s (%d)\n",
+		   name, rc);
+out_no_firmware:
+	tp->rtl_fw = NULL;
+	goto out;
+}
+
+static void rtl_request_firmware(struct rtl8169_private *tp)
+{
+	if (IS_ERR(tp->rtl_fw))
+		rtl_request_uncached_firmware(tp);
 }
 
 static int rtl8169_open(struct net_device *dev)
-- 
Ueimor

^ permalink raw reply related

* Re: [PATCH net-next 2/4] r8169: explicit firmware format check.
From: Francois Romieu @ 2011-06-17 22:11 UTC (permalink / raw)
  To: Ben Hutchings
  Cc: davem, netdev, Realtek linux nic maintainers, Hayes Wang,
	Ben Hutchings
In-Reply-To: <1308345200.2831.31.camel@bwh-desktop>

Ben Hutchings <bhutchings@solarflare.com> :
[...]
> > @@ -1230,7 +1239,7 @@ static void rtl8169_get_drvinfo(struct net_device *dev,
> >  	strcpy(info->version, RTL8169_VERSION);
> >  	strcpy(info->bus_info, pci_name(tp->pci_dev));
> >  	strncpy(info->fw_version, IS_ERR_OR_NULL(rtl_fw) ? "N/A" :
> > -		rtl_lookup_firmware_name(tp), sizeof(info->fw_version) - 1);
> > +		rtl_fw->version, RTL_VER_SIZE);
> 
> You appear to ensure that rtl_fw->version is properly terminated, so I
> would suggest:
> 
> 	BUILD_BUG_ON(sizeof(info->fw_version) < sizeof(rtl_fw->version));
> 	strcpy(info->fw_version, IS_ERR_OR_NULL(rtl_fw) ? "N/A" : rtl_fw->version);

Ok.

[...]
> > +	if (!(fw->size % FW_OPCODE_SIZE)) {
> > +		snprintf(rtl_fw->version, RTL_VER_SIZE, "%s",
> > +			 rtl_lookup_firmware_name(tp));
> 
> strlcpy()

Ok.

[...]
> This function is doing rather more than what its name suggests.  And it
> always returns true, so it doesn't actually do what its name suggests at
> all.

No idea for a short better name. Will fix the return part.

-- 
Ueimor

^ permalink raw reply

* [PATCH 2/2] core: add tracepoints for queueing skb to rcvbuf
From: Satoru Moriya @ 2011-06-17 22:00 UTC (permalink / raw)
  To: netdev@vger.kernel.org
  Cc: nhorman@tuxdriver.com, davem@davemloft.net,
	dle-develop@lists.sourceforge.net, Seiji Aguchi
In-Reply-To: <65795E11DBF1E645A09CEC7EAEE94B9C402B96E4@USINDEVS02.corp.hds.com>

This patch adds 2 tracepoints to get a status of a socket receive queue
and related parameter. 

One tracepoint is added to sock_queue_rcv_skb. It records rcvbuf size
and its usage. The other tracepoint is added to __sk_mem_schedule and
it records limitations of memory for sockets and current usage.

By using these tracepoints we're able to know detailed reason why kernel
drop the packet.

Signed-off-by: Satoru Moriya <satoru.moriya@hds.com>
---
 include/trace/events/sock.h |   68 +++++++++++++++++++++++++++++++++++++++++++
 net/core/net-traces.c       |    1 +
 net/core/sock.c             |    5 +++
 3 files changed, 74 insertions(+), 0 deletions(-)
 create mode 100644 include/trace/events/sock.h

diff --git a/include/trace/events/sock.h b/include/trace/events/sock.h
new file mode 100644
index 0000000..779abb9
--- /dev/null
+++ b/include/trace/events/sock.h
@@ -0,0 +1,68 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM sock
+
+#if !defined(_TRACE_SOCK_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_SOCK_H
+
+#include <net/sock.h>
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(sock_rcvqueue_full,
+
+	TP_PROTO(struct sock *sk, struct sk_buff *skb),
+
+	TP_ARGS(sk, skb),
+
+	TP_STRUCT__entry(
+		__field(int, rmem_alloc)
+		__field(unsigned int, truesize)
+		__field(int, sk_rcvbuf)
+	),
+
+	TP_fast_assign(
+		__entry->rmem_alloc = atomic_read(&sk->sk_rmem_alloc);
+		__entry->truesize   = skb->truesize;
+		__entry->sk_rcvbuf  = sk->sk_rcvbuf;
+	),
+
+	TP_printk("rmem_alloc=%d truesize=%u sk_rcvbuf=%d",
+		__entry->rmem_alloc, __entry->truesize, __entry->sk_rcvbuf)
+);
+
+TRACE_EVENT(sock_exceed_buf_limit,
+
+	TP_PROTO(struct sock *sk, struct proto *prot, long allocated),
+
+	TP_ARGS(sk, prot, allocated),
+
+	TP_STRUCT__entry(
+		__array(char, name, 32)
+		__field(long *, sysctl_mem)
+		__field(long, allocated)
+		__field(int, sysctl_rmem)
+		__field(int, rmem_alloc)
+	),
+
+	TP_fast_assign(
+		strncpy(__entry->name, prot->name, 32);
+		__entry->sysctl_mem = prot->sysctl_mem;
+		__entry->allocated = allocated;
+		__entry->sysctl_rmem = prot->sysctl_rmem[0];
+		__entry->rmem_alloc = atomic_read(&sk->sk_rmem_alloc);
+	),
+
+	TP_printk("proto:%s sysctl_mem=%ld,%ld,%ld allocated=%ld "
+		"sysctl_rmem=%d rmem_alloc=%d",
+		__entry->name,
+		__entry->sysctl_mem[0],
+		__entry->sysctl_mem[1],
+		__entry->sysctl_mem[2],
+		__entry->allocated,
+		__entry->sysctl_rmem,
+		__entry->rmem_alloc)
+);
+
+#endif /* _TRACE_SOCK_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/net/core/net-traces.c b/net/core/net-traces.c
index 13aab64..52380b1 100644
--- a/net/core/net-traces.c
+++ b/net/core/net-traces.c
@@ -28,6 +28,7 @@
 #include <trace/events/skb.h>
 #include <trace/events/net.h>
 #include <trace/events/napi.h>
+#include <trace/events/sock.h>
 #include <trace/events/udp.h>
 
 EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb);
diff --git a/net/core/sock.c b/net/core/sock.c
index 6e81978..76c4031 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -128,6 +128,8 @@
 
 #include <linux/filter.h>
 
+#include <trace/events/sock.h>
+
 #ifdef CONFIG_INET
 #include <net/tcp.h>
 #endif
@@ -292,6 +294,7 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 	if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
 	    (unsigned)sk->sk_rcvbuf) {
 		atomic_inc(&sk->sk_drops);
+		trace_sock_rcvqueue_full(sk, skb);
 		return -ENOMEM;
 	}
 
@@ -1736,6 +1739,8 @@ suppress_allocation:
 			return 1;
 	}
 
+	trace_sock_exceed_buf_limit(sk, prot, allocated);
+
 	/* Alas. Undo changes. */
 	sk->sk_forward_alloc -= amt * SK_MEM_QUANTUM;
 	atomic_long_sub(amt, prot->memory_allocated);
-- 
1.7.1



^ permalink raw reply related

* [PATCH 1/2] udp:  add tracepoints for queueing skb to rcvbuf
From: Satoru Moriya @ 2011-06-17 21:58 UTC (permalink / raw)
  To: netdev@vger.kernel.org
  Cc: nhorman@tuxdriver.com, davem@davemloft.net,
	dle-develop@lists.sourceforge.net, Seiji Aguchi
In-Reply-To: <65795E11DBF1E645A09CEC7EAEE94B9C402B96E4@USINDEVS02.corp.hds.com>

This patch adds a tracepoint to __udp_queue_rcv_skb to get the
return value of ip_queue_rcv_skb. It indicates why kernel drops
a packet at this point.

ip_queue_rcv_skb returns following values in the packet drop case:

rcvbuf is full                 : -ENOMEM
sk_filter returns error        : -EINVAL, -EACCESS, -ENOMEM, etc.
__sk_mem_schedule returns error: -ENOBUF


Signed-off-by: Satoru Moriya <satoru.moriya@hds.com>
---
 include/trace/events/udp.h |   32 ++++++++++++++++++++++++++++++++
 net/core/net-traces.c      |    1 +
 net/ipv4/udp.c             |    2 ++
 3 files changed, 35 insertions(+), 0 deletions(-)
 create mode 100644 include/trace/events/udp.h

diff --git a/include/trace/events/udp.h b/include/trace/events/udp.h
new file mode 100644
index 0000000..a664bb9
--- /dev/null
+++ b/include/trace/events/udp.h
@@ -0,0 +1,32 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM udp
+
+#if !defined(_TRACE_UDP_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_UDP_H
+
+#include <linux/udp.h>
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(udp_fail_queue_rcv_skb,
+
+	TP_PROTO(int rc, struct sock *sk),
+
+	TP_ARGS(rc, sk),
+
+	TP_STRUCT__entry(
+		__field(int, rc)
+		__field(__u16, lport)
+	),
+
+	TP_fast_assign(
+		__entry->rc = rc;
+		__entry->lport = inet_sk(sk)->inet_num;
+	),
+
+	TP_printk("rc=%d port=%hu", __entry->rc, __entry->lport)
+);
+
+#endif /* _TRACE_UDP_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/net/core/net-traces.c b/net/core/net-traces.c
index 7f1bb2a..13aab64 100644
--- a/net/core/net-traces.c
+++ b/net/core/net-traces.c
@@ -28,6 +28,7 @@
 #include <trace/events/skb.h>
 #include <trace/events/net.h>
 #include <trace/events/napi.h>
+#include <trace/events/udp.h>
 
 EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb);
 
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index abca870..37aa9bf 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -105,6 +105,7 @@
 #include <net/route.h>
 #include <net/checksum.h>
 #include <net/xfrm.h>
+#include <trace/events/udp.h>
 #include "udp_impl.h"
 
 struct udp_table udp_table __read_mostly;
@@ -1363,6 +1364,7 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 					 is_udplite);
 		UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
 		kfree_skb(skb);
+		trace_udp_fail_queue_rcv_skb(rc, sk);
 		return -1;
 	}
 
-- 
1.7.1



^ permalink raw reply related

* [PATCH 0/2] Tracepoints for queueing skb to rcvbuf
From: Satoru Moriya @ 2011-06-17 21:56 UTC (permalink / raw)
  To: netdev@vger.kernel.org
  Cc: nhorman@tuxdriver.com, davem@davemloft.net,
	dle-develop@lists.sourceforge.net, Seiji Aguchi

Hi,

Kernel may drops packets when it queues them to socket receive buffer. 
Currently We can detect packet drop events and know when and where it
happened via kfree_skb tracepoint. But it's difficult to know a detailed
reason because there are some possibilities. 

In UDP case, core function for queueing skb to socket rcvbuf is 
__udp_queue_rcv_skb and its call chain is following:

__udp_queue_rcv_skb
 ip_queue_rcv_skb
  sock_queue_rcv_skb
   sk_rmem_schedule
    __sk_mem_schedule

We can catch a packet drop event in __udp_queue_rcv_skb and it means
ip_queue_rcv_skb/sock_queue_rcv_skb returned negative value.
In sock_queue_rcv_skb there are 3 possibilities-(*) where it returns
negative value but we can't separate them. Moreover sock_queue_rcv_skb calls
__sk_mem_schedule and there are several if satetements to decide whether
kernel should drop the packet.

To separate these reasons, this patchset adds 3 tracepoints.

1st one is added to __udp_queue_rcv_skb to get return value of
ip_queue_rcv_skb. Analyzing it we can separate above (*) (3 possibilities).

2nd and 3rd one are to get more detailed information. We can collect status 
of socket receive queue and related parameters(some of them are sysctl knob
e.g. /proc/sys/net/ipv4/udp_mem, etc. for UDP) and then we can tune kernel
behavior easily.

Any comments and feedback are welcome.

Satoru Moriya (2):
  udp: add tracepoint for queueing skb to rcvbuf
  core: add tracepoints for queueing skb to rcvbuf

 include/trace/events/sock.h |   68 +++++++++++++++++++++++++++++++++++++++++++
 include/trace/events/udp.h  |   32 ++++++++++++++++++++
 net/core/net-traces.c       |    2 +
 net/core/sock.c             |    5 +++
 net/ipv4/udp.c              |    2 +
 5 files changed, 109 insertions(+), 0 deletions(-)
 create mode 100644 include/trace/events/sock.h
 create mode 100644 include/trace/events/udp.h

^ permalink raw reply

* PATCH 0/2] Tracepoints for queueing skb to rcvbuf
From: Satoru Moriya @ 2011-06-17 21:54 UTC (permalink / raw)
  To: netdev@vger.kernel.org
  Cc: nhorman@tuxdriver.com, davem@davemloft.net,
	dle-develop@lists.sourceforge.net, Seiji Aguchi

Hi,

Kernel may drops packets when it queues them to socket receive buffer.
Currently We can detect packet drop events and know when and where it
happened via kfree_skb tracepoint. But it's difficult to know a detailed
reason because there are some possibilities.

In UDP case, core function for queueing skb to socket rcvbuf is
__udp_queue_rcv_skb and its call chain is following:

__udp_queue_rcv_skb
 ip_queue_rcv_skb
  sock_queue_rcv_skb
   sk_rmem_schedule
    __sk_mem_schedule

We can catch a packet drop event in __udp_queue_rcv_skb and it means
ip_queue_rcv_skb/sock_queue_rcv_skb returned negative value.
In sock_queue_rcv_skb there are 3 possibilities-(*) where it returns
negative value but we can't separate them. Moreover sock_queue_rcv_skb calls
__sk_mem_schedule and there are several if satetements to decide whether
kernel should drop the packet.

To separate these reasons, this patchset adds 3 tracepoints.

1st one is added to __udp_queue_rcv_skb to get return value of
ip_queue_rcv_skb. Analyzing it we can separate above (*) (3 possibilities).

2nd and 3rd one are to get more detailed information. We can collect status
of socket receive queue and related parameters(some of them are sysctl knob
e.g. /proc/sys/net/ipv4/udp_mem, etc. for UDP) and then we can tune kernel
behavior easily.

Any comments and feedback are welcome.

Satoru Moriya (2):
  udp: add tracepoint for queueing skb to rcvbuf
  core: add tracepoints for queueing skb to rcvbuf

 include/trace/events/sock.h |   68 +++++++++++++++++++++++++++++++++++++++++++
 include/trace/events/udp.h  |   32 ++++++++++++++++++++
 net/core/net-traces.c       |    2 +
 net/core/sock.c             |    5 +++
 net/ipv4/udp.c              |    2 +
 5 files changed, 109 insertions(+), 0 deletions(-)
 create mode 100644 include/trace/events/sock.h
 create mode 100644 include/trace/events/udp.h

^ permalink raw reply

* Re: what's causing "ip_rt_bug"?
From: Tomasz Chmielewski @ 2011-06-17 21:37 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: netdev
In-Reply-To: <1308342993.3539.30.camel@edumazet-laptop>

On 17.06.2011 22:36, Eric Dumazet wrote:
> Le vendredi 17 juin 2011 à 22:00 +0200, Tomasz Chmielewski a écrit :
>> I have a system pushing around 800 Mbit/s, ~130 kpps.
>>
>> It uses 2.6.35.12 kernel.
>>
>>
>> Several times a minute, I can see entries like (a.b.c.d - IP of this system):
>>
>> Jun 18 02:39:19 KOR-SV22 kernel: [37187.665951] ip_rt_bug: 110.x.x.x ->  a.b.c.d, ?
>> Jun 18 02:39:19 KOR-SV22 kernel: [37187.685419] ip_rt_bug: 110.x.x.x ->  a.b.c.d, ?
>> Jun 18 02:40:31 KOR-SV22 kernel: [37259.199315] ip_rt_bug: 124.x.x.x ->  a.b.c.d, ?
>> Jun 18 02:40:36 KOR-SV22 kernel: [37263.828000] ip_rt_bug: 124.x.x.x ->  a.b.c.d, ?
>> Jun 18 02:44:16 KOR-SV22 kernel: [37484.120689] ip_rt_bug: 110.x.x.x ->  a.b.c.d, ?
>> Jun 18 02:44:19 KOR-SV22 kernel: [37487.114357] ip_rt_bug: 110.x.x.x ->  a.b.c.d, ?
>>
> 
> Hi
> 
> What your routing table looks like ? (ip ro)

It's just a proxy, no special routing set:

# ip ro
58.185.117.18 via 119.46.110.193 dev eth0 
119.46.240.13 via 119.46.110.193 dev eth0 
58.185.117.29 via 119.46.110.193 dev eth0 
119.46.241.13 via 119.46.110.193 dev eth0 
58.185.117.28 via 119.46.110.193 dev eth0 
119.46.110.192/26 dev eth0  proto kernel  scope link  src 119.46.110.197 
169.254.0.0/16 dev eth0  scope link 
default via 119.46.110.195 dev eth0 


The box is also crashing every few days; and I really had no clue why (just connected a serial console to catch any new oops/panic).


The last time it crashed, I have this entry in syslog:

Jun 17 16:16:17 TRUE-SC02 kernel: [172488.602629] ip_rt_bug: 124.121.155.197 -> 119.46.110.197, ?
Jun 17 16:17:00 TRUE-SC02 kernel: [172531.239041] BUG: unable to handle kernel NULL pointer dereference at (null)
Jun 17 16:17:00 TRUE-SC02 kernel: [172531.239409] IP: [<ffffffff81361cae>] dev_queue_xmit+0x1e3/0x441
Jun 17 16:17:00 TRUE-SC02 kernel: [172531.239760] PGD 43c30b067 PUD 439e63067 PMD 0
Jun 17 16:17:00 TRUE-SC02 kernel: [172531.240103] Oops: 0000 [#1] SMP
Jun 17 16:19:58 TRUE-SC02 syslogd 1.4.1: restart.



Right now, it uses the newest igb driver, and I started seeing "Out of socket memory" quite a bit (didn't have it with the original igb driver from 2.6.35.12).

So I doubled this value to be:

 net.ipv4.tcp_max_orphans = 256000


and "Out of socket memory" stopped showing up.

Instead, "ip_rt_bug" shows up.


-- 
Tomasz Chmielewski
http://wpkg.org

^ permalink raw reply

* [PATCH 7/7] dcb: Add missing error check in dcb_ieee_set()
From: John Fastabend @ 2011-06-17 21:16 UTC (permalink / raw)
  To: davem; +Cc: netdev, shmulikr
In-Reply-To: <20110617211027.26578.11317.stgit@jf-dev1-dcblab>

Missing error checking before nla_parse_nested().

Reported-by: Mark Rustad <mark.d.rustad@intel.com>
Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
---

 net/dcb/dcbnl.c |    3 +++
 1 files changed, 3 insertions(+), 0 deletions(-)

diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index 28fc386..e48879f 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -1367,6 +1367,9 @@ static int dcbnl_ieee_set(struct net_device *netdev, struct nlattr **tb,
 	if (!ops)
 		return err;
 
+	if (!tb[DCB_ATTR_IEEE])
+		return -EINVAL;
+
 	err = nla_parse_nested(ieee, DCB_ATTR_IEEE_MAX,
 			       tb[DCB_ATTR_IEEE], dcbnl_ieee_policy);
 	if (err)


^ permalink raw reply related

* [PATCH 6/7] dcb: fix return type on dcb_setapp()
From: John Fastabend @ 2011-06-17 21:16 UTC (permalink / raw)
  To: davem; +Cc: netdev, shmulikr
In-Reply-To: <20110617211027.26578.11317.stgit@jf-dev1-dcblab>

Incorrect return type on dcb_setapp() this routine
returns negative error codes. All call sites of
dcb_setapp() assign the return value to an int already
so no need to update drivers.

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
---

 include/net/dcbnl.h |    2 +-
 net/dcb/dcbnl.c     |    2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/net/dcbnl.h b/include/net/dcbnl.h
index 0a28788..f8c5505 100644
--- a/include/net/dcbnl.h
+++ b/include/net/dcbnl.h
@@ -28,7 +28,7 @@ struct dcb_app_type {
 	struct list_head  list;
 };
 
-u8 dcb_setapp(struct net_device *, struct dcb_app *);
+int dcb_setapp(struct net_device *, struct dcb_app *);
 u8 dcb_getapp(struct net_device *, struct dcb_app *);
 int dcb_ieee_setapp(struct net_device *, struct dcb_app *);
 int dcb_ieee_delapp(struct net_device *, struct dcb_app *);
diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index 87010ef..28fc386 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -1884,7 +1884,7 @@ EXPORT_SYMBOL(dcb_getapp);
  * removes applications from the app list if the priority is
  * set to zero.
  */
-u8 dcb_setapp(struct net_device *dev, struct dcb_app *new)
+int dcb_setapp(struct net_device *dev, struct dcb_app *new)
 {
 	struct dcb_app_type *itr;
 	struct dcb_app_type event;


^ permalink raw reply related

* [PATCH 5/7] dcb: Add dcb_ieee_getapp_mask() for drivers to query APP settings
From: John Fastabend @ 2011-06-17 21:16 UTC (permalink / raw)
  To: davem; +Cc: netdev, shmulikr
In-Reply-To: <20110617211027.26578.11317.stgit@jf-dev1-dcblab>

With multiple APP entries per selector and protocol drivers
or stacks may want to pick a specific value or stripe traffic
across many priorities. Also if an APP entry in use is
deleted the stack/driver may want to choose from the existing
APP entries.

To facilitate this and avoid having duplicate code to walk
the APP ring provide a routine dcb_ieee_getapp_mask() to
return a u8 bitmask of all priorities set for the specified
selector and protocol. This routine and bitmask is a helper
for DCB kernel users.

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
---

 include/net/dcbnl.h |    1 +
 net/dcb/dcbnl.c     |   26 ++++++++++++++++++++++++++
 2 files changed, 27 insertions(+), 0 deletions(-)

diff --git a/include/net/dcbnl.h b/include/net/dcbnl.h
index 8c545b1..0a28788 100644
--- a/include/net/dcbnl.h
+++ b/include/net/dcbnl.h
@@ -32,6 +32,7 @@ u8 dcb_setapp(struct net_device *, struct dcb_app *);
 u8 dcb_getapp(struct net_device *, struct dcb_app *);
 int dcb_ieee_setapp(struct net_device *, struct dcb_app *);
 int dcb_ieee_delapp(struct net_device *, struct dcb_app *);
+u8 dcb_ieee_getapp_mask(struct net_device *, struct dcb_app *);
 
 /*
  * Ops struct for the netlink callbacks.  Used by DCB-enabled drivers through
diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index 233a6fc..87010ef 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -1928,6 +1928,32 @@ out:
 EXPORT_SYMBOL(dcb_setapp);
 
 /**
+ * dcb_ieee_getapp_mask - retrieve the IEEE DCB application priority
+ *
+ * Helper routine which on success returns a non-zero 802.1Qaz user
+ * priority bitmap otherwise returns 0 to indicate the dcb_app was
+ * not found in APP list.
+ */
+u8 dcb_ieee_getapp_mask(struct net_device *dev, struct dcb_app *app)
+{
+	struct dcb_app_type *itr;
+	u8 prio = 0;
+
+	spin_lock(&dcb_lock);
+	list_for_each_entry(itr, &dcb_app_list, list) {
+		if (itr->app.selector == app->selector &&
+		    itr->app.protocol == app->protocol &&
+		    (strncmp(itr->name, dev->name, IFNAMSIZ) == 0)) {
+			prio |= 1 << itr->app.priority;
+		}
+	}
+	spin_unlock(&dcb_lock);
+
+	return prio;
+}
+EXPORT_SYMBOL(dcb_ieee_getapp_mask);
+
+/**
  * dcb_ieee_setapp - add IEEE dcb application data to app list
  *
  * This adds Application data to the list. Multiple application


^ permalink raw reply related

* [PATCH 4/7] dcb: Add ieee_dcb_delapp() and dcb op to delete app entry
From: John Fastabend @ 2011-06-17 21:16 UTC (permalink / raw)
  To: davem; +Cc: netdev, shmulikr
In-Reply-To: <20110617211027.26578.11317.stgit@jf-dev1-dcblab>

Now that we allow multiple IEEE App entries we need a way
to remove specific entries. To do this add the ieee_dcb_delapp()
routine.

Additionaly drivers may need to remove the APP entry from
their firmware tables. Add dcb ops routine to handle this.

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
---

 include/linux/dcbnl.h |    2 +
 include/net/dcbnl.h   |    2 +
 net/dcb/dcbnl.c       |   90 ++++++++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 92 insertions(+), 2 deletions(-)

diff --git a/include/linux/dcbnl.h b/include/linux/dcbnl.h
index c522800..66a6723 100644
--- a/include/linux/dcbnl.h
+++ b/include/linux/dcbnl.h
@@ -203,6 +203,7 @@ struct dcbmsg {
  * @DCB_CMD_GFEATCFG: get DCBX features flags
  * @DCB_CMD_SFEATCFG: set DCBX features negotiation flags
  * @DCB_CMD_CEE_GET: get CEE aggregated configuration
+ * @DCB_CMD_IEEE_DEL: delete IEEE 802.1Qaz configuration
  */
 enum dcbnl_commands {
 	DCB_CMD_UNDEFINED,
@@ -246,6 +247,7 @@ enum dcbnl_commands {
 	DCB_CMD_SFEATCFG,
 
 	DCB_CMD_CEE_GET,
+	DCB_CMD_IEEE_DEL,
 
 	__DCB_CMD_ENUM_MAX,
 	DCB_CMD_MAX = __DCB_CMD_ENUM_MAX - 1,
diff --git a/include/net/dcbnl.h b/include/net/dcbnl.h
index 47a17d2..8c545b1 100644
--- a/include/net/dcbnl.h
+++ b/include/net/dcbnl.h
@@ -31,6 +31,7 @@ struct dcb_app_type {
 u8 dcb_setapp(struct net_device *, struct dcb_app *);
 u8 dcb_getapp(struct net_device *, struct dcb_app *);
 int dcb_ieee_setapp(struct net_device *, struct dcb_app *);
+int dcb_ieee_delapp(struct net_device *, struct dcb_app *);
 
 /*
  * Ops struct for the netlink callbacks.  Used by DCB-enabled drivers through
@@ -44,6 +45,7 @@ struct dcbnl_rtnl_ops {
 	int (*ieee_setpfc) (struct net_device *, struct ieee_pfc *);
 	int (*ieee_getapp) (struct net_device *, struct dcb_app *);
 	int (*ieee_setapp) (struct net_device *, struct dcb_app *);
+	int (*ieee_delapp) (struct net_device *, struct dcb_app *);
 	int (*ieee_peer_getets) (struct net_device *, struct ieee_ets *);
 	int (*ieee_peer_getpfc) (struct net_device *, struct ieee_pfc *);
 
diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index 4e77170..233a6fc 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -1450,6 +1450,52 @@ static int dcbnl_ieee_get(struct net_device *netdev, struct nlattr **tb,
 
 	return err;
 }
+
+static int dcbnl_ieee_del(struct net_device *netdev, struct nlattr **tb,
+			  u32 pid, u32 seq, u16 flags)
+{
+	const struct dcbnl_rtnl_ops *ops = netdev->dcbnl_ops;
+	struct nlattr *ieee[DCB_ATTR_IEEE_MAX + 1];
+	int err = -EOPNOTSUPP;
+
+	if (!ops)
+		return -EOPNOTSUPP;
+
+	if (!tb[DCB_ATTR_IEEE])
+		return -EINVAL;
+
+	err = nla_parse_nested(ieee, DCB_ATTR_IEEE_MAX,
+			       tb[DCB_ATTR_IEEE], dcbnl_ieee_policy);
+	if (err)
+		return err;
+
+	if (ieee[DCB_ATTR_IEEE_APP_TABLE]) {
+		struct nlattr *attr;
+		int rem;
+
+		nla_for_each_nested(attr, ieee[DCB_ATTR_IEEE_APP_TABLE], rem) {
+			struct dcb_app *app_data;
+
+			if (nla_type(attr) != DCB_ATTR_IEEE_APP)
+				continue;
+			app_data = nla_data(attr);
+			if (ops->ieee_delapp)
+				err = ops->ieee_delapp(netdev, app_data);
+			else
+				err = dcb_ieee_delapp(netdev, app_data);
+			if (err)
+				goto err;
+		}
+	}
+
+err:
+	dcbnl_reply(err, RTM_SETDCB, DCB_CMD_IEEE_DEL, DCB_ATTR_IEEE,
+		    pid, seq, flags);
+	dcbnl_notify(netdev, RTM_SETDCB, DCB_CMD_IEEE_DEL, seq, 0);
+	return err;
+}
+
+
 /* DCBX configuration */
 static int dcbnl_getdcbx(struct net_device *netdev, struct nlattr **tb,
 			 u32 pid, u32 seq, u16 flags)
@@ -1764,11 +1810,15 @@ static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 		goto out;
 	case DCB_CMD_IEEE_SET:
 		ret = dcbnl_ieee_set(netdev, tb, pid, nlh->nlmsg_seq,
-				 nlh->nlmsg_flags);
+				     nlh->nlmsg_flags);
 		goto out;
 	case DCB_CMD_IEEE_GET:
 		ret = dcbnl_ieee_get(netdev, tb, pid, nlh->nlmsg_seq,
-				 nlh->nlmsg_flags);
+				     nlh->nlmsg_flags);
+		goto out;
+	case DCB_CMD_IEEE_DEL:
+		ret = dcbnl_ieee_del(netdev, tb, pid, nlh->nlmsg_seq,
+				     nlh->nlmsg_flags);
 		goto out;
 	case DCB_CMD_GDCBX:
 		ret = dcbnl_getdcbx(netdev, tb, pid, nlh->nlmsg_seq,
@@ -1923,6 +1973,42 @@ out:
 }
 EXPORT_SYMBOL(dcb_ieee_setapp);
 
+/**
+ * dcb_ieee_delapp - delete IEEE dcb application data from list
+ *
+ * This removes a matching APP data from the APP list
+ */
+int dcb_ieee_delapp(struct net_device *dev, struct dcb_app *del)
+{
+	struct dcb_app_type *itr;
+	struct dcb_app_type event;
+	int err = -ENOENT;
+
+	memcpy(&event.name, dev->name, sizeof(event.name));
+	memcpy(&event.app, del, sizeof(event.app));
+
+	spin_lock(&dcb_lock);
+	/* Search for existing match and remove it. */
+	list_for_each_entry(itr, &dcb_app_list, list) {
+		if (itr->app.selector == del->selector &&
+		    itr->app.protocol == del->protocol &&
+		    itr->app.priority == del->priority &&
+		    (strncmp(itr->name, dev->name, IFNAMSIZ) == 0)) {
+			list_del(&itr->list);
+			kfree(itr);
+			err = 0;
+			goto out;
+		}
+	}
+
+out:
+	spin_unlock(&dcb_lock);
+	if (!err)
+		call_dcbevent_notifiers(DCB_APP_EVENT, &event);
+	return err;
+}
+EXPORT_SYMBOL(dcb_ieee_delapp);
+
 static void dcb_flushapp(void)
 {
 	struct dcb_app_type *app;


^ permalink raw reply related

* [PATCH 3/7] dcb: Add ieee_dcb_setapp() to be used for IEEE 802.1Qaz APP data
From: John Fastabend @ 2011-06-17 21:16 UTC (permalink / raw)
  To: davem; +Cc: netdev, shmulikr
In-Reply-To: <20110617211027.26578.11317.stgit@jf-dev1-dcblab>

This adds a setapp routine for IEEE802.1Qaz encoded APP data types.
The IEEE 802.1Qaz spec encodes the priority bits differently and
allows for multiple APP data entries of the same selector and
protocol. Trying to force these to use the same set routines was
becoming tedious. Furthermore, userspace could probably enforce
the correct semantics, but expecting drivers to do this seems
error prone in the firmware case.

For these reasons add ieee_dcb_setapp() that understands the
IEEE 802.1Qaz encoded form.

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
---

 include/net/dcbnl.h |    1 +
 net/dcb/dcbnl.c     |   55 +++++++++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 52 insertions(+), 4 deletions(-)

diff --git a/include/net/dcbnl.h b/include/net/dcbnl.h
index e5983c9..47a17d2 100644
--- a/include/net/dcbnl.h
+++ b/include/net/dcbnl.h
@@ -30,6 +30,7 @@ struct dcb_app_type {
 
 u8 dcb_setapp(struct net_device *, struct dcb_app *);
 u8 dcb_getapp(struct net_device *, struct dcb_app *);
+int dcb_ieee_setapp(struct net_device *, struct dcb_app *);
 
 /*
  * Ops struct for the netlink callbacks.  Used by DCB-enabled drivers through
diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index b514579..4e77170 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -1398,7 +1398,7 @@ static int dcbnl_ieee_set(struct net_device *netdev, struct nlattr **tb,
 			if (ops->ieee_setapp)
 				err = ops->ieee_setapp(netdev, app_data);
 			else
-				err = dcb_setapp(netdev, app_data);
+				err = dcb_ieee_setapp(netdev, app_data);
 			if (err)
 				goto err;
 		}
@@ -1828,10 +1828,11 @@ u8 dcb_getapp(struct net_device *dev, struct dcb_app *app)
 EXPORT_SYMBOL(dcb_getapp);
 
 /**
- * ixgbe_dcbnl_setapp - add dcb application data to app list
+ * dcb_setapp - add CEE dcb application data to app list
  *
- * Priority 0 is the default priority this removes applications
- * from the app list if the priority is set to zero.
+ * Priority 0 is an invalid priority in CEE spec. This routine
+ * removes applications from the app list if the priority is
+ * set to zero.
  */
 u8 dcb_setapp(struct net_device *dev, struct dcb_app *new)
 {
@@ -1876,6 +1877,52 @@ out:
 }
 EXPORT_SYMBOL(dcb_setapp);
 
+/**
+ * dcb_ieee_setapp - add IEEE dcb application data to app list
+ *
+ * This adds Application data to the list. Multiple application
+ * entries may exists for the same selector and protocol as long
+ * as the priorities are different.
+ */
+int dcb_ieee_setapp(struct net_device *dev, struct dcb_app *new)
+{
+	struct dcb_app_type *itr, *entry;
+	struct dcb_app_type event;
+	int err = 0;
+
+	memcpy(&event.name, dev->name, sizeof(event.name));
+	memcpy(&event.app, new, sizeof(event.app));
+
+	spin_lock(&dcb_lock);
+	/* Search for existing match and abort if found */
+	list_for_each_entry(itr, &dcb_app_list, list) {
+		if (itr->app.selector == new->selector &&
+		    itr->app.protocol == new->protocol &&
+		    itr->app.priority == new->priority &&
+		    (strncmp(itr->name, dev->name, IFNAMSIZ) == 0)) {
+			err = -EEXIST;
+			goto out;
+		}
+	}
+
+	/* App entry does not exist add new entry */
+	entry = kmalloc(sizeof(struct dcb_app_type), GFP_ATOMIC);
+	if (!entry) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	memcpy(&entry->app, new, sizeof(*new));
+	strncpy(entry->name, dev->name, IFNAMSIZ);
+	list_add(&entry->list, &dcb_app_list);
+out:
+	spin_unlock(&dcb_lock);
+	if (!err)
+		call_dcbevent_notifiers(DCB_APP_EVENT, &event);
+	return err;
+}
+EXPORT_SYMBOL(dcb_ieee_setapp);
+
 static void dcb_flushapp(void)
 {
 	struct dcb_app_type *app;


^ permalink raw reply related

* [PATCH 1/7] dcb: Add DCBX capabilities bitmask to the get_ieee response
From: John Fastabend @ 2011-06-17 21:16 UTC (permalink / raw)
  To: davem; +Cc: netdev, shmulikr
In-Reply-To: <20110617211027.26578.11317.stgit@jf-dev1-dcblab>

Adding the capabilities bitmask to the get_ieee response allows
user space to determine the current DCBX mode. Either CEE or IEEE
this is useful with devices that support switching between modes
where knowing the current state is relevant.

Derived from work by Mark Rustad

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
---

 net/dcb/dcbnl.c |   12 ++++++++++++
 1 files changed, 12 insertions(+), 0 deletions(-)

diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index ed1bb8c..3a6d97d 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -1288,6 +1288,7 @@ static int dcbnl_ieee_get(struct net_device *netdev, struct nlattr **tb,
 	struct nlattr *ieee, *app;
 	struct dcb_app_type *itr;
 	const struct dcbnl_rtnl_ops *ops = netdev->dcbnl_ops;
+	int dcbx;
 	int err;
 
 	if (!ops)
@@ -1338,6 +1339,12 @@ static int dcbnl_ieee_get(struct net_device *netdev, struct nlattr **tb,
 			}
 		}
 	}
+
+	if (netdev->dcbnl_ops->getdcbx)
+		dcbx = netdev->dcbnl_ops->getdcbx(netdev);
+	else
+		dcbx = -EOPNOTSUPP;
+
 	spin_unlock(&dcb_lock);
 	nla_nest_end(skb, app);
 
@@ -1366,6 +1373,11 @@ static int dcbnl_ieee_get(struct net_device *netdev, struct nlattr **tb,
 	}
 
 	nla_nest_end(skb, ieee);
+	if (dcbx >= 0) {
+		err = nla_put_u8(skb, DCB_ATTR_DCBX, dcbx);
+		if (err)
+			goto nla_put_failure;
+	}
 	nlmsg_end(skb, nlh);
 
 	return rtnl_unicast(skb, &init_net, pid);


^ permalink raw reply related

* [PATCH 2/7] net: dcbnl, add multicast group for DCB
From: John Fastabend @ 2011-06-17 21:16 UTC (permalink / raw)
  To: davem; +Cc: netdev, shmulikr
In-Reply-To: <20110617211027.26578.11317.stgit@jf-dev1-dcblab>

Now that dcbnl is being used in many cases by more
than a single agent it is beneficial to be notified
when some entity either driver or user space has
changed the DCB attributes.

Today applications either end up polling the interface
or relying on a user space database to maintain the DCB
state and post events. Polling is a poor solution for
obvious reasons. And relying on a user space database
has its own downside. Namely it has created strange
boot dependencies requiring the database be populated
before any applications dependent on DCB attributes
starts or the application goes into a polling loop.
Populating the database requires negotiating link
setting with the peer and can take anywhere from less
than a second up to a few seconds depending on the switch
implementation.

Perhaps more importantly if another application or an
embedded agent sets a DCB link attribute the database
has no way of knowing other than polling the kernel.
This prevents applications from responding quickly to
changes in link events which at least in the FCoE case
and probably any other protocols expecting a lossless
link may result in IO errors.

By adding a multicast group for DCB we have clean way
to disseminate kernel DCB link attributes up to user
space. Avoiding the need for user space to maintain
a coherant database and disperse events that potentially
do not reflect the current link state.

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
---

 include/linux/rtnetlink.h |    2 
 net/dcb/dcbnl.c           |  228 +++++++++++++++++++++++++++++----------------
 2 files changed, 147 insertions(+), 83 deletions(-)

diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index bbad657..c81226a 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -585,6 +585,8 @@ enum rtnetlink_groups {
 #define RTNLGRP_PHONET_IFADDR	RTNLGRP_PHONET_IFADDR
 	RTNLGRP_PHONET_ROUTE,
 #define RTNLGRP_PHONET_ROUTE	RTNLGRP_PHONET_ROUTE
+	RTNLGRP_DCB,
+#define RTNLGRP_DCB		RTNLGRP_DCB
 	__RTNLGRP_MAX
 };
 #define RTNLGRP_MAX	(__RTNLGRP_MAX - 1)
diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index 3a6d97d..b514579 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -1166,64 +1166,6 @@ err:
 	return ret;
 }
 
-/* Handle IEEE 802.1Qaz SET commands. If any requested operation can not
- * be completed the entire msg is aborted and error value is returned.
- * No attempt is made to reconcile the case where only part of the
- * cmd can be completed.
- */
-static int dcbnl_ieee_set(struct net_device *netdev, struct nlattr **tb,
-			  u32 pid, u32 seq, u16 flags)
-{
-	const struct dcbnl_rtnl_ops *ops = netdev->dcbnl_ops;
-	struct nlattr *ieee[DCB_ATTR_IEEE_MAX + 1];
-	int err = -EOPNOTSUPP;
-
-	if (!ops)
-		goto err;
-
-	err = nla_parse_nested(ieee, DCB_ATTR_IEEE_MAX,
-			       tb[DCB_ATTR_IEEE], dcbnl_ieee_policy);
-	if (err)
-		goto err;
-
-	if (ieee[DCB_ATTR_IEEE_ETS] && ops->ieee_setets) {
-		struct ieee_ets *ets = nla_data(ieee[DCB_ATTR_IEEE_ETS]);
-		err = ops->ieee_setets(netdev, ets);
-		if (err)
-			goto err;
-	}
-
-	if (ieee[DCB_ATTR_IEEE_PFC] && ops->ieee_setpfc) {
-		struct ieee_pfc *pfc = nla_data(ieee[DCB_ATTR_IEEE_PFC]);
-		err = ops->ieee_setpfc(netdev, pfc);
-		if (err)
-			goto err;
-	}
-
-	if (ieee[DCB_ATTR_IEEE_APP_TABLE]) {
-		struct nlattr *attr;
-		int rem;
-
-		nla_for_each_nested(attr, ieee[DCB_ATTR_IEEE_APP_TABLE], rem) {
-			struct dcb_app *app_data;
-			if (nla_type(attr) != DCB_ATTR_IEEE_APP)
-				continue;
-			app_data = nla_data(attr);
-			if (ops->ieee_setapp)
-				err = ops->ieee_setapp(netdev, app_data);
-			else
-				err = dcb_setapp(netdev, app_data);
-			if (err)
-				goto err;
-		}
-	}
-
-err:
-	dcbnl_reply(err, RTM_SETDCB, DCB_CMD_IEEE_SET, DCB_ATTR_IEEE,
-		    pid, seq, flags);
-	return err;
-}
-
 static int dcbnl_build_peer_app(struct net_device *netdev, struct sk_buff* skb,
 				int app_nested_type, int app_info_type,
 				int app_entry_type)
@@ -1279,30 +1221,13 @@ nla_put_failure:
 }
 
 /* Handle IEEE 802.1Qaz GET commands. */
-static int dcbnl_ieee_get(struct net_device *netdev, struct nlattr **tb,
-			  u32 pid, u32 seq, u16 flags)
+static int dcbnl_ieee_fill(struct sk_buff *skb, struct net_device *netdev)
 {
-	struct sk_buff *skb;
-	struct nlmsghdr *nlh;
-	struct dcbmsg *dcb;
 	struct nlattr *ieee, *app;
 	struct dcb_app_type *itr;
 	const struct dcbnl_rtnl_ops *ops = netdev->dcbnl_ops;
 	int dcbx;
-	int err;
-
-	if (!ops)
-		return -EOPNOTSUPP;
-
-	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
-	if (!skb)
-		return -ENOBUFS;
-
-	nlh = NLMSG_NEW(skb, pid, seq, RTM_GETDCB, sizeof(*dcb), flags);
-
-	dcb = NLMSG_DATA(nlh);
-	dcb->dcb_family = AF_UNSPEC;
-	dcb->cmd = DCB_CMD_IEEE_GET;
+	int err = -EMSGSIZE;
 
 	NLA_PUT_STRING(skb, DCB_ATTR_IFNAME, netdev->name);
 
@@ -1378,16 +1303,153 @@ static int dcbnl_ieee_get(struct net_device *netdev, struct nlattr **tb,
 		if (err)
 			goto nla_put_failure;
 	}
-	nlmsg_end(skb, nlh);
 
-	return rtnl_unicast(skb, &init_net, pid);
+	return 0;
+
 nla_put_failure:
-	nlmsg_cancel(skb, nlh);
-nlmsg_failure:
-	kfree_skb(skb);
-	return -1;
+	return err;
 }
 
+static int dcbnl_notify(struct net_device *dev, int event, int cmd,
+			u32 seq, u32 pid)
+{
+	struct net *net = dev_net(dev);
+	struct sk_buff *skb;
+	struct nlmsghdr *nlh;
+	struct dcbmsg *dcb;
+	const struct dcbnl_rtnl_ops *ops = dev->dcbnl_ops;
+	int err;
+
+	if (!ops)
+		return -EOPNOTSUPP;
+
+	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!skb)
+		return -ENOBUFS;
+
+	nlh = nlmsg_put(skb, pid, 0, event, sizeof(*dcb), 0);
+	if (nlh == NULL) {
+		kfree(skb);
+		return -EMSGSIZE;
+	}
+
+	dcb = NLMSG_DATA(nlh);
+	dcb->dcb_family = AF_UNSPEC;
+	dcb->cmd = cmd;
+
+	err = dcbnl_ieee_fill(skb, dev);
+	if (err < 0) {
+		/* Report error to broadcast listeners */
+		nlmsg_cancel(skb, nlh);
+		kfree_skb(skb);
+		rtnl_set_sk_err(net, RTNLGRP_DCB, err);
+	} else {
+		/* End nlmsg and notify broadcast listeners */
+		nlmsg_end(skb, nlh);
+		rtnl_notify(skb, net, 0, RTNLGRP_DCB, NULL, GFP_KERNEL);
+	}
+
+	return err;
+}
+
+/* Handle IEEE 802.1Qaz SET commands. If any requested operation can not
+ * be completed the entire msg is aborted and error value is returned.
+ * No attempt is made to reconcile the case where only part of the
+ * cmd can be completed.
+ */
+static int dcbnl_ieee_set(struct net_device *netdev, struct nlattr **tb,
+			  u32 pid, u32 seq, u16 flags)
+{
+	const struct dcbnl_rtnl_ops *ops = netdev->dcbnl_ops;
+	struct nlattr *ieee[DCB_ATTR_IEEE_MAX + 1];
+	int err = -EOPNOTSUPP;
+
+	if (!ops)
+		return err;
+
+	err = nla_parse_nested(ieee, DCB_ATTR_IEEE_MAX,
+			       tb[DCB_ATTR_IEEE], dcbnl_ieee_policy);
+	if (err)
+		return err;
+
+	if (ieee[DCB_ATTR_IEEE_ETS] && ops->ieee_setets) {
+		struct ieee_ets *ets = nla_data(ieee[DCB_ATTR_IEEE_ETS]);
+		err = ops->ieee_setets(netdev, ets);
+		if (err)
+			goto err;
+	}
+
+	if (ieee[DCB_ATTR_IEEE_PFC] && ops->ieee_setpfc) {
+		struct ieee_pfc *pfc = nla_data(ieee[DCB_ATTR_IEEE_PFC]);
+		err = ops->ieee_setpfc(netdev, pfc);
+		if (err)
+			goto err;
+	}
+
+	if (ieee[DCB_ATTR_IEEE_APP_TABLE]) {
+		struct nlattr *attr;
+		int rem;
+
+		nla_for_each_nested(attr, ieee[DCB_ATTR_IEEE_APP_TABLE], rem) {
+			struct dcb_app *app_data;
+			if (nla_type(attr) != DCB_ATTR_IEEE_APP)
+				continue;
+			app_data = nla_data(attr);
+			if (ops->ieee_setapp)
+				err = ops->ieee_setapp(netdev, app_data);
+			else
+				err = dcb_setapp(netdev, app_data);
+			if (err)
+				goto err;
+		}
+	}
+
+err:
+	dcbnl_reply(err, RTM_SETDCB, DCB_CMD_IEEE_SET, DCB_ATTR_IEEE,
+		    pid, seq, flags);
+	dcbnl_notify(netdev, RTM_SETDCB, DCB_CMD_IEEE_SET, seq, 0);
+	return err;
+}
+
+static int dcbnl_ieee_get(struct net_device *netdev, struct nlattr **tb,
+			  u32 pid, u32 seq, u16 flags)
+{
+	struct net *net = dev_net(netdev);
+	struct sk_buff *skb;
+	struct nlmsghdr *nlh;
+	struct dcbmsg *dcb;
+	const struct dcbnl_rtnl_ops *ops = netdev->dcbnl_ops;
+	int err;
+
+	if (!ops)
+		return -EOPNOTSUPP;
+
+	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!skb)
+		return -ENOBUFS;
+
+	nlh = nlmsg_put(skb, pid, seq, RTM_GETDCB, sizeof(*dcb), flags);
+	if (nlh == NULL) {
+		kfree(skb);
+		return -EMSGSIZE;
+	}
+
+	dcb = NLMSG_DATA(nlh);
+	dcb->dcb_family = AF_UNSPEC;
+	dcb->cmd = DCB_CMD_IEEE_GET;
+
+	err = dcbnl_ieee_fill(skb, netdev);
+
+	if (err < 0) {
+		nlmsg_cancel(skb, nlh);
+		kfree_skb(skb);
+	} else {
+		nlmsg_end(skb, nlh);
+		err = rtnl_unicast(skb, net, pid);
+	}
+
+	return err;
+}
 /* DCBX configuration */
 static int dcbnl_getdcbx(struct net_device *netdev, struct nlattr **tb,
 			 u32 pid, u32 seq, u16 flags)


^ permalink raw reply related

* [PATCH 0/7] Series short description
From: John Fastabend @ 2011-06-17 21:16 UTC (permalink / raw)
  To: davem; +Cc: netdev, shmulikr

The following series implements improvements to the DCB netlink
interface. This work was done to improve Application handling
and support firmware based LLDP agents.

This adds a multicast group for DCB currently user space has
trouble keeping in sync across multiple applications and with
firmware agents making DCB attribute changes in the driver.

To help support the recently ratified IEEE802.1Qaz extension
which supports multiple APP data entries for the same
selector and protocol fields a series of ieee set and delete
routines were improved and in the delete case added.

Finally the patches 5 and 6 are fixes to currently existing
code.

Please consider for net-next-2.6 inclusion.

---

John Fastabend (7):
      dcb: Add missing error check in dcb_ieee_set()
      dcb: fix return type on dcb_setapp()
      dcb: Add dcb_ieee_getapp_mask() for drivers to query APP settings
      dcb: Add ieee_dcb_delapp() and dcb op to delete app entry
      dcb: Add ieee_dcb_setapp() to be used for IEEE 802.1Qaz APP data
      net: dcbnl, add multicast group for DCB
      dcb: Add DCBX capabilities bitmask to the get_ieee response

 include/linux/dcbnl.h     |    2 
 include/linux/rtnetlink.h |    2 
 include/net/dcbnl.h       |    6 +
 net/dcb/dcbnl.c           |  414 +++++++++++++++++++++++++++++++++++----------
 4 files changed, 334 insertions(+), 90 deletions(-)

-- 
Signature

^ permalink raw reply

* Re: [PATCH net-next 2/4] r8169: explicit firmware format check.
From: Ben Hutchings @ 2011-06-17 21:16 UTC (permalink / raw)
  To: Francois Romieu
  Cc: davem, netdev, Realtek linux nic maintainers, Hayes Wang,
	Ben Hutchings
In-Reply-To: <20110617193105.GB2287@electric-eye.fr.zoreil.com>

On Fri, 2011-06-17 at 21:31 +0200, Francois Romieu wrote:
[...]
> -	if (fw->size % sizeof(*phytable)) {
> -		netif_err(tp, probe, dev, "odd sized firmware %zd\n", fw->size);
> +	if (!rtl_fw_format_ok(tp, rtl_fw)) {
> +		netif_err(tp, probe, dev, "invalid firwmare\n");
[...]

Also, there's a typo in this error message.

Ben.

-- 
Ben Hutchings, Senior Software Engineer, Solarflare
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.


^ permalink raw reply

* Re: [PATCH net-next 3/4] r8169: support new firmware format.
From: Ben Hutchings @ 2011-06-17 21:15 UTC (permalink / raw)
  To: Francois Romieu
  Cc: davem, netdev, Realtek linux nic maintainers, Hayes Wang,
	Ben Hutchings
In-Reply-To: <20110617193133.GC2287@electric-eye.fr.zoreil.com>

On Fri, 2011-06-17 at 21:31 +0200, Francois Romieu wrote:
> The new firmware format adds versioning as firmware for a specific
> chipset appears to be subject to change. Current "legacy" format is
> still supported.
[...]
>  static bool rtl_fw_format_ok(struct rtl8169_private *tp, struct rtl_fw *rtl_fw)
>  {
[...] 
> -	if (!(fw->size % FW_OPCODE_SIZE)) {
> +	if (fw->size < FW_OPCODE_SIZE)
> +		goto out;
[...]
> +		if (fw->size % FW_OPCODE_SIZE)
> +			goto out;
> +
[...]
> -
> +out:
>  	return rc;
>  }

These changes belong in patch 2/4.

Ben.

-- 
Ben Hutchings, Senior Software Engineer, Solarflare
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.


^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox