Netdev List
 help / color / mirror / Atom feed
* [RFC] [PATCH] iproute2: Add IPsec extended sequence number support
From: Steffen Klassert @ 2010-11-22 10:37 UTC (permalink / raw)
  To: Herbert Xu, David Miller
  Cc: Andreas Gruenbacher, Alex Badea, netdev, linux-crypto
In-Reply-To: <20101122102455.GC1868@secunet.com>


Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/linux/xfrm.h |   12 ++++++++++++
 ip/ipxfrm.c          |    8 +++++++-
 ip/xfrm_state.c      |   37 +++++++++++++++++++++++++++++--------
 3 files changed, 48 insertions(+), 9 deletions(-)

diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h
index 07f2b63..dd6928d 100644
--- a/include/linux/xfrm.h
+++ b/include/linux/xfrm.h
@@ -84,6 +84,16 @@ struct xfrm_replay_state {
 	__u32	bitmap;
 };
 
+struct xfrm_replay_state_esn {
+	unsigned int	len;
+	__u32	oseq;
+	__u32	oseq_hi;
+	__u32	seq;
+	__u32	seq_hi;
+	__u32	replay_window;
+	__u32	bmp[0];
+};
+
 struct xfrm_algo {
 	char		alg_name[64];
 	unsigned int	alg_key_len;    /* in bits */
@@ -283,6 +293,7 @@ enum xfrm_attr_type_t {
 	XFRMA_KMADDRESS,        /* struct xfrm_user_kmaddress */
 	XFRMA_ALG_AUTH_TRUNC,	/* struct xfrm_algo_auth */
 	XFRMA_MARK,		/* struct xfrm_mark */
+	XFRMA_REPLAY_ESN_VAL,	/* struct xfrm_replay_state_esn */
 	__XFRMA_MAX
 
 #define XFRMA_MAX (__XFRMA_MAX - 1)
@@ -349,6 +360,7 @@ struct xfrm_usersa_info {
 #define XFRM_STATE_WILDRECV	8
 #define XFRM_STATE_ICMP		16
 #define XFRM_STATE_AF_UNSPEC	32
+#define XFRM_STATE_ESN          64
 };
 
 struct xfrm_usersa_id {
diff --git a/ip/ipxfrm.c b/ip/ipxfrm.c
index 99a6756..548e4a4 100644
--- a/ip/ipxfrm.c
+++ b/ip/ipxfrm.c
@@ -665,6 +665,12 @@ done:
 void xfrm_xfrma_print(struct rtattr *tb[], __u16 family,
 		      FILE *fp, const char *prefix)
 {
+
+	if (tb[XFRMA_REPLAY_ESN_VAL]) {
+		struct rtattr *rta = tb[XFRMA_REPLAY_ESN_VAL];
+		struct xfrm_replay_state_esn *repl = (struct xfrm_replay_state_esn *) RTA_DATA(rta);
+		fprintf(fp, "\treplay-window %u\n", repl->replay_window);
+	}
 	if (tb[XFRMA_MARK]) {
 		struct rtattr *rta = tb[XFRMA_MARK];
 		struct xfrm_mark *m = (struct xfrm_mark *) RTA_DATA(rta);
@@ -809,7 +815,6 @@ void xfrm_state_info_print(struct xfrm_usersa_info *xsinfo,
 	STRBUF_CAT(buf, "\t");
 
 	fputs(buf, fp);
-	fprintf(fp, "replay-window %u ", xsinfo->replay_window);
 	if (show_stats > 0)
 		fprintf(fp, "seq 0x%08u ", xsinfo->seq);
 	if (show_stats > 0 || xsinfo->flags) {
@@ -822,6 +827,7 @@ void xfrm_state_info_print(struct xfrm_usersa_info *xsinfo,
 		XFRM_FLAG_PRINT(fp, flags, XFRM_STATE_WILDRECV, "wildrecv");
 		XFRM_FLAG_PRINT(fp, flags, XFRM_STATE_ICMP, "icmp");
 		XFRM_FLAG_PRINT(fp, flags, XFRM_STATE_AF_UNSPEC, "af-unspec");
+		XFRM_FLAG_PRINT(fp, flags, XFRM_STATE_ESN, "replay-esn");
 		if (flags)
 			fprintf(fp, "%x", flags);
 	}
diff --git a/ip/xfrm_state.c b/ip/xfrm_state.c
index 38d4039..4c66923 100644
--- a/ip/xfrm_state.c
+++ b/ip/xfrm_state.c
@@ -50,15 +50,16 @@
 #define NLMSG_BUF_SIZE 4096
 #define RTA_BUF_SIZE 2048
 #define XFRM_ALGO_KEY_BUF_SIZE 512
+#define XFRM_REPLAY_BMP_SIZE_U32 16
 
 static void usage(void) __attribute__((noreturn));
 
 static void usage(void)
 {
 	fprintf(stderr, "Usage: ip xfrm state { add | update } ID [ XFRM_OPT ] [ mode MODE ]\n");
-	fprintf(stderr, "        [ reqid REQID ] [ seq SEQ ] [ replay-window SIZE ] [ flag FLAG-LIST ]\n");
+	fprintf(stderr, "        [ reqid REQID ] [ seq SEQ ][ replay-window SIZE ] [ flag FLAG-LIST ]\n");
 	fprintf(stderr, "        [ encap ENCAP ] [ sel SELECTOR ] [ replay-seq SEQ ]\n");
-	fprintf(stderr, "        [ replay-oseq SEQ ] [ LIMIT-LIST ]\n");
+	fprintf(stderr, "        [ replay-oseq SEQ ] [ replay-seqhi SEQ ] [ replay-oseqhi SEQ ] [ LIMIT-LIST ]\n");
 	fprintf(stderr, "Usage: ip xfrm state allocspi ID [ mode MODE ] [ reqid REQID ] [ seq SEQ ]\n");
 	fprintf(stderr, "        [ min SPI max SPI ]\n");
 	fprintf(stderr, "Usage: ip xfrm state { delete | get } ID\n");
@@ -214,6 +215,8 @@ static int xfrm_state_flag_parse(__u8 *flags, int *argcp, char ***argvp)
 				*flags |= XFRM_STATE_ICMP;
 			else if (strcmp(*argv, "af-unspec") == 0)
 				*flags |= XFRM_STATE_AF_UNSPEC;
+			else if (strcmp(*argv, "replay-esn") == 0)
+				*flags |= XFRM_STATE_ESN;
 			else {
 				PREV_ARG(); /* back track */
 				break;
@@ -239,7 +242,11 @@ static int xfrm_state_modify(int cmd, unsigned flags, int argc, char **argv)
 		struct xfrm_usersa_info xsinfo;
 		char   			buf[RTA_BUF_SIZE];
 	} req;
-	struct xfrm_replay_state replay;
+	struct {
+		struct xfrm_replay_state_esn state;
+		__u32 bmp[XFRM_REPLAY_BMP_SIZE_U32];
+	} replay;
+
 	char *idp = NULL;
 	char *aeadop = NULL;
 	char *ealgop = NULL;
@@ -249,8 +256,11 @@ static int xfrm_state_modify(int cmd, unsigned flags, int argc, char **argv)
 	struct xfrm_mark mark = {0, 0};
 
 	memset(&req, 0, sizeof(req));
+
 	memset(&replay, 0, sizeof(replay));
 
+	replay.state.len = sizeof(replay);
+
 	req.n.nlmsg_len = NLMSG_LENGTH(sizeof(req.xsinfo));
 	req.n.nlmsg_flags = NLM_F_REQUEST|flags;
 	req.n.nlmsg_type = cmd;
@@ -275,16 +285,24 @@ static int xfrm_state_modify(int cmd, unsigned flags, int argc, char **argv)
 			xfrm_seq_parse(&req.xsinfo.seq, &argc, &argv);
 		} else if (strcmp(*argv, "replay-window") == 0) {
 			NEXT_ARG();
-			if (get_u8(&req.xsinfo.replay_window, *argv, 0))
+			if (get_u32(&replay.state.replay_window, *argv, 0))
 				invarg("\"replay-window\" value is invalid", *argv);
 		} else if (strcmp(*argv, "replay-seq") == 0) {
 			NEXT_ARG();
-			if (get_u32(&replay.seq, *argv, 0))
+			if (get_u32(&replay.state.seq, *argv, 0))
 				invarg("\"replay-seq\" value is invalid", *argv);
 		} else if (strcmp(*argv, "replay-oseq") == 0) {
 			NEXT_ARG();
-			if (get_u32(&replay.oseq, *argv, 0))
+			if (get_u32(&replay.state.oseq, *argv, 0))
 				invarg("\"replay-oseq\" value is invalid", *argv);
+		} else if (strcmp(*argv, "replay-seqhi") == 0) {
+			NEXT_ARG();
+			if (get_u32(&replay.state.seq_hi, *argv, 0))
+				invarg("\"replay-seqhi\" value is invalid", *argv);
+		} else if (strcmp(*argv, "replay-oseqhi") == 0) {
+			NEXT_ARG();
+			if (get_u32(&replay.state.oseq_hi, *argv, 0))
+				invarg("\"replay-oseqhi\" value is invalid", *argv);
 		} else if (strcmp(*argv, "flag") == 0) {
 			NEXT_ARG();
 			xfrm_state_flag_parse(&req.xsinfo.flags, &argc, &argv);
@@ -434,9 +452,12 @@ parse_algo:
 		argc--; argv++;
 	}
 
-	if (replay.seq || replay.oseq)
-		addattr_l(&req.n, sizeof(req.buf), XFRMA_REPLAY_VAL,
+	if (replay.state.replay_window || replay.state.seq || replay.state.oseq ||
+	    replay.state.seq_hi || replay.state.oseq_hi ) {
+
+		addattr_l(&req.n, sizeof(req.buf), XFRMA_REPLAY_ESN_VAL,
 			  (void *)&replay, sizeof(replay));
+	}
 
 	if (!idp) {
 		fprintf(stderr, "Not enough information: \"ID\" is required\n");
-- 
1.7.0.4

^ permalink raw reply related

* Re: iwl3945: regression - unregister_netdevice: waiting for wlan0 to become free. Usage count = 1
From: Eric Dumazet @ 2010-11-22 11:19 UTC (permalink / raw)
  To: Michal Hocko, David Miller
  Cc: linux-wireless, Reinette Chatre, Wey-Yi Guy, Intel Linux Wireless,
	LKML, netdev
In-Reply-To: <20101122104341.GA3897@tiehlicka.suse.cz>

Le lundi 22 novembre 2010 à 11:43 +0100, Michal Hocko a écrit :
> Hi,
> I am experiencing iwl3945 driver reference counting problem with the current
> 2.6.37-rc2 kernel release. 
> 
> The problem can be easily reproduced by associating to an AP (I have
> tried only wep and wpa based auth) and then rmmod iwl3945 which gets
> stuck in unregister_netdevice (in the endless loop as there is no way -
> I guess - to decrease the reference count to 0) spitting the following
> message:
> 
> unregister_netdevice: waiting for wlan0 to become free. Usage count = 1
> 
> The situation is even worse as the rmmod process sits on the CPU and
> cannot be killed and the loop never ends so the only solution is
> rebooting.
> 
> 2.6.37-rc1 looks good. I have tried to bisect to the culprit but I am
> failing to boot some of the kernels on the way (due to some unrelated
> issue which is already fixed in rc2). This is a partial bisect log
> (maybe it helps to narrow down the range of commits):
> 
> $ git bisect log
> git bisect start
> # bad: [e53beacd23d9cb47590da6a7a7f6d417b941a994] Linux 2.6.37-rc2
> git bisect bad e53beacd23d9cb47590da6a7a7f6d417b941a994
> # good: [151f52f09c5728ecfdd0c289da1a4b30bb416f2c] ipw2x00: remove the right /proc/net entry
> git bisect good 151f52f09c5728ecfdd0c289da1a4b30bb416f2c
> # good: [891cbd30ef456664e50bbd28436ef3006a81cf7c] Merge branch 'upstream/core' of git://git.kernel.org/pub/scm/linux/kernel/git/jeremy/xen
> git bisect good 891cbd30ef456664e50bbd28436ef3006a81cf7c
> # good: [80ef913f5e6a84551545016cea709f5e96d0cda6] Merge branch 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jgarzik/libata-dev
> git bisect good 80ef913f5e6a84551545016cea709f5e96d0cda6
> # bad: [c22cff08db00ef0411be088956d7934681a1f988] Merge branch 'v4l_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-2.6
> git bisect bad c22cff08db00ef0411be088956d7934681a1f988
> # skip: [8877870f8a8127b653f8c9a55c6b4de9f96f639b] Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless-2.6
> git bisect skip 8877870f8a8127b653f8c9a55c6b4de9f96f639b
> # skip: [75e6047431872dadd0b13503b374e48ccd71a507] ucc_geth: Fix deadlock
> git bisect skip 75e6047431872dadd0b13503b374e48ccd71a507
> # skip: [18943d292facbc70e6a36fc62399ae833f64671b] inet: fix ip_mc_drop_socket()
> git bisect skip 18943d292facbc70e6a36fc62399ae833f64671b
> [Here I gave up]
> 
> skipped steps are non-bootable kernels. From the log it looks like the last
> good commit is 80ef913f5e6a845.
> 
> There is only one patch in that range for iwlwifi drivers:
> $ git shortlog 80ef913f5e6a845..v2.6.37-rc2 -- drivers/net/wireless/iwlwifi/
> Wey-Yi Guy (1):
>       iwlwifi: dont use pci_dev before it being assign
> 
> and this one doesn't look to manipulate the reference counting so I
> guess this is a more general problem.
> 
> $ git shortlog 80ef913f5e6a845..v2.6.37-rc2 -- net/wireless/
> Felix Fietkau (1):
>       cfg80211: fix a crash in dev lookup on dump commands
> 
> Doesn't look to be the culprit either, though. Btw. I am getting the
> same patches also in the v2.6.37-rc1..v2.6.37-rc2 range.
> 
> Please let me know if you need any further information.
> Thanks!


Oh well, it seems David put the fix in net-next-2.6 instead of net-2.6

Please try :

http://git.kernel.org/?p=linux/kernel/git/davem/net-next-2.6.git;a=commitdiff;h=9d82ca98f71fd686ef2f3017c5e3e6a4871b6e46

Thanks

^ permalink raw reply

* [PATCH net-next-2.6]: rtnl: make link af-specific updates atomic
From: Thomas Graf @ 2010-11-22 11:31 UTC (permalink / raw)
  To: davem; +Cc: netdev

As David pointed out correctly, updates to af-specific attributes
are currently not atomic. If multiple changes are requested and
one of them fails, previous updates may have been applied already,
leaving the link behind in an undefined state.

This patch splits the function parse_link_af() into two functions,
validate_link_af() and set_link_af(). validate_link_af() is called
from validate_linkmsg() to check for errors as early as possible, before
any changes to the link have been made. set_link_af() is called to
commit the changes later.

This method is not fail proof. While it is currently sufficient
to make set_link_af() inerrable and thus 100% atomic, the
validation function method will not be able to detect all error
scenarios in the future; there will likely always be errors
depending on states which are, e.g., not protected by rtnl_mutex
and thus may change between validation and setting.

Also, instead of silently ignoring unknown address families and
config blocks for address families which did not register a set
function, the errors EAFNOSUPPORT and EOPNOTSUPP, respectively, are
returned to avoid committing 4 out of 5 update requests without
notifying the user.

Signed-off-by: Thomas Graf <tgraf@infradead.org>

Index: net-next-2.6/include/net/rtnetlink.h
===================================================================
--- net-next-2.6.orig/include/net/rtnetlink.h
+++ net-next-2.6/include/net/rtnetlink.h
@@ -92,8 +92,10 @@ extern void	rtnl_link_unregister(struct 
  * 		       specific netlink attributes.
  * 	@get_link_af_size: Function to calculate size of address family specific
  * 			   netlink attributes exlusive the container attribute.
- * 	@parse_link_af: Function to parse a IFLA_AF_SPEC attribute and modify
- *			net_device accordingly.
+ *	@validate_link_af: Validate a IFLA_AF_SPEC attribute, must check attr
+ *			   for invalid configuration settings.
+ * 	@set_link_af: Function to parse a IFLA_AF_SPEC attribute and modify
+ *		      net_device accordingly.
  */
 struct rtnl_af_ops {
 	struct list_head	list;
@@ -103,8 +105,10 @@ struct rtnl_af_ops {
 						const struct net_device *dev);
 	size_t			(*get_link_af_size)(const struct net_device *dev);
 
-	int			(*parse_link_af)(struct net_device *dev,
-						 const struct nlattr *attr);
+	int			(*validate_link_af)(const struct net_device *dev,
+						    const struct nlattr *attr);
+	int			(*set_link_af)(struct net_device *dev,
+					       const struct nlattr *attr);
 };
 
 extern int	__rtnl_af_register(struct rtnl_af_ops *ops);
Index: net-next-2.6/net/core/rtnetlink.c
===================================================================
--- net-next-2.6.orig/net/core/rtnetlink.c
+++ net-next-2.6/net/core/rtnetlink.c
@@ -1107,6 +1107,28 @@ static int validate_linkmsg(struct net_d
 			return -EINVAL;
 	}
 
+	if (tb[IFLA_AF_SPEC]) {
+		struct nlattr *af;
+		int rem, err;
+
+		nla_for_each_nested(af, tb[IFLA_AF_SPEC], rem) {
+			const struct rtnl_af_ops *af_ops;
+
+			if (!(af_ops = rtnl_af_lookup(nla_type(af))))
+				return -EAFNOSUPPORT;
+
+			if (!af_ops->set_link_af)
+				return -EOPNOTSUPP;
+
+			if (af_ops->validate_link_af) {
+				err = af_ops->validate_link_af(dev,
+			    				tb[IFLA_AF_SPEC]);
+				if (err < 0)
+					return err;
+			}
+		}
+	}
+
 	return 0;
 }
 
@@ -1356,12 +1378,9 @@ static int do_setlink(struct net_device 
 			const struct rtnl_af_ops *af_ops;
 
 			if (!(af_ops = rtnl_af_lookup(nla_type(af))))
-				continue;
-
-			if (!af_ops->parse_link_af)
-				continue;
+				BUG();
 
-			err = af_ops->parse_link_af(dev, af);
+			err = af_ops->set_link_af(dev, af);
 			if (err < 0)
 				goto errout;
 
Index: net-next-2.6/net/ipv4/devinet.c
===================================================================
--- net-next-2.6.orig/net/ipv4/devinet.c
+++ net-next-2.6/net/ipv4/devinet.c
@@ -1289,14 +1289,14 @@ static const struct nla_policy inet_af_p
 	[IFLA_INET_CONF]	= { .type = NLA_NESTED },
 };
 
-static int inet_parse_link_af(struct net_device *dev, const struct nlattr *nla)
+static int inet_validate_link_af(const struct net_device *dev,
+				 const struct nlattr *nla)
 {
-	struct in_device *in_dev = __in_dev_get_rcu(dev);
 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
 	int err, rem;
 
-	if (!in_dev)
-		return -EOPNOTSUPP;
+	if (dev && !__in_dev_get_rcu(dev))
+		return -EAFNOSUPPORT;
 
 	err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
 	if (err < 0)
@@ -1314,6 +1314,21 @@ static int inet_parse_link_af(struct net
 		}
 	}
 
+	return 0;
+}
+
+static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
+{
+	struct in_device *in_dev = __in_dev_get_rcu(dev);
+	struct nlattr *a, *tb[IFLA_INET_MAX+1];
+	int rem;
+
+	if (!in_dev)
+		return -EAFNOSUPPORT;
+
+	if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
+		BUG();
+
 	if (tb[IFLA_INET_CONF]) {
 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
 			ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
@@ -1689,7 +1704,8 @@ static struct rtnl_af_ops inet_af_ops = 
 	.family		  = AF_INET,
 	.fill_link_af	  = inet_fill_link_af,
 	.get_link_af_size = inet_get_link_af_size,
-	.parse_link_af	  = inet_parse_link_af,
+	.validate_link_af = inet_validate_link_af,
+	.set_link_af	  = inet_set_link_af,
 };
 
 void __init devinet_init(void)
Index: net-next-2.6/net/ipv6/addrconf.c
===================================================================
--- net-next-2.6.orig/net/ipv6/addrconf.c
+++ net-next-2.6/net/ipv6/addrconf.c
@@ -3956,11 +3956,6 @@ static int inet6_fill_link_af(struct sk_
 	return 0;
 }
 
-static int inet6_parse_link_af(struct net_device *dev, const struct nlattr *nla)
-{
-	return -EOPNOTSUPP;
-}
-
 static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev,
 			     u32 pid, u32 seq, int event, unsigned int flags)
 {
@@ -4670,7 +4665,6 @@ static struct rtnl_af_ops inet6_ops = {
 	.family		  = AF_INET6,
 	.fill_link_af	  = inet6_fill_link_af,
 	.get_link_af_size = inet6_get_link_af_size,
-	.parse_link_af	  = inet6_parse_link_af,
 };
 
 /*

^ permalink raw reply

* Re: [PATCH 2/2 v7] xps: Transmit Packet Steering
From: Changli Gao @ 2010-11-22 11:42 UTC (permalink / raw)
  To: Tom Herbert; +Cc: davem, netdev, eric.dumazet
In-Reply-To: <alpine.DEB.2.00.1011211501430.14906@pokey.mtv.corp.google.com>

On Mon, Nov 22, 2010 at 7:17 AM, Tom Herbert <therbert@google.com> wrote:
> This patch implements transmit packet steering (XPS) for multiqueue
> devices.  XPS selects a transmit queue during packet transmission based
> on configuration.  This is done by mapping the CPU transmitting the
> packet to a queue.  This is the transmit side analogue to RPS-- where
> RPS is selecting a CPU based on receive queue, XPS selects a queue
> based on the CPU (previously there was an XPS patch from Eric
> Dumazet, but that might more appropriately be called transmit completion
> steering).
>
> Each transmit queue can be associated with a number of CPUs which will
> use the queue to send packets.  This is configured as a CPU mask on a
> per queue basis in:
>
> /sys/class/net/eth<n>/queues/tx-<n>/xps_cpus
>
> The mappings are stored per device in an inverted data structure that
> maps CPUs to queues.  In the netdevice structure this is an array of
> num_possible_cpu structures where each structure holds an array of
> queue_indexes for queues which that CPU can use.
>
> The benefits of XPS are improved locality in the per queue data
> structures.  Also, transmit completions are more likely to be done
> nearer to the sending thread, so this should promote locality back
> to the socket on free (e.g. UDP).  The benefits of XPS are dependent on
> cache hierarchy, application load, and other factors.  XPS would
> nominally be configured so that a queue would only be shared by CPUs
> which are sharing a cache; the degenerate configuration would be that
> each CPU has its own queue.
>
> Below are some benchmark results which show the potential benefit of
> this patch.  The netperf test has 500 instances of netperf TCP_RR test
> with 1 byte req. and resp.
>
> bnx2x on 16 core AMD
>   XPS (16 queues, 1 TX queue per CPU)  1234K at 100% CPU
>   No XPS (16 queues)                   996K at 100% CPU
>
> Signed-off-by: Tom Herbert <therbert@google.com>
> ---
>  include/linux/netdevice.h |   30 ++++
>  net/core/dev.c            |   53 ++++++-
>  net/core/net-sysfs.c      |  369 ++++++++++++++++++++++++++++++++++++++++++++-
>  net/core/net-sysfs.h      |    3 +
>  4 files changed, 447 insertions(+), 8 deletions(-)
>
> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> index b45c1b8..badf928 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -503,6 +503,10 @@ struct netdev_queue {
>        struct Qdisc            *qdisc;
>        unsigned long           state;
>        struct Qdisc            *qdisc_sleeping;
> +#ifdef CONFIG_RPS
> +       struct kobject          kobj;
> +#endif
> +

Why do you reuse CONFIG_RPS? I think it is confusing, as the code
enclosed is for XPS not RPS.

-- 
Regards,
Changli Gao(xiaosuo@gmail.com)

^ permalink raw reply

* Re: linux-next: Tree for November 18 (netfilter)
From: KOVACS Krisztian @ 2010-11-22 12:14 UTC (permalink / raw)
  To: Patrick McHardy
  Cc: Randy Dunlap, Stephen Rothwell, netfilter-devel, linux-next, LKML,
	netdev, Balazs Scheidler
In-Reply-To: <4CE5712B.2080909@trash.net>

Hi,

On Thu, 2010-11-18 at 19:32 +0100, Patrick McHardy wrote:
> Krisztian, Balazs, could you please have a look at this? If the
> intention is to use NF_DEFRAG_IPV6 without conntrack, we probably
> need a couple of ifdefs.

Indeed, we were missing quite a few of those ifdefs... The patch below
seems to fix the issue for me.

commit ec0ac6f3e7749e25f481c1e0f75766974820fe84
Author: KOVACS Krisztian <hidden@balabit.hu>
Date:   Mon Nov 22 13:07:15 2010 +0100

    netfilter: fix compilation when conntrack is disabled but tproxy is
enabled
    
    The IPv6 tproxy patches split IPv6 defragmentation off of conntrack,
but
    failed to update the #ifdef stanzas guarding the defragmentation
related
    fields and code in skbuff and conntrack related code in
nf_defrag_ipv6.c.
    
    This patch adds the required #ifdefs so that IPv6 tproxy can truly
be used
    without connection tracking.
    
    Original report:
    http://marc.info/?l=linux-netdev&m=129010118516341&w=2
    
    Reported-by: Randy Dunlap <randy.dunlap@oracle.com>
    Signed-off-by: KOVACS Krisztian <hidden@balabit.hu>

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index e6ba898..4f2db79 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -255,6 +255,11 @@ typedef unsigned int sk_buff_data_t;
 typedef unsigned char *sk_buff_data_t;
 #endif
 
+#if defined(CONFIG_NF_DEFRAG_IPV4) ||
defined(CONFIG_NF_DEFRAG_IPV4_MODULE) || \
+    defined(CONFIG_NF_DEFRAG_IPV6) ||
defined(CONFIG_NF_DEFRAG_IPV6_MODULE)
+#define NET_SKBUFF_NF_DEFRAG_NEEDED 1
+#endif
+
 /** 
  *	struct sk_buff - socket buffer
  *	@next: Next buffer in list
@@ -362,6 +367,8 @@ struct sk_buff {
 	void			(*destructor)(struct sk_buff *skb);
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 	struct nf_conntrack	*nfct;
+#endif
+#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
 	struct sk_buff		*nfct_reasm;
 #endif
 #ifdef CONFIG_BRIDGE_NETFILTER
@@ -2051,6 +2058,8 @@ static inline void nf_conntrack_get(struct
nf_conntrack *nfct)
 	if (nfct)
 		atomic_inc(&nfct->use);
 }
+#endif
+#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
 static inline void nf_conntrack_get_reasm(struct sk_buff *skb)
 {
 	if (skb)
@@ -2079,6 +2088,8 @@ static inline void nf_reset(struct sk_buff *skb)
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 	nf_conntrack_put(skb->nfct);
 	skb->nfct = NULL;
+#endif
+#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
 	nf_conntrack_put_reasm(skb->nfct_reasm);
 	skb->nfct_reasm = NULL;
 #endif
@@ -2095,6 +2106,8 @@ static inline void __nf_copy(struct sk_buff *dst,
const struct sk_buff *src)
 	dst->nfct = src->nfct;
 	nf_conntrack_get(src->nfct);
 	dst->nfctinfo = src->nfctinfo;
+#endif
+#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
 	dst->nfct_reasm = src->nfct_reasm;
 	nf_conntrack_get_reasm(src->nfct_reasm);
 #endif
@@ -2108,6 +2121,8 @@ static inline void nf_copy(struct sk_buff *dst,
const struct sk_buff *src)
 {
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 	nf_conntrack_put(dst->nfct);
+#endif
+#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
 	nf_conntrack_put_reasm(dst->nfct_reasm);
 #endif
 #ifdef CONFIG_BRIDGE_NETFILTER
diff --git a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
index 1ee717e..a4c9936 100644
--- a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
+++ b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
@@ -7,16 +7,6 @@ extern struct nf_conntrack_l4proto
nf_conntrack_l4proto_tcp6;
 extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6;
 extern struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6;
 
-extern int nf_ct_frag6_init(void);
-extern void nf_ct_frag6_cleanup(void);
-extern struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32
user);
-extern void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff
*skb,
-			       struct net_device *in,
-			       struct net_device *out,
-			       int (*okfn)(struct sk_buff *));
-
-struct inet_frags_ctl;
-
 #include <linux/sysctl.h>
 extern struct ctl_table nf_ct_ipv6_sysctl_table[];
 
diff --git a/include/net/netfilter/ipv6/nf_defrag_ipv6.h
b/include/net/netfilter/ipv6/nf_defrag_ipv6.h
index 94dd54d..fd79c9a 100644
--- a/include/net/netfilter/ipv6/nf_defrag_ipv6.h
+++ b/include/net/netfilter/ipv6/nf_defrag_ipv6.h
@@ -3,4 +3,14 @@
 
 extern void nf_defrag_ipv6_enable(void);
 
+extern int nf_ct_frag6_init(void);
+extern void nf_ct_frag6_cleanup(void);
+extern struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32
user);
+extern void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff
*skb,
+			       struct net_device *in,
+			       struct net_device *out,
+			       int (*okfn)(struct sk_buff *));
+
+struct inet_frags_ctl;
+
 #endif /* _NF_DEFRAG_IPV6_H */
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 104f844..74ebf4b 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -380,6 +380,8 @@ static void skb_release_head_state(struct sk_buff
*skb)
 	}
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 	nf_conntrack_put(skb->nfct);
+#endif
+#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
 	nf_conntrack_put_reasm(skb->nfct_reasm);
 #endif
 #ifdef CONFIG_BRIDGE_NETFILTER
diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
index 99abfb5..97c5b21 100644
--- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
+++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
@@ -19,13 +19,15 @@
 
 #include <linux/netfilter_ipv6.h>
 #include <linux/netfilter_bridge.h>
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_helper.h>
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_l3proto.h>
 #include <net/netfilter/nf_conntrack_core.h>
-#include <net/netfilter/nf_conntrack_zones.h>
 #include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
+#endif
+#include <net/netfilter/nf_conntrack_zones.h>
 #include <net/netfilter/ipv6/nf_defrag_ipv6.h>
 
 static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum,
@@ -33,8 +35,10 @@ static enum ip6_defrag_users
nf_ct6_defrag_user(unsigned int hooknum,
 {
 	u16 zone = NF_CT_DEFAULT_ZONE;
 
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 	if (skb->nfct)
 		zone = nf_ct_zone((struct nf_conn *)skb->nfct);
+#endif
 
 #ifdef CONFIG_BRIDGE_NETFILTER
 	if (skb->nf_bridge &&
@@ -56,9 +60,11 @@ static unsigned int ipv6_defrag(unsigned int hooknum,
 {
 	struct sk_buff *reasm;
 
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 	/* Previously seen (loopback)?	*/
 	if (skb->nfct && !nf_ct_is_template((struct nf_conn *)skb->nfct))
 		return NF_ACCEPT;
+#endif
 
 	reasm = nf_ct_frag6_gather(skb, nf_ct6_defrag_user(hooknum, skb));
 	/* queued */



^ permalink raw reply related

* Re: linux-next: Tree for November 18 (netfilter)
From: KOVACS Krisztian @ 2010-11-22 12:28 UTC (permalink / raw)
  To: Patrick McHardy
  Cc: Randy Dunlap, Stephen Rothwell, netfilter-devel, linux-next, LKML,
	netdev, Balazs Scheidler
In-Reply-To: <1290428077.725556.3.camel@nienna.balabit>

Hi,

On Mon, 2010-11-22 at 13:14 +0100, KOVACS Krisztian wrote:
> Indeed, we were missing quite a few of those ifdefs... The patch below
> seems to fix the issue for me.
> 
> commit ec0ac6f3e7749e25f481c1e0f75766974820fe84
> Author: KOVACS Krisztian <hidden@balabit.hu>
> Date:   Mon Nov 22 13:07:15 2010 +0100

Bah, it seems the patch got line-wrapped by my MUA, here it is again.
Let's hope I got it right this time...

commit ec0ac6f3e7749e25f481c1e0f75766974820fe84
Author: KOVACS Krisztian <hidden@balabit.hu>
Date:   Mon Nov 22 13:07:15 2010 +0100

    netfilter: fix compilation when conntrack is disabled but tproxy is enabled
    
    The IPv6 tproxy patches split IPv6 defragmentation off of conntrack, but
    failed to update the #ifdef stanzas guarding the defragmentation related
    fields and code in skbuff and conntrack related code in nf_defrag_ipv6.c.
    
    This patch adds the required #ifdefs so that IPv6 tproxy can truly be used
    without connection tracking.
    
    Original report:
    http://marc.info/?l=linux-netdev&m=129010118516341&w=2
    
    Reported-by: Randy Dunlap <randy.dunlap@oracle.com>
    Signed-off-by: KOVACS Krisztian <hidden@balabit.hu>

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index e6ba898..4f2db79 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -255,6 +255,11 @@ typedef unsigned int sk_buff_data_t;
 typedef unsigned char *sk_buff_data_t;
 #endif
 
+#if defined(CONFIG_NF_DEFRAG_IPV4) || defined(CONFIG_NF_DEFRAG_IPV4_MODULE) || \
+    defined(CONFIG_NF_DEFRAG_IPV6) || defined(CONFIG_NF_DEFRAG_IPV6_MODULE)
+#define NET_SKBUFF_NF_DEFRAG_NEEDED 1
+#endif
+
 /** 
  *	struct sk_buff - socket buffer
  *	@next: Next buffer in list
@@ -362,6 +367,8 @@ struct sk_buff {
 	void			(*destructor)(struct sk_buff *skb);
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 	struct nf_conntrack	*nfct;
+#endif
+#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
 	struct sk_buff		*nfct_reasm;
 #endif
 #ifdef CONFIG_BRIDGE_NETFILTER
@@ -2051,6 +2058,8 @@ static inline void nf_conntrack_get(struct nf_conntrack *nfct)
 	if (nfct)
 		atomic_inc(&nfct->use);
 }
+#endif
+#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
 static inline void nf_conntrack_get_reasm(struct sk_buff *skb)
 {
 	if (skb)
@@ -2079,6 +2088,8 @@ static inline void nf_reset(struct sk_buff *skb)
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 	nf_conntrack_put(skb->nfct);
 	skb->nfct = NULL;
+#endif
+#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
 	nf_conntrack_put_reasm(skb->nfct_reasm);
 	skb->nfct_reasm = NULL;
 #endif
@@ -2095,6 +2106,8 @@ static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src)
 	dst->nfct = src->nfct;
 	nf_conntrack_get(src->nfct);
 	dst->nfctinfo = src->nfctinfo;
+#endif
+#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
 	dst->nfct_reasm = src->nfct_reasm;
 	nf_conntrack_get_reasm(src->nfct_reasm);
 #endif
@@ -2108,6 +2121,8 @@ static inline void nf_copy(struct sk_buff *dst, const struct sk_buff *src)
 {
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 	nf_conntrack_put(dst->nfct);
+#endif
+#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
 	nf_conntrack_put_reasm(dst->nfct_reasm);
 #endif
 #ifdef CONFIG_BRIDGE_NETFILTER
diff --git a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
index 1ee717e..a4c9936 100644
--- a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
+++ b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
@@ -7,16 +7,6 @@ extern struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6;
 extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6;
 extern struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6;
 
-extern int nf_ct_frag6_init(void);
-extern void nf_ct_frag6_cleanup(void);
-extern struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user);
-extern void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb,
-			       struct net_device *in,
-			       struct net_device *out,
-			       int (*okfn)(struct sk_buff *));
-
-struct inet_frags_ctl;
-
 #include <linux/sysctl.h>
 extern struct ctl_table nf_ct_ipv6_sysctl_table[];
 
diff --git a/include/net/netfilter/ipv6/nf_defrag_ipv6.h b/include/net/netfilter/ipv6/nf_defrag_ipv6.h
index 94dd54d..fd79c9a 100644
--- a/include/net/netfilter/ipv6/nf_defrag_ipv6.h
+++ b/include/net/netfilter/ipv6/nf_defrag_ipv6.h
@@ -3,4 +3,14 @@
 
 extern void nf_defrag_ipv6_enable(void);
 
+extern int nf_ct_frag6_init(void);
+extern void nf_ct_frag6_cleanup(void);
+extern struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user);
+extern void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb,
+			       struct net_device *in,
+			       struct net_device *out,
+			       int (*okfn)(struct sk_buff *));
+
+struct inet_frags_ctl;
+
 #endif /* _NF_DEFRAG_IPV6_H */
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 104f844..74ebf4b 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -380,6 +380,8 @@ static void skb_release_head_state(struct sk_buff *skb)
 	}
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 	nf_conntrack_put(skb->nfct);
+#endif
+#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
 	nf_conntrack_put_reasm(skb->nfct_reasm);
 #endif
 #ifdef CONFIG_BRIDGE_NETFILTER
diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
index 99abfb5..97c5b21 100644
--- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
+++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
@@ -19,13 +19,15 @@
 
 #include <linux/netfilter_ipv6.h>
 #include <linux/netfilter_bridge.h>
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_helper.h>
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_l3proto.h>
 #include <net/netfilter/nf_conntrack_core.h>
-#include <net/netfilter/nf_conntrack_zones.h>
 #include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
+#endif
+#include <net/netfilter/nf_conntrack_zones.h>
 #include <net/netfilter/ipv6/nf_defrag_ipv6.h>
 
 static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum,
@@ -33,8 +35,10 @@ static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum,
 {
 	u16 zone = NF_CT_DEFAULT_ZONE;
 
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 	if (skb->nfct)
 		zone = nf_ct_zone((struct nf_conn *)skb->nfct);
+#endif
 
 #ifdef CONFIG_BRIDGE_NETFILTER
 	if (skb->nf_bridge &&
@@ -56,9 +60,11 @@ static unsigned int ipv6_defrag(unsigned int hooknum,
 {
 	struct sk_buff *reasm;
 
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 	/* Previously seen (loopback)?	*/
 	if (skb->nfct && !nf_ct_is_template((struct nf_conn *)skb->nfct))
 		return NF_ACCEPT;
+#endif
 
 	reasm = nf_ct_frag6_gather(skb, nf_ct6_defrag_user(hooknum, skb));
 	/* queued */

^ permalink raw reply related

* Re: iwl3945: regression - unregister_netdevice: waiting for wlan0 to become free. Usage count = 1
From: Michal Hocko @ 2010-11-22 12:46 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David Miller, linux-wireless, Reinette Chatre, Wey-Yi Guy,
	Intel Linux Wireless, LKML, netdev
In-Reply-To: <1290424776.2811.32.camel@edumazet-laptop>

On Mon 22-11-10 12:19:36, Eric Dumazet wrote:
> Le lundi 22 novembre 2010 à 11:43 +0100, Michal Hocko a écrit :
> > Hi,
> > I am experiencing iwl3945 driver reference counting problem with the current
> > 2.6.37-rc2 kernel release. 
> > 
> > The problem can be easily reproduced by associating to an AP (I have
> > tried only wep and wpa based auth) and then rmmod iw3945 which gets
> > stuck in unregister_netdevice (in the endless loop as there is no way -
> > I guess - to decrease the reference count to 0) spitting the following
> > message:
> > 
> > unregister_netdevice: waiting for wlan0 to become free. Usage count = 1
> > 
[...]
> 
> 
> Oh well, it seems David put the fix in net-next-2.6 instead of net-2.6
> 
> Please try :
> 
> http://git.kernel.org/?p=linux/kernel/git/davem/net-next-2.6.git;a=commitdiff;h=9d82ca98f71fd686ef2f3017c5e3e6a4871b6e46

Yes, the patch really fixes the issue. Thanks a lot!
Btw. I have checked -rc3, which doesn't contain the fix either.

> 
> Thanks
> 
> 

-- 
Michal Hocko
L3 team 
SUSE LINUX s.r.o.
Lihovarska 1060/12
190 00 Praha 9    
Czech Republic

^ permalink raw reply

* [PATCH] ks8842: Fix TX cache flush issue
From: Richard Röjfors @ 2010-11-22 12:50 UTC (permalink / raw)
  To: netdev; +Cc: davem, ferringb

This patch fixes a cache sync issue found in MeeGo 1.1.

It was found that bytes after the first 64 of the TX buffer were not
flushed from the cache correctly.

The patch switches out kmalloc/dma_map_single/dma_sync_single_for_device
to dma_alloc_coherent.

Signed-off-by: Richard Röjfors <richard.rojfors@pelagicore.com>
---
diff --git a/drivers/net/ks8842.c b/drivers/net/ks8842.c
index 928b2b8..55a11ba 100644
--- a/drivers/net/ks8842.c
+++ b/drivers/net/ks8842.c
@@ -449,10 +449,6 @@ static int ks8842_tx_frame_dma(struct sk_buff *skb, struct net_device *netdev)
 	*buf++ = (skb->len >> 8) & 0xff;
 	skb_copy_from_linear_data(skb, buf, skb->len);
 
-	dma_sync_single_range_for_device(adapter->dev,
-		sg_dma_address(&ctl->sg), 0, sg_dma_len(&ctl->sg),
-		DMA_TO_DEVICE);
-
 	/* make sure the length is a multiple of 4 */
 	if (sg_dma_len(&ctl->sg) % 4)
 		sg_dma_len(&ctl->sg) += 4 - sg_dma_len(&ctl->sg) % 4;
@@ -908,12 +904,10 @@ static void ks8842_dealloc_dma_bufs(struct ks8842_adapter *adapter)
 
 	tasklet_kill(&rx_ctl->tasklet);
 
-	if (sg_dma_address(&tx_ctl->sg))
-		dma_unmap_single(adapter->dev, sg_dma_address(&tx_ctl->sg),
-			DMA_BUFFER_SIZE, DMA_TO_DEVICE);
+	if (tx_ctl->buf)
+		dma_free_coherent(adapter->dev, DMA_BUFFER_SIZE,
+			tx_ctl->buf, sg_dma_address(&tx_ctl->sg));
 	sg_dma_address(&tx_ctl->sg) = 0;
-
-	kfree(tx_ctl->buf);
 	tx_ctl->buf = NULL;
 }
 
@@ -945,21 +939,13 @@ static int ks8842_alloc_dma_bufs(struct net_device *netdev)
 	}
 
 	/* allocate DMA buffer */
-	tx_ctl->buf = kmalloc(DMA_BUFFER_SIZE, GFP_KERNEL);
+	tx_ctl->buf = dma_alloc_coherent(adapter->dev, DMA_BUFFER_SIZE,
+		&sg_dma_address(&tx_ctl->sg), GFP_KERNEL);
 	if (!tx_ctl->buf) {
 		err = -ENOMEM;
 		goto err;
 	}
 
-	sg_dma_address(&tx_ctl->sg) = dma_map_single(adapter->dev,
-		tx_ctl->buf, DMA_BUFFER_SIZE, DMA_TO_DEVICE);
-	err = dma_mapping_error(adapter->dev,
-		sg_dma_address(&tx_ctl->sg));
-	if (err) {
-		sg_dma_address(&tx_ctl->sg) = 0;
-		goto err;
-	}
-
 	rx_ctl->chan = dma_request_channel(mask, ks8842_dma_filter_fn,
 					   (void *)(long)rx_ctl->channel);
 	if (!rx_ctl->chan) {


^ permalink raw reply related

* [PATCH] netns: Don't leak others' openreq-s in proc
From: Pavel Emelyanov @ 2010-11-22 13:26 UTC (permalink / raw)
  To: David Miller, Linux Netdev List

The /proc/net/tcp leaks openreq sockets from other namespaces.

Signed-off-by: Pavel Emelyanov <xemul@parallels.com>

---

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 69ccbc1..e13da6d 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2043,7 +2043,9 @@ get_req:
 	}
 get_sk:
 	sk_nulls_for_each_from(sk, node) {
-		if (sk->sk_family == st->family && net_eq(sock_net(sk), net)) {
+		if (!net_eq(sock_net(sk), net))
+			continue;
+		if (sk->sk_family == st->family) {
 			cur = sk;
 			goto out;
 		}

^ permalink raw reply related

* Re: [PATCH 2/2 v7] xps: Transmit Packet Steering
From: Eric Dumazet @ 2010-11-22 13:33 UTC (permalink / raw)
  To: Tom Herbert; +Cc: davem, netdev
In-Reply-To: <alpine.DEB.2.00.1011211501430.14906@pokey.mtv.corp.google.com>

Le dimanche 21 novembre 2010 à 15:17 -0800, Tom Herbert a écrit :

...

> +
> +static DEFINE_MUTEX(xps_map_mutex);
> +
> +static ssize_t store_xps_map(struct netdev_queue *queue,
> +		      struct netdev_queue_attribute *attribute,
> +		      const char *buf, size_t len)
> +{
> +	struct net_device *dev = queue->dev;
> +	cpumask_var_t mask;
> +	int err, i, cpu, pos, map_len, alloc_len, need_set;
> +	unsigned long index;
> +	struct xps_map *map, *new_map;
> +	struct xps_dev_maps *dev_maps, *new_dev_maps;
> +	int nonempty = 0;
> +
> +	if (!capable(CAP_NET_ADMIN))
> +		return -EPERM;
> +
> +	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
> +		return -ENOMEM;
> +
> +	index = get_netdev_queue_index(queue);
> +
> +	err = bitmap_parse(buf, len, cpumask_bits(mask), nr_cpumask_bits);
> +	if (err) {
> +		free_cpumask_var(mask);
> +		return err;
> +	}
> +
> +	new_dev_maps = kzalloc(max_t(unsigned,
> +	    XPS_DEV_MAPS_SIZE, L1_CACHE_BYTES), GFP_KERNEL);
> +	if (!new_dev_maps) {
> +		free_cpumask_var(mask);
> +		return -ENOMEM;
> +	}
> +
> +	mutex_lock(&xps_map_mutex);
> +
> +	dev_maps = dev->xps_maps;
> +
> +	for_each_possible_cpu(cpu) {
> +		new_map = map = dev_maps ? dev_maps->cpu_map[cpu] : NULL;
> +
> +		if (map) {
> +			for (pos = 0; pos < map->len; pos++)
> +				if (map->queues[pos] == index)
> +					break;
> +			map_len = map->len;
> +			alloc_len = map->alloc_len;
> +		} else
> +			pos = map_len = alloc_len = 0;
> +
> +		need_set = cpu_isset(cpu, *mask) && cpu_online(cpu);
> +
> +		if (need_set && pos >= map_len) {
> +			/* Need to add queue to this CPU's map */
> +			if (map_len >= alloc_len) {
> +				alloc_len = alloc_len ?
> +				    2 * alloc_len : XPS_MIN_MAP_ALLOC;
> +				new_map = kzalloc(XPS_MAP_SIZE(alloc_len),
> +				    GFP_KERNEL);

		kzalloc_node(size, GFP_KERNEL, cpu_to_node(cpu)) 

to get memory close to the cpu ?

> +				if (!new_map)
> +					goto error;
> +				new_map->alloc_len = alloc_len;
> +				for (i = 0; i < map_len; i++)
> +					new_map->queues[i] = map->queues[i];
> +				new_map->len = map_len;
> +			}
> +			new_map->queues[new_map->len++] = index;
> +		} else if (!need_set && pos < map_len) {
> +			/* Need to remove queue from this CPU's map */
> +			if (map_len > 1)
> +				new_map->queues[pos] =
> +				    new_map->queues[--new_map->len];
> +			else
> +				new_map = NULL;
> +		}
> +		new_dev_maps->cpu_map[cpu] = new_map;
> +	}
> +
> +	/* Cleanup old maps */
> +	for_each_possible_cpu(cpu) {
> +		map = dev_maps ? dev_maps->cpu_map[cpu] : NULL;
> +		if (map && new_dev_maps->cpu_map[cpu] != map)
> +			call_rcu(&map->rcu, xps_map_release);
> +		if (new_dev_maps->cpu_map[cpu])
> +			nonempty = 1;
> +	}
> +


^ permalink raw reply

* Re: [PATCH] arch/tile: fix rwlock so would-be write lockers don't block new readers
From: Chris Metcalf @ 2010-11-22 13:35 UTC (permalink / raw)
  To: Cypher Wu; +Cc: linux-kernel, Américo Wang, Eric Dumazet, netdev
In-Reply-To: <AANLkTikGj+zG9OqVJseKh4fGqQNnqnJpHgcaaXOL8nNi@mail.gmail.com>

On 11/22/2010 12:39 AM, Cypher Wu wrote:
> 2010/11/15 Chris Metcalf <cmetcalf@tilera.com>:
>> This avoids a deadlock in the IGMP code where one core gets a read
>> lock, another core starts trying to get a write lock (thus blocking
>> new readers), and then the first core tries to recursively re-acquire
>> the read lock.
>>
>> We still try to preserve some degree of balance by giving priority
>> to additional write lockers that come along while the lock is held
>> for write, so they can all complete quickly and return the lock to
>> the readers.
>>
>> Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
>> ---
>> This should apply relatively cleanly to 2.6.26.7 source code too.
>> [...]
>
> I've finished my business trip and tested that patch for more than an
> hour and it works. The test is still running now.
>
> But it seems there is still a potential problem: we use a ticket lock
> for write_lock(), and if many write_lock() calls occur, are 256
> tickets enough for 64 or even more cores to avoid overflow?
> Since if we try to write_unlock() and there's already a write_lock()
> waiting, we'll only increment the current ticket.

This is OK, since each core can issue at most one (blocking) write_lock(),
and we have only 64 cores.  Future >256 core machines will be based on
TILE-Gx anyway, which doesn't have the 256-core limit since it doesn't use
the spinlock_32.c implementation.

-- 
Chris Metcalf, Tilera Corp.
http://www.tilera.com

^ permalink raw reply

* Re: [PATCH net-next-2.6] drivers/net: use vzalloc()
From: Jon Mason @ 2010-11-22 14:42 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: David Miller, netdev
In-Reply-To: <1290420906.2811.14.camel@edumazet-laptop>

On Mon, Nov 22, 2010 at 02:15:06AM -0800, Eric Dumazet wrote:
> Use vzalloc() and vzalloc_node() in net drivers
>

Acking the vxge portions.

Acked-by: Jon Mason <jon.mason@exar.com>

> Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
> ---
> People, I hope you don't mind if I make a single patch, and don't Cc all
> maintainers, for such a trivial change.
>
>  drivers/net/bnx2.c                   |    9 +------
>  drivers/net/cxgb3/cxgb3_offload.c    |    6 +---
>  drivers/net/cxgb4/cxgb4_main.c       |    6 +---
>  drivers/net/e1000/e1000_main.c       |    6 +---
>  drivers/net/e1000e/netdev.c          |    6 +---
>  drivers/net/ehea/ehea_main.c         |    4 ---
>  drivers/net/igb/igb_main.c           |    6 +---
>  drivers/net/igbvf/netdev.c           |    6 +---
>  drivers/net/ixgb/ixgb_main.c         |    6 +---
>  drivers/net/ixgbe/ixgbe_main.c       |   10 +++-----
>  drivers/net/ixgbevf/ixgbevf_main.c   |    6 +---
>  drivers/net/netxen/netxen_nic_init.c |    6 +---
>  drivers/net/pch_gbe/pch_gbe_main.c   |    6 +---
>  drivers/net/pptp.c                   |    3 --
>  drivers/net/qlcnic/qlcnic_init.c     |    6 +---
>  drivers/net/sfc/filter.c             |    3 --
>  drivers/net/vxge/vxge-config.c       |   31 ++++++-------------------
>  17 files changed, 39 insertions(+), 87 deletions(-)
>
> diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c
> index 062600b..0de196d 100644
> --- a/drivers/net/bnx2.c
> +++ b/drivers/net/bnx2.c
> @@ -766,13 +766,10 @@ bnx2_alloc_rx_mem(struct bnx2 *bp)
>                 int j;
>
>                 rxr->rx_buf_ring =
> -                       vmalloc(SW_RXBD_RING_SIZE * bp->rx_max_ring);
> +                       vzalloc(SW_RXBD_RING_SIZE * bp->rx_max_ring);
>                 if (rxr->rx_buf_ring == NULL)
>                         return -ENOMEM;
>
> -               memset(rxr->rx_buf_ring, 0,
> -                      SW_RXBD_RING_SIZE * bp->rx_max_ring);
> -
>                 for (j = 0; j < bp->rx_max_ring; j++) {
>                         rxr->rx_desc_ring[j] =
>                                 dma_alloc_coherent(&bp->pdev->dev,
> @@ -785,13 +782,11 @@ bnx2_alloc_rx_mem(struct bnx2 *bp)
>                 }
>
>                 if (bp->rx_pg_ring_size) {
> -                       rxr->rx_pg_ring = vmalloc(SW_RXPG_RING_SIZE *
> +                       rxr->rx_pg_ring = vzalloc(SW_RXPG_RING_SIZE *
>                                                   bp->rx_max_pg_ring);
>                         if (rxr->rx_pg_ring == NULL)
>                                 return -ENOMEM;
>
> -                       memset(rxr->rx_pg_ring, 0, SW_RXPG_RING_SIZE *
> -                              bp->rx_max_pg_ring);
>                 }
>
>                 for (j = 0; j < bp->rx_max_pg_ring; j++) {
> diff --git a/drivers/net/cxgb3/cxgb3_offload.c b/drivers/net/cxgb3/cxgb3_offload.c
> index bcf0753..ef02aa6 100644
> --- a/drivers/net/cxgb3/cxgb3_offload.c
> +++ b/drivers/net/cxgb3/cxgb3_offload.c
> @@ -1164,12 +1164,10 @@ static void cxgb_redirect(struct dst_entry *old, struct dst_entry *new)
>   */
>  void *cxgb_alloc_mem(unsigned long size)
>  {
> -       void *p = kmalloc(size, GFP_KERNEL);
> +       void *p = kzalloc(size, GFP_KERNEL);
>
>         if (!p)
> -               p = vmalloc(size);
> -       if (p)
> -               memset(p, 0, size);
> +               p = vzalloc(size);
>         return p;
>  }
>
> diff --git a/drivers/net/cxgb4/cxgb4_main.c b/drivers/net/cxgb4/cxgb4_main.c
> index f50bc98..848f89d 100644
> --- a/drivers/net/cxgb4/cxgb4_main.c
> +++ b/drivers/net/cxgb4/cxgb4_main.c
> @@ -868,12 +868,10 @@ out:      release_firmware(fw);
>   */
>  void *t4_alloc_mem(size_t size)
>  {
> -       void *p = kmalloc(size, GFP_KERNEL);
> +       void *p = kzalloc(size, GFP_KERNEL);
>
>         if (!p)
> -               p = vmalloc(size);
> -       if (p)
> -               memset(p, 0, size);
> +               p = vzalloc(size);
>         return p;
>  }
>
> diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c
> index 4686c39..dcb7f82 100644
> --- a/drivers/net/e1000/e1000_main.c
> +++ b/drivers/net/e1000/e1000_main.c
> @@ -1425,13 +1425,12 @@ static int e1000_setup_tx_resources(struct e1000_adapter *adapter,
>         int size;
>
>         size = sizeof(struct e1000_buffer) * txdr->count;
> -       txdr->buffer_info = vmalloc(size);
> +       txdr->buffer_info = vzalloc(size);
>         if (!txdr->buffer_info) {
>                 e_err(probe, "Unable to allocate memory for the Tx descriptor "
>                       "ring\n");
>                 return -ENOMEM;
>         }
> -       memset(txdr->buffer_info, 0, size);
>
>         /* round up to nearest 4K */
>
> @@ -1621,13 +1620,12 @@ static int e1000_setup_rx_resources(struct e1000_adapter *adapter,
>         int size, desc_len;
>
>         size = sizeof(struct e1000_buffer) * rxdr->count;
> -       rxdr->buffer_info = vmalloc(size);
> +       rxdr->buffer_info = vzalloc(size);
>         if (!rxdr->buffer_info) {
>                 e_err(probe, "Unable to allocate memory for the Rx descriptor "
>                       "ring\n");
>                 return -ENOMEM;
>         }
> -       memset(rxdr->buffer_info, 0, size);
>
>         desc_len = sizeof(struct e1000_rx_desc);
>
> diff --git a/drivers/net/e1000e/netdev.c b/drivers/net/e1000e/netdev.c
> index 9b3f0a9..0adcb79 100644
> --- a/drivers/net/e1000e/netdev.c
> +++ b/drivers/net/e1000e/netdev.c
> @@ -2059,10 +2059,9 @@ int e1000e_setup_tx_resources(struct e1000_adapter *adapter)
>         int err = -ENOMEM, size;
>
>         size = sizeof(struct e1000_buffer) * tx_ring->count;
> -       tx_ring->buffer_info = vmalloc(size);
> +       tx_ring->buffer_info = vzalloc(size);
>         if (!tx_ring->buffer_info)
>                 goto err;
> -       memset(tx_ring->buffer_info, 0, size);
>
>         /* round up to nearest 4K */
>         tx_ring->size = tx_ring->count * sizeof(struct e1000_tx_desc);
> @@ -2095,10 +2094,9 @@ int e1000e_setup_rx_resources(struct e1000_adapter *adapter)
>         int i, size, desc_len, err = -ENOMEM;
>
>         size = sizeof(struct e1000_buffer) * rx_ring->count;
> -       rx_ring->buffer_info = vmalloc(size);
> +       rx_ring->buffer_info = vzalloc(size);
>         if (!rx_ring->buffer_info)
>                 goto err;
> -       memset(rx_ring->buffer_info, 0, size);
>
>         for (i = 0; i < rx_ring->count; i++) {
>                 buffer_info = &rx_ring->buffer_info[i];
> diff --git a/drivers/net/ehea/ehea_main.c b/drivers/net/ehea/ehea_main.c
> index 182b2a7..a84c389 100644
> --- a/drivers/net/ehea/ehea_main.c
> +++ b/drivers/net/ehea/ehea_main.c
> @@ -1496,12 +1496,10 @@ static int ehea_init_q_skba(struct ehea_q_skb_arr *q_skba, int max_q_entries)
>  {
>         int arr_size = sizeof(void *) * max_q_entries;
>
> -       q_skba->arr = vmalloc(arr_size);
> +       q_skba->arr = vzalloc(arr_size);
>         if (!q_skba->arr)
>                 return -ENOMEM;
>
> -       memset(q_skba->arr, 0, arr_size);
> -
>         q_skba->len = max_q_entries;
>         q_skba->index = 0;
>         q_skba->os_skbs = 0;
> diff --git a/drivers/net/igb/igb_main.c b/drivers/net/igb/igb_main.c
> index 892d196..67ea262 100644
> --- a/drivers/net/igb/igb_main.c
> +++ b/drivers/net/igb/igb_main.c
> @@ -2436,10 +2436,9 @@ int igb_setup_tx_resources(struct igb_ring *tx_ring)
>         int size;
>
>         size = sizeof(struct igb_buffer) * tx_ring->count;
> -       tx_ring->buffer_info = vmalloc(size);
> +       tx_ring->buffer_info = vzalloc(size);
>         if (!tx_ring->buffer_info)
>                 goto err;
> -       memset(tx_ring->buffer_info, 0, size);
>
>         /* round up to nearest 4K */
>         tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
> @@ -2587,10 +2586,9 @@ int igb_setup_rx_resources(struct igb_ring *rx_ring)
>         int size, desc_len;
>
>         size = sizeof(struct igb_buffer) * rx_ring->count;
> -       rx_ring->buffer_info = vmalloc(size);
> +       rx_ring->buffer_info = vzalloc(size);
>         if (!rx_ring->buffer_info)
>                 goto err;
> -       memset(rx_ring->buffer_info, 0, size);
>
>         desc_len = sizeof(union e1000_adv_rx_desc);
>
> diff --git a/drivers/net/igbvf/netdev.c b/drivers/net/igbvf/netdev.c
> index 4c998b7..8dbde23 100644
> --- a/drivers/net/igbvf/netdev.c
> +++ b/drivers/net/igbvf/netdev.c
> @@ -430,10 +430,9 @@ int igbvf_setup_tx_resources(struct igbvf_adapter *adapter,
>         int size;
>
>         size = sizeof(struct igbvf_buffer) * tx_ring->count;
> -       tx_ring->buffer_info = vmalloc(size);
> +       tx_ring->buffer_info = vzalloc(size);
>         if (!tx_ring->buffer_info)
>                 goto err;
> -       memset(tx_ring->buffer_info, 0, size);
>
>         /* round up to nearest 4K */
>         tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
> @@ -470,10 +469,9 @@ int igbvf_setup_rx_resources(struct igbvf_adapter *adapter,
>         int size, desc_len;
>
>         size = sizeof(struct igbvf_buffer) * rx_ring->count;
> -       rx_ring->buffer_info = vmalloc(size);
> +       rx_ring->buffer_info = vzalloc(size);
>         if (!rx_ring->buffer_info)
>                 goto err;
> -       memset(rx_ring->buffer_info, 0, size);
>
>         desc_len = sizeof(union e1000_adv_rx_desc);
>
> diff --git a/drivers/net/ixgb/ixgb_main.c b/drivers/net/ixgb/ixgb_main.c
> index caa8192..211a169 100644
> --- a/drivers/net/ixgb/ixgb_main.c
> +++ b/drivers/net/ixgb/ixgb_main.c
> @@ -669,13 +669,12 @@ ixgb_setup_tx_resources(struct ixgb_adapter *adapter)
>         int size;
>
>         size = sizeof(struct ixgb_buffer) * txdr->count;
> -       txdr->buffer_info = vmalloc(size);
> +       txdr->buffer_info = vzalloc(size);
>         if (!txdr->buffer_info) {
>                 netif_err(adapter, probe, adapter->netdev,
>                           "Unable to allocate transmit descriptor ring memory\n");
>                 return -ENOMEM;
>         }
> -       memset(txdr->buffer_info, 0, size);
>
>         /* round up to nearest 4K */
>
> @@ -759,13 +758,12 @@ ixgb_setup_rx_resources(struct ixgb_adapter *adapter)
>         int size;
>
>         size = sizeof(struct ixgb_buffer) * rxdr->count;
> -       rxdr->buffer_info = vmalloc(size);
> +       rxdr->buffer_info = vzalloc(size);
>         if (!rxdr->buffer_info) {
>                 netif_err(adapter, probe, adapter->netdev,
>                           "Unable to allocate receive descriptor ring\n");
>                 return -ENOMEM;
>         }
> -       memset(rxdr->buffer_info, 0, size);
>
>         /* Round up to nearest 4K */
>
> diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c
> index 5409af3..4249b51 100644
> --- a/drivers/net/ixgbe/ixgbe_main.c
> +++ b/drivers/net/ixgbe/ixgbe_main.c
> @@ -5181,12 +5181,11 @@ int ixgbe_setup_tx_resources(struct ixgbe_ring *tx_ring)
>         int size;
>
>         size = sizeof(struct ixgbe_tx_buffer) * tx_ring->count;
> -       tx_ring->tx_buffer_info = vmalloc_node(size, tx_ring->numa_node);
> +       tx_ring->tx_buffer_info = vzalloc_node(size, tx_ring->numa_node);
>         if (!tx_ring->tx_buffer_info)
> -               tx_ring->tx_buffer_info = vmalloc(size);
> +               tx_ring->tx_buffer_info = vzalloc(size);
>         if (!tx_ring->tx_buffer_info)
>                 goto err;
> -       memset(tx_ring->tx_buffer_info, 0, size);
>
>         /* round up to nearest 4K */
>         tx_ring->size = tx_ring->count * sizeof(union ixgbe_adv_tx_desc);
> @@ -5246,12 +5245,11 @@ int ixgbe_setup_rx_resources(struct ixgbe_ring *rx_ring)
>         int size;
>
>         size = sizeof(struct ixgbe_rx_buffer) * rx_ring->count;
> -       rx_ring->rx_buffer_info = vmalloc_node(size, rx_ring->numa_node);
> +       rx_ring->rx_buffer_info = vzalloc_node(size, rx_ring->numa_node);
>         if (!rx_ring->rx_buffer_info)
> -               rx_ring->rx_buffer_info = vmalloc(size);
> +               rx_ring->rx_buffer_info = vzalloc(size);
>         if (!rx_ring->rx_buffer_info)
>                 goto err;
> -       memset(rx_ring->rx_buffer_info, 0, size);
>
>         /* Round up to nearest 4K */
>         rx_ring->size = rx_ring->count * sizeof(union ixgbe_adv_rx_desc);
> diff --git a/drivers/net/ixgbevf/ixgbevf_main.c b/drivers/net/ixgbevf/ixgbevf_main.c
> index 5b8063c..2216a3c 100644
> --- a/drivers/net/ixgbevf/ixgbevf_main.c
> +++ b/drivers/net/ixgbevf/ixgbevf_main.c
> @@ -2489,10 +2489,9 @@ int ixgbevf_setup_tx_resources(struct ixgbevf_adapter *adapter,
>         int size;
>
>         size = sizeof(struct ixgbevf_tx_buffer) * tx_ring->count;
> -       tx_ring->tx_buffer_info = vmalloc(size);
> +       tx_ring->tx_buffer_info = vzalloc(size);
>         if (!tx_ring->tx_buffer_info)
>                 goto err;
> -       memset(tx_ring->tx_buffer_info, 0, size);
>
>         /* round up to nearest 4K */
>         tx_ring->size = tx_ring->count * sizeof(union ixgbe_adv_tx_desc);
> @@ -2556,14 +2555,13 @@ int ixgbevf_setup_rx_resources(struct ixgbevf_adapter *adapter,
>         int size;
>
>         size = sizeof(struct ixgbevf_rx_buffer) * rx_ring->count;
> -       rx_ring->rx_buffer_info = vmalloc(size);
> +       rx_ring->rx_buffer_info = vzalloc(size);
>         if (!rx_ring->rx_buffer_info) {
>                 hw_dbg(&adapter->hw,
>                        "Unable to vmalloc buffer memory for "
>                        "the receive descriptor ring\n");
>                 goto alloc_failed;
>         }
> -       memset(rx_ring->rx_buffer_info, 0, size);
>
>         /* Round up to nearest 4K */
>         rx_ring->size = rx_ring->count * sizeof(union ixgbe_adv_rx_desc);
> diff --git a/drivers/net/netxen/netxen_nic_init.c b/drivers/net/netxen/netxen_nic_init.c
> index 95fe552..f946de2 100644
> --- a/drivers/net/netxen/netxen_nic_init.c
> +++ b/drivers/net/netxen/netxen_nic_init.c
> @@ -214,13 +214,12 @@ int netxen_alloc_sw_resources(struct netxen_adapter *adapter)
>         tx_ring->num_desc = adapter->num_txd;
>         tx_ring->txq = netdev_get_tx_queue(netdev, 0);
>
> -       cmd_buf_arr = vmalloc(TX_BUFF_RINGSIZE(tx_ring));
> +       cmd_buf_arr = vzalloc(TX_BUFF_RINGSIZE(tx_ring));
>         if (cmd_buf_arr == NULL) {
>                 dev_err(&pdev->dev, "%s: failed to allocate cmd buffer ring\n",
>                        netdev->name);
>                 goto err_out;
>         }
> -       memset(cmd_buf_arr, 0, TX_BUFF_RINGSIZE(tx_ring));
>         tx_ring->cmd_buf_arr = cmd_buf_arr;
>
>         recv_ctx = &adapter->recv_ctx;
> @@ -280,7 +279,7 @@ int netxen_alloc_sw_resources(struct netxen_adapter *adapter)
>
>                 }
>                 rds_ring->rx_buf_arr = (struct netxen_rx_buffer *)
> -                       vmalloc(RCV_BUFF_RINGSIZE(rds_ring));
> +                       vzalloc(RCV_BUFF_RINGSIZE(rds_ring));
>                 if (rds_ring->rx_buf_arr == NULL) {
>                         printk(KERN_ERR "%s: Failed to allocate "
>                                 "rx buffer ring %d\n",
> @@ -288,7 +287,6 @@ int netxen_alloc_sw_resources(struct netxen_adapter *adapter)
>                         /* free whatever was already allocated */
>                         goto err_out;
>                 }
> -               memset(rds_ring->rx_buf_arr, 0, RCV_BUFF_RINGSIZE(rds_ring));
>                 INIT_LIST_HEAD(&rds_ring->free_list);
>                 /*
>                  * Now go through all of them, set reference handles
> diff --git a/drivers/net/pch_gbe/pch_gbe_main.c b/drivers/net/pch_gbe/pch_gbe_main.c
> index 472056b..afb7506 100644
> --- a/drivers/net/pch_gbe/pch_gbe_main.c
> +++ b/drivers/net/pch_gbe/pch_gbe_main.c
> @@ -1523,12 +1523,11 @@ int pch_gbe_setup_tx_resources(struct pch_gbe_adapter *adapter,
>         int desNo;
>
>         size = (int)sizeof(struct pch_gbe_buffer) * tx_ring->count;
> -       tx_ring->buffer_info = vmalloc(size);
> +       tx_ring->buffer_info = vzalloc(size);
>         if (!tx_ring->buffer_info) {
>                 pr_err("Unable to allocate memory for the buffer infomation\n");
>                 return -ENOMEM;
>         }
> -       memset(tx_ring->buffer_info, 0, size);
>
>         tx_ring->size = tx_ring->count * (int)sizeof(struct pch_gbe_tx_desc);
>
> @@ -1573,12 +1572,11 @@ int pch_gbe_setup_rx_resources(struct pch_gbe_adapter *adapter,
>         int desNo;
>
>         size = (int)sizeof(struct pch_gbe_buffer) * rx_ring->count;
> -       rx_ring->buffer_info = vmalloc(size);
> +       rx_ring->buffer_info = vzalloc(size);
>         if (!rx_ring->buffer_info) {
>                 pr_err("Unable to allocate memory for the receive descriptor ring\n");
>                 return -ENOMEM;
>         }
> -       memset(rx_ring->buffer_info, 0, size);
>         rx_ring->size = rx_ring->count * (int)sizeof(struct pch_gbe_rx_desc);
>         rx_ring->desc = dma_alloc_coherent(&pdev->dev, rx_ring->size,
>                                            &rx_ring->dma, GFP_KERNEL);
> diff --git a/drivers/net/pptp.c b/drivers/net/pptp.c
> index ccbc913..7556a92 100644
> --- a/drivers/net/pptp.c
> +++ b/drivers/net/pptp.c
> @@ -673,8 +673,7 @@ static int __init pptp_init_module(void)
>         int err = 0;
>         pr_info("PPTP driver version " PPTP_DRIVER_VERSION "\n");
>
> -       callid_sock = __vmalloc((MAX_CALLID + 1) * sizeof(void *),
> -               GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL);
> +       callid_sock = vzalloc((MAX_CALLID + 1) * sizeof(void *));
>         if (!callid_sock) {
>                 pr_err("PPTP: cann't allocate memory\n");
>                 return -ENOMEM;
> diff --git a/drivers/net/qlcnic/qlcnic_init.c b/drivers/net/qlcnic/qlcnic_init.c
> index 0d180c6..3f97018 100644
> --- a/drivers/net/qlcnic/qlcnic_init.c
> +++ b/drivers/net/qlcnic/qlcnic_init.c
> @@ -236,12 +236,11 @@ int qlcnic_alloc_sw_resources(struct qlcnic_adapter *adapter)
>         tx_ring->num_desc = adapter->num_txd;
>         tx_ring->txq = netdev_get_tx_queue(netdev, 0);
>
> -       cmd_buf_arr = vmalloc(TX_BUFF_RINGSIZE(tx_ring));
> +       cmd_buf_arr = vzalloc(TX_BUFF_RINGSIZE(tx_ring));
>         if (cmd_buf_arr == NULL) {
>                 dev_err(&netdev->dev, "failed to allocate cmd buffer ring\n");
>                 goto err_out;
>         }
> -       memset(cmd_buf_arr, 0, TX_BUFF_RINGSIZE(tx_ring));
>         tx_ring->cmd_buf_arr = cmd_buf_arr;
>
>         recv_ctx = &adapter->recv_ctx;
> @@ -276,13 +275,12 @@ int qlcnic_alloc_sw_resources(struct qlcnic_adapter *adapter)
>                         break;
>                 }
>                 rds_ring->rx_buf_arr = (struct qlcnic_rx_buffer *)
> -                       vmalloc(RCV_BUFF_RINGSIZE(rds_ring));
> +                       vzalloc(RCV_BUFF_RINGSIZE(rds_ring));
>                 if (rds_ring->rx_buf_arr == NULL) {
>                         dev_err(&netdev->dev, "Failed to allocate "
>                                 "rx buffer ring %d\n", ring);
>                         goto err_out;
>                 }
> -               memset(rds_ring->rx_buf_arr, 0, RCV_BUFF_RINGSIZE(rds_ring));
>                 INIT_LIST_HEAD(&rds_ring->free_list);
>                 /*
>                  * Now go through all of them, set reference handles
> diff --git a/drivers/net/sfc/filter.c b/drivers/net/sfc/filter.c
> index 52cb608..44500b5 100644
> --- a/drivers/net/sfc/filter.c
> +++ b/drivers/net/sfc/filter.c
> @@ -428,10 +428,9 @@ int efx_probe_filters(struct efx_nic *efx)
>                                              GFP_KERNEL);
>                 if (!table->used_bitmap)
>                         goto fail;
> -               table->spec = vmalloc(table->size * sizeof(*table->spec));
> +               table->spec = vzalloc(table->size * sizeof(*table->spec));
>                 if (!table->spec)
>                         goto fail;
> -               memset(table->spec, 0, table->size * sizeof(*table->spec));
>         }
>
>         return 0;
> diff --git a/drivers/net/vxge/vxge-config.c b/drivers/net/vxge/vxge-config.c
> index 409c2e6..44d3ddd 100644
> --- a/drivers/net/vxge/vxge-config.c
> +++ b/drivers/net/vxge/vxge-config.c
> @@ -1220,13 +1220,12 @@ vxge_hw_device_initialize(
>                 goto exit;
>
>         hldev = (struct __vxge_hw_device *)
> -                       vmalloc(sizeof(struct __vxge_hw_device));
> +                       vzalloc(sizeof(struct __vxge_hw_device));
>         if (hldev == NULL) {
>                 status = VXGE_HW_ERR_OUT_OF_MEMORY;
>                 goto exit;
>         }
>
> -       memset(hldev, 0, sizeof(struct __vxge_hw_device));
>         hldev->magic = VXGE_HW_DEVICE_MAGIC;
>
>         vxge_hw_device_debug_set(hldev, VXGE_ERR, VXGE_COMPONENT_ALL);
> @@ -2064,15 +2063,12 @@ __vxge_hw_mempool_grow(struct vxge_hw_mempool *mempool, u32 num_allocate,
>                  * allocate new memblock and its private part at once.
>                  * This helps to minimize memory usage a lot. */
>                 mempool->memblocks_priv_arr[i] =
> -                               vmalloc(mempool->items_priv_size * n_items);
> +                               vzalloc(mempool->items_priv_size * n_items);
>                 if (mempool->memblocks_priv_arr[i] == NULL) {
>                         status = VXGE_HW_ERR_OUT_OF_MEMORY;
>                         goto exit;
>                 }
>
> -               memset(mempool->memblocks_priv_arr[i], 0,
> -                            mempool->items_priv_size * n_items);
> -
>                 /* allocate DMA-capable memblock */
>                 mempool->memblocks_arr[i] =
>                         __vxge_hw_blockpool_malloc(mempool->devh,
> @@ -2145,12 +2141,11 @@ __vxge_hw_mempool_create(
>         }
>
>         mempool = (struct vxge_hw_mempool *)
> -                       vmalloc(sizeof(struct vxge_hw_mempool));
> +                       vzalloc(sizeof(struct vxge_hw_mempool));
>         if (mempool == NULL) {
>                 status = VXGE_HW_ERR_OUT_OF_MEMORY;
>                 goto exit;
>         }
> -       memset(mempool, 0, sizeof(struct vxge_hw_mempool));
>
>         mempool->devh                   = devh;
>         mempool->memblock_size          = memblock_size;
> @@ -2170,31 +2165,27 @@ __vxge_hw_mempool_create(
>
>         /* allocate array of memblocks */
>         mempool->memblocks_arr =
> -               (void **) vmalloc(sizeof(void *) * mempool->memblocks_max);
> +               (void **) vzalloc(sizeof(void *) * mempool->memblocks_max);
>         if (mempool->memblocks_arr == NULL) {
>                 __vxge_hw_mempool_destroy(mempool);
>                 status = VXGE_HW_ERR_OUT_OF_MEMORY;
>                 mempool = NULL;
>                 goto exit;
>         }
> -       memset(mempool->memblocks_arr, 0,
> -               sizeof(void *) * mempool->memblocks_max);
>
>         /* allocate array of private parts of items per memblocks */
>         mempool->memblocks_priv_arr =
> -               (void **) vmalloc(sizeof(void *) * mempool->memblocks_max);
> +               (void **) vzalloc(sizeof(void *) * mempool->memblocks_max);
>         if (mempool->memblocks_priv_arr == NULL) {
>                 __vxge_hw_mempool_destroy(mempool);
>                 status = VXGE_HW_ERR_OUT_OF_MEMORY;
>                 mempool = NULL;
>                 goto exit;
>         }
> -       memset(mempool->memblocks_priv_arr, 0,
> -                   sizeof(void *) * mempool->memblocks_max);
>
>         /* allocate array of memblocks DMA objects */
>         mempool->memblocks_dma_arr = (struct vxge_hw_mempool_dma *)
> -               vmalloc(sizeof(struct vxge_hw_mempool_dma) *
> +               vzalloc(sizeof(struct vxge_hw_mempool_dma) *
>                         mempool->memblocks_max);
>
>         if (mempool->memblocks_dma_arr == NULL) {
> @@ -2203,20 +2194,16 @@ __vxge_hw_mempool_create(
>                 mempool = NULL;
>                 goto exit;
>         }
> -       memset(mempool->memblocks_dma_arr, 0,
> -                       sizeof(struct vxge_hw_mempool_dma) *
> -                       mempool->memblocks_max);
>
>         /* allocate hash array of items */
>         mempool->items_arr =
> -               (void **) vmalloc(sizeof(void *) * mempool->items_max);
> +               (void **) vzalloc(sizeof(void *) * mempool->items_max);
>         if (mempool->items_arr == NULL) {
>                 __vxge_hw_mempool_destroy(mempool);
>                 status = VXGE_HW_ERR_OUT_OF_MEMORY;
>                 mempool = NULL;
>                 goto exit;
>         }
> -       memset(mempool->items_arr, 0, sizeof(void *) * mempool->items_max);
>
>         /* calculate initial number of memblocks */
>         memblocks_to_allocate = (mempool->items_initial +
> @@ -4272,14 +4259,12 @@ vxge_hw_vpath_open(struct __vxge_hw_device *hldev,
>                 goto vpath_open_exit1;
>
>         vp = (struct __vxge_hw_vpath_handle *)
> -               vmalloc(sizeof(struct __vxge_hw_vpath_handle));
> +               vzalloc(sizeof(struct __vxge_hw_vpath_handle));
>         if (vp == NULL) {
>                 status = VXGE_HW_ERR_OUT_OF_MEMORY;
>                 goto vpath_open_exit2;
>         }
>
> -       memset(vp, 0, sizeof(struct __vxge_hw_vpath_handle));
> -
>         vp->vpath = vpath;
>
>         if (vpath->vp_config->fifo.enable == VXGE_HW_FIFO_ENABLE) {
>
>
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

The information and any attached documents contained in this message
may be confidential and/or legally privileged.  The message is
intended solely for the addressee(s).  If you are not the intended
recipient, you are hereby notified that any use, dissemination, or
reproduction is strictly prohibited and may be unlawful.  If you are
not the intended recipient, please contact the sender immediately by
return e-mail and destroy all copies of the original message.

^ permalink raw reply

* Re: iwl3945: regression - unregister_netdevice: waiting for wlan0 to become free. Usage count = 1
From: David Miller @ 2010-11-22 15:36 UTC (permalink / raw)
  To: eric.dumazet-Re5JQEeQqe8AvxtiuMwx3w
  Cc: mhocko-AlSwsSmVLrQ, linux-wireless-u79uwXL29TY76Z2rM5mHXA,
	reinette.chatre-ral2JQCrhuEAvxtiuMwx3w,
	wey-yi.w.guy-ral2JQCrhuEAvxtiuMwx3w, ilw-VuQAYsv1563Yd54FQh9/CA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	netdev-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <1290424776.2811.32.camel@edumazet-laptop>

From: Eric Dumazet <eric.dumazet-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
Date: Mon, 22 Nov 2010 12:19:36 +0100

> Oh well, it seems David put the fix in net-next-2.6 instead of net-2.6
> 
> Please try :
> 
> http://git.kernel.org/?p=linux/kernel/git/davem/net-next-2.6.git;a=commitdiff;h=9d82ca98f71fd686ef2f3017c5e3e6a4871b6e46

My bad, I'll toss this into net-2.6

Thanks for catching this Eric.
--
To unsubscribe from this list: send the line "unsubscribe linux-wireless" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH 56/62] iwlwifi: Use static const
From: Guy, Wey-Yi @ 2010-11-22 15:37 UTC (permalink / raw)
  To: Joe Perches
  Cc: Stefano Brivio, Chatre, Reinette, Intel Linux Wireless,
	John W. Linville,
	linux-wireless-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	netdev-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
In-Reply-To: <2773a20a26f4e326f0849e8ae8fb4f347d6a6ecb.1290305776.git.joe-6d6DIl74uiNBDgjK7y7TUQ@public.gmane.org>

Hi Joe,

On Sat, 2010-11-20 at 18:38 -0800, Joe Perches wrote:
> Using static const generally increases object text and decreases data size.
> It also generally decreases overall object size.
> 
>    text	   data	    bss	    dec	    hex	filename
>   48644	     57	  12120	  60821	   ed95	drivers/net/wireless/b43/phy_n.o.new
>   48661	     57	  12120	  60838	   eda6	drivers/net/wireless/b43/phy_n.o.old
>   37906	     86	   7904	  45896	   b348	drivers/net/wireless/iwlwifi/iwl-agn-lib.o.new
>   37937	     86	   7904	  45927	   b367	drivers/net/wireless/iwlwifi/iwl-agn-lib.o.old
>   37781	    523	   6752	  45056	   b000	drivers/net/wireless/iwlwifi/iwl-3945.o.new
>   37781	    523	   6752	  45056	   b000	drivers/net/wireless/iwlwifi/iwl-3945.o.old
> 
> Changed b43_nphy_write_clip_detection to take a const u16 *
> 
> Signed-off-by: Joe Perches <joe-6d6DIl74uiNBDgjK7y7TUQ@public.gmane.org>
> ---

I don't see a size difference on 3945; otherwise the patch looks OK to me.

Wey



--
To unsubscribe from this list: send the line "unsubscribe linux-wireless" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH 00/00] Remove deprecated items from Makefiles
From: David Miller @ 2010-11-22 16:17 UTC (permalink / raw)
  To: tdent48227
  Cc: marcel, padovan, linux-bluetooth, netdev, sjur.brandeland,
	socketcan, urs.thuermann, socketcan-core, sage, ceph-devel,
	wang840925, jlayton, kaber, pekkas, linux-kernel, netfilter-devel,
	netfilter, samuel
In-Reply-To: <1290387808-2239-1-git-send-email-tdent48227@gmail.com>

From: Tracey Dent <tdent48227@gmail.com>
Date: Sun, 21 Nov 2010 20:03:11 -0500

> I changed Makefiles to use <modules>-y instead of <modules>-objs because -objs
> is deprecated and not even mentioned in Documentation/kbuild/makefiles.txt.
> 
> Also, remove some if-conditional statements because I used the ccflags-$ flag
> instead of EXTRA_CFLAGS because EXTRA_CFLAGS is deprecated.

All applied, thanks Tracey.

^ permalink raw reply

* Re: linux-next: Tree for November 18 (netfilter)
From: Randy Dunlap @ 2010-11-22 16:19 UTC (permalink / raw)
  To: KOVACS Krisztian
  Cc: Patrick McHardy, Stephen Rothwell, netfilter-devel, linux-next,
	LKML, netdev, Balazs Scheidler
In-Reply-To: <1290428929.726241.1.camel@nienna.balabit>

On 11/22/10 04:28, KOVACS Krisztian wrote:
> Hi,
> 
> On Mon, 2010-11-22 at 13:14 +0100, KOVACS Krisztian wrote:
>> Indeed, we were missing quite a few of those ifdefs... The patch below
>> seems to fix the issue for me.
>>
>> commit ec0ac6f3e7749e25f481c1e0f75766974820fe84
>> Author: KOVACS Krisztian <hidden@balabit.hu>
>> Date:   Mon Nov 22 13:07:15 2010 +0100
> 
> Bah, it seems the patch got line-wrapped by my MUA, here it is again.
> Let's hope I got it right this time...
> 
> commit ec0ac6f3e7749e25f481c1e0f75766974820fe84
> Author: KOVACS Krisztian <hidden@balabit.hu>
> Date:   Mon Nov 22 13:07:15 2010 +0100
> 
>     netfilter: fix compilation when conntrack is disabled but tproxy is enabled
>     
>     The IPv6 tproxy patches split IPv6 defragmentation off of conntrack, but
>     failed to update the #ifdef stanzas guarding the defragmentation related
>     fields and code in skbuff and conntrack related code in nf_defrag_ipv6.c.
>     
>     This patch adds the required #ifdefs so that IPv6 tproxy can truly be used
>     without connection tracking.
>     
>     Original report:
>     http://marc.info/?l=linux-netdev&m=129010118516341&w=2
>     
>     Reported-by: Randy Dunlap <randy.dunlap@oracle.com>
>     Signed-off-by: KOVACS Krisztian <hidden@balabit.hu>

That builds.  Thanks.

Acked-by: Randy Dunlap <randy.dunlap@oracle.com>


> diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
> index e6ba898..4f2db79 100644
> --- a/include/linux/skbuff.h
> +++ b/include/linux/skbuff.h
> @@ -255,6 +255,11 @@ typedef unsigned int sk_buff_data_t;
>  typedef unsigned char *sk_buff_data_t;
>  #endif
>  
> +#if defined(CONFIG_NF_DEFRAG_IPV4) || defined(CONFIG_NF_DEFRAG_IPV4_MODULE) || \
> +    defined(CONFIG_NF_DEFRAG_IPV6) || defined(CONFIG_NF_DEFRAG_IPV6_MODULE)
> +#define NET_SKBUFF_NF_DEFRAG_NEEDED 1
> +#endif
> +
>  /** 
>   *	struct sk_buff - socket buffer
>   *	@next: Next buffer in list
> @@ -362,6 +367,8 @@ struct sk_buff {
>  	void			(*destructor)(struct sk_buff *skb);
>  #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
>  	struct nf_conntrack	*nfct;
> +#endif
> +#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
>  	struct sk_buff		*nfct_reasm;
>  #endif
>  #ifdef CONFIG_BRIDGE_NETFILTER
> @@ -2051,6 +2058,8 @@ static inline void nf_conntrack_get(struct nf_conntrack *nfct)
>  	if (nfct)
>  		atomic_inc(&nfct->use);
>  }
> +#endif
> +#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
>  static inline void nf_conntrack_get_reasm(struct sk_buff *skb)
>  {
>  	if (skb)
> @@ -2079,6 +2088,8 @@ static inline void nf_reset(struct sk_buff *skb)
>  #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
>  	nf_conntrack_put(skb->nfct);
>  	skb->nfct = NULL;
> +#endif
> +#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
>  	nf_conntrack_put_reasm(skb->nfct_reasm);
>  	skb->nfct_reasm = NULL;
>  #endif
> @@ -2095,6 +2106,8 @@ static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src)
>  	dst->nfct = src->nfct;
>  	nf_conntrack_get(src->nfct);
>  	dst->nfctinfo = src->nfctinfo;
> +#endif
> +#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
>  	dst->nfct_reasm = src->nfct_reasm;
>  	nf_conntrack_get_reasm(src->nfct_reasm);
>  #endif
> @@ -2108,6 +2121,8 @@ static inline void nf_copy(struct sk_buff *dst, const struct sk_buff *src)
>  {
>  #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
>  	nf_conntrack_put(dst->nfct);
> +#endif
> +#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
>  	nf_conntrack_put_reasm(dst->nfct_reasm);
>  #endif
>  #ifdef CONFIG_BRIDGE_NETFILTER
> diff --git a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
> index 1ee717e..a4c9936 100644
> --- a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
> +++ b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
> @@ -7,16 +7,6 @@ extern struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6;
>  extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6;
>  extern struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6;
>  
> -extern int nf_ct_frag6_init(void);
> -extern void nf_ct_frag6_cleanup(void);
> -extern struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user);
> -extern void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb,
> -			       struct net_device *in,
> -			       struct net_device *out,
> -			       int (*okfn)(struct sk_buff *));
> -
> -struct inet_frags_ctl;
> -
>  #include <linux/sysctl.h>
>  extern struct ctl_table nf_ct_ipv6_sysctl_table[];
>  
> diff --git a/include/net/netfilter/ipv6/nf_defrag_ipv6.h b/include/net/netfilter/ipv6/nf_defrag_ipv6.h
> index 94dd54d..fd79c9a 100644
> --- a/include/net/netfilter/ipv6/nf_defrag_ipv6.h
> +++ b/include/net/netfilter/ipv6/nf_defrag_ipv6.h
> @@ -3,4 +3,14 @@
>  
>  extern void nf_defrag_ipv6_enable(void);
>  
> +extern int nf_ct_frag6_init(void);
> +extern void nf_ct_frag6_cleanup(void);
> +extern struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user);
> +extern void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb,
> +			       struct net_device *in,
> +			       struct net_device *out,
> +			       int (*okfn)(struct sk_buff *));
> +
> +struct inet_frags_ctl;
> +
>  #endif /* _NF_DEFRAG_IPV6_H */
> diff --git a/net/core/skbuff.c b/net/core/skbuff.c
> index 104f844..74ebf4b 100644
> --- a/net/core/skbuff.c
> +++ b/net/core/skbuff.c
> @@ -380,6 +380,8 @@ static void skb_release_head_state(struct sk_buff *skb)
>  	}
>  #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
>  	nf_conntrack_put(skb->nfct);
> +#endif
> +#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
>  	nf_conntrack_put_reasm(skb->nfct_reasm);
>  #endif
>  #ifdef CONFIG_BRIDGE_NETFILTER
> diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
> index 99abfb5..97c5b21 100644
> --- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
> +++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
> @@ -19,13 +19,15 @@
>  
>  #include <linux/netfilter_ipv6.h>
>  #include <linux/netfilter_bridge.h>
> +#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
>  #include <net/netfilter/nf_conntrack.h>
>  #include <net/netfilter/nf_conntrack_helper.h>
>  #include <net/netfilter/nf_conntrack_l4proto.h>
>  #include <net/netfilter/nf_conntrack_l3proto.h>
>  #include <net/netfilter/nf_conntrack_core.h>
> -#include <net/netfilter/nf_conntrack_zones.h>
>  #include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
> +#endif
> +#include <net/netfilter/nf_conntrack_zones.h>
>  #include <net/netfilter/ipv6/nf_defrag_ipv6.h>
>  
>  static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum,
> @@ -33,8 +35,10 @@ static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum,
>  {
>  	u16 zone = NF_CT_DEFAULT_ZONE;
>  
> +#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
>  	if (skb->nfct)
>  		zone = nf_ct_zone((struct nf_conn *)skb->nfct);
> +#endif
>  
>  #ifdef CONFIG_BRIDGE_NETFILTER
>  	if (skb->nf_bridge &&
> @@ -56,9 +60,11 @@ static unsigned int ipv6_defrag(unsigned int hooknum,
>  {
>  	struct sk_buff *reasm;
>  
> +#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
>  	/* Previously seen (loopback)?	*/
>  	if (skb->nfct && !nf_ct_is_template((struct nf_conn *)skb->nfct))
>  		return NF_ACCEPT;
> +#endif
>  
>  	reasm = nf_ct_frag6_gather(skb, nf_ct6_defrag_user(hooknum, skb));
>  	/* queued */
> 
> 


-- 
~Randy
*** Remember to use Documentation/SubmitChecklist when testing your code ***

^ permalink raw reply

* Re: [PATCH] macvlan: Introduce 'passthru' mode to takeover the underlying device
From: David Miller @ 2010-11-22 16:24 UTC (permalink / raw)
  To: sri; +Cc: arnd, kaber, shemminger, mst, netdev, kvm
In-Reply-To: <1288307450.30131.82.camel@sridhar.beaverton.ibm.com>

From: Sridhar Samudrala <sri@us.ibm.com>
Date: Thu, 28 Oct 2010 16:10:50 -0700

> With the current default 'vepa' mode, a KVM guest using virtio with 
> macvtap backend has the following limitations.
> - cannot change/add a mac address on the guest virtio-net
> - cannot create a vlan device on the guest virtio-net
> - cannot enable promiscuous mode on guest virtio-net
> 
> To address these limitations, this patch introduces a new mode called
> 'passthru' when creating a macvlan device which allows takeover of the
> underlying device and passing it to a guest using virtio with macvtap
> backend.
> 
> Only one macvlan device is allowed in passthru mode and it inherits
> the mac address from the underlying device and sets it in promiscuous 
> mode to receive and forward all the packets.
> 
> Signed-off-by: Sridhar Samudrala <sri@us.ibm.com>

Applied, thanks Sridhar.

^ permalink raw reply

* Re: [PATCH] qlge: Fix incorrect usage of module parameters and netdev msg level
From: David Miller @ 2010-11-22 16:29 UTC (permalink / raw)
  To: sonnyrao; +Cc: netdev, miltonm, ron.mercer, linux-driver, linux-kernel
In-Reply-To: <1290075903-3038-1-git-send-email-sonnyrao@linux.vnet.ibm.com>

From: Sonny Rao <sonnyrao@linux.vnet.ibm.com>
Date: Thu, 18 Nov 2010 04:25:03 -0600

> Driver appears to be mistaking the permission field with default value
> in the case of debug and qlge_irq_type.
> 
> Driver is also passing debug as a bitmask into netif_msg_init()
> which really wants a number of bits, so fix that.
> 
> Signed-off-by: Milton Miller <miltonm@bga.com>
> Signed-off-by: Sonny Rao <sonnyrao@linux.vnet.ibm.com>

Applied, thanks Sonny.

^ permalink raw reply

* Re: [PATCH 0/2] phylib: Cleanup marvell.c and add 88E1149R support.
From: David Miller @ 2010-11-22 16:34 UTC (permalink / raw)
  To: ddaney-M3mlKVOIwJVv6pq1l3V1OdBPR1lH4CV8
  Cc: netdev-u79uwXL29TY76Z2rM5mHXA,
	devicetree-discuss-uLR06cmDAlY/bJ5BZ2RsiQ,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, cyril-l0cyMroinI0,
	arnaud.patard-dQbF7i+pzddAfugRpC6u6w
In-Reply-To: <1290203933-28251-1-git-send-email-ddaney-M3mlKVOIwJVv6pq1l3V1OdBPR1lH4CV8@public.gmane.org>

From: David Daney <ddaney-M3mlKVOIwJVv6pq1l3V1OdBPR1lH4CV8@public.gmane.org>
Date: Fri, 19 Nov 2010 13:58:51 -0800

> This is the second iteration of this patch.  I have split out the
> device tree support from the first version to a different patch set.
> The 88E1149R support is useful 'stand alone', so if it is acceptable,
> it can be merged first.
> 
> The first patch is a small cleanup suggested by Cyril Chemparathy, the
> second one adds basic 88E1149R support.

All applied to net-2.6, thanks.

^ permalink raw reply

* Re: [PATCH v2] of/phylib: Use device tree properties to initialize Marvell PHYs.
From: David Miller @ 2010-11-22 16:35 UTC (permalink / raw)
  To: grant.likely-s3s/WqlpOiPyB63q8FvJNQ
  Cc: netdev-u79uwXL29TY76Z2rM5mHXA,
	devicetree-discuss-uLR06cmDAlY/bJ5BZ2RsiQ,
	ddaney-M3mlKVOIwJVv6pq1l3V1OdBPR1lH4CV8,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, cyril-l0cyMroinI0,
	arnaud.patard-dQbF7i+pzddAfugRpC6u6w
In-Reply-To: <20101120042848.GB7005-MrY2KI0G/OVr83L8+7iqerDks+cytr/Z@public.gmane.org>

From: Grant Likely <grant.likely-s3s/WqlpOiPyB63q8FvJNQ@public.gmane.org>
Date: Fri, 19 Nov 2010 21:28:49 -0700

> On Fri, Nov 19, 2010 at 02:13:18PM -0800, David Daney wrote:
>> Some aspects of PHY initialization are board dependent, things like
>> indicator LED connections and some clocking modes cannot be determined
>> by probing.  The dev_flags element of struct phy_device can be used to
>> control these things if an appropriate value can be passed from the
>> Ethernet driver.  We run into problems however if the PHY connections
>> are specified by the device tree.  There is no way for the Ethernet
>> driver to know what flags it should pass.
>> 
>> If we are using the device tree, the struct phy_device will be
>> populated with the device tree node corresponding to the PHY, and we
>> can extract extra configuration information from there.
>> 
>> The next question is what should the format of that information be?
>> It is highly device specific, and the device tree representation
>> should not be tied to any arbitrary kernel defined constants.  A
>> straightforward representation is just to specify the exact bits that
>> should be set using the "marvell,reg-init" property:
>> 
>>       phy5: ethernet-phy@5 {
>>         reg = <5>;
>>         compatible = "marvell,88e1149r";
>>         marvell,reg-init =
>>                 /* led[0]:1000, led[1]:100, led[2]:10, led[3]:tx */
>>                 <3 0x10 0 0x5777>, /* Reg 3,16 <- 0x5777 */
>>                 /* mix %:0, led[0123]:drive low off hiZ */
>>                 <3 0x11 0 0x00aa>, /* Reg 3,17 <- 0x00aa */
>>                 /* default blink periods. */
>>                 <3 0x12 0 0x4105>, /* Reg 3,18 <- 0x4105 */
>>                 /* led[4]:rx, led[5]:dplx, led[45]:drive low off hiZ */
>>                 <3 0x13 0 0x0a60>; /* Reg 3,19 <- 0x0a60 */
>>       };
>> 
>>       phy6: ethernet-phy@6 {
>>         reg = <6>;
>>         compatible = "marvell,88e1118";
>>         marvell,reg-init =
>>                 /* Fix rx and tx clock transition timing */
>>                 <2 0x15 0xffcf 0>, /* Reg 2,21 Clear bits 4, 5 */
>>                 /* Adjust LED drive. */
>>                 <3 0x11 0 0x442a>, /* Reg 3,17 <- 0x442a */
>>                 /* irq, blink-activity, blink-link */
>>                 <3 0x10 0 0x0242>; /* Reg 3,16 <- 0x0242 */
>>       };
>> 
>> The Marvell PHYs have a page select register at register 22 (0x16), we
>> can specify any register by its page and register number.  These are
>> the first and second word.  The third word contains a mask to be ANDed
>> with the existing register value, and the fourth word is ORed with the
>> result to yield the new register value.  The new marvell_of_reg_init
>> function leaves the page select register unchanged, so a call to it
>> can be dropped into the .config_init functions without unduly
>> affecting the state of the PHY.
>> 
>> If CONFIG_OF_MDIO is not set, there is no of_node, or no
>> "marvell,reg-init" property, the PHY initialization is unchanged.
>> 
>> Signed-off-by: David Daney <ddaney-M3mlKVOIwJVv6pq1l3V1OdBPR1lH4CV8@public.gmane.org>
>> Cc: Grant Likely <grant.likely-s3s/WqlpOiPyB63q8FvJNQ@public.gmane.org>
>> Cc: Cyril Chemparathy <cyril-l0cyMroinI0@public.gmane.org>
>> Cc: David Daney <ddaney-M3mlKVOIwJVv6pq1l3V1OdBPR1lH4CV8@public.gmane.org>
>> Cc: Arnaud Patard <arnaud.patard-dQbF7i+pzddAfugRpC6u6w@public.gmane.org>
>> Cc: Benjamin Herrenschmidt <benh-XVmvHMARGAS8U2dJNN8I7kB+6BGkLq7r@public.gmane.org>
> 
> Untested/compiled, but looks good to me.
> 
> Reviewed-by: Grant Likely <grant.likely-s3s/WqlpOiPyB63q8FvJNQ@public.gmane.org>

Also applied, thanks everyone.

^ permalink raw reply

* Re: [PATCH 14/62] cxgb4vf: Use static const
From: Casey Leedom @ 2010-11-22 17:42 UTC (permalink / raw)
  To: Joe Perches; +Cc: netdev, linux-kernel
In-Reply-To: <d0a433aa6bbe20dfe4fd4f3ca9aec50f97601403.1290305776.git.joe@perches.com>

| From: Joe Perches <joe@perches.com>
| Date: Saturday, November 20, 2010 06:38 pm
| 
| Using static const generally increases object text and decreases data size.
| It also generally decreases overall object size.
| 
|    text	   data	    bss	    dec	    hex	filename
|   10179	     56	   2216	  12451	   30a3	drivers/net/cxgb4vf/t4vf_hw.o.new
|   10179	     56	   2216	  12451	   30a3	drivers/net/cxgb4vf/t4vf_hw.o.old
| 
| Signed-off-by: Joe Perches <joe@perches.com>
| ---
|  drivers/net/cxgb4vf/t4vf_hw.c |    2 +-
|  1 files changed, 1 insertions(+), 1 deletions(-)
| 
| diff --git a/drivers/net/cxgb4vf/t4vf_hw.c b/drivers/net/cxgb4vf/t4vf_hw.c
| index f7d7f97..daedf6e 100644
| --- a/drivers/net/cxgb4vf/t4vf_hw.c
| +++ b/drivers/net/cxgb4vf/t4vf_hw.c
| @@ -116,7 +116,7 @@ static void dump_mbox(struct adapter *adapter, const
| char *tag, u32 mbox_data) int t4vf_wr_mbox_core(struct adapter *adapter,
| const void *cmd, int size, void *rpl, bool sleep_ok)
|  {
| -	static int delay[] = {
| +	static const int delay[] = {
|  		1, 1, 3, 5, 10, 10, 20, 50, 100
|  	};

  Looks okay to me.  Thanks!

Casey

^ permalink raw reply

* [PATCH 1/9] AF_UNIX: Add constant for Unix socket options level
From: Alban Crequy @ 2010-11-22 18:36 UTC (permalink / raw)
  To: Alban Crequy
  Cc: David S. Miller, Eric Dumazet, Stephen Hemminger, Cyrill Gorcunov,
	Alexey Dobriyan, Lennart Poettering, Kay Sievers, Ian Molton,
	netdev, linux-kernel, Alban Crequy
In-Reply-To: <20101122183447.124afce5@chocolatine.cbg.collabora.co.uk>

Assign the next free socket options level to be used by the Unix
protocol and address family.

Signed-off-by: Alban Crequy <alban.crequy@collabora.co.uk>
---
 include/linux/socket.h |    1 +
 1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/include/linux/socket.h b/include/linux/socket.h
index 86b652f..7c5a4da 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -307,6 +307,7 @@ struct ucred {
 #define SOL_RDS		276
 #define SOL_IUCV	277
 #define SOL_CAIF	278
+#define SOL_UNIX	279
 
 /* IPX options */
 #define IPX_TYPE	1
-- 
1.7.1

^ permalink raw reply related

* [PATCH 2/9] AF_UNIX: add setsockopt on Unix sockets
From: Alban Crequy @ 2010-11-22 18:36 UTC (permalink / raw)
  To: Alban Crequy
  Cc: David S. Miller, Eric Dumazet, Stephen Hemminger, Cyrill Gorcunov,
	Alexey Dobriyan, Lennart Poettering, Kay Sievers, Ian Molton,
	netdev, linux-kernel, Alban Crequy
In-Reply-To: <20101122183447.124afce5@chocolatine.cbg.collabora.co.uk>

Signed-off-by: Alban Crequy <alban.crequy@collabora.co.uk>
---
 net/unix/af_unix.c |   15 ++++++++++++---
 1 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 7ff31c6..6eca106 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -512,6 +512,8 @@ static unsigned int unix_dgram_poll(struct file *, struct socket *,
 				    poll_table *);
 static int unix_ioctl(struct socket *, unsigned int, unsigned long);
 static int unix_shutdown(struct socket *, int);
+static int unix_setsockopt(struct socket *, int, int,
+			   char __user *, unsigned int);
 static int unix_stream_sendmsg(struct kiocb *, struct socket *,
 			       struct msghdr *, size_t);
 static int unix_stream_recvmsg(struct kiocb *, struct socket *,
@@ -538,7 +540,7 @@ static const struct proto_ops unix_stream_ops = {
 	.ioctl =	unix_ioctl,
 	.listen =	unix_listen,
 	.shutdown =	unix_shutdown,
-	.setsockopt =	sock_no_setsockopt,
+	.setsockopt =	unix_setsockopt,
 	.getsockopt =	sock_no_getsockopt,
 	.sendmsg =	unix_stream_sendmsg,
 	.recvmsg =	unix_stream_recvmsg,
@@ -559,7 +561,7 @@ static const struct proto_ops unix_dgram_ops = {
 	.ioctl =	unix_ioctl,
 	.listen =	sock_no_listen,
 	.shutdown =	unix_shutdown,
-	.setsockopt =	sock_no_setsockopt,
+	.setsockopt =	unix_setsockopt,
 	.getsockopt =	sock_no_getsockopt,
 	.sendmsg =	unix_dgram_sendmsg,
 	.recvmsg =	unix_dgram_recvmsg,
@@ -580,7 +582,7 @@ static const struct proto_ops unix_seqpacket_ops = {
 	.ioctl =	unix_ioctl,
 	.listen =	unix_listen,
 	.shutdown =	unix_shutdown,
-	.setsockopt =	sock_no_setsockopt,
+	.setsockopt =	unix_setsockopt,
 	.getsockopt =	sock_no_getsockopt,
 	.sendmsg =	unix_seqpacket_sendmsg,
 	.recvmsg =	unix_dgram_recvmsg,
@@ -1533,6 +1535,13 @@ out:
 }
 
 
+static int unix_setsockopt(struct socket *sock, int level, int optname,
+			   char __user *optval, unsigned int optlen)
+{
+	return -EOPNOTSUPP;
+}
+
+
 static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
 			       struct msghdr *msg, size_t len)
 {
-- 
1.7.1

^ permalink raw reply related

* [PATCH 3/9] AF_UNIX: create, join and leave multicast groups with setsockopt
From: Alban Crequy @ 2010-11-22 18:36 UTC (permalink / raw)
  To: Alban Crequy
  Cc: David S. Miller, Eric Dumazet, Stephen Hemminger, Cyrill Gorcunov,
	Alexey Dobriyan, Lennart Poettering, Kay Sievers, Ian Molton,
	netdev, linux-kernel, Alban Crequy
In-Reply-To: <20101122183447.124afce5@chocolatine.cbg.collabora.co.uk>

Multicast is implemented on SOCK_DGRAM and SOCK_SEQPACKET Unix sockets.

A userspace application can create a multicast group with:
  struct unix_mreq mreq;
  mreq.address.sun_family = AF_UNIX;
  mreq.address.sun_path[0] = '\0';
  strcpy(mreq.address.sun_path + 1, "socket-address");
  mreq.flags = 0;

  sockfd = socket(AF_UNIX, SOCK_DGRAM, 0);
  ret = setsockopt(sockfd, SOL_UNIX, UNIX_CREATE_GROUP, &mreq, sizeof(mreq));

Then a multicast group can be joined and left with:
  ret = setsockopt(sockfd, SOL_UNIX, UNIX_JOIN_GROUP, &mreq, sizeof(mreq));
  ret = setsockopt(sockfd, SOL_UNIX, UNIX_LEAVE_GROUP, &mreq, sizeof(mreq));

A socket can be a member of several multicast groups.

Signed-off-by: Alban Crequy <alban.crequy@collabora.co.uk>
---
 include/net/af_unix.h |   31 +++++++
 net/unix/af_unix.c    |  217 ++++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 247 insertions(+), 1 deletions(-)

diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index 90c9e28..bf114d5 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -40,6 +40,18 @@ struct unix_skb_parms {
 				spin_lock_nested(&unix_sk(s)->lock, \
 				SINGLE_DEPTH_NESTING)
 
+#define UNIX_MREQ_LOOPBACK	0x01
+struct unix_mreq
+{
+	struct sockaddr_un	address;
+	unsigned int		flags;
+};
+
+/* UNIX socket options */
+#define UNIX_CREATE_GROUP	1
+#define UNIX_JOIN_GROUP		2
+#define UNIX_LEAVE_GROUP	3
+
 #ifdef __KERNEL__
 /* The AF_UNIX socket */
 struct unix_sock {
@@ -56,8 +68,27 @@ struct unix_sock {
 	spinlock_t		lock;
 	unsigned int		gc_candidate : 1;
 	unsigned int		gc_maybe_cycle : 1;
+	unsigned int		is_mcast_addr : 1;
+
+	/* These multicast fields are protected by the global spinlock
+	 * unix_multicast_lock */
+	struct hlist_head	mcast_subscriptions;
+	struct hlist_head	mcast_members;
+	int			mcast_subscriptions_cnt;
+	int			mcast_members_cnt;
+
 	struct socket_wq	peer_wq;
 };
+
+struct unix_mcast
+{
+	struct unix_sock	*member;
+	struct unix_sock	*addr;
+	unsigned int		flags;
+	struct hlist_node	subscription_node;
+	struct hlist_node	member_node;
+};
+
 #define unix_sk(__sk) ((struct unix_sock *)__sk)
 
 #define peer_wait peer_wq.wait
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 6eca106..2278829 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -379,6 +379,9 @@ static int unix_release_sock(struct sock *sk, int embrion)
 	struct sock *skpair;
 	struct sk_buff *skb;
 	int state;
+	struct unix_mcast *node;
+	struct hlist_node *pos;
+	struct hlist_node *pos_tmp;
 
 	unix_remove_socket(sk);
 
@@ -392,6 +395,24 @@ static int unix_release_sock(struct sock *sk, int embrion)
 	u->mnt	     = NULL;
 	state = sk->sk_state;
 	sk->sk_state = TCP_CLOSE;
+	spin_lock(&unix_multicast_lock);
+	hlist_for_each_entry_safe(node, pos, pos_tmp, &u->mcast_subscriptions,
+				  subscription_node) {
+		hlist_del(&node->member_node);
+		hlist_del(&node->subscription_node);
+		node->addr->mcast_members_cnt--;
+		node->member->mcast_subscriptions_cnt--;
+		kfree(node);
+	}
+	hlist_for_each_entry_safe(node, pos, pos_tmp, &u->mcast_members,
+				  member_node) {
+		hlist_del(&node->member_node);
+		hlist_del(&node->subscription_node);
+		node->addr->mcast_members_cnt--;
+		node->member->mcast_subscriptions_cnt--;
+		kfree(node);
+	}
+	spin_unlock(&unix_multicast_lock);
 	unix_state_unlock(sk);
 
 	wake_up_interruptible_all(&u->peer_wait);
@@ -631,6 +652,8 @@ static struct sock *unix_create1(struct net *net, struct socket *sock)
 	atomic_long_set(&u->inflight, 0);
 	INIT_LIST_HEAD(&u->link);
 	mutex_init(&u->readlock); /* single task reading lock */
+	INIT_HLIST_HEAD(&u->mcast_subscriptions);
+	INIT_HLIST_HEAD(&u->mcast_members);
 	init_waitqueue_head(&u->peer_wait);
 	unix_insert_socket(unix_sockets_unbound, sk);
 out:
@@ -1535,10 +1558,202 @@ out:
 }
 
 
+static int unix_mc_create(struct socket *sock, struct unix_mreq *mreq)
+{
+	struct sock *other;
+	int err;
+	unsigned hash;
+	int namelen;
+
+	if (mreq->address.sun_family != AF_UNIX ||
+	    mreq->address.sun_path[0] != '\0')
+		return -EINVAL;
+
+	err = unix_mkname(&mreq->address, sizeof(struct sockaddr_un), &hash);
+	if (err < 0)
+		return err;
+
+	namelen = err;
+	other = unix_find_other(sock_net(sock->sk), &mreq->address, namelen,
+				sock->type, hash, &err);
+	if (other)
+		return -EADDRINUSE;
+
+	err = sock->ops->bind(sock,
+		(struct sockaddr*)&mreq->address,
+		sizeof(struct sockaddr_un));
+	if (err < 0)
+		return err;
+
+	unix_state_lock(sock->sk);
+	unix_sk(sock->sk)->is_mcast_addr = 1;
+	unix_state_unlock(sock->sk);
+
+	return 0;
+}
+
+
+static int unix_mc_join(struct socket *sock, struct unix_mreq *mreq)
+{
+	struct unix_sock *u = unix_sk(sock->sk);
+	struct sock *other;
+	struct unix_sock *otheru;
+	struct unix_mcast *node;
+	int err;
+	unsigned hash;
+	int namelen;
+
+	if (mreq->address.sun_family != AF_UNIX ||
+	    mreq->address.sun_path[0] != '\0')
+		return -EINVAL;
+
+	err = unix_autobind(sock);
+	if (err < 0)
+		return err;
+
+	err = unix_mkname(&mreq->address, sizeof(struct sockaddr_un), &hash);
+	if (err < 0)
+		return err;
+
+	namelen = err;
+	other = unix_find_other(sock_net(sock->sk), &mreq->address, namelen,
+				sock->type, hash, &err);
+	if (!other)
+		return -EINVAL;
+
+	if (other && !unix_sk(other)->is_mcast_addr) {
+		err = -EADDRINUSE;
+		goto sock_put_out;
+	}
+
+	otheru = unix_sk(other);
+
+	node = kmalloc(sizeof(struct unix_mcast), GFP_KERNEL);
+	if (!node) {
+		err = -ENOMEM;
+		goto sock_put_out;
+	}
+	node->member = u;
+	node->addr = otheru;
+	node->flags = mreq->flags;
+
+	spin_lock(&unix_multicast_lock);
+	hlist_add_head(&node->member_node, &otheru->mcast_members);
+	hlist_add_head(&node->subscription_node, &u->mcast_subscriptions);
+	otheru->mcast_members_cnt++;
+	u->mcast_subscriptions_cnt++;
+	spin_unlock(&unix_multicast_lock);
+
+	return 0;
+
+sock_put_out:
+	sock_put(other);
+	return err;
+}
+
+
+static int unix_mc_leave(struct socket *sock, struct unix_mreq *mreq)
+{
+	struct unix_sock *u = unix_sk(sock->sk);
+	struct sock *other;
+	struct unix_sock *otheru;
+	struct unix_mcast *node;
+	struct hlist_node *pos;
+	int err;
+	unsigned hash;
+	int namelen;
+
+	if (mreq->address.sun_family != AF_UNIX ||
+	    mreq->address.sun_path[0] != '\0')
+		return -EINVAL;
+
+	err = unix_mkname(&mreq->address, sizeof(struct sockaddr_un), &hash);
+	if (err < 0)
+		return err;
+
+	namelen = err;
+	other = unix_find_other(sock_net(sock->sk), &mreq->address, namelen,
+				sock->type, hash, &err);
+	if (!other)
+		return -EINVAL;
+
+	otheru = unix_sk(other);
+
+	if (!otheru->is_mcast_addr) {
+		err = -EINVAL;
+		goto sock_put_out;
+	}
+
+	spin_lock(&unix_multicast_lock);
+
+	hlist_for_each_entry(node, pos, &u->mcast_subscriptions,
+			     subscription_node) {
+		if (node->addr == otheru)
+			break;
+	}
+
+	if (!pos) {
+		spin_unlock(&unix_multicast_lock);
+		err = -EINVAL;
+		goto sock_put_out;
+	}
+
+	hlist_del(&node->member_node);
+	hlist_del(&node->subscription_node);
+	otheru->mcast_members_cnt--;
+	u->mcast_subscriptions_cnt--;
+	spin_unlock(&unix_multicast_lock);
+	kfree(node);
+	err = 0;
+
+sock_put_out:
+	sock_put(other);
+	return err;
+}
+
+
 static int unix_setsockopt(struct socket *sock, int level, int optname,
 			   char __user *optval, unsigned int optlen)
 {
-	return -EOPNOTSUPP;
+	struct unix_mreq mreq;
+	int err = 0;
+
+	if (level != SOL_UNIX)
+		return -ENOPROTOOPT;
+
+	switch (optname) {
+	case UNIX_CREATE_GROUP:
+	case UNIX_JOIN_GROUP:
+	case UNIX_LEAVE_GROUP:
+		if (optlen < sizeof(struct unix_mreq))
+			return -EINVAL;
+		if (copy_from_user(&mreq, optval, sizeof(struct unix_mreq)))
+			return -EFAULT;
+		break;
+
+	default:
+		break;
+	}
+
+	switch (optname) {
+	case UNIX_CREATE_GROUP:
+		err = unix_mc_create(sock, &mreq);
+		break;
+
+	case UNIX_JOIN_GROUP:
+		err = unix_mc_join(sock, &mreq);
+		break;
+
+	case UNIX_LEAVE_GROUP:
+		err = unix_mc_leave(sock, &mreq);
+		break;
+
+	default:
+		err = -ENOPROTOOPT;
+		break;
+	}
+
+	return err;
 }
 
 
-- 
1.7.1

^ permalink raw reply related

* [PATCH 5/9] AF_UNIX: Deliver message to several recipients in case of multicast
From: Alban Crequy @ 2010-11-22 18:36 UTC (permalink / raw)
  To: Alban Crequy
  Cc: David S. Miller, Eric Dumazet, Stephen Hemminger, Cyrill Gorcunov,
	Alexey Dobriyan, Lennart Poettering, Kay Sievers, Ian Molton,
	netdev, linux-kernel, Alban Crequy
In-Reply-To: <20101122183447.124afce5@chocolatine.cbg.collabora.co.uk>

unix_dgram_sendmsg() implements the delivery both for SOCK_DGRAM and
SOCK_SEQPACKET Unix sockets.

The delivery is done in an atomic way: either the message is delivered to all
recipients or none, even in case of interruptions or errors.

Signed-off-by: Alban Crequy <alban.crequy@collabora.co.uk>
---
 net/unix/af_unix.c |  247 +++++++++++++++++++++++++++++++++++++++-------------
 1 files changed, 188 insertions(+), 59 deletions(-)

diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 3cc9695..9207393 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1553,16 +1553,17 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
 {
 	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
 	struct sock *sk = sock->sk;
-	struct net *net = sock_net(sk);
 	struct unix_sock *u = unix_sk(sk);
 	struct sockaddr_un *sunaddr = msg->msg_name;
-	struct sock *other = NULL;
+	struct sock_set *others_set = NULL;
 	int namelen = 0; /* fake GCC */
 	int err;
 	unsigned hash;
 	struct sk_buff *skb;
+	int i;
 	long timeo;
 	struct scm_cookie tmp_scm;
+	int multicast_delivery = !!u->mcast_subscriptions_cnt;
 
 	if (NULL == siocb->scm)
 		siocb->scm = &tmp_scm;
@@ -1580,12 +1581,30 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
 		if (err < 0)
 			goto out;
 		namelen = err;
-	} else {
+	} else if (!multicast_delivery) {
+		struct sock *other;
 		sunaddr = NULL;
 		err = -ENOTCONN;
 		other = unix_peer_get(sk);
 		if (!other)
 			goto out;
+		err = -ENOMEM;
+		others_set = kmalloc(sizeof(struct sock_set)
+				     + sizeof(struct sock_item),
+				     GFP_KERNEL);
+		if (!others_set)
+			goto out;
+		others_set->cnt = 1;
+		sock_hold(other);
+		others_set->items[0].s = other;
+		others_set->items[0].skb = NULL;
+		others_set->items[0].to_deliver = 1;
+	} else {
+		sunaddr = NULL;
+		err = -ENOTCONN;
+		others_set = unix_find_multicast_recipients(sk, NULL, &err);
+		if (!others_set)
+			goto out;
 	}
 
 	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
@@ -1613,90 +1632,200 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
 	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
 
 restart:
-	if (!other) {
+	if (!others_set) {
+		struct sock *other;
+		struct unix_sock *otheru;
 		err = -ECONNRESET;
 		if (sunaddr == NULL)
 			goto out_free;
 
-		other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
-					hash, &err);
-		if (other == NULL)
+		other = unix_find_other(sock_net(sk), sunaddr, namelen,
+					sk->sk_type, hash, &err);
+		if (!other)
 			goto out_free;
+		otheru = unix_sk(other);
+
+		if (otheru->is_mcast_addr) {
+			/* FIXME: we should send to the requested recipient
+			 * specified in sendto(...dest_addr) instead of the
+			 * recipient specified by setsockopt... */
+			sock_put(other);
+			others_set = unix_find_multicast_recipients(sk, other,
+								    &err);
+			if (!others_set)
+				goto out_free;
+		} else {
+			others_set = kmalloc(sizeof(struct sock_set)
+					     + sizeof(struct sock_item),
+					     GFP_KERNEL);
+			if (!others_set)
+				goto out_free;
+			others_set->cnt = 1;
+			others_set->items[0].s = other;
+			others_set->items[0].skb = NULL;
+			others_set->items[0].to_deliver = 1;
+		}
 	}
 
-	unix_state_lock(other);
-	err = -EPERM;
-	if (!unix_may_send(sk, other))
-		goto out_unlock;
+	for (i = 0 ; i < others_set->cnt ; i++) {
+		struct sock *cur = others_set->items[i].s;
 
-	if (sock_flag(other, SOCK_DEAD)) {
-		/*
-		 *	Check with 1003.1g - what should
-		 *	datagram error
-		 */
-		unix_state_unlock(other);
-		sock_put(other);
+		others_set->items[i].skb = skb_clone(skb, GFP_KERNEL);
+		if (!others_set->items[i].skb) {
+			err = -ENOMEM;
+			goto out_free;
+		}
+		skb_set_owner_w(others_set->items[i].skb, sk);
+	}
 
-		err = 0;
-		unix_state_lock(sk);
-		if (unix_peer(sk) == other) {
-			unix_peer(sk) = NULL;
-			unix_state_unlock(sk);
+	for (i = 0 ; i < others_set->cnt ; i++) {
+		struct sock *cur = others_set->items[i].s;
 
-			unix_dgram_disconnected(sk, other);
-			sock_put(other);
-			err = -ECONNREFUSED;
-		} else {
-			unix_state_unlock(sk);
+		if (!others_set->items[i].to_deliver)
+			continue;
+
+		unix_state_lock(cur);
+		err = -EPERM;
+		if (!multicast_delivery && !unix_may_send(sk, cur)) {
+			others_set->items[i].to_deliver = 0;
+			unix_state_unlock(cur);
+			kfree_skb(others_set->items[i].skb);
+			if (multicast_delivery)
+				continue;
+			else
+				goto out_free;
 		}
 
-		other = NULL;
-		if (err)
-			goto out_free;
-		goto restart;
+		if (sock_flag(cur, SOCK_DEAD)) {
+			/*
+			 *	Check with 1003.1g - what should
+			 *	datagram error
+			 */
+			unix_state_unlock(cur);
+
+			err = 0;
+			unix_state_lock(sk);
+			if (unix_peer(sk) == cur) {
+				unix_peer(sk) = NULL;
+				unix_state_unlock(sk);
+
+				unix_dgram_disconnected(sk, cur);
+				sock_put(cur);
+				err = -ECONNREFUSED;
+			} else {
+				unix_state_unlock(sk);
+			}
+
+			kfree_skb(others_set->items[i].skb);
+			if (err)
+				goto out_free;
+
+			if (multicast_delivery) {
+				others_set->items[i].to_deliver = 0;
+				continue;
+			} else {
+				kfree_sock_set(others_set);
+				others_set = NULL;
+				goto restart;
+			}
+		}
+
+		err = -EPIPE;
+		if (cur->sk_shutdown & RCV_SHUTDOWN) {
+			unix_state_unlock(cur);
+			kfree_skb(others_set->items[i].skb);
+			if (multicast_delivery) {
+				others_set->items[i].to_deliver = 0;
+				continue;
+			} else {
+				goto out_free;
+			}
+		}
+
+		if (sk->sk_type != SOCK_SEQPACKET) {
+			err = security_unix_may_send(sk->sk_socket,
+						     cur->sk_socket);
+			if (err) {
+				unix_state_unlock(cur);
+				kfree_skb(others_set->items[i].skb);
+				if (multicast_delivery) {
+					others_set->items[i].to_deliver = 0;
+					continue;
+				} else {
+					goto out_free;
+				}
+			}
+		}
+
+		if (unix_peer(cur) != sk && unix_recvq_full(cur)) {
+			kfree_skb(others_set->items[i].skb);
+ 
+			if (multicast_delivery) {
+				unix_state_unlock(cur);
+				others_set->items[i].to_deliver = 0;
+				continue;
+			} else {
+				if (!timeo) {
+					unix_state_unlock(cur);
+					err = -EAGAIN;
+					goto out_free;
+				}
+
+				timeo = unix_wait_for_peer(cur, timeo);
+
+				err = sock_intr_errno(timeo);
+				if (signal_pending(current))
+					goto out_free;
+
+				kfree_sock_set(others_set);
+				others_set = NULL;
+				goto restart;
+			}
+		}
 	}
 
-	err = -EPIPE;
-	if (other->sk_shutdown & RCV_SHUTDOWN)
-		goto out_unlock;
+	for (i = 0 ; i < others_set->cnt ; i++) {
+		struct sock *cur = others_set->items[i].s;
 
-	if (sk->sk_type != SOCK_SEQPACKET) {
-		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
-		if (err)
-			goto out_unlock;
+		if (!others_set->items[i].to_deliver)
+			continue;
+
+		if (sock_flag(cur, SOCK_RCVTSTAMP))
+			__net_timestamp(others_set->items[i].skb);
+
+		skb_queue_tail(&cur->sk_receive_queue,
+			       others_set->items[i].skb);
 	}
 
-	if (unix_peer(other) != sk && unix_recvq_full(other)) {
-		if (!timeo) {
-			err = -EAGAIN;
-			goto out_unlock;
-		}
+	for (i = 0 ; i < others_set->cnt ; i++) {
+		struct sock *cur = others_set->items[i].s;
 
-		timeo = unix_wait_for_peer(other, timeo);
+		if (!others_set->items[i].to_deliver)
+			continue;
 
-		err = sock_intr_errno(timeo);
-		if (signal_pending(current))
-			goto out_free;
+		unix_state_unlock(cur);
+	}
 
-		goto restart;
+	for (i = 0 ; i < others_set->cnt ; i++) {
+		struct sock *cur = others_set->items[i].s;
+
+		if (!others_set->items[i].to_deliver)
+			continue;
+
+		cur->sk_data_ready(cur, len);
 	}
 
-	if (sock_flag(other, SOCK_RCVTSTAMP))
-		__net_timestamp(skb);
-	skb_queue_tail(&other->sk_receive_queue, skb);
-	unix_state_unlock(other);
-	other->sk_data_ready(other, len);
-	sock_put(other);
+	kfree_skb(skb);
 	scm_destroy(siocb->scm);
+	if (others_set)
+		kfree_sock_set(others_set);
 	return len;
 
-out_unlock:
-	unix_state_unlock(other);
 out_free:
 	kfree_skb(skb);
 out:
-	if (other)
-		sock_put(other);
+	if (others_set)
+		kfree_sock_set(others_set);
 	scm_destroy(siocb->scm);
 	return err;
 }
-- 
1.7.1

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox