Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCH iproute2 5/7] ip/xfrm: Command syntax should not expect a key for compression
From: David Ward @ 2013-03-25 14:23 UTC (permalink / raw)
  To: netdev; +Cc: David Ward
In-Reply-To: <1364221399-1024-1-git-send-email-david.ward@ll.mit.edu>

Compression algorithms do not use a key.

Signed-off-by: David Ward <david.ward@ll.mit.edu>
---
 ip/xfrm_state.c    |   26 +++++++++++++++++---------
 man/man8/ip-xfrm.8 |   13 ++++++++-----
 2 files changed, 25 insertions(+), 14 deletions(-)

diff --git a/ip/xfrm_state.c b/ip/xfrm_state.c
index 08a4980..9b374ee 100644
--- a/ip/xfrm_state.c
+++ b/ip/xfrm_state.c
@@ -78,13 +78,14 @@ static void usage(void)
 	fprintf(stderr, "ALGO-LIST := [ ALGO-LIST ] ALGO\n");
 	fprintf(stderr, "ALGO := { ");
 	fprintf(stderr, "%s | ", strxf_algotype(XFRMA_ALG_CRYPT));
-	fprintf(stderr, "%s | ", strxf_algotype(XFRMA_ALG_AUTH));
-	fprintf(stderr, "%s", strxf_algotype(XFRMA_ALG_COMP));
+	fprintf(stderr, "%s", strxf_algotype(XFRMA_ALG_AUTH));
 	fprintf(stderr, " } ALGO-NAME ALGO-KEY |\n");
+	fprintf(stderr, "        %s", strxf_algotype(XFRMA_ALG_AUTH_TRUNC));
+	fprintf(stderr, " ALGO-NAME ALGO-KEY ALGO-TRUNC-LEN |\n");
 	fprintf(stderr, "        %s", strxf_algotype(XFRMA_ALG_AEAD));
 	fprintf(stderr, " ALGO-NAME ALGO-KEY ALGO-ICV-LEN |\n");
-	fprintf(stderr, "        %s", strxf_algotype(XFRMA_ALG_AUTH_TRUNC));
-	fprintf(stderr, " ALGO-NAME ALGO-KEY ALGO-TRUNC-LEN\n");
+	fprintf(stderr, "        %s", strxf_algotype(XFRMA_ALG_COMP));
+	fprintf(stderr, " ALGO-NAME\n");
  	fprintf(stderr, "MODE := transport | tunnel | ro | in_trigger | beet\n");
 	fprintf(stderr, "FLAG-LIST := [ FLAG-LIST ] FLAG\n");
 	fprintf(stderr, "FLAG := noecn | decap-dscp | nopmtudisc | wildrecv | icmp | af-unspec | align4\n");
@@ -374,7 +375,7 @@ static int xfrm_state_modify(int cmd, unsigned flags, int argc, char **argv)
 				int len;
 				__u32 icvlen, trunclen;
 				char *name;
-				char *key;
+				char *key = "";
 				char *buf;
 
 				switch (type) {
@@ -409,10 +410,17 @@ static int xfrm_state_modify(int cmd, unsigned flags, int argc, char **argv)
 				NEXT_ARG();
 				name = *argv;
 
-				if (!NEXT_ARG_OK())
-					missarg("ALGO-KEY");
-				NEXT_ARG();
-				key = *argv;
+				switch (type) {
+				case XFRMA_ALG_AEAD:
+				case XFRMA_ALG_CRYPT:
+				case XFRMA_ALG_AUTH:
+				case XFRMA_ALG_AUTH_TRUNC:
+					if (!NEXT_ARG_OK())
+						missarg("ALGO-KEY");
+					NEXT_ARG();
+					key = *argv;
+					break;
+				}
 
 				buf = alg.u.alg.alg_key;
 				len = sizeof(alg.u.alg);
diff --git a/man/man8/ip-xfrm.8 b/man/man8/ip-xfrm.8
index f359773..6017bc2 100644
--- a/man/man8/ip-xfrm.8
+++ b/man/man8/ip-xfrm.8
@@ -117,14 +117,17 @@ ip-xfrm \- transform configuration
 
 .ti -8
 .IR ALGO " :="
-.RB "{ " enc " | " auth " | " comp " } " 
+.RB "{ " enc " | " auth " } " 
 .IR ALGO-NAME " " ALGO-KEY " |"
 .br
-.B  aead
-.IR ALGO-NAME " " ALGO-KEY " " ALGO-ICV-LEN  " |"
-.br
 .B auth-trunc
-.IR ALGO-NAME " " ALGO-KEY " " ALGO-TRUNC-LEN
+.IR ALGO-NAME " " ALGO-KEY " " ALGO-TRUNC-LEN " |"
+.br
+.B aead
+.IR ALGO-NAME " " ALGO-KEY " " ALGO-ICV-LEN " |"
+.br
+.B comp
+.IR ALGO-NAME
 
 .ti -8
 .IR MODE " := "
-- 
1.7.1

^ permalink raw reply related

* [PATCH iproute2 4/7] ip/xfrm: Do not print a zero-length algorithm key
From: David Ward @ 2013-03-25 14:23 UTC (permalink / raw)
  To: netdev; +Cc: David Ward
In-Reply-To: <1364221399-1024-1-git-send-email-david.ward@ll.mit.edu>

Signed-off-by: David Ward <david.ward@ll.mit.edu>
---
 ip/ipxfrm.c |   12 +++++++-----
 1 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/ip/ipxfrm.c b/ip/ipxfrm.c
index 2576938..3113573 100644
--- a/ip/ipxfrm.c
+++ b/ip/ipxfrm.c
@@ -536,12 +536,14 @@ static void __xfrm_algo_print(struct xfrm_algo *algo, int type, int len,
 		goto fin;
 	}
 
-	fprintf(fp, "0x");
-	for (i = 0; i < keylen; i ++)
-		fprintf(fp, "%.2x", (unsigned char)algo->alg_key[i]);
+	if (keylen > 0) {
+		fprintf(fp, "0x");
+		for (i = 0; i < keylen; i ++)
+			fprintf(fp, "%.2x", (unsigned char)algo->alg_key[i]);
 
-	if (show_stats > 0)
-		fprintf(fp, " (%d bits)", algo->alg_key_len);
+		if (show_stats > 0)
+			fprintf(fp, " (%d bits)", algo->alg_key_len);
+	}
 
  fin:
 	if (newline)
-- 
1.7.1

^ permalink raw reply related

* [PATCH iproute2 3/7] ip/xfrm: Improve transform protocol-specific parameter checking
From: David Ward @ 2013-03-25 14:23 UTC (permalink / raw)
  To: netdev; +Cc: David Ward
In-Reply-To: <1364221399-1024-1-git-send-email-david.ward@ll.mit.edu>

Ensure that only algorithms and modes supported by the transform
protocol are specified (so that errors are more obvious).

Signed-off-by: David Ward <david.ward@ll.mit.edu>
---
 ip/xfrm_state.c |  108 ++++++++++++++++++++++++++++++++++++++++--------------
 1 files changed, 80 insertions(+), 28 deletions(-)

diff --git a/ip/xfrm_state.c b/ip/xfrm_state.c
index 85d3e35..08a4980 100644
--- a/ip/xfrm_state.c
+++ b/ip/xfrm_state.c
@@ -486,52 +486,104 @@ static int xfrm_state_modify(int cmd, unsigned flags, int argc, char **argv)
 		}
 	}
 
-	switch (req.xsinfo.mode) {
-	case XFRM_MODE_TRANSPORT:
-	case XFRM_MODE_TUNNEL:
-		if (!xfrm_xfrmproto_is_ipsec(req.xsinfo.id.proto)) {
-			fprintf(stderr, "\"mode\" is invalid with proto=%s\n",
+	if (xfrm_xfrmproto_is_ipsec(req.xsinfo.id.proto)) {
+		switch (req.xsinfo.mode) {
+		case XFRM_MODE_TRANSPORT:
+		case XFRM_MODE_TUNNEL:
+			break;
+		case XFRM_MODE_BEET:
+			if (req.xsinfo.id.proto == IPPROTO_ESP)
+				break;
+		default:
+			fprintf(stderr, "MODE value is invalid with XFRM-PROTO value \"%s\"\n",
 				strxf_xfrmproto(req.xsinfo.id.proto));
 			exit(1);
 		}
-		break;
-	case XFRM_MODE_ROUTEOPTIMIZATION:
-	case XFRM_MODE_IN_TRIGGER:
-		if (!xfrm_xfrmproto_is_ro(req.xsinfo.id.proto)) {
-			fprintf(stderr, "\"mode\" is invalid with proto=%s\n",
+
+		switch (req.xsinfo.id.proto) {
+		case IPPROTO_ESP:
+			if (calgop) {
+				fprintf(stderr, "ALGO-TYPE value \"%s\" is invalid with XFRM-PROTO value \"%s\"\n",
+					strxf_algotype(XFRMA_ALG_COMP),
+					strxf_xfrmproto(req.xsinfo.id.proto));
+				exit(1);
+			}
+			if (!ealgop && !aeadop) {
+				fprintf(stderr, "ALGO-TYPE value \"%s\" or \"%s\" is required with XFRM-PROTO value \"%s\"\n",
+					strxf_algotype(XFRMA_ALG_CRYPT),
+					strxf_algotype(XFRMA_ALG_AEAD),
+					strxf_xfrmproto(req.xsinfo.id.proto));
+				exit(1);
+			}
+			break;
+		case IPPROTO_AH:
+			if (ealgop || aeadop || calgop) {
+				fprintf(stderr, "ALGO-TYPE values \"%s\", \"%s\", and \"%s\" are invalid with XFRM-PROTO value \"%s\"\n",
+					strxf_algotype(XFRMA_ALG_CRYPT),
+					strxf_algotype(XFRMA_ALG_AEAD),
+					strxf_algotype(XFRMA_ALG_COMP),
+					strxf_xfrmproto(req.xsinfo.id.proto));
+				exit(1);
+			}
+			if (!aalgop) {
+				fprintf(stderr, "ALGO-TYPE value \"%s\" or \"%s\" is required with XFRM-PROTO value \"%s\"\n",
+					strxf_algotype(XFRMA_ALG_AUTH),
+					strxf_algotype(XFRMA_ALG_AUTH_TRUNC),
+					strxf_xfrmproto(req.xsinfo.id.proto));
+				exit(1);
+			}
+			break;
+		case IPPROTO_COMP:
+			if (ealgop || aalgop || aeadop) {
+				fprintf(stderr, "ALGO-TYPE values \"%s\", \"%s\", \"%s\", and \"%s\" are invalid with XFRM-PROTO value \"%s\"\n",
+					strxf_algotype(XFRMA_ALG_CRYPT),
+					strxf_algotype(XFRMA_ALG_AUTH),
+					strxf_algotype(XFRMA_ALG_AUTH_TRUNC),
+					strxf_algotype(XFRMA_ALG_AEAD),
+					strxf_xfrmproto(req.xsinfo.id.proto));
+				exit(1);
+			}
+			if (!calgop) {
+				fprintf(stderr, "ALGO-TYPE value \"%s\" is required with XFRM-PROTO value \"%s\"\n",
+					strxf_algotype(XFRMA_ALG_COMP),
+					strxf_xfrmproto(req.xsinfo.id.proto));
+				exit(1);
+			}
+			break;
+		}
+	} else {
+		if (ealgop || aalgop || aeadop || calgop) {
+			fprintf(stderr, "ALGO is invalid with XFRM-PROTO value \"%s\"\n",
 				strxf_xfrmproto(req.xsinfo.id.proto));
 			exit(1);
 		}
-		break;
-	default:
-		break;
 	}
 
-	if (aeadop || ealgop || aalgop || calgop) {
-		if (!xfrm_xfrmproto_is_ipsec(req.xsinfo.id.proto)) {
-			fprintf(stderr, "\"ALGO\" is invalid with proto=%s\n",
+	if (xfrm_xfrmproto_is_ro(req.xsinfo.id.proto)) {
+		switch (req.xsinfo.mode) {
+		case XFRM_MODE_ROUTEOPTIMIZATION:
+		case XFRM_MODE_IN_TRIGGER:
+			break;
+		case 0:
+			fprintf(stderr, "\"mode\" is required with XFRM-PROTO value \"%s\"\n",
 				strxf_xfrmproto(req.xsinfo.id.proto));
 			exit(1);
-		}
-	} else {
-		if (xfrm_xfrmproto_is_ipsec(req.xsinfo.id.proto)) {
-			fprintf(stderr, "\"ALGO\" is required with proto=%s\n",
+		default:
+			fprintf(stderr, "MODE value is invalid with XFRM-PROTO value \"%s\"\n",
 				strxf_xfrmproto(req.xsinfo.id.proto));
-			exit (1);
+			exit(1);
 		}
-	}
 
-	if (coap) {
-		if (!xfrm_xfrmproto_is_ro(req.xsinfo.id.proto)) {
-			fprintf(stderr, "\"coa\" is invalid with proto=%s\n",
+		if (!coap) {
+			fprintf(stderr, "\"coa\" is required with XFRM-PROTO value \"%s\"\n",
 				strxf_xfrmproto(req.xsinfo.id.proto));
 			exit(1);
 		}
 	} else {
-		if (xfrm_xfrmproto_is_ro(req.xsinfo.id.proto)) {
-			fprintf(stderr, "\"coa\" is required with proto=%s\n",
+		if (coap) {
+			fprintf(stderr, "\"coa\" is invalid with XFRM-PROTO value \"%s\"\n",
 				strxf_xfrmproto(req.xsinfo.id.proto));
-			exit (1);
+			exit(1);
 		}
 	}
 
-- 
1.7.1

^ permalink raw reply related

* [PATCH iproute2 1/7] ip/xfrm: Extend SPI validity checking
From: David Ward @ 2013-03-25 14:23 UTC (permalink / raw)
  To: netdev; +Cc: David Ward

A Security Policy Index (SPI) is not used with Mobile IPv6. IPComp
uses a smaller 16-bit Compression Parameter Index (CPI) which is
passed as the SPI value. Perform checks whenever specifying an ID.

Signed-off-by: David Ward <david.ward@ll.mit.edu>
---
 ip/ipxfrm.c     |   12 ++++++++++++
 ip/xfrm_state.c |    5 -----
 2 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/ip/ipxfrm.c b/ip/ipxfrm.c
index db51918..2576938 100644
--- a/ip/ipxfrm.c
+++ b/ip/ipxfrm.c
@@ -1067,6 +1067,18 @@ int xfrm_id_parse(xfrm_address_t *saddr, struct xfrm_id *id, __u16 *family,
 	if (src.family && dst.family && (src.family != dst.family))
 		invarg("the same address family is required between \"src\" and \"dst\"", *argv);
 
+	if (id->spi && id->proto) {
+		if (xfrm_xfrmproto_is_ro(id->proto)) {
+			fprintf(stderr, "\"spi\" is invalid with XFRM-PROTO value \"%s\"\n",
+			        strxf_xfrmproto(id->proto));
+			exit(1);
+		} else if (id->proto == IPPROTO_COMP && ntohl(id->spi) >= 0x10000) {
+			fprintf(stderr, "SPI value is too large with XFRM-PROTO value \"%s\"\n",
+			        strxf_xfrmproto(id->proto));
+			exit(1);
+		}
+	}
+
 	if (loose == 0 && id->proto == 0)
 		missarg("XFRM-PROTO");
 	if (argc == *argcp)
diff --git a/ip/xfrm_state.c b/ip/xfrm_state.c
index c0cf88c..3c01ec5 100644
--- a/ip/xfrm_state.c
+++ b/ip/xfrm_state.c
@@ -502,11 +502,6 @@ static int xfrm_state_modify(int cmd, unsigned flags, int argc, char **argv)
 				strxf_xfrmproto(req.xsinfo.id.proto));
 			exit(1);
 		}
-		if (req.xsinfo.id.spi != 0) {
-			fprintf(stderr, "\"spi\" must be 0 with proto=%s\n",
-				strxf_xfrmproto(req.xsinfo.id.proto));
-			exit(1);
-		}
 		break;
 	default:
 		break;
-- 
1.7.1

^ permalink raw reply related

* [PATCH iproute2 2/7] ip/xfrm: Do not allow redundant algorithm combinations to be specified
From: David Ward @ 2013-03-25 14:23 UTC (permalink / raw)
  To: netdev; +Cc: David Ward
In-Reply-To: <1364221399-1024-1-git-send-email-david.ward@ll.mit.edu>

AEAD algorithms perform both encryption and authentication; they are
not combined with separate encryption or authentication algorithms.

Signed-off-by: David Ward <david.ward@ll.mit.edu>
---
 ip/xfrm_state.c |    6 +++---
 1 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/ip/xfrm_state.c b/ip/xfrm_state.c
index 3c01ec5..85d3e35 100644
--- a/ip/xfrm_state.c
+++ b/ip/xfrm_state.c
@@ -379,18 +379,18 @@ static int xfrm_state_modify(int cmd, unsigned flags, int argc, char **argv)
 
 				switch (type) {
 				case XFRMA_ALG_AEAD:
-					if (aeadop)
+					if (ealgop || aalgop || aeadop)
 						duparg("ALGO-TYPE", *argv);
 					aeadop = *argv;
 					break;
 				case XFRMA_ALG_CRYPT:
-					if (ealgop)
+					if (ealgop || aeadop)
 						duparg("ALGO-TYPE", *argv);
 					ealgop = *argv;
 					break;
 				case XFRMA_ALG_AUTH:
 				case XFRMA_ALG_AUTH_TRUNC:
-					if (aalgop)
+					if (aalgop || aeadop)
 						duparg("ALGO-TYPE", *argv);
 					aalgop = *argv;
 					break;
-- 
1.7.1

^ permalink raw reply related

* Re: [Xen-devel] [PATCH 1/6] xen-netfront: remove unused variable `extra'
From: David Vrabel @ 2013-03-25 14:21 UTC (permalink / raw)
  To: Wei Liu
  Cc: xen-devel, netdev, annie.li, david.vrabel, ian.campbell,
	konrad.wilk
In-Reply-To: <1364209702-12437-2-git-send-email-wei.liu2@citrix.com>

On 25/03/13 11:08, Wei Liu wrote:
> This variable is supposed to hold reference to the last extra_info in the
> loop. However there is only type of extra info here and the loop to process
> extra info is missing, so this variable is never used and causes confusion.
> 
> Remove it at the moment. We can add it back when necessary.
> 
> Signed-off-by: Wei Liu <wei.liu2@citrix.com>

Obviously correct.

Reviewed-by: David Vrabel <david.vrabel@citrix.com>

David

^ permalink raw reply

* Re: [PATCH net-next] 802: fix a possible race condition
From: Eric Dumazet @ 2013-03-25 14:08 UTC (permalink / raw)
  To: Cong Wang; +Cc: David Miller, netdev, david.ward, jorge
In-Reply-To: <1364218338.2532.15.camel@cr0>

On Mon, 2013-03-25 at 21:32 +0800, Cong Wang wrote:

> At least garp_join_timer() calls garp_pdu_queue() in a timer:
> 
> static void garp_join_timer(unsigned long data)
> {
>         struct garp_applicant *app = (struct garp_applicant *)data;
> 
>         spin_lock(&app->lock);
>         garp_gid_event(app, GARP_EVENT_TRANSMIT_PDU);
>         garp_pdu_queue(app);
>         spin_unlock(&app->lock);
> 
>         garp_queue_xmit(app);
>         garp_join_timer_arm(app);
> }
> 
> which I don't think can hold RTNL lock possibly.
> 

But timer wont possibly run because of the previous :

del_timer_sync(&app->join_timer);

^ permalink raw reply

* Re: [Eulerkernel] [PATCH] af_unix: dont send SCM_CREDENTIAL when dest socket is NULL
From: Eric Dumazet @ 2013-03-25 14:04 UTC (permalink / raw)
  To: dingtianhong; +Cc: David S. Miller, Eric Dumazet, netdev, Li Zefan, Xinwei Hu
In-Reply-To: <515026DA.7050306@huawei.com>

On Mon, 2013-03-25 at 18:28 +0800, dingtianhong wrote:
> SCM_SCREDENTIALS should apply to write() syscalls only either source or destination
> socket asserted SOCK_PASSCRED. The original implememtation in maybe_add_creds is wrong,
> and breaks several LSB testcases ( i.e. /tset/LSB.os/netowkr/recvfrom/T.recvfrom).
> 
> Origionally-authored-by: Karel Srot <ksrot@redhat.com>
> Signed-off-by: Ding Tianhong <dingtianhong@huawei.com>
> ---
>    net/unix/af_unix.c | 4 ++--
>    1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
> index 51be64f..99189fd 100644
> --- a/net/unix/af_unix.c
> +++ b/net/unix/af_unix.c
> @@ -1413,8 +1413,8 @@ static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
>           if (UNIXCB(skb).cred)
>                   return;
>           if (test_bit(SOCK_PASSCRED, &sock->flags) ||
> -           !other->sk_socket ||
> -           test_bit(SOCK_PASSCRED, &other->sk_socket->flags)) {
> +           (other->sk_socket &&
> +           test_bit(SOCK_PASSCRED, &other->sk_socket->flags))) {
>                   UNIXCB(skb).pid  = get_pid(task_tgid(current));
>                   UNIXCB(skb).cred = get_current_cred();
>           }

I am not sure why adding credentials if other->sk_socket is NULL could
break an application ?

This was the case before commit introducing this code.

Anyway patch looks fine

Acked-by: Eric Dumazet <edumazet@google.com>

^ permalink raw reply

* Re: [Xen-devel] [PATCH 6/6] xen-netback: don't disconnect frontend when seeing oversize frame
From: Wei Liu @ 2013-03-25 13:52 UTC (permalink / raw)
  To: Jan Beulich
  Cc: David Vrabel, Ian Campbell, xen-devel@lists.xen.org,
	annie.li@oracle.com, konrad.wilk@oracle.com,
	netdev@vger.kernel.org
In-Reply-To: <515056F502000078000C8184@nat28.tlf.novell.com>

On Mon, Mar 25, 2013 at 12:53:57PM +0000, Jan Beulich wrote:
> >>> On 25.03.13 at 12:08, Wei Liu <wei.liu2@citrix.com> wrote:
> > @@ -947,10 +949,21 @@ static int netbk_count_requests(struct xenvif *vif,
> >  
> >  		memcpy(txp, RING_GET_REQUEST(&vif->tx, cons + slots),
> >  		       sizeof(*txp));
> > -		if (txp->size > first->size) {
> > -			netdev_err(vif->dev, "Packet is bigger than frame.\n");
> > -			netbk_fatal_tx_err(vif);
> > -			return -EIO;
> > +
> > +		/* If the guest submitted a frame >= 64 KiB then
> > +		 * first->size overflowed and following slots will
> > +		 * appear to be larger than the frame.
> > +		 *
> > +		 * This cannot be fatal error as there are buggy
> > +		 * frontends that do this.
> > +		 *
> > +		 * Consume all slots and drop the packet.
> > +		 */
> > +		if (!drop && txp->size > first->size) {
> > +			if (net_ratelimit())
> > +				netdev_dbg(vif->dev,
> > +					   "Packet is bigger than frame.\n");
> > +			drop = true;
> 
> So this deals with one half of the problem, but shouldn't we also
> revert the disconnect when slots would exceed MAX_SKB_FRAGS
> (or max_skb_slots after patch 5)? Afaict you could trivially extend
> this patch to also cover that case...
> 

I don't think we should do that. IMO a guest using too many slots should
be considered malicious and disconnected.


Wei.

> Jan

^ permalink raw reply

* Re: [PATCH net-next] 802: fix a possible race condition
From: Cong Wang @ 2013-03-25 13:32 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, david.ward, jorge
In-Reply-To: <20130324.172403.776354963637295731.davem@davemloft.net>

On Sun, 2013-03-24 at 17:24 -0400, David Miller wrote:
> From: Cong Wang <amwang@redhat.com>
> Date: Sat, 23 Mar 2013 13:14:08 +0800
> 
> > From: Cong Wang <amwang@redhat.com>
> > 
> > garp_pdu_queue() should ways be called with this spin lock.
> > garp_uninit_applicant() only holds rtnl lock which is not
> > enough here.
> > 
> > Found by code inspection.
> > 
> > Cc: "David S. Miller" <davem@davemloft.net>
> > Cc: David Ward <david.ward@ll.mit.edu>
> > Cc: "Jorge Boncompte [DTI2]" <jorge@dti2.net>
> > Signed-off-by: Cong Wang <amwang@redhat.com>
> 
> Under what conditions can entries be removed or added to
> these RB-trees without the RTNL being held?

At least garp_join_timer() calls garp_pdu_queue() in a timer:

static void garp_join_timer(unsigned long data)
{
        struct garp_applicant *app = (struct garp_applicant *)data;

        spin_lock(&app->lock);
        garp_gid_event(app, GARP_EVENT_TRANSMIT_PDU);
        garp_pdu_queue(app);
        spin_unlock(&app->lock);

        garp_queue_xmit(app);
        garp_join_timer_arm(app);
}

which I don't think can hold RTNL lock possibly.

> 
> If such events cannot happen, then no locking is needed.
> 
> Even if your change is correct and necessary, the answer to my
> needs to be added to your commit message.

Ok, I will.

Thanks.

^ permalink raw reply

* [PATCH] unix: fix a race condition in unix_release()
From: Paul Moore @ 2013-03-25 13:18 UTC (permalink / raw)
  To: netdev; +Cc: jan.stancek

As reported by Jan, and others over the past few years, there is a
race condition caused by unix_release setting the sock->sk pointer
to NULL before properly marking the socket as dead/orphaned.  This
can cause a problem with the LSM hook security_unix_may_send() if
there is another socket attempting to write to this partially
released socket in between when sock->sk is set to NULL and it is
marked as dead/orphaned.  This patch fixes this by only setting
sock->sk to NULL after the socket has been marked as dead; I also
take the opportunity to make unix_release_sock() a void function
as it only ever returned 0/success.

Dave, I think this one should go on the -stable pile.

Special thanks to Jan for coming up with a reproducer for this
problem.

Reported-by: Jan Stancek <jan.stancek@gmail.com>
Signed-off-by: Paul Moore <pmoore@redhat.com>
---
 net/unix/af_unix.c |    7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 51be64f..f153a8d 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -382,7 +382,7 @@ static void unix_sock_destructor(struct sock *sk)
 #endif
 }

-static int unix_release_sock(struct sock *sk, int embrion)
+static void unix_release_sock(struct sock *sk, int embrion)
 {
 	struct unix_sock *u = unix_sk(sk);
 	struct path path;
@@ -451,8 +451,6 @@ static int unix_release_sock(struct sock *sk, int embrion)

 	if (unix_tot_inflight)
 		unix_gc();		/* Garbage collect fds */
-
-	return 0;
 }

 static void init_peercred(struct sock *sk)
@@ -699,9 +697,10 @@ static int unix_release(struct socket *sock)
 	if (!sk)
 		return 0;

+	unix_release_sock(sk, 0);
 	sock->sk = NULL;

-	return unix_release_sock(sk, 0);
+	return 0;
 }

 static int unix_autobind(struct socket *sock)

^ permalink raw reply related

* Re: [Xen-devel] [PATCH 6/6] xen-netback: don't disconnect frontend when seeing oversize frame
From: Jan Beulich @ 2013-03-25 12:53 UTC (permalink / raw)
  To: Wei Liu
  Cc: david.vrabel, ian.campbell, xen-devel, annie.li, konrad.wilk,
	netdev
In-Reply-To: <1364209702-12437-7-git-send-email-wei.liu2@citrix.com>

>>> On 25.03.13 at 12:08, Wei Liu <wei.liu2@citrix.com> wrote:
> @@ -947,10 +949,21 @@ static int netbk_count_requests(struct xenvif *vif,
>  
>  		memcpy(txp, RING_GET_REQUEST(&vif->tx, cons + slots),
>  		       sizeof(*txp));
> -		if (txp->size > first->size) {
> -			netdev_err(vif->dev, "Packet is bigger than frame.\n");
> -			netbk_fatal_tx_err(vif);
> -			return -EIO;
> +
> +		/* If the guest submitted a frame >= 64 KiB then
> +		 * first->size overflowed and following slots will
> +		 * appear to be larger than the frame.
> +		 *
> +		 * This cannot be fatal error as there are buggy
> +		 * frontends that do this.
> +		 *
> +		 * Consume all slots and drop the packet.
> +		 */
> +		if (!drop && txp->size > first->size) {
> +			if (net_ratelimit())
> +				netdev_dbg(vif->dev,
> +					   "Packet is bigger than frame.\n");
> +			drop = true;

So this deals with one half of the problem, but shouldn't we also
revert the disconnect when slots would exceed MAX_SKB_FRAGS
(or max_skb_slots after patch 5)? Afaict you could trivially extend
this patch to also cover that case...

Jan

^ permalink raw reply

* [PATCH 12/12] netfilter: nf_conntrack: speed up module removal path if netns in use
From: pablo @ 2013-03-25 12:15 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <1364213752-3934-1-git-send-email-pablo@netfilter.org>

From: Vladimir Davydov <VDavydov@parallels.com>

The patch introduces nf_conntrack_cleanup_net_list(), which cleanups
nf_conntrack for a list of netns and calls synchronize_net() only once
for them all. This should reduce netns destruction time.

I've measured cleanup time for 1k dummy net ns. Here are the results:

 <without the patch>
 # modprobe nf_conntrack
 # time modprobe -r nf_conntrack

 real	0m10.337s
 user	0m0.000s
 sys	0m0.376s

 <with the patch>
 # modprobe nf_conntrack
 # time modprobe -r nf_conntrack

 real    0m5.661s
 user    0m0.000s
 sys     0m0.216s

Signed-off-by: Vladimir Davydov <vdavydov@parallels.com>
Cc: Patrick McHardy <kaber@trash.net>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Acked-by: Gao feng <gaofeng@cn.fujitsu.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_core.h |    1 +
 net/netfilter/nf_conntrack_core.c         |   46 ++++++++++++++++++++---------
 net/netfilter/nf_conntrack_standalone.c   |   16 ++++++----
 3 files changed, 43 insertions(+), 20 deletions(-)

diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h
index 930275fa..fb2b623 100644
--- a/include/net/netfilter/nf_conntrack_core.h
+++ b/include/net/netfilter/nf_conntrack_core.h
@@ -27,6 +27,7 @@ extern unsigned int nf_conntrack_in(struct net *net,
 
 extern int nf_conntrack_init_net(struct net *net);
 extern void nf_conntrack_cleanup_net(struct net *net);
+extern void nf_conntrack_cleanup_net_list(struct list_head *net_exit_list);
 
 extern int nf_conntrack_proto_pernet_init(struct net *net);
 extern void nf_conntrack_proto_pernet_fini(struct net *net);
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 1068deb..007e8c4 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1365,30 +1365,48 @@ void nf_conntrack_cleanup_end(void)
  */
 void nf_conntrack_cleanup_net(struct net *net)
 {
+	LIST_HEAD(single);
+
+	list_add(&net->exit_list, &single);
+	nf_conntrack_cleanup_net_list(&single);
+}
+
+void nf_conntrack_cleanup_net_list(struct list_head *net_exit_list)
+{
+	int busy;
+	struct net *net;
+
 	/*
 	 * This makes sure all current packets have passed through
 	 *  netfilter framework.  Roll on, two-stage module
 	 *  delete...
 	 */
 	synchronize_net();
- i_see_dead_people:
-	nf_ct_iterate_cleanup(net, kill_all, NULL);
-	nf_ct_release_dying_list(net);
-	if (atomic_read(&net->ct.count) != 0) {
+i_see_dead_people:
+	busy = 0;
+	list_for_each_entry(net, net_exit_list, exit_list) {
+		nf_ct_iterate_cleanup(net, kill_all, NULL);
+		nf_ct_release_dying_list(net);
+		if (atomic_read(&net->ct.count) != 0)
+			busy = 1;
+	}
+	if (busy) {
 		schedule();
 		goto i_see_dead_people;
 	}
 
-	nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size);
-	nf_conntrack_proto_pernet_fini(net);
-	nf_conntrack_helper_pernet_fini(net);
-	nf_conntrack_ecache_pernet_fini(net);
-	nf_conntrack_tstamp_pernet_fini(net);
-	nf_conntrack_acct_pernet_fini(net);
-	nf_conntrack_expect_pernet_fini(net);
-	kmem_cache_destroy(net->ct.nf_conntrack_cachep);
-	kfree(net->ct.slabname);
-	free_percpu(net->ct.stat);
+	list_for_each_entry(net, net_exit_list, exit_list) {
+		nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size);
+		nf_conntrack_proto_pernet_fini(net);
+		nf_conntrack_helper_pernet_fini(net);
+		nf_conntrack_ecache_pernet_fini(net);
+		nf_conntrack_tstamp_pernet_fini(net);
+		nf_conntrack_acct_pernet_fini(net);
+		nf_conntrack_expect_pernet_fini(net);
+		kmem_cache_destroy(net->ct.nf_conntrack_cachep);
+		kfree(net->ct.slabname);
+		free_percpu(net->ct.stat);
+	}
 }
 
 void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls)
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 6bcce40..6c69fbd 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -545,16 +545,20 @@ out_init:
 	return ret;
 }
 
-static void nf_conntrack_pernet_exit(struct net *net)
+static void nf_conntrack_pernet_exit(struct list_head *net_exit_list)
 {
-	nf_conntrack_standalone_fini_sysctl(net);
-	nf_conntrack_standalone_fini_proc(net);
-	nf_conntrack_cleanup_net(net);
+	struct net *net;
+
+	list_for_each_entry(net, net_exit_list, exit_list) {
+		nf_conntrack_standalone_fini_sysctl(net);
+		nf_conntrack_standalone_fini_proc(net);
+	}
+	nf_conntrack_cleanup_net_list(net_exit_list);
 }
 
 static struct pernet_operations nf_conntrack_net_ops = {
-	.init = nf_conntrack_pernet_init,
-	.exit = nf_conntrack_pernet_exit,
+	.init		= nf_conntrack_pernet_init,
+	.exit_batch	= nf_conntrack_pernet_exit,
 };
 
 static int __init nf_conntrack_standalone_init(void)
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH 11/12] netfilter: nf_conntrack: add include to fix sparse warning
From: pablo @ 2013-03-25 12:15 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <1364213752-3934-1-git-send-email-pablo@netfilter.org>

From: stephen hemminger <stephen@networkplumber.org>

Include header file to pickup prototype of nf_nat_seq_adjust_hook

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_conntrack_core.c |    1 +
 1 file changed, 1 insertion(+)

diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index c8e001a..1068deb 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -48,6 +48,7 @@
 #include <net/netfilter/nf_conntrack_labels.h>
 #include <net/netfilter/nf_nat.h>
 #include <net/netfilter/nf_nat_core.h>
+#include <net/netfilter/nf_nat_helper.h>
 
 #define NF_CONNTRACK_VERSION	"0.5.0"
 
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH 08/12] ipvs: fix some sparse warnings
From: pablo @ 2013-03-25 12:15 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <1364213752-3934-1-git-send-email-pablo@netfilter.org>

From: Julian Anastasov <ja@ssi.bg>

Add missing __percpu annotations and make ip_vs_net_id static.

Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 include/net/ip_vs.h             |    2 +-
 net/netfilter/ipvs/ip_vs_core.c |    8 +-------
 net/netfilter/ipvs/ip_vs_est.c  |    2 +-
 3 files changed, 3 insertions(+), 9 deletions(-)

diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 68c69d5..29bc055 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -459,7 +459,7 @@ struct ip_vs_estimator {
 struct ip_vs_stats {
 	struct ip_vs_stats_user	ustats;		/* statistics */
 	struct ip_vs_estimator	est;		/* estimator */
-	struct ip_vs_cpu_stats	*cpustats;	/* per cpu counters */
+	struct ip_vs_cpu_stats __percpu	*cpustats;	/* per cpu counters */
 	spinlock_t		lock;		/* spin lock */
 	struct ip_vs_stats_user	ustats0;	/* reset values */
 };
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 47edf5a..3e5e80b 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -69,10 +69,7 @@ EXPORT_SYMBOL(ip_vs_conn_put);
 EXPORT_SYMBOL(ip_vs_get_debug_level);
 #endif
 
-int ip_vs_net_id __read_mostly;
-#ifdef IP_VS_GENERIC_NETNS
-EXPORT_SYMBOL(ip_vs_net_id);
-#endif
+static int ip_vs_net_id __read_mostly;
 /* netns cnt used for uniqueness */
 static atomic_t ipvs_netns_cnt = ATOMIC_INIT(0);
 
@@ -1181,9 +1178,6 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
 						iph.len)))) {
 #ifdef CONFIG_IP_VS_IPV6
 				if (af == AF_INET6) {
-					struct net *net =
-						dev_net(skb_dst(skb)->dev);
-
 					if (!skb->dev)
 						skb->dev = net->loopback_dev;
 					icmpv6_send(skb,
diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c
index 0fac601..6bee6d0 100644
--- a/net/netfilter/ipvs/ip_vs_est.c
+++ b/net/netfilter/ipvs/ip_vs_est.c
@@ -56,7 +56,7 @@
  * Make a summary from each cpu
  */
 static void ip_vs_read_cpu_stats(struct ip_vs_stats_user *sum,
-				 struct ip_vs_cpu_stats *stats)
+				 struct ip_vs_cpu_stats __percpu *stats)
 {
 	int i;
 
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH 06/12] netfilter: nfnetlink_queue: use xor hash function to distribute instances
From: pablo @ 2013-03-25 12:15 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <1364213752-3934-1-git-send-email-pablo@netfilter.org>

From: Pablo Neira Ayuso <pablo@netfilter.org>

Thanks to Eric Dumazet for suggesting this during the NFWS.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nfnetlink_queue_core.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c
index 858fd52..350c50f 100644
--- a/net/netfilter/nfnetlink_queue_core.c
+++ b/net/netfilter/nfnetlink_queue_core.c
@@ -73,7 +73,7 @@ static struct hlist_head instance_table[INSTANCE_BUCKETS] __read_mostly;
 
 static inline u_int8_t instance_hashfn(u_int16_t queue_num)
 {
-	return ((queue_num >> 8) | queue_num) % INSTANCE_BUCKETS;
+	return ((queue_num >> 8) ^ queue_num) % INSTANCE_BUCKETS;
 }
 
 static struct nfqnl_instance *
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH 05/12] netfilter: ebt_ulog: remove unnecessary spin lock protection
From: pablo @ 2013-03-25 12:15 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <1364213752-3934-1-git-send-email-pablo@netfilter.org>

From: Gao feng <gaofeng@cn.fujitsu.com>

No need for spinlock to protect the netlink skb in the
ebt_ulog_fini path. We are sure there is noone using it
at that stage.

Signed-off-by: Gao feng <gaofeng@cn.fujitsu.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/bridge/netfilter/ebt_ulog.c |    3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c
index 3bf43f7..442b032 100644
--- a/net/bridge/netfilter/ebt_ulog.c
+++ b/net/bridge/netfilter/ebt_ulog.c
@@ -319,12 +319,11 @@ static void __exit ebt_ulog_fini(void)
 	for (i = 0; i < EBT_ULOG_MAXNLGROUPS; i++) {
 		ub = &ulog_buffers[i];
 		del_timer(&ub->timer);
-		spin_lock_bh(&ub->lock);
+
 		if (ub->skb) {
 			kfree_skb(ub->skb);
 			ub->skb = NULL;
 		}
-		spin_unlock_bh(&ub->lock);
 	}
 	netlink_kernel_release(ebtulognl);
 }
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH 04/12] netfilter: nf_ct_ipv6: use ipv6_iface_scope_id in conntrack to return scope id
From: pablo @ 2013-03-25 12:15 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <1364213752-3934-1-git-send-email-pablo@netfilter.org>

From: Hannes Frederic Sowa <hannes@stressinduktion.org>

As in (842df07 ipv6: use newly introduced __ipv6_addr_needs_scope_id and
ipv6_iface_scope_id).

Cc: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c |    8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 2b6c226..97bcf2b 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -330,12 +330,8 @@ ipv6_getorigdst(struct sock *sk, int optval, void __user *user, int *len)
 					sizeof(sin6.sin6_addr));
 
 	nf_ct_put(ct);
-
-	if (ipv6_addr_type(&sin6.sin6_addr) & IPV6_ADDR_LINKLOCAL)
-		sin6.sin6_scope_id = sk->sk_bound_dev_if;
-	else
-		sin6.sin6_scope_id = 0;
-
+	sin6.sin6_scope_id = ipv6_iface_scope_id(&sin6.sin6_addr,
+						 sk->sk_bound_dev_if);
 	return copy_to_user(user, &sin6, sizeof(sin6)) ? -EFAULT : 0;
 }
 
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH 03/12] bridge: netfilter: use PTR_RET instead of IS_ERR + PTR_ERR
From: pablo @ 2013-03-25 12:15 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <1364213752-3934-1-git-send-email-pablo@netfilter.org>

From: Silviu-Mihai Popescu <silviupopescu1990@gmail.com>

This uses PTR_RET instead of IS_ERR and PTR_ERR in order to increase
readability.

Signed-off-by: Silviu-Mihai Popescu <silviupopescu1990@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/bridge/netfilter/ebtable_broute.c |    4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c
index 40d8258..70f656c 100644
--- a/net/bridge/netfilter/ebtable_broute.c
+++ b/net/bridge/netfilter/ebtable_broute.c
@@ -64,9 +64,7 @@ static int ebt_broute(struct sk_buff *skb)
 static int __net_init broute_net_init(struct net *net)
 {
 	net->xt.broute_table = ebt_register_table(net, &broute_table);
-	if (IS_ERR(net->xt.broute_table))
-		return PTR_ERR(net->xt.broute_table);
-	return 0;
+	return PTR_RET(net->xt.broute_table);
 }
 
 static void __net_exit broute_net_exit(struct net *net)
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH 10/12] netfilter: nfnetlink_queue: zero copy support
From: pablo @ 2013-03-25 12:15 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <1364213752-3934-1-git-send-email-pablo@netfilter.org>

From: Eric Dumazet <edumazet@google.com>

nfqnl_build_packet_message() actually copy the packet
inside the netlink message, while it can instead use
zero copy.

Make sure the skb 'copy' is the last component of the
cooked netlink message, as we cant add anything after it.

Patch cooked in Copenhagen at Netfilter Workshop ;)

Still to be addressed in separate patches :

-GRO/GSO packets are segmented in nf_queue()
and checksummed in nfqnl_build_packet_message().

Proper support for GSO/GRO packets (no segmentation,
and no checksumming) needs application cooperation, if we
want no regressions.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nfnetlink_queue_core.c |   94 ++++++++++++++++++++++++++--------
 1 file changed, 72 insertions(+), 22 deletions(-)

diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c
index 350c50f..da91b86 100644
--- a/net/netfilter/nfnetlink_queue_core.c
+++ b/net/netfilter/nfnetlink_queue_core.c
@@ -217,14 +217,59 @@ nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn, unsigned long data)
 	spin_unlock_bh(&queue->lock);
 }
 
+static void
+nfqnl_zcopy(struct sk_buff *to, const struct sk_buff *from, int len, int hlen)
+{
+	int i, j = 0;
+	int plen = 0; /* length of skb->head fragment */
+	struct page *page;
+	unsigned int offset;
+
+	/* dont bother with small payloads */
+	if (len <= skb_tailroom(to)) {
+		skb_copy_bits(from, 0, skb_put(to, len), len);
+		return;
+	}
+
+	if (hlen) {
+		skb_copy_bits(from, 0, skb_put(to, hlen), hlen);
+		len -= hlen;
+	} else {
+		plen = min_t(int, skb_headlen(from), len);
+		if (plen) {
+			page = virt_to_head_page(from->head);
+			offset = from->data - (unsigned char *)page_address(page);
+			__skb_fill_page_desc(to, 0, page, offset, plen);
+			get_page(page);
+			j = 1;
+			len -= plen;
+		}
+	}
+
+	to->truesize += len + plen;
+	to->len += len + plen;
+	to->data_len += len + plen;
+
+	for (i = 0; i < skb_shinfo(from)->nr_frags; i++) {
+		if (!len)
+			break;
+		skb_shinfo(to)->frags[j] = skb_shinfo(from)->frags[i];
+		skb_shinfo(to)->frags[j].size = min_t(int, skb_shinfo(to)->frags[j].size, len);
+		len -= skb_shinfo(to)->frags[j].size;
+		skb_frag_ref(to, j);
+		j++;
+	}
+	skb_shinfo(to)->nr_frags = j;
+}
+
 static struct sk_buff *
 nfqnl_build_packet_message(struct nfqnl_instance *queue,
 			   struct nf_queue_entry *entry,
 			   __be32 **packet_id_ptr)
 {
-	sk_buff_data_t old_tail;
 	size_t size;
 	size_t data_len = 0, cap_len = 0;
+	int hlen = 0;
 	struct sk_buff *skb;
 	struct nlattr *nla;
 	struct nfqnl_msg_packet_hdr *pmsg;
@@ -246,8 +291,10 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
 #endif
 		+ nla_total_size(sizeof(u_int32_t))	/* mark */
 		+ nla_total_size(sizeof(struct nfqnl_msg_packet_hw))
-		+ nla_total_size(sizeof(struct nfqnl_msg_packet_timestamp)
-		+ nla_total_size(sizeof(u_int32_t)));	/* cap_len */
+		+ nla_total_size(sizeof(u_int32_t));	/* cap_len */
+
+	if (entskb->tstamp.tv64)
+		size += nla_total_size(sizeof(struct nfqnl_msg_packet_timestamp));
 
 	outdev = entry->outdev;
 
@@ -265,7 +312,16 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
 		if (data_len == 0 || data_len > entskb->len)
 			data_len = entskb->len;
 
-		size += nla_total_size(data_len);
+
+		if (!entskb->head_frag ||
+		    skb_headlen(entskb) < L1_CACHE_BYTES ||
+		    skb_shinfo(entskb)->nr_frags >= MAX_SKB_FRAGS)
+			hlen = skb_headlen(entskb);
+
+		if (skb_has_frag_list(entskb))
+			hlen = entskb->len;
+		hlen = min_t(int, data_len, hlen);
+		size += sizeof(struct nlattr) + hlen;
 		cap_len = entskb->len;
 		break;
 	}
@@ -277,7 +333,6 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
 	if (!skb)
 		return NULL;
 
-	old_tail = skb->tail;
 	nlh = nlmsg_put(skb, 0, 0,
 			NFNL_SUBSYS_QUEUE << 8 | NFQNL_MSG_PACKET,
 			sizeof(struct nfgenmsg), 0);
@@ -382,31 +437,26 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
 			goto nla_put_failure;
 	}
 
+	if (ct && nfqnl_ct_put(skb, ct, ctinfo) < 0)
+		goto nla_put_failure;
+
+	if (cap_len > 0 && nla_put_be32(skb, NFQA_CAP_LEN, htonl(cap_len)))
+		goto nla_put_failure;
+
 	if (data_len) {
 		struct nlattr *nla;
-		int sz = nla_attr_size(data_len);
 
-		if (skb_tailroom(skb) < nla_total_size(data_len)) {
-			printk(KERN_WARNING "nf_queue: no tailroom!\n");
-			kfree_skb(skb);
-			return NULL;
-		}
+		if (skb_tailroom(skb) < sizeof(*nla) + hlen)
+			goto nla_put_failure;
 
-		nla = (struct nlattr *)skb_put(skb, nla_total_size(data_len));
+		nla = (struct nlattr *)skb_put(skb, sizeof(*nla));
 		nla->nla_type = NFQA_PAYLOAD;
-		nla->nla_len = sz;
+		nla->nla_len = nla_attr_size(data_len);
 
-		if (skb_copy_bits(entskb, 0, nla_data(nla), data_len))
-			BUG();
+		nfqnl_zcopy(skb, entskb, data_len, hlen);
 	}
 
-	if (ct && nfqnl_ct_put(skb, ct, ctinfo) < 0)
-		goto nla_put_failure;
-
-	if (cap_len > 0 && nla_put_be32(skb, NFQA_CAP_LEN, htonl(cap_len)))
-		goto nla_put_failure;
-
-	nlh->nlmsg_len = skb->tail - old_tail;
+	nlh->nlmsg_len = skb->len;
 	return skb;
 
 nla_put_failure:
-- 
1.7.10.4


^ permalink raw reply related

* [PATCH 09/12] netfilter: ctnetlink: allow to dump expectation per master conntrack
From: pablo @ 2013-03-25 12:15 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <1364213752-3934-1-git-send-email-pablo@netfilter.org>

From: Pablo Neira Ayuso <pablo@netfilter.org>

This patch adds the ability to dump all existing expectations
per master conntrack.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_conntrack_netlink.c |  100 ++++++++++++++++++++++++++++++++--
 1 file changed, 95 insertions(+), 5 deletions(-)

diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 9904b15..6d0f8a1 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -2409,6 +2409,92 @@ out:
 	return skb->len;
 }
 
+static int
+ctnetlink_exp_ct_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct nf_conntrack_expect *exp, *last;
+	struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
+	struct nf_conn *ct = cb->data;
+	struct nf_conn_help *help = nfct_help(ct);
+	u_int8_t l3proto = nfmsg->nfgen_family;
+
+	if (cb->args[0])
+		return 0;
+
+	rcu_read_lock();
+	last = (struct nf_conntrack_expect *)cb->args[1];
+restart:
+	hlist_for_each_entry(exp, &help->expectations, lnode) {
+		if (l3proto && exp->tuple.src.l3num != l3proto)
+			continue;
+		if (cb->args[1]) {
+			if (exp != last)
+				continue;
+			cb->args[1] = 0;
+		}
+		if (ctnetlink_exp_fill_info(skb, NETLINK_CB(cb->skb).portid,
+					    cb->nlh->nlmsg_seq,
+					    IPCTNL_MSG_EXP_NEW,
+					    exp) < 0) {
+			if (!atomic_inc_not_zero(&exp->use))
+				continue;
+			cb->args[1] = (unsigned long)exp;
+			goto out;
+		}
+	}
+	if (cb->args[1]) {
+		cb->args[1] = 0;
+		goto restart;
+	}
+	cb->args[0] = 1;
+out:
+	rcu_read_unlock();
+	if (last)
+		nf_ct_expect_put(last);
+
+	return skb->len;
+}
+
+static int ctnetlink_dump_exp_ct(struct sock *ctnl, struct sk_buff *skb,
+				 const struct nlmsghdr *nlh,
+				 const struct nlattr * const cda[])
+{
+	int err;
+	struct net *net = sock_net(ctnl);
+	struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+	u_int8_t u3 = nfmsg->nfgen_family;
+	struct nf_conntrack_tuple tuple;
+	struct nf_conntrack_tuple_hash *h;
+	struct nf_conn *ct;
+	u16 zone = 0;
+	struct netlink_dump_control c = {
+		.dump = ctnetlink_exp_ct_dump_table,
+		.done = ctnetlink_exp_done,
+	};
+
+	err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_MASTER, u3);
+	if (err < 0)
+		return err;
+
+	if (cda[CTA_EXPECT_ZONE]) {
+		err = ctnetlink_parse_zone(cda[CTA_EXPECT_ZONE], &zone);
+		if (err < 0)
+			return err;
+	}
+
+	h = nf_conntrack_find_get(net, zone, &tuple);
+	if (!h)
+		return -ENOENT;
+
+	ct = nf_ct_tuplehash_to_ctrack(h);
+	c.data = ct;
+
+	err = netlink_dump_start(ctnl, skb, nlh, &c);
+	nf_ct_put(ct);
+
+	return err;
+}
+
 static const struct nla_policy exp_nla_policy[CTA_EXPECT_MAX+1] = {
 	[CTA_EXPECT_MASTER]	= { .type = NLA_NESTED },
 	[CTA_EXPECT_TUPLE]	= { .type = NLA_NESTED },
@@ -2439,11 +2525,15 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
 	int err;
 
 	if (nlh->nlmsg_flags & NLM_F_DUMP) {
-		struct netlink_dump_control c = {
-			.dump = ctnetlink_exp_dump_table,
-			.done = ctnetlink_exp_done,
-		};
-		return netlink_dump_start(ctnl, skb, nlh, &c);
+		if (cda[CTA_EXPECT_MASTER])
+			return ctnetlink_dump_exp_ct(ctnl, skb, nlh, cda);
+		else {
+			struct netlink_dump_control c = {
+				.dump = ctnetlink_exp_dump_table,
+				.done = ctnetlink_exp_done,
+			};
+			return netlink_dump_start(ctnl, skb, nlh, &c);
+		}
 	}
 
 	err = ctnetlink_parse_zone(cda[CTA_EXPECT_ZONE], &zone);
-- 
1.7.10.4


^ permalink raw reply related

* [PATCH 07/12] ipvs: fix hashing in ip_vs_svc_hashkey
From: pablo @ 2013-03-25 12:15 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <1364213752-3934-1-git-send-email-pablo@netfilter.org>

From: Julian Anastasov <ja@ssi.bg>

net is a pointer in host order, mix it properly
with other keys in network order. Fixes sparse warning.

Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 net/netfilter/ipvs/ip_vs_ctl.c |    8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index c68198b..a528178 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -271,16 +271,18 @@ ip_vs_svc_hashkey(struct net *net, int af, unsigned int proto,
 {
 	register unsigned int porth = ntohs(port);
 	__be32 addr_fold = addr->ip;
+	__u32 ahash;
 
 #ifdef CONFIG_IP_VS_IPV6
 	if (af == AF_INET6)
 		addr_fold = addr->ip6[0]^addr->ip6[1]^
 			    addr->ip6[2]^addr->ip6[3];
 #endif
-	addr_fold ^= ((size_t)net>>8);
+	ahash = ntohl(addr_fold);
+	ahash ^= ((size_t) net >> 8);
 
-	return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
-		& IP_VS_SVC_TAB_MASK;
+	return (proto ^ ahash ^ (porth >> IP_VS_SVC_TAB_BITS) ^ porth) &
+	       IP_VS_SVC_TAB_MASK;
 }
 
 /*
-- 
1.7.10.4


^ permalink raw reply related

* [PATCH 02/12] ipv4: netfilter: use PTR_RET instead of IS_ERR + PTR_ERR
From: pablo @ 2013-03-25 12:15 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <1364213752-3934-1-git-send-email-pablo@netfilter.org>

From: Silviu-Mihai Popescu <silviupopescu1990@gmail.com>

This uses PTR_RET instead of IS_ERR and PTR_ERR in order to increase
readability.

Signed-off-by: Silviu-Mihai Popescu <silviupopescu1990@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/ipv4/netfilter/arptable_filter.c |    4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c
index 79ca5e7..eadab1e 100644
--- a/net/ipv4/netfilter/arptable_filter.c
+++ b/net/ipv4/netfilter/arptable_filter.c
@@ -48,9 +48,7 @@ static int __net_init arptable_filter_net_init(struct net *net)
 	net->ipv4.arptable_filter =
 		arpt_register_table(net, &packet_filter, repl);
 	kfree(repl);
-	if (IS_ERR(net->ipv4.arptable_filter))
-		return PTR_ERR(net->ipv4.arptable_filter);
-	return 0;
+	return PTR_RET(net->ipv4.arptable_filter);
 }
 
 static void __net_exit arptable_filter_net_exit(struct net *net)
-- 
1.7.10.4


^ permalink raw reply related

* [PATCH 01/12] netfilter: ip6t_NPT: Use csum_partial()
From: pablo @ 2013-03-25 12:15 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <1364213752-3934-1-git-send-email-pablo@netfilter.org>

From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>

[ Some fixes went into mainstream before this patch, so I needed
  to rebase it upon the current tree, that's why it's different from
  the original one posted on the list --pablo ]

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/ipv6/netfilter/ip6t_NPT.c |   11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/net/ipv6/netfilter/ip6t_NPT.c b/net/ipv6/netfilter/ip6t_NPT.c
index 83acc14..59286a1 100644
--- a/net/ipv6/netfilter/ip6t_NPT.c
+++ b/net/ipv6/netfilter/ip6t_NPT.c
@@ -18,9 +18,8 @@
 static int ip6t_npt_checkentry(const struct xt_tgchk_param *par)
 {
 	struct ip6t_npt_tginfo *npt = par->targinfo;
-	__wsum src_sum = 0, dst_sum = 0;
 	struct in6_addr pfx;
-	unsigned int i;
+	__wsum src_sum, dst_sum;
 
 	if (npt->src_pfx_len > 64 || npt->dst_pfx_len > 64)
 		return -EINVAL;
@@ -33,12 +32,8 @@ static int ip6t_npt_checkentry(const struct xt_tgchk_param *par)
 	if (!ipv6_addr_equal(&pfx, &npt->dst_pfx.in6))
 		return -EINVAL;
 
-	for (i = 0; i < ARRAY_SIZE(npt->src_pfx.in6.s6_addr16); i++) {
-		src_sum = csum_add(src_sum,
-				(__force __wsum)npt->src_pfx.in6.s6_addr16[i]);
-		dst_sum = csum_add(dst_sum,
-				(__force __wsum)npt->dst_pfx.in6.s6_addr16[i]);
-	}
+	src_sum = csum_partial(&npt->src_pfx.in6, sizeof(npt->src_pfx.in6), 0);
+	dst_sum = csum_partial(&npt->dst_pfx.in6, sizeof(npt->dst_pfx.in6), 0);
 
 	npt->adjustment = ~csum_fold(csum_sub(src_sum, dst_sum));
 	return 0;
-- 
1.7.10.4


^ permalink raw reply related

* [PATCH 00/12] Netfilter updates for net-next
From: pablo @ 2013-03-25 12:15 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev

From: Pablo Neira Ayuso <pablo@netfilter.org>

Hi David,

The following patchset contains Netfilter/IPVS updates for
your net-next tree, they are:

* Better performance in nfnetlink_queue by avoiding copy from the
  packet to netlink message, from Eric Dumazet.

* Remove unnecessary locking in the exit path of ebt_ulog, from Gao Feng.

* Use new function ipv6_iface_scope_id in nf_ct_ipv6, from Hannes Frederic Sowa.

* A couple of sparse fixes for IPVS, from Julian Anastasov.

* Use xor hashing in nfnetlink_queue, as suggested by Eric Dumazet, from
  myself.

* Allow to dump expectations per master conntrack via ctnetlink, from myself.

* A couple of cleanups to use PTR_RET in module init path, from Silviu-Mihai
  Popescu.

* Remove nf_conntrack module a bit faster if netns are in use, from
  Vladimir Davydov.

* Use checksum_partial in ip6t_NPT, from YOSHIFUJI Hideaki.

* Sparse fix for nf_conntrack, from Stephen Hemminger.

You can pull these changes from:

git://1984.lsi.us.es/nf-next master

Thanks!

Eric Dumazet (1):
  netfilter: nfnetlink_queue: zero copy support

Gao feng (1):
  netfilter: ebt_ulog: remove unnecessary spin lock protection

Hannes Frederic Sowa (1):
  netfilter: nf_ct_ipv6: use ipv6_iface_scope_id in conntrack to return scope id

Julian Anastasov (2):
  ipvs: fix hashing in ip_vs_svc_hashkey
  ipvs: fix some sparse warnings

Pablo Neira Ayuso (2):
  netfilter: nfnetlink_queue: use xor hash function to distribute instances
  netfilter: ctnetlink: allow to dump expectation per master conntrack

Silviu-Mihai Popescu (2):
  ipv4: netfilter: use PTR_RET instead of IS_ERR + PTR_ERR
  bridge: netfilter: use PTR_RET instead of IS_ERR + PTR_ERR

Vladimir Davydov (1):
  netfilter: nf_conntrack: speed up module removal path if netns in use

YOSHIFUJI Hideaki (1):
  netfilter: ip6t_NPT: Use csum_partial()

stephen hemminger (1):
  netfilter: nf_conntrack: add include to fix sparse warning

 include/net/ip_vs.h                            |    2 +-
 include/net/netfilter/nf_conntrack_core.h      |    1 +
 net/bridge/netfilter/ebt_ulog.c                |    3 +-
 net/bridge/netfilter/ebtable_broute.c          |    4 +-
 net/ipv4/netfilter/arptable_filter.c           |    4 +-
 net/ipv6/netfilter/ip6t_NPT.c                  |   11 +--
 net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c |    8 +-
 net/netfilter/ipvs/ip_vs_core.c                |    8 +-
 net/netfilter/ipvs/ip_vs_ctl.c                 |    8 +-
 net/netfilter/ipvs/ip_vs_est.c                 |    2 +-
 net/netfilter/nf_conntrack_core.c              |   47 +++++++----
 net/netfilter/nf_conntrack_netlink.c           |  100 ++++++++++++++++++++++--
 net/netfilter/nf_conntrack_standalone.c        |   16 ++--
 net/netfilter/nfnetlink_queue_core.c           |   96 +++++++++++++++++------
 14 files changed, 228 insertions(+), 82 deletions(-)

-- 
1.7.10.4


^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox