Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCH 7/7] net/netfilter/nf_conntrack_netlink.c: fix error return code
From: Julia Lawall @ 2012-08-29 16:49 UTC (permalink / raw)
  To: Pablo Neira Ayuso
  Cc: kernel-janitors, Patrick McHardy, David S. Miller,
	netfilter-devel, netfilter, coreteam, netdev, linux-kernel
In-Reply-To: <1346258957-7649-1-git-send-email-Julia.Lawall@lip6.fr>

From: Julia Lawall <Julia.Lawall@lip6.fr>

Initialize return variable before exiting on an error path.

A simplified version of the semantic match that finds this problem is as
follows: (http://coccinelle.lip6.fr/)

// <smpl>
(
if@p1 (\(ret < 0\|ret != 0\))
 { ... return ret; }
|
ret@p1 = 0
)
... when != ret = e1
    when != &ret
*if(...)
{
  ... when != ret = e2
      when forall
 return ret;
}

// </smpl>

Signed-off-by: Julia Lawall <Julia.Lawall@lip6.fr>

---
 net/netfilter/nf_conntrack_netlink.c |    3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index da4fc37..9807f32 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -2790,7 +2790,8 @@ static int __init ctnetlink_init(void)
 		goto err_unreg_subsys;
 	}
 
-	if (register_pernet_subsys(&ctnetlink_net_ops)) {
+	ret = register_pernet_subsys(&ctnetlink_net_ops);
+	if (ret < 0) {
 		pr_err("ctnetlink_init: cannot register pernet operations\n");
 		goto err_unreg_exp_subsys;
 	}

^ permalink raw reply related

* [PATCH 5/7] net/xfrm/xfrm_state.c: fix error return code
From: Julia Lawall @ 2012-08-29 16:49 UTC (permalink / raw)
  To: David S. Miller; +Cc: kernel-janitors, netdev, linux-kernel
In-Reply-To: <1346258957-7649-1-git-send-email-Julia.Lawall@lip6.fr>

From: Julia Lawall <Julia.Lawall@lip6.fr>

Initialize return variable before exiting on an error path.

A simplified version of the semantic match that finds this problem is as
follows: (http://coccinelle.lip6.fr/)

// <smpl>
(
if@p1 (\(ret < 0\|ret != 0\))
 { ... return ret; }
|
ret@p1 = 0
)
... when != ret = e1
    when != &ret
*if(...)
{
  ... when != ret = e2
      when forall
 return ret;
}

// </smpl>

Signed-off-by: Julia Lawall <Julia.Lawall@lip6.fr>

---
 net/xfrm/xfrm_state.c |    4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 7856c33..b68e0cc 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -1994,8 +1994,10 @@ int __xfrm_init_state(struct xfrm_state *x, bool init_replay)
 		goto error;
 
 	x->outer_mode = xfrm_get_mode(x->props.mode, family);
-	if (x->outer_mode == NULL)
+	if (x->outer_mode == NULL) {
+		err = -EPROTONOSUPPORT;
 		goto error;
+	}
 
 	if (init_replay) {
 		err = xfrm_init_replay(x);

^ permalink raw reply related

* [PATCH 4/7] net/bluetooth/rfcomm/core.c: fix error return code
From: Julia Lawall @ 2012-08-29 16:49 UTC (permalink / raw)
  To: Marcel Holtmann
  Cc: kernel-janitors-u79uwXL29TY76Z2rM5mHXA, Gustavo Padovan,
	Johan Hedberg, David S. Miller,
	linux-bluetooth-u79uwXL29TY76Z2rM5mHXA,
	netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <1346258957-7649-1-git-send-email-Julia.Lawall-L2FTfq7BK8M@public.gmane.org>

From: Julia Lawall <Julia.Lawall-L2FTfq7BK8M@public.gmane.org>

Initialize return variable before exiting on an error path.

A simplified version of the semantic match that finds this problem is as
follows: (http://coccinelle.lip6.fr/)

// <smpl>
(
if@p1 (\(ret < 0\|ret != 0\))
 { ... return ret; }
|
ret@p1 = 0
)
... when != ret = e1
    when != &ret
*if(...)
{
  ... when != ret = e2
      when forall
 return ret;
}

// </smpl>

Signed-off-by: Julia Lawall <Julia.Lawall-L2FTfq7BK8M@public.gmane.org>

---
 net/bluetooth/rfcomm/core.c |    4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index c75107e..30141c0 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -2010,8 +2010,10 @@ static int rfcomm_add_listener(bdaddr_t *ba)
 
 	/* Add listening session */
 	s = rfcomm_session_add(sock, BT_LISTEN);
-	if (!s)
+	if (!s) {
+		err = -ENOMEM;
 		goto failed;
+	}
 
 	rfcomm_session_hold(s);
 	return 0;

^ permalink raw reply related

* [PATCH 2/7] net: ipv6: fix error return code
From: Julia Lawall @ 2012-08-29 16:49 UTC (permalink / raw)
  To: David S. Miller
  Cc: kernel-janitors, Alexey Kuznetsov, James Morris,
	Hideaki YOSHIFUJI, Patrick McHardy, netdev, linux-kernel
In-Reply-To: <1346258957-7649-1-git-send-email-Julia.Lawall@lip6.fr>

From: Julia Lawall <Julia.Lawall@lip6.fr>

Initialize return variable before exiting on an error path.

The initial initialization of the return variable is also dropped, because
that value is never used.

A simplified version of the semantic match that finds this problem is as
follows: (http://coccinelle.lip6.fr/)

// <smpl>
(
if@p1 (\(ret < 0\|ret != 0\))
 { ... return ret; }
|
ret@p1 = 0
)
... when != ret = e1
    when != &ret
*if(...)
{
  ... when != ret = e2
      when forall
 return ret;
}

// </smpl>

Signed-off-by: Julia Lawall <Julia.Lawall@lip6.fr>

---
 net/ipv6/esp6.c |    6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 6dc7fd3..282f372 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -167,8 +167,6 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
 	struct esp_data *esp = x->data;
 
 	/* skb is pure payload to encrypt */
-	err = -ENOMEM;
-
 	aead = esp->aead;
 	alen = crypto_aead_authsize(aead);
 
@@ -203,8 +201,10 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
 	}
 
 	tmp = esp_alloc_tmp(aead, nfrags + sglists, seqhilen);
-	if (!tmp)
+	if (!tmp) {
+		err = -ENOMEM;
 		goto error;
+	}
 
 	seqhi = esp_tmp_seqhi(tmp);
 	iv = esp_tmp_iv(aead, tmp, seqhilen);

^ permalink raw reply related

* [PATCH 1/7] ipvs: fix error return code
From: Julia Lawall @ 2012-08-29 16:49 UTC (permalink / raw)
  To: Wensong Zhang
  Cc: kernel-janitors, Simon Horman, Julian Anastasov,
	Pablo Neira Ayuso, Patrick McHardy, David S. Miller, netdev,
	lvs-devel, netfilter-devel, netfilter, coreteam, linux-kernel
In-Reply-To: <1346258957-7649-1-git-send-email-Julia.Lawall@lip6.fr>

From: Julia Lawall <Julia.Lawall@lip6.fr>

Initialize return variable before exiting on an error path.

A simplified version of the semantic match that finds this problem is as
follows: (http://coccinelle.lip6.fr/)

// <smpl>
(
if@p1 (\(ret < 0\|ret != 0\))
 { ... return ret; }
|
ret@p1 = 0
)
... when != ret = e1
    when != &ret
*if(...)
{
  ... when != ret = e2
      when forall
 return ret;
}

// </smpl>

Signed-off-by: Julia Lawall <Julia.Lawall@lip6.fr>

---
 net/netfilter/ipvs/ip_vs_ctl.c |    4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 3c60137..767cc12 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -1171,8 +1171,10 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
 		goto out_err;
 	}
 	svc->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
-	if (!svc->stats.cpustats)
+	if (!svc->stats.cpustats) {
+		ret = -ENOMEM;
 		goto out_err;
+	}
 
 	/* I'm the first user of the service */
 	atomic_set(&svc->usecnt, 0);


^ permalink raw reply related

* [PATCH 3.6-rc3] wlcore: Declare MODULE_FIRMWARE usage
From: Tim Gardner @ 2012-08-29 14:48 UTC (permalink / raw)
  To: linux-kernel-u79uwXL29TY76Z2rM5mHXA
  Cc: Tim Gardner, Luciano Coelho, John W. Linville, Eliad Peller,
	Arik Nemtsov, Eyal Shapira, linux-wireless-u79uwXL29TY76Z2rM5mHXA,
	netdev-u79uwXL29TY76Z2rM5mHXA

Cc: Luciano Coelho <coelho-l0cyMroinI0@public.gmane.org>
Cc: "John W. Linville" <linville-2XuSBdqkA4R54TAoqtyWWQ@public.gmane.org>
Cc: Eliad Peller <eliad-Ix1uc/W3ht7QT0dZR+AlfA@public.gmane.org>
Cc: Arik Nemtsov <arik-Ix1uc/W3ht7QT0dZR+AlfA@public.gmane.org>
Cc: Eyal Shapira <eyal-Ix1uc/W3ht7QT0dZR+AlfA@public.gmane.org>
Cc: linux-wireless-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
Cc: netdev-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
Signed-off-by: Tim Gardner <tim.gardner-Z7WLFzj8eWMS+FvcfC7Uqw@public.gmane.org>
---
 drivers/net/wireless/ti/wlcore/main.c |    1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/wireless/ti/wlcore/main.c b/drivers/net/wireless/ti/wlcore/main.c
index 7254860..a55ace6 100644
--- a/drivers/net/wireless/ti/wlcore/main.c
+++ b/drivers/net/wireless/ti/wlcore/main.c
@@ -5663,3 +5663,4 @@ MODULE_PARM_DESC(no_recovery, "Prevent HW recovery. FW will remain stuck.");
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Luciano Coelho <coelho-l0cyMroinI0@public.gmane.org>");
 MODULE_AUTHOR("Juuso Oikarinen <juuso.oikarinen-xNZwKgViW5gAvxtiuMwx3w@public.gmane.org>");
+MODULE_FIRMWARE(WL12XX_NVS_NAME);
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-wireless" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* Re: IPv4 header extension for IPv6 translation support
From: Vlad Maraev @ 2012-08-29 13:15 UTC (permalink / raw)
  To: Eric W. Biederman; +Cc: netdev
In-Reply-To: <87bohu6cdt.fsf@xmission.com>

Dear Eric,

I'm glad to receive the detailed feedback from you.
But I can't agree with some of your arguments:

> You are going over territory that has been rather thoroughly explored
> already.
I know the territory but I don't see that the problems are solved. I
mean problems caused by NATs: scalability, complexity, connection
tracking, additional addresses for NAT device.

>   In fact it is more difficult
>   because ipv6 is present in most end devices today, it is the network
>   links that are the bottle-neck in rolling out ipv6.
The fact that the end devices are ready for IPv6 doesn't mean that ISP
customers will make transitions in a one period of time. That's why
NATs are needed and they are in use. Detailed motivations are stated
in RFC6144 ("why translation" section).

> For the best interoperatiblity the best transition strategy remains
> dual-stack.  With native ipv6 connectivity and some flavor of ipv4
> connectivity (native, dual-stack-lite, or 646xlate).
My solution doesn't compete with dual stack and ds-lite that is good
for creating overlaying network. I offer a strategy that is an
alternative to existing NAT solutions.

Thanks

Vlad


P.S.
I found that this problem that was stated in my proposal
> 1. Intermediate nodes can look below the Destination Address for the
> options if ihl>5 and will be confused with IPv4x data.
has a solution: we can use IPv4 option field for placing this data.
Intermediate nodes that know nothing about this option will skip it
and forward the packet.

^ permalink raw reply

* Re: [PATCH net-next V3 1/2] IB/ipoib: Add rtnl_link_ops support
From: Or Gerlitz @ 2012-08-29 12:59 UTC (permalink / raw)
  To: Patrick McHardy; +Cc: davem, roland, netdev, Erez Shitrit
In-Reply-To: <Pine.GSO.4.63.1208291450330.1098@stinky-local.trash.net>

On 29/08/2012 15:53, Patrick McHardy wrote:
> On Thu, 23 Aug 2012, Or Gerlitz wrote:
>
>> new file mode 100644
>> index 0000000..18d6ac9
>> --- /dev/null
>> +++ b/drivers/infiniband/ulp/ipoib/ipoib_netlink.c
>> ...
>> +
>> +enum {
>> +    IFLA_IPOIB_UNSPEC,
>> +    IFLA_IPOIB_CHILD_PKEY,
>> +    __IFLA_IPOIB_MAX
>> +};
>> +
>> +#define IFLA_IPOIB_MAX (__IFLA_IPOIB_MAX - 1)
>
> This should go into include/linux/if_link.h
>

sure, will fix

^ permalink raw reply

* Re: [PATCH net-next V3 1/2] IB/ipoib: Add rtnl_link_ops support
From: Patrick McHardy @ 2012-08-29 12:53 UTC (permalink / raw)
  To: Or Gerlitz; +Cc: davem, roland, netdev, Erez Shitrit
In-Reply-To: <1345724119-32110-2-git-send-email-ogerlitz@mellanox.com>

On Thu, 23 Aug 2012, Or Gerlitz wrote:

> new file mode 100644
> index 0000000..18d6ac9
> --- /dev/null
> +++ b/drivers/infiniband/ulp/ipoib/ipoib_netlink.c
> ...
> +
> +enum {
> +	IFLA_IPOIB_UNSPEC,
> +	IFLA_IPOIB_CHILD_PKEY,
> +	__IFLA_IPOIB_MAX
> +};
> +
> +#define IFLA_IPOIB_MAX (__IFLA_IPOIB_MAX - 1)

This should go into include/linux/if_link.h

^ permalink raw reply

* Re: [PATCH net-next V3 0/2] Add rtnl_link_ops support to IPoIB
From: Patrick McHardy @ 2012-08-29 12:45 UTC (permalink / raw)
  To: Or Gerlitz; +Cc: davem, roland, netdev
In-Reply-To: <503DE3AC.30701@mellanox.com>

On Wed, 29 Aug 2012, Or Gerlitz wrote:

> On 23/08/2012 15:15, Or Gerlitz wrote:
>> This is about adding rtnl_link_ops to IPoIB, primarly addressing feedback 
>> from Dave on a similar patch that was part of the eIPoIB submission.
>
> Dave, Roland
>
> Any comment on this? The patch was originally submitted through netdev so I 
> kept the fixed version to go through that path as well. If this makes things 
> easier, we can merge that through net-next.
>
> IPoIB has more proprietary sysfs entries, such as for setting the mode 
> (datagram or connected) -- in 2nd thought, non legacy ipoib child interfaces 
> created through rtnl shouldn't preserve these prop. mechanisms and use rtnl 
> for all sorts of configs, as done for macvlan/8021q etc devices, correct? if 
> this is indeed the case, I will fix that and submit V3.

This is how rtnl_link was intended to be used, so you can set and query
all attributes in an atomic fashion and get notifications on changes.

^ permalink raw reply

* Re[2]:  Re[3]: [PATCH 2/3] ipvs: Fix faulty IPv6 extension header handling in IPVS
From: Hans Schillstrom @ 2012-08-29 12:36 UTC (permalink / raw)
  To: Patrick McHardy
  Cc: Jesper Dangaard Brouer, netdev, lvs-devel, Julian Anastasov,
	Simon Horman, Wensong Zhang, netfilter-devel

>
>On Wed, 29 Aug 2012, Jesper Dangaard Brouer wrote:
>
>> To Hans and Patrick,
>>
>> On Mon, 2012-08-27 at 14:02 +0200, Patrick McHardy wrote:
>>> On Mon, 27 Aug 2012, Hans Schillstrom wrote:
>>>
>>>>>>>
>>>>>>> On Mon, 20 Aug 2012, Jesper Dangaard Brouer wrote:
>>>>>>>
>>>>>>>> Based on patch from: Hans Schillstrom
>>>>>>>>
>>>>>>>> IPv6 headers must be processed in order of appearance,
>>>>>>>> neither can it be assumed that Upper layer headers is first.
>>>>>>>> If anything else than L4 is the first header IPVS will throw it.
>>>>>>>>
>>>>>>>> IPVS will write SNAT & DNAT modifications at a fixed pos which
>>>>>>>> will corrupt the message. Proper header position must be found
>>>>>>>> before writing modifying packet.
>>>>>>>>
>>>>>>>> This patch contains a lot of API changes.  This is done, to avoid
>>>>>>>> the costly scan of finding the IPv6 headers, via ipv6_find_hdr().
>>>>>>>> Finding the IPv6 headers is done as early as possible, and passed
>>>>>>>> on as a pointer "struct ip_vs_iphdr *" to the affected functions.
>>>>>>>
>>>>>>> How about we change netfilter to set up the skb's transport header
>>>>>>> at an early time so we can avoid all (most of) these header scans
>>>>>>> in netfilter?
>>>>>>
>>>>>> I think that would be great, maybe it should be global i.e. not only a netfilter issue.
>>>>>
>>>>> I think in most other cases the headers are supposed to be processed
>>>>> sequentially. One problem though - to be useful for netfilter/IPVS
>>>>> we'd also need to store the transport layer protocol somewhere.
>>>>
>>>> I guess that's the problem, adding it to the skb will not be popular ....
>>>> Right now I don't have a good solution, maybe a more generic netfilter ptr in the skb ...
>>>
>>> I guess inet6_skb_parm will be at least slightly more popular than
>>> adding it to the skb itself. The netfilter pointers are all used for
>>> optional things, so we can't really add it to any of those.
>>
>> Okay, but how do we go from here?
>>
>> Hans, should this hold back the patch ("ipvs: Fix faulty IPv6 extension
>> header handling in IPVS").  Or should we pursue our patch, and circle
>> back later once e.g. Patrick have found a generic solution for IPv6
>> transport header handling?
>
>I don't think we can do much better than using inet6_skb_parm. I think
>the main question is whether it is really worth it, the iteration 
>shouldn't be that expensive in most cases.

Well, if we start using it it could be worth it...

As a first sketch I think adding protocol and offset to inet6_skb_parm would be sufficient,
and  then scan the header in ipv6_defrag() which is a quite early ...

/Hans


^ permalink raw reply

* Re: [PATCH 2/3] ipvs: Fix faulty IPv6 extension  header handling in IPVS
From: Patrick McHardy @ 2012-08-29 12:34 UTC (permalink / raw)
  To: Jesper Dangaard Brouer
  Cc: Hans Schillstrom, Hans Schillstrom, netdev, lvs-devel,
	Julian Anastasov, Simon Horman, netfilter-devel
In-Reply-To: <1346240770.3069.477.camel@localhost>

On Wed, 29 Aug 2012, Jesper Dangaard Brouer wrote:

> To Patrick,
>
> On Wed, 2012-08-29 at 11:47 +0200, Hans Schillstrom wrote:
>>>
>>> On Mon, 2012-08-27 at 14:02 +0200, Patrick McHardy wrote:
>>>> On Mon, 27 Aug 2012, Hans Schillstrom wrote:
>>>>
>>>>>>>> How about we change netfilter to set up the skb's transport header
>>>>>>>> at an early time so we can avoid all (most of) these header scans
>>>>>>>> in netfilter?
>>>>>>>
>>>>>>> I think that would be great, maybe it should be global i.e. not only a netfilter issue.
>>>>>>
>>>>>> I think in most other cases the headers are supposed to be processed
>>>>>> sequentially. One problem though - to be useful for netfilter/IPVS
>>>>>> we'd also need to store the transport layer protocol somewhere.
>>>>>
>>>>> I guess that's the problem, adding it to the skb will not be popular ....
>>>>> Right now I don't have a good solution, maybe a more generic netfilter ptr in the skb ...
>>>>
>>>> I guess inet6_skb_parm will be at least slightly more popular than
>>>> adding it to the skb itself. The netfilter pointers are all used for
>>>> optional things, so we can't really add it to any of those.
>>>
>>> Okay, but how do we go from here?
>>>
>>> Hans, should this hold back the patch ("ipvs: Fix faulty IPv6 extension
>>> header handling in IPVS").  Or should we pursue our patch, and circle
>>> back later once e.g. Patrick have found a generic solution for IPv6
>>> transport header handling?
>>
>> Should we give it a try to put it in inet6_skb_parm
>> and minimize what we put there ?
>> I think it could be worth it.
>
> Okay, but then I do need some help and guidance, especially from
> Patrick, think.
>
> First of all, where in the netfilter code, should we update the new
> fields in inet6_skb_parm?

Good question. I think we'd need at least three spots since every one
of these subsystems can be used indepedently from each other:

- conntrack/IPVS: PRE_ROUTING/LOCAL_OUT at lowest priority
- ip6tables: first time packet hits ip6t_do_table()?

Actually, looking at ipv6_rcv(), this might not work at all since it
sets skb->transport_header to the first header following the IPv6
header. This is used when processing extension headers by IPv6.



^ permalink raw reply

* Re: Re[3]: [PATCH 2/3] ipvs: Fix faulty IPv6 extension header handling in IPVS
From: Patrick McHardy @ 2012-08-29 12:28 UTC (permalink / raw)
  To: Jesper Dangaard Brouer
  Cc: Hans Schillstrom, netdev, lvs-devel, Julian Anastasov,
	Simon Horman, Wensong Zhang, netfilter-devel
In-Reply-To: <1346233034.3069.450.camel@localhost>

On Wed, 29 Aug 2012, Jesper Dangaard Brouer wrote:

> To Hans and Patrick,
>
> On Mon, 2012-08-27 at 14:02 +0200, Patrick McHardy wrote:
>> On Mon, 27 Aug 2012, Hans Schillstrom wrote:
>>
>>>>>>
>>>>>> On Mon, 20 Aug 2012, Jesper Dangaard Brouer wrote:
>>>>>>
>>>>>>> Based on patch from: Hans Schillstrom
>>>>>>>
>>>>>>> IPv6 headers must be processed in order of appearance,
>>>>>>> neither can it be assumed that Upper layer headers is first.
>>>>>>> If anything else than L4 is the first header IPVS will throw it.
>>>>>>>
>>>>>>> IPVS will write SNAT & DNAT modifications at a fixed pos which
>>>>>>> will corrupt the message. Proper header position must be found
>>>>>>> before writing modifying packet.
>>>>>>>
>>>>>>> This patch contains a lot of API changes.  This is done, to avoid
>>>>>>> the costly scan of finding the IPv6 headers, via ipv6_find_hdr().
>>>>>>> Finding the IPv6 headers is done as early as possible, and passed
>>>>>>> on as a pointer "struct ip_vs_iphdr *" to the affected functions.
>>>>>>
>>>>>> How about we change netfilter to set up the skb's transport header
>>>>>> at an early time so we can avoid all (most of) these header scans
>>>>>> in netfilter?
>>>>>
>>>>> I think that would be great, maybe it should be global i.e. not only a netfilter issue.
>>>>
>>>> I think in most other cases the headers are supposed to be processed
>>>> sequentially. One problem though - to be useful for netfilter/IPVS
>>>> we'd also need to store the transport layer protocol somewhere.
>>>
>>> I guess that's the problem, adding it to the skb will not be popular ....
>>> Right now I don't have a good solution, maybe a more generic netfilter ptr in the skb ...
>>
>> I guess inet6_skb_parm will be at least slightly more popular than
>> adding it to the skb itself. The netfilter pointers are all used for
>> optional things, so we can't really add it to any of those.
>
> Okay, but how do we go from here?
>
> Hans, should this hold back the patch ("ipvs: Fix faulty IPv6 extension
> header handling in IPVS").  Or should we pursue our patch, and circle
> back later once e.g. Patrick have found a generic solution for IPv6
> transport header handling?

I don't think we can do much better than using inet6_skb_parm. I think
the main question is whether it is really worth it, the iteration 
shouldn't be that expensive in most cases.

^ permalink raw reply

* Re: [PATCH 03/19] netfilter: nf_conntrack_ipv6: improve fragmentation handling
From: Patrick McHardy @ 2012-08-29 12:27 UTC (permalink / raw)
  To: Jesper Dangaard Brouer; +Cc: Pablo Neira Ayuso, Netfilter Developers, netdev
In-Reply-To: <Pine.LNX.4.64.1208291007520.32281@ask.diku.dk>

On Wed, 29 Aug 2012, Jesper Dangaard Brouer wrote:

>
> Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
>
> And some nitpicks below...
>
>> diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
>> index 5b2d63e..a4f6263 100644
>> --- a/net/ipv6/ip6_output.c
>> +++ b/net/ipv6/ip6_output.c
>> @@ -493,7 +493,8 @@ int ip6_forward(struct sk_buff *skb)
>> 	if (mtu < IPV6_MIN_MTU)
>> 		mtu = IPV6_MIN_MTU;
>> 
>> -	if (skb->len > mtu && !skb_is_gso(skb)) {
>> +	if ((!skb->local_df && skb->len > mtu && !skb_is_gso(skb)) ||
>
> You use (!skb->local_df) to invalidate the use of skb->len, instead of 
> (!IP6CB(skb)->frag_max_size), (which is okay, because you set local_df 
> later).  Is there are reason this check is better?

Just that it's consistent with ip6_output and the regular local_df
handling. It saves one extra condition in ip6_output.

>> +	    (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)) {
>
> Eric Dumazet would probably nitpick and say, it can be reduced to:
> (IP6CB(skb)->frag_max_size > mtu)
> ;-)

True. I'll fix that once Pablo has pulled in the patches.

^ permalink raw reply

* Re: [PATCH 02/19] Cleaning up the IPv6 MTU checking in the IPVS xmit code, by using a common helper function __mtu_check_toobig_v6().
From: Patrick McHardy @ 2012-08-29 12:24 UTC (permalink / raw)
  To: Jesper Dangaard Brouer; +Cc: Pablo Neira Ayuso, Netfilter Developers, netdev
In-Reply-To: <Pine.LNX.4.64.1208290959010.32281@ask.diku.dk>

On Wed, 29 Aug 2012, Jesper Dangaard Brouer wrote:

>
> Just a little nitpick.
>
> The original title/subj was:
> "ipvs: IPv6 MTU checking cleanup and bugfix"
>
> And the curr/used title/subj were part of the commit text.

Sorry, must have happened while importing the patch, I noticed it
afterwards, but was already too late.

^ permalink raw reply

* Re: [PATCH 2/3] ipvs: Fix faulty IPv6 extension  header handling in IPVS
From: Jesper Dangaard Brouer @ 2012-08-29 11:46 UTC (permalink / raw)
  To: Patrick McHardy
  Cc: Hans Schillstrom, Hans Schillstrom, netdev, lvs-devel,
	Julian Anastasov, Simon Horman, netfilter-devel
In-Reply-To: <nhkcv8u.3c7ad6d61f90aef8e9467df88476311d@obelix.schillstrom.com>

To Patrick,

On Wed, 2012-08-29 at 11:47 +0200, Hans Schillstrom wrote:
> >
> >On Mon, 2012-08-27 at 14:02 +0200, Patrick McHardy wrote:
> >> On Mon, 27 Aug 2012, Hans Schillstrom wrote:
> >> 
> >> >>>>
> >> >>>> On Mon, 20 Aug 2012, Jesper Dangaard Brouer wrote:
> >> >>>>
> >> >>>>> Based on patch from: Hans Schillstrom
> >> >>>>>
> >> >>>>> IPv6 headers must be processed in order of appearance,
> >> >>>>> neither can it be assumed that Upper layer headers is first.
> >> >>>>> If anything else than L4 is the first header IPVS will throw it.
> >> >>>>>
> >> >>>>> IPVS will write SNAT & DNAT modifications at a fixed pos which
> >> >>>>> will corrupt the message. Proper header position must be found
> >> >>>>> before writing modifying packet.
> >> >>>>>
> >> >>>>> This patch contains a lot of API changes.  This is done, to avoid
> >> >>>>> the costly scan of finding the IPv6 headers, via ipv6_find_hdr().
> >> >>>>> Finding the IPv6 headers is done as early as possible, and passed
> >> >>>>> on as a pointer "struct ip_vs_iphdr *" to the affected functions.
> >> >>>>
> >> >>>> How about we change netfilter to set up the skb's transport header
> >> >>>> at an early time so we can avoid all (most of) these header scans
> >> >>>> in netfilter?
> >> >>>
> >> >>> I think that would be great, maybe it should be global i.e. not only a netfilter issue.
> >> >>
> >> >> I think in most other cases the headers are supposed to be processed
> >> >> sequentially. One problem though - to be useful for netfilter/IPVS
> >> >> we'd also need to store the transport layer protocol somewhere.
> >> >
> >> > I guess that's the problem, adding it to the skb will not be popular ....
> >> > Right now I don't have a good solution, maybe a more generic netfilter ptr in the skb ...
> >> 
> >> I guess inet6_skb_parm will be at least slightly more popular than
> >> adding it to the skb itself. The netfilter pointers are all used for
> >> optional things, so we can't really add it to any of those.
> >
> >Okay, but how do we go from here?
> >
> >Hans, should this hold back the patch ("ipvs: Fix faulty IPv6 extension
> >header handling in IPVS").  Or should we pursue our patch, and circle
> >back later once e.g. Patrick have found a generic solution for IPv6
> >transport header handling?
> 
> Should we give it a try to put it in inet6_skb_parm 
> and minimize what we put there ?
> I think it could be worth it.

Okay, but then I do need some help and guidance, especially from
Patrick, think.

First of all, where in the netfilter code, should we update the new
fields in inet6_skb_parm?




^ permalink raw reply

* Re[2]:  Re[3]: [PATCH 2/3] ipvs: Fix faulty IPv6 extension header handling in IPVS
From: Hans Schillstrom @ 2012-08-29  9:47 UTC (permalink / raw)
  To: Jesper Dangaard Brouer
  Cc: Patrick McHardy, netdev, lvs-devel, Julian Anastasov,
	Simon Horman, Wensong Zhang, netfilter-devel

Hi,
>To Hans and Patrick,
>
>On Mon, 2012-08-27 at 14:02 +0200, Patrick McHardy wrote:
>> On Mon, 27 Aug 2012, Hans Schillstrom wrote:
>> 
>> >>>>
>> >>>> On Mon, 20 Aug 2012, Jesper Dangaard Brouer wrote:
>> >>>>
>> >>>>> Based on patch from: Hans Schillstrom
>> >>>>>
>> >>>>> IPv6 headers must be processed in order of appearance,
>> >>>>> neither can it be assumed that Upper layer headers is first.
>> >>>>> If anything else than L4 is the first header IPVS will throw it.
>> >>>>>
>> >>>>> IPVS will write SNAT & DNAT modifications at a fixed pos which
>> >>>>> will corrupt the message. Proper header position must be found
>> >>>>> before writing modifying packet.
>> >>>>>
>> >>>>> This patch contains a lot of API changes.  This is done, to avoid
>> >>>>> the costly scan of finding the IPv6 headers, via ipv6_find_hdr().
>> >>>>> Finding the IPv6 headers is done as early as possible, and passed
>> >>>>> on as a pointer "struct ip_vs_iphdr *" to the affected functions.
>> >>>>
>> >>>> How about we change netfilter to set up the skb's transport header
>> >>>> at an early time so we can avoid all (most of) these header scans
>> >>>> in netfilter?
>> >>>
>> >>> I think that would be great, maybe it should be global i.e. not only a netfilter issue.
>> >>
>> >> I think in most other cases the headers are supposed to be processed
>> >> sequentially. One problem though - to be useful for netfilter/IPVS
>> >> we'd also need to store the transport layer protocol somewhere.
>> >
>> > I guess that's the problem, adding it to the skb will not be popular ....
>> > Right now I don't have a good solution, maybe a more generic netfilter ptr in the skb ...
>> 
>> I guess inet6_skb_parm will be at least slightly more popular than
>> adding it to the skb itself. The netfilter pointers are all used for
>> optional things, so we can't really add it to any of those.
>
>Okay, but how do we go from here?
>
>Hans, should this hold back the patch ("ipvs: Fix faulty IPv6 extension
>header handling in IPVS").  Or should we pursue our patch, and circle
>back later once e.g. Patrick have found a generic solution for IPv6
>transport header handling?

Should we give it a try to put it in inet6_skb_parm 
and minimize what we put there ?
I think it could be worth it.




^ permalink raw reply

* Re: [PATCH net-next V3 0/2] Add rtnl_link_ops support to IPoIB
From: Or Gerlitz @ 2012-08-29  9:41 UTC (permalink / raw)
  To: davem, roland; +Cc: netdev
In-Reply-To: <1345724119-32110-1-git-send-email-ogerlitz@mellanox.com>

On 23/08/2012 15:15, Or Gerlitz wrote:
> This is about adding rtnl_link_ops to IPoIB, primarly addressing feedback from Dave on a similar patch that was part of the eIPoIB submission.

Dave, Roland

Any comment on this? The patch was originally submitted through netdev 
so I kept the fixed version to go through that path as well. If this 
makes things easier, we can merge that through net-next.

IPoIB has more proprietary sysfs entries, such as for setting the mode 
(datagram or connected) -- in 2nd thought, non legacy ipoib child 
interfaces created through rtnl shouldn't preserve these prop. 
mechanisms and use rtnl for all sorts of configs, as done for 
macvlan/8021q etc devices, correct? if this is indeed the case, I will 
fix that and submit V3.

Or.

>
> Also added the releated iproute2 patch, for the sake of the review and
> testing, e.g example usages:
>
> $ ip link add link ib0 name ib0.1 type ipoib
> $ ip link add link ib0 name ib0.8001 type ipoib pkey 0x8001
>
> and the obvious
>
> $ link delete dev ib0.8001
> $ link delete dev ib0.1
>
> Changes from V2:
>   - removed the notion of user defined index per child, since we can do well w.o it
>   - for that end, make (an internal to ipoib) distrinction between legacy childs created
>     through the old sysfs way to childs created using rtnl link ops
>
> Changes from V1:
>   - applied feedback from Dave Miller to avoid using sysfs
>   - added rtnl_link_ops support in ipoib and use them to add/delete childs
>
> Or Gerlitz (1):
>    IB/ipoib: Add rtnl_link_ops support
>
>   Documentation/infiniband/ipoib.txt           |    3 +
>   drivers/infiniband/ulp/ipoib/Makefile        |    3 +-
>   drivers/infiniband/ulp/ipoib/ipoib.h         |   13 +++
>   drivers/infiniband/ulp/ipoib/ipoib_main.c    |   25 ++++-
>   drivers/infiniband/ulp/ipoib/ipoib_netlink.c |  122 ++++++++++++++++++++++++++
>   drivers/infiniband/ulp/ipoib/ipoib_vlan.c    |  102 ++++++++++++----------
>   6 files changed, 217 insertions(+), 51 deletions(-)
>   create mode 100644 drivers/infiniband/ulp/ipoib/ipoib_netlink.c
>

^ permalink raw reply

* Re: Re[3]: [PATCH 2/3] ipvs: Fix faulty IPv6 extension header handling in IPVS
From: Jesper Dangaard Brouer @ 2012-08-29  9:37 UTC (permalink / raw)
  To: Patrick McHardy
  Cc: Hans Schillstrom, netdev, lvs-devel, Julian Anastasov,
	Simon Horman, Wensong Zhang, netfilter-devel
In-Reply-To: <Pine.GSO.4.63.1208271401140.17922@stinky-local.trash.net>

To Hans and Patrick,

On Mon, 2012-08-27 at 14:02 +0200, Patrick McHardy wrote:
> On Mon, 27 Aug 2012, Hans Schillstrom wrote:
> 
> >>>>
> >>>> On Mon, 20 Aug 2012, Jesper Dangaard Brouer wrote:
> >>>>
> >>>>> Based on patch from: Hans Schillstrom
> >>>>>
> >>>>> IPv6 headers must be processed in order of appearance,
> >>>>> neither can it be assumed that Upper layer headers is first.
> >>>>> If anything else than L4 is the first header IPVS will throw it.
> >>>>>
> >>>>> IPVS will write SNAT & DNAT modifications at a fixed pos which
> >>>>> will corrupt the message. Proper header position must be found
> >>>>> before writing modifying packet.
> >>>>>
> >>>>> This patch contains a lot of API changes.  This is done, to avoid
> >>>>> the costly scan of finding the IPv6 headers, via ipv6_find_hdr().
> >>>>> Finding the IPv6 headers is done as early as possible, and passed
> >>>>> on as a pointer "struct ip_vs_iphdr *" to the affected functions.
> >>>>
> >>>> How about we change netfilter to set up the skb's transport header
> >>>> at an early time so we can avoid all (most of) these header scans
> >>>> in netfilter?
> >>>
> >>> I think that would be great, maybe it should be global i.e. not only a netfilter issue.
> >>
> >> I think in most other cases the headers are supposed to be processed
> >> sequentially. One problem though - to be useful for netfilter/IPVS
> >> we'd also need to store the transport layer protocol somewhere.
> >
> > I guess that's the problem, adding it to the skb will not be popular ....
> > Right now I don't have a good solution, maybe a more generic netfilter ptr in the skb ...
> 
> I guess inet6_skb_parm will be at least slightly more popular than
> adding it to the skb itself. The netfilter pointers are all used for
> optional things, so we can't really add it to any of those.

Okay, but how do we go from here?

Hans, should this hold back the patch ("ipvs: Fix faulty IPv6 extension
header handling in IPVS").  Or should we pursue our patch, and circle
back later once e.g. Patrick have found a generic solution for IPv6
transport header handling?

-- 
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Sr. Network Kernel Developer at Red Hat
  Author of http://www.iptv-analyzer.org
  LinkedIn: http://www.linkedin.com/in/brouer



^ permalink raw reply

* Re: [PATCH] userns: Add basic quota support v4
From: Eric W. Biederman @ 2012-08-29  9:31 UTC (permalink / raw)
  To: Dave Chinner
  Cc: Jan Kara, linux-kernel, netdev, linux-fsdevel, Serge E. Hallyn,
	David Miller, Steven Whitehouse, Mark Fasheh, Joel Becker,
	Ben Myers, Alex Elder, Dmitry Monakhov, Abhijith Das
In-Reply-To: <20120829021029.GC13691@dastard>

Dave thanks for taking the time to take a detailed look at this code.

Dave Chinner <david@fromorbit.com> writes:

> On Tue, Aug 28, 2012 at 12:09:56PM -0700, Eric W. Biederman wrote:
>> 
>> Add the data type struct kqid which holds the kernel internal form of
>> the owning identifier of a quota.  struct kqid is a replacement for
>> the implicit union of uid, gid and project stored in an unsigned int
>> and the quota type field that is was used in the quota data
>> structures.  Making the data type explicit allows the kuid_t and
>> kgid_t type safety to propogate more thoroughly through the code,
>> revealing more places where uid/gid conversions need be made.
>> 
>> Along with the data type struct kqid comes the helper functions
>> qid_eq, qid_lt, from_kqid, from_kqid_munged, qid_valid, make_kqid,
>
> I think Jan's comment about from_kqid being named id_from_kgid is
> better, though I also think it would read better as kqid_to_id().
> ie:
>
> 	id = kqid_to_id(ns, qid);

kqid and qid are the same thing just in a different encoding.
Emphasizing the quota identifier instead of the kernel vs user encoding
change is paying attention to the wrong thing.

Using make_kqid and from_kqid follows the exact same conventions as I have
established for kuids and kgids.  So if you learn one you have learned
them all.

>> make_kqid_invalid, make_kqid_uid, make_kqid_gid.
>
> and these named something like uid_to_kqid()

The last two are indeed weird, and definitely not the common case,
since there is no precedent I can almost see doing something different
but I don't see a good case for a different name.

>> Change struct dquot dq_id to a struct kqid and remove the now
>> unecessary dq_type.
>> 
>> Update the signature of dqget, quota_send_warning, dquot_get_dqblk,
>> and dquot_set_dqblk to use struct kqid.
>> 
>> Make minimal changes to ext3, ext4, gfs2, ocfs2, and xfs to deal with
>> the change in quota structures and signatures.  The ocfs2 changes are
>> larger than most because of the extensive tracing throughout the ocfs2
>> quota code that prints out dq_id.
>
> How did you test that this all works?

By making it a compile error if you get a conversion wrong and making it
a rule not to make any logic changes.  That combined with code review
and running the code a bit to make certain I did not horribly mess up.

> e.g. run xfstests -g quota on
> each of those filesystems and check for no regressions? And if you
> wrote any tests, can you convert them to be part of xfstests so that
> namespace aware quotas get tested regularly?

I have not written any tests, and running the xfstests in a namespace
should roughly be a matter of "unshare -U xfstest -g quota"  It isn't
quite that easy because  /proc/self/uid_map and /proc/self/gid_map need
to be written first.

Right now only ext2 ext3 and ext4 compile with user namespace support
enabled.  xfs and gfs2 and ocfs2 still only compile with user namespace
support disabled because my patches to convert them are waiting in the
wings until I get the core subsystems primarily quota and posix acls
reviewed.

>> v4:
>>   - Rename struct qown struct kqid and associated changes
>>     to the naming of the helper functions.
>>   - Use qid_t to hold the userspace identifier representation
>>     of quota identifiers in all new code.
>> v3:
>>   - Add missing negation on qown_valid
>> v2:
>>   - Renamed qown_t struct qown
>>   - Added the quota type to struct qown.
>>   - Removed enum quota_type (In this patch it was just noise)
>>   - Added qown_lt, make_qown_invalid, make_qown_uid, make_qown_gid
>>   - Taught qown to handle xfs project ids (but only in init_user_ns).
>>     Q_XGETQUOTA calls .get_quotblk with project ids.
>
> Q_XSETQLIM was modified to handle project quotas as well, I assume?

I didn't break the project quota support for Q_XSETQLIM.

>> index fed504f..96944c0 100644
>> --- a/fs/xfs/xfs_quotaops.c
>> +++ b/fs/xfs/xfs_quotaops.c
>> @@ -97,28 +97,29 @@ xfs_fs_set_xstate(
>>  STATIC int
>>  xfs_fs_get_dqblk(
>>  	struct super_block	*sb,
>> -	int			type,
>> -	qid_t			id,
>> +	struct kqid		qid,
>>  	struct fs_disk_quota	*fdq)
>>  {
>>  	struct xfs_mount	*mp = XFS_M(sb);
>> +	xfs_dqid_t		xfs_id;
>>  
>>  	if (!XFS_IS_QUOTA_RUNNING(mp))
>>  		return -ENOSYS;
>>  	if (!XFS_IS_QUOTA_ON(mp))
>>  		return -ESRCH;
>>  
>> -	return -xfs_qm_scall_getquota(mp, id, xfs_quota_type(type), fdq);
>> +	xfs_id = from_kqid(&init_user_ns, qid);
>> +	return -xfs_qm_scall_getquota(mp, xfs_id, xfs_quota_type(qid.type), fdq);
>>  }
>
> Why a temporary variable? Why not just:
>
> 	return -xfs_qm_scall_getquota(mp, from_kqid(&init_user_ns, qid),
> 				      xfs_quota_type(qid.type), fdq);
>

Because I am not fond of very long lines and because I did the basic
conversion of gfs2 first where using a separate variable a larger
difference.

>From a code review perspective I am still more comfortable with
introducing a temporary variable as the fundamental change is
easier to see.

> Indeed, why not drive the struct kqid down another level into
> xfs_qm_scall_getquota() where all they are used for is parameters to
> the xfs_qm_dqget() function?

I have not driven struct kqid down another level because change
needs to wait until I add user namespace support to xfs.

I only touch xfs in this case because making the implicit union explicit
is needed for type safety and sanity, and unfortunately that requires
the prototype of the quota operations to change.

>>  STATIC int
>>  xfs_fs_set_dqblk(
>>  	struct super_block	*sb,
>> -	int			type,
>> -	qid_t			id,
>> +	struct kqid		qid,
>>  	struct fs_disk_quota	*fdq)
>>  {
>>  	struct xfs_mount	*mp = XFS_M(sb);
>> +	xfs_dqid_t		xfs_id;
>>  
>>  	if (sb->s_flags & MS_RDONLY)
>>  		return -EROFS;
>> @@ -127,7 +128,8 @@ xfs_fs_set_dqblk(
>>  	if (!XFS_IS_QUOTA_ON(mp))
>>  		return -ESRCH;
>>  
>> -	return -xfs_qm_scall_setqlim(mp, id, xfs_quota_type(type), fdq);
>> +	xfs_id = from_kqid(&init_user_ns, qid);
>> +	return -xfs_qm_scall_setqlim(mp, xfs_id, xfs_quota_type(qid.type), fdq);
>>  }
>
> Same is true here....
>
>>  
>>  const struct quotactl_ops xfs_quotactl_operations = {
>> diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c
>> index bcb6054..46de393 100644
>> --- a/fs/xfs/xfs_trans_dquot.c
>> +++ b/fs/xfs/xfs_trans_dquot.c
>> @@ -575,12 +575,14 @@ xfs_quota_warn(
>>  	struct xfs_dquot	*dqp,
>>  	int			type)
>>  {
>> +	int qtype;
>> +	struct kqid qid;
>>  	/* no warnings for project quotas - we just return ENOSPC later */
>>  	if (dqp->dq_flags & XFS_DQ_PROJ)
>>  		return;
>> -	quota_send_warning((dqp->dq_flags & XFS_DQ_USER) ? USRQUOTA : GRPQUOTA,
>> -			   be32_to_cpu(dqp->q_core.d_id), mp->m_super->s_dev,
>> -			   type);
>> +	qtype = (dqp->dq_flags & XFS_DQ_USER) ? USRQUOTA : GRPQUOTA;
>> +	qid = make_kqid(&init_user_ns, qtype, be32_to_cpu(dqp->q_core.d_id));
>> +	quota_send_warning(qid, mp->m_super->s_dev, type);
>>  }
>>  
>>  /*
>> diff --git a/include/linux/quota.h b/include/linux/quota.h
>> index 524ede8..0e73250 100644
>> --- a/include/linux/quota.h
>> +++ b/include/linux/quota.h
>> @@ -181,10 +181,161 @@ enum {
>>  #include <linux/dqblk_v2.h>
>>  
>>  #include <linux/atomic.h>
>> +#include <linux/uidgid.h>
>>  
>>  typedef __kernel_uid32_t qid_t; /* Type in which we store ids in memory */
>>  typedef long long qsize_t;	/* Type in which we store sizes */
>
> From fs/xfs/xfs_types.h:
>
> typedef __uint32_t              prid_t;         /* project ID */
>
> Perhaps it would be better to have an official kprid_t definition
> here, i.e:

We can always improve.  For this patch I don't see it making a
useful difference.

prid_t isn't exported to non xfs code.  The project id is already
stored in an unsigned int or a qid_t.

>> +struct kqid {			/* Type in which we store the quota identifier */
>> +	union {
>> +		kuid_t uid;
>> +		kgid_t gid;
>> +		qid_t prj;
>
> 		kprid_t prid;
>

Hmm.  The naming here is interesting.  No one calls the project id prj.
So I have an avoidable inconsistency here.  xfs most commonly uses
projid and occassionally calls it prid.

So I am inclined to rename this field projid, but I don't know if it is
worth respinning the patch for something so trivial.

>> +	};
>> +	int type; /* USRQUOTA (uid) or GRPQUOTA (gid) or XQM_PRJQUOTA (prj) */
>> +};
>> +
>> +static inline bool qid_eq(struct kqid left, struct kqid right)
>> +{
>> +	if (left.type != right.type)
>> +		return false;
>> +	switch(left.type) {
>> +	case USRQUOTA:
>> +		return uid_eq(left.uid, right.uid);
>> +	case GRPQUOTA:
>> +		return gid_eq(left.gid, right.gid);
>> +	case XQM_PRJQUOTA:
>> +		return left.prj == right.prj;
>> +	default:
>> +		BUG();
>
> BUG()? Seriously? The most this justifies is a WARN_ON_ONCE() to
> indicate a potential programming error, not bringing down the entire
> machine.

Dead serious.  Any other type value is impossible and unsupported.

BUG is not panic.  BUG is an oops. BUG won't bring down the machine
unless the sysadmin wants it too.

Beyond that I am not interesting in supporting exploitable abuse
of this type.

>> +static inline bool qid_lt(struct kqid left, struct kqid right)
>> +{
>> +	if (left.type < right.type)
>> +		return true;
>> +	if (left.type > right.type)
>> +		return false;
>> +	switch (left.type) {
>> +	case USRQUOTA:
>> +		return uid_lt(left.uid, right.uid);
>> +	case GRPQUOTA:
>> +		return gid_lt(left.gid, right.gid);
>> +	case XQM_PRJQUOTA:
>> +		return left.prj < right.prj;
>> +	default:
>> +		BUG();
>> +	}
>> +}
>
> What is this function used for? it's not referenced at all by the
> patch, and there's no documentation/comments explaining why it
> exists or how it is intended to be used....

This function is introduced early, but it is needed for gfs2 as
performs a sort of quota identifiers.

>> +static inline qid_t from_kqid(struct user_namespace *user_ns, struct kqid qid)
>> +{
>> +	switch (qid.type) {
>> +	case USRQUOTA:
>> +		return from_kuid(user_ns, qid.uid);
>> +	case GRPQUOTA:
>> +		return from_kgid(user_ns, qid.gid);
>> +	case XQM_PRJQUOTA:
>> +		return (user_ns == &init_user_ns) ? qid.prj : -1;
>> +	default:
>> +		BUG();
>> +	}
>> +}
>
> Oh, this can return an error. That's only checked in a coupl eof
> places this function is called. it needs tobe checked everywhere,
> otherwise we now have the possibility of quota usage being accounted
> to uid/gid/prid 0xffffffff when namespace matches are not found.

No this is not an error condition.  Returning -1 is the mapping that is
used when there is not a mapping entry.

Depending on the circumstances not having a mapping can be an error,
but it can also be a don't care condition.

All in kernel values are defined as having a mapping into the initial
user namespace.

Looking at my tree the only calls of from_kqid not mapping the id
into the initial user namespace are from_kqid_munged and they report
to userspace.  from_kqid_munged returns fs_overflowuid or fs_overflowgid
in case of a mapping failure.

projects ids can only be accessed if capable(CAP_SYS_ADMIN) which
you can only have in the initial user namespace so for now there will
alwasy be a mapping for project ids.

>> +static inline qid_t from_kqid_munged(struct user_namespace *user_ns,
>> +				   struct kqid qid)
>
> What does munging do to the return value? how is it different to
> from_kqid()? Document your API....

This bit certainly certainly could use a bit more explanation.

Mapping of uids and gids from one domain to another is not new in linux.
It originates with the transition from 16bit identifiers to 32bit
identifiers.  In most places when there is a 32bit identifier that can
not be represented we return a fs_overflowid aka (u16)-2 aka nobody.

So in general when we want to pass a value to userspace from_kqid_munged
is called, and if there is a mapping failure we report the value that
userspace set fs_overflowuid or fs_overflowgid to.  For project ids it
which are restricted to the initial user namespace no mapping failures
can occur.

>> +static inline struct kqid make_kqid(struct user_namespace *user_ns,
>> +				    int type, qid_t qid)
>> +{
>> +	struct kqid kqid;
>> +
>> +	kqid.type = type;
>> +	switch (type) {
>> +	case USRQUOTA:
>> +		kqid.uid = make_kuid(user_ns, qid);
>> +		break;
>> +	case GRPQUOTA:
>> +		kqid.gid = make_kgid(user_ns, qid);
>> +		break;
>> +	case XQM_PRJQUOTA:
>> +		if (user_ns == &init_user_ns)
>> +			kqid.prj = qid;
>> +		else
>> +			kqid.prj = -1;
>> +		break;
>
> 		kqid.prj = (user_ns == &init_user_ns) ? qid : -1;

That is an interesting inconsitency. Given that implemented the others
on one line.

>> +	default:
>> +		BUG();
>> +	}
>> +	return kqid;
>> +}
>> +
>> +static inline struct kqid make_kqid_invalid(int type)
>> +{
>> +	struct kqid kqid;
>> +
>> +	kqid.type = type;
>> +	switch (type) {
>> +	case USRQUOTA:
>> +		kqid.uid = INVALID_UID;
>> +		break;
>> +	case GRPQUOTA:
>> +		kqid.gid = INVALID_GID;
>> +		break;
>> +	case XQM_PRJQUOTA:
>> +		kqid.prj = -1;
>> +		break;
>> +	default:
>> +		BUG();
>> +	}
>> +	return kqid;
>> +}
>> +
>> +static inline struct kqid make_kqid_uid(kuid_t uid)
>> +{
>> +	struct kqid kqid = {
>> +		.type = USRQUOTA,
>> +		.uid  = uid,
>> +	};
>> +	return kqid;
>> +}
>
> Isn't this sort of construct frowned upon? i.e. returning a
> structure out of scope? It may be inline code and hence work, but
> this strikes me as a landmine waiting for someone to tread on....

Not at all because I am returning the structure by value.

Return a structure by reference is the case that doesn't work.

Eric

^ permalink raw reply

* Re: [PATCH] batman-adv: remove pointless conditional before kfree_skb()
From: Antonio Quartulli @ 2012-08-29  9:14 UTC (permalink / raw)
  To: Wei Yongjun
  Cc: netdev-u79uwXL29TY76Z2rM5mHXA,
	b.a.t.m.a.n-ZwoEplunGu2X36UT3dwllkB+6BGkLq7r,
	siwu-MaAgPAbsBIVS8oHt8HbXEIQuADTiUCJX,
	yongjun_wei-zrsr2BFq86L20UzCJQGyNP8+0UxHXcjY,
	lindner_marek-LWAfsSFWpa4, davem-fT/PcQaiUtIeIZ0/mPfg9Q
In-Reply-To: <CAPgLHd8jAk-4C1w4Z-q=WeMT_SgB99HCrGCtEPa5Xe4VYnZCOA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>

[-- Attachment #1: Type: text/plain, Size: 502 bytes --]

On Tue, Aug 28, 2012 at 09:06:08PM +0800, Wei Yongjun wrote:
> From: Wei Yongjun <yongjun_wei-zrsr2BFq86L20UzCJQGyNP8+0UxHXcjY@public.gmane.org>
> 
> Remove pointless conditional before kfree_skb().
> 
> Signed-off-by: Wei Yongjun <yongjun_wei-zrsr2BFq86L20UzCJQGyNP8+0UxHXcjY@public.gmane.org>

Acked-by: Antonio Quartulli <ordex-GaUfNO9RBHfsrOwW+9ziJQ@public.gmane.org>

Thank you Wei!

Regards,


-- 
Antonio Quartulli

..each of us alone is worth nothing..
Ernesto "Che" Guevara

[-- Attachment #2: Type: application/pgp-signature, Size: 198 bytes --]

^ permalink raw reply

* Re: [PATCH V2 2/2] ipvs: Extend MTU check to account for IPv6 NAT defrag changes
From: Jesper Dangaard Brouer @ 2012-08-29  9:04 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: netdev, Patrick McHardy, lvs-devel, Julian Anastasov,
	Simon Horman, Pablo Neira Ayuso, Hans Schillstrom, Wensong Zhang,
	netfilter-devel
In-Reply-To: <1346229785.2522.8.camel@edumazet-glaptop>

On Wed, 2012-08-29 at 01:43 -0700, Eric Dumazet wrote:
> On Wed, 2012-08-29 at 09:02 +0200, Jesper Dangaard Brouer wrote:
> > On Tue, 2012-08-28 at 07:49 -0700, Eric Dumazet wrote:
> > > On Tue, 2012-08-28 at 16:23 +0200, Jesper Dangaard Brouer wrote:
> > > > This patch is necessary, to make IPVS work, after Patrick McHardys
> > > > IPv6 NAT defragmentation changes.
> > > > 
> > > > Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
> > > > ---
> > > > In V2: the tunnel mode is no longer a special case.
> > > > 
> > > >  net/netfilter/ipvs/ip_vs_xmit.c |    9 ++++++++-
> > > >  1 files changed, 8 insertions(+), 1 deletions(-)
> > > > 
> > > > diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
> > > > index 67a3978..56f6d5d 100644
> > > > --- a/net/netfilter/ipvs/ip_vs_xmit.c
> > > > +++ b/net/netfilter/ipvs/ip_vs_xmit.c
> > > > @@ -88,7 +88,14 @@ __ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos)
> > > >  static inline bool
> > > >  __mtu_check_toobig_v6(const struct sk_buff *skb, u32 mtu)
> > > >  {
> > > > -	if (skb->len > mtu && !skb_is_gso(skb)) {
> > > > +	if (IP6CB(skb)->frag_max_size) {
> > > > +		/* frag_max_size tell us that, this packet have been
> > > > +		 * defragmented by netfilter IPv6 conntrack module.
> > > > +		 */
> > > > +		if (IP6CB(skb)->frag_max_size > mtu)
> > > > +			return true; /* largest fragment violate MTU */
> > Implicit:         else
> >      			return false
> > 
> > (if it makes it more clear, not sure)
> > > > +	}
> > > > +	else if (skb->len > mtu && !skb_is_gso(skb)) {
> > > >  		return true; /* Packet size violate MTU size */
> > > >  	}
> > > 
> > > Couldnt you use a single test ?
> > > 
> > > if (IP6CB(skb)->frag_max_size > mtu)
> > > 	return true;
> > > 
> > > if (skb->len > mtu && !skb_is_gso(skb))
> > > 	return true;
> > > 
> > 
> > Nope, this will not work.
> > 
> > If (IP6CB(skb)->frag_max_size > 0) then we have a defragmented packet,
> > this means that skb->len cannot be used for MTU checking, because
> > skb->len is now the total length of all the fragments (which your
> > solution will fall-through to)
> > 
> 
> If the packet was not fragmented, its was a single frame.
> 
> But if this frame length is above mtu, packet is not too big ?

Nope... not if its a defragmented/reassembled packet.

> Sorry if its a stupid question.

These changes have to be seen together with Patrick's patch:
 "netfilter: nf_conntrack_ipv6: improve fragmentation handling"
 http://thread.gmane.org/gmane.linux.network/241517/focus=241518

The IPv6 packet arriving have been defragmented/reassembled by the
nf_conntrack_ipv6 module.  Thus, they look like a normal, but big,
packet to us.   We let it through, because it will be re-fragmented
again later, but first we need to check if the largest fragment would
violate the MTU.

Hope it makes it more clear.



^ permalink raw reply

* Re: [PATCH 1/1] tcp: Wrong timeout for SYN segments
From: Eric Dumazet @ 2012-08-29  8:51 UTC (permalink / raw)
  To: H.K. Jerry Chu; +Cc: Alexander Bergmann, David Miller, netdev, linux-kernel
In-Reply-To: <CAFbMe2Pgjbttt+Z3RQ9KL05=GHqZYk5ubNH2G1-EUn_f_RBaEQ@mail.gmail.com>

On Tue, 2012-08-28 at 21:34 -0700, H.K. Jerry Chu wrote:

> IMHO 31secs seem a little short. Why not change it to 6 as well because 63
> secs still beats 93secs with 3sec initRTO and 5 retries.
> 
> Jerry
> 

My rationale was that such increase were going to amplify SYN attacks
impact by 20% (if we count number of useless SYNACK sent)

If the active side sends SYN packets for 180 seconds, do we really want
to also send SYNACKS for additional 100 seconds ?

Sure, RFC numbers are what they are, but in practice, I doubt someone
will really miss the extra SYNACK sent after ~32 seconds, since it would
matter only for the last SYN attempted.

^ permalink raw reply

* Re: [PATCH V2 2/2] ipvs: Extend MTU check to account for IPv6 NAT defrag changes
From: Eric Dumazet @ 2012-08-29  8:43 UTC (permalink / raw)
  To: Jesper Dangaard Brouer
  Cc: netdev, Patrick McHardy, lvs-devel, Julian Anastasov,
	Simon Horman, Pablo Neira Ayuso, Hans Schillstrom, Wensong Zhang,
	netfilter-devel
In-Reply-To: <1346223759.3069.429.camel@localhost>

On Wed, 2012-08-29 at 09:02 +0200, Jesper Dangaard Brouer wrote:
> On Tue, 2012-08-28 at 07:49 -0700, Eric Dumazet wrote:
> > On Tue, 2012-08-28 at 16:23 +0200, Jesper Dangaard Brouer wrote:
> > > This patch is necessary, to make IPVS work, after Patrick McHardys
> > > IPv6 NAT defragmentation changes.
> > > 
> > > Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
> > > ---
> > > In V2: the tunnel mode is no longer a special case.
> > > 
> > >  net/netfilter/ipvs/ip_vs_xmit.c |    9 ++++++++-
> > >  1 files changed, 8 insertions(+), 1 deletions(-)
> > > 
> > > diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
> > > index 67a3978..56f6d5d 100644
> > > --- a/net/netfilter/ipvs/ip_vs_xmit.c
> > > +++ b/net/netfilter/ipvs/ip_vs_xmit.c
> > > @@ -88,7 +88,14 @@ __ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos)
> > >  static inline bool
> > >  __mtu_check_toobig_v6(const struct sk_buff *skb, u32 mtu)
> > >  {
> > > -	if (skb->len > mtu && !skb_is_gso(skb)) {
> > > +	if (IP6CB(skb)->frag_max_size) {
> > > +		/* frag_max_size tell us that, this packet have been
> > > +		 * defragmented by netfilter IPv6 conntrack module.
> > > +		 */
> > > +		if (IP6CB(skb)->frag_max_size > mtu)
> > > +			return true; /* largest fragment violate MTU */
> Implicit:         else
>      			return false
> 
> (if it makes it more clear, not sure)
> > > +	}
> > > +	else if (skb->len > mtu && !skb_is_gso(skb)) {
> > >  		return true; /* Packet size violate MTU size */
> > >  	}
> > 
> > Couldnt you use a single test ?
> > 
> > if (IP6CB(skb)->frag_max_size > mtu)
> > 	return true;
> > 
> > if (skb->len > mtu && !skb_is_gso(skb))
> > 	return true;
> > 
> 
> Nope, this will not work.
> 
> If (IP6CB(skb)->frag_max_size > 0) then we have a defragmented packet,
> this means that skb->len cannot be used for MTU checking, because
> skb->len is now the total length of all the fragments (which your
> solution will fall-through to)
> 

If the packet was not fragmented, its was a single frame.

But if this frame length is above mtu, packet is not too big ?

Sorry if its a stupid question.




^ permalink raw reply

* Re: [PATCH 03/19] netfilter: nf_conntrack_ipv6: improve fragmentation handling
From: Jesper Dangaard Brouer @ 2012-08-29  8:21 UTC (permalink / raw)
  To: Patrick McHardy; +Cc: Pablo Neira Ayuso, Netfilter Developers, netdev
In-Reply-To: <1346190539-9963-4-git-send-email-kaber@trash.net>


Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>

And some nitpicks below...

On Tue, 28 Aug 2012, Patrick McHardy wrote:

> The IPv6 conntrack fragmentation currently has a couple of shortcomings.
> Fragmentes are collected in PREROUTING/OUTPUT, are defragmented, the
> defragmented packet is then passed to conntrack, the resulting conntrack
> information is attached to each original fragment and the fragments then
> continue their way through the stack.
>
> Helper invocation occurs in the POSTROUTING hook, at which point only
> the original fragments are available. The result of this is that
> fragmented packets are never passed to helpers.
>
> This patch improves the situation in the following way:
>
> - If a reassembled packet belongs to a connection that has a helper
>  assigned, the reassembled packet is passed through the stack instead
>  of the original fragments.
>
> - During defragmentation, the largest received fragment size is stored.
>  On output, the packet is refragmented if required. If the largest
>  received fragment size exceeds the outgoing MTU, a "packet too big"
>  message is generated, thus behaving as if the original fragments
>  were passed through the stack from an outside point of view.
>
> - The ipv6_helper() hook function can't receive fragments anymore for
>  connections using a helper, so it is switched to use ipv6_skip_exthdr()
>  instead of the netfilter specific nf_ct_ipv6_skip_exthdr() and the
>  reassembled packets are passed to connection tracking helpers.
>
> The result of this is that we can properly track fragmented packets, but
> still generate ICMPv6 Packet too big messages if we would have before.
>
> This patch is also required as a precondition for IPv6 NAT, where NAT
> helpers might enlarge packets up to a point that they require
> fragmentation. In that case we can't generate Packet too big messages
> since the proper MTU can't be calculated in all cases (f.i. when
> changing textual representation of a variable amount of addresses),
> so the packet is transparently fragmented iff the original packet or
> fragments would have fit the outgoing MTU.
>
> IPVS parts by Jesper Dangaard Brouer <brouer@redhat.com>.
>
> Signed-off-by: Patrick McHardy <kaber@trash.net>
> ---
> include/linux/ipv6.h                           |    1 +
> net/ipv6/ip6_output.c                          |    7 +++-
> net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c |   41 ++++++++++++++++++-----
> net/ipv6/netfilter/nf_conntrack_reasm.c        |   19 +++++++++--
> net/netfilter/ipvs/ip_vs_xmit.c                |    9 +++++-
> 5 files changed, 62 insertions(+), 15 deletions(-)
>
> diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
> index 879db26..0b94e91 100644
> --- a/include/linux/ipv6.h
> +++ b/include/linux/ipv6.h
> @@ -256,6 +256,7 @@ struct inet6_skb_parm {
> #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
> 	__u16			dsthao;
> #endif
> +	__u16			frag_max_size;
>
> #define IP6SKB_XFRM_TRANSFORMED	1
> #define IP6SKB_FORWARDED	2
> diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
> index 5b2d63e..a4f6263 100644
> --- a/net/ipv6/ip6_output.c
> +++ b/net/ipv6/ip6_output.c
> @@ -493,7 +493,8 @@ int ip6_forward(struct sk_buff *skb)
> 	if (mtu < IPV6_MIN_MTU)
> 		mtu = IPV6_MIN_MTU;
>
> -	if (skb->len > mtu && !skb_is_gso(skb)) {
> +	if ((!skb->local_df && skb->len > mtu && !skb_is_gso(skb)) ||

You use (!skb->local_df) to invalidate the use of skb->len, instead of 
(!IP6CB(skb)->frag_max_size), (which is okay, because you set local_df 
later).  Is there are reason this check is better?

> +	    (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)) {

Eric Dumazet would probably nitpick and say, it can be reduced to:
  (IP6CB(skb)->frag_max_size > mtu)
;-)


> 		/* Again, force OUTPUT device used as source address */
> 		skb->dev = dst->dev;
> 		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
> @@ -636,7 +637,9 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
> 	/* We must not fragment if the socket is set to force MTU discovery
> 	 * or if the skb it not generated by a local socket.
> 	 */
> -	if (unlikely(!skb->local_df && skb->len > mtu)) {
> +	if (unlikely(!skb->local_df && skb->len > mtu) ||
> +		     (IP6CB(skb)->frag_max_size &&
> +		      IP6CB(skb)->frag_max_size > mtu)) {
> 		if (skb->sk && dst_allfrag(skb_dst(skb)))
> 			sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);
>
[cut]

> --- a/net/ipv6/netfilter/nf_conntrack_reasm.c
> +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
> @@ -190,6 +190,7 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
> 			     const struct frag_hdr *fhdr, int nhoff)
> {
> 	struct sk_buff *prev, *next;
> +	unsigned int payload_len;
> 	int offset, end;
>
> 	if (fq->q.last_in & INET_FRAG_COMPLETE) {
> @@ -197,8 +198,10 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
> 		goto err;
> 	}
>
> +	payload_len = ntohs(ipv6_hdr(skb)->payload_len);
> +
> 	offset = ntohs(fhdr->frag_off) & ~0x7;
> -	end = offset + (ntohs(ipv6_hdr(skb)->payload_len) -
> +	end = offset + (payload_len -
> 			((u8 *)(fhdr + 1) - (u8 *)(ipv6_hdr(skb) + 1)));
>
> 	if ((unsigned int)end > IPV6_MAXPLEN) {
> @@ -307,6 +310,8 @@ found:
> 	skb->dev = NULL;
> 	fq->q.stamp = skb->tstamp;
> 	fq->q.meat += skb->len;
> +	if (payload_len > fq->q.max_size)
> +		fq->q.max_size = payload_len;
> 	atomic_add(skb->truesize, &nf_init_frags.mem);
>
> 	/* The first fragment.
> @@ -412,10 +417,12 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
> 	}
> 	atomic_sub(head->truesize, &nf_init_frags.mem);
>
> +	head->local_df = 1;

/me pointing to where local_df is being set.


> 	head->next = NULL;
> 	head->dev = dev;
> 	head->tstamp = fq->q.stamp;
> 	ipv6_hdr(head)->payload_len = htons(payload_len);
> +	IP6CB(head)->frag_max_size = sizeof(struct ipv6hdr) + fq->q.max_size;
>
> 	/* Yes, and fold redundant checksum back. 8) */
> 	if (head->ip_summed == CHECKSUM_COMPLETE)

[cut]

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox