Netdev List
 help / color / mirror / Atom feed
* Re: the next chunk of iov_iter-net stuff for review
From: David Miller @ 2014-12-10 18:25 UTC (permalink / raw)
  To: viro; +Cc: netdev, linux-kernel
In-Reply-To: <20141209224928.GL22149@ZenIV.linux.org.uk>

From: Al Viro <viro@ZenIV.linux.org.uk>
Date: Tue, 9 Dec 2014 22:49:28 +0000

> OK, rebase is in
> git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs.git for-davem-2,
> individual patches - in followups (first 13 - iov_iter, then 12 net-related)

All looks good to me, pulled, thanks Al!

^ permalink raw reply

* [PATCH] irda: Convert function pointer arrays and uses to const
From: Joe Perches @ 2014-12-10 18:28 UTC (permalink / raw)
  To: Samuel Ortiz; +Cc: David S. Miller, netdev, LKML

Making things const is a good thing.

(x86-64 defconfig with all irda)
$ size net/irda/built-in.o*
   text	   data	    bss	    dec	    hex	filename
 109276	   1868	    244	 111388	  1b31c	net/irda/built-in.o.new
 108828	   2316	    244	 111388	  1b31c	net/irda/built-in.o.old

Signed-off-by: Joe Perches <joe@perches.com>
---
 include/net/irda/parameters.h  | 6 +++---
 net/irda/ircomm/ircomm_param.c | 8 ++++----
 net/irda/irttp.c               | 6 ++++--
 net/irda/parameters.c          | 8 ++++----
 net/irda/qos.c                 | 6 +++---
 5 files changed, 18 insertions(+), 16 deletions(-)

diff --git a/include/net/irda/parameters.h b/include/net/irda/parameters.h
index 42713c9..2d9cd00 100644
--- a/include/net/irda/parameters.h
+++ b/include/net/irda/parameters.h
@@ -71,17 +71,17 @@ typedef int (*PV_HANDLER)(void *self, __u8 *buf, int len, __u8 pi,
 			  PV_TYPE type, PI_HANDLER func);
 
 typedef struct {
-	PI_HANDLER func;  /* Handler for this parameter identifier */
+	const PI_HANDLER func;  /* Handler for this parameter identifier */
 	PV_TYPE    type;  /* Data type for this parameter */
 } pi_minor_info_t;
 
 typedef struct {
-	pi_minor_info_t *pi_minor_call_table;
+	const pi_minor_info_t *pi_minor_call_table;
 	int len;
 } pi_major_info_t;
 
 typedef struct {
-	pi_major_info_t *tables;
+	const pi_major_info_t *tables;
 	int              len;
 	__u8             pi_mask;
 	int              pi_major_offset;
diff --git a/net/irda/ircomm/ircomm_param.c b/net/irda/ircomm/ircomm_param.c
index 27be782..3c4caa6 100644
--- a/net/irda/ircomm/ircomm_param.c
+++ b/net/irda/ircomm/ircomm_param.c
@@ -61,12 +61,12 @@ static int ircomm_param_dte(void *instance, irda_param_t *param, int get);
 static int ircomm_param_dce(void *instance, irda_param_t *param, int get);
 static int ircomm_param_poll(void *instance, irda_param_t *param, int get);
 
-static pi_minor_info_t pi_minor_call_table_common[] = {
+static const pi_minor_info_t pi_minor_call_table_common[] = {
 	{ ircomm_param_service_type, PV_INT_8_BITS },
 	{ ircomm_param_port_type,    PV_INT_8_BITS },
 	{ ircomm_param_port_name,    PV_STRING }
 };
-static pi_minor_info_t pi_minor_call_table_non_raw[] = {
+static const pi_minor_info_t pi_minor_call_table_non_raw[] = {
 	{ ircomm_param_data_rate,    PV_INT_32_BITS | PV_BIG_ENDIAN },
 	{ ircomm_param_data_format,  PV_INT_8_BITS },
 	{ ircomm_param_flow_control, PV_INT_8_BITS },
@@ -74,13 +74,13 @@ static pi_minor_info_t pi_minor_call_table_non_raw[] = {
 	{ ircomm_param_enq_ack,      PV_INT_16_BITS },
 	{ ircomm_param_line_status,  PV_INT_8_BITS }
 };
-static pi_minor_info_t pi_minor_call_table_9_wire[] = {
+static const pi_minor_info_t pi_minor_call_table_9_wire[] = {
 	{ ircomm_param_dte,          PV_INT_8_BITS },
 	{ ircomm_param_dce,          PV_INT_8_BITS },
 	{ ircomm_param_poll,         PV_NO_VALUE },
 };
 
-static pi_major_info_t pi_major_call_table[] = {
+static const pi_major_info_t pi_major_call_table[] = {
 	{ pi_minor_call_table_common,  3 },
 	{ pi_minor_call_table_non_raw, 6 },
 	{ pi_minor_call_table_9_wire,  3 }
diff --git a/net/irda/irttp.c b/net/irda/irttp.c
index 3ef0b08..b6ab41d 100644
--- a/net/irda/irttp.c
+++ b/net/irda/irttp.c
@@ -71,11 +71,13 @@ static void irttp_status_indication(void *instance,
 				    LINK_STATUS link, LOCK_STATUS lock);
 
 /* Information for parsing parameters in IrTTP */
-static pi_minor_info_t pi_minor_call_table[] = {
+static const pi_minor_info_t pi_minor_call_table[] = {
 	{ NULL, 0 },                                             /* 0x00 */
 	{ irttp_param_max_sdu_size, PV_INTEGER | PV_BIG_ENDIAN } /* 0x01 */
 };
-static pi_major_info_t pi_major_call_table[] = { { pi_minor_call_table, 2 } };
+static const pi_major_info_t pi_major_call_table[] = {
+	{ pi_minor_call_table, 2 }
+};
 static pi_param_info_t param_info = { pi_major_call_table, 1, 0x0f, 4 };
 
 /************************ GLOBAL PROCEDURES ************************/
diff --git a/net/irda/parameters.c b/net/irda/parameters.c
index 006786b..16ce32f 100644
--- a/net/irda/parameters.c
+++ b/net/irda/parameters.c
@@ -52,7 +52,7 @@ static int irda_insert_no_value(void *self, __u8 *buf, int len, __u8 pi,
 static int irda_param_unpack(__u8 *buf, char *fmt, ...);
 
 /* Parameter value call table. Must match PV_TYPE */
-static PV_HANDLER pv_extract_table[] = {
+static const PV_HANDLER pv_extract_table[] = {
 	irda_extract_integer, /* Handler for any length integers */
 	irda_extract_integer, /* Handler for 8  bits integers */
 	irda_extract_integer, /* Handler for 16 bits integers */
@@ -62,7 +62,7 @@ static PV_HANDLER pv_extract_table[] = {
 	irda_extract_no_value /* Handler for no value parameters */
 };
 
-static PV_HANDLER pv_insert_table[] = {
+static const PV_HANDLER pv_insert_table[] = {
 	irda_insert_integer, /* Handler for any length integers */
 	irda_insert_integer, /* Handler for 8  bits integers */
 	irda_insert_integer, /* Handler for 16 bits integers */
@@ -449,7 +449,7 @@ static int irda_param_unpack(__u8 *buf, char *fmt, ...)
 int irda_param_insert(void *self, __u8 pi, __u8 *buf, int len,
 		      pi_param_info_t *info)
 {
-	pi_minor_info_t *pi_minor_info;
+	const pi_minor_info_t *pi_minor_info;
 	__u8 pi_minor;
 	__u8 pi_major;
 	int type;
@@ -504,7 +504,7 @@ EXPORT_SYMBOL(irda_param_insert);
 static int irda_param_extract(void *self, __u8 *buf, int len,
 			      pi_param_info_t *info)
 {
-	pi_minor_info_t *pi_minor_info;
+	const pi_minor_info_t *pi_minor_info;
 	__u8 pi_minor;
 	__u8 pi_major;
 	int type;
diff --git a/net/irda/qos.c b/net/irda/qos.c
index 5ed6c9a..25ba850 100644
--- a/net/irda/qos.c
+++ b/net/irda/qos.c
@@ -122,7 +122,7 @@ static __u32 max_line_capacities[10][4] = {
 	{ 800000, 400000, 160000, 80000 }, /* 16000000 bps */
 };
 
-static pi_minor_info_t pi_minor_call_table_type_0[] = {
+static const pi_minor_info_t pi_minor_call_table_type_0[] = {
 	{ NULL, 0 },
 /* 01 */{ irlap_param_baud_rate,       PV_INTEGER | PV_LITTLE_ENDIAN },
 	{ NULL, 0 },
@@ -134,7 +134,7 @@ static pi_minor_info_t pi_minor_call_table_type_0[] = {
 /* 08 */{ irlap_param_link_disconnect, PV_INT_8_BITS }
 };
 
-static pi_minor_info_t pi_minor_call_table_type_1[] = {
+static const pi_minor_info_t pi_minor_call_table_type_1[] = {
 	{ NULL, 0 },
 	{ NULL, 0 },
 /* 82 */{ irlap_param_max_turn_time,   PV_INT_8_BITS },
@@ -144,7 +144,7 @@ static pi_minor_info_t pi_minor_call_table_type_1[] = {
 /* 86 */{ irlap_param_min_turn_time,   PV_INT_8_BITS },
 };
 
-static pi_major_info_t pi_major_call_table[] = {
+static const pi_major_info_t pi_major_call_table[] = {
 	{ pi_minor_call_table_type_0, 9 },
 	{ pi_minor_call_table_type_1, 7 },
 };

^ permalink raw reply related

* Re: [ovs-dev] [PATCHv11 net-next 2/2] openvswitch: Add support for unique flow IDs.
From: Pravin Shelar @ 2014-12-10 18:29 UTC (permalink / raw)
  To: Joe Stringer; +Cc: dev@openvswitch.org, netdev, LKML
In-Reply-To: <CAOftzPjp3++6X50m27fhnAqVPsNRRfrhtmWGoeVi_8v3KCHWhA@mail.gmail.com>

On Wed, Dec 10, 2014 at 10:15 AM, Joe Stringer <joestringer@nicira.com> wrote:
> On 9 December 2014 at 22:11, Pravin Shelar <pshelar@nicira.com> wrote:
>> On Tue, Dec 9, 2014 at 4:25 PM, Joe Stringer <joestringer@nicira.com> wrote:
>>> On 9 December 2014 at 10:32, Pravin Shelar <pshelar@nicira.com> wrote:
>>>> On Tue, Dec 2, 2014 at 6:56 PM, Joe Stringer <joestringer@nicira.com> wrote:
>>>>> @@ -424,10 +475,9 @@ static struct sw_flow *masked_flow_lookup(struct table_instance *ti,
>>>>>         ovs_flow_mask_key(&masked_key, unmasked, mask);
>>>>>         hash = flow_hash(&masked_key, key_start, key_end);
>>>>>         head = find_bucket(ti, hash);
>>>>> -       hlist_for_each_entry_rcu(flow, head, hash_node[ti->node_ver]) {
>>>>> -               if (flow->mask == mask && flow->hash == hash &&
>>>>> -                   flow_cmp_masked_key(flow, &masked_key,
>>>>> -                                         key_start, key_end))
>>>>> +       hlist_for_each_entry_rcu(flow, head, flow_hash.node[ti->node_ver]) {
>>>>> +               if (flow->mask == mask && flow->flow_hash.hash == hash &&
>>>>> +                   flow_cmp_masked_key(flow, &masked_key, key_start, key_end))
>>>>>                         return flow;
>>>>>         }
>>>>>         return NULL;
>>>>> @@ -469,7 +519,40 @@ struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl,
>>>>>         /* Always called under ovs-mutex. */
>>>>>         list_for_each_entry(mask, &tbl->mask_list, list) {
>>>>>                 flow = masked_flow_lookup(ti, match->key, mask);
>>>>> -               if (flow && ovs_flow_cmp_unmasked_key(flow, match))  /* Found */
>>>>> +               if (flow && !flow->ufid &&
>>>> why not NULL check for flow->unmasked_key here rather than ufid?
>>>
>>> In this version, I tried to consistently use flow->ufid as the switch
>>> for whether UFID exists or not. In the next version, this statement
>>> would refer to flow->id->ufid_len.
>>>
>>> The current approach means that ovs_flow_tbl_lookup_exact() is really
>>> ovs_flow_tbl_lookup_unmasked_key(). Do you think this should remain
>>> specific to unmasked key or should it be made to check that the
>>> identifier (ufid OR unmasked key) is the same?
>>
>> It is easier to read code if we check for flow->unmasked_key here.
>> ovs_flow_cmp_unmasked_key() has assert on ufid anyways.
>
> With the change to put UFID/unmasked key in the same struct, there
> will be no such pointer to check, only ufid_len.
>
right, Lets keep the check for ufid_len.

> However, we could shift this check at the start of the function instead.

^ permalink raw reply

* Re: atomic operations bottleneck in the IPv6 stack
From: Hannes Frederic Sowa @ 2014-12-10 17:58 UTC (permalink / raw)
  To: cristian.bercaru@freescale.com
  Cc: netdev@vger.kernel.org, R89243@freescale.com,
	Madalin-Cristian Bucur, Razvan.Ungureanu@freescale.com
In-Reply-To: <1418231763.24395.2.camel@redhat.com>

On Mi, 2014-12-10 at 18:16 +0100, Hannes Frederic Sowa wrote:
> On Mi, 2014-12-10 at 16:56 +0000, cristian.bercaru@freescale.com wrote:
> > 
> > It seems to me that the atomic operations on the IPv6 forwarding path
> > are a bottleneck and they are not scalable with the number of cores.
> > Am I right? What improvements can be brought to the IPv6 kernel code
> > to make it less dependent of atomic operations/variables?
> 
> For a starter, something like the following commit:
> 
> commit d26b3a7c4b3b26319f18bb645de93eba8f4bdcd5
> Author: Eric Dumazet <edumazet@google.com>
> Date:   Tue Jul 31 05:45:30 2012 +0000
> 
>     ipv4: percpu nh_rth_output cache

Actually, we should be able to remove the atomics in input and
forwarding path by just relying on RCU. I'll have a look.

Bye,
Hannes

^ permalink raw reply

* Re: [net-next PATCH 0/6] net: Alloc NAPI page frags from their own pool
From: David Miller @ 2014-12-10 18:32 UTC (permalink / raw)
  To: alexander.h.duyck; +Cc: netdev, ast, eric.dumazet, brouer
In-Reply-To: <20141210033902.2114.68658.stgit@ahduyck-vm-fedora20>

From: Alexander Duyck <alexander.h.duyck@redhat.com>
Date: Tue, 09 Dec 2014 19:40:35 -0800

> This patch series implements a means of allocating page fragments without
> the need for the local_irq_save/restore in __netdev_alloc_frag.  By doing
> this I am able to decrease packet processing time by 11ns per packet in my
> test environment.

Looks great, series applied, thanks Alex.

^ permalink raw reply

* Re: [PATCH v2] if_bridge: fix conflict with glibc
From: David Miller @ 2014-12-10 18:34 UTC (permalink / raw)
  To: stephen; +Cc: gregory.0xf0, f.fainelli, xiyou.wangcong, netdev
In-Reply-To: <20141209214122.76b15851@urahara>

From: Stephen Hemminger <stephen@networkplumber.org>
Date: Tue, 9 Dec 2014 21:41:22 -0800

> Even though kernel and glibc headers have same effective values
> Gcc complains about redefinitions. Since this is a header expected
> to be used by userspace, use glibc header.
> 
> This supersedes change in commit 66f1c44887ba4f47d617f8ae21cf8e04e1892bd7
> and fixes build of iproute2 with Glibc-2.19. This follows similar usage
> in include/uapi/linux for l2tp.h
> 
> Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
> 
> --- a/include/uapi/linux/if_bridge.h	2014-12-09 20:25:21.656016605 -0800
> +++ b/include/uapi/linux/if_bridge.h	2014-12-09 21:32:01.391034756 -0800
> @@ -15,7 +15,9 @@
>  
>  #include <linux/types.h>
>  #include <linux/if_ether.h>
> -#include <linux/in6.h>
> +#ifndef __KERNEL__
> +#include <netinet/in.h>
> +#endif

No, we really want to incluse the linux/in6.h header, as that's where all
the special GLIBC CPP checks are, such as:

#if __UAPI_DEF_IN6_ADDR_ALT

Please research how we have resolved the conflict between GLIBC and the
kernel's exported headers.  We really need to use linux/in6.h for all of
this to work.

I understand that it is upsetting that iproute2 stopped building for you,
but I'd like to kindly ask that you look more deeply into this and think
more longer term, rather than having a knee jerk reaction and looking for
quick fixes.

Thanks.

^ permalink raw reply

* Re: [bisected] xfrm: TCP connection initiating PMTU discovery stalls on v3.
From: Wolfgang Walter @ 2014-12-10 18:34 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Thomas Jarosch, netdev, Eric Dumazet, Herbert Xu,
	Steffen Klassert
In-Reply-To: <1418161219.27198.10.camel@edumazet-glaptop2.roam.corp.google.com>

Am Dienstag, 9. Dezember 2014, 13:40:19 schrieb Eric Dumazet:
> On Tue, 2014-12-09 at 21:36 +0100, Wolfgang Walter wrote:
> > How would that be done? I found no way to disable it especially for xfrm.
> > I
> > disabled gso for the interface which serves the ipsec traffic but this
> > does
> > not help. tcp still uses gso for the esp tunnel.
> > 
> > I put a view printk's in net/xfrm/xfrm_output.c and net/ipv4/tcp_output.c.
> > (I try to understand where in the xfrm transformation gso is handeled).
> > 
> > What I can say yet is:
> > 
> > xfrm_output() is used with ipsec (esp) tunnel mode but at I never see gso
> > packets here. xfrm_output_gso() is never called.
> > 
> > Everytime tcp_set_skb_tso_segs() is called for a tcp connection over the
> > esp- tunnel and it is a gso case then the tcp connection hangs. Those
> > packets always have skb->len 1398 and mss_now is 1374. I see a call of
> > xfrm_output() afterwards but for a packet of skb->len 52 (maybe ACK from
> > other direction?).
> > 
> > As long as the tcp-connection over the ipsec-tunnel works and if I send
> > bulk traffic xfrm_output() is called 3 times with packet skb->len 1426
> > and then one time with 78 (maybe other direction?), don't know if that is
> > of any interest.
> > 
> > 
> > With non-ipsec-traffic gso works fine: in this case the skb->len() varies
> > a
> > lot and mss_now is always 1288.
> 
> Presumably something happens so that sk_can_gso() returns false.
> 
> But apparently, this happens _after_ tcp_sendmsg(), ie a bit too late.
> 
> Note that after https://patchwork.ozlabs.org/patch/418506/ is applied,
> fix would simply be :
> 
> diff --git a/net/core/sock.c b/net/core/sock.c
> index
> 9a56b2000c3f374fb95aedada3327447816a9512..678ef8393680dc781445c5f121719ea8e
> a7bb7c1 100644 --- a/net/core/sock.c
> +++ b/net/core/sock.c
> @@ -1585,6 +1585,8 @@ void sk_setup_caps(struct sock *sk, struct dst_entry
> *dst) sk->sk_gso_max_size = dst->dev->gso_max_size;
>  			sk->sk_gso_max_segs = dst->dev->gso_max_segs;
>  		}
> +	} else {
> +		sk->sk_gso_max_segs = 1;
>  	}
>  }
>  EXPORT_SYMBOL_GPL(sk_setup_caps);

Ok. I tried that. I backported https://patchwork.ozlabs.org/patch/418506/ to 
3.14.26 and then applied this patch. Still hangs. At that stage sk_can_gso(sk) 
is true. The reason is that the interface of *dst in sk_setup_caps() is the 
interface which serves the ipsec traffic.

There is a difference, though. If I disable gso for the interface which serves 
the ipsec traffic then the hang disappears now. This seems to be because of 
the patch above: sk_can_gso(sk) in sk_setup_caps then returns false and the ne 
else branch is taken. So there may be a hidden bug in the non-gso case.

Back to xfrm case with hang:

I see: a call of sk_setup_caps() which takes the path
		sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
		...

So gso is on. When the hang happens sk_setup_caps is called from		
inet_sk_rebuild_header(). Now the path

		sk->sk_route_caps &= ~NETIF_F_GSO_MASK;

is taken as dst->header_len is now non zero.

This is the reason why later calls of sk_can_gso() return false.

I'll try to change the above patch to

@@ -1585,6 +1585,8 @@ void sk_setup_caps(struct sock *sk, struct dst_entry
*dst) sk->sk_gso_max_size = dst->dev->gso_max_size;
 			sk->sk_gso_max_segs = dst->dev->gso_max_segs;
 		}
	}
+   if (sk_can_gso(sk)) {
+		sk->sk_gso_max_segs = 1;
 	}
 }
 EXPORT_SYMBOL_GPL(sk_setup_caps);

so that the case that GSO is disabled because of dst->header_len != 0 sets 
sk_gso_max_segs, too.



Regards,
-- 
Wolfgang Walter
Studentenwerk München
Anstalt des öffentlichen Rechts

^ permalink raw reply

* Re: [PATCH net] Fix race condition between vxlan_sock_add and vxlan_sock_release
From: Marcelo Ricardo Leitner @ 2014-12-10 18:34 UTC (permalink / raw)
  To: David Miller; +Cc: netdev
In-Reply-To: <20141210.131133.729833991671277249.davem@davemloft.net>

On 10-12-2014 16:11, David Miller wrote:
> From: Marcelo Ricardo Leitner <mleitner@redhat.com>
> Date: Tue,  9 Dec 2014 12:28:28 -0200
>
>> Currently, when trying to reuse a socket, vxlan_sock_add will grab
>> vn->sock_lock, locate a reusable socket, inc refcount and release
>> vn->sock_lock.
>>
>> But vxlan_sock_release() will first decrement refcount, and then grab
>> that lock. refcnt operations are atomic but as currently we have
>> deferred works which hold vs->refcnt each, this might happen, leading to
>> a use after free (specially after vxlan_igmp_leave):
>>
>>    CPU 1                            CPU 2
>>
>> deferred work                    vxlan_sock_add
>
> Just make vxlan_sock_add() do atomic_add_unless(x, 1, 0), that way
> if vxlan_sock_add() sees the count at zero it can just act as if
> no such reusable socket exists.

Interesting, I had thought of this, but it seemed a bit messy.
But okay, I see the pros on it, will go that way. Thanks!

   Marcelo

^ permalink raw reply

* Re: xen-netback: make feature-rx-notify mandatory -- Breaks stubdoms
From: David Vrabel @ 2014-12-10 18:39 UTC (permalink / raw)
  To: Ian Campbell
  Cc: John, Xen-devel@lists.xen.org, Wei Liu, netdev@vger.kernel.org
In-Reply-To: <1418228446.3505.81.camel@citrix.com>

On 10/12/14 16:20, Ian Campbell wrote:
> On Wed, 2014-12-10 at 15:29 +0000, David Vrabel wrote:
>> On 10/12/14 15:07, Ian Campbell wrote:
>>> On Wed, 2014-12-10 at 14:12 +0000, David Vrabel wrote:
>>>> On 10/12/14 13:42, John wrote:
>>>>> David,
>>>>>
>>>>> This patch you put into 3.18.0 appears to break the latest version of
>>>>> stubdomains. I found this out today when I tried to update a machine to
>>>>> 3.18.0 and all of the domUs crashed on start with the dmesg output like
>>>>> this:
>>>>
>>>> Cc'ing the lists and relevant netback maintainers.
>>>>
>>>> I guess the stubdoms are using minios's netfront?  This is something I
>>>> forgot about when deciding if it was ok to make this feature mandatory.
>>>
>>> Oh bum, me too :/
>>>
>>>> The patch cannot be reverted as it's a prerequisite for a critical
>>>> (security) bug fix.  I am also unconvinced that the no-feature-rx-notify
>>>> support worked correctly anyway.
>>>>
>>>> This can be resolved by:
>>>>
>>>> - Fixing minios's netfront to support feature-rx-notify. This should be
>>>> easy but wouldn't help existing Xen deployments.
>>>
>>> I think this is worth doing in its own right, but as you say it doesn't
>>> help existing users.
>>>
>>>> - Reimplement feature-rx-notify support.  I think the easiest way is to
>>>> queue packets on the guest Rx internal queue with a short expiry time.
>>>
>>> Right, I don't think we especially need to make this case good (so long
>>> as it doesn't reintroduce a security hole!).
>>>
>>> In principal we aren't really obliged to queue at all, but since all the
>>> infrastructure for queuing and timing out all exists I suppose it would
>>> be simple enough to implement and a bit less harsh.
>>>
>>> Given we now have XENVIF_RX_QUEUE_BYTES and rx_drain_timeout_jiffies we
>>> don't have the infinite queue any more. So does the expiry in this case
>>> actually need to be shorter than the norm? Does it cause any extra
>>> issues to keep them around for tx_drain_timeout_jiffies rather than some
>>> shorter time?
>>
>> If the internal guest rx queue fills and the (host) tx queue is stopped,
>> it will take tx_drain_timeout for the thread to wake up and notice if
>> the frontend placed any rx requests on the ring.  This could potentially
>> end up where you shovel 512k through stall for 10 s, put another 512k
>> through, stall for 10 s again and so on.
> 
> Ah, true, that's not so great.
> 
> What about if we don't queue at all(*) if rx-notify isn't supported, i.e
> just drop the packet on the floor in start_xmit if the ring is full?
> Would that be so bad? It would surely be simple...

There needs to be a queue between start_xmit and the rx thread so
checking for ring state in start_xmit doesn't help here since the
internal queue can fill before the thread wakes and begins to drain it.

netback could complete the request directly in start_xmit, avoiding the
internal queue but not allowing for any batching but I don't think it is
a good idea to add a different data path for this mode.

> (*) Not counting the "queue" which is the ring itself.
> 
>> The rx stall detection will also need to be disabled since there would
>> be no way for the frontend to signal rx ready.
> 
> Agreed.
> 
> Could be trivially argued to be safe if we were just dropping packets on
> ring overflow...

David

^ permalink raw reply

* Re: [PATCH v2] net: introduce helper macro for_each_cmsghdr
From: David Miller @ 2014-12-10 18:48 UTC (permalink / raw)
  To: guz.fnst; +Cc: netdev, linux-kernel, joe
In-Reply-To: <5487DBD9.1010908@cn.fujitsu.com>

From: Gu Zheng <guz.fnst@cn.fujitsu.com>
Date: Wed, 10 Dec 2014 13:36:25 +0800

> Introduce helper macro for_each_cmsghdr as a wrapper of the enumerating
> cmsghdr from msghdr, just cleanup. 
> 
> Signed-off-by: Gu Zheng <guz.fnst@cn.fujitsu.com>

Applied, thanks.

^ permalink raw reply

* Re: [PATCH net-next v2] enic: add support for set/get rss hash key
From: David Miller @ 2014-12-10 18:49 UTC (permalink / raw)
  To: _govind; +Cc: netdev, ssujith, benve
In-Reply-To: <1418199023-12451-1-git-send-email-_govind@gmx.com>

From: Govindarajulu Varadarajan <_govind@gmx.com>
Date: Wed, 10 Dec 2014 13:40:23 +0530

> This patch adds support for setting/getting rss hash key using ethtool.
> 
> v2:
> respin patch to support RSS hash function changes.
> 
> Signed-off-by: Govindarajulu Varadarajan <_govind@gmx.com>

Applied, thanks.

^ permalink raw reply

* Re: [PATCH net-next RESEND 0/2] fix rculist sparse errors
From: David Miller @ 2014-12-10 18:49 UTC (permalink / raw)
  To: ying.xue
  Cc: paulmck, eric.dumazet, jon.maloy, erik.hugne, netdev, kbuild-all,
	linux-kernel
In-Reply-To: <1418201167-9591-1-git-send-email-ying.xue@windriver.com>

From: Ying Xue <ying.xue@windriver.com>
Date: Wed, 10 Dec 2014 16:46:05 +0800

> When hlist_for_each_entry_continue_rcu_bh() gets "next" pointer of
> hlist_node structure through rcu_dereference_bh(), sparse warning
> appears as the "next" pointer is not annotated as __rcu. So if
> the "next" pointer is accessed with hlist_next_rcu() macro, the
> __rcu annotation will be added to the pointer. As a consequence,
> sparse warning is eliminated too.
> 
> The similar errors also appear in hlist_for_each_entry_continue_rcu()
> and hlist_for_each_entry_from_rcu().
> 
> In this version, CC more people like Paul E. McKenney and lkml mail
> list.

The rculist.h changes should go via Paul's tree, not mine.

^ permalink raw reply

* Re: [bisected] xfrm: TCP connection initiating PMTU discovery stalls on v3.
From: Eric Dumazet @ 2014-12-10 19:10 UTC (permalink / raw)
  To: Wolfgang Walter
  Cc: Thomas Jarosch, netdev, Eric Dumazet, Herbert Xu,
	Steffen Klassert
In-Reply-To: <6700748.kZ07xrNvHX@h2o.as.studentenwerk.mhn.de>

On Wed, 2014-12-10 at 19:34 +0100, Wolfgang Walter wrote:

> So gso is on. When the hang happens sk_setup_caps is called from		
> inet_sk_rebuild_header(). Now the path
> 
> 		sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
> 
> is taken as dst->header_len is now non zero.
> 
> This is the reason why later calls of sk_can_gso() return false.
> 
> I'll try to change the above patch to
> 
> @@ -1585,6 +1585,8 @@ void sk_setup_caps(struct sock *sk, struct dst_entry
> *dst) sk->sk_gso_max_size = dst->dev->gso_max_size;
>  			sk->sk_gso_max_segs = dst->dev->gso_max_segs;
>  		}
> 	}
> +   if (sk_can_gso(sk)) {
> +		sk->sk_gso_max_segs = 1;
>  	}
>  }
>  EXPORT_SYMBOL_GPL(sk_setup_caps);
> 
> so that the case that GSO is disabled because of dst->header_len != 0 sets 
> sk_gso_max_segs, too.

Sounds good, or maybe simply :

diff --git a/net/core/sock.c b/net/core/sock.c
index 9a56b2000c3f374fb95aedada3327447816a9512..edca31319dfee57cabe5b376505324bea07f767a 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1577,6 +1577,7 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
 	if (sk->sk_route_caps & NETIF_F_GSO)
 		sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
 	sk->sk_route_caps &= ~sk->sk_route_nocaps;
+	sk->sk_gso_max_segs = 1;
 	if (sk_can_gso(sk)) {
 		if (dst->header_len) {
 			sk->sk_route_caps &= ~NETIF_F_GSO_MASK;

^ permalink raw reply related

* Re: Multicast packets being lost (3.10 stable)
From: Linus Lüssing @ 2014-12-10 19:16 UTC (permalink / raw)
  To: David Miller
  Cc: Stephen Hemminger, netdev, bridge, Greg Kroah-Hartman,
	openwrt-devel, David Miller
In-Reply-To: <20140910133341.GI7058@odroid>

Hi David,

did you have a chance to look into backporting these fixes for
stable yet? (if I read the docs correctly, I should query you for
suggestions for stable kernels, right?)

Also, an eighth patch I'd suggest for stable now:

8) bridge: fix netfilter/NF_BR_LOCAL_OUT for own, locally generated queries
   -> f0b4eeced (since 3.18)


If there's anything unclear, just let me know. Thanks :)!

Cheers, Linus


On Wed, Sep 10, 2014 at 03:33:41PM +0200, Linus Lüssing wrote:
> I just got a complaint about bridges, multicast and a
> 3.10 kernel again. Seems like nobody had any objections about
> queueing these two patches for stable ( 2)+3) )?
> 
> Also I'm still missing some more fixes in the stable branches.
> Especially 5), 6) and 7) are of high priority (next to 2) and 3) )
> in my opinion as otherwise IPv6 in general could be broken for people
> using 3.12 or 3.13 (as 3.12 contains a patch which activates
> multicast snooping for link-local addresses, too: 3c3769e63).
> 
> Here is a more ordered list of patches I'd suggest to be queued for
> stable:
> 
> 1) bridge: fix switched interval for MLD Query types
>    -> 32de868cb (present since 3.10)
> 2) bridge: disable snooping if there is no querier
>    -> b00589af3 (present since 3.11)
> 3) bridge: don't try to update timers in case of broken MLD queries
>    -> 248ba8ec0 (present since 3.11)
> 4) Revert "bridge: only expire the mdb entry when query is received"
>    -> 454594f3b (present since 3.12)
> 5) bridge: multicast: add sanity check for query source addresses
>    -> 6565b9eee (present since 3.14)
> 6) bridge: multicast: add sanity check for general query destination
>    -> 9ed973cc4 (present since 3.14)
> 7) bridge: multicast: enable snooping on general queries only
>    -> 20a599bec (present since 3.14)
> 
> Let me know what you'd think about that or if there's any trouble
> applying them to older kernels.
> 
> Cheers, Linus
> 
> 
> On Tue, Mar 25, 2014 at 02:06:07PM +0100, Linus Lüssing wrote:
> > That commit is supposed to be a fix and seems to be a easily
> > cherry-pickable on top of 3.10. So I think it's suitable for
> > stable
> > 
> > There are two follow-up commit for this particular patch that I'm aware
> > of: "bridge: separate querier and query timer into IGMP/IPv4
> > and MLD/IPv6 ones" (cc0fdd80). That's just an optimization
> > and can be ignored for stable.
> > 
> > The second one is "bridge: don't try to update timers in case of
> > broken MLD queries" (248ba8ec0). Which is a direct fix for
> > b00589af3 and should therefore go into stable, too, if b00589af3
> > goes into stable.
> > 
> > Cheers, Linus
> > 
> > 
> > On Mon, Mar 24, 2014 at 09:41:07AM -0700, Stephen Hemminger wrote:
> > > We are seeing multicast snooping related issues.
> > > Is there some reason this commit never went into stable (3.10)
> > > 
> > > commit b00589af3b04736376f24625ab0b394642e89e29
> > > Author: Linus Lüssing <linus.luessing@web.de>
> > > Date:   Thu Aug 1 01:06:20 2013 +0200
> > > 
> > >     bridge: disable snooping if there is no querier
> > >     
> > >     If there is no querier on a link then we won't get periodic reports and
> > >     therefore won't be able to learn about multicast listeners behind ports,
> > >     potentially leading to lost multicast packets, especially for multicast
> > >     listeners that joined before the creation of the bridge.
> > >     
> > >     These lost multicast packets can appear since c5c23260594
> > >     ("bridge: Add multicast_querier toggle and disable queries by default")
> > >     in particular.
> > >     
> > >     With this patch we are flooding multicast packets if our querier is
> > >     disabled and if we didn't detect any other querier.
> > >     
> > >     A grace period of the Maximum Response Delay of the querier is added to
> > >     give multicast responses enough time to arrive and to be learned from
> > >     before disabling the flooding behaviour again.
> > >     
> > >     Signed-off-by: Linus Lüssing <linus.luessing@web.de>
> > >     Signed-off-by: David S. Miller <davem@davemloft.net>

^ permalink raw reply

* Re: [PATCH net-next] net: Do not call ndo_dflt_fdb_dump if ndo_fdb_dump is defined.
From: Sergei Shtylyov @ 2014-12-10 19:28 UTC (permalink / raw)
  To: Hubert Sokolowski, netdev
In-Reply-To: <55af638eadcc08cbaa78f82ff6a43cb3.squirrel@poczta.wsisiz.edu.pl>

Hello.

On 12/10/2014 08:20 PM, Hubert Sokolowski wrote:

> This change restores the semantic that was present
> before 5e6d243587990a588143b9da3974833649595587

    Please also specify that commit's  summary line in parens.

> on how ndo_dflt_fdb_dump is called.
> This semantic is still used for add and del operations
> so let's keep it consistent.
> Driver can still call ndo_dflt_fdb_dump from inside
> its own fdb_dump routine when needed.

    You didn't sign off on the patch, so it can't be applied.

[...]

WBR, Sergei

^ permalink raw reply

* Re: [PATCH] irda: Convert function pointer arrays and uses to const
From: Rustad, Mark D @ 2014-12-10 19:32 UTC (permalink / raw)
  To: Joe Perches; +Cc: Samuel Ortiz, David S. Miller, netdev, LKML
In-Reply-To: <1418236138.18092.13.camel@perches.com>

[-- Attachment #1: Type: text/plain, Size: 7604 bytes --]

On Dec 10, 2014, at 10:28 AM, Joe Perches <joe@perches.com> wrote:

> Making things const is a good thing.
> 
> (x86-64 defconfig with all irda)
> $ size net/irda/built-in.o*
>   text	   data	    bss	    dec	    hex	filename
> 109276	   1868	    244	 111388	  1b31c	net/irda/built-in.o.new
> 108828	   2316	    244	 111388	  1b31c	net/irda/built-in.o.old
> 
> Signed-off-by: Joe Perches <joe@perches.com>
> ---
> include/net/irda/parameters.h  | 6 +++---
> net/irda/ircomm/ircomm_param.c | 8 ++++----
> net/irda/irttp.c               | 6 ++++--
> net/irda/parameters.c          | 8 ++++----
> net/irda/qos.c                 | 6 +++---
> 5 files changed, 18 insertions(+), 16 deletions(-)
> 
> diff --git a/include/net/irda/parameters.h b/include/net/irda/parameters.h
> index 42713c9..2d9cd00 100644
> --- a/include/net/irda/parameters.h
> +++ b/include/net/irda/parameters.h
> @@ -71,17 +71,17 @@ typedef int (*PV_HANDLER)(void *self, __u8 *buf, int len, __u8 pi,
> 			  PV_TYPE type, PI_HANDLER func);
> 
> typedef struct {
> -	PI_HANDLER func;  /* Handler for this parameter identifier */
> +	const PI_HANDLER func;  /* Handler for this parameter identifier */
> 	PV_TYPE    type;  /* Data type for this parameter */
> } pi_minor_info_t;
> 
> typedef struct {
> -	pi_minor_info_t *pi_minor_call_table;
> +	const pi_minor_info_t *pi_minor_call_table;

Might you want to go a little further and make it:
	const pi_minor_into_t * const pi_minor_call_table;
so that the pointer itself is also constant? That could apply to some others below as well.

> 	int len;
> } pi_major_info_t;
> 
> typedef struct {
> -	pi_major_info_t *tables;
> +	const pi_major_info_t *tables;
> 	int              len;
> 	__u8             pi_mask;
> 	int              pi_major_offset;
> diff --git a/net/irda/ircomm/ircomm_param.c b/net/irda/ircomm/ircomm_param.c
> index 27be782..3c4caa6 100644
> --- a/net/irda/ircomm/ircomm_param.c
> +++ b/net/irda/ircomm/ircomm_param.c
> @@ -61,12 +61,12 @@ static int ircomm_param_dte(void *instance, irda_param_t *param, int get);
> static int ircomm_param_dce(void *instance, irda_param_t *param, int get);
> static int ircomm_param_poll(void *instance, irda_param_t *param, int get);
> 
> -static pi_minor_info_t pi_minor_call_table_common[] = {
> +static const pi_minor_info_t pi_minor_call_table_common[] = {
> 	{ ircomm_param_service_type, PV_INT_8_BITS },
> 	{ ircomm_param_port_type,    PV_INT_8_BITS },
> 	{ ircomm_param_port_name,    PV_STRING }
> };
> -static pi_minor_info_t pi_minor_call_table_non_raw[] = {
> +static const pi_minor_info_t pi_minor_call_table_non_raw[] = {
> 	{ ircomm_param_data_rate,    PV_INT_32_BITS | PV_BIG_ENDIAN },
> 	{ ircomm_param_data_format,  PV_INT_8_BITS },
> 	{ ircomm_param_flow_control, PV_INT_8_BITS },
> @@ -74,13 +74,13 @@ static pi_minor_info_t pi_minor_call_table_non_raw[] = {
> 	{ ircomm_param_enq_ack,      PV_INT_16_BITS },
> 	{ ircomm_param_line_status,  PV_INT_8_BITS }
> };
> -static pi_minor_info_t pi_minor_call_table_9_wire[] = {
> +static const pi_minor_info_t pi_minor_call_table_9_wire[] = {
> 	{ ircomm_param_dte,          PV_INT_8_BITS },
> 	{ ircomm_param_dce,          PV_INT_8_BITS },
> 	{ ircomm_param_poll,         PV_NO_VALUE },
> };
> 
> -static pi_major_info_t pi_major_call_table[] = {
> +static const pi_major_info_t pi_major_call_table[] = {
> 	{ pi_minor_call_table_common,  3 },
> 	{ pi_minor_call_table_non_raw, 6 },
> 	{ pi_minor_call_table_9_wire,  3 }
> diff --git a/net/irda/irttp.c b/net/irda/irttp.c
> index 3ef0b08..b6ab41d 100644
> --- a/net/irda/irttp.c
> +++ b/net/irda/irttp.c
> @@ -71,11 +71,13 @@ static void irttp_status_indication(void *instance,
> 				    LINK_STATUS link, LOCK_STATUS lock);
> 
> /* Information for parsing parameters in IrTTP */
> -static pi_minor_info_t pi_minor_call_table[] = {
> +static const pi_minor_info_t pi_minor_call_table[] = {
> 	{ NULL, 0 },                                             /* 0x00 */
> 	{ irttp_param_max_sdu_size, PV_INTEGER | PV_BIG_ENDIAN } /* 0x01 */
> };
> -static pi_major_info_t pi_major_call_table[] = { { pi_minor_call_table, 2 } };
> +static const pi_major_info_t pi_major_call_table[] = {
> +	{ pi_minor_call_table, 2 }
> +};
> static pi_param_info_t param_info = { pi_major_call_table, 1, 0x0f, 4 };
> 
> /************************ GLOBAL PROCEDURES ************************/
> diff --git a/net/irda/parameters.c b/net/irda/parameters.c
> index 006786b..16ce32f 100644
> --- a/net/irda/parameters.c
> +++ b/net/irda/parameters.c
> @@ -52,7 +52,7 @@ static int irda_insert_no_value(void *self, __u8 *buf, int len, __u8 pi,
> static int irda_param_unpack(__u8 *buf, char *fmt, ...);
> 
> /* Parameter value call table. Must match PV_TYPE */
> -static PV_HANDLER pv_extract_table[] = {
> +static const PV_HANDLER pv_extract_table[] = {
> 	irda_extract_integer, /* Handler for any length integers */
> 	irda_extract_integer, /* Handler for 8  bits integers */
> 	irda_extract_integer, /* Handler for 16 bits integers */
> @@ -62,7 +62,7 @@ static PV_HANDLER pv_extract_table[] = {
> 	irda_extract_no_value /* Handler for no value parameters */
> };
> 
> -static PV_HANDLER pv_insert_table[] = {
> +static const PV_HANDLER pv_insert_table[] = {
> 	irda_insert_integer, /* Handler for any length integers */
> 	irda_insert_integer, /* Handler for 8  bits integers */
> 	irda_insert_integer, /* Handler for 16 bits integers */
> @@ -449,7 +449,7 @@ static int irda_param_unpack(__u8 *buf, char *fmt, ...)
> int irda_param_insert(void *self, __u8 pi, __u8 *buf, int len,
> 		      pi_param_info_t *info)
> {
> -	pi_minor_info_t *pi_minor_info;
> +	const pi_minor_info_t *pi_minor_info;
> 	__u8 pi_minor;
> 	__u8 pi_major;
> 	int type;
> @@ -504,7 +504,7 @@ EXPORT_SYMBOL(irda_param_insert);
> static int irda_param_extract(void *self, __u8 *buf, int len,
> 			      pi_param_info_t *info)
> {
> -	pi_minor_info_t *pi_minor_info;
> +	const pi_minor_info_t *pi_minor_info;
> 	__u8 pi_minor;
> 	__u8 pi_major;
> 	int type;
> diff --git a/net/irda/qos.c b/net/irda/qos.c
> index 5ed6c9a..25ba850 100644
> --- a/net/irda/qos.c
> +++ b/net/irda/qos.c
> @@ -122,7 +122,7 @@ static __u32 max_line_capacities[10][4] = {
> 	{ 800000, 400000, 160000, 80000 }, /* 16000000 bps */
> };
> 
> -static pi_minor_info_t pi_minor_call_table_type_0[] = {
> +static const pi_minor_info_t pi_minor_call_table_type_0[] = {
> 	{ NULL, 0 },
> /* 01 */{ irlap_param_baud_rate,       PV_INTEGER | PV_LITTLE_ENDIAN },
> 	{ NULL, 0 },
> @@ -134,7 +134,7 @@ static pi_minor_info_t pi_minor_call_table_type_0[] = {
> /* 08 */{ irlap_param_link_disconnect, PV_INT_8_BITS }
> };
> 
> -static pi_minor_info_t pi_minor_call_table_type_1[] = {
> +static const pi_minor_info_t pi_minor_call_table_type_1[] = {
> 	{ NULL, 0 },
> 	{ NULL, 0 },
> /* 82 */{ irlap_param_max_turn_time,   PV_INT_8_BITS },
> @@ -144,7 +144,7 @@ static pi_minor_info_t pi_minor_call_table_type_1[] = {
> /* 86 */{ irlap_param_min_turn_time,   PV_INT_8_BITS },
> };
> 
> -static pi_major_info_t pi_major_call_table[] = {
> +static const pi_major_info_t pi_major_call_table[] = {
> 	{ pi_minor_call_table_type_0, 9 },
> 	{ pi_minor_call_table_type_1, 7 },
> };
> 
> 
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

-- 
Mark Rustad, Networking Division, Intel Corporation


[-- Attachment #2: Message signed with OpenPGP using GPGMail --]
[-- Type: application/pgp-signature, Size: 841 bytes --]

^ permalink raw reply

* [PATCH net-next RESEND] net: Do not call ndo_dflt_fdb_dump if ndo_fdb_dump is defined.
From: Hubert Sokolowski @ 2014-12-10 19:37 UTC (permalink / raw)
  To: netdev

This change restores the semantic that was present
before 5e6d243587990a588143b9da3974833649595587
"bridge: netlink dump interface at par with brctl"
on how ndo_dflt_fdb_dump is called.
This semantic is still used for add and del operations
so let's keep it consistent.
Driver can still call ndo_dflt_fdb_dump from inside
its own fdb_dump routine when needed.

Signed-off-by: Hubert Sokolowski <h.sokolowski@wit.edu.pl>
---
 net/core/rtnetlink.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index eaa057f..a9e5c37 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -2692,10 +2692,11 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
 							 idx);
 		}

-		idx = ndo_dflt_fdb_dump(skb, cb, dev, NULL, idx);
 		if (dev->netdev_ops->ndo_fdb_dump)
 			idx = dev->netdev_ops->ndo_fdb_dump(skb, cb, bdev, dev,
 							    idx);
+		else
+			idx = ndo_dflt_fdb_dump(skb, cb, dev, NULL, idx);

 		cops = NULL;
 	}
-- 
1.9.3

^ permalink raw reply related

* Re: [PATCH] irda: Convert function pointer arrays and uses to const
From: Joe Perches @ 2014-12-10 19:38 UTC (permalink / raw)
  To: Rustad, Mark D; +Cc: Samuel Ortiz, David S. Miller, netdev, LKML
In-Reply-To: <3333024A-D21C-41E2-80ED-7590CE6E9BD1@intel.com>

On Wed, 2014-12-10 at 19:32 +0000, Rustad, Mark D wrote:
> On Dec 10, 2014, at 10:28 AM, Joe Perches <joe@perches.com> wrote:
[]
> > diff --git a/include/net/irda/parameters.h b/include/net/irda/parameters.h
[]
> > typedef struct {
> > -	pi_minor_info_t *pi_minor_call_table;
> > +	const pi_minor_info_t *pi_minor_call_table;
> 
> Might you want to go a little further and make it:
> 	const pi_minor_into_t * const pi_minor_call_table;
> so that the pointer itself is also constant? That could apply to some others below as well.
> 
> > 	int len;
> > } pi_major_info_t;

I don't think that's necessary as all the pi_major_info_t uses
become const and this is a typedef, but if you want to, go ahead.

^ permalink raw reply

* Re: [PATCH net v6 7/7] libcxgbi: free skb after debug prints
From: Sergei Shtylyov @ 2014-12-10 19:38 UTC (permalink / raw)
  To: Karen Xie, linux-scsi, netdev
  Cc: hariprasad, anish, hch, James.Bottomley, michaelc, davem
In-Reply-To: <201412101625.sBAGPSXC011110@localhost6.localdomain6>

Hello.

On 12/10/2014 07:25 PM, Karen Xie wrote:

> [PATCH net v6 7/7] libcxgbi: free skb after debug prints

    Please, do not duplicate the subject in the changelog -- DaveM would have 
to edit it out by hand.

> From: Karen Xie <kxie@chelsio.com>

> The debug print was accessing the skb after it was freed.

> Signed-off-by: Karen Xie <kxie@chelsio.com>

WBR, Sergei

^ permalink raw reply

* Re: [PATCH v2] net: introduce helper macro for_each_cmsghdr
From: David Miller @ 2014-12-10 19:44 UTC (permalink / raw)
  To: guz.fnst; +Cc: netdev, linux-kernel, joe
In-Reply-To: <20141210.134804.1383875260695429574.davem@davemloft.net>

From: David Miller <davem@davemloft.net>
Date: Wed, 10 Dec 2014 13:48:04 -0500 (EST)

> From: Gu Zheng <guz.fnst@cn.fujitsu.com>
> Date: Wed, 10 Dec 2014 13:36:25 +0800
> 
>> Introduce helper macro for_each_cmsghdr as a wrapper of the enumerating
>> cmsghdr from msghdr, just cleanup. 
>> 
>> Signed-off-by: Gu Zheng <guz.fnst@cn.fujitsu.com>
> 
> Applied, thanks.

This breaks the build, I'm reverting.

You cannot use your new macros in
Documentation/networking/timestamping/txtimestamp.c, that is a
userland program and the header you are adding your helper to is not
available to userspace.

This also means you didn't sufficiently test the build of your
changes.

Documentation/networking/timestamping/timestamping.c: In function ‘printpacket’:
Documentation/networking/timestamping/timestamping.c:172:2: warning: implicit declaration of function ‘for_each_cmsghdr’ [-Wimplicit-function-declaration]
Documentation/networking/timestamping/timestamping.c:172:30: error: expected ‘;’ before ‘{’ token
Documentation/networking/timestamping/timestamping.c:161:18: warning: unused variable ‘ts’ [-Wunused-variable]
Documentation/networking/timestamping/timestamping.c:160:17: warning: unused variable ‘tv’ [-Wunused-variable]
make[3]: *** [Documentation/networking/timestamping/timestamping] Error 1
make[3]: *** Waiting for unfinished jobs....
Documentation/networking/timestamping/txtimestamp.c: In function ‘__recv_errmsg_cmsg’:
Documentation/networking/timestamping/txtimestamp.c:187:2: warning: implicit declaration of function ‘for_each_cmsghdr’ [-Wimplicit-function-declaration]
Documentation/networking/timestamping/txtimestamp.c:187:19: error: ‘cmsg’ undeclared (first use in this function)
Documentation/networking/timestamping/txtimestamp.c:187:19: note: each undeclared identifier is reported only once for each function it appears in
Documentation/networking/timestamping/txtimestamp.c:187:30: error: expected ‘;’ before ‘{’ token
Documentation/networking/timestamping/txtimestamp.c:185:18: warning: unused variable ‘cm’ [-Wunused-variable]
Documentation/networking/timestamping/txtimestamp.c:184:27: warning: unused variable ‘tss’ [-Wunused-variable]
Documentation/networking/timestamping/txtimestamp.c:183:28: warning: unused variable ‘serr’ [-Wunused-variable]
Documentation/networking/timestamping/txtimestamp.c: At top level:
Documentation/networking/timestamping/txtimestamp.c:123:13: warning: ‘print_timestamp’ defined but not used [-Wunused-function]
Documentation/networking/timestamping/txtimestamp.c:159:13: warning: ‘print_pktinfo’ defined but not used [-Wunused-function]


^ permalink raw reply

* Re: [PATCH net-next] tipc: fix broadcast wakeup contention after congestion
From: David Miller @ 2014-12-10 19:46 UTC (permalink / raw)
  To: richard.alpe; +Cc: netdev, tipc-discussion, erik.hugne
In-Reply-To: <1418201214-30767-1-git-send-email-richard.alpe@ericsson.com>

From: <richard.alpe@ericsson.com>
Date: Wed, 10 Dec 2014 09:46:54 +0100

> From: Richard Alpe <richard.alpe@ericsson.com>
> 
> commit 908344cdda80 ("tipc: fix bug in multicast congestion handling")
> introduced a race in the broadcast link wakeup functionality.
> 
> This patch eliminates this broadcast link wakeup race caused by
> operation on the wakeup list without proper locking. If this race
> hit and corrupted the list all subsequent wakeup messages would be
> lost, resulting in a considerable memory leak.
> 
> Signed-off-by: Richard Alpe <richard.alpe@ericsson.com>
> Signed-off-by: Erik Hugne <erik.hugne@ericsson.com>

Applied, thank you.

^ permalink raw reply

* Re: [PATCH] net: openvswitch: Support masked set actions.
From: David Miller @ 2014-12-10 19:48 UTC (permalink / raw)
  To: jrajahalme; +Cc: netdev, dev
In-Reply-To: <1418170225-9328-1-git-send-email-jrajahalme@nicira.com>

From: Jarno Rajahalme <jrajahalme@nicira.com>
Date: Tue,  9 Dec 2014 16:10:25 -0800

> OVS userspace already probes the openvswitch kernel module for
> OVS_ACTION_ATTR_SET_MASKED support.  This patch adds the kernel module
> implementation of masked set actions.
> 
> The existing set action sets many fields at once.  When only a subset
> of the IP header fields, for example, should be modified, all the IP
> fields need to be exact matched so that the other field values can be
> copied to the set action.  A masked set action allows modification of
> an arbitrary subset of the supported header bits without requiring the
> rest to be matched.
> 
> Masked set action is now supported for all writeable key types, except
> for the tunnel key.  The set tunnel action is an exception as any
> input tunnel info is cleared before action processing starts, so there
> is no tunnel info to mask.
> 
> The kernel module converts all (non-tunnel) set actions to masked set
> actions.  This makes action processing more uniform, and results in
> less branching and duplicating the action processing code.  When
> returning actions to userspace, the fully masked set actions are
> converted back to normal set actions.  We use a kernel internal action
> code to be able to tell the userspace provided and converted masked
> set actions apart.
> 
> Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>

How does this work, should I be waiting for a signoff or ACK from
Pravin before applying this directly to my tree?

^ permalink raw reply

* Re: linux-next: manual merge of the net-next tree with the net tree
From: David Miller @ 2014-12-10 19:49 UTC (permalink / raw)
  To: sfr; +Cc: netdev, Thomas.Lendacky, linux-next, linux-kernel
In-Reply-To: <20141210122036.133457c9@canb.auug.org.au>

From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Wed, 10 Dec 2014 12:20:36 +1100

> Today's linux-next merge of the net-next tree got a conflict in
> drivers/net/ethernet/amd/xgbe/xgbe-desc.c between commit 03ccc4c0a9da
> ("amd-xgbe: Do not clear interrupt indicator") from the net tree and
> commit c9f140ebb008 ("amd-xgbe: Separate Tx/Rx ring data fields into
> new structs") from the net-next tree.
> 
> I fixed it up (see below) and can carry the fix as necessary (no action
> is required).

As you have seen there are a lot of merge hassles to sort out between
the 'net' and 'net-next' tree right now, probably I just added a few
more :-)

I'll try to do the merge by the end of today so that these headaches
go away for you.

Thanks.

^ permalink raw reply

* Re: [PATCH net-next] bridge: Add ability to always enable TSO/UFO
From: David Miller @ 2014-12-10 19:50 UTC (permalink / raw)
  To: makita.toshiaki; +Cc: netdev, bridge
In-Reply-To: <1418179394-4470-2-git-send-email-makita.toshiaki@lab.ntt.co.jp>

From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Date: Wed, 10 Dec 2014 11:43:14 +0900

> -	features &= ~NETIF_F_ONE_FOR_ALL;
> +	features &= ~NETIF_F_ONE_FOR_ALL | NETIF_F_GSO_SOFTWARE;

I don't think this is the expression you intend to use.

I think you meant:

	features &= ~(NETIF_F_ONE_FOR_ALL | NETIF_F_GSO_SOFTWARE);

Or:

	features = ~NETIF_F_ONE_FOR_ALL;
	features |= NETIF_F_GSO_SOFTWARE;

^ permalink raw reply

* Re: [RFC PATCH 0/3] Faster than SLAB caching of SKBs with qmempool (backed by alf_queue)
From: Christoph Lameter @ 2014-12-10 19:51 UTC (permalink / raw)
  To: Jesper Dangaard Brouer
  Cc: netdev, linux-kernel, linux-mm, linux-api, Eric Dumazet,
	David S. Miller, Hannes Frederic Sowa, Alexander Duyck,
	Alexei Starovoitov, Paul E. McKenney, Mathieu Desnoyers,
	Steven Rostedt
In-Reply-To: <20141210141332.31779.56391.stgit@dragon>

On Wed, 10 Dec 2014, Jesper Dangaard Brouer wrote:

> One of the building blocks for achieving this speedup is a cmpxchg
> based Lock-Free queue that supports bulking, named alf_queue for
> Array-based Lock-Free queue.  By bulking elements (pointers) from the
> queue, the cost of the cmpxchg (approx 8 ns) is amortized over several
> elements.

This is a bit of an issue since the design of the SLUB allocator is such
that you should pick up an object, apply some processing and then take the
next one. The fetching of an object warms up the first cacheline and this
is tied into the way free objects are linked in SLUB.

So a bulk fetch from SLUB will not that effective and cause the touching
of many cachelines if we are dealing with just a few objects. If we are
looking at whole slab pages with all objects then SLUB can be effective
since we do not have to build up the linked pointer structure in each
page. SLAB has a different architecture there and a bulk fetch there is
possible without touching objects even for small sets since the freelist
management is separate from the objects.

If you do this bulking then you will later access cache cold objects?
Doesnt that negate the benefit that you gain? Or are these objects written
to by hardware and therefore by necessity cache cold?

We could provide a faster bulk alloc/free function.

	int kmem_cache_alloc_array(struct kmem_cache *s, gfp_t flags,
		size_t objects, void **array)

and this could be optimized by each slab allocator to provide fast
population of objects in that array. We then assume that the number of
objects is in the hundreds or so right?

The corresponding free function

	void kmem_cache_free_array(struct kmem_cache *s,
		size_t objects, void **array)


I think the queue management of the array can be improved by using a
similar technique as used the SLUB allocator using the cmpxchg_local.
cmpxchg_local is much faster than a full cmpxchg and we are operating on
per cpu structures anyways. So the overhead could still be reduced.


--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox