Netdev List
 help / color / mirror / Atom feed
* [PATCH net-next 1/4] net: tcp: refactor reinitialization of congestion control
From: Daniel Borkmann @ 2014-12-05 15:24 UTC (permalink / raw)
  To: davem; +Cc: hannes, fw, netdev
In-Reply-To: <1417793092-6263-1-git-send-email-dborkman@redhat.com>

We can just move this to an extra function and make the code
a bit more readable, no functional change.

Joint work with Florian Westphal.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
---
 net/ipv4/tcp_cong.c | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 27ead0d..38f2f8a 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -107,6 +107,18 @@ void tcp_init_congestion_control(struct sock *sk)
 		icsk->icsk_ca_ops->init(sk);
 }
 
+static void tcp_reinit_congestion_control(struct sock *sk,
+					  const struct tcp_congestion_ops *ca)
+{
+	struct inet_connection_sock *icsk = inet_csk(sk);
+
+	tcp_cleanup_congestion_control(sk);
+	icsk->icsk_ca_ops = ca;
+
+	if (sk->sk_state != TCP_CLOSE && icsk->icsk_ca_ops->init)
+		icsk->icsk_ca_ops->init(sk);
+}
+
 /* Manage refcounts on socket close. */
 void tcp_cleanup_congestion_control(struct sock *sk)
 {
@@ -262,21 +274,13 @@ int tcp_set_congestion_control(struct sock *sk, const char *name)
 #endif
 	if (!ca)
 		err = -ENOENT;
-
 	else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) ||
 		   ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)))
 		err = -EPERM;
-
 	else if (!try_module_get(ca->owner))
 		err = -EBUSY;
-
-	else {
-		tcp_cleanup_congestion_control(sk);
-		icsk->icsk_ca_ops = ca;
-
-		if (sk->sk_state != TCP_CLOSE && icsk->icsk_ca_ops->init)
-			icsk->icsk_ca_ops->init(sk);
-	}
+	else
+		tcp_reinit_congestion_control(sk, ca);
  out:
 	rcu_read_unlock();
 	return err;
-- 
1.7.11.7

^ permalink raw reply related

* Re: [PATCH iproute2 REGRESSIONS] ss: Fix layout issues introduced by regression
From: Vadim Kochan @ 2014-12-05 15:14 UTC (permalink / raw)
  To: netdev@vger.kernel.org; +Cc: Vadim Kochan
In-Reply-To: <1417778169-12339-1-git-send-email-vadim4j@gmail.com>

On Fri, Dec 5, 2014 at 1:16 PM, Vadim Kochan <vadim4j@gmail.com> wrote:
> This patch fixes the following issues which was introduced
> by me in commits:
>
>     #1 (2dc854854b7f1b) ss: Fixed broken output for Netlink 'Peer Address:Port' column
>     ISSUE: Broken layout when all sockets are printed out
>
>     #2 (eef43b5052afb7) ss: Identify more netlink protocol names
>     ISSUE: PID is not printed if only numbers output was specified (-n)
>
> Also aligned the width of the local/peer ports to be more wider.
>
> I tested with a lot of option combinations (I may miss some test cases),
> but layout seems to me better even on the previous released version
> of iproute2/ss.
>
> Signed-off-by: Vadim Kochan <vadim4j@gmail.com>
> ---
>  misc/ss.c | 30 ++++++++++--------------------
>  1 file changed, 10 insertions(+), 20 deletions(-)

Send v2 with updated commit message and subject.

Ragards,

^ permalink raw reply

* [PATCH iproute2 REGRESSIONS v2] ss: Fix layout/output issues introduced by regression
From: Vadim Kochan @ 2014-12-05 15:03 UTC (permalink / raw)
  To: netdev; +Cc: Vadim Kochan

This patch fixes the following issues which was introduced by me in commits:

    #1 (2dc854854b7f1b) ss: Fixed broken output for Netlink 'Peer Address:Port' column
    ISSUE: Broken layout when all sockets are printed out

    #2 (eef43b5052afb7) ss: Identify more netlink protocol names
    ISSUE: Protocol id is not printed if 'numbers only' output was specified (-n)

Also aligned the width of the local/peer ports to be more wider.

I tested with a lot of option combinations (I may miss some test cases),
but layout seems to me better than the previous released version of iproute2/ss.

Signed-off-by: Vadim Kochan <vadim4j@gmail.com>
---
 misc/ss.c | 30 ++++++++++--------------------
 1 file changed, 10 insertions(+), 20 deletions(-)

diff --git a/misc/ss.c b/misc/ss.c
index a99294d..8abaaff 100644
--- a/misc/ss.c
+++ b/misc/ss.c
@@ -101,8 +101,6 @@ int state_width;
 int addrp_width;
 int addr_width;
 int serv_width;
-int paddr_width;
-int pserv_width;
 int screen_width;
 
 static const char *TCP_PROTO = "tcp";
@@ -2912,11 +2910,12 @@ static void netlink_show_one(struct filter *f,
 		printf("%-*s ", state_width, "UNCONN");
 	printf("%-6d %-6d ", rq, wq);
 
-	if (resolve_services)
-	{
+	if (resolve_services) {
 		printf("%*s:", addr_width, nl_proto_n2a(prot, prot_name,
 					sizeof(prot_name)));
-	}
+	} else
+		printf("%*d:", addr_width, prot);
+
 
 	if (pid == -1) {
 		printf("%-*s ", serv_width, "*");
@@ -2947,10 +2946,10 @@ static void netlink_show_one(struct filter *f,
 
 	if (state == NETLINK_CONNECTED) {
 		printf("%*d:%-*d",
-		       paddr_width, dst_group, pserv_width, dst_pid);
+		       addr_width, dst_group, serv_width, dst_pid);
 	} else {
 		printf("%*s*%-*s",
-		       paddr_width, "", pserv_width, "");
+		       addr_width, "", serv_width, "");
 	}
 
 	char *pid_context = NULL;
@@ -3684,22 +3683,13 @@ int main(int argc, char *argv[])
 		printf("%-*s ", state_width, "State");
 	printf("%-6s %-6s ", "Recv-Q", "Send-Q");
 
-	paddr_width = addr_width;
-	pserv_width = serv_width;
-
-	/* Netlink service column can be resolved as process name/pid thus it
-	 * can be much wider than address column which is just a
-	 * protocol name/id.
-	 */
-	if (current_filter.dbs & (1<<NETLINK_DB)) {
-		serv_width = addr_width - 10;
-		paddr_width = 13;
-		pserv_width = 13;
-	}
+	/* Make enough space for the local/remote port field */
+	addr_width -= 13;
+	serv_width += 13;
 
 	printf("%*s:%-*s %*s:%-*s\n",
 	       addr_width, "Local Address", serv_width, "Port",
-	       paddr_width, "Peer Address", pserv_width, "Port");
+	       addr_width, "Peer Address", serv_width, "Port");
 
 	fflush(stdout);
 
-- 
2.1.3

^ permalink raw reply related

* Re: [PATCH 0/2] DMA API usage fixes in gianfar
From: Claudiu Manoil @ 2014-12-05 14:48 UTC (permalink / raw)
  To: Arseny Solokha; +Cc: netdev, linux-kernel, haokexin
In-Reply-To: <1417775874-17775-1-git-send-email-asolokha@kb.kras.ru>

On 12/5/2014 12:37 PM, Arseny Solokha wrote:
> Hello.
>
> This patch set fixes DMA API usage issues in gianfar ethernet driver
> reported by the kernel w/ DMA API debug enabled.
>
> There were even reports that the kernel sometimes oopsed in the past
> because of kernel paging request handling failures, though it was likely
> observed on some ancient versions. And while I personally doesn't have
> any strong evidence of this, there's no reason to let these possible
> failures live any longer.
>
> Arseny Solokha (2):
>    gianfar: handle map error in gfar_new_rxbdp()
>    gianfar: handle map error in gfar_start_xmit()
>
>   drivers/net/ethernet/freescale/gianfar.c | 49 ++++++++++++++++++++++++++------
>   1 file changed, 41 insertions(+), 8 deletions(-)
>

Thanks but please note that Kevin Hao already provided a fix for this issue:
http://permalink.gmane.org/gmane.linux.network/336274

His patch was only deferred for testing (and bandwidth) reasons.
I will try to resend his patch to the netdev list today if possible,
I apologize for the delay.
Also note that there are some issues with your patches.
As said before, I will resubmit Kevin Hao's patch.

Thanks,
Claudiu

^ permalink raw reply

* Re: [PATCH 2/2] gianfar: handle map error in gfar_start_xmit()
From: Claudiu Manoil @ 2014-12-05 14:51 UTC (permalink / raw)
  To: Arseny Solokha; +Cc: netdev, linux-kernel
In-Reply-To: <1417775874-17775-3-git-send-email-asolokha@kb.kras.ru>

On 12/5/2014 12:37 PM, Arseny Solokha wrote:
> From: Arseny Solokha <asolokha@kb.kras.ru>
>
> When DMA-API debugging is enabled in the kernel, it spews the following
> upon upping the link:
>
> WARNING: at lib/dma-debug.c:1135
> Modules linked in:
> CPU: 0 PID: 0 Comm: swapper/0 Tainted: G        W  O   3.18.0-rc7 #1
> task: c0720340 ti: effe2000 task.ti: c0750000
> NIP: c01d7c1c LR: c01d7c1c CTR: c02250fc
> REGS: effe3d40 TRAP: 0700   Tainted: G        W  O    (3.18.0-rc7)
> MSR: 00021000 <CE,ME>  CR: 22044242  XER: 20000000
>
> GPR00: c01d7c1c effe3df0 c0720340 00000095 c201e404 c201e9f0 00021000 01a9d000
> GPR08: 00000007 00000000 01a9d000 00000313 22044242 00583f60 05f41012 00000000
> GPR16: 00000000 000000ff 00000000 00000000 00000000 c5dc3b40 c5677720 00000001
> GPR24: c0730000 00029000 c0d0d828 c072c394 effe3e48 c075baec c0d0f020 ee31e600
> NIP [c01d7c1c] check_unmap+0x5b4/0xae4
> LR [c01d7c1c] check_unmap+0x5b4/0xae4
> Call Trace:
> [effe3df0] [c01d7c1c] check_unmap+0x5b4/0xae4 (unreliable)
> [effe3e40] [c01d81c4] debug_dma_unmap_page+0x78/0x8c
> [effe3ec0] [c0286ba0] gfar_clean_tx_ring+0x120/0x3c0
> [effe3f30] [c0286f90] gfar_poll_tx_sq+0x48/0x94
> o[effe3f50] [c030c388] net_rx_action+0x130/0x1ac
> [effe3f80] [c00319e0] __do_softirq+0x134/0x240
> [effe3fe0] [c0031dd0] irq_exit+0xa4/0xc8
> [effe3ff0] [c000e01c] call_do_irq+0x24/0x3c
> [c0751e70] [c0004a04] do_IRQ+0x8c/0x108
> [c0751e90] [c0010068] ret_from_except+0x0/0x18
> --- interrupt: 501 at arch_cpu_idle+0x24/0x5c
>      LR = arch_cpu_idle+0x24/0x5c
> [c0751f50] [c007d2e4] rcu_idle_enter+0xc8/0xcc (unreliable)
> [c0751f60] [c006587c] cpu_startup_entry+0x1d4/0x29c
> [c0751fb0] [c054399c] start_kernel+0x338/0x34c
> [c0751ff0] [c000046c] set_ivor+0x154/0x190
> Instruction dump:
> 394adb30 80fc0018 811c001c 3c60c04f 5529103a 7cca482e 38639e60 813c0020
> 815c0024 90c10008 4cc63182 48240b01 <0fe00000> 3c60c04f 3863971c 4cc63182
> ---[ end trace 3eb7bf62ba1b80f9 ]---
> Mapped at:
>   [<c0287420>] gfar_start_xmit+0x424/0x910
>   [<c030e964>] dev_hard_start_xmit+0x20c/0x3d8
>   [<c032cc3c>] sch_direct_xmit+0x124/0x22c
>   [<c030ede8>] __dev_queue_xmit+0x2b8/0x674
>
> Or the following upon starting transmission of some large chunks
> of data:
>
> fsl-gianfar ffe25000.ethernet: DMA-API: device driver failed to check map error[device address=0x0000000005fa8000]
> ------------[ cut here ]------------
> WARNING: at lib/dma-debug.c:1135
> Modules linked in:
> CPU: 0 PID: 0 Comm: swapper/0 Tainted: G           O   3.18.0-rc7 #35
> task: c071d340 ti: effe2000 task.ti: c074e000
> NIP: c01d7c1c LR: c01d7c1c CTR: c022339c
> REGS: effe3d40 TRAP: 0700   Tainted: G           O    (3.18.0-rc7)
> MSR: 00021000 <CE,ME>  CR: 22044242  XER: 20000000
>
> GPR00: c01d7c1c effe3df0 c071d340 00000094 00000001 c0071750 00000000 00000001
> GPR08: 00000000 00000000 effe2000 00000000 20044242 00581f60 05fa8000 000001c4
> GPR16: 00000000 000000ff 00000000 000000e4 00000039 c5b1a9c0 c5679c60 00000002
> GPR24: c0730000 00029000 c0d0c528 c0729394 effe3e48 c0759aec c0d0d020 ee315900
> NIP [c01d7c1c] check_unmap+0x5b4/0xae4
> LR [c01d7c1c] check_unmap+0x5b4/0xae4
> Call Trace:
> [effe3df0] [c01d7c1c] check_unmap+0x5b4/0xae4 (unreliable)
> [effe3e40] [c01d81c4] debug_dma_unmap_page+0x78/0x8c
> [effe3ec0] [c0284c78] gfar_clean_tx_ring+0x1b4/0x3c0
> [effe3f30] [c0284fd4] gfar_poll_tx_sq+0x48/0x94
> [effe3f50] [c030a5c4] net_rx_action+0x130/0x1ac
> [effe3f80] [c00319e0] __do_softirq+0x134/0x240
> [effe3fe0] [c0031dd0] irq_exit+0xa4/0xc8
> [effe3ff0] [c000e01c] call_do_irq+0x24/0x3c
> [c074fe70] [c0004a04] do_IRQ+0x8c/0x108
> [c074fe90] [c0010068] ret_from_except+0x0/0x18
> --- interrupt: 501 at arch_cpu_idle+0x24/0x5c
>      LR = arch_cpu_idle+0x24/0x5c
> [c074ff50] [c007d2e4] rcu_idle_enter+0xc8/0xcc (unreliable)
> [c074ff60] [c006587c] cpu_startup_entry+0x1d4/0x29c
> [c074ffb0] [c054199c] start_kernel+0x338/0x34c
> [c074fff0] [c000046c] set_ivor+0x154/0x190
> Instruction dump:
> 394abb30 80fc0018 811c001c 3c60c04e 5529103a 7cca482e 386379f0 813c0020
> 815c0024 90c10008 4cc63182 4823ed39 <0fe00000> 3c60c04e 386372ac 4cc63182
> ---[ end trace 008c59ca7ca1f712 ]---
> Mapped at:
>   [<c0285264>] gfar_start_xmit+0x224/0x95c
>   [<c030cba0>] dev_hard_start_xmit+0x20c/0x3d8
>   [<c032ae78>] sch_direct_xmit+0x124/0x22c
>   [<c032b008>] __qdisc_run+0x88/0x1c0
>   [<c0307920>] net_tx_action+0xf0/0x19c
>
> Ignore these mapping failures in hope we'll have more luck next time.
>
> Signed-off-by: Arseny Solokha <asolokha@kb.kras.ru>
> ---
>   drivers/net/ethernet/freescale/gianfar.c | 17 +++++++++++++++--
>   1 file changed, 15 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c
> index f34ca55..9ea887e 100644
> --- a/drivers/net/ethernet/freescale/gianfar.c
> +++ b/drivers/net/ethernet/freescale/gianfar.c
> @@ -2296,6 +2296,12 @@ static int gfar_start_xmit(struct sk_buff *skb, struct net_device *dev)
>   						   0,
>   						   frag_len,
>   						   DMA_TO_DEVICE);
> +			if (unlikely(dma_mapping_error(priv->dev, bufaddr))) {
> +				/* As DMA mapping failed, pretend the TX path
> +				 * is busy to retry later
> +				 */
> +				return NETDEV_TX_BUSY;

This is not right.
Proper bailout code missing: un-mapping of skb fragments and 
de-allocation of resources.
This is not a TX_BUSY error condition, it's a system failure.
(will resubmit this one:
http://permalink.gmane.org/gmane.linux.network/336274)

> +			}
>
>   			/* set the TxBD length and buffer pointer */
>   			txbdp->bufPtr = bufaddr;
> @@ -2345,8 +2351,15 @@ static int gfar_start_xmit(struct sk_buff *skb, struct net_device *dev)
>   		fcb->ptp = 1;
>   	}
>
> -	txbdp_start->bufPtr = dma_map_single(priv->dev, skb->data,
> -					     skb_headlen(skb), DMA_TO_DEVICE);
> +	bufaddr = dma_map_single(priv->dev, skb->data, skb_headlen(skb),
> +				 DMA_TO_DEVICE);
> +	if (unlikely(dma_mapping_error(priv->dev, bufaddr))) {
> +		/* As DMA mapping failed, pretend the TX path is busy to retry
> +		 * later
> +		 */
> +		return NETDEV_TX_BUSY;

same here

> +	}
> +	txbdp_start->bufPtr = bufaddr;
>
>   	/* If time stamping is requested one additional TxBD must be set up. The
>   	 * first TxBD points to the FCB and must have a data length of
>

^ permalink raw reply

* [patch net-next 2/2] net: sched: cls: use nla_nest_cancel instead of nlmsg_trim
From: Jiri Pirko @ 2014-12-05 14:50 UTC (permalink / raw)
  To: netdev; +Cc: davem, jhs
In-Reply-To: <1417791023-28124-1-git-send-email-jiri@resnulli.us>

To cancel nesting, this function is more convenient.

Signed-off-by: Jiri Pirko <jiri@resnulli.us>
---
 net/sched/cls_cgroup.c  | 3 +--
 net/sched/cls_flow.c    | 2 +-
 net/sched/cls_fw.c      | 3 +--
 net/sched/cls_route.c   | 3 +--
 net/sched/cls_rsvp.h    | 3 +--
 net/sched/cls_tcindex.c | 3 +--
 6 files changed, 6 insertions(+), 11 deletions(-)

diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index 741bfa7..221697a 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -177,7 +177,6 @@ static int cls_cgroup_dump(struct net *net, struct tcf_proto *tp, unsigned long
 			   struct sk_buff *skb, struct tcmsg *t)
 {
 	struct cls_cgroup_head *head = rtnl_dereference(tp->root);
-	unsigned char *b = skb_tail_pointer(skb);
 	struct nlattr *nest;
 
 	t->tcm_handle = head->handle;
@@ -198,7 +197,7 @@ static int cls_cgroup_dump(struct net *net, struct tcf_proto *tp, unsigned long
 	return skb->len;
 
 nla_put_failure:
-	nlmsg_trim(skb, b);
+	nla_nest_cancel(skb, nest);
 	return -1;
 }
 
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 8e22718..15d68f2 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -638,7 +638,7 @@ static int flow_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
 	return skb->len;
 
 nla_put_failure:
-	nlmsg_trim(skb, nest);
+	nla_nest_cancel(skb, nest);
 	return -1;
 }
 
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index 23fda2a..a5269f7 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -356,7 +356,6 @@ static int fw_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
 {
 	struct fw_head *head = rtnl_dereference(tp->root);
 	struct fw_filter *f = (struct fw_filter *)fh;
-	unsigned char *b = skb_tail_pointer(skb);
 	struct nlattr *nest;
 
 	if (f == NULL)
@@ -397,7 +396,7 @@ static int fw_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
 	return skb->len;
 
 nla_put_failure:
-	nlmsg_trim(skb, b);
+	nla_nest_cancel(skb, nest);
 	return -1;
 }
 
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index 098a273..2ecd246 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -593,7 +593,6 @@ static int route4_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
 		       struct sk_buff *skb, struct tcmsg *t)
 {
 	struct route4_filter *f = (struct route4_filter *)fh;
-	unsigned char *b = skb_tail_pointer(skb);
 	struct nlattr *nest;
 	u32 id;
 
@@ -635,7 +634,7 @@ static int route4_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
 	return skb->len;
 
 nla_put_failure:
-	nlmsg_trim(skb, b);
+	nla_nest_cancel(skb, nest);
 	return -1;
 }
 
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index b7af362..edd8ade 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -653,7 +653,6 @@ static int rsvp_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
 {
 	struct rsvp_filter *f = (struct rsvp_filter *)fh;
 	struct rsvp_session *s;
-	unsigned char *b = skb_tail_pointer(skb);
 	struct nlattr *nest;
 	struct tc_rsvp_pinfo pinfo;
 
@@ -694,7 +693,7 @@ static int rsvp_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
 	return skb->len;
 
 nla_put_failure:
-	nlmsg_trim(skb, b);
+	nla_nest_cancel(skb, nest);
 	return -1;
 }
 
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index 0d9d891..48d5e7b 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -489,7 +489,6 @@ static int tcindex_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
 {
 	struct tcindex_data *p = rtnl_dereference(tp->root);
 	struct tcindex_filter_result *r = (struct tcindex_filter_result *) fh;
-	unsigned char *b = skb_tail_pointer(skb);
 	struct nlattr *nest;
 
 	pr_debug("tcindex_dump(tp %p,fh 0x%lx,skb %p,t %p),p %p,r %p,b %p\n",
@@ -543,7 +542,7 @@ static int tcindex_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
 	return skb->len;
 
 nla_put_failure:
-	nlmsg_trim(skb, b);
+	nla_nest_cancel(skb, nest);
 	return -1;
 }
 
-- 
1.9.3

^ permalink raw reply related

* [patch net-next 1/2] net: sched: cls_basic: fix error path in basic_change()
From: Jiri Pirko @ 2014-12-05 14:50 UTC (permalink / raw)
  To: netdev; +Cc: davem, jhs

Signed-off-by: Jiri Pirko <jiri@resnulli.us>
---
 net/sched/cls_basic.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index 7cf0a62..5aed341 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -178,10 +178,9 @@ static int basic_change(struct net *net, struct sk_buff *in_skb,
 			return -EINVAL;
 	}
 
-	err = -ENOBUFS;
 	fnew = kzalloc(sizeof(*fnew), GFP_KERNEL);
-	if (fnew == NULL)
-		goto errout;
+	if (!fnew)
+		return -ENOBUFS;
 
 	tcf_exts_init(&fnew->exts, TCA_BASIC_ACT, TCA_BASIC_POLICE);
 	err = -EINVAL;
-- 
1.9.3

^ permalink raw reply related

* Re: [PATCH] net: fix the flow limitation computation
From: Eric Dumazet @ 2014-12-05 14:49 UTC (permalink / raw)
  To: roy.qing.li, Willem de Bruijn; +Cc: netdev
In-Reply-To: <1417772919-17744-1-git-send-email-roy.qing.li@gmail.com>

On Fri, 2014-12-05 at 17:48 +0800, roy.qing.li@gmail.com wrote:
> From: Li RongQing <roy.qing.li@gmail.com>
> 
> Once RPS is enabled, the skb maybe enqueue to different CPU, so the
> flow limitation computation should use the enqueued CPU, not the local
> CPU
> 
> Signed-off-by: Li RongQing <roy.qing.li@gmail.com>
> ---

CC Willem de Bruijn <willemb@google.com>, author of this mechanism, for
comments.

>  net/core/dev.c | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
> 
> diff --git a/net/core/dev.c b/net/core/dev.c
> index 945bbd0..e70507d 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -3250,7 +3250,7 @@ static int rps_ipi_queued(struct softnet_data *sd)
>  int netdev_flow_limit_table_len __read_mostly = (1 << 12);
>  #endif
>  
> -static bool skb_flow_limit(struct sk_buff *skb, unsigned int qlen)
> +static bool skb_flow_limit(struct sk_buff *skb, unsigned int qlen, int cpu)



>  {
>  #ifdef CONFIG_NET_FLOW_LIMIT
>  	struct sd_flow_limit *fl;
> @@ -3260,7 +3260,7 @@ static bool skb_flow_limit(struct sk_buff *skb, unsigned int qlen)
>  	if (qlen < (netdev_max_backlog >> 1))
>  		return false;
>  
> -	sd = this_cpu_ptr(&softnet_data);
> +	sd = &per_cpu(softnet_data, cpu);

What about passing sd instead of cpu, so that we do not have to compute
this again ?

^ permalink raw reply

* Re: [PATCH 1/1] net: macb: Remove obsolete comment from Kconfig
From: Nicolas Ferre @ 2014-12-05 14:42 UTC (permalink / raw)
  To: James Byrne, netdev, David Miller; +Cc: Cyrille Pitchen
In-Reply-To: <1417784633-13360-1-git-send-email-james.byrne@origamienergy.com>

Le 05/12/2014 14:03, James Byrne a écrit :
> The Kconfig file says that Gigabit mode is not supported, but it has been
> supported since commit 140b7552fdff04bbceeb842f0e04f0b4015fe97b ("net/macb:
> Add support for Gigabit Ethernet mode").
> 
> Signed-off-by: James Byrne <james.byrne@origamienergy.com>

Yes indeed:
Acked-by: Nicolas Ferre <nicolas.ferre@atmel.com>

> ---
>  drivers/net/ethernet/cadence/Kconfig | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/net/ethernet/cadence/Kconfig b/drivers/net/ethernet/cadence/Kconfig
> index 9e089d2..6932be0 100644
> --- a/drivers/net/ethernet/cadence/Kconfig
> +++ b/drivers/net/ethernet/cadence/Kconfig
> @@ -35,8 +35,8 @@ config MACB
>  	---help---
>  	  The Cadence MACB ethernet interface is found on many Atmel AT32 and
>  	  AT91 parts.  This driver also supports the Cadence GEM (Gigabit
> -	  Ethernet MAC found in some ARM SoC devices).  Note: the Gigabit mode
> -	  is not yet supported.  Say Y to include support for the MACB/GEM chip.
> +	  Ethernet MAC found in some ARM SoC devices).  Say Y to include
> +	  support for the MACB/GEM chip.
>  
>  	  To compile this driver as a module, choose M here: the module
>  	  will be called macb.
> 


-- 
Nicolas Ferre

^ permalink raw reply

* Re: [PATCH 2/3] bridge: offload bridge port attributes to switch asic if feature flag set
From: Roopa Prabhu @ 2014-12-05 14:37 UTC (permalink / raw)
  To: Jiri Pirko
  Cc: sfeldma, jhs, bcrl, tgraf, john.fastabend, stephen, linville,
	nhorman, nicolas.dichtel, vyasevic, f.fainelli, buytenh, aviadr,
	netdev, davem, shm, gospo
In-Reply-To: <20141205073840.GA1866@nanopsycho.orion>

On 12/4/14, 11:38 PM, Jiri Pirko wrote:
> Fri, Dec 05, 2014 at 03:26:40AM CET, roopa@cumulusnetworks.com wrote:
>> From: Roopa Prabhu <roopa@cumulusnetworks.com>
>>
>> This allows offloading to switch asic without having the user to set
>> any flag. And this is done in the bridge driver to rollback kernel settings
>> on hw offload failure if required in the future.
>>
>> With this, it also makes sure a notification goes out only after the
>> attributes are set both in the kernel and hw.
>> ---
>> net/bridge/br_netlink.c |   27 ++++++++++++++++++++++++++-
>> 1 file changed, 26 insertions(+), 1 deletion(-)
>>
>> diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
>> index 9f5eb55..ce173f0 100644
>> --- a/net/bridge/br_netlink.c
>> +++ b/net/bridge/br_netlink.c
>> @@ -407,9 +407,21 @@ int br_setlink(struct net_device *dev, struct nlmsghdr *nlh)
>> 				afspec, RTM_SETLINK);
>> 	}
>>
>> +	if ((dev->features & NETIF_F_HW_SWITCH_OFFLOAD) &&
>> +			dev->netdev_ops->ndo_bridge_setlink) {
>> +		int ret = dev->netdev_ops->ndo_bridge_setlink(dev, nlh);
> 	This (and I suspect other patches as well) has many issues which
> 	are pointed out by scripts/checkpatch.pl sctipts. For example
> 	here, there should be an empty line. Please run your patches by
> 	this script before you send them.
>
>> +		if (ret && ret != -EOPNOTSUPP) {
>> +			/* XXX Fix this in the future to rollback
>> +			 * kernel settings and return error
>> +			 */
>> +			br_warn(p->br, "error offloading bridge attributes "
>> +					"on port %u(%s)\n", (unsigned int) p->port_no,
>> +					p->dev->name);
>> +		}
>> +	}
>> +
>> 	if (err == 0)
>> 		br_ifinfo_notify(RTM_NEWLINK, p);
>> -
>> out:
>> 	return err;
>> }
>> @@ -433,6 +445,19 @@ int br_dellink(struct net_device *dev, struct nlmsghdr *nlh)
>> 	err = br_afspec((struct net_bridge *)netdev_priv(dev), p,
>> 			afspec, RTM_DELLINK);
>>
>> +	if (dev->features & NETIF_F_HW_SWITCH_OFFLOAD
>> +			&& dev->netdev_ops->ndo_bridge_setlink) {
>> +		int ret = dev->netdev_ops->ndo_bridge_dellink(dev, nlh);
> 	c&p issue, there should be check for dellink here.
>
>> +		if (ret && ret != -EOPNOTSUPP) {
>> +			/* XXX Fix this in the future to rollback
>> +			 * kernel settings and return error
>> +			 */
>> +			br_warn(p->br, "error offloading bridge attributes "
>> +					"on port %u(%s)\n", (unsigned int) p->port_no,
>> +					p->dev->name);
>> +		}
>> +	}
>> +
>
> 	I agree with Scott that this code should be moved to rtnetlink.c

I moved it here to make rollback easier. Plus i don't want software 
notification to go out before hardware is programmed.
And the software notification goes out from here.

I don't intend to implement rollback in v2 (that will be a separate series).
  But, i do want to take care of the notification problem.

^ permalink raw reply

* Re: [PATCH][net-next] net: avoid to call skb_queue_len again
From: Eric Dumazet @ 2014-12-05 14:31 UTC (permalink / raw)
  To: Sergei Shtylyov; +Cc: roy.qing.li, netdev
In-Reply-To: <5481BBF2.40806@cogentembedded.com>

On Fri, 2014-12-05 at 17:06 +0300, Sergei Shtylyov wrote:
> Hello.
> 
> On 12/5/2014 12:49 PM, roy.qing.li@gmail.com wrote:
> 
> > From: Li RongQing <roy.qing.li@gmail.com>
> 
> > the queue length of sd->input_pkt_queue has been putted into qlen,
> 
>     s/putted/put/, it's irregular verb.
> 
> > and impossible to change, since hold the lock
> 
>     I can't parse that. Who holds the lock?

This thread/cpu holds the lock to manipulate input_pkt_queue.

Otherwise, the following would break horribly....

__skb_queue_tail(&sd->input_pkt_queue, skb);

^ permalink raw reply

* Re: [PATCH 1/3] netdev: introduce new NETIF_F_HW_SWITCH_OFFLOAD feature flag for switch device offloads
From: Roopa Prabhu @ 2014-12-05 14:16 UTC (permalink / raw)
  To: Jiri Pirko
  Cc: sfeldma, jhs, bcrl, tgraf, john.fastabend, stephen, linville,
	nhorman, nicolas.dichtel, vyasevic, f.fainelli, buytenh, aviadr,
	netdev, davem, shm, gospo
In-Reply-To: <20141205074127.GB1866@nanopsycho.orion>

On 12/4/14, 11:41 PM, Jiri Pirko wrote:
> Fri, Dec 05, 2014 at 03:26:39AM CET, roopa@cumulusnetworks.com wrote:
>> From: Roopa Prabhu <roopa@cumulusnetworks.com>
>>
>> This is a generic high level feature flag for all switch asic features today.
>>
>> switch drivers set this flag on switch ports. Logical devices like
>> bridge, bonds, vxlans can inherit this flag from their slaves/ports.
>
> Can you please elaborate on how exactly would this inheritance look
> like?

My thought there was, when a port with the hw offload flag is added to 
the bridge, the same flag gets set on the bridge. And, for any bridge 
attributes (not port attributes), this flag on the bridge can be used to 
offload those bridge attributes.
bridge attribute examples: IFLA_BR_FORWARD_DELAY, IFLA_BR_HELLO_TIME, 
IFLA_BR_MAX_AGE.
I don't think offloads for these are handled today. I was going to look 
at them as part of continued work on this.
The current patches only target bridge port attributes and the flag for 
this is already set by the port driver.

I believe netdev_update_features() takes care of the updating the flag 
on the bridge part. I plan to check on that.


>
>
>> I had to use SWITCH in the name to avoid ambiguity with other feature
>> flags. But, since i have been harping about not calling it 'switch',
>> I am welcome to any suggestions :)
>>
>> An alternative to using a feature flag is to use a IFF_HW_OFFLOAD
>> in net_device_flags.
>> ---
>> include/linux/netdev_features.h |    2 ++
>> 1 file changed, 2 insertions(+)
>>
>> diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h
>> index 8e30685..68db1de 100644
>> --- a/include/linux/netdev_features.h
>> +++ b/include/linux/netdev_features.h
>> @@ -66,6 +66,7 @@ enum {
>> 	NETIF_F_HW_VLAN_STAG_FILTER_BIT,/* Receive filtering on VLAN STAGs */
>> 	NETIF_F_HW_L2FW_DOFFLOAD_BIT,	/* Allow L2 Forwarding in Hardware */
>> 	NETIF_F_BUSY_POLL_BIT,		/* Busy poll */
>> +	NETIF_F_HW_SWITCH_OFFLOAD_BIT,  /* HW switch offload */
>>
>> 	/*
>> 	 * Add your fresh new feature above and remember to update
>> @@ -124,6 +125,7 @@ enum {
>> #define NETIF_F_HW_VLAN_STAG_TX	__NETIF_F(HW_VLAN_STAG_TX)
>> #define NETIF_F_HW_L2FW_DOFFLOAD	__NETIF_F(HW_L2FW_DOFFLOAD)
>> #define NETIF_F_BUSY_POLL	__NETIF_F(BUSY_POLL)
>> +#define NETIF_F_HW_SWITCH_OFFLOAD	__NETIF_F(HW_SWITCH_OFFLOAD)
>>
>> /* Features valid for ethtool to change */
>> /* = all defined minus driver/device-class-related */
>> -- 
>> 1.7.10.4
>>

^ permalink raw reply

* [PATCH net-next] tcp: refine TSO autosizing
From: Eric Dumazet @ 2014-12-05 14:15 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Neal Cardwell, Yuchung Cheng, Nandita Dukkipati

From: Eric Dumazet <edumazet@google.com>

Commit 95bd09eb2750 ("tcp: TSO packets automatic sizing") tried to
control TSO size, but did this at the wrong place (sendmsg() time)

At sendmsg() time, we might have a pessimistic view of flow rate,
and we end up building very small skbs (with 2 MSS per skb).

This is bad because :

 - It sends small TSO packets even in Slow Start where rate quickly
   increases.
 - It tends to make socket write queue very big, increasing tcp_ack()
   processing time, but also increasing memory needs, not necessarily
   accounted for, as fast clones overhead is currently ignored.
 - Lower GRO efficiency and more ACK packets.

Servers with a lot of small lived connections suffer from this.

Lets instead fill skbs as much as possible (64KB of payload), but split
them at xmit time, when we have a precise idea of the flow rate.
skb split is actually quite efficient.

Patch looks bigger than necessary, because TCP Small Queue decision now
has to take place after the eventual split.

Tested:

40 ms rtt link

nstat >/dev/null
netperf -H remote -Cc -l -2000000 -- -s 1000000
nstat | egrep "IpInReceives|IpOutRequests|TcpOutSegs|IpExtOutOctets"

Before patch :

Recv   Send    Send                          Utilization       Service Demand
Socket Socket  Message  Elapsed              Send     Recv     Send    Recv
Size   Size    Size     Time     Throughput  local    remote   local   remote
bytes  bytes   bytes    secs.    10^6bits/s  % S      % S      us/KB   us/KB

 87380 2000000 2000000    0.36         44.22   0.00     0.06     0.000   5.007  
IpInReceives                    600                0.0
IpOutRequests                   599                0.0
TcpOutSegs                      1397               0.0
IpExtOutOctets                  2033249            0.0


After patch :

Recv   Send    Send                          Utilization       Service Demand
Socket Socket  Message  Elapsed              Send     Recv     Send    Recv
Size   Size    Size     Time     Throughput  local    remote   local   remote
bytes  bytes   bytes    secs.    10^6bits/s  % S      % S      us/KB   us/KB

 87380 2000000 2000000    0.36         44.09   0.00     0.00     0.000   0.000  
IpInReceives                    257                0.0
IpOutRequests                   226                0.0
TcpOutSegs                      1399               0.0
IpExtOutOctets                  2013777            0.0


Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 net/ipv4/tcp.c        |   14 ++------------
 net/ipv4/tcp_output.c |   38 ++++++++++++++++++++++++--------------
 2 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index dc13a3657e8e..91e6c1406313 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -840,24 +840,14 @@ static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now,
 	xmit_size_goal = mss_now;
 
 	if (large_allowed && sk_can_gso(sk)) {
-		u32 gso_size, hlen;
+		u32 hlen;
 
 		/* Maybe we should/could use sk->sk_prot->max_header here ? */
 		hlen = inet_csk(sk)->icsk_af_ops->net_header_len +
 		       inet_csk(sk)->icsk_ext_hdr_len +
 		       tp->tcp_header_len;
 
-		/* Goal is to send at least one packet per ms,
-		 * not one big TSO packet every 100 ms.
-		 * This preserves ACK clocking and is consistent
-		 * with tcp_tso_should_defer() heuristic.
-		 */
-		gso_size = sk->sk_pacing_rate / (2 * MSEC_PER_SEC);
-		gso_size = max_t(u32, gso_size,
-				 sysctl_tcp_min_tso_segs * mss_now);
-
-		xmit_size_goal = min_t(u32, gso_size,
-				       sk->sk_gso_max_size - 1 - hlen);
+		xmit_size_goal = sk->sk_gso_max_size - 1 - hlen;
 
 		xmit_size_goal = tcp_bound_to_half_wnd(tp, xmit_size_goal);
 
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index f5bd4bd3f7e6..dac9991bccbf 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1533,6 +1533,16 @@ static unsigned int tcp_mss_split_point(const struct sock *sk,
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
 	u32 partial, needed, window, max_len;
+	u32 bytes = sk->sk_pacing_rate >> 10;
+	u32 segs;
+
+	/* Goal is to send at least one packet per ms,
+	 * not one big TSO packet every 100 ms.
+	 * This preserves ACK clocking and is consistent
+	 * with tcp_tso_should_defer() heuristic.
+	 */
+	segs = max_t(u32, bytes / mss_now, sysctl_tcp_min_tso_segs);
+	max_segs = min_t(u32, max_segs, segs);
 
 	window = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
 	max_len = mss_now * max_segs;
@@ -2008,6 +2018,18 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 				break;
 		}
 
+		limit = mss_now;
+		if (tso_segs > 1 && !tcp_urg_mode(tp))
+			limit = tcp_mss_split_point(sk, skb, mss_now,
+						    min_t(unsigned int,
+							  cwnd_quota,
+							  sk->sk_gso_max_segs),
+						    nonagle);
+
+		if (skb->len > limit &&
+		    unlikely(tso_fragment(sk, skb, limit, mss_now, gfp)))
+			break;
+
 		/* TCP Small Queues :
 		 * Control number of packets in qdisc/devices to two packets / or ~1 ms.
 		 * This allows for :
@@ -2018,8 +2040,8 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 		 * of queued bytes to ensure line rate.
 		 * One example is wifi aggregation (802.11 AMPDU)
 		 */
-		limit = max_t(unsigned int, sysctl_tcp_limit_output_bytes,
-			      sk->sk_pacing_rate >> 10);
+		limit = max(2 * skb->truesize, sk->sk_pacing_rate >> 10);
+		limit = min_t(u32, limit, sysctl_tcp_limit_output_bytes);
 
 		if (atomic_read(&sk->sk_wmem_alloc) > limit) {
 			set_bit(TSQ_THROTTLED, &tp->tsq_flags);
@@ -2032,18 +2054,6 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 				break;
 		}
 
-		limit = mss_now;
-		if (tso_segs > 1 && !tcp_urg_mode(tp))
-			limit = tcp_mss_split_point(sk, skb, mss_now,
-						    min_t(unsigned int,
-							  cwnd_quota,
-							  sk->sk_gso_max_segs),
-						    nonagle);
-
-		if (skb->len > limit &&
-		    unlikely(tso_fragment(sk, skb, limit, mss_now, gfp)))
-			break;
-
 		if (unlikely(tcp_transmit_skb(sk, skb, 1, gfp)))
 			break;
 

^ permalink raw reply related

* Re: [PATCH][net-next] net: avoid to call skb_queue_len again
From: Sergei Shtylyov @ 2014-12-05 14:06 UTC (permalink / raw)
  To: roy.qing.li, netdev
In-Reply-To: <1417772948-17909-1-git-send-email-roy.qing.li@gmail.com>

Hello.

On 12/5/2014 12:49 PM, roy.qing.li@gmail.com wrote:

> From: Li RongQing <roy.qing.li@gmail.com>

> the queue length of sd->input_pkt_queue has been putted into qlen,

    s/putted/put/, it's irregular verb.

> and impossible to change, since hold the lock

    I can't parse that. Who holds the lock?

> Signed-off-by: Li RongQing <roy.qing.li@gmail.com>

WBR, Sergei

^ permalink raw reply

* Re: [PATCH 2/3] bridge: offload bridge port attributes to switch asic if feature flag set
From: Roopa Prabhu @ 2014-12-05 14:04 UTC (permalink / raw)
  To: Sergei Shtylyov
  Cc: jiri, sfeldma, jhs, bcrl, tgraf, john.fastabend, stephen,
	linville, nhorman, nicolas.dichtel, vyasevic, f.fainelli, buytenh,
	aviadr, netdev, davem, shm, gospo
In-Reply-To: <5481B16C.8050904@cogentembedded.com>

ack again, will fix the formatting issues. thanks

On 12/5/14, 5:21 AM, Sergei Shtylyov wrote:
> Hello.
>
> On 12/5/2014 5:26 AM, roopa@cumulusnetworks.com wrote:
>
>> From: Roopa Prabhu <roopa@cumulusnetworks.com>
>
>> This allows offloading to switch asic without having the user to set
>> any flag. And this is done in the bridge driver to rollback kernel 
>> settings
>> on hw offload failure if required in the future.
>
>> With this, it also makes sure a notification goes out only after the
>> attributes are set both in the kernel and hw.
>> ---
>>   net/bridge/br_netlink.c |   27 ++++++++++++++++++++++++++-
>>   1 file changed, 26 insertions(+), 1 deletion(-)
>
>> diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
>> index 9f5eb55..ce173f0 100644
>> --- a/net/bridge/br_netlink.c
>> +++ b/net/bridge/br_netlink.c
>> @@ -407,9 +407,21 @@ int br_setlink(struct net_device *dev, struct 
>> nlmsghdr *nlh)
>>                   afspec, RTM_SETLINK);
>>       }
>>
>> +    if ((dev->features & NETIF_F_HW_SWITCH_OFFLOAD) &&
>> +            dev->netdev_ops->ndo_bridge_setlink) {
>> +        int ret = dev->netdev_ops->ndo_bridge_setlink(dev, nlh);
>
>    Need empty line after declaration.
>
>> +        if (ret && ret != -EOPNOTSUPP) {
>> +            /* XXX Fix this in the future to rollback
>> +             * kernel settings and return error
>> +             */
>> +            br_warn(p->br, "error offloading bridge attributes "
>> +                    "on port %u(%s)\n", (unsigned int) p->port_no,
>
>    Don't break up the message strings. Also, your continuation lines 
> should start under the next character after ( on the first line.
>
>> +                    p->dev->name);
>> +        }
>> +    }
>> +
>>       if (err == 0)
>>           br_ifinfo_notify(RTM_NEWLINK, p);
>> -
>
>    Why?
>
>>   out:
>>       return err;
>>   }
>> @@ -433,6 +445,19 @@ int br_dellink(struct net_device *dev, struct 
>> nlmsghdr *nlh)
>>       err = br_afspec((struct net_bridge *)netdev_priv(dev), p,
>>               afspec, RTM_DELLINK);
>>
>> +    if (dev->features & NETIF_F_HW_SWITCH_OFFLOAD
>> +            && dev->netdev_ops->ndo_bridge_setlink) {
>> +        int ret = dev->netdev_ops->ndo_bridge_dellink(dev, nlh);
>> +        if (ret && ret != -EOPNOTSUPP) {
>> +            /* XXX Fix this in the future to rollback
>> +             * kernel settings and return error
>> +             */
>> +            br_warn(p->br, "error offloading bridge attributes "
>> +                    "on port %u(%s)\n", (unsigned int) p->port_no,
>> +                    p->dev->name);
>
>    Same comments here.
>
>> +        }
>> +    }
>> +
>>       return err;
>>   }
>>   static int br_validate(struct nlattr *tb[], struct nlattr *data[])
>
> WBR, Sergei
>
> -- 
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH 3/3] rocker: set feature NETIF_F_HW_SWITCH_OFFLOAD
From: Roopa Prabhu @ 2014-12-05 14:04 UTC (permalink / raw)
  To: Sergei Shtylyov
  Cc: jiri, sfeldma, jhs, bcrl, tgraf, john.fastabend, stephen,
	linville, nhorman, nicolas.dichtel, vyasevic, f.fainelli, buytenh,
	aviadr, netdev, davem, shm, gospo
In-Reply-To: <5481B0A2.9070109@cogentembedded.com>

ack, will fix them.

On 12/5/14, 5:18 AM, Sergei Shtylyov wrote:
> Hello.
>
> On 12/5/2014 5:26 AM, roopa@cumulusnetworks.com wrote:
>
>> From: Roopa Prabhu <roopa@cumulusnetworks.com>
>
>> This patch just sets the feature flag on rocker ports
>
>    You didn't sign off on any of your patches, hence they can't be 
> applied.
>
>> ---
>>   drivers/net/ethernet/rocker/rocker.c |    3 ++-
>>   1 file changed, 2 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/net/ethernet/rocker/rocker.c 
>> b/drivers/net/ethernet/rocker/rocker.c
>> index fded127..3fe19b0 100644
>> --- a/drivers/net/ethernet/rocker/rocker.c
>> +++ b/drivers/net/ethernet/rocker/rocker.c
>> @@ -4003,7 +4003,8 @@ static int rocker_probe_port(struct rocker 
>> *rocker, unsigned int port_number)
>>                  NAPI_POLL_WEIGHT);
>>       rocker_carrier_init(rocker_port);
>>
>> -    dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
>> +    dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER |
>> +                     NETIF_F_HW_SWITCH_OFFLOAD;
>
>    There was no need tho break the line.
>
> [...]
>
> WBR, Sergei
>
> -- 
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH 2/3] bridge: offload bridge port attributes to switch asic if feature flag set
From: Roopa Prabhu @ 2014-12-05 14:03 UTC (permalink / raw)
  To: Jamal Hadi Salim
  Cc: John Fastabend, Scott Feldman, Jiří Pírko,
	Benjamin LaHaise, Thomas Graf, stephen@networkplumber.org,
	John Linville, nhorman@tuxdriver.com, Nicolas Dichtel,
	vyasevic@redhat.com, Florian Fainelli, buytenh@wantstofly.org,
	Aviad Raveh, Netdev, David S. Miller, shm, Andy Gospodarek
In-Reply-To: <5481A7E1.7070708@mojatatu.com>

On 12/5/14, 4:41 AM, Jamal Hadi Salim wrote:
> On 12/05/14 02:10, Roopa Prabhu wrote:
>> On 12/4/14, 10:55 PM, John Fastabend wrote:
>>> On 12/04/2014 10:41 PM, Scott Feldman wrote:
>>>> On Thu, Dec 4, 2014 at 6:26 PM, <roopa@cumulusnetworks.com> wrote:
>>>>> From: Roopa Prabhu <roopa@cumulusnetworks.com>
>>>>>
>>>>> This allows offloading to switch asic without having the user to set
>>>>> any flag. And this is done in the bridge driver to rollback kernel
>>>>> settings
>>>>> on hw offload failure if required in the future.
>>>>>
>>>>> With this, it also makes sure a notification goes out only after the
>>>>> attributes are set both in the kernel and hw.
>>>>
>>>> I like this approach as it streamlines the steps for the user in
>>>> setting port flags.  There is one case for FLOODING where you'll have
>>>> to turn off flooding for both, and then turn on flooding in hw. You
>>>> don't want flooding turned on on kernel and hw.
>>>>
>>>>> ---
>>>>>   net/bridge/br_netlink.c |   27 ++++++++++++++++++++++++++-
>>>>>   1 file changed, 26 insertions(+), 1 deletion(-)
>>>>>
>>>>> diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
>>>>> index 9f5eb55..ce173f0 100644
>>>>> --- a/net/bridge/br_netlink.c
>>>>> +++ b/net/bridge/br_netlink.c
>>>>> @@ -407,9 +407,21 @@ int br_setlink(struct net_device *dev, struct
>>>>> nlmsghdr *nlh)
>>>>>                                  afspec, RTM_SETLINK);
>>>>>          }
>>>>>
>>>>> +       if ((dev->features & NETIF_F_HW_SWITCH_OFFLOAD) &&
>>>>> + dev->netdev_ops->ndo_bridge_setlink) {
>>>>> +               int ret = dev->netdev_ops->ndo_bridge_setlink(dev,
>>>>> nlh);
>>>>
>>>> I think you want to up-level this to net/core/rtnetlink.c because
>>>> you're only enabling the feature for one instance of a driver that
>>>> implements ndo_bridge_setlink: the bridge driver.  If another driver
>>>> was MASTER and implemented ndo_bridge_setlink, you'd want same check
>>>> to push setting down to SELF port driver.
>>>
>>> Also if the user set SELF && MASTER flags && had HW_SWITCH_OFFLOAD bit
>>> set we would call ndo_bridge_setlink twice on the dev. Maybe you can
>>> catch this case in rtnetlink.c and only call it once.
>>
>> yes, thought about this and when i looked at iproute2 code, it is either
>> master
>> or self today and i don't think anybody else uses both flags with the
>> current
>> kernel implementation. But yes, that does not stop anybody from setting
>> both flags.
>> I will handle it better in v2.
>
> folks, can we have probably 2-3 sets of patches?
> #1 introduces the flags and doesnt change anything.
> Others to introduce other features.
sure, I was thinking about doing that. I plan to send the "swdev" mode 
related patches first.
And the others as separate sets.




> -- 
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH v2 6/6] net-PPP: Delete another unnecessary assignment in mppe_alloc()
From: Dan Carpenter @ 2014-12-05 13:58 UTC (permalink / raw)
  To: SF Markus Elfring
  Cc: Sergei Shtylyov, Paul Mackerras, linux-ppp, netdev, Eric Dumazet,
	LKML, kernel-janitors, Julia Lawall
In-Reply-To: <20141205122315.GC4912@mwanda>

On Fri, Dec 05, 2014 at 03:23:15PM +0300, Dan Carpenter wrote:
> On Thu, Dec 04, 2014 at 11:20:21PM +0100, SF Markus Elfring wrote:
> > From: Markus Elfring <elfring@users.sourceforge.net>
> > Date: Thu, 4 Dec 2014 22:42:30 +0100
> > 
> > The data structure element "sha1" was assigned a null pointer by the
> > mppe_alloc() after a function call "crypto_alloc_hash" failed.
> 
> This patch is also buggy.

Actually it's ok.  I was just confused by the changelog.

Sorry about that, Markus.

regards,
dan carpenter

^ permalink raw reply

* Re: [PATCH v2 5/6] net-PPP: Delete an unnecessary assignment in mppe_alloc()
From: Dan Carpenter @ 2014-12-05 13:57 UTC (permalink / raw)
  To: SF Markus Elfring
  Cc: Sergei Shtylyov, Paul Mackerras, linux-ppp, netdev, Eric Dumazet,
	LKML, kernel-janitors, Julia Lawall
In-Reply-To: <5481A8AE.9050506@users.sourceforge.net>

On Fri, Dec 05, 2014 at 01:44:30PM +0100, SF Markus Elfring wrote:
> >> The data structure element "arc4" was assigned a null pointer by the
> >> mppe_alloc() function if a previous function call "crypto_alloc_blkcipher"
> >> failed.
> > 
> > crypto_alloc_blkcipher() returns error pointers and not NULL.
> 
> That is true.
> 

Oh.  In that case, I misunderstood what you wrote.  Looking at it now,
this patch is actually ok.

regards,
dan carpenter


^ permalink raw reply

* Re: 3.12.33 - BUG xfrm_selector_match+0x25/0x2f6
From: Smart Weblications GmbH - Florian Wiessner @ 2014-12-05 13:55 UTC (permalink / raw)
  To: Julian Anastasov
  Cc: Steffen Klassert, netdev, LKML, stable, Simon Horman, lvs-devel
In-Reply-To: <alpine.LFD.2.11.1412051126090.2522@ja.home.ssi.bg>

Hi,

Am 05.12.2014 10:55, schrieb Julian Anastasov:

> 
> On Fri, 5 Dec 2014, Smart Weblications GmbH - Florian Wiessner wrote:
> 
>> i tried with 3.12.33 without any XFRM and now got this one (which is reproducable):
>>
>> [  233.956012] BUG: unable to handle kernel NULL pointer dereference at 00000000
>>                                    00000014
>> [  233.956218] IP: [<ffffffffa013a470>] nf_ct_seqadj_set+0x60/0x90 [nf_conntrack
> 
> 	It seems fix from 3.13 was not sent to 3.12 stable:
> 
> commit b25adce1606427fd8 ("ipvs: correct usage/allocation of seqadj ext in 
> ipvs")
> 
> 	There was related change but it is not needed
> for stable kernels:
> 
> commit db12cf27435356017e ("netfilter: WARN about wrong usage of sequence 
> number adjustments"
> 
> 	Simon, can we try commit b25adce1606427fd8 for 3.12?


>> setup is like this:
>>
>>
>> #virtual=<myVIP>:21
>> #       real=10.10.1.20:21 masq
[...]
>> #       service=ftp
>> #       scheduler=rr
>> #       protocol=tcp
>> #       checktype=connect
>>
>> ( i remarked it to prevent fruther crashes...)
>>
>> when ip_vs_ftp is loaded and someone trying to make a ftp connection, the system
>> panics instantly.
>>
>> 10.10.1.20 - 10.10.1.23 are lxc-containers using veth connected to the bridge
>> running on 4 different nodes. The node running ldirector/ipvsadm has also one of
>> those containers running (don't know if that matters)
> 
> 	It is always good to know the setup. Do you access VIP
> from local clients (from director)?
> 

Not for ftp, but we have mail as well in the same setup, and yes, there we do
access it from local client.

>> brctl show
>> bridge name     bridge id               STP enabled     interfaces
>> br0             8000.00259052bbf4       no              bond0
>>                                                         vethMKELUc

[...]

> 	Before I create patch to avoid rerouting for
> LOCAL_IN you can try to set IPVS sysctl var "snat_reroute" to 0
> or even to change ip_vs_route_me_harder() function just to return 0.
> snat_reroute=1 (a default value) is needed if you have
> multiple links to clients and use ip rules to select
> correct route by src ip (after SNAT). If you have single
> uplink snat_reroute can be 0.
> 


ip rule show

0:      from all lookup local
32765:  from all to 10.10.0.0/16 lookup 200

I use ip rules, but this is not for source but destination. I need this to
enable clients from the local net to connect to some VIPs so they get there
correct route back.

I have also seen "b25adce1606427fd8 ipvs: correct usage/allocation of seqadj ext
in ipvs" in the net while googling, but i thought that it would be included in
3.12.33 as the patch is over a year old and since this is marked as stable i did
not expect any issues.

Maybe i would not have stubmled accross this if the ocfs2 devs were as fast as
the netdev-devs! But to my ocfs2 isseu/bug i still have no reply until today. So
thank you for the fast responses! I would like to test any patch for 3.12.

If i understand correctly, i set:

echo 0 > /proc/sys/net/ipv4/vs/snat_reroute
modprobe ip_vs_ftp

and reenable ftp ipvs?

It does not crash, but ftp is not working with neither PASV nor PORT:


[14:47:42] [R] Verbindung herstellen zu 192.168.10.62 -> IP=192.168.10.62 PORT=21
[14:47:42] [R] Verbunden mit 192.168.10.62
[14:47:43] [R] 220 (vsFTPd 3.0.2)
[14:47:43] [R] USER (hidden)
[14:47:43] [R] 331 Please specify the password.
[14:47:43] [R] PASS (hidden)
[14:47:43] [R] 230 Login successful.
[14:47:43] [R] SYST
[14:47:43] [R] 215 UNIX Type: L8
[14:47:43] [R] FEAT
[14:47:43] [R] 211-Features:
[14:47:43] [R]  EPRT
[14:47:43] [R]  EPSV
[14:47:43] [R]  MDTM
[14:47:43] [R]  PASV
[14:47:43] [R]  REST STREAM
[14:47:43] [R]  SIZE
[14:47:43] [R]  TVFS
[14:47:43] [R]  UTF8
[14:47:43] [R] 211 End
[14:47:43] [R] PWD
[14:47:43] [R] 257 "/"
[14:47:43] [R] CWD /
[14:47:43] [R] 250 Directory successfully changed.
[14:47:43] [R] PWD
[14:47:43] [R] 257 "/"
[14:47:43] [R] TYPE A
[14:47:43] [R] 200 Switching to ASCII mode.
[14:47:43] [R] PASV
[14:47:43] [R] 227 Entering Passive Mode (10,10,1,23,251,6).
[14:47:43] [R] Datenkanal-IP öffnen: 192.168.10.62 PORT: 64262
[14:47:44] [R] Datensocket-Fehler: Verbindung abgewiesen
[14:47:44] [R] List Fehler
[14:47:44] [R] PASV
[14:47:44] [R] 227 Entering Passive Mode (10,10,1,23,250,144).
[14:47:44] [R] Datenkanal-IP öffnen: 192.168.10.62 PORT: 64144
[14:47:45] [R] Datensocket-Fehler: Verbindung abgewiesen
[14:47:45] [R] List Fehler
[14:47:45] [R] PASV-Modus fehlgeschlagen, PORT -Modus versuchen...
[14:47:45] [R] Auf PORT: 62505 warten, Verbindung erwarten.
[14:47:45] [R] PORT 192,168,200,13,244,41
[14:47:45] [R] 500 Illegal PORT command.
[14:47:45] [R] List Fehler
[14:48:14] [R] QUIT
[14:48:14] [R] 221 Goodbye.
[14:48:14] [R] Ausgeloggt: 192.168.10.62


-- 

Mit freundlichen Grüßen,

Florian Wiessner

Smart Weblications GmbH
Martinsberger Str. 1
D-95119 Naila

fon.: +49 9282 9638 200
fax.: +49 9282 9638 205
24/7: +49 900 144 000 00 - 0,99 EUR/Min*
http://www.smart-weblications.de

--
Sitz der Gesellschaft: Naila
Geschäftsführer: Florian Wiessner
HRB-Nr.: HRB 3840 Amtsgericht Hof
*aus dem dt. Festnetz, ggf. abweichende Preise aus dem Mobilfunknetz

^ permalink raw reply

* Re: [PATCH iproute2] bridge link: add option 'self'
From: Roopa Prabhu @ 2014-12-05 13:46 UTC (permalink / raw)
  To: Jiri Pirko
  Cc: sfeldma, jhs, bcrl, tgraf, john.fastabend, stephen, linville,
	nhorman, nicolas.dichtel, vyasevic, f.fainelli, buytenh, aviadr,
	netdev, davem, shm, gospo
In-Reply-To: <20141205075559.GC1866@nanopsycho.orion>

On 12/4/14, 11:55 PM, Jiri Pirko wrote:
> Fri, Dec 05, 2014 at 03:27:16AM CET, roopa@cumulusnetworks.com wrote:
>> From: Roopa Prabhu <roopa@cumulusnetworks.com>
>>
>> Currently self is set internally only if hwmode is set.
>> This makes it necessary for the hw to have a mode.
>> There is no hwmode really required to go to hardware. So, introduce
>> self for anybody who wants to target hardware.
>
> Signed-off line is missing.
>
> Other than that, the patch looks fine.
>
> Reviewed-by: Jiri Pirko <jiri@resnulli.us>
>
> Feel free to add my review line to the repost.

yep, thanks.
>
>
>> ---
>> bridge/link.c |    3 +++
>> 1 file changed, 3 insertions(+)
>>
>> diff --git a/bridge/link.c b/bridge/link.c
>> index 90d9e7f..2b86141 100644
>> --- a/bridge/link.c
>> +++ b/bridge/link.c
>> @@ -321,6 +321,9 @@ static int brlink_modify(int argc, char **argv)
>> 					"\"veb\".\n");
>> 				exit(-1);
>> 			}
>> +		} else if (strcmp(*argv, "self") == 0) {
>> +			NEXT_ARG();
>> +			flags = BRIDGE_FLAGS_SELF;
>> 		} else {
>> 			usage();
>> 		}
>> -- 
>> 1.7.10.4
>>

^ permalink raw reply

* Re: [PATCH 2/3] bridge: offload bridge port attributes to switch asic if feature flag set
From: Roopa Prabhu @ 2014-12-05 13:44 UTC (permalink / raw)
  To: Jiri Pirko
  Cc: sfeldma, jhs, bcrl, tgraf, john.fastabend, stephen, linville,
	nhorman, nicolas.dichtel, vyasevic, f.fainelli, buytenh, aviadr,
	netdev, davem, shm, gospo
In-Reply-To: <20141205073840.GA1866@nanopsycho.orion>

On 12/4/14, 11:38 PM, Jiri Pirko wrote:
> Fri, Dec 05, 2014 at 03:26:40AM CET, roopa@cumulusnetworks.com wrote:
>> From: Roopa Prabhu <roopa@cumulusnetworks.com>
>>
>> This allows offloading to switch asic without having the user to set
>> any flag. And this is done in the bridge driver to rollback kernel settings
>> on hw offload failure if required in the future.
>>
>> With this, it also makes sure a notification goes out only after the
>> attributes are set both in the kernel and hw.
>> ---
>> net/bridge/br_netlink.c |   27 ++++++++++++++++++++++++++-
>> 1 file changed, 26 insertions(+), 1 deletion(-)
>>
>> diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
>> index 9f5eb55..ce173f0 100644
>> --- a/net/bridge/br_netlink.c
>> +++ b/net/bridge/br_netlink.c
>> @@ -407,9 +407,21 @@ int br_setlink(struct net_device *dev, struct nlmsghdr *nlh)
>> 				afspec, RTM_SETLINK);
>> 	}
>>
>> +	if ((dev->features & NETIF_F_HW_SWITCH_OFFLOAD) &&
>> +			dev->netdev_ops->ndo_bridge_setlink) {
>> +		int ret = dev->netdev_ops->ndo_bridge_setlink(dev, nlh);
> 	This (and I suspect other patches as well) has many issues which
> 	are pointed out by scripts/checkpatch.pl sctipts. For example
> 	here, there should be an empty line. Please run your patches by
> 	this script before you send them.

ack, sure thing (I usually do but forgot yesterday).

>
>> +		if (ret && ret != -EOPNOTSUPP) {
>> +			/* XXX Fix this in the future to rollback
>> +			 * kernel settings and return error
>> +			 */
>> +			br_warn(p->br, "error offloading bridge attributes "
>> +					"on port %u(%s)\n", (unsigned int) p->port_no,
>> +					p->dev->name);
>> +		}
>> +	}
>> +
>> 	if (err == 0)
>> 		br_ifinfo_notify(RTM_NEWLINK, p);
>> -
>> out:
>> 	return err;
>> }
>> @@ -433,6 +445,19 @@ int br_dellink(struct net_device *dev, struct nlmsghdr *nlh)
>> 	err = br_afspec((struct net_bridge *)netdev_priv(dev), p,
>> 			afspec, RTM_DELLINK);
>>
>> +	if (dev->features & NETIF_F_HW_SWITCH_OFFLOAD
>> +			&& dev->netdev_ops->ndo_bridge_setlink) {
>> +		int ret = dev->netdev_ops->ndo_bridge_dellink(dev, nlh);
> 	c&p issue, there should be check for dellink here.
>
>> +		if (ret && ret != -EOPNOTSUPP) {
>> +			/* XXX Fix this in the future to rollback
>> +			 * kernel settings and return error
>> +			 */
>> +			br_warn(p->br, "error offloading bridge attributes "
>> +					"on port %u(%s)\n", (unsigned int) p->port_no,
>> +					p->dev->name);
>> +		}
>> +	}
>> +
>
> 	I agree with Scott that this code should be moved to rtnetlink.c
>
>> 	return err;
>> }
>> static int br_validate(struct nlattr *tb[], struct nlattr *data[])
>> -- 
>> 1.7.10.4
>>
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [bisected] xfrm: TCP connection initiating PMTU discovery stalls on v3.12+
From: Eric Dumazet @ 2014-12-05 13:26 UTC (permalink / raw)
  To: Wolfgang Walter
  Cc: netdev, Thomas Jarosch, Eric Dumazet, Herbert Xu,
	Steffen Klassert
In-Reply-To: <1841985.3SsQX9ZHP0@h2o.as.studentenwerk.mhn.de>

On Fri, 2014-12-05 at 13:09 +0100, Wolfgang Walter wrote:
> Hello,
> 
> as reverting this patch fixes this rather annoying problem: is it dangerous to 
> revert it as a workaround until the root cause is found?
> 

Unfortunately no, this patch fixes a serious issue.

We need to find the root cause of your problem instead of trying to work
around it.

^ permalink raw reply

* Re: [PATCH][net-next] net: avoid to call skb_queue_len again
From: Eric Dumazet @ 2014-12-05 13:24 UTC (permalink / raw)
  To: roy.qing.li; +Cc: netdev
In-Reply-To: <1417772948-17909-1-git-send-email-roy.qing.li@gmail.com>

On Fri, 2014-12-05 at 17:49 +0800, roy.qing.li@gmail.com wrote:
> From: Li RongQing <roy.qing.li@gmail.com>
> 
> the queue length of sd->input_pkt_queue has been putted into qlen,
> and impossible to change, since hold the lock
> 
> Signed-off-by: Li RongQing <roy.qing.li@gmail.com>
> ---
>  net/core/dev.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/net/core/dev.c b/net/core/dev.c
> index 0814a56..b954400 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -3297,7 +3297,7 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
>  	rps_lock(sd);
>  	qlen = skb_queue_len(&sd->input_pkt_queue);
>  	if (qlen <= netdev_max_backlog && !skb_flow_limit(skb, qlen)) {
> -		if (skb_queue_len(&sd->input_pkt_queue)) {
> +		if (qlen) {
>  enqueue:
>  			__skb_queue_tail(&sd->input_pkt_queue, skb);
>  			input_queue_tail_incr_save(sd, qtail);

Acked-by: Eric Dumazet <edumazet@google.com>

Thanks !

^ permalink raw reply

* Re: [PATCH 2/3] bridge: offload bridge port attributes to switch asic if feature flag set
From: Sergei Shtylyov @ 2014-12-05 13:21 UTC (permalink / raw)
  To: roopa, jiri, sfeldma, jhs, bcrl, tgraf, john.fastabend, stephen,
	linville, nhorman, nicolas.dichtel, vyasevic, f.fainelli, buytenh,
	aviadr
  Cc: netdev, davem, shm, gospo
In-Reply-To: <1417746401-8140-3-git-send-email-roopa@cumulusnetworks.com>

Hello.

On 12/5/2014 5:26 AM, roopa@cumulusnetworks.com wrote:

> From: Roopa Prabhu <roopa@cumulusnetworks.com>

> This allows offloading to switch asic without having the user to set
> any flag. And this is done in the bridge driver to rollback kernel settings
> on hw offload failure if required in the future.

> With this, it also makes sure a notification goes out only after the
> attributes are set both in the kernel and hw.
> ---
>   net/bridge/br_netlink.c |   27 ++++++++++++++++++++++++++-
>   1 file changed, 26 insertions(+), 1 deletion(-)

> diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
> index 9f5eb55..ce173f0 100644
> --- a/net/bridge/br_netlink.c
> +++ b/net/bridge/br_netlink.c
> @@ -407,9 +407,21 @@ int br_setlink(struct net_device *dev, struct nlmsghdr *nlh)
>   				afspec, RTM_SETLINK);
>   	}
>
> +	if ((dev->features & NETIF_F_HW_SWITCH_OFFLOAD) &&
> +			dev->netdev_ops->ndo_bridge_setlink) {
> +		int ret = dev->netdev_ops->ndo_bridge_setlink(dev, nlh);

    Need empty line after declaration.

> +		if (ret && ret != -EOPNOTSUPP) {
> +			/* XXX Fix this in the future to rollback
> +			 * kernel settings and return error
> +			 */
> +			br_warn(p->br, "error offloading bridge attributes "
> +					"on port %u(%s)\n", (unsigned int) p->port_no,

    Don't break up the message strings. Also, your continuation lines should 
start under the next character after ( on the first line.

> +					p->dev->name);
> +		}
> +	}
> +
>   	if (err == 0)
>   		br_ifinfo_notify(RTM_NEWLINK, p);
> -

    Why?

>   out:
>   	return err;
>   }
> @@ -433,6 +445,19 @@ int br_dellink(struct net_device *dev, struct nlmsghdr *nlh)
>   	err = br_afspec((struct net_bridge *)netdev_priv(dev), p,
>   			afspec, RTM_DELLINK);
>
> +	if (dev->features & NETIF_F_HW_SWITCH_OFFLOAD
> +			&& dev->netdev_ops->ndo_bridge_setlink) {
> +		int ret = dev->netdev_ops->ndo_bridge_dellink(dev, nlh);
> +		if (ret && ret != -EOPNOTSUPP) {
> +			/* XXX Fix this in the future to rollback
> +			 * kernel settings and return error
> +			 */
> +			br_warn(p->br, "error offloading bridge attributes "
> +					"on port %u(%s)\n", (unsigned int) p->port_no,
> +					p->dev->name);

    Same comments here.

> +		}
> +	}
> +
>   	return err;
>   }
>   static int br_validate(struct nlattr *tb[], struct nlattr *data[])

WBR, Sergei

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox