Netdev List

Netdev List
 help / color / mirror / Atom feed

* Re: [Patch net-next v3 5/5] net_sched: act: clean up tca_action_flush()
From: Jamal Hadi Salim @ 2014-02-12 12:44 UTC (permalink / raw)
  To: Cong Wang, netdev; +Cc: David S. Miller
In-Reply-To: <1392167255-21744-6-git-send-email-xiyou.wangcong@gmail.com>

On 02/11/14 20:07, Cong Wang wrote:
> We could allocate tc_action on stack in tca_action_flush(),
> since it is not large.
>
> Also, we could use create_a() in tcf_action_get_1().
>
> Cc: Jamal Hadi Salim <jhs@mojatatu.com>
> Cc: David S. Miller <davem@davemloft.net>
> Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>


Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>

> ---
>   net/sched/act_api.c | 53 +++++++++++++++++++++++------------------------------
>   1 file changed, 23 insertions(+), 30 deletions(-)
>
> diff --git a/net/sched/act_api.c b/net/sched/act_api.c
> index 27e4c53..8a5ba5a 100644
> --- a/net/sched/act_api.c
> +++ b/net/sched/act_api.c
> @@ -685,6 +685,20 @@ act_get_notify(struct net *net, u32 portid, struct nlmsghdr *n,
>   	return rtnl_unicast(skb, net, portid);
>   }
>
> +static struct tc_action *create_a(int i)
> +{
> +	struct tc_action *act;
> +
> +	act = kzalloc(sizeof(*act), GFP_KERNEL);
> +	if (act == NULL) {
> +		pr_debug("create_a: failed to alloc!\n");
> +		return NULL;
> +	}
> +	act->order = i;
> +	INIT_LIST_HEAD(&act->list);
> +	return act;
> +}
> +
>   static struct tc_action *
>   tcf_action_get_1(struct nlattr *nla, struct nlmsghdr *n, u32 portid)
>   {
> @@ -704,11 +718,10 @@ tcf_action_get_1(struct nlattr *nla, struct nlmsghdr *n, u32 portid)
>   	index = nla_get_u32(tb[TCA_ACT_INDEX]);
>
>   	err = -ENOMEM;
> -	a = kzalloc(sizeof(struct tc_action), GFP_KERNEL);
> +	a = create_a(0);
>   	if (a == NULL)
>   		goto err_out;
>
> -	INIT_LIST_HEAD(&a->list);
>   	err = -EINVAL;
>   	a->ops = tc_lookup_action(tb[TCA_ACT_KIND]);
>   	if (a->ops == NULL) /* could happen in batch of actions */
> @@ -738,20 +751,6 @@ static void cleanup_a(struct list_head *actions)
>   	}
>   }
>
> -static struct tc_action *create_a(int i)
> -{
> -	struct tc_action *act;
> -
> -	act = kzalloc(sizeof(*act), GFP_KERNEL);
> -	if (act == NULL) {
> -		pr_debug("create_a: failed to alloc!\n");
> -		return NULL;
> -	}
> -	act->order = i;
> -	INIT_LIST_HEAD(&act->list);
> -	return act;
> -}
> -
>   static int tca_action_flush(struct net *net, struct nlattr *nla,
>   			    struct nlmsghdr *n, u32 portid)
>   {
> @@ -763,18 +762,12 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
>   	struct nlattr *nest;
>   	struct nlattr *tb[TCA_ACT_MAX + 1];
>   	struct nlattr *kind;
> -	struct tc_action *a = create_a(0);
> +	struct tc_action a;
>   	int err = -ENOMEM;
>
> -	if (a == NULL) {
> -		pr_debug("tca_action_flush: couldnt create tc_action\n");
> -		return err;
> -	}
> -
>   	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
>   	if (!skb) {
>   		pr_debug("tca_action_flush: failed skb alloc\n");
> -		kfree(a);
>   		return err;
>   	}
>
> @@ -786,8 +779,10 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
>
>   	err = -EINVAL;
>   	kind = tb[TCA_ACT_KIND];
> -	a->ops = tc_lookup_action(kind);
> -	if (a->ops == NULL) /*some idjot trying to flush unknown action */
> +	memset(&a, 0, sizeof(struct tc_action));
> +	INIT_LIST_HEAD(&a.list);
> +	a.ops = tc_lookup_action(kind);
> +	if (a.ops == NULL) /*some idjot trying to flush unknown action */
>   		goto err_out;
>
>   	nlh = nlmsg_put(skb, portid, n->nlmsg_seq, RTM_DELACTION, sizeof(*t), 0);
> @@ -802,7 +797,7 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
>   	if (nest == NULL)
>   		goto out_module_put;
>
> -	err = a->ops->walk(skb, &dcb, RTM_DELACTION, a);
> +	err = a.ops->walk(skb, &dcb, RTM_DELACTION, &a);
>   	if (err < 0)
>   		goto out_module_put;
>   	if (err == 0)
> @@ -812,8 +807,7 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
>
>   	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
>   	nlh->nlmsg_flags |= NLM_F_ROOT;
> -	module_put(a->ops->owner);
> -	kfree(a);
> +	module_put(a.ops->owner);
>   	err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
>   			     n->nlmsg_flags & NLM_F_ECHO);
>   	if (err > 0)
> @@ -822,11 +816,10 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
>   	return err;
>
>   out_module_put:
> -	module_put(a->ops->owner);
> +	module_put(a.ops->owner);
>   err_out:
>   noflush_out:
>   	kfree_skb(skb);
> -	kfree(a);
>   	return err;
>   }
>
>

^ permalink raw reply

* team driver MAC addresses questions
From: qca_dlansky @ 2014-02-12 13:04 UTC (permalink / raw)
  To: Jiri Pirko; +Cc: netdev@vger.kernel.org, qca_vkondrat

Hi Jiri,

We have a couple of questions about the team driver with regard to MAC address assignment.
For our testing, we have ath9k (wlan1) as the port being added into team0.

Q1:
When the team device is created, it gets a random MAC address. When adding wlan1 to the team device, team sticks to its random MAC and doesn't inherit one from the added port.
This is because inside team_dev_type_check_change():

if (dev->type == port_dev->type)
        return 0;

What is the reason for this behavior? Doesn't it make sense to inherit MAC from the 1st port being added?

Q2:
We've noticed the following behavior:
- wlan1 has some MAC address - m1
- team0 created with random MAC address - m2

root@lx-foo:~# ip link
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN mode DEFAULT 
    link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
2: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP mode DEFAULT qlen 1000
    link/ether 00:21:70:be:4f:68 brd ff:ff:ff:ff:ff:ff
4: wlan1: <BROADCAST,MULTICAST> mtu 1500 qdisc mq state DOWN mode DORMANT qlen 1000
    link/ether 44:39:c4:18:b5:7a brd ff:ff:ff:ff:ff:ff
12: team0: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN mode DEFAULT 
    link/ether 9a:ac:d5:91:df:aa brd ff:ff:ff:ff:ff:ff

- When adding port wlan1 into team0, m1 gets assigned to team0 and m2 gets assigned to wlan1

root@lx-foo:~# LD_LIBRARY_PATH=/usr/local/lib/ teamdctl team0 port add wlan1
root@lx-foo:~# ip link
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN mode DEFAULT 
    link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
2: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP mode DEFAULT qlen 1000
    link/ether 00:21:70:be:4f:68 brd ff:ff:ff:ff:ff:ff
4: wlan1: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc mq master team0 state UP mode DORMANT qlen 1000
    link/ether 9a:ac:d5:91:df:aa brd ff:ff:ff:ff:ff:ff
12: team0: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN mode DEFAULT 
    link/ether 44:39:c4:18:b5:7a brd ff:ff:ff:ff:ff:ff

This is a bit strange for us. We were expecting both of them to have the same MAC address.

Further investigation showed that when adding the port, the MAC address of the added port is set twice:
once by team_add_slave(), when wlan1 is still down. The MAC address is set to m2.
second time by team_set_mac_address(), after wlan1 is up. The MAC address is set to m1.

However, the 2nd call fails, which leaves wlan1 with m2. The failure occurs because ath9k doesn't support IFF_LIVE_ADDR_CHANGE.

Our question: in team_set_mac_address(), in case there is a failure setting MAC address for any port inside port_list, doesn't it make sense to undo MAC address changes and report the failure up the stack?

Thanks,
  Vladimir & Dedy.

^ permalink raw reply

* Re: team driver MAC addresses questions
From: Jiri Pirko @ 2014-02-12 13:37 UTC (permalink / raw)
  To: qca_dlansky; +Cc: netdev@vger.kernel.org, qca_vkondrat
In-Reply-To: <65502A17874025489B7D8479C16CFA634E91D1D0@nasanexd02f.na.qualcomm.com>

Wed, Feb 12, 2014 at 02:04:45PM CET, qca_dlansky@qualcomm.com wrote:
>Hi Jiri,
>
>We have couple of questions about the team driver with regards to MAC addresses assignment.
>For our testing, we have ath9k (wlan1) as the port being added into team0.
>
>Q1:
>When team device created it gets some random MAC address. When adding wlan1 into team device, team sticks to its random MAC and doesn't inherit from the added port.
>This is because inside team_dev_type_check_change():
>
>if (dev->type == port_dev->type)
>        return 0;
>
>What is the reason for this behavior? Doesn't it make sense to inherit MAC from the 1st port being added?

Well, there is no reason to inherit the mac from the first, second, ... port
added. Consider that a different order of addition would leave the team with
a different mac every time. The administrator can easily set the mac address to
anything he sees fit, and this mac will always be used.

>
>Q2:
>We've noticed the following behavior:
>- wlan1 has some MAC address - m1
>- team0 created with random MAC address - m2
>
>root@lx-foo:~# ip link
>1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN mode DEFAULT 
>    link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
>2: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP mode DEFAULT qlen 1000
>    link/ether 00:21:70:be:4f:68 brd ff:ff:ff:ff:ff:ff
>4: wlan1: <BROADCAST,MULTICAST> mtu 1500 qdisc mq state DOWN mode DORMANT qlen 1000
>    link/ether 44:39:c4:18:b5:7a brd ff:ff:ff:ff:ff:ff
>12: team0: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN mode DEFAULT 
>    link/ether 9a:ac:d5:91:df:aa brd ff:ff:ff:ff:ff:ff
>
>- When adding port wlan1 into team0, m1 gets assigned to team0 and m2 gets assigned to wlan1
>
>root@lx-foo:~# LD_LIBRARY_PATH=/usr/local/lib/ teamdctl team0 port add wlan1
>root@lx-foo:~# ip link
>1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN mode DEFAULT 
>    link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
>2: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP mode DEFAULT qlen 1000
>    link/ether 00:21:70:be:4f:68 brd ff:ff:ff:ff:ff:ff
>4: wlan1: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc mq master team0 state UP mode DORMANT qlen 1000
>    link/ether 9a:ac:d5:91:df:aa brd ff:ff:ff:ff:ff:ff
>12: team0: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN mode DEFAULT 
>    link/ether 44:39:c4:18:b5:7a brd ff:ff:ff:ff:ff:ff
>
>This is a bit strange for us. We were expecting both of them to have the same MAC address.
>
>Further investigation showed that when adding the port, the MAC address of the added port is set twice:
>once by team_add_slave(), when wlan1 is still down. The MAC address is set to m2.
>second time by team_set_mac_address(), after wlan1 is up. The MAC address is set to m1.

Are you using teamd? What config?

>
>However, the 2nd call fails which leaves wlan1 with m2. The reason for the failure is because ath9k doesn't support IFF_LIVE_ADDR_CHANGE.
>
>Our question: in team_set_mac_address(), in case there is a failure setting MAC address for any port inside port_list, doesn't it make sense to undo MAC address changes and report the failure up the stack?
>

Yes. That would probably be the proper thing to do. Would you send the
patch, or should I do it? It would be necessary to do something similar to
what bonding does.

>Thanks,
>  Vladimir & Dedy.

^ permalink raw reply

* [PATCH net-next v2] IPv6: enable bind() to assign an anycast address
From: Francois-Xavier Le Bail @ 2014-02-12 13:38 UTC (permalink / raw)
  To: NETDEV, David Miller, Hannes Frederic Sowa

- Use ipv6_chk_acast_addr_src() in inet6_bind().

Signed-off-by: Francois-Xavier Le Bail <fx.lebail@yahoo.com>
---
v2: ipv6_chk_acast_addr_src() was previously added.

 net/ipv6/af_inet6.c |    4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index c921d5d..68b81e9 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -347,7 +347,9 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 			if (!(addr_type & IPV6_ADDR_MULTICAST))	{
 				if (!(inet->freebind || inet->transparent) &&
 				    !ipv6_chk_addr(net, &addr->sin6_addr,
-						   dev, 0)) {
+						   dev, 0) &&
+				    !ipv6_chk_acast_addr_src(net, dev,
+							     &addr->sin6_addr)) {
 					err = -EADDRNOTAVAIL;
 					goto out_unlock;
 				}

^ permalink raw reply related

* bnx2x + SR-IOV, no internal L2 switching
From: Yoann Juet @ 2014-02-12 13:33 UTC (permalink / raw)
  To: netdev

[-- Attachment #1.1: Type: text/plain, Size: 1363 bytes --]

Hi all,

I'm conducting experiments on SR-IOV with Broadcom and Intel cards on 
debian/unstable with KVM hypervisor. On Broadcom cards (bnx2x module, 
BCM57810 devices), Virtual Functions (VFs) get running, Virtual Machines 
attached to such VFs inherit network connectivity with excellent 
performance.

However, VMs attached to VFs on the Broadcom Physical Functions (PFs) 
behave like they were connected to an ancient hub, not a L2 switch. It 
is as if there was no internal L2 switching on the Broadcom card to 
process VF <-> VF or VF <-> PF communications. As a result, a VM sees 
all inbound/outbound traffic from/to other VMs as well as traffic 
destined to the PF (for instance, the physical ethX has an IP address).

On the other hand, everything works like a charm with Intel cards (ixgbe 
module, 82599EB devices). Traffic between VFs or VF/PF is switched 
internally by the card.

I found very little literature about SR-IOV on Broadcom devices. I 
wonder if it's a normal behaviour, a misconfiguration on my side or 
perhaps a firmware/driver bug.

Have you seen this issue before ?

---
Kernel 3.12.9 (same behaviour with kernels 3.10.x)
	driver: bnx2x
	firmware-version: 7.8.17
Debian/unstable
	libvirt 1.2.1
	QEMU 1.7.0
---

Best regards,
-- 
Université de Nantes - Direction des Systèmes d'Information

[-- Attachment #1.2: yoann_juet.vcf --]
[-- Type: text/x-vcard, Size: 377 bytes --]

begin:vcard
fn:Yoann Juet
n:Juet;Yoann
org;quoted-printable;quoted-printable:Direction des Syst=C3=A8mes d'Information;P=C3=B4le R=C3=A9seau
adr;quoted-printable:BP 92208;;2 rue de la Houssini=C3=A8re;Nantes Cedex 3;;44322;France
email;internet:yoann.juet@univ-nantes.fr
tel;work:02.53.48.49.26
tel;fax:02.53.48.49.09
tel;cell:06.73.15.42.19
version:2.1
end:vcard

[-- Attachment #2: S/MIME Cryptographic Signature --]
[-- Type: application/pkcs7-signature, Size: 3256 bytes --]

^ permalink raw reply

* [PATCH] [CLEANUP] tcp: remove unused min_cwnd member of tcp_congestion_ops
From: Stanislav Fomichev @ 2014-02-12 13:35 UTC (permalink / raw)
  To: davem, kuznet, jmorris, yoshfuji, kaber, edumazet, ncardwell,
	ycheng; +Cc: netdev

Commit 684bad110757 "tcp: use PRR to reduce cwin in CWR state" removed all
calls to min_cwnd, so we can safely remove it.
Also, remove tcp_reno_min_cwnd because it was only used for min_cwnd.

Signed-off-by: Stanislav Fomichev <stfomichev@yandex-team.ru>
---
 Documentation/networking/tcp.txt |  2 +-
 include/net/tcp.h                |  3 ---
 net/ipv4/tcp_cong.c              | 10 ----------
 net/ipv4/tcp_highspeed.c         |  1 -
 net/ipv4/tcp_hybla.c             |  1 -
 net/ipv4/tcp_illinois.c          |  1 -
 net/ipv4/tcp_lp.c                |  1 -
 net/ipv4/tcp_scalable.c          |  1 -
 net/ipv4/tcp_vegas.c             |  1 -
 net/ipv4/tcp_westwood.c          |  1 -
 net/ipv4/tcp_yeah.c              |  1 -
 11 files changed, 1 insertion(+), 22 deletions(-)

diff --git a/Documentation/networking/tcp.txt b/Documentation/networking/tcp.txt
index 7d11bb5dc30a..bdc4c0db51e1 100644
--- a/Documentation/networking/tcp.txt
+++ b/Documentation/networking/tcp.txt
@@ -30,7 +30,7 @@ A congestion control mechanism can be registered through functions in
 tcp_cong.c. The functions used by the congestion control mechanism are
 registered via passing a tcp_congestion_ops struct to
 tcp_register_congestion_control. As a minimum name, ssthresh,
-cong_avoid, min_cwnd must be valid.
+cong_avoid must be valid.
 
 Private data for a congestion control mechanism is stored in tp->ca_priv.
 tcp_ca(tp) returns a pointer to this space.  This is preallocated space - it
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 70e55d200610..8ccc431f7269 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -793,8 +793,6 @@ struct tcp_congestion_ops {
 
 	/* return slow start threshold (required) */
 	u32 (*ssthresh)(struct sock *sk);
-	/* lower bound for congestion window (optional) */
-	u32 (*min_cwnd)(const struct sock *sk);
 	/* do new cwnd calculation (required) */
 	void (*cong_avoid)(struct sock *sk, u32 ack, u32 acked, u32 in_flight);
 	/* call before changing ca_state (optional) */
@@ -829,7 +827,6 @@ void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w);
 extern struct tcp_congestion_ops tcp_init_congestion_ops;
 u32 tcp_reno_ssthresh(struct sock *sk);
 void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked, u32 in_flight);
-u32 tcp_reno_min_cwnd(const struct sock *sk);
 extern struct tcp_congestion_ops tcp_reno;
 
 static inline void tcp_set_ca_state(struct sock *sk, const u8 ca_state)
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index ad37bf18ae4b..f49351edf97d 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -362,21 +362,12 @@ u32 tcp_reno_ssthresh(struct sock *sk)
 }
 EXPORT_SYMBOL_GPL(tcp_reno_ssthresh);
 
-/* Lower bound on congestion window with halving. */
-u32 tcp_reno_min_cwnd(const struct sock *sk)
-{
-	const struct tcp_sock *tp = tcp_sk(sk);
-	return tp->snd_ssthresh/2;
-}
-EXPORT_SYMBOL_GPL(tcp_reno_min_cwnd);
-
 struct tcp_congestion_ops tcp_reno = {
 	.flags		= TCP_CONG_NON_RESTRICTED,
 	.name		= "reno",
 	.owner		= THIS_MODULE,
 	.ssthresh	= tcp_reno_ssthresh,
 	.cong_avoid	= tcp_reno_cong_avoid,
-	.min_cwnd	= tcp_reno_min_cwnd,
 };
 
 /* Initial congestion control used (until SYN)
@@ -388,6 +379,5 @@ struct tcp_congestion_ops tcp_init_congestion_ops  = {
 	.owner		= THIS_MODULE,
 	.ssthresh	= tcp_reno_ssthresh,
 	.cong_avoid	= tcp_reno_cong_avoid,
-	.min_cwnd	= tcp_reno_min_cwnd,
 };
 EXPORT_SYMBOL_GPL(tcp_init_congestion_ops);
diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c
index 8ed9305dfdf4..8b9e7bad77c0 100644
--- a/net/ipv4/tcp_highspeed.c
+++ b/net/ipv4/tcp_highspeed.c
@@ -162,7 +162,6 @@ static struct tcp_congestion_ops tcp_highspeed __read_mostly = {
 	.init		= hstcp_init,
 	.ssthresh	= hstcp_ssthresh,
 	.cong_avoid	= hstcp_cong_avoid,
-	.min_cwnd	= tcp_reno_min_cwnd,
 
 	.owner		= THIS_MODULE,
 	.name		= "highspeed"
diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c
index 478fe82611bf..2a1a9e2a4e51 100644
--- a/net/ipv4/tcp_hybla.c
+++ b/net/ipv4/tcp_hybla.c
@@ -166,7 +166,6 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 acked,
 static struct tcp_congestion_ops tcp_hybla __read_mostly = {
 	.init		= hybla_init,
 	.ssthresh	= tcp_reno_ssthresh,
-	.min_cwnd	= tcp_reno_min_cwnd,
 	.cong_avoid	= hybla_cong_avoid,
 	.set_state	= hybla_state,
 
diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c
index 8a520996f3d2..560d9879b89c 100644
--- a/net/ipv4/tcp_illinois.c
+++ b/net/ipv4/tcp_illinois.c
@@ -329,7 +329,6 @@ static struct tcp_congestion_ops tcp_illinois __read_mostly = {
 	.flags		= TCP_CONG_RTT_STAMP,
 	.init		= tcp_illinois_init,
 	.ssthresh	= tcp_illinois_ssthresh,
-	.min_cwnd	= tcp_reno_min_cwnd,
 	.cong_avoid	= tcp_illinois_cong_avoid,
 	.set_state	= tcp_illinois_state,
 	.get_info	= tcp_illinois_info,
diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c
index 991d62a2f9bb..503798f2fcd6 100644
--- a/net/ipv4/tcp_lp.c
+++ b/net/ipv4/tcp_lp.c
@@ -319,7 +319,6 @@ static struct tcp_congestion_ops tcp_lp __read_mostly = {
 	.init = tcp_lp_init,
 	.ssthresh = tcp_reno_ssthresh,
 	.cong_avoid = tcp_lp_cong_avoid,
-	.min_cwnd = tcp_reno_min_cwnd,
 	.pkts_acked = tcp_lp_pkts_acked,
 
 	.owner = THIS_MODULE,
diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c
index 19ea6c2951f3..0ac50836da4d 100644
--- a/net/ipv4/tcp_scalable.c
+++ b/net/ipv4/tcp_scalable.c
@@ -39,7 +39,6 @@ static u32 tcp_scalable_ssthresh(struct sock *sk)
 static struct tcp_congestion_ops tcp_scalable __read_mostly = {
 	.ssthresh	= tcp_scalable_ssthresh,
 	.cong_avoid	= tcp_scalable_cong_avoid,
-	.min_cwnd	= tcp_reno_min_cwnd,
 
 	.owner		= THIS_MODULE,
 	.name		= "scalable",
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index 06cae62bf208..a022c17c9cf1 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -310,7 +310,6 @@ static struct tcp_congestion_ops tcp_vegas __read_mostly = {
 	.init		= tcp_vegas_init,
 	.ssthresh	= tcp_reno_ssthresh,
 	.cong_avoid	= tcp_vegas_cong_avoid,
-	.min_cwnd	= tcp_reno_min_cwnd,
 	.pkts_acked	= tcp_vegas_pkts_acked,
 	.set_state	= tcp_vegas_state,
 	.cwnd_event	= tcp_vegas_cwnd_event,
diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c
index 76a1e23259e1..b94a04ae2ed5 100644
--- a/net/ipv4/tcp_westwood.c
+++ b/net/ipv4/tcp_westwood.c
@@ -276,7 +276,6 @@ static struct tcp_congestion_ops tcp_westwood __read_mostly = {
 	.init		= tcp_westwood_init,
 	.ssthresh	= tcp_reno_ssthresh,
 	.cong_avoid	= tcp_reno_cong_avoid,
-	.min_cwnd	= tcp_westwood_bw_rttmin,
 	.cwnd_event	= tcp_westwood_event,
 	.get_info	= tcp_westwood_info,
 	.pkts_acked	= tcp_westwood_pkts_acked,
diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c
index a347a078ee07..5d2d15c5295f 100644
--- a/net/ipv4/tcp_yeah.c
+++ b/net/ipv4/tcp_yeah.c
@@ -231,7 +231,6 @@ static struct tcp_congestion_ops tcp_yeah __read_mostly = {
 	.init		= tcp_yeah_init,
 	.ssthresh	= tcp_yeah_ssthresh,
 	.cong_avoid	= tcp_yeah_cong_avoid,
-	.min_cwnd	= tcp_reno_min_cwnd,
 	.set_state	= tcp_vegas_state,
 	.cwnd_event	= tcp_vegas_cwnd_event,
 	.get_info	= tcp_vegas_get_info,
-- 
1.8.3.2

^ permalink raw reply related

* [PATCH ipsec-next v2] xfrm: avoid creating temporary SA when there are no listeners
From: Horia Geanta @ 2014-02-12 14:00 UTC (permalink / raw)
  To: Steffen Klassert; +Cc: David S. Miller, netdev
In-Reply-To: <20140212120914.GB3438@secunet.com>

In the case when KMs have no listeners, km_query() will fail and
temporary SAs are garbage collected immediately after their allocation.
This causes strain on memory allocation, leading even to OOM since
temporary SA alloc/free cycle is performed for every packet
and garbage collection does not keep up the pace.

The sane thing to do is to make sure we have an audience before
temporary SA allocation.

Signed-off-by: Horia Geanta <horia.geanta@freescale.com>
---
v2: rebased onto current ipsec-next and fixed build failure

 include/net/xfrm.h    | 15 +++++++++++++++
 net/key/af_key.c      | 19 +++++++++++++++++++
 net/xfrm/xfrm_state.c | 31 +++++++++++++++++++++++++++++++
 net/xfrm/xfrm_user.c  |  6 ++++++
 4 files changed, 71 insertions(+)

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index afa5730..5313ccf 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -594,6 +594,7 @@ struct xfrm_mgr {
 					   const struct xfrm_migrate *m,
 					   int num_bundles,
 					   const struct xfrm_kmaddress *k);
+	bool			(*is_alive)(const struct km_event *c);
 };
 
 int xfrm_register_km(struct xfrm_mgr *km);
@@ -1646,6 +1647,20 @@ static inline int xfrm_aevent_is_on(struct net *net)
 	rcu_read_unlock();
 	return ret;
 }
+
+static inline int xfrm_acquire_is_on(struct net *net)
+{
+	struct sock *nlsk;
+	int ret = 0;
+
+	rcu_read_lock();
+	nlsk = rcu_dereference(net->xfrm.nlsk);
+	if (nlsk)
+		ret = netlink_has_listeners(nlsk, XFRMNLGRP_ACQUIRE);
+	rcu_read_unlock();
+
+	return ret;
+}
 #endif
 
 static inline int xfrm_alg_len(const struct xfrm_algo *alg)
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 1a04c13..e1c69d0 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -3059,6 +3059,24 @@ static u32 get_acqseq(void)
 	return res;
 }
 
+static bool pfkey_is_alive(const struct km_event *c)
+{
+	struct netns_pfkey *net_pfkey = net_generic(c->net, pfkey_net_id);
+	struct sock *sk;
+	bool is_alive = false;
+
+	rcu_read_lock();
+	sk_for_each_rcu(sk, &net_pfkey->table) {
+		if (pfkey_sk(sk)->registered) {
+			is_alive = true;
+			break;
+		}
+	}
+	rcu_read_unlock();
+
+	return is_alive;
+}
+
 static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *xp)
 {
 	struct sk_buff *skb;
@@ -3784,6 +3802,7 @@ static struct xfrm_mgr pfkeyv2_mgr =
 	.new_mapping	= pfkey_send_new_mapping,
 	.notify_policy	= pfkey_send_policy_notify,
 	.migrate	= pfkey_send_migrate,
+	.is_alive	= pfkey_is_alive,
 };
 
 static int __net_init pfkey_net_init(struct net *net)
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index a26b7aa..cea0dad 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -161,6 +161,7 @@ static DEFINE_SPINLOCK(xfrm_state_gc_lock);
 int __xfrm_state_delete(struct xfrm_state *x);
 
 int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol);
+bool km_is_alive(const struct km_event *c);
 void km_state_expired(struct xfrm_state *x, int hard, u32 portid);
 
 static DEFINE_SPINLOCK(xfrm_type_lock);
@@ -788,6 +789,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
 	struct xfrm_state *best = NULL;
 	u32 mark = pol->mark.v & pol->mark.m;
 	unsigned short encap_family = tmpl->encap_family;
+	struct km_event c;
 
 	to_put = NULL;
 
@@ -832,6 +834,17 @@ found:
 			error = -EEXIST;
 			goto out;
 		}
+
+		c.net = net;
+		/* If the KMs have no listeners (yet...), avoid allocating an SA
+		 * for each and every packet - garbage collection might not
+		 * handle the flood.
+		 */
+		if (!km_is_alive(&c)) {
+			error = -ESRCH;
+			goto out;
+		}
+
 		x = xfrm_state_alloc(net);
 		if (x == NULL) {
 			error = -ENOMEM;
@@ -1793,6 +1806,24 @@ int km_report(struct net *net, u8 proto, struct xfrm_selector *sel, xfrm_address
 }
 EXPORT_SYMBOL(km_report);
 
+bool km_is_alive(const struct km_event *c)
+{
+	struct xfrm_mgr *km;
+	bool is_alive = false;
+
+	rcu_read_lock();
+	list_for_each_entry(km, &xfrm_km_list, list) {
+		if (km->is_alive && km->is_alive(c)) {
+			is_alive = true;
+			break;
+		}
+	}
+	rcu_read_unlock();
+
+	return is_alive;
+}
+EXPORT_SYMBOL(km_is_alive);
+
 int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
 {
 	int err;
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index ade9988..d7694f2 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -2982,6 +2982,11 @@ static int xfrm_send_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr,
 	return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_MAPPING, GFP_ATOMIC);
 }
 
+static bool xfrm_is_alive(const struct km_event *c)
+{
+	return (bool)xfrm_acquire_is_on(c->net);
+}
+
 static struct xfrm_mgr netlink_mgr = {
 	.id		= "netlink",
 	.notify		= xfrm_send_state_notify,
@@ -2991,6 +2996,7 @@ static struct xfrm_mgr netlink_mgr = {
 	.report		= xfrm_send_report,
 	.migrate	= xfrm_send_migrate,
 	.new_mapping	= xfrm_send_mapping,
+	.is_alive	= xfrm_is_alive,
 };
 
 static int __net_init xfrm_user_net_init(struct net *net)
-- 
1.8.3.1

^ permalink raw reply related

* Re: [PATCH v6] can: add Renesas R-Car CAN driver
From: Sergei Shtylyov @ 2014-02-12 14:08 UTC (permalink / raw)
  To: wg, mkl, linux-can; +Cc: linux-sh, vksavl, netdev
In-Reply-To: <201401250416.13430.sergei.shtylyov@cogentembedded.com>

Hello.

On 25-01-2014 5:16, Sergei Shtylyov wrote:

> Add support for the CAN controller found in Renesas R-Car SoCs.

> Signed-off-by: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>

> ---
> The patch is against the 'linux-can-next.git' repo.
> Not posting to netdev@vger.kernel.org this time or DaveM will scold me. :-)

    Wolfgang, Marc, more than 2 weeks have passed since I posted this new 
version; any feedback now that net-next is open again?

WBR, Sergei


^ permalink raw reply

* RE: team driver MAC addresses questions
From: qca_dlansky @ 2014-02-12 14:17 UTC (permalink / raw)
  To: Jiri Pirko; +Cc: netdev@vger.kernel.org, qca_vkondrat
In-Reply-To: <20140212133726.GA3027@minipsycho.orion>

>Q2:
>We've noticed the following behavior:
>- wlan1 has some MAC address - m1
>- team0 created with random MAC address - m2
>
>root@lx-foo:~# ip link
>1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN mode DEFAULT 
>    link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
>2: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP mode DEFAULT qlen 1000
>    link/ether 00:21:70:be:4f:68 brd ff:ff:ff:ff:ff:ff
>4: wlan1: <BROADCAST,MULTICAST> mtu 1500 qdisc mq state DOWN mode DORMANT qlen 1000
>    link/ether 44:39:c4:18:b5:7a brd ff:ff:ff:ff:ff:ff
>12: team0: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN mode DEFAULT 
>    link/ether 9a:ac:d5:91:df:aa brd ff:ff:ff:ff:ff:ff
>
>- When adding port wlan1 into team0, m1 gets assigned to team0 and m2 
>gets assigned to wlan1
>
>root@lx-foo:~# LD_LIBRARY_PATH=/usr/local/lib/ teamdctl team0 port add 
>wlan1 root@lx-foo:~# ip link
>1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN mode DEFAULT 
>    link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
>2: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP mode DEFAULT qlen 1000
>    link/ether 00:21:70:be:4f:68 brd ff:ff:ff:ff:ff:ff
>4: wlan1: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc mq master team0 state UP mode DORMANT qlen 1000
>    link/ether 9a:ac:d5:91:df:aa brd ff:ff:ff:ff:ff:ff
>12: team0: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN mode DEFAULT 
>    link/ether 44:39:c4:18:b5:7a brd ff:ff:ff:ff:ff:ff
>
>This is a bit strange for us. We were expecting both of them to have the same MAC address.
>
>Further investigation showed that when adding the port, the MAC address of the added port is set twice:
>once by team_add_slave(), when wlan1 is still down. The MAC address is set to m2.
>second time by team_set_mac_address(), after wlan1 is up. The MAC address is set to m1.

Are you using teamd? What config?

[DL]  just 'teamd -d', no config at all

>
>However, the 2nd call fails which leaves wlan1 with m2. The reason for the failure is because ath9k doesn't support IFF_LIVE_ADDR_CHANGE.
>
>Our question: in team_set_mac_address(), in case there is a failure setting MAC address for any port inside port_list, doesn't it make sense to undo MAC address changes and report the failure up the stack?
>

Yes. That would probably be the proper thing to do. Would you send the patch, or should I do it? It would be necessary to do something similar to what bonding does.

[DL] Let us think about it. We want to prepare a patch, but we need time to do so.

>Thanks,
>  Vladimir & Dedy.

^ permalink raw reply

* Re: netfilter: nf_tables: add reject module for NFPROTO_INET
From: Dave Jones @ 2014-02-12 14:18 UTC (permalink / raw)
  To: netdev; +Cc: kaber, pablo
In-Reply-To: <20140211203359.56ED7660CD3@gitolite.kernel.org>

On Tue, Feb 11, 2014 at 08:33:59PM +0000, Linux Kernel wrote:
 > Gitweb:     http://git.kernel.org/linus/;a=commit;h=05513e9e33dbded8124567466a444d32173eecc6
 > Commit:     05513e9e33dbded8124567466a444d32173eecc6
 > Parent:     cc4723ca316742891954efa346298e7c747c0d17
 > Author:     Patrick McHardy <kaber@trash.net>
 > AuthorDate: Wed Feb 5 15:03:39 2014 +0000
 > Committer:  Pablo Neira Ayuso <pablo@netfilter.org>
 > CommitDate: Thu Feb 6 09:44:18 2014 +0100
 > 
 >     netfilter: nf_tables: add reject module for NFPROTO_INET
 >     
 >     Add a reject module for NFPROTO_INET. It does nothing but dispatch
 >     to the AF-specific modules based on the hook family.
 >     
 >     Signed-off-by: Patrick McHardy <kaber@trash.net>
 >     Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
 > ---

....

 > +static void nft_reject_inet_eval(const struct nft_expr *expr,
 > +				 struct nft_data data[NFT_REG_MAX + 1],
 > +				 const struct nft_pktinfo *pkt)
 > +{
 > +	switch (pkt->ops->pf) {
 > +	case NFPROTO_IPV4:
 > +		nft_reject_ipv4_eval(expr, data, pkt);
 > +	case NFPROTO_IPV6:
 > +		nft_reject_ipv6_eval(expr, data, pkt);
 > +	}
 > +}

Is the fallthrough intentional here, or is there a missing break ?

	Dave

^ permalink raw reply

* [PATCH ipsec-next v3] xfrm: avoid creating temporary SA when there are no listeners
From: Horia Geanta @ 2014-02-12 14:20 UTC (permalink / raw)
  To: Steffen Klassert; +Cc: David S. Miller, netdev
In-Reply-To: <1392213609-20098-1-git-send-email-horia.geanta@freescale.com>

In the case when KMs have no listeners, km_query() will fail and
temporary SAs are garbage collected immediately after their allocation.
This causes strain on memory allocation, leading even to OOM since
temporary SA alloc/free cycle is performed for every packet
and garbage collection does not keep up the pace.

The sane thing to do is to make sure we have an audience before
temporary SA allocation.

Signed-off-by: Horia Geanta <horia.geanta@freescale.com>
---
v3: rebase typo in v2 - must use list_for_each_entry_rcu
instead of list_for_each_entry

v2: rebased onto current ipsec-next and fixed build failure

 include/net/xfrm.h    | 15 +++++++++++++++
 net/key/af_key.c      | 19 +++++++++++++++++++
 net/xfrm/xfrm_state.c | 31 +++++++++++++++++++++++++++++++
 net/xfrm/xfrm_user.c  |  6 ++++++
 4 files changed, 71 insertions(+)

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index afa5730..5313ccf 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -594,6 +594,7 @@ struct xfrm_mgr {
 					   const struct xfrm_migrate *m,
 					   int num_bundles,
 					   const struct xfrm_kmaddress *k);
+	bool			(*is_alive)(const struct km_event *c);
 };
 
 int xfrm_register_km(struct xfrm_mgr *km);
@@ -1646,6 +1647,20 @@ static inline int xfrm_aevent_is_on(struct net *net)
 	rcu_read_unlock();
 	return ret;
 }
+
+static inline int xfrm_acquire_is_on(struct net *net)
+{
+	struct sock *nlsk;
+	int ret = 0;
+
+	rcu_read_lock();
+	nlsk = rcu_dereference(net->xfrm.nlsk);
+	if (nlsk)
+		ret = netlink_has_listeners(nlsk, XFRMNLGRP_ACQUIRE);
+	rcu_read_unlock();
+
+	return ret;
+}
 #endif
 
 static inline int xfrm_alg_len(const struct xfrm_algo *alg)
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 1a04c13..e1c69d0 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -3059,6 +3059,24 @@ static u32 get_acqseq(void)
 	return res;
 }
 
+static bool pfkey_is_alive(const struct km_event *c)
+{
+	struct netns_pfkey *net_pfkey = net_generic(c->net, pfkey_net_id);
+	struct sock *sk;
+	bool is_alive = false;
+
+	rcu_read_lock();
+	sk_for_each_rcu(sk, &net_pfkey->table) {
+		if (pfkey_sk(sk)->registered) {
+			is_alive = true;
+			break;
+		}
+	}
+	rcu_read_unlock();
+
+	return is_alive;
+}
+
 static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *xp)
 {
 	struct sk_buff *skb;
@@ -3784,6 +3802,7 @@ static struct xfrm_mgr pfkeyv2_mgr =
 	.new_mapping	= pfkey_send_new_mapping,
 	.notify_policy	= pfkey_send_policy_notify,
 	.migrate	= pfkey_send_migrate,
+	.is_alive	= pfkey_is_alive,
 };
 
 static int __net_init pfkey_net_init(struct net *net)
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index a26b7aa..0bf12f6 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -161,6 +161,7 @@ static DEFINE_SPINLOCK(xfrm_state_gc_lock);
 int __xfrm_state_delete(struct xfrm_state *x);
 
 int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol);
+bool km_is_alive(const struct km_event *c);
 void km_state_expired(struct xfrm_state *x, int hard, u32 portid);
 
 static DEFINE_SPINLOCK(xfrm_type_lock);
@@ -788,6 +789,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
 	struct xfrm_state *best = NULL;
 	u32 mark = pol->mark.v & pol->mark.m;
 	unsigned short encap_family = tmpl->encap_family;
+	struct km_event c;
 
 	to_put = NULL;
 
@@ -832,6 +834,17 @@ found:
 			error = -EEXIST;
 			goto out;
 		}
+
+		c.net = net;
+		/* If the KMs have no listeners (yet...), avoid allocating an SA
+		 * for each and every packet - garbage collection might not
+		 * handle the flood.
+		 */
+		if (!km_is_alive(&c)) {
+			error = -ESRCH;
+			goto out;
+		}
+
 		x = xfrm_state_alloc(net);
 		if (x == NULL) {
 			error = -ENOMEM;
@@ -1793,6 +1806,24 @@ int km_report(struct net *net, u8 proto, struct xfrm_selector *sel, xfrm_address
 }
 EXPORT_SYMBOL(km_report);
 
+bool km_is_alive(const struct km_event *c)
+{
+	struct xfrm_mgr *km;
+	bool is_alive = false;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(km, &xfrm_km_list, list) {
+		if (km->is_alive && km->is_alive(c)) {
+			is_alive = true;
+			break;
+		}
+	}
+	rcu_read_unlock();
+
+	return is_alive;
+}
+EXPORT_SYMBOL(km_is_alive);
+
 int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
 {
 	int err;
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index ade9988..d7694f2 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -2982,6 +2982,11 @@ static int xfrm_send_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr,
 	return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_MAPPING, GFP_ATOMIC);
 }
 
+static bool xfrm_is_alive(const struct km_event *c)
+{
+	return (bool)xfrm_acquire_is_on(c->net);
+}
+
 static struct xfrm_mgr netlink_mgr = {
 	.id		= "netlink",
 	.notify		= xfrm_send_state_notify,
@@ -2991,6 +2996,7 @@ static struct xfrm_mgr netlink_mgr = {
 	.report		= xfrm_send_report,
 	.migrate	= xfrm_send_migrate,
 	.new_mapping	= xfrm_send_mapping,
+	.is_alive	= xfrm_is_alive,
 };
 
 static int __net_init xfrm_user_net_init(struct net *net)
-- 
1.8.3.1

^ permalink raw reply related

* Re: [PATCH net-next] ipv4: ip_forward: perform skb->pkt_type check at the beginning
From: Sergei Shtylyov @ 2014-02-12 14:25 UTC (permalink / raw)
  To: Denis Kirjanov, davem, netdev
In-Reply-To: <1392192632-24513-1-git-send-email-kda@linux-powerpc.org>

Hello.

On 12-02-2014 12:10, Denis Kirjanov wrote:

> Packets which have L2 address different from ours should be
> already filtered before entering into ip_forward().

> Perform that check at the beginning to avoid processing such packets.

> Signed-off-by: Denis Kirjanov <kda@linux-powerpc.org>
> ---
>   net/ipv4/ip_forward.c | 7 ++++---
>   1 file changed, 4 insertions(+), 3 deletions(-)

> diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
> index e9f1217..1a07056 100644
> --- a/net/ipv4/ip_forward.c
> +++ b/net/ipv4/ip_forward.c
> @@ -59,6 +59,10 @@ int ip_forward(struct sk_buff *skb)
>   	struct rtable *rt;	/* Route we use */
>   	struct ip_options *opt	= &(IPCB(skb)->opt);
>
> +    /* that should never happen */
> +    if (skb->pkt_type != PACKET_HOST)
> +        goto drop;
> +

    Please indent the code with tabs, not spaces.

>   	if (skb_warn_if_lro(skb))
>   		goto drop;
>
> @@ -68,9 +72,6 @@ int ip_forward(struct sk_buff *skb)
>   	if (IPCB(skb)->opt.router_alert && ip_call_ra_chain(skb))
>   		return NET_RX_SUCCESS;
>
> -	if (skb->pkt_type != PACKET_HOST)
> -		goto drop;
> -

    Like it was here.

WBR, Sergei

^ permalink raw reply

* Re: netfilter: nf_tables: add reject module for NFPROTO_INET
From: Patrick McHardy @ 2014-02-12 14:27 UTC (permalink / raw)
  To: Dave Jones; +Cc: netdev, pablo
In-Reply-To: <20140212141827.GA7129@redhat.com>

On Wed, Feb 12, 2014 at 09:18:27AM -0500, Dave Jones wrote:
> On Tue, Feb 11, 2014 at 08:33:59PM +0000, Linux Kernel wrote:
>  > Gitweb:     http://git.kernel.org/linus/;a=commit;h=05513e9e33dbded8124567466a444d32173eecc6
>  > Commit:     05513e9e33dbded8124567466a444d32173eecc6
>  > Parent:     cc4723ca316742891954efa346298e7c747c0d17
>  > Author:     Patrick McHardy <kaber@trash.net>
>  > AuthorDate: Wed Feb 5 15:03:39 2014 +0000
>  > Committer:  Pablo Neira Ayuso <pablo@netfilter.org>
>  > CommitDate: Thu Feb 6 09:44:18 2014 +0100
>  > 
>  >     netfilter: nf_tables: add reject module for NFPROTO_INET
>  >     
>  >     Add a reject module for NFPROTO_INET. It does nothing but dispatch
>  >     to the AF-specific modules based on the hook family.
>  >     
>  >     Signed-off-by: Patrick McHardy <kaber@trash.net>
>  >     Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
>  > ---
> 
> ....
> 
>  > +static void nft_reject_inet_eval(const struct nft_expr *expr,
>  > +				 struct nft_data data[NFT_REG_MAX + 1],
>  > +				 const struct nft_pktinfo *pkt)
>  > +{
>  > +	switch (pkt->ops->pf) {
>  > +	case NFPROTO_IPV4:
>  > +		nft_reject_ipv4_eval(expr, data, pkt);
>  > +	case NFPROTO_IPV6:
>  > +		nft_reject_ipv6_eval(expr, data, pkt);
>  > +	}
>  > +}
> 
> Is the fallthrough intentional here, or is there a missing break ?

Crap. Thanks for catching this, I'll send a fix later.

^ permalink raw reply

* Re: [PATCH net-next 01/10] net: phy: add "internal" PHY mode
From: Sergei Shtylyov @ 2014-02-12 14:35 UTC (permalink / raw)
  To: Florian Fainelli, netdev; +Cc: davem, cernekee, devicetree
In-Reply-To: <1392178053-3143-2-git-send-email-f.fainelli@gmail.com>

Hello.

On 12-02-2014 8:07, Florian Fainelli wrote:

> On some systems, the PHY can be internal, in the same package as the
> Ethernet MAC, and still be responding to a specific address on the MDIO
> bus, in that case, the Ethernet MAC might need to know about it to
> properly configure a port multiplexer to switch to an internal or
> external PHY. Add a new PHY interface mode for this and update the
> Device Tree of_get_phy_mode() function to look for it.

> Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
[...]

> @@ -553,7 +554,8 @@ static inline bool phy_interrupt_is_valid(struct phy_device *phydev)
>    */
>   static inline bool phy_is_internal(struct phy_device *phydev)
>   {
> -	return phydev->is_internal;
> +	return phydev->is_internal ||
> +		phydev->interface == PHY_INTERFACE_MODE_INTERNAL;

    Shouldn't the continuation line be aligned under 'phydev' on the broken-up
line?

WBR, Sergei

^ permalink raw reply

* RE: bnx2x + SR-IOV, no internal L2 switching
From: Ariel Elior @ 2014-02-12 14:38 UTC (permalink / raw)
  To: yoann.juet@univ-nantes.fr, netdev@vger.kernel.org
In-Reply-To: <52FB7843.6050601@univ-nantes.fr>

> -----Original Message-----
> From: netdev-owner@vger.kernel.org [mailto:netdev-owner@vger.kernel.org]
> On Behalf Of Yoann Juet
> Sent: Wednesday, February 12, 2014 3:34 PM
> To: netdev@vger.kernel.org
> Subject: bnx2x + SR-IOV, no internal L2 switching

Hi Yoann,
57810 device does support l2 switching.
I find your terminology a little bit confusing and I have some further questions besides:

[Q1]: Are you attaching physical functions to VMs? I.e. are you passing through PFs to the VMs, or only virtual functions?

[Q2]: Are you using VFs only on VMs, or directly from the Hypervisor too?

[Q3]: You wrote "for instance the physical ethX has an IP address". That in itself is no problem and no surprise. You gave this as an example of traffic arriving where it shouldn't. Please elaborate.

[Q4]: Do you have vlan filters configured anywhere in your topology? Are they configured from Host or from Guest?
(in this respect host tagging would be 'ip link set eth0 vf 5 vlan 100' on the hypervisor while guest tagging would be 'ip link add link eth4 type vlan id 100' or 'vconfig add eth4 100' where eth0 is a PF and eth4 is a VF).

[Q5]: In the case you mentioned where you saw in a VM traffic which was destined to another VM: Did both VMs contain VFs? Were the VFs created from the same Physical Function? If not, what were the BDFs of the respective PFs? Which mac addresses did you give the VFs?

[Q6]: Please isolate a specific case where switching is not behaving as expected and describe it in more detail:
Please describe the topology (which PFs are involved and which VFs. Who is assigned to which VMs)
Where are you sending data from (a VF, a PF, some peer on the network) and where is it arriving (perhaps arriving in multiple places)
Please detail the behavior you expect and the behavior you observe.
Please supply:
Mac addresses, ip addresses and masks, configured vlans (if any), promiscuous setting, etc.
ip -d -d link show on hypervisor and each of the VMs involved
ethtool -d from Hypervisor PFs 
dmesg from hypervisor

Please note we recently submitted some fixes to our tx-switching behavior:
In e8379c79 "bnx2x: fix VLAN configuration for VFs" we fixed an issue where traffic with the wrong vlan could still end up in a VM configured to a different vlan (hence my questions on vlans).
In c14db202 "bnx2x: Correct default Tx switching behavior" we fixed a connectivity issue with pf to vf connectivity.
Depending on your answers to the above, perhaps these might be relevant to your case.

Thanks,
Ariel

^ permalink raw reply

* Re: [PATCH 04/10] net: phy: add genphy_aneg_done()
From: Sergei Shtylyov @ 2014-02-12 14:43 UTC (permalink / raw)
  To: Florian Fainelli, netdev; +Cc: davem
In-Reply-To: <1392168462-18888-5-git-send-email-f.fainelli@gmail.com>

Hello.

On 12-02-2014 5:27, Florian Fainelli wrote:

> In preparation for allowing PHY drivers to potentially override their
> auto-negotiation done callback, move the contents of phy_aneg_done() to
> genphy_aneg_done() since that function really is the generic
> implementation based on the BMSR_ANEGCOMPLETE status.

> Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
[...]

> diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
> index 82514e7..4e7db72 100644
> --- a/drivers/net/phy/phy_device.c
> +++ b/drivers/net/phy/phy_device.c
> @@ -865,6 +865,22 @@ int genphy_config_aneg(struct phy_device *phydev)
>   }
>   EXPORT_SYMBOL(genphy_config_aneg);
>
> +/**
> + * genphy_aneg_done - return auto-negotiation status
> + * @phydev: target phy_device struct
> + *
> + * Description: Reads the status register and returns 0 either if
> + *   auto-negotiation is incomplete, or if there was an error.
> + *   Returns BMSR_ANEGCOMPLETE if auto-negotiation is done.
> + */
> +int genphy_aneg_done(struct phy_device *phydev)
> +{
> +	int retval = phy_read(phydev, MII_BMSR);
> +
> +	return (retval < 0) ? retval : (retval & BMSR_ANEGCOMPLETE);

    I doubt parens are needed here.

WBR, Sergei

^ permalink raw reply

* Re: [PATCH net-next] ipv4: ip_forward: perform skb->pkt_type check at the beginning
From: David Miller @ 2014-02-12 14:44 UTC (permalink / raw)
  To: kda; +Cc: netdev
In-Reply-To: <1392192632-24513-1-git-send-email-kda@linux-powerpc.org>

From: Denis Kirjanov <kda@linux-powerpc.org>
Date: Wed, 12 Feb 2014 12:10:32 +0400

> +    /* that should never happen */
> +    if (skb->pkt_type != PACKET_HOST)
> +        goto drop;
> +

At a minimum, this is not formatted properly.  You are using spaces
instead of TAB characters.

^ permalink raw reply

* Re: [PATCH net-next] net: remove useless if check from register_netdevice()
From: David Miller @ 2014-02-12 14:46 UTC (permalink / raw)
  To: kda; +Cc: netdev
In-Reply-To: <1392191152-24182-1-git-send-email-kda@linux-powerpc.org>

From: Denis Kirjanov <kda@linux-powerpc.org>
Date: Wed, 12 Feb 2014 11:45:52 +0400

> @@ -5876,8 +5876,7 @@ int register_netdevice(struct net_device *dev)
>  	if (dev->netdev_ops->ndo_init) {
>  		ret = dev->netdev_ops->ndo_init(dev);
>  		if (ret) {
> -			if (ret > 0)
> -				ret = -EIO;
> +            ret = -EIO;
>  			goto out;
>  		}

Like your other patch you are not formatting this code properly at all.

If you cannot insert the correct TAB and space characters necessary
to indent the code properly, have something help you do it, such as
emacs's C-mode in "linux" mode.

^ permalink raw reply

* Re: [PATCH net-next] net: remove useless if check from register_netdevice()
From: Denis Kirjanov @ 2014-02-12 14:51 UTC (permalink / raw)
  To: David Miller; +Cc: netdev
In-Reply-To: <20140212.094611.946835062678856302.davem@davemloft.net>

On 2/12/14, David Miller <davem@davemloft.net> wrote:
> From: Denis Kirjanov <kda@linux-powerpc.org>
> Date: Wed, 12 Feb 2014 11:45:52 +0400
>
>> @@ -5876,8 +5876,7 @@ int register_netdevice(struct net_device *dev)
>>  	if (dev->netdev_ops->ndo_init) {
>>  		ret = dev->netdev_ops->ndo_init(dev);
>>  		if (ret) {
>> -			if (ret > 0)
>> -				ret = -EIO;
>> +            ret = -EIO;
>>  			goto out;
>>  		}
>
> Like your other patch you are not formatting this code properly at all.
>

I'll resubmit both. Thank you.

> If you cannot insert the correct TAB and space characters necessary
> to indent the code properly, have something help you do it, such as
> emacs's C-mode in "linux" mode.
>

^ permalink raw reply

* [PATCH] net: qmi_wwan: add support for Cinterion PXS8 and PHS8
From: Aleksander Morgado @ 2014-02-12 14:55 UTC (permalink / raw)
  To: davem
  Cc: bjorn, netdev, linux-usb, Aleksander Morgado,
	Hans-Christoph Schemmel, Christian Schmiedl, Nicolaus Colberg,
	David McCullough

When the PXS8 and PHS8 devices show up with PID 0x0053 they will expose both a
QMI port and a WWAN interface.

CC: Hans-Christoph Schemmel <hans-christoph.schemmel@gemalto.com>
CC: Christian Schmiedl <christian.schmiedl@gemalto.com>
CC: Nicolaus Colberg <nicolaus.colberg@gemalto.com>
CC: David McCullough <david.mccullough@accelecon.com>
Signed-off-by: Aleksander Morgado <aleksander@aleksander.es>
---
 drivers/net/usb/qmi_wwan.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index ff5c871..1eddd43 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -732,6 +732,7 @@ static const struct usb_device_id products[] = {
 	{QMI_FIXED_INTF(0x1bc7, 0x1201, 2)},	/* Telit LE920 */
 	{QMI_FIXED_INTF(0x0b3c, 0xc005, 6)},    /* Olivetti Olicard 200 */
 	{QMI_FIXED_INTF(0x1e2d, 0x0060, 4)},	/* Cinterion PLxx */
+	{QMI_FIXED_INTF(0x1e2d, 0x0053, 4)},	/* Cinterion PHxx,PXxx */
 
 	/* 4. Gobi 1000 devices */
 	{QMI_GOBI1K_DEVICE(0x05c6, 0x9212)},	/* Acer Gobi Modem Device */
-- 
1.8.5.3

^ permalink raw reply related

* Re: 3.14-mw regression: rtl8169 WARNING: DMA-API: exceeded 7 overlapping mappings of pfn 55ebe
From: Dan Williams @ 2014-02-12 14:56 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Sander Eikelenboom, Konrad Rzeszutek Wilk, Wei Liu,
	Francois Romieu, netdev@vger.kernel.org,
	linux-kernel@vger.kernel.org, Dave Jones,
	Russell King - ARM Linux
In-Reply-To: <1392178661.1752.1.camel@edumazet-glaptop2.roam.corp.google.com>

On Tue, Feb 11, 2014 at 8:17 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> On Tue, 2014-02-11 at 18:07 -0800, Dan Williams wrote:
>
>> The overlap granularity is too large.  Multiple dma_map_single
>> mappings are allowed to a given page as long as they don't collide on
>> the same cache line.
>>
>
> I am not sure why you try number of mappings of a page.

For this debug facility I am tracking whether dma has completed by
making sure there are no active dma_map entries in the address range
of a page being cow'd.

> Try launching 100 concurrent netperf -t TCP_SENFILE
>
> Same page might be mapped more than 100 times, more than 10000 times in
> some cases.
>

Aren't these mapping serialized by the device to some extent?
Although multi-queue / multi-device would even defeat that...

Hmm, then I think at a minimum the activity tracking needs to be
constrained to overlapping DMA_FROM_DEVICE or DMA_BIDIRECTIONAL
mappings.  However, I am still operating on the assumption that some
architectures (especially non-io-coherent or dmabounce architectures)
expect a dma mapping to reflect exclusive ownership of the buffer.
From the conversation I had with Russell, back in the day [1]:

"When we get to the second async_xor(), as we haven't started to run any
of these operations, the source and destination buffers are still mapped.
However, we ignore that and call dma_map_page() on them again - this is
illegal because the CPU does not own these buffers."

It might be the case that we can't have a general overlap detection
facility as it will flag stable use cases that nonetheless violate the
exclusivity expectation.

--
Dan

[1]: http://marc.info/?l=linux-arm-kernel&m=129389649101566&w=2

^ permalink raw reply

* Re: [PATCH] net: qmi_wwan: add support for Cinterion PXS8 and PHS8
From: Bjørn Mork @ 2014-02-12 15:25 UTC (permalink / raw)
  To: Aleksander Morgado
  Cc: davem-fT/PcQaiUtIeIZ0/mPfg9Q, netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-usb-u79uwXL29TY76Z2rM5mHXA, Hans-Christoph Schemmel,
	Christian Schmiedl, Nicolaus Colberg, David McCullough
In-Reply-To: <1392216914-11520-1-git-send-email-aleksander-Dvg4H30XQSRVIjRurl1/8g@public.gmane.org>

Aleksander Morgado <aleksander-Dvg4H30XQSRVIjRurl1/8g@public.gmane.org> writes:

> When the PXS8 and PHS8 devices show up with PID 0x0053 they will expose both a
> QMI port and a WWAN interface.
>
> CC: Hans-Christoph Schemmel <hans-christoph.schemmel-av01+Y74gORBDgjK7y7TUQ@public.gmane.org>
> CC: Christian Schmiedl <christian.schmiedl-av01+Y74gORBDgjK7y7TUQ@public.gmane.org>
> CC: Nicolaus Colberg <nicolaus.colberg-av01+Y74gORBDgjK7y7TUQ@public.gmane.org>
> CC: David McCullough <david.mccullough-BMjRkEEyyBKaMJb+Lgu22Q@public.gmane.org>
> Signed-off-by: Aleksander Morgado <aleksander-Dvg4H30XQSRVIjRurl1/8g@public.gmane.org>

Acked-by: Bjørn Mork <bjorn-yOkvZcmFvRU@public.gmane.org>
--
To unsubscribe from this list: send the line "unsubscribe linux-usb" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH] [CLEANUP] tcp: remove unused min_cwnd member of tcp_congestion_ops
From: Yuchung Cheng @ 2014-02-12 15:42 UTC (permalink / raw)
  To: Stanislav Fomichev
  Cc: David Miller, kuznet, jmorris, yoshfuji, kaber, Eric Dumazet,
	Neal Cardwell, netdev
In-Reply-To: <1392212121-28574-1-git-send-email-stfomichev@yandex-team.ru>

On Wed, Feb 12, 2014 at 5:35 AM, Stanislav Fomichev
<stfomichev@yandex-team.ru> wrote:
> Commit 684bad110757 "tcp: use PRR to reduce cwin in CWR state" removed all
> calls to min_cwnd, so we can safely remove it.
> Also, remove tcp_reno_min_cwnd because it was only used for min_cwnd.
>
> Signed-off-by: Stanislav Fomichev <stfomichev@yandex-team.ru>
Acked-by: Yuchung Cheng <ycheng@google.com>

> ---
>  Documentation/networking/tcp.txt |  2 +-
>  include/net/tcp.h                |  3 ---
>  net/ipv4/tcp_cong.c              | 10 ----------
>  net/ipv4/tcp_highspeed.c         |  1 -
>  net/ipv4/tcp_hybla.c             |  1 -
>  net/ipv4/tcp_illinois.c          |  1 -
>  net/ipv4/tcp_lp.c                |  1 -
>  net/ipv4/tcp_scalable.c          |  1 -
>  net/ipv4/tcp_vegas.c             |  1 -
>  net/ipv4/tcp_westwood.c          |  1 -
>  net/ipv4/tcp_yeah.c              |  1 -
>  11 files changed, 1 insertion(+), 22 deletions(-)
>
> diff --git a/Documentation/networking/tcp.txt b/Documentation/networking/tcp.txt
> index 7d11bb5dc30a..bdc4c0db51e1 100644
> --- a/Documentation/networking/tcp.txt
> +++ b/Documentation/networking/tcp.txt
> @@ -30,7 +30,7 @@ A congestion control mechanism can be registered through functions in
>  tcp_cong.c. The functions used by the congestion control mechanism are
>  registered via passing a tcp_congestion_ops struct to
>  tcp_register_congestion_control. As a minimum name, ssthresh,
> -cong_avoid, min_cwnd must be valid.
> +cong_avoid must be valid.
>
>  Private data for a congestion control mechanism is stored in tp->ca_priv.
>  tcp_ca(tp) returns a pointer to this space.  This is preallocated space - it
> diff --git a/include/net/tcp.h b/include/net/tcp.h
> index 70e55d200610..8ccc431f7269 100644
> --- a/include/net/tcp.h
> +++ b/include/net/tcp.h
> @@ -793,8 +793,6 @@ struct tcp_congestion_ops {
>
>         /* return slow start threshold (required) */
>         u32 (*ssthresh)(struct sock *sk);
> -       /* lower bound for congestion window (optional) */
> -       u32 (*min_cwnd)(const struct sock *sk);
>         /* do new cwnd calculation (required) */
>         void (*cong_avoid)(struct sock *sk, u32 ack, u32 acked, u32 in_flight);
>         /* call before changing ca_state (optional) */
> @@ -829,7 +827,6 @@ void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w);
>  extern struct tcp_congestion_ops tcp_init_congestion_ops;
>  u32 tcp_reno_ssthresh(struct sock *sk);
>  void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked, u32 in_flight);
> -u32 tcp_reno_min_cwnd(const struct sock *sk);
>  extern struct tcp_congestion_ops tcp_reno;
>
>  static inline void tcp_set_ca_state(struct sock *sk, const u8 ca_state)
> diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
> index ad37bf18ae4b..f49351edf97d 100644
> --- a/net/ipv4/tcp_cong.c
> +++ b/net/ipv4/tcp_cong.c
> @@ -362,21 +362,12 @@ u32 tcp_reno_ssthresh(struct sock *sk)
>  }
>  EXPORT_SYMBOL_GPL(tcp_reno_ssthresh);
>
> -/* Lower bound on congestion window with halving. */
> -u32 tcp_reno_min_cwnd(const struct sock *sk)
> -{
> -       const struct tcp_sock *tp = tcp_sk(sk);
> -       return tp->snd_ssthresh/2;
> -}
> -EXPORT_SYMBOL_GPL(tcp_reno_min_cwnd);
> -
>  struct tcp_congestion_ops tcp_reno = {
>         .flags          = TCP_CONG_NON_RESTRICTED,
>         .name           = "reno",
>         .owner          = THIS_MODULE,
>         .ssthresh       = tcp_reno_ssthresh,
>         .cong_avoid     = tcp_reno_cong_avoid,
> -       .min_cwnd       = tcp_reno_min_cwnd,
>  };
>
>  /* Initial congestion control used (until SYN)
> @@ -388,6 +379,5 @@ struct tcp_congestion_ops tcp_init_congestion_ops  = {
>         .owner          = THIS_MODULE,
>         .ssthresh       = tcp_reno_ssthresh,
>         .cong_avoid     = tcp_reno_cong_avoid,
> -       .min_cwnd       = tcp_reno_min_cwnd,
>  };
>  EXPORT_SYMBOL_GPL(tcp_init_congestion_ops);
> diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c
> index 8ed9305dfdf4..8b9e7bad77c0 100644
> --- a/net/ipv4/tcp_highspeed.c
> +++ b/net/ipv4/tcp_highspeed.c
> @@ -162,7 +162,6 @@ static struct tcp_congestion_ops tcp_highspeed __read_mostly = {
>         .init           = hstcp_init,
>         .ssthresh       = hstcp_ssthresh,
>         .cong_avoid     = hstcp_cong_avoid,
> -       .min_cwnd       = tcp_reno_min_cwnd,
>
>         .owner          = THIS_MODULE,
>         .name           = "highspeed"
> diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c
> index 478fe82611bf..2a1a9e2a4e51 100644
> --- a/net/ipv4/tcp_hybla.c
> +++ b/net/ipv4/tcp_hybla.c
> @@ -166,7 +166,6 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 acked,
>  static struct tcp_congestion_ops tcp_hybla __read_mostly = {
>         .init           = hybla_init,
>         .ssthresh       = tcp_reno_ssthresh,
> -       .min_cwnd       = tcp_reno_min_cwnd,
>         .cong_avoid     = hybla_cong_avoid,
>         .set_state      = hybla_state,
>
> diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c
> index 8a520996f3d2..560d9879b89c 100644
> --- a/net/ipv4/tcp_illinois.c
> +++ b/net/ipv4/tcp_illinois.c
> @@ -329,7 +329,6 @@ static struct tcp_congestion_ops tcp_illinois __read_mostly = {
>         .flags          = TCP_CONG_RTT_STAMP,
>         .init           = tcp_illinois_init,
>         .ssthresh       = tcp_illinois_ssthresh,
> -       .min_cwnd       = tcp_reno_min_cwnd,
>         .cong_avoid     = tcp_illinois_cong_avoid,
>         .set_state      = tcp_illinois_state,
>         .get_info       = tcp_illinois_info,
> diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c
> index 991d62a2f9bb..503798f2fcd6 100644
> --- a/net/ipv4/tcp_lp.c
> +++ b/net/ipv4/tcp_lp.c
> @@ -319,7 +319,6 @@ static struct tcp_congestion_ops tcp_lp __read_mostly = {
>         .init = tcp_lp_init,
>         .ssthresh = tcp_reno_ssthresh,
>         .cong_avoid = tcp_lp_cong_avoid,
> -       .min_cwnd = tcp_reno_min_cwnd,
>         .pkts_acked = tcp_lp_pkts_acked,
>
>         .owner = THIS_MODULE,
> diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c
> index 19ea6c2951f3..0ac50836da4d 100644
> --- a/net/ipv4/tcp_scalable.c
> +++ b/net/ipv4/tcp_scalable.c
> @@ -39,7 +39,6 @@ static u32 tcp_scalable_ssthresh(struct sock *sk)
>  static struct tcp_congestion_ops tcp_scalable __read_mostly = {
>         .ssthresh       = tcp_scalable_ssthresh,
>         .cong_avoid     = tcp_scalable_cong_avoid,
> -       .min_cwnd       = tcp_reno_min_cwnd,
>
>         .owner          = THIS_MODULE,
>         .name           = "scalable",
> diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
> index 06cae62bf208..a022c17c9cf1 100644
> --- a/net/ipv4/tcp_vegas.c
> +++ b/net/ipv4/tcp_vegas.c
> @@ -310,7 +310,6 @@ static struct tcp_congestion_ops tcp_vegas __read_mostly = {
>         .init           = tcp_vegas_init,
>         .ssthresh       = tcp_reno_ssthresh,
>         .cong_avoid     = tcp_vegas_cong_avoid,
> -       .min_cwnd       = tcp_reno_min_cwnd,
>         .pkts_acked     = tcp_vegas_pkts_acked,
>         .set_state      = tcp_vegas_state,
>         .cwnd_event     = tcp_vegas_cwnd_event,
> diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c
> index 76a1e23259e1..b94a04ae2ed5 100644
> --- a/net/ipv4/tcp_westwood.c
> +++ b/net/ipv4/tcp_westwood.c
> @@ -276,7 +276,6 @@ static struct tcp_congestion_ops tcp_westwood __read_mostly = {
>         .init           = tcp_westwood_init,
>         .ssthresh       = tcp_reno_ssthresh,
>         .cong_avoid     = tcp_reno_cong_avoid,
> -       .min_cwnd       = tcp_westwood_bw_rttmin,
>         .cwnd_event     = tcp_westwood_event,
>         .get_info       = tcp_westwood_info,
>         .pkts_acked     = tcp_westwood_pkts_acked,
> diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c
> index a347a078ee07..5d2d15c5295f 100644
> --- a/net/ipv4/tcp_yeah.c
> +++ b/net/ipv4/tcp_yeah.c
> @@ -231,7 +231,6 @@ static struct tcp_congestion_ops tcp_yeah __read_mostly = {
>         .init           = tcp_yeah_init,
>         .ssthresh       = tcp_yeah_ssthresh,
>         .cong_avoid     = tcp_yeah_cong_avoid,
> -       .min_cwnd       = tcp_reno_min_cwnd,
>         .set_state      = tcp_vegas_state,
>         .cwnd_event     = tcp_vegas_cwnd_event,
>         .get_info       = tcp_vegas_get_info,
> --
> 1.8.3.2
>

^ permalink raw reply

* Re: [PATCH] net: clear iflink when moving to a new netns
From: Nicolas Dichtel @ 2014-02-12 15:43 UTC (permalink / raw)
  To: Cong Wang, netdev
  Cc: David S. Miller, Eric W. Biederman, Eric Dumazet,
	Hannes Frederic Sowa, Cong Wang
In-Reply-To: <1392162690-6647-1-git-send-email-xiyou.wangcong@gmail.com>

Le 12/02/2014 00:51, Cong Wang a écrit :
> From: Cong Wang <cwang@twopensource.com>
>
> BZ: https://bugzilla.kernel.org/show_bug.cgi?id=66691
>
> macvlan and vlan both use iflink to identify its lower device,
> however, after such device is moved to the new netns, its iflink
> would become meaningless as ifindex is per netns. So, instead of
> forbid them moving to another netns, just clear this field so that
> it will not be dumped at least.
>
> Cc: David S. Miller <davem@davemloft.net>
> Cc: Eric W. Biederman <ebiederm@xmission.com>
> Cc: Eric Dumazet <eric.dumazet@gmail.com>
> Cc: Hannes Frederic Sowa <hannes@stressinduktion.org>,
> Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
> Signed-off-by: Cong Wang <cwang@twopensource.com>
I wonder if this patch breaks things in ip tunnels.
For example, ip6_tunnel uses iflink to find tunnels that are bound to an interface.
If you reset this field, ipip6_tunnel_lookup() will fail when the tunnel moves
to another netns.

^ permalink raw reply

* Re: Experimental Privacy Functions and TCP SYN Payloads
From: Yuchung Cheng @ 2014-02-12 15:51 UTC (permalink / raw)
  To: Daniel Borkmann; +Cc: Quinn Wood, linux-kernel@vger.kernel.org, netdev
In-Reply-To: <52FB5C87.50408@iogearbox.net>

On Wed, Feb 12, 2014 at 3:35 AM, Daniel Borkmann <borkmann@iogearbox.net> wrote:
> (please cc netdev)
>
> On 02/12/2014 11:25 AM, Quinn Wood wrote:
>>
>> If program on host A spoofs the source address of an outgoing IPv4 packet
>> then
>> places that address in the first 32 bits of a UDP payload, a program on
>> host B
>> that is aware of these behaviors can still reply to the program on host A.
>> [1]
>>
>> Continuing with this approach the program on host A could encrypt the UDP
>> pay-
>> load in a way that the program on host B can decrypt, and effectively
>> reduce
>> the ability of others in the wide network to passively determine who host
>> A is
>> sending transmissions to while simultaneously ensuring the program on host
>> B
>> can respond to the program on host A. [2]
>>
>> I'm uncertain how to proceed if I want to use TCP for stateful
>> connections.
>> The requirement of a handshake before data is handed off to the program
>> means
>> this approach won't work out of the box. I'm looking for any insight folks
>> may
>> have regarding this.
>>
>> My original approach to the handshake included setting one of the reserved
>> bits in the TCP header to indicate the first 32 bits of the payload were
>> the
>> real source address. However this would be reliant on SYN packets
>> containing
>> a payload. Does the Linux kernel allow this?
For 3.7+ you can use TCP Fast Open.

For a quick trial experiment, you can just set
sysctl net.ipv4.tcp_fastopen=0x603 on both end hosts and use
sendmsg(..., MSG_FASTOPEN) instead of connect() then send(). The
sendmsg() will behave as a combo call of connect() and send() and
return similar errno. accept() will return after data in the SYN is
received instead of after handshake is completed.

>>
>> -
>>
>> [1] Barring any non store-and-forward network behavior like dropping
>> packets
>>      with questionable source addresses. Considering recent NTP-related
>> news
>>      this seems to be a not-entirely common activity :)
>> [2] This is of course reliant on both programs knowing the proper key for
>> the
>>      other.
>
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox