Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCH iproute2 v2] ip link: Fix crash on older kernels when show VF dev
From: Vadim Kochan @ 2015-01-09 19:22 UTC (permalink / raw)
  To: netdev; +Cc: william, Vadim Kochan, Vadim Kochan

From: Vadim Kochan <vadim4j@gmail.com>

The crash occurred because ifla_vf_rate does not exist on older
kernels; check that the nested attr is present before using it.

Signed-off-by: Vadim Kochan <vadim4j@gmail.net>
Reported-by: William Dauchy <william@gandi.net>
Tested-by: William Dauchy <william@gandi.net>
---
 ip/ipaddress.c | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/ip/ipaddress.c b/ip/ipaddress.c
index 28dfe8c..830b166 100644
--- a/ip/ipaddress.c
+++ b/ip/ipaddress.c
@@ -259,11 +259,10 @@ static void print_vfinfo(FILE *fp, struct rtattr *vfinfo)
 {
 	struct ifla_vf_mac *vf_mac;
 	struct ifla_vf_vlan *vf_vlan;
-	struct ifla_vf_rate *vf_rate;
 	struct ifla_vf_tx_rate *vf_tx_rate;
 	struct ifla_vf_spoofchk *vf_spoofchk;
 	struct ifla_vf_link_state *vf_linkstate;
-	struct rtattr *vf[IFLA_VF_MAX+1];
+	struct rtattr *vf[IFLA_VF_MAX + 1] = {};
 	struct rtattr *tmp;
 	SPRINT_BUF(b1);
 
@@ -277,7 +276,6 @@ static void print_vfinfo(FILE *fp, struct rtattr *vfinfo)
 	vf_mac = RTA_DATA(vf[IFLA_VF_MAC]);
 	vf_vlan = RTA_DATA(vf[IFLA_VF_VLAN]);
 	vf_tx_rate = RTA_DATA(vf[IFLA_VF_TX_RATE]);
-	vf_rate = RTA_DATA(vf[IFLA_VF_RATE]);
 
 	/* Check if the spoof checking vf info type is supported by
 	 * this kernel.
@@ -313,10 +311,16 @@ static void print_vfinfo(FILE *fp, struct rtattr *vfinfo)
 		fprintf(fp, ", qos %d", vf_vlan->qos);
 	if (vf_tx_rate->rate)
 		fprintf(fp, ", tx rate %d (Mbps)", vf_tx_rate->rate);
-	if (vf_rate->max_tx_rate)
-		fprintf(fp, ", max_tx_rate %dMbps", vf_rate->max_tx_rate);
-	if (vf_rate->min_tx_rate)
-		fprintf(fp, ", min_tx_rate %dMbps", vf_rate->min_tx_rate);
+
+	if (vf[IFLA_VF_RATE]) {
+		struct ifla_vf_rate *vf_rate = RTA_DATA(vf[IFLA_VF_RATE]);
+
+		if (vf_rate->max_tx_rate)
+			fprintf(fp, ", max_tx_rate %dMbps", vf_rate->max_tx_rate);
+		if (vf_rate->min_tx_rate)
+			fprintf(fp, ", min_tx_rate %dMbps", vf_rate->min_tx_rate);
+	}
+
 	if (vf_spoofchk && vf_spoofchk->setting != -1) {
 		if (vf_spoofchk->setting)
 			fprintf(fp, ", spoof checking on");
-- 
2.1.3

^ permalink raw reply related

* [PATCH iproute2 v3] ip link: Fix crash on older kernels when show VF dev
From: Vadim Kochan @ 2015-01-09 19:24 UTC (permalink / raw)
  To: netdev; +Cc: william, Vadim Kochan

From: Vadim Kochan <vadim4j@gmail.com>

The crash occurred because ifla_vf_rate does not exist on older
kernels; check that the nested attr is present before using it.

Signed-off-by: Vadim Kochan <vadim4j@gmail.com>
Reported-by: William Dauchy <william@gandi.net>
Tested-by: William Dauchy <william@gandi.net>
---
 ip/ipaddress.c | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/ip/ipaddress.c b/ip/ipaddress.c
index 28dfe8c..830b166 100644
--- a/ip/ipaddress.c
+++ b/ip/ipaddress.c
@@ -259,11 +259,10 @@ static void print_vfinfo(FILE *fp, struct rtattr *vfinfo)
 {
 	struct ifla_vf_mac *vf_mac;
 	struct ifla_vf_vlan *vf_vlan;
-	struct ifla_vf_rate *vf_rate;
 	struct ifla_vf_tx_rate *vf_tx_rate;
 	struct ifla_vf_spoofchk *vf_spoofchk;
 	struct ifla_vf_link_state *vf_linkstate;
-	struct rtattr *vf[IFLA_VF_MAX+1];
+	struct rtattr *vf[IFLA_VF_MAX + 1] = {};
 	struct rtattr *tmp;
 	SPRINT_BUF(b1);
 
@@ -277,7 +276,6 @@ static void print_vfinfo(FILE *fp, struct rtattr *vfinfo)
 	vf_mac = RTA_DATA(vf[IFLA_VF_MAC]);
 	vf_vlan = RTA_DATA(vf[IFLA_VF_VLAN]);
 	vf_tx_rate = RTA_DATA(vf[IFLA_VF_TX_RATE]);
-	vf_rate = RTA_DATA(vf[IFLA_VF_RATE]);
 
 	/* Check if the spoof checking vf info type is supported by
 	 * this kernel.
@@ -313,10 +311,16 @@ static void print_vfinfo(FILE *fp, struct rtattr *vfinfo)
 		fprintf(fp, ", qos %d", vf_vlan->qos);
 	if (vf_tx_rate->rate)
 		fprintf(fp, ", tx rate %d (Mbps)", vf_tx_rate->rate);
-	if (vf_rate->max_tx_rate)
-		fprintf(fp, ", max_tx_rate %dMbps", vf_rate->max_tx_rate);
-	if (vf_rate->min_tx_rate)
-		fprintf(fp, ", min_tx_rate %dMbps", vf_rate->min_tx_rate);
+
+	if (vf[IFLA_VF_RATE]) {
+		struct ifla_vf_rate *vf_rate = RTA_DATA(vf[IFLA_VF_RATE]);
+
+		if (vf_rate->max_tx_rate)
+			fprintf(fp, ", max_tx_rate %dMbps", vf_rate->max_tx_rate);
+		if (vf_rate->min_tx_rate)
+			fprintf(fp, ", min_tx_rate %dMbps", vf_rate->min_tx_rate);
+	}
+
 	if (vf_spoofchk && vf_spoofchk->setting != -1) {
 		if (vf_spoofchk->setting)
 			fprintf(fp, ", spoof checking on");
-- 
2.1.3

^ permalink raw reply related

* Re: [PATCH iproute2] ip link: Fix crash on older kernels when show VF dev
From: Vadim Kochan @ 2015-01-09 19:25 UTC (permalink / raw)
  To: William Dauchy; +Cc: Vadim Kochan, netdev
In-Reply-To: <20150109190656.GA25275@angus-think.lan>

On Fri, Jan 09, 2015 at 09:06:56PM +0200, Vadim Kochan wrote:
> On Fri, Jan 09, 2015 at 06:55:57PM +0100, William Dauchy wrote:
> > On Jan09 19:25, Vadim Kochan wrote:
> > > From: Vadim Kochan <vadim4j@gmail.com>
> > > 
> > > The issue was caused that ifla_vf_rate does not exist on
> > > older kernels and should be checked if it exists as nested attr.
> > > 
> > > Signed-off-by: Vadim Kochan <vadim4j@gmail.com>
> > > Reported-by: William Dauchy <william@gandi.net>
> > > Tested-by: William Dauchy <william@gandi.com>
> > 
> > gandi.net actually ;)
> > 
> > Thanks,
> > 
> 
> Sorry, I will re-send.
> 

Uff, sent v3.

^ permalink raw reply

* Re: route/max_size sysctl in ipv4
From: Cong Wang @ 2015-01-09 19:37 UTC (permalink / raw)
  To: Ani Sinha; +Cc: Eric Dumazet, David Miller, netdev@vger.kernel.org
In-Reply-To: <CAOxq_8Na14ES1souk-jBa8A=9ArHc-ffRtSd36zbvwHvGM+24Q@mail.gmail.com>

On Fri, Jan 9, 2015 at 11:08 AM, Ani Sinha <ani@arista.com> wrote:
>
> Perhaps. What I am truly confused about is :
>
> - We are keeping a sysctl interface that does absolutely nothing in
> the kernel and is completely useless in case some userland
> scripts/tools are rendered broken from its removal.

I am all for getting rid of it, sane script should always check
the sysctl existence first. I think we did remove some sysctl's
from kernel before.

>
> - surprisingly, we contradict ourselves when we let scripts break when
> running from a child namespace because the  same sysctl is no longer
> available!
>

I am not sure how exactly your script is broken, but it should
test the existence of any /proc file before reading it, even when
you don't use netns, because this could be a new sysctl introduced
recently (probably not the case for ip_rt_max_size, but you get my point).

^ permalink raw reply

* Re: [PATCH net-next RFC 1/5] net-timestamp: no-payload option
From: Andy Lutomirski @ 2015-01-09 19:43 UTC (permalink / raw)
  To: Willem de Bruijn
  Cc: Network Development, David S. Miller, Richard Cochran,
	Eric Dumazet
In-Reply-To: <1420824719-28848-2-git-send-email-willemb@google.com>

On Fri, Jan 9, 2015 at 9:31 AM, Willem de Bruijn <willemb@google.com> wrote:
> From: Willem de Bruijn <willemb@google.com>
>
> Add timestamping option SOF_TIMESTAMPING_OPT_TSONLY. For transmit
> timestamps, this loops timestamps on top of empty packets.
>
> Doing so reduces the pressure on SO_RCVBUF. Payload inspection and
> cmsg reception (aside from timestamps) are no longer possible. This
> works together with a follow on patch that allows administrators to
> only allow tx timestamping if it does not loop payload or metadata.

If this loses IP_PKTINFO, that will be a bit unfortunate.

--Andy

>
> Signed-off-by: Willem de Bruijn <willemb@google.com>
> ---
>  include/uapi/linux/net_tstamp.h |  3 ++-
>  net/core/skbuff.c               | 19 ++++++++++++++-----
>  net/ipv4/ip_sockglue.c          |  9 +++++----
>  net/ipv6/datagram.c             |  4 ++--
>  net/rxrpc/ar-error.c            |  5 +++++
>  5 files changed, 28 insertions(+), 12 deletions(-)
>
> diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h
> index edbc888..6d1abea 100644
> --- a/include/uapi/linux/net_tstamp.h
> +++ b/include/uapi/linux/net_tstamp.h
> @@ -24,8 +24,9 @@ enum {
>         SOF_TIMESTAMPING_TX_SCHED = (1<<8),
>         SOF_TIMESTAMPING_TX_ACK = (1<<9),
>         SOF_TIMESTAMPING_OPT_CMSG = (1<<10),
> +       SOF_TIMESTAMPING_OPT_TSONLY = (1<<11),
>
> -       SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_CMSG,
> +       SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_TSONLY,
>         SOF_TIMESTAMPING_MASK = (SOF_TIMESTAMPING_LAST - 1) |
>                                  SOF_TIMESTAMPING_LAST
>  };
> diff --git a/net/core/skbuff.c b/net/core/skbuff.c
> index 5a2a2e8..ece2bb8 100644
> --- a/net/core/skbuff.c
> +++ b/net/core/skbuff.c
> @@ -3710,19 +3710,28 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb,
>                      struct sock *sk, int tstype)
>  {
>         struct sk_buff *skb;
> +       bool tsonly = sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TSONLY;
>
>         if (!sk)
>                 return;
>
> -       if (hwtstamps)
> -               *skb_hwtstamps(orig_skb) = *hwtstamps;
> +       if (tsonly)
> +               skb = alloc_skb(0, GFP_ATOMIC);
>         else
> -               orig_skb->tstamp = ktime_get_real();
> -
> -       skb = skb_clone(orig_skb, GFP_ATOMIC);
> +               skb = skb_clone(orig_skb, GFP_ATOMIC);
>         if (!skb)
>                 return;
>
> +       if (tsonly) {
> +               skb_shinfo(skb)->tx_flags = skb_shinfo(orig_skb)->tx_flags;
> +               skb_shinfo(skb)->tskey = skb_shinfo(orig_skb)->tskey;
> +       }
> +
> +       if (hwtstamps)
> +               *skb_hwtstamps(skb) = *hwtstamps;
> +       else
> +               skb->tstamp = ktime_get_real();
> +
>         __skb_complete_tx_timestamp(skb, sk, tstype);
>  }
>  EXPORT_SYMBOL_GPL(__skb_tstamp_tx);
> diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
> index a317797..d81ef70 100644
> --- a/net/ipv4/ip_sockglue.c
> +++ b/net/ipv4/ip_sockglue.c
> @@ -440,7 +440,7 @@ static bool ipv4_pktinfo_prepare_errqueue(const struct sock *sk,
>
>         if ((ee_origin != SO_EE_ORIGIN_TIMESTAMPING) ||
>             (!(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_CMSG)) ||
> -           (!skb->dev))
> +           (!skb->dev) || (!skb->len))
>                 return false;
>
>         info->ipi_spec_dst.s_addr = ip_hdr(skb)->saddr;
> @@ -483,7 +483,7 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
>
>         serr = SKB_EXT_ERR(skb);
>
> -       if (sin) {
> +       if (sin && skb->len) {
>                 sin->sin_family = AF_INET;
>                 sin->sin_addr.s_addr = *(__be32 *)(skb_network_header(skb) +
>                                                    serr->addr_offset);
> @@ -496,8 +496,9 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
>         sin = &errhdr.offender;
>         sin->sin_family = AF_UNSPEC;
>
> -       if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP ||
> -           ipv4_pktinfo_prepare_errqueue(sk, skb, serr->ee.ee_origin)) {
> +       if (skb->len &&
> +           (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP ||
> +            ipv4_pktinfo_prepare_errqueue(sk, skb, serr->ee.ee_origin))) {
>                 struct inet_sock *inet = inet_sk(sk);
>
>                 sin->sin_family = AF_INET;
> diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
> index 100c589..91a31ea 100644
> --- a/net/ipv6/datagram.c
> +++ b/net/ipv6/datagram.c
> @@ -369,7 +369,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
>
>         serr = SKB_EXT_ERR(skb);
>
> -       if (sin) {
> +       if (sin && skb->len) {
>                 const unsigned char *nh = skb_network_header(skb);
>                 sin->sin6_family = AF_INET6;
>                 sin->sin6_flowinfo = 0;
> @@ -394,7 +394,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
>         memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err));
>         sin = &errhdr.offender;
>         sin->sin6_family = AF_UNSPEC;
> -       if (serr->ee.ee_origin != SO_EE_ORIGIN_LOCAL) {
> +       if (serr->ee.ee_origin != SO_EE_ORIGIN_LOCAL && skb->len) {
>                 sin->sin6_family = AF_INET6;
>                 sin->sin6_flowinfo = 0;
>                 sin->sin6_port = 0;
> diff --git a/net/rxrpc/ar-error.c b/net/rxrpc/ar-error.c
> index 74c0fcd..5394b6b 100644
> --- a/net/rxrpc/ar-error.c
> +++ b/net/rxrpc/ar-error.c
> @@ -42,6 +42,11 @@ void rxrpc_UDP_error_report(struct sock *sk)
>                 _leave("UDP socket errqueue empty");
>                 return;
>         }
> +       if (!skb->len) {
> +               _leave("UDP empty message");
> +               kfree_skb(skb);
> +               return;
> +       }
>
>         rxrpc_new_skb(skb);
>
> --
> 2.2.0.rc0.207.ga3a616c
>



-- 
Andy Lutomirski
AMA Capital Management, LLC

^ permalink raw reply

* Re: [PATCH net-next v2] tcp: avoid reducing cwnd when ACK+DSACK is received
From: Yuchung Cheng @ 2015-01-09 19:43 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Neal Cardwell, Sébastien Barré, David Miller, Netdev,
	Gregory Detal, Nandita Dukkipati
In-Reply-To: <1420734325.5947.61.camel@edumazet-glaptop2.roam.corp.google.com>

On Thu, Jan 8, 2015 at 8:25 AM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
>
> On Thu, 2015-01-08 at 10:49 -0500, Neal Cardwell wrote:
> > On Thu, Jan 8, 2015 at 10:39 AM, Sébastien Barré
> > <sebastien.barre@uclouvain.be> wrote:
> > > What do you and Neal think ?
> >
> > My preference is to have the whole expression detecting the case where
> > the receiver got the probe packet encoded in a single expression. I
> > don't have a strong feeling about whether it should be stored in a
> > bool (to reduce the size of the diff) or written directly into the if
> > () expression (to reduce the size of the code). I'll defer to Eric on
> > which he thinks is better. :-)
>
> There is no shame using helpers with nice names to help understand this
> TCP stack. Even if the helper is used exactly once.
>
> In this case, it seems we test 2 different conditions, so this could use
> 2 helpers with self describing names.
>
> When I see :
>
> > +     if (((ack == tp->tlp_high_seq) &&
> > +          !(flag & (FLAG_SND_UNA_ADVANCED |
> > +                    FLAG_NOT_DUP | FLAG_DATA_SACKED))) ||
> > +         (!before(ack, tp->tlp_high_seq) && (flag & FLAG_DSACKING_ACK))) {
> >               tp->tlp_high_seq = 0;
>
> My brain hurts.
Sebastien: I suggest breaking down by ACK types for readability. e.g.,

/* Process an ACK that arrives during a TLP (tail loss probe) episode.
 *
 * ACKs that lie before tlp_high_seq are ignored. For any other ACK the
 * episode is closed in one of these ways:
 *   - the ACK carries a DSACK: tlp_high_seq is cleared here;
 *   - the ACK is after tlp_high_seq: a cwnd reduction is performed and
 *     LINUX_MIB_TCPLOSSPROBERECOVERY is incremented;
 *   - the ACK is a pure dupack: tlp_high_seq is cleared here.
 * An ACK matching none of these leaves tlp_high_seq untouched.
 * Ref: loss detection algorithm in draft-dukkipati-tcpm-tcp-loss-probe.
 */
static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag)
{
        /* Any of these flags means the ACK is not a pure duplicate. */
        const int not_pure_dupack = FLAG_SND_UNA_ADVANCED |
                                    FLAG_NOT_DUP | FLAG_DATA_SACKED;
        struct tcp_sock *tp = tcp_sk(sk);

        if (before(ack, tp->tlp_high_seq))
                return;

        if (flag & FLAG_DSACKING_ACK) {
                /* DSACK: both the original and the TLP probe arrived,
                 * so nothing was lost.
                 */
                tp->tlp_high_seq = 0;
                return;
        }

        if (after(ack, tp->tlp_high_seq)) {
                /* The ACK advanced past the probe: a loss occurred, so
                 * reduce cwnd. tlp_high_seq is expected to be reset
                 * inside tcp_init_cwnd_reduction().
                 */
                tcp_init_cwnd_reduction(sk);
                tcp_set_ca_state(sk, TCP_CA_CWR);
                tcp_end_cwnd_reduction(sk);
                tcp_try_keep_open(sk);
                NET_INC_STATS_BH(sock_net(sk),
                                 LINUX_MIB_TCPLOSSPROBERECOVERY);
                return;
        }

        /* Pure dupack: both the original and the TLP probe arrived,
         * so nothing was lost.
         */
        if ((flag & not_pure_dupack) == 0)
                tp->tlp_high_seq = 0;
}

>
>

^ permalink raw reply

* Re: [PATCH net-next RFC 1/5] net-timestamp: no-payload option
From: Willem de Bruijn @ 2015-01-09 19:47 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Network Development, David S. Miller, Richard Cochran,
	Eric Dumazet
In-Reply-To: <CALCETrU2kFYsYnhPnBWd3059k5Z6=8B==kwXU0xidAsR6EqgCg@mail.gmail.com>

On Fri, Jan 9, 2015 at 2:43 PM, Andy Lutomirski <luto@amacapital.net> wrote:
> On Fri, Jan 9, 2015 at 9:31 AM, Willem de Bruijn <willemb@google.com> wrote:
>> From: Willem de Bruijn <willemb@google.com>
>>
>> Add timestamping option SOF_TIMESTAMPING_OPT_TSONLY. For transmit
>> timestamps, this loops timestamps on top of empty packets.
>>
>> Doing so reduces the pressure on SO_RCVBUF. Payload inspection and
>> cmsg reception (aside from timestamps) are no longer possible. This
>> works together with a follow on patch that allows administrators to
>> only allow tx timestamping if it does not loop payload or metadata.
>
> If this loses IP_PKTINFO, that will be a bit unfortunate.
>

If it doesn't, then we might as well loop the entire payload. For applications
that need pktinfo or other cmsg, do not select the option.

^ permalink raw reply

* Re: [PATCH net-next RFC 1/5] net-timestamp: no-payload option
From: Andy Lutomirski @ 2015-01-09 20:02 UTC (permalink / raw)
  To: Willem de Bruijn
  Cc: Network Development, David S. Miller, Richard Cochran,
	Eric Dumazet
In-Reply-To: <CA+FuTSf4SqTuLmrJ971Wu3eaqc94qd93M_OCa9QCCDHOYwxF8A@mail.gmail.com>

On Fri, Jan 9, 2015 at 11:47 AM, Willem de Bruijn <willemb@google.com> wrote:
> On Fri, Jan 9, 2015 at 2:43 PM, Andy Lutomirski <luto@amacapital.net> wrote:
>> On Fri, Jan 9, 2015 at 9:31 AM, Willem de Bruijn <willemb@google.com> wrote:
>>> From: Willem de Bruijn <willemb@google.com>
>>>
>>> Add timestamping option SOF_TIMESTAMPING_OPT_TSONLY. For transmit
>>> timestamps, this loops timestamps on top of empty packets.
>>>
>>> Doing so reduces the pressure on SO_RCVBUF. Payload inspection and
>>> cmsg reception (aside from timestamps) are no longer possible. This
>>> works together with a follow on patch that allows administrators to
>>> only allow tx timestamping if it does not loop payload or metadata.
>>
>> If this loses IP_PKTINFO, that will be a bit unfortunate.
>>
>
> If it doesn't, then we might as well loop the entire payload. For applications
> that need pktinfo or other cmsg, do not select the option.

Right, but it loses the ability to get the ifindex if the sysctl is
set to the conservative option, which I don't think is desirable.

--Andy

-- 
Andy Lutomirski
AMA Capital Management, LLC

^ permalink raw reply

* Re: [PATCH] net/fsl: Add mEMAC MDIO support to XGMAC MDIO
From: Emil Medve @ 2015-01-09 20:11 UTC (permalink / raw)
  To: shh.xie, netdev, davem; +Cc: Andy Fleming, Shaohui Xie
In-Reply-To: <1420364162-13109-1-git-send-email-shh.xie@gmail.com>

Hello Shao-Hui,


On 01/04/2015 03:36 AM, shh.xie@gmail.com wrote:
> From: Andy Fleming <afleming@gmail.com>
> 
> The Freescale mEMAC supports operating at 10/100/1000/10G, and
> its associated MDIO controller is likewise capable of operating
> both Clause 22 and Clause 45 MDIO buses. It is nearly identical
> to the MDIO controller on the XGMAC, so we just modify that
> driver.
> 
> Portions of this driver developed by:
> 
> Sandeep Singh <sandeep@freescale.com>
> Roy Zang <tie-fei.zang@freescale.com>
> 
> Signed-off-by: Andy Fleming <afleming@gmail.com>
> Signed-off-by: Shaohui Xie <Shaohui.Xie@freescale.com>
> ---
>  drivers/net/ethernet/freescale/Kconfig      |  3 +-
>  drivers/net/ethernet/freescale/xgmac_mdio.c | 64 ++++++++++++++++++++++++-----
>  2 files changed, 55 insertions(+), 12 deletions(-)
> 
> 	...
> 
> diff --git a/drivers/net/ethernet/freescale/xgmac_mdio.c b/drivers/net/ethernet/freescale/xgmac_mdio.c
> index a352445..e0fc3d1 100644
> --- a/drivers/net/ethernet/freescale/xgmac_mdio.c
> +++ b/drivers/net/ethernet/freescale/xgmac_mdio.c
> 
> 	...
> 
> @@ -123,21 +144,39 @@ static int xgmac_mdio_write(struct mii_bus *bus, int phy_id, int regnum, u16 val
>  static int xgmac_mdio_read(struct mii_bus *bus, int phy_id, int regnum)
>  {
>  	struct tgec_mdio_controller __iomem *regs = bus->priv;
> -	uint16_t dev_addr = regnum >> 16;
> +	uint16_t dev_addr;
> +	uint32_t mdio_stat;
>  	uint32_t mdio_ctl;
>  	uint16_t value;
>  	int ret;
>  
> +	mdio_stat = in_be32(&regs->mdio_stat);
> +	if (regnum & MII_ADDR_C45) {
> +		dev_addr = (regnum >> 16) & 0x1f;
> +		mdio_stat |= MDIO_STAT_ENC;
> +	} else {
> +		dev_addr = regnum & 0x1f;
> +		mdio_stat = ~MDIO_STAT_ENC;

Shouldn't this be 'mdio_stat &= ~MDIO_STAT_ENC'?


Cheers,

^ permalink raw reply

* [PATCH] MAINTAINERS: add me as ibmveth maintainer
From: Thomas Falcon @ 2015-01-09 20:29 UTC (permalink / raw)
  To: netdev; +Cc: Santiago Leon, Brian King, Nathan Fontenot

Adding myself as the ibmveth maintainer and replacing
Santiago Leon.

Signed-off-by: Thomas Falcon <tlfalcon@linux.vnet.ibm.com>
Cc: Santiago Leon <santi_leon@yahoo.com>
Cc: Brian King <brking@linux.vnet.ibm.com>
Cc: Nathan Fontenot <nfont@linux.vnet.ibm.com>
---
 MAINTAINERS | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index ddb9ac8..5cb9f6f 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4748,7 +4748,7 @@ S:	Supported
 F:	drivers/scsi/ipr.*
 
 IBM Power Virtual Ethernet Device Driver
-M:	Santiago Leon <santil@linux.vnet.ibm.com>
+M:	Thomas Falcon <tlfalcon@linux.vnet.ibm.com>
 L:	netdev@vger.kernel.org
 S:	Supported
 F:	drivers/net/ethernet/ibm/ibmveth.*
-- 
1.9.3

^ permalink raw reply related

* Re: [PATCH net-next RFC 1/5] net-timestamp: no-payload option
From: Willem de Bruijn @ 2015-01-09 20:33 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Network Development, David S. Miller, Richard Cochran,
	Eric Dumazet
In-Reply-To: <CALCETrWZy0=OnYhyCB8U7Km7vMcjB+Vc8ahm+ea4=0YiiPBCAQ@mail.gmail.com>

On Fri, Jan 9, 2015 at 3:02 PM, Andy Lutomirski <luto@amacapital.net> wrote:
> On Fri, Jan 9, 2015 at 11:47 AM, Willem de Bruijn <willemb@google.com> wrote:
>> On Fri, Jan 9, 2015 at 2:43 PM, Andy Lutomirski <luto@amacapital.net> wrote:
>>> On Fri, Jan 9, 2015 at 9:31 AM, Willem de Bruijn <willemb@google.com> wrote:
>>>> From: Willem de Bruijn <willemb@google.com>
>>>>
>>>> Add timestamping option SOF_TIMESTAMPING_OPT_TSONLY. For transmit
>>>> timestamps, this loops timestamps on top of empty packets.
>>>>
>>>> Doing so reduces the pressure on SO_RCVBUF. Payload inspection and
>>>> cmsg reception (aside from timestamps) are no longer possible. This
>>>> works together with a follow on patch that allows administrators to
>>>> only allow tx timestamping if it does not loop payload or metadata.
>>>
>>> If this loses IP_PKTINFO, that will be a bit unfortunate.
>>>
>>
>> If it doesn't, then we might as well loop the entire payload. For applications
>> that need pktinfo or other cmsg, do not select the option.
>
> Right, but it loses the ability to get the ifindex if the sysctl is
> set to the conservative option, which I don't think is desirable.

Understood. I just find the alternative, where the no-data policy is
weakened by exceptions, even less desirable. That makes it
harder to explain what the sysctl/option do and what the security
implications are.

^ permalink raw reply

* Re: [PATCH net-next v2] tcp: avoid reducing cwnd when ACK+DSACK is received
From: Neal Cardwell @ 2015-01-09 20:36 UTC (permalink / raw)
  To: Yuchung Cheng
  Cc: Eric Dumazet, Sébastien Barré, David Miller, Netdev,
	Gregory Detal, Nandita Dukkipati
In-Reply-To: <CAK6E8=dhW=0hRYHjjgBh0KwKOQfzHhwY2egusQ8S4tsoJb0Nsg@mail.gmail.com>

On Fri, Jan 9, 2015 at 2:43 PM, Yuchung Cheng <ycheng@google.com> wrote:
> Sebastien: I suggest breaking down by ACK types for readability. e.g.,

I like this approach, as well.  It breaks up the logic into smaller
pieces that are easier to comment and understand, without having
helper bool flags to read and mentally track. And this version passes
all our internal TLP tests, FWIW.

neal

^ permalink raw reply

* Re: [PATCH net-next RFC 1/5] net-timestamp: no-payload option
From: Andy Lutomirski @ 2015-01-09 20:55 UTC (permalink / raw)
  To: Willem de Bruijn
  Cc: Network Development, David S. Miller, Richard Cochran,
	Eric Dumazet
In-Reply-To: <CA+FuTSdWWv65CNQYv1d9+WG1X54eGyP_+mbh3dMHw_HmH8wzYA@mail.gmail.com>

On Fri, Jan 9, 2015 at 12:33 PM, Willem de Bruijn <willemb@google.com> wrote:
> On Fri, Jan 9, 2015 at 3:02 PM, Andy Lutomirski <luto@amacapital.net> wrote:
>> On Fri, Jan 9, 2015 at 11:47 AM, Willem de Bruijn <willemb@google.com> wrote:
>>> On Fri, Jan 9, 2015 at 2:43 PM, Andy Lutomirski <luto@amacapital.net> wrote:
>>>> On Fri, Jan 9, 2015 at 9:31 AM, Willem de Bruijn <willemb@google.com> wrote:
>>>>> From: Willem de Bruijn <willemb@google.com>
>>>>>
>>>>> Add timestamping option SOF_TIMESTAMPING_OPT_TSONLY. For transmit
>>>>> timestamps, this loops timestamps on top of empty packets.
>>>>>
>>>>> Doing so reduces the pressure on SO_RCVBUF. Payload inspection and
>>>>> cmsg reception (aside from timestamps) are no longer possible. This
>>>>> works together with a follow on patch that allows administrators to
>>>>> only allow tx timestamping if it does not loop payload or metadata.
>>>>
>>>> If this loses IP_PKTINFO, that will be a bit unfortunate.
>>>>
>>>
>>> If it doesn't, then we might as well loop the entire payload. For applications
>>> that need pktinfo or other cmsg, do not select the option.
>>
>> Right, but it loses the ability to get the ifindex if the sysctl is
>> set to the conservative option, which I don't think is desirable.
>
> Understood. I just find the alternative, where the no-data policy is
> weakened by exceptions, even less desirable. That makes it
> harder to explain what the sysctl/option do and what the security
> implications are.

Agreed.

If there was no-payload but not no-cmsg, then it would be a nice
middle ground, but I guess that's bad for some reason involving
accounting?

--Andy

-- 
Andy Lutomirski
AMA Capital Management, LLC

^ permalink raw reply

* لسلام عليك ورحمة الله
From: BARR ALHARIRI @ 2015-01-09 20:36 UTC (permalink / raw)



لسلام عليكم
مرحبا اخي  انا المدام براءه الحريري من سوريا والحين عندي مشروع بدي اعرضه 
عليك فارجوا منك ترسل لي رساله بالايميل مشان اقدراحكي معك وخبرك بالتفاصيل 
وبانتظار ردك
وفيك تتواصل معي علي هالايميل : barralhariri24@gmail.com
براءه الحريري

^ permalink raw reply

* Re: [PATCH net-next RFC 1/5] net-timestamp: no-payload option
From: Willem de Bruijn @ 2015-01-09 21:18 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Network Development, David S. Miller, Richard Cochran,
	Eric Dumazet
In-Reply-To: <CALCETrW55OFy5RGKdEMyzMCTyGG-da6qtOUF4Bq1FZUAzpVS0g@mail.gmail.com>

On Fri, Jan 9, 2015 at 3:55 PM, Andy Lutomirski <luto@amacapital.net> wrote:
> On Fri, Jan 9, 2015 at 12:33 PM, Willem de Bruijn <willemb@google.com> wrote:
>> On Fri, Jan 9, 2015 at 3:02 PM, Andy Lutomirski <luto@amacapital.net> wrote:
>>> On Fri, Jan 9, 2015 at 11:47 AM, Willem de Bruijn <willemb@google.com> wrote:
>>>> On Fri, Jan 9, 2015 at 2:43 PM, Andy Lutomirski <luto@amacapital.net> wrote:
>>>>> On Fri, Jan 9, 2015 at 9:31 AM, Willem de Bruijn <willemb@google.com> wrote:
>>>>>> From: Willem de Bruijn <willemb@google.com>
>>>>>>
>>>>>> Add timestamping option SOF_TIMESTAMPING_OPT_TSONLY. For transmit
>>>>>> timestamps, this loops timestamps on top of empty packets.
>>>>>>
>>>>>> Doing so reduces the pressure on SO_RCVBUF. Payload inspection and
>>>>>> cmsg reception (aside from timestamps) are no longer possible. This
>>>>>> works together with a follow on patch that allows administrators to
>>>>>> only allow tx timestamping if it does not loop payload or metadata.
>>>>>
>>>>> If this loses IP_PKTINFO, that will be a bit unfortunate.
>>>>>
>>>>
>>>> If it doesn't, then we might as well loop the entire payload. For applications
>>>> that need pktinfo or other cmsg, do not select the option.
>>>
>>> Right, but it loses the ability to get the ifindex if the sysctl is
>>> set to the conservative option, which I don't think is desirable.
>>
>> Understood. I just find the alternative, where the no-data policy is
>> weakened by exceptions, even less desirable. That makes it
>> harder to explain what the sysctl/option do and what the security
>> implications are.
>
> Agreed.
>
> If there was no-payload but not no-cmsg, then it would be a nice
> middle ground, but I guess that's bad for some reason involving
> accounting?

Enabling all cmsg opens up quite a few holes, including the tos
options that we previously discussed. Also, these are implemented
by reading the relevant bits from the payload at recvmsg time, so
at least we would have to queue the full payload (though not
necessarily return it).

> --Andy
>
> --
> Andy Lutomirski
> AMA Capital Management, LLC

^ permalink raw reply

* Re: route/max_size sysctl in ipv4
From: Ani Sinha @ 2015-01-09 21:23 UTC (permalink / raw)
  To: Cong Wang; +Cc: Eric Dumazet, David Miller, netdev@vger.kernel.org
In-Reply-To: <CAHA+R7Pi=87b_umrKnd4bsZ+DPkRoTWhNyPwAND5pDzF_3EDaQ@mail.gmail.com>

On Fri, Jan 9, 2015 at 11:37 AM, Cong Wang <cwang@twopensource.com> wrote:
> On Fri, Jan 9, 2015 at 11:08 AM, Ani Sinha <ani@arista.com> wrote:
>>
>> Perhaps. What I am truly confused about is :
>>
>> - We are keeping a sysctl interface that does absolutely nothing in
>> the kernel and is completely useless in case some userland
>> scripts/tools are rendered broken from its removal.
>
> I am all for getting rid of it, sane script should always check
> the sysctl existence first. I think we did remove some sysctl's
> from kernel before.

I'd be much happier if things break uniformly everywhere - within and
outside namespaces. Besides, keeping a useless sysctl around that does
nothing is confusing and error prone, to say the least.

If we do decide to keep the sysctl around, I beg that we at least
update the documentation to reflect the true fact. I have already sent
a patch for this.

^ permalink raw reply

* Re: [PATCH net-next RFC 1/5] net-timestamp: no-payload option
From: Andy Lutomirski @ 2015-01-09 22:00 UTC (permalink / raw)
  To: Willem de Bruijn
  Cc: Network Development, David S. Miller, Richard Cochran,
	Eric Dumazet
In-Reply-To: <CA+FuTSe90dh+8QnABH=GGVrExqjur_OW8WaWS3b6J2BogbhCQQ@mail.gmail.com>

OK, makes sense.

On Fri, Jan 9, 2015 at 1:18 PM, Willem de Bruijn <willemb@google.com> wrote:
> On Fri, Jan 9, 2015 at 3:55 PM, Andy Lutomirski <luto@amacapital.net> wrote:
>> On Fri, Jan 9, 2015 at 12:33 PM, Willem de Bruijn <willemb@google.com> wrote:
>>> On Fri, Jan 9, 2015 at 3:02 PM, Andy Lutomirski <luto@amacapital.net> wrote:
>>>> On Fri, Jan 9, 2015 at 11:47 AM, Willem de Bruijn <willemb@google.com> wrote:
>>>>> On Fri, Jan 9, 2015 at 2:43 PM, Andy Lutomirski <luto@amacapital.net> wrote:
>>>>>> On Fri, Jan 9, 2015 at 9:31 AM, Willem de Bruijn <willemb@google.com> wrote:
>>>>>>> From: Willem de Bruijn <willemb@google.com>
>>>>>>>
>>>>>>> Add timestamping option SOF_TIMESTAMPING_OPT_TSONLY. For transmit
>>>>>>> timestamps, this loops timestamps on top of empty packets.
>>>>>>>
>>>>>>> Doing so reduces the pressure on SO_RCVBUF. Payload inspection and
>>>>>>> cmsg reception (aside from timestamps) are no longer possible. This
>>>>>>> works together with a follow on patch that allows administrators to
>>>>>>> only allow tx timestamping if it does not loop payload or metadata.
>>>>>>
>>>>>> If this loses IP_PKTINFO, that will be a bit unfortunate.
>>>>>>
>>>>>
>>>>> If it doesn't, then we might as well loop the entire payload. For applications
>>>>> that need pktinfo or other cmsg, do not select the option.
>>>>
>>>> Right, but it loses the ability to get the ifindex if the sysctl is
>>>> set to the conservative option, which I don't think is desirable.
>>>
>>> Understood. I just find the alternative, where the no-data policy is
>>> weakened by exceptions, even less desirable. That makes it
>>> harder to explain what the sysctl/option do and what the security
>>> implications are.
>>
>> Agreed.
>>
>> If there was no-payload but not no-cmsg, then it would be a nice
>> middle ground, but I guess that's bad for some reason involving
>> accounting?
>
> Enabling all cmsg opens up quite a few holes, including the tos
> options that we previously discussed. Also, these are implemented
> by reading the relevant bits from the payload at recvmsg time, so
> at least we would have to queue the full payload (though not
> necessarily return it).
>
>> --Andy
>>
>> --
>> Andy Lutomirski
>> AMA Capital Management, LLC



-- 
Andy Lutomirski
AMA Capital Management, LLC

^ permalink raw reply

* Re: [PATCH 2/6] vxlan: Group Policy extension
From: Thomas Graf @ 2015-01-09 22:10 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: David S. Miller, Jesse Gross, Stephen Hemminger, Pravin Shelar,
	Tom Herbert, netdev@vger.kernel.org, dev@openvswitch.org
In-Reply-To: <CAADnVQKUNc4pu7W6Riies3tVeh848ihHidb3iM2UnbEmea-Y-Q@mail.gmail.com>

On 01/09/15 at 09:37am, Alexei Starovoitov wrote:
> On Thu, Jan 8, 2015 at 2:47 PM, Thomas Graf <tgraf@suug.ch> wrote:
> > +
> > +struct vxlan_gbp {
> > +} __packed;
> 
> empty struct ? seems unused.
> looks good to me otherwise.

Poor leftover, must feel all lonely there. Thanks for the reviews.
Will wait a little bit longer for more feedback and send out v3.

^ permalink raw reply

* Re: [PATCH net v3] gen_stats.c: Duplicate xstats buffer for later use
From: Cong Wang @ 2015-01-09 22:35 UTC (permalink / raw)
  To: Ignacy Gawędzki, netdev
In-Reply-To: <20150108133037.GA10389@zenon.in.qult.net>

On Thu, Jan 8, 2015 at 5:30 AM, Ignacy Gawędzki
<ignacy.gawedzki@green-communications.fr> wrote:
> The gnet_stats_copy_app() function gets called, more often than not, with its
> second argument a pointer to an automatic variable in the caller's stack.
> Therefore, to avoid copying garbage afterwards when calling
> gnet_stats_finish_copy(), this data is better copied to a dynamically allocated
> memory that gets freed after use.

Good catch!

>
> Signed-off-by: Ignacy Gawędzki <ignacy.gawedzki@green-communications.fr>
> ---
>  net/core/gen_stats.c | 15 ++++++++++++++-
>  1 file changed, 14 insertions(+), 1 deletion(-)
>
> diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c
> index 0c08062..c9f1fa8 100644
> --- a/net/core/gen_stats.c
> +++ b/net/core/gen_stats.c
> @@ -32,6 +32,9 @@ gnet_stats_copy(struct gnet_dump *d, int type, void *buf, int size)
>         return 0;
>
>  nla_put_failure:
> +       kfree(d->xstats);
> +       d->xstats = NULL;
> +       d->xstats_len = 0;
>         spin_unlock_bh(d->lock);
>         return -1;
>  }

Although it is not your fault, the API sucks, gnet_stats_copy()
should not need to care about the error path of its caller.
I will cook a patch for it.

> @@ -305,7 +308,10 @@ int
>  gnet_stats_copy_app(struct gnet_dump *d, void *st, int len)
>  {
>         if (d->compat_xstats) {
> -               d->xstats = st;
> +               kfree(d->xstats);
> +               d->xstats = kmemdup(st, len, GFP_ATOMIC);
> +               if (!d->xstats)
> +                       goto kmalloc_failure;

Do we really need to call kfree() before kmemdup()?
I don't think gnet_stats_copy_app() is supposed to be called
twice before gnet_stats_finish_copy()?

Thanks.

^ permalink raw reply

* Re: [patch -next] net: eth: xgene: devm_ioremap() returns NULL on error
From: Iyappan Subramanian @ 2015-01-09 22:56 UTC (permalink / raw)
  To: Dan Carpenter
  Cc: Feng Kan, Keyur Chudgar, Grant Likely, Rob Herring, netdev,
	kernel-janitors
In-Reply-To: <20150108105211.GB10597@mwanda>

On Thu, Jan 8, 2015 at 2:52 AM, Dan Carpenter <dan.carpenter@oracle.com> wrote:
> devm_ioremap() returns NULL on failure, it doesn't return an ERR_PTR.
>
> Fixes: de7b5b3d790a ('net: eth: xgene: change APM X-Gene SoC platform ethernet to support ACPI')
> Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
>
> diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
> index 1e56bf3..02add38 100644
> --- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
> +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
> @@ -804,9 +804,9 @@ static int xgene_enet_get_resources(struct xgene_enet_pdata *pdata)
>                 return -ENODEV;
>         }
>         pdata->base_addr = devm_ioremap(dev, res->start, resource_size(res));
> -       if (IS_ERR(pdata->base_addr)) {
> +       if (!pdata->base_addr) {
>                 dev_err(dev, "Unable to retrieve ENET Port CSR region\n");
> -               return PTR_ERR(pdata->base_addr);
> +               return -ENOMEM;
>         }
>
>         res = platform_get_resource(pdev, IORESOURCE_MEM, RES_RING_CSR);
> @@ -816,9 +816,9 @@ static int xgene_enet_get_resources(struct xgene_enet_pdata *pdata)
>         }
>         pdata->ring_csr_addr = devm_ioremap(dev, res->start,
>                                                         resource_size(res));
> -       if (IS_ERR(pdata->ring_csr_addr)) {
> +       if (!pdata->ring_csr_addr) {
>                 dev_err(dev, "Unable to retrieve ENET Ring CSR region\n");
> -               return PTR_ERR(pdata->ring_csr_addr);
> +               return -ENOMEM;
>         }
>
>         res = platform_get_resource(pdev, IORESOURCE_MEM, RES_RING_CMD);
> @@ -828,9 +828,9 @@ static int xgene_enet_get_resources(struct xgene_enet_pdata *pdata)
>         }
>         pdata->ring_cmd_addr = devm_ioremap(dev, res->start,
>                                                         resource_size(res));
> -       if (IS_ERR(pdata->ring_cmd_addr)) {
> +       if (!pdata->ring_cmd_addr) {
>                 dev_err(dev, "Unable to retrieve ENET Ring command region\n");
> -               return PTR_ERR(pdata->ring_cmd_addr);
> +               return -ENOMEM;
>         }
>
>         ret = platform_get_irq(pdev, 0);

Acked-by: Iyappan Subramanian <isubramanian@apm.com>

^ permalink raw reply

* Re: [PATCH RFC net-next] ip_tunnel: create percpu gro_cell
From: Eric Dumazet @ 2015-01-09 22:57 UTC (permalink / raw)
  To: Martin KaFai Lau; +Cc: netdev, kernel-team
In-Reply-To: <1420828933-844290-1-git-send-email-kafai@fb.com>

On Fri, 2015-01-09 at 10:42 -0800, Martin KaFai Lau wrote:
> In the ipip tunnel, the skb->queue_mapping is lost in ipip_rcv().
> All skb will be queued to the same cell->napi_skbs.  The
> gro_cell_poll is pinned to one core under load. In production traffic,
> we also see severe rx_dropped in the tunl iface and it is probably due to
> this limit: skb_queue_len(&cell->napi_skbs) > netdev_max_backlog
> 
> This patch is trying to alloc_percpu(struct gro_cell) and schedule
> gro_cell_poll to process it in the same core.


I wrote a similar patch here at Google, but I removed the
____cacheline_aligned_in_smp attribute on struct gro_cell

It was needed because of kcalloc(), but with alloc_percpu() there is no
gain in adding padding.

^ permalink raw reply

* Re: [PATCH net v3] gen_stats.c: Duplicate xstats buffer for later use
From: Ignacy Gawędzki @ 2015-01-09 23:01 UTC (permalink / raw)
  To: Cong Wang; +Cc: netdev
In-Reply-To: <CAHA+R7OpbP_+iwh6X=sh4SsXJ4VreBjze7dPrV6Xh8RHEe+cGw@mail.gmail.com>

On Fri, Jan 09, 2015 at 02:35:41PM -0800, thus spake Cong Wang:
> On Thu, Jan 8, 2015 at 5:30 AM, Ignacy Gawędzki
> > diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c
> > index 0c08062..c9f1fa8 100644
> > --- a/net/core/gen_stats.c
> > +++ b/net/core/gen_stats.c
> > @@ -32,6 +32,9 @@ gnet_stats_copy(struct gnet_dump *d, int type, void *buf, int size)
> >         return 0;
> >
> >  nla_put_failure:
> > +       kfree(d->xstats);
> > +       d->xstats = NULL;
> > +       d->xstats_len = 0;
> >         spin_unlock_bh(d->lock);
> >         return -1;
> >  }
> 
> Although it is not your fault, the API sucks, gnet_stats_copy()
> should not need to care about the error path of its caller.
> I will cook a patch for it.

That would be great, as I'm certainly not that fluent in kernel code.

> > @@ -305,7 +308,10 @@ int
> >  gnet_stats_copy_app(struct gnet_dump *d, void *st, int len)
> >  {
> >         if (d->compat_xstats) {
> > -               d->xstats = st;
> > +               kfree(d->xstats);
> > +               d->xstats = kmemdup(st, len, GFP_ATOMIC);
> > +               if (!d->xstats)
> > +                       goto kmalloc_failure;
> 
> Do we really need to call kfree() before kmemdup()?
> I don't think gnet_stats_copy_app() is supposed to be called
> twice before gnet_stats_finish_copy()?

It's not, but I figured it's safer that way and probably not that expensive
given the frequency with which calls to these functions are performed in
practice.

-- 
Ignacy Gawędzki
R&D Engineer
Green Communications

^ permalink raw reply

* [PATCH] usb/kaweth: use GFP_ATOMIC under spin_lock in usb_start_wait_urb()
From: Alexey Khoroshilov @ 2015-01-09 23:16 UTC (permalink / raw)
  To: David S. Miller
  Cc: Alexey Khoroshilov, linux-usb, netdev, linux-kernel, ldv-project

Commit e4c7f259c5be ("USB: kaweth.c: use GFP_ATOMIC under spin_lock")
makes sure that kaweth_internal_control_msg() allocates memory with GFP_ATOMIC,
but kaweth_internal_control_msg() also calls usb_start_wait_urb()
that still allocates memory with GFP_NOIO.

The patch fixes usb_start_wait_urb() as well.

Found by Linux Driver Verification project (linuxtesting.org).

Signed-off-by: Alexey Khoroshilov <khoroshilov@ispras.ru>
---
 drivers/net/usb/kaweth.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/usb/kaweth.c b/drivers/net/usb/kaweth.c
index dcb6d33141e0..1e9cdca37014 100644
--- a/drivers/net/usb/kaweth.c
+++ b/drivers/net/usb/kaweth.c
@@ -1276,7 +1276,7 @@ static int usb_start_wait_urb(struct urb *urb, int timeout, int* actual_length)
         awd.done = 0;

         urb->context = &awd;
-        status = usb_submit_urb(urb, GFP_NOIO);
+        status = usb_submit_urb(urb, GFP_ATOMIC);
         if (status) {
                 // something went wrong
                 usb_free_urb(urb);
-- 
1.9.1

^ permalink raw reply related

* Re: [PATCH RFC net-next] ip_tunnel: create percpu gro_cell
From: Martin Lau @ 2015-01-09 23:17 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: netdev, kernel-team
In-Reply-To: <1420844264.5947.81.camel@edumazet-glaptop2.roam.corp.google.com>

On Fri, Jan 09, 2015 at 02:57:44PM -0800, Eric Dumazet wrote:
> On Fri, 2015-01-09 at 10:42 -0800, Martin KaFai Lau wrote:
> > In the ipip tunnel, the skb->queue_mapping is lost in ipip_rcv().
> > All skb will be queued to the same cell->napi_skbs.  The
> > gro_cell_poll is pinned to one core under load. In production traffic,
> > we also see severe rx_dropped in the tunl iface and it is probably due to
> > this limit: skb_queue_len(&cell->napi_skbs) > netdev_max_backlog
> > 
> > This patch is trying to alloc_percpu(struct gro_cell) and schedule
> > gro_cell_poll to process it in the same core.
> 
> 
> I wrote a similar patch here at Google,  but I removed the
> ____cacheline_aligned_in_smp attribute on struct gro_cell
> 
> It was needed because of kcalloc(), but with alloc_percpu() there is no
> gain in adding padding.
Is the spin_lock() still needed?

Thanks,
--Martin

^ permalink raw reply

* [PATCH net-next] Driver: Vmxnet3: Reinitialize vmxnet3 backend on wakeup from hibernate
From: Shrikrishna Khare @ 2015-01-09 23:19 UTC (permalink / raw)
  To: sbhatewara, pv-drivers, netdev, linux-kernel
  Cc: Shrikrishna Khare, Srividya Murali

Failing to reinitialize on wakeup results in loss of network connectivity for
the vmxnet3 interface.

Signed-off-by: Srividya Murali <smurali@vmware.com>
Signed-off-by: Shrikrishna Khare <skhare@vmware.com>
Reviewed-by: Shreyas N Bhatewara <sbhatewara@vmware.com>
---
 drivers/net/vmxnet3/vmxnet3_drv.c |   44 +++++++++++++++++++++----------------
 drivers/net/vmxnet3/vmxnet3_int.h |    4 +-
 2 files changed, 27 insertions(+), 21 deletions(-)

diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c
index 7af1f5c..3143981 100644
--- a/drivers/net/vmxnet3/vmxnet3_drv.c
+++ b/drivers/net/vmxnet3/vmxnet3_drv.c
@@ -3290,27 +3290,15 @@ skip_arp:
 static int
 vmxnet3_resume(struct device *device)
 {
-	int err, i = 0;
+	int err;
 	unsigned long flags;
 	struct pci_dev *pdev = to_pci_dev(device);
 	struct net_device *netdev = pci_get_drvdata(pdev);
 	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
-	struct Vmxnet3_PMConf *pmConf;
 
 	if (!netif_running(netdev))
 		return 0;
 
-	/* Destroy wake-up filters. */
-	pmConf = adapter->pm_conf;
-	memset(pmConf, 0, sizeof(*pmConf));
-
-	adapter->shared->devRead.pmConfDesc.confVer = cpu_to_le32(1);
-	adapter->shared->devRead.pmConfDesc.confLen = cpu_to_le32(sizeof(
-								  *pmConf));
-	adapter->shared->devRead.pmConfDesc.confPA =
-		cpu_to_le64(adapter->pm_conf_pa);
-
-	netif_device_attach(netdev);
 	pci_set_power_state(pdev, PCI_D0);
 	pci_restore_state(pdev);
 	err = pci_enable_device_mem(pdev);
@@ -3319,15 +3307,31 @@ vmxnet3_resume(struct device *device)
 
 	pci_enable_wake(pdev, PCI_D0, 0);
 
+	vmxnet3_alloc_intr_resources(adapter);
+
+	/* During hibernate and suspend, device has to be reinitialized as the
+	 * device state need not be preserved.
+	 */
+
+	/* Need not check adapter state as other reset tasks cannot run during
+	 * device resume.
+	 */
 	spin_lock_irqsave(&adapter->cmd_lock, flags);
 	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
-			       VMXNET3_CMD_UPDATE_PMCFG);
+			       VMXNET3_CMD_QUIESCE_DEV);
 	spin_unlock_irqrestore(&adapter->cmd_lock, flags);
-	vmxnet3_alloc_intr_resources(adapter);
-	vmxnet3_request_irqs(adapter);
-	for (i = 0; i < adapter->num_rx_queues; i++)
-		napi_enable(&adapter->rx_queue[i].napi);
-	vmxnet3_enable_all_intrs(adapter);
+	vmxnet3_tq_cleanup_all(adapter);
+	vmxnet3_rq_cleanup_all(adapter);
+
+	vmxnet3_reset_dev(adapter);
+	err = vmxnet3_activate_dev(adapter);
+	if (err != 0) {
+		netdev_err(netdev,
+			   "failed to re-activate on resume, error: %d", err);
+		vmxnet3_force_close(adapter);
+		return err;
+	}
+	netif_device_attach(netdev);
 
 	return 0;
 }
@@ -3335,6 +3339,8 @@ vmxnet3_resume(struct device *device)
 static const struct dev_pm_ops vmxnet3_pm_ops = {
 	.suspend = vmxnet3_suspend,
 	.resume = vmxnet3_resume,
+	.freeze = vmxnet3_suspend,
+	.restore = vmxnet3_resume,
 };
 #endif
 
diff --git a/drivers/net/vmxnet3/vmxnet3_int.h b/drivers/net/vmxnet3/vmxnet3_int.h
index 048f020..6297d9f 100644
--- a/drivers/net/vmxnet3/vmxnet3_int.h
+++ b/drivers/net/vmxnet3/vmxnet3_int.h
@@ -69,10 +69,10 @@
 /*
  * Version numbers
  */
-#define VMXNET3_DRIVER_VERSION_STRING   "1.3.1.0-k"
+#define VMXNET3_DRIVER_VERSION_STRING   "1.3.2.0-k"
 
 /* a 32-bit int, each byte encode a verion number in VMXNET3_DRIVER_VERSION */
-#define VMXNET3_DRIVER_VERSION_NUM      0x01030100
+#define VMXNET3_DRIVER_VERSION_NUM      0x01030200
 
 #if defined(CONFIG_PCI_MSI)
 	/* RSS only makes sense if MSI-X is supported. */
-- 
1.7.4.1

^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox