Netdev List

Netdev List
 help / color / mirror / Atom feed

* Re: [PATCH net-next 1/2] ethtool: Support for configurable RSS hash function
From: Ben Hutchings @ 2014-11-05 21:51 UTC (permalink / raw)
  To: Amir Vadai
  Cc: David S. Miller, netdev, Or Gerlitz, Eyal Perry, Yevgeny Petrilin
In-Reply-To: <1415188769-19593-2-git-send-email-amirv@mellanox.com>

[-- Attachment #1: Type: text/plain, Size: 7383 bytes --]

On Wed, 2014-11-05 at 13:59 +0200, Amir Vadai wrote:
> From: Eyal Perry <eyalpe@mellanox.com>
> 
> This patch adds an RSS hash functions string-set, and two
> ethtool-options for set/get current RSS hash function. User-kernel API is done
> through the new hfunc mask field in the ethtool_rxfh struct. A bit set
> in the hfunc is corresponding to an index in the string-set.
> 
> Signed-off-by: Eyal Perry <eyalpe@mellanox.com>
> Signed-off-by: Amir Vadai <amirv@mellanox.com>
> ---
>  include/linux/ethtool.h      | 28 ++++++++++++++++++++++++
>  include/uapi/linux/ethtool.h |  6 ++++-
>  net/core/ethtool.c           | 52 ++++++++++++++++++++++++++++++--------------
>  3 files changed, 69 insertions(+), 17 deletions(-)
> 
> diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
> index c1a2d60..61003b1 100644
> --- a/include/linux/ethtool.h
> +++ b/include/linux/ethtool.h
> @@ -59,6 +59,29 @@ enum ethtool_phys_id_state {
>  	ETHTOOL_ID_OFF
>  };
>  
> +enum {
> +	RSS_HASH_TOP_BIT, /* Configurable RSS hash function - Toeplitz */
> +	RSS_HASH_XOR_BIT, /* Configurable RSS hash function - Xor */
> +
> +	/*
> +	 * Add your fresh new hash function bits above and remember to update
> +	 * rss_hash_func_strings[] below
> +	 */
> +	RSS_HASH_FUNCS_COUNT
> +};
> +
> +#define __RSS_HASH_BIT(bit)	((u32)1 << (bit))
> +#define __RSS_HASH(name)	 __RSS_HASH_BIT(RSS_HASH_##name##_BIT)
> +
> +#define RSS_HASH_TOP		__RSS_HASH(TOP)
> +#define RSS_HASH_XOR		__RSS_HASH(XOR)

I think #define RSS_HASH_UNKNOWN 0 might also be useful.

And I think all of these names should get an ETH_ prefix.

> +static const char
> +rss_hash_func_strings[RSS_HASH_FUNCS_COUNT][ETH_GSTRING_LEN] = {
> +	[RSS_HASH_TOP_BIT] =     "toeplitz",
> +	[RSS_HASH_XOR_BIT] =     "xor",
> +};

This belongs in net/core/ethtool.c.

>  struct net_device;
>  
>  /* Some generic methods drivers may use in their ethtool_ops */
> @@ -158,6 +181,9 @@ static inline u32 ethtool_rxfh_indir_default(u32 index, u32 n_rx_rings)
>   *	Returns zero if not supported for this specific device.
>   * @get_rxfh_indir_size: Get the size of the RX flow hash indirection table.
>   *	Returns zero if not supported for this specific device.
> + * @get_rxfh_func: Get the hardware RX flow hash function.
> + * @set_rxfh_func: Set the hardware RX flow hash function. Returns a negative
> + *	error code or zero.
>   * @get_rxfh: Get the contents of the RX flow hash indirection table and hash
>   *	key.
>   *	Will only be called if one or both of @get_rxfh_indir_size and
> @@ -241,6 +267,8 @@ struct ethtool_ops {
>  	int	(*reset)(struct net_device *, u32 *);
>  	u32	(*get_rxfh_key_size)(struct net_device *);
>  	u32	(*get_rxfh_indir_size)(struct net_device *);
> +	u32	(*get_rxfh_func)(struct net_device *);
> +	int	(*set_rxfh_func)(struct net_device *, u32);

Why not another parameter to get_rxfh/set_rxfh?  I know it's a pain to
update all the implementations, but changing algorithm potentially
changes the supported indirection table and key lengths.  They have to
be validated together.

>  	int	(*get_rxfh)(struct net_device *, u32 *indir, u8 *key);
>  	int	(*set_rxfh)(struct net_device *, const u32 *indir,
>  			    const u8 *key);
> diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
> index eb2095b..eb91da4 100644
> --- a/include/uapi/linux/ethtool.h
> +++ b/include/uapi/linux/ethtool.h
> @@ -534,6 +534,7 @@ struct ethtool_pauseparam {
>   * @ETH_SS_NTUPLE_FILTERS: Previously used with %ETHTOOL_GRXNTUPLE;
>   *	now deprecated
>   * @ETH_SS_FEATURES: Device feature names
> + * @ETH_SS_RSS_HASH_FUNCS: RSS hash function names
>   */
>  enum ethtool_stringset {
>  	ETH_SS_TEST		= 0,
> @@ -541,6 +542,7 @@ enum ethtool_stringset {
>  	ETH_SS_PRIV_FLAGS,
>  	ETH_SS_NTUPLE_FILTERS,
>  	ETH_SS_FEATURES,
> +	ETH_SS_RSS_HASH_FUNCS,
>  };
>  
>  /**
> @@ -900,7 +902,9 @@ struct ethtool_rxfh {
>  	__u32	rss_context;
>  	__u32   indir_size;
>  	__u32   key_size;
> -	__u32	rsvd[2];
> +	__u8	hfunc;

Missing kernel-doc.  This needs to be very clear about what the valid
values are.

> +	__u8	rsvd8[3];
> +	__u32	rsvd32;
>  	__u32   rss_config[0];
>  };
>  #define ETH_RXFH_INDIR_NO_CHANGE	0xffffffff
> diff --git a/net/core/ethtool.c b/net/core/ethtool.c
> index 06dfb29..4791c17 100644
> --- a/net/core/ethtool.c
> +++ b/net/core/ethtool.c
> @@ -185,6 +185,9 @@ static int __ethtool_get_sset_count(struct net_device *dev, int sset)
>  	if (sset == ETH_SS_FEATURES)
>  		return ARRAY_SIZE(netdev_features_strings);
>  
> +	if (sset == ETH_SS_RSS_HASH_FUNCS)
> +		return ARRAY_SIZE(rss_hash_func_strings);
> +
>  	if (ops->get_sset_count && ops->get_strings)
>  		return ops->get_sset_count(dev, sset);
>  	else
[...]
> @@ -760,32 +769,43 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
>  	const struct ethtool_ops *ops = dev->ethtool_ops;
>  	struct ethtool_rxnfc rx_rings;
>  	struct ethtool_rxfh rxfh;
> -	u32 dev_indir_size = 0, dev_key_size = 0, i;
> +	u32 dev_indir_size = 0, dev_key_size = 0, dev_hfunc = 0, i;
>  	u32 *indir = NULL, indir_bytes = 0;
>  	u8 *hkey = NULL;
>  	u8 *rss_config;
>  	u32 rss_cfg_offset = offsetof(struct ethtool_rxfh, rss_config[0]);
>  
> -	if (!(ops->get_rxfh_indir_size || ops->get_rxfh_key_size) ||
> -	    !ops->get_rxnfc || !ops->set_rxfh)
> +	if (!(ops->get_rxfh_indir_size || ops->get_rxfh_key_size ||
> +	      ops->get_rxfh_func) || !ops->get_rxnfc || !ops->set_rxfh)
>  		return -EOPNOTSUPP;
>  
> +	if (ops->get_rxfh_func)
> +		dev_hfunc = ops->get_rxfh_func(dev);
>  	if (ops->get_rxfh_indir_size)
>  		dev_indir_size = ops->get_rxfh_indir_size(dev);
>  	if (ops->get_rxfh_key_size)
>  		dev_key_size = dev->ethtool_ops->get_rxfh_key_size(dev);
> -	if ((dev_key_size + dev_indir_size) == 0)
> +	if ((dev_key_size + dev_indir_size + dev_hfunc) == 0)
>  		return -EOPNOTSUPP;
>  
>  	if (copy_from_user(&rxfh, useraddr, sizeof(rxfh)))
>  		return -EFAULT;
>  
>  	/* Check that reserved fields are 0 for now */
> -	if (rxfh.rss_context || rxfh.rsvd[0] || rxfh.rsvd[1])
> +	if (rxfh.rss_context || rxfh.rsvd8[0] || rxfh.rsvd8[1] ||
> +	    rxfh.rsvd8[2] || rxfh.rsvd32)
>  		return -EINVAL;
>  
> +	if (rxfh.hfunc != dev_hfunc) {
> +		if (!ops->set_rxfh_func)
> +			return -EOPNOTSUPP;
> +		ret = ops->set_rxfh_func(dev, rxfh.hfunc);
> +		if (ret)
> +			return ret;
> +	}
> +
>  	/* If either indir or hash key is valid, proceed further.
> -	 * It is not valid to request that both be unchanged.
> +	 * Must request at least one change: indir size, hash key or function.
>  	 */
>  	if ((rxfh.indir_size &&
>  	     rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE &&
> @@ -793,7 +813,7 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
>  	    (rxfh.key_size && (rxfh.key_size != dev_key_size)) ||
>  	    (rxfh.indir_size == ETH_RXFH_INDIR_NO_CHANGE &&
>  	     rxfh.key_size == 0))
> -		return -EINVAL;
> +		return rxfh.hfunc ? 0 : -EINVAL;

Shouldn't the condition be rxfh.hfunc != dev_hfunc ?

Ben.

>  	if (rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE)
>  		indir_bytes = dev_indir_size * sizeof(indir[0]);

-- 
Ben Hutchings
The program is absolutely right; therefore, the computer must be wrong.

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 811 bytes --]

^ permalink raw reply

* Re: [PATCH v2 net-next 0/7] gue: Remote checksum offload
From: David Miller @ 2014-11-05 21:44 UTC (permalink / raw)
  To: therbert; +Cc: netdev
In-Reply-To: <1415120817-2105-1-git-send-email-therbert@google.com>

From: Tom Herbert <therbert@google.com>
Date: Tue,  4 Nov 2014 09:06:50 -0800

> This patch set implements remote checksum offload for
> GUE, which is a mechanism that provides checksum offload of
> encapsulated packets using rudimentary offload capabilities found in
> most Network Interface Card (NIC) devices.

Series applied, thanks for doing this work Tom.

^ permalink raw reply

* Re: [patch net-next] sched: fix act file names in header comment
From: Cong Wang @ 2014-11-05 21:38 UTC (permalink / raw)
  To: Jiri Pirko; +Cc: netdev, David Miller, Jamal Hadi Salim
In-Reply-To: <1415217111-9803-1-git-send-email-jiri@resnulli.us>

On Wed, Nov 5, 2014 at 11:51 AM, Jiri Pirko <jiri@resnulli.us> wrote:
> diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
> index d6bcbd9..7fffc22 100644
> --- a/net/sched/act_gact.c
> +++ b/net/sched/act_gact.c
> @@ -1,5 +1,5 @@
>  /*
> - * net/sched/gact.c    Generic actions
> + * net/sched/act_gact.c                Generic actions

Or maybe just remove the file name from the comment?
I have no strong opinions here.

^ permalink raw reply

* Re: [PATCH net 0/5] Implement ndo_gso_check() for vxlan nics
From: David Miller @ 2014-11-05 21:38 UTC (permalink / raw)
  To: gerlitz.or
  Cc: therbert, joestringer, netdev, sathya.perla, jeffrey.t.kirsher,
	linux.nics, amirv, shahed.shaikh, Dept-GELinuxNICDev,
	linux-kernel
In-Reply-To: <CAJ3xEMhxabp1EKbFPfvoZEFt=CQKFNcNrURGMX=7miDwmK9Y-A@mail.gmail.com>

From: Or Gerlitz <gerlitz.or@gmail.com>
Date: Wed, 5 Nov 2014 23:32:44 +0200

> but fact is that the proposed patch series has the --same-- helper for
> four drivers, so why not start with that limited helper which would
> be picked up by these drivers and we'll take it from there.

I'm in favor of the helper, duplication is error prone.

And in fact, any differences a driver ends up needing might be
integratable into the helper.

^ permalink raw reply

* Re: [PATCH net 0/5] Implement ndo_gso_check() for vxlan nics
From: Or Gerlitz @ 2014-11-05 21:32 UTC (permalink / raw)
  To: Tom Herbert
  Cc: Joe Stringer, Linux Netdev List, sathya.perla, Jeff Kirsher,
	linux.nics, Amir Vadai, shahed.shaikh, dept-gelinuxnicdev,
	Linux Kernel
In-Reply-To: <CA+mtBx_BOmV+tOtio48M+xRatK79B31LoGnf4uvFJAbxYqOt_w@mail.gmail.com>

On Wed, Nov 5, 2014 at 8:00 PM, Tom Herbert <therbert@google.com> wrote:
> On Wed, Nov 5, 2014 at 9:50 AM, Joe Stringer <joestringer@nicira.com> wrote:
>>
>> On 5 November 2014 04:38, Or Gerlitz <gerlitz.or@gmail.com> wrote:
>>>
>>> On Tue, Nov 4, 2014 at 11:56 PM, Joe Stringer <joestringer@nicira.com> wrote:
>>> > Most NICs that report NETIF_F_GSO_UDP_TUNNEL support VXLAN, and not other
>>> > UDP-based encapsulation protocols where the format and size of the header may
>>> > differ. This patch series implements ndo_gso_check() for these NICs,
>>> > restricting the GSO handling to something that looks and smells like VXLAN.
>>> >
>>> > Implementation shamelessly stolen from Tom Herbert (with minor fixups):
>>> > http://thread.gmane.org/gmane.linux.network/332428/focus=333111
>>>
>>>
>>> Hi Joe,
>>>
>>> 1st, thanks for picking this task...2nd, for drivers that currently
>>> support only pure VXLAN, I don't see the point
>>> to replicate the helper suggested by Tom (good catch on the size check
>>> to be 16 and not 12) four times and who knows how many more in the future.
>>> Let's just have one generic helper and make the mlx4/be/fm10k/benet
>>> drivers to have it as their ndo, OK?
>>
>>
>> Thanks for taking a look.
>>
>> I had debated whether to do this or not as the actual support on each NIC may differ, and each implementation may morph over time to match these capabilities better. Obviously the vendors will know better than me on this, so I'm posing this series to prod them for more information. At this point I've had just one maintainer come back and confirm that this helper is a good fit for their hardware, so I'd like to confirm that multiple drivers will use a ndo_gso_check_vxlan_helper() function before I go and create it.
>
>
> Thanks for implementing this fix!
>
> Personally, I would rather not have the helper. This is already a
> small number of drivers, and each driver owner should consider what
> limitations are of their device and try to enable to allow the maximum
> number of use cases possible. I'm also hoping that new devices will
> implement the more generic mechanism so that VXLAN is just one
> supported protocol.

but fact is that the proposed patch series has the --same-- helper for
four drivers, so why not start with that limited helper which would
be picked up by these drivers and we'll take it from there.

^ permalink raw reply

* Re: [PATCH net v4] ipv6: mld: fix add_grhead skb_over_panic for devs with large MTUs
From: Daniel Borkmann @ 2014-11-05 21:31 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: davem, lw1a2.jing, netdev, Eric Dumazet, Hannes Frederic Sowa,
	David L Stevens
In-Reply-To: <1415220449.13896.15.camel@edumazet-glaptop2.roam.corp.google.com>

On 11/05/2014 09:47 PM, Eric Dumazet wrote:
...
> Thanks for your patience ;)

No problem, thanks for the feedback, Eric!

^ permalink raw reply

* Re: [PATCH net v2] vxlan: Do not reuse sockets for a different address family
From: Marcelo Ricardo Leitner @ 2014-11-05 21:29 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, stephen, sergei.shtylyov
In-Reply-To: <20141105.155913.297158120451064287.davem@davemloft.net>

On 05-11-2014 18:59, David Miller wrote:
> From: Marcelo Ricardo Leitner <mleitner@redhat.com>
> Date: Tue,  4 Nov 2014 15:03:20 -0200
>
>> @@ -281,7 +281,8 @@ static struct vxlan_sock *vxlan_find_sock(struct net *net, __be16 port)
>>   	struct vxlan_sock *vs;
>>
>>   	hlist_for_each_entry_rcu(vs, vs_head(net, port), hlist) {
>> -		if (inet_sk(vs->sock->sk)->inet_sport == port)
>> +		if (inet_sk(vs->sock->sk)->inet_sport == port &&
>> +		    inet_sk(vs->sock->sk)->sk.sk_family == family)
>
> You didn't even compile this.

This patch was part of a bigger change and I really thought I had tested it by 
itself, but clearly I was wrong, sorry.

(I changed the port hash to include the family in the key, and that's how that 
variable got there.)

> You're in the penalty box, and I'm ignoring your patches for at least
> a week, this is really unacceptable for an upstream patch submission.
>
> Sorry.

Ack, sorry again. It was twice in a bad day, but my bad. Won't happen again.

Marcelo

^ permalink raw reply

* Re: [PATCH (net.git) 0/5] stmmac: review and fix lock and atomicity
From: David Miller @ 2014-11-05 21:24 UTC (permalink / raw)
  To: peppe.cavallaro; +Cc: netdev
In-Reply-To: <1415117289-7733-1-git-send-email-peppe.cavallaro@st.com>

From: Giuseppe Cavallaro <peppe.cavallaro@st.com>
Date: Tue, 4 Nov 2014 17:08:04 +0100

> Recently some issues have been reported for the driver for locking mechanism
> and atomicity.
> 
> In fact, enabling DEBUG support to prove lock and to verify if sleeping while
> atomic context some warnings occur at runtime. I have reproduced all on STi
> platforms.
> 
> Concerning the tx path, I had provided a patch some time ago but
> I discarded the idea to completely remove locks; in this patch-set we can have
> some useful fixes instead.
> 
> This patch-set is to fix the atomicity in the PM stuff where I tried to collect
> all the points and advice reported in the past weeks.
> As final result, on my side no warnings and no problem when suspend/resume the
> driver on STi boxes.
> 
> I also added a patch that fixes the locks for the EEE.
> As pointed in some thread there was a design problem behind the eee
> initialization and I have tried to fix that before.
> As final result no issues when proving locks too.

Series applied, thanks.

^ permalink raw reply

* Re: [net-next.git 0/3 (v2)] stmmac: review driver Koptions
From: David Miller @ 2014-11-05 21:15 UTC (permalink / raw)
  To: peppe.cavallaro; +Cc: netdev
In-Reply-To: <1415112574-21321-1-git-send-email-peppe.cavallaro@st.com>

From: Giuseppe Cavallaro <peppe.cavallaro@st.com>
Date: Tue, 4 Nov 2014 15:49:31 +0100

> Recently many Koption options have been added to have new glue logic on several
> platforms.
> 
> The main goal behind this work is to guarantee that the driver builds 
> fine on all the branches where it is present independently of which
> glue logic is selected.
> 
> IMHO, it is better to remove all the not necessary Koption(s) that can hide 
> build problems when something changes in the driver and especially when 
> the DT compatibility allows us to manage all the platform data.
> 
> I compiled the driver w/o any issue on net-next Git for:
> 
>   x86, arm and sh4.
> 
> In case of there are build problems on some repos now it will be
> easy to catch them and cherry-pick patches from mainstream.
> 
> For sure, do not hesitate to contact me in case of issue.
> 
> Also this set removes STMMAC_DEBUG_FS and BUS_MODE_DA. The latter is useless
> and the former can be replaced by DEBUG_FS (always to make safe the build).
> 
> V2: patch-set re-based on top of the latest updates for net-next

Series applied, thanks.

^ permalink raw reply

* Re: [PATCH 1/4] inet: Add skb_copy_datagram_iter
From: Al Viro @ 2014-11-05 21:07 UTC (permalink / raw)
  To: David Miller; +Cc: herbert, netdev, linux-kernel, bcrl
In-Reply-To: <20141105.155054.2198151263164321219.davem@davemloft.net>

On Wed, Nov 05, 2014 at 03:50:54PM -0500, David Miller wrote:
> I think we can get away with it if, as you say, we don't export a 'msghdr'
> from any uapi headers.

OK.  I'm about halfway through the review of struct msghdr instances in the
current tree right now, will post user_msghdr patch once I'm done.  Already
found a dumb bug in o2net_send_tcp_msg() while doing that - broken by me
back in 3.15 ;-/  Will send a fix to Linus in an hour or so...

> And indeed, double checking, it's purely a linux/socket.h thing.
> 
> If this patch is OK, mind if I toss it into net-next Al?

Sure, no problem - AFAICS, the only real difference from rebase of April one
I've quoted upthread is that you add include of socket.h into skbuff.h; the
rest of the differences is pure whitespace noise.

Ping me when you put it there, OK?  I'll rebase the rest of old stuff on
top of it (similar helpers, mostly).

^ permalink raw reply

* Re: [PATCH] drivers: net: ethernet: xilinx: xilinx_emaclite: revert the original commit "1db3ddff1602edf2390b7667dcbaa0f71512e3ea"
From: David Miller @ 2014-11-05 21:01 UTC (permalink / raw)
  To: michal.simek
  Cc: gang.chen.5i5j, soren.brinkmann, sthokal, manuel.schoelling,
	f.fainelli, paul.gortmaker, ebiederm, netdev, linux-kernel
In-Reply-To: <a39951cc744047679d13b23e76c97347@BN1BFFO11FD022.protection.gbl>

From: Michal Simek <michal.simek@xilinx.com>
Date: Wed, 5 Nov 2014 08:19:41 +0100

> On 11/04/2014 03:43 PM, Chen Gang wrote:
>> Microblaze is a fpga soft core, it can be customized easily, which may
>> cause many various hardware version strings.
>> 
>> So the original fix patch based on hard-coded compatible version strings
>> is not a good idea (although it is correct for current issue). For it,
>> there will be a new solving way soon (which based on the device tree).
>> 
>> The original issue is related with qemu, so can only change the hardware
>> version string in qemu for it, then keep the original driver no touch (
>> qemu is for virtualization which has much easier life than real world).
>> 
>> 
>> Signed-off-by: Chen Gang <gang.chen.5i5j@gmail.com>
>> ---
>>  drivers/net/ethernet/xilinx/xilinx_emaclite.c | 1 -
>>  1 file changed, 1 deletion(-)
> 
> Acked-by: Michal Simek <michal.simek@xilinx.com>

Applied, thanks.

^ permalink raw reply

* Re: [PATCH net v2] vxlan: Do not reuse sockets for a different address family
From: David Miller @ 2014-11-05 20:59 UTC (permalink / raw)
  To: mleitner; +Cc: netdev, stephen, sergei.shtylyov
In-Reply-To: <e7e226e9bef6f03ac249bc8e5ebf0938944ace3c.1415120464.git.mleitner@redhat.com>

From: Marcelo Ricardo Leitner <mleitner@redhat.com>
Date: Tue,  4 Nov 2014 15:03:20 -0200

> @@ -281,7 +281,8 @@ static struct vxlan_sock *vxlan_find_sock(struct net *net, __be16 port)
>  	struct vxlan_sock *vs;
>  
>  	hlist_for_each_entry_rcu(vs, vs_head(net, port), hlist) {
> -		if (inet_sk(vs->sock->sk)->inet_sport == port)
> +		if (inet_sk(vs->sock->sk)->inet_sport == port &&
> +		    inet_sk(vs->sock->sk)->sk.sk_family == family)

You didn't even compile this.

You're in the penalty box, and I'm ignoring your patches for at least
a week, this is really unacceptable for an upstream patch submission.

Sorry.

^ permalink raw reply

* Re: [PATCH] net: dsa: slave: Fix autoneg for phys on switch MDIO bus
From: Florian Fainelli @ 2014-11-05 20:53 UTC (permalink / raw)
  To: Andrew Lunn, davem; +Cc: netdev
In-Reply-To: <1415213248-29037-1-git-send-email-andrew@lunn.ch>

On 11/05/2014 10:47 AM, Andrew Lunn wrote:
> When the ports phys are connected to the switches internal MDIO bus,
> we need to connect the phy to the slave netdev, otherwise
> auto-negotiation etc, does not work.
> 
> Signed-off-by: Andrew Lunn <andrew@lunn.ch>
> ---
> 
> Hi Florian
> 
> Is this the right fix?
> 
> What i found is that ports on mv88E6171 were coming up as 10/half.  If i
> forced a renegotiation with ethtool -r lan0, the phy would then goto
> 1000/full.
> 
> The code for phys on internal switch MDIO buses never connect the phy
> to the device, only phys listed in DT get connected via
> of_phy_connect(). By connecting the phy, the state machine is active
> and does an autoneg when the slave interface is opened.

This looks fine to me, I think we might want to revisit/nuke the code at
the end of dsa_slave_create since it:

- is racy: netif_carrier_off() is called before register_netdev()
- is double racy: we should be bound to a PHY before calling
register_netdev() otherwise there could be MDIO accesses done to that
PHY without an actual PHY being registered

> 
> net/dsa/slave.c | 7 +++++--
>  1 file changed, 5 insertions(+), 2 deletions(-)
> 
> diff --git a/net/dsa/slave.c b/net/dsa/slave.c
> index 6d1817449c36..ab03e00ffe8f 100644
> --- a/net/dsa/slave.c
> +++ b/net/dsa/slave.c
> @@ -489,11 +489,14 @@ static void dsa_slave_phy_setup(struct dsa_slave_priv *p,
>  	/* We could not connect to a designated PHY, so use the switch internal
>  	 * MDIO bus instead
>  	 */
> -	if (!p->phy)
> +	if (!p->phy) {
>  		p->phy = ds->slave_mii_bus->phy_map[p->port];
> -	else
> +		phy_connect_direct(slave_dev, p->phy, dsa_slave_adjust_link,
> +				   p->phy_interface);
> +	} else {
>  		pr_info("attached PHY at address %d [%s]\n",
>  			p->phy->addr, p->phy->drv->name);
> +	}
>  }
>  
>  int dsa_slave_suspend(struct net_device *slave_dev)
> 

^ permalink raw reply

* Re: [PATCH] include/linux/socket.h: Fix comment
From: David Miller @ 2014-11-05 20:52 UTC (permalink / raw)
  To: linux; +Cc: netdev, linux-kernel
In-Reply-To: <1415220267-20044-1-git-send-email-linux@rasmusvillemoes.dk>

From: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Date: Wed,  5 Nov 2014 21:44:27 +0100

> File descriptors are always closed on exit :-)
> 
> Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>

Applied, thanks.

^ permalink raw reply

* Re: net: fec: fix regression on i.MX28 introduced by rx_copybreak support
From: David Miller @ 2014-11-05 20:52 UTC (permalink / raw)
  To: LW
  Cc: fabio.estevam, Frank.Li, netdev, linux-kernel, rmk+kernel,
	linux-arm-kernel
In-Reply-To: <20141105062148.2a997b95@ipc1.ka-ro>

From: Lothar Waßmann <LW@KARO-electronics.de>
Date: Wed, 5 Nov 2014 06:21:48 +0100

> David Miller wrote:
>> Because it goes "(n + (d - 1)) / d"
>> 
>> Which for 'd' of 4 gives:
>> 
>> 1 --> 4
>> 2 --> 4
>> 3 --> 4
>> 4 --> 4
>>
> '1', not '4'.
> The loop has to be done once for each (probably partial) WORD of input
> data, not for each BYTE.

You're right, thanks for being so patient with me.

Please repost your patch, I'll integrate it.

Thanks.

^ permalink raw reply

* Re: [PATCH 1/4] inet: Add skb_copy_datagram_iter
From: David Miller @ 2014-11-05 20:51 UTC (permalink / raw)
  To: viro; +Cc: herbert, netdev, linux-kernel, bcrl
In-Reply-To: <20141105041231.GP7996@ZenIV.linux.org.uk>

From: Al Viro <viro@ZenIV.linux.org.uk>
Date: Wed, 5 Nov 2014 04:12:32 +0000

> On Wed, Nov 05, 2014 at 03:55:36AM +0000, Al Viro wrote:
>> What do you think of the trick with user_msghdr, BTW?
> 
> 	PS: where do you prefer the branches to be based off?
> git://git.kernel.org/pub/scm/linux/kernel/git/davem/net#master, mainline,
> something else?  I can certainly do that as patches over email, the
> question is what's best used as base...  FWIW, the analysis I've posted
> was in 3.18-rc3 and it looks like it ought to be valid in net#master
> as well.

Let's work against net-next, ie:

git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next#master

I can integrate your, mine, and Herbert's changes all into the same
place.

Thanks.

^ permalink raw reply

* Re: [PATCH 1/4] inet: Add skb_copy_datagram_iter
From: David Miller @ 2014-11-05 20:50 UTC (permalink / raw)
  To: viro; +Cc: herbert, netdev, linux-kernel, bcrl
In-Reply-To: <20141105035536.GO7996@ZenIV.linux.org.uk>

From: Al Viro <viro@ZenIV.linux.org.uk>
Date: Wed, 5 Nov 2014 03:55:36 +0000

> On Tue, Nov 04, 2014 at 10:27:27PM -0500, David Miller wrote:
> 
>> Al, is this the helper you are talking about?
> 
> Mostly, except that I kept it 4-argument (and used skb_copy_datagram_msg()
> for name).  Matter of taste - the ones you've missed because of that are
 ...
> and back then I decided that 13 more converted instances might be worth keeping
> it in 4-argument form...

Ok, fixed up patch below:

> What do you think of the trick with user_msghdr, BTW?

I think we can get away with it if, as you say, we don't export a 'msghdr'
from any uapi headers.

And indeed, double checking, it's purely a linux/socket.h thing.

If this patch is OK, mind if I toss it into net-next Al?

diff --git a/drivers/isdn/mISDN/socket.c b/drivers/isdn/mISDN/socket.c
index 1be8228..dcbd858 100644
--- a/drivers/isdn/mISDN/socket.c
+++ b/drivers/isdn/mISDN/socket.c
@@ -163,7 +163,7 @@ mISDN_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
 	memcpy(skb_push(skb, MISDN_HEADER_LEN), mISDN_HEAD_P(skb),
 	       MISDN_HEADER_LEN);
 
-	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+	err = skb_copy_datagram_msg(skb, 0, msg, copied);
 
 	mISDN_sock_cmsg(sk, msg, skb);
 
diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c
index 6c9c16d..443cbbf 100644
--- a/drivers/net/ppp/pppoe.c
+++ b/drivers/net/ppp/pppoe.c
@@ -981,7 +981,7 @@ static int pppoe_recvmsg(struct kiocb *iocb, struct socket *sock,
 
 	if (skb) {
 		total_len = min_t(size_t, total_len, skb->len);
-		error = skb_copy_datagram_iovec(skb, 0, m->msg_iov, total_len);
+		error = skb_copy_datagram_msg(skb, 0, m, total_len);
 		if (error == 0) {
 			consume_skb(skb);
 			return total_len;
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 5ad9675..31cdb7e 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -21,6 +21,7 @@
 #include <linux/bug.h>
 #include <linux/cache.h>
 #include <linux/rbtree.h>
+#include <linux/socket.h>
 
 #include <linux/atomic.h>
 #include <asm/types.h>
@@ -2637,6 +2638,11 @@ unsigned int datagram_poll(struct file *file, struct socket *sock,
 			   struct poll_table_struct *wait);
 int skb_copy_datagram_iovec(const struct sk_buff *from, int offset,
 			    struct iovec *to, int size);
+static inline int skb_copy_datagram_msg(const struct sk_buff *from, int offset,
+					struct msghdr *msg, int size)
+{
+	return skb_copy_datagram_iovec(from, offset, msg->msg_iov, size);
+}
 int skb_copy_and_csum_datagram_iovec(struct sk_buff *skb, int hlen,
 				     struct iovec *iov);
 int skb_copy_datagram_from_iovec(struct sk_buff *skb, int offset,
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index c00897f..425942d 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -1758,7 +1758,7 @@ static int atalk_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr
 		copied = size;
 		msg->msg_flags |= MSG_TRUNC;
 	}
-	err = skb_copy_datagram_iovec(skb, offset, msg->msg_iov, copied);
+	err = skb_copy_datagram_msg(skb, offset, msg, copied);
 
 	if (!err && msg->msg_name) {
 		DECLARE_SOCKADDR(struct sockaddr_at *, sat, msg->msg_name);
diff --git a/net/atm/common.c b/net/atm/common.c
index 6a76515..9cd1cca 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -554,7 +554,7 @@ int vcc_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
 		msg->msg_flags |= MSG_TRUNC;
 	}
 
-	error = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+	error = skb_copy_datagram_msg(skb, 0, msg, copied);
 	if (error)
 		return error;
 	sock_recv_ts_and_drops(msg, sk, skb);
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index c35c3f4..f4f835e 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -1634,7 +1634,7 @@ static int ax25_recvmsg(struct kiocb *iocb, struct socket *sock,
 		msg->msg_flags |= MSG_TRUNC;
 	}
 
-	skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+	skb_copy_datagram_msg(skb, 0, msg, copied);
 
 	if (msg->msg_name) {
 		ax25_digi digi;
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index 339c74a..0a7cc56 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -237,7 +237,7 @@ int bt_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
 	}
 
 	skb_reset_transport_header(skb);
-	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+	err = skb_copy_datagram_msg(skb, 0, msg, copied);
 	if (err == 0) {
 		sock_recv_ts_and_drops(msg, sk, skb);
 
@@ -328,7 +328,7 @@ int bt_sock_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
 		}
 
 		chunk = min_t(unsigned int, skb->len, size);
-		if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, chunk)) {
+		if (skb_copy_datagram_msg(skb, 0, msg, chunk)) {
 			skb_queue_head(&sk->sk_receive_queue, skb);
 			if (!copied)
 				copied = -EFAULT;
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 115f149..29e1ec7 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -878,7 +878,7 @@ static int hci_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
 	}
 
 	skb_reset_transport_header(skb);
-	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+	err = skb_copy_datagram_msg(skb, 0, msg, copied);
 
 	switch (hci_pi(sk)->channel) {
 	case HCI_CHANNEL_RAW:
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index 43f750e..fbcd156 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -293,7 +293,7 @@ static int caif_seqpkt_recvmsg(struct kiocb *iocb, struct socket *sock,
 		copylen = len;
 	}
 
-	ret = skb_copy_datagram_iovec(skb, 0, m->msg_iov, copylen);
+	ret = skb_copy_datagram_msg(skb, 0, m, copylen);
 	if (ret)
 		goto out_free;
 
diff --git a/net/core/sock.c b/net/core/sock.c
index 15e0c67..ac56dd0 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2457,7 +2457,7 @@ int sock_recv_errqueue(struct sock *sk, struct msghdr *msg, int len,
 		msg->msg_flags |= MSG_TRUNC;
 		copied = len;
 	}
-	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+	err = skb_copy_datagram_msg(skb, 0, msg, copied);
 	if (err)
 		goto out_free_skb;
 
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 5ab6627..8e6ae94 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -896,7 +896,7 @@ verify_sock_status:
 		else if (len < skb->len)
 			msg->msg_flags |= MSG_TRUNC;
 
-		if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
+		if (skb_copy_datagram_msg(skb, 0, msg, len)) {
 			/* Exception. Bailout! */
 			len = -EFAULT;
 			break;
diff --git a/net/ieee802154/dgram.c b/net/ieee802154/dgram.c
index ef2ad8a..fc9193e 100644
--- a/net/ieee802154/dgram.c
+++ b/net/ieee802154/dgram.c
@@ -324,7 +324,7 @@ static int dgram_recvmsg(struct kiocb *iocb, struct sock *sk,
 	}
 
 	/* FIXME: skip headers if necessary ?! */
-	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+	err = skb_copy_datagram_msg(skb, 0, msg, copied);
 	if (err)
 		goto done;
 
diff --git a/net/ieee802154/raw.c b/net/ieee802154/raw.c
index 9d1f648..73a4d53 100644
--- a/net/ieee802154/raw.c
+++ b/net/ieee802154/raw.c
@@ -195,7 +195,7 @@ static int raw_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 		copied = len;
 	}
 
-	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+	err = skb_copy_datagram_msg(skb, 0, msg, copied);
 	if (err)
 		goto done;
 
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index c373a9a..21894df 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -424,7 +424,7 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
 		msg->msg_flags |= MSG_TRUNC;
 		copied = len;
 	}
-	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+	err = skb_copy_datagram_msg(skb, 0, msg, copied);
 	if (err)
 		goto out_free_skb;
 
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 57f7c98..736236c 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -875,7 +875,7 @@ int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 	}
 
 	/* Don't bother checking the checksum */
-	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+	err = skb_copy_datagram_msg(skb, 0, msg, copied);
 	if (err)
 		goto done;
 
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 739db31..ee8fa4b 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -718,7 +718,7 @@ static int raw_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 		copied = len;
 	}
 
-	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+	err = skb_copy_datagram_msg(skb, 0, msg, copied);
 	if (err)
 		goto done;
 
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 39ec0c3..c239f47 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1377,7 +1377,7 @@ static int tcp_peek_sndq(struct sock *sk, struct msghdr *msg, int len)
 	/* XXX -- need to support SO_PEEK_OFF */
 
 	skb_queue_walk(&sk->sk_write_queue, skb) {
-		err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, skb->len);
+		err = skb_copy_datagram_msg(skb, 0, msg, skb->len);
 		if (err)
 			break;
 
@@ -1833,8 +1833,7 @@ do_prequeue:
 		}
 
 		if (!(flags & MSG_TRUNC)) {
-			err = skb_copy_datagram_iovec(skb, offset,
-						      msg->msg_iov, used);
+			err = skb_copy_datagram_msg(skb, offset, msg, used);
 			if (err) {
 				/* Exception. Bailout! */
 				if (!copied)
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 3f001db..df19027 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1281,8 +1281,8 @@ try_again:
 	}
 
 	if (skb_csum_unnecessary(skb))
-		err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr),
-					      msg->msg_iov, copied);
+		err = skb_copy_datagram_msg(skb, sizeof(struct udphdr),
+					    msg, copied);
 	else {
 		err = skb_copy_and_csum_datagram_iovec(skb,
 						       sizeof(struct udphdr),
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 2cdc383..5c6996e 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -351,7 +351,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
 		msg->msg_flags |= MSG_TRUNC;
 		copied = len;
 	}
-	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+	err = skb_copy_datagram_msg(skb, 0, msg, copied);
 	if (err)
 		goto out_free_skb;
 
@@ -445,7 +445,7 @@ int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len,
 		msg->msg_flags |= MSG_TRUNC;
 		copied = len;
 	}
-	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+	err = skb_copy_datagram_msg(skb, 0, msg, copied);
 	if (err)
 		goto out_free_skb;
 
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 075a0fb..0cbcf98 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -486,11 +486,11 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk,
 	}
 
 	if (skb_csum_unnecessary(skb)) {
-		err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+		err = skb_copy_datagram_msg(skb, 0, msg, copied);
 	} else if (msg->msg_flags&MSG_TRUNC) {
 		if (__skb_checksum_complete(skb))
 			goto csum_copy_err;
-		err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+		err = skb_copy_datagram_msg(skb, 0, msg, copied);
 	} else {
 		err = skb_copy_and_csum_datagram_iovec(skb, 0, msg->msg_iov);
 		if (err == -EINVAL)
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index f6ba535..9b68092 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -424,8 +424,8 @@ try_again:
 	}
 
 	if (skb_csum_unnecessary(skb))
-		err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr),
-					      msg->msg_iov, copied);
+		err = skb_copy_datagram_msg(skb, sizeof(struct udphdr),
+					    msg, copied);
 	else {
 		err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov);
 		if (err == -EINVAL)
diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c
index 313ef46..a0c7536 100644
--- a/net/ipx/af_ipx.c
+++ b/net/ipx/af_ipx.c
@@ -1805,8 +1805,7 @@ static int ipx_recvmsg(struct kiocb *iocb, struct socket *sock,
 		msg->msg_flags |= MSG_TRUNC;
 	}
 
-	rc = skb_copy_datagram_iovec(skb, sizeof(struct ipxhdr), msg->msg_iov,
-				     copied);
+	rc = skb_copy_datagram_msg(skb, sizeof(struct ipxhdr), msg, copied);
 	if (rc)
 		goto out_free;
 	if (skb->tstamp.tv64)
diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c
index 92fafd4..980bc26 100644
--- a/net/irda/af_irda.c
+++ b/net/irda/af_irda.c
@@ -1396,7 +1396,7 @@ static int irda_recvmsg_dgram(struct kiocb *iocb, struct socket *sock,
 		copied = size;
 		msg->msg_flags |= MSG_TRUNC;
 	}
-	skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+	skb_copy_datagram_msg(skb, 0, msg, copied);
 
 	skb_free_datagram(sk, skb);
 
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index a089b6b..057b564 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -1355,7 +1355,7 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
 		sk->sk_shutdown = sk->sk_shutdown | RCV_SHUTDOWN;
 
 	cskb = skb;
-	if (skb_copy_datagram_iovec(cskb, offset, msg->msg_iov, copied)) {
+	if (skb_copy_datagram_msg(cskb, offset, msg, copied)) {
 		if (!(flags & MSG_PEEK))
 			skb_queue_head(&sk->sk_receive_queue, skb);
 		return -EFAULT;
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 1847ec4..e588309 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -3654,7 +3654,7 @@ static int pfkey_recvmsg(struct kiocb *kiocb,
 	}
 
 	skb_reset_transport_header(skb);
-	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+	err = skb_copy_datagram_msg(skb, 0, msg, copied);
 	if (err)
 		goto out_free;
 
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index 369a982..a6cc1fe 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -528,7 +528,7 @@ static int l2tp_ip_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *m
 		copied = len;
 	}
 
-	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+	err = skb_copy_datagram_msg(skb, 0, msg, copied);
 	if (err)
 		goto done;
 
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index 0edb263..2177b96 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -672,7 +672,7 @@ static int l2tp_ip6_recvmsg(struct kiocb *iocb, struct sock *sk,
 		copied = len;
 	}
 
-	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+	err = skb_copy_datagram_msg(skb, 0, msg, copied);
 	if (err)
 		goto done;
 
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index b704a93..c559bcd 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -208,7 +208,7 @@ static int pppol2tp_recvmsg(struct kiocb *iocb, struct socket *sock,
 	else if (len < skb->len)
 		msg->msg_flags |= MSG_TRUNC;
 
-	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len);
+	err = skb_copy_datagram_msg(skb, 0, msg, len);
 	if (likely(err == 0))
 		err = len;
 
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index bb9cbc1..af66266 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -819,8 +819,7 @@ static int llc_ui_recvmsg(struct kiocb *iocb, struct socket *sock,
 			used = len;
 
 		if (!(flags & MSG_TRUNC)) {
-			int rc = skb_copy_datagram_iovec(skb, offset,
-							 msg->msg_iov, used);
+			int rc = skb_copy_datagram_msg(skb, offset, msg, used);
 			if (rc) {
 				/* Exception. Bailout! */
 				if (!copied)
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index f1de72d..580b794 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -2401,7 +2401,7 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
 	}
 
 	skb_reset_transport_header(data_skb);
-	err = skb_copy_datagram_iovec(data_skb, 0, msg->msg_iov, copied);
+	err = skb_copy_datagram_msg(data_skb, 0, msg, copied);
 
 	if (msg->msg_name) {
 		DECLARE_SOCKADDR(struct sockaddr_nl *, addr, msg->msg_name);
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index 1b06a1f..7e13f6a 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -1167,7 +1167,7 @@ static int nr_recvmsg(struct kiocb *iocb, struct socket *sock,
 		msg->msg_flags |= MSG_TRUNC;
 	}
 
-	er = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+	er = skb_copy_datagram_msg(skb, 0, msg, copied);
 	if (er < 0) {
 		skb_free_datagram(sk, skb);
 		release_sock(sk);
diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c
index 51f077a..83bc785 100644
--- a/net/nfc/llcp_sock.c
+++ b/net/nfc/llcp_sock.c
@@ -832,7 +832,7 @@ static int llcp_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
 	copied = min_t(unsigned int, rlen, len);
 
 	cskb = skb;
-	if (skb_copy_datagram_iovec(cskb, 0, msg->msg_iov, copied)) {
+	if (skb_copy_datagram_msg(cskb, 0, msg, copied)) {
 		if (!(flags & MSG_PEEK))
 			skb_queue_head(&sk->sk_receive_queue, skb);
 		return -EFAULT;
diff --git a/net/nfc/rawsock.c b/net/nfc/rawsock.c
index 11c3544..9d7d2b7 100644
--- a/net/nfc/rawsock.c
+++ b/net/nfc/rawsock.c
@@ -269,7 +269,7 @@ static int rawsock_recvmsg(struct kiocb *iocb, struct socket *sock,
 		copied = len;
 	}
 
-	rc = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+	rc = skb_copy_datagram_msg(skb, 0, msg, copied);
 
 	skb_free_datagram(sk, skb);
 
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 87d20f4..4cd13d8 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2953,7 +2953,7 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
 		msg->msg_flags |= MSG_TRUNC;
 	}
 
-	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+	err = skb_copy_datagram_msg(skb, 0, msg, copied);
 	if (err)
 		goto out_free;
 
diff --git a/net/phonet/datagram.c b/net/phonet/datagram.c
index 290352c..0918bc2 100644
--- a/net/phonet/datagram.c
+++ b/net/phonet/datagram.c
@@ -150,7 +150,7 @@ static int pn_recvmsg(struct kiocb *iocb, struct sock *sk,
 		copylen = len;
 	}
 
-	rval = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copylen);
+	rval = skb_copy_datagram_msg(skb, 0, msg, copylen);
 	if (rval) {
 		rval = -EFAULT;
 		goto out;
diff --git a/net/phonet/pep.c b/net/phonet/pep.c
index 70a547e..44b2123 100644
--- a/net/phonet/pep.c
+++ b/net/phonet/pep.c
@@ -1296,7 +1296,7 @@ copy:
 	else
 		len = skb->len;
 
-	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len);
+	err = skb_copy_datagram_msg(skb, 0, msg, len);
 	if (!err)
 		err = (flags & MSG_TRUNC) ? skb->len : len;
 
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index a85c1a0..9b600c2 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -1249,7 +1249,7 @@ static int rose_recvmsg(struct kiocb *iocb, struct socket *sock,
 		msg->msg_flags |= MSG_TRUNC;
 	}
 
-	skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+	skb_copy_datagram_msg(skb, 0, msg, copied);
 
 	if (msg->msg_name) {
 		struct sockaddr_rose *srose;
diff --git a/net/rxrpc/ar-recvmsg.c b/net/rxrpc/ar-recvmsg.c
index e9aaa65..4575485 100644
--- a/net/rxrpc/ar-recvmsg.c
+++ b/net/rxrpc/ar-recvmsg.c
@@ -180,7 +180,7 @@ int rxrpc_recvmsg(struct kiocb *iocb, struct socket *sock,
 		if (copy > len - copied)
 			copy = len - copied;
 
-		ret = skb_copy_datagram_iovec(skb, offset, msg->msg_iov, copy);
+		ret = skb_copy_datagram_msg(skb, offset, msg, copy);
 
 		if (ret < 0)
 			goto copy_error;
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 634a2ab..2120292 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -2095,7 +2095,7 @@ static int sctp_recvmsg(struct kiocb *iocb, struct sock *sk,
 	if (copied > len)
 		copied = len;
 
-	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+	err = skb_copy_datagram_msg(skb, 0, msg, copied);
 
 	event = sctp_skb2event(skb);
 
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index ad8a1a1..591bbfa 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -1372,8 +1372,7 @@ restart:
 			sz = buf_len;
 			m->msg_flags |= MSG_TRUNC;
 		}
-		res = skb_copy_datagram_iovec(buf, msg_hdr_sz(msg),
-					      m->msg_iov, sz);
+		res = skb_copy_datagram_msg(buf, msg_hdr_sz(msg), m, sz);
 		if (res)
 			goto exit;
 		res = sz;
@@ -1473,8 +1472,8 @@ restart:
 		needed = (buf_len - sz_copied);
 		sz_to_copy = (sz <= needed) ? sz : needed;
 
-		res = skb_copy_datagram_iovec(buf, msg_hdr_sz(msg) + offset,
-					      m->msg_iov, sz_to_copy);
+		res = skb_copy_datagram_msg(buf, msg_hdr_sz(msg) + offset,
+					    m, sz_to_copy);
 		if (res)
 			goto exit;
 
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index e968843..5eee625 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1825,7 +1825,7 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
 	else if (size < skb->len - skip)
 		msg->msg_flags |= MSG_TRUNC;
 
-	err = skb_copy_datagram_iovec(skb, skip, msg->msg_iov, size);
+	err = skb_copy_datagram_msg(skb, skip, msg, size);
 	if (err)
 		goto out_free;
 
@@ -2030,8 +2030,8 @@ again:
 		}
 
 		chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
-		if (skb_copy_datagram_iovec(skb, UNIXCB(skb).consumed + skip,
-					    msg->msg_iov, chunk)) {
+		if (skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
+					  msg, chunk)) {
 			if (copied == 0)
 				copied = -EFAULT;
 			break;
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index 9bb63ff..a57ddef 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -1773,8 +1773,7 @@ static int vmci_transport_dgram_dequeue(struct kiocb *kiocb,
 	}
 
 	/* Place the datagram payload in the user's iovec. */
-	err = skb_copy_datagram_iovec(skb, sizeof(*dg), msg->msg_iov,
-		payload_len);
+	err = skb_copy_datagram_msg(skb, sizeof(*dg), msg, payload_len);
 	if (err)
 		goto out;
 
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index 5ad4418..59e785b 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -1335,7 +1335,7 @@ static int x25_recvmsg(struct kiocb *iocb, struct socket *sock,
 	/* Currently, each datagram always contains a complete record */
 	msg->msg_flags |= MSG_EOR;
 
-	rc = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+	rc = skb_copy_datagram_msg(skb, 0, msg, copied);
 	if (rc)
 		goto out_free_dgram;
 

^ permalink raw reply related

* Re: [PATCH net v4] ipv6: mld: fix add_grhead skb_over_panic for devs with large MTUs
From: Eric Dumazet @ 2014-11-05 20:47 UTC (permalink / raw)
  To: Daniel Borkmann
  Cc: davem, lw1a2.jing, netdev, Eric Dumazet, Hannes Frederic Sowa,
	David L Stevens
In-Reply-To: <1415215658-10054-1-git-send-email-dborkman@redhat.com>

On Wed, 2014-11-05 at 20:27 +0100, Daniel Borkmann wrote:
> It has been reported that generating an MLD listener report on
> devices with large MTUs (e.g. 9000) and a high number of IPv6
> addresses can trigger a skb_over_panic():
...
> 
> Reported-by: Wei Liu <lw1a2.jing@gmail.com>
> Fixes: 72e09ad107e7 ("ipv6: avoid high order allocations")
> Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
> Cc: Eric Dumazet <edumazet@google.com>
> Cc: Hannes Frederic Sowa <hannes@stressinduktion.org>
> Cc: David L Stevens <david.stevens@oracle.com>
> ---
>  v3->v4:
>   - Reduced noise from previous one as discussed

Thanks for your patience ;)

Acked-by: Eric Dumazet <edumazet@google.com>

^ permalink raw reply

* [PATCH] include/linux/socket.h: Fix comment
From: Rasmus Villemoes @ 2014-11-05 20:44 UTC (permalink / raw)
  To: David S. Miller; +Cc: netdev, linux-kernel, Rasmus Villemoes

File descriptors are always closed on exit :-)

Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
---
 include/linux/socket.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/socket.h b/include/linux/socket.h
index ec538fc..bb9b836 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -256,7 +256,7 @@ struct ucred {
 #define MSG_EOF         MSG_FIN
 
 #define MSG_FASTOPEN	0x20000000	/* Send data in TCP SYN */
-#define MSG_CMSG_CLOEXEC 0x40000000	/* Set close_on_exit for file
+#define MSG_CMSG_CLOEXEC 0x40000000	/* Set close_on_exec for file
 					   descriptor received through
 					   SCM_RIGHTS */
 #if defined(CONFIG_COMPAT)
-- 
2.0.4

^ permalink raw reply related

* Re: [PATCH] Add missing descriptions for fwmark_reflect for ipv4 and ipv6.
From: David Miller @ 2014-11-05 20:44 UTC (permalink / raw)
  To: logan; +Cc: netdev
In-Reply-To: <20141104110249.GA25828@mx.elandsys.com>

From: Loganaden Velvindron <logan@elandsys.com>
Date: Tue, 4 Nov 2014 03:02:49 -0800

> It was initially sent by Lorenzo Colitti, but was subsequently
> lost in the final diff he submitted.
> 
> Signed-off-by: Loganaden Velvindron <logan@elandsys.com>

Applied, thank you.

^ permalink raw reply

* RE: Kernel Oops in __inet_twsk_kill()
From: Charley (Hao Chuan) Chu @ 2014-11-05 20:35 UTC (permalink / raw)
  To: David Miller
  Cc: cwang@twopensource.com, borkmann@iogearbox.net,
	netdev@vger.kernel.org
In-Reply-To: <20141105.143929.861274469992898400.davem@davemloft.net>

Hi David,

Sorry, it was my mistake: I forgot to mention that we have a third-party patch applied.

Thanks,
Charley

-----Original Message-----
From: David Miller [mailto:davem@davemloft.net] 
Sent: Wednesday, November 05, 2014 11:39 AM
To: Charley (Hao Chuan) Chu
Cc: cwang@twopensource.com; borkmann@iogearbox.net; netdev@vger.kernel.org
Subject: Re: Kernel Oops in __inet_twsk_kill()

From: "Charley (Hao Chuan) Chu" <charley.chu@broadcom.com>
Date: Wed, 5 Nov 2014 19:16:09 +0000

> The problem has been fixed. It is introduced by a third party patch,
> which decreases the refcnt of timewait socket.

This is something that really drives me nuts, seriously?

Never report problems to us when you have such patches applied.

It is an even larger crime to not mention this at all from the
beginning, because we might start to investigate and waste our
precious time doing so.

^ permalink raw reply

* Re: [PATCH 1/4] inet: Add skb_copy_datagram_iter
From: David Miller @ 2014-11-05 20:24 UTC (permalink / raw)
  To: herbert; +Cc: viro, netdev, linux-kernel, bcrl
In-Reply-To: <E1XlZWY-0003HZ-Ef@gondolin.me.apana.org.au>

Herbert, please provide a cover letter for this series, and the most recent
version of patch #2 gets various rejects when I try to apply it to net-next.

Thanks.

^ permalink raw reply

* Re: [GIT net-next] Open vSwitch
From: David Miller @ 2014-11-05 20:10 UTC (permalink / raw)
  To: pshelar; +Cc: netdev
In-Reply-To: <1415080846-1623-1-git-send-email-pshelar@nicira.com>

Please do not submit your patches such that the email Date: field is
the commit's date.  You're not posting these on Nov. 4th, yet that
is the Date: field on all of the individual patch emails.

I want them to be the date at the time you post the patch to the mailing
list.

Otherwise the ordering in patchwork is not chronological wrt. the list's
postings and this makes my work more difficult than it needs to be.

Thanks.

^ permalink raw reply

* [PATCH net-next 14/14] openvswitch: Avoid NULL mask check while building mask
From: Pravin B Shelar @ 2014-11-04  6:02 UTC (permalink / raw)
  To: davem; +Cc: netdev, Pravin B Shelar

OVS does mask validation even if it does not need to convert the
netlink mask attributes to a mask structure.  A caller of
ovs_nla_get_match() can pass a NULL mask structure pointer if it does
not need the mask.  Therefore a NULL check is required in the
SW_FLOW_KEY* macros.  The following patch does not convert the mask
netlink attributes if the mask pointer is NULL, so we do not need
these checks in the SW_FLOW_KEY* macros.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Andy Zhou <azhou@nicira.com>
---
 net/openvswitch/flow_netlink.c | 107 ++++++++++++++++++++---------------------
 1 file changed, 53 insertions(+), 54 deletions(-)

diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 482a0cb..ed31097 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -50,21 +50,18 @@
 
 #include "flow_netlink.h"
 
-static void update_range__(struct sw_flow_match *match,
-			   size_t offset, size_t size, bool is_mask)
+static void update_range(struct sw_flow_match *match,
+			 size_t offset, size_t size, bool is_mask)
 {
-	struct sw_flow_key_range *range = NULL;
+	struct sw_flow_key_range *range;
 	size_t start = rounddown(offset, sizeof(long));
 	size_t end = roundup(offset + size, sizeof(long));
 
 	if (!is_mask)
 		range = &match->range;
-	else if (match->mask)
+	else
 		range = &match->mask->range;
 
-	if (!range)
-		return;
-
 	if (range->start == range->end) {
 		range->start = start;
 		range->end = end;
@@ -80,22 +77,20 @@ static void update_range__(struct sw_flow_match *match,
 
 #define SW_FLOW_KEY_PUT(match, field, value, is_mask) \
 	do { \
-		update_range__(match, offsetof(struct sw_flow_key, field),  \
-				     sizeof((match)->key->field), is_mask); \
-		if (is_mask) {						    \
-			if ((match)->mask)				    \
-				(match)->mask->key.field = value;	    \
-		} else {                                                    \
+		update_range(match, offsetof(struct sw_flow_key, field),    \
+			     sizeof((match)->key->field), is_mask);	    \
+		if (is_mask)						    \
+			(match)->mask->key.field = value;		    \
+		else							    \
 			(match)->key->field = value;		            \
-		}                                                           \
 	} while (0)
 
 #define SW_FLOW_KEY_MEMCPY_OFFSET(match, offset, value_p, len, is_mask)	    \
 	do {								    \
-		update_range__(match, offset, len, is_mask);		    \
+		update_range(match, offset, len, is_mask);		    \
 		if (is_mask)						    \
 			memcpy((u8 *)&(match)->mask->key + offset, value_p, \
-			       len);					    \
+			       len);					   \
 		else							    \
 			memcpy((u8 *)(match)->key + offset, value_p, len);  \
 	} while (0)
@@ -104,18 +99,16 @@ static void update_range__(struct sw_flow_match *match,
 	SW_FLOW_KEY_MEMCPY_OFFSET(match, offsetof(struct sw_flow_key, field), \
 				  value_p, len, is_mask)
 
-#define SW_FLOW_KEY_MEMSET_FIELD(match, field, value, is_mask) \
-	do { \
-		update_range__(match, offsetof(struct sw_flow_key, field),  \
-				     sizeof((match)->key->field), is_mask); \
-		if (is_mask) {						    \
-			if ((match)->mask)				    \
-				memset((u8 *)&(match)->mask->key.field, value,\
-				       sizeof((match)->mask->key.field));   \
-		} else {                                                    \
+#define SW_FLOW_KEY_MEMSET_FIELD(match, field, value, is_mask)		    \
+	do {								    \
+		update_range(match, offsetof(struct sw_flow_key, field),    \
+			     sizeof((match)->key->field), is_mask);	    \
+		if (is_mask)						    \
+			memset((u8 *)&(match)->mask->key.field, value,      \
+			       sizeof((match)->mask->key.field));	    \
+		else							    \
 			memset((u8 *)&(match)->key->field, value,           \
 			       sizeof((match)->key->field));                \
-		}                                                           \
 	} while (0)
 
 static bool match_validate(const struct sw_flow_match *match,
@@ -677,8 +670,7 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs,
 
 		SW_FLOW_KEY_PUT(match, eth.tci, tci, is_mask);
 		attrs &= ~(1 << OVS_KEY_ATTR_VLAN);
-	} else if (!is_mask)
-		SW_FLOW_KEY_PUT(match, eth.tci, htons(0xffff), true);
+	}
 
 	if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) {
 		__be16 eth_type;
@@ -903,8 +895,8 @@ static void mask_set_nlattr(struct nlattr *attr, u8 val)
  * attribute specifies the mask field of the wildcarded flow.
  */
 int ovs_nla_get_match(struct sw_flow_match *match,
-		      const struct nlattr *key,
-		      const struct nlattr *mask)
+		      const struct nlattr *nla_key,
+		      const struct nlattr *nla_mask)
 {
 	const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
 	const struct nlattr *encap;
@@ -914,7 +906,7 @@ int ovs_nla_get_match(struct sw_flow_match *match,
 	bool encap_valid = false;
 	int err;
 
-	err = parse_flow_nlattrs(key, a, &key_attrs);
+	err = parse_flow_nlattrs(nla_key, a, &key_attrs);
 	if (err)
 		return err;
 
@@ -955,36 +947,43 @@ int ovs_nla_get_match(struct sw_flow_match *match,
 	if (err)
 		return err;
 
-	if (match->mask && !mask) {
-		/* Create an exact match mask. We need to set to 0xff all the
-		 * 'match->mask' fields that have been touched in 'match->key'.
-		 * We cannot simply memset 'match->mask', because padding bytes
-		 * and fields not specified in 'match->key' should be left to 0.
-		 * Instead, we use a stream of netlink attributes, copied from
-		 * 'key' and set to 0xff: ovs_key_from_nlattrs() will take care
-		 * of filling 'match->mask' appropriately.
-		 */
-		newmask = kmemdup(key, nla_total_size(nla_len(key)),
-				  GFP_KERNEL);
-		if (!newmask)
-			return -ENOMEM;
+	if (match->mask) {
+		if (!nla_mask) {
+			/* Create an exact match mask. We need to set to 0xff
+			 * all the 'match->mask' fields that have been touched
+			 * in 'match->key'. We cannot simply memset
+			 * 'match->mask', because padding bytes and fields not
+			 * specified in 'match->key' should be left to 0.
+			 * Instead, we use a stream of netlink attributes,
+			 * copied from 'key' and set to 0xff.
+			 * ovs_key_from_nlattrs() will take care of filling
+			 * 'match->mask' appropriately.
+			 */
+			newmask = kmemdup(nla_key,
+					  nla_total_size(nla_len(nla_key)),
+					  GFP_KERNEL);
+			if (!newmask)
+				return -ENOMEM;
 
-		mask_set_nlattr(newmask, 0xff);
+			mask_set_nlattr(newmask, 0xff);
 
-		/* The userspace does not send tunnel attributes that are 0,
-		 * but we should not wildcard them nonetheless.
-		 */
-		if (match->key->tun_key.ipv4_dst)
-			SW_FLOW_KEY_MEMSET_FIELD(match, tun_key, 0xff, true);
+			/* The userspace does not send tunnel attributes that
+			 * are 0, but we should not wildcard them nonetheless.
+			 */
+			if (match->key->tun_key.ipv4_dst)
+				SW_FLOW_KEY_MEMSET_FIELD(match, tun_key,
+							 0xff, true);
 
-		mask = newmask;
-	}
+			nla_mask = newmask;
+		}
 
-	if (mask) {
-		err = parse_flow_mask_nlattrs(mask, a, &mask_attrs);
+		err = parse_flow_mask_nlattrs(nla_mask, a, &mask_attrs);
 		if (err)
 			goto free_newmask;
 
+		/* Always match on tci. */
+		SW_FLOW_KEY_PUT(match, eth.tci, htons(0xffff), true);
+
 		if (mask_attrs & 1 << OVS_KEY_ATTR_ENCAP) {
 			__be16 eth_type = 0;
 			__be16 tci = 0;
-- 
1.9.3

^ permalink raw reply related

* [PATCH net-next 13/14] openvswitch: Refactor action alloc and copy api.
From: Pravin B Shelar @ 2014-11-04  6:02 UTC (permalink / raw)
  To: davem; +Cc: netdev, Pravin B Shelar

There are two separate APIs to allocate and copy an action list. Any time
OVS needs to copy an action list, it needs to call both functions.
The following patch moves the action allocation into the copy function
to avoid code duplication.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
---
 net/openvswitch/datapath.c     | 25 ++++---------------------
 net/openvswitch/flow_netlink.c | 24 +++++++++++++++++-------
 net/openvswitch/flow_netlink.h |  1 -
 3 files changed, 21 insertions(+), 29 deletions(-)

diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 5101780..014485e 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -543,18 +543,12 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
 	if (err)
 		goto err_flow_free;
 
-	acts = ovs_nla_alloc_flow_actions(nla_len(a[OVS_PACKET_ATTR_ACTIONS]));
-	err = PTR_ERR(acts);
-	if (IS_ERR(acts))
-		goto err_flow_free;
-
 	err = ovs_nla_copy_actions(a[OVS_PACKET_ATTR_ACTIONS],
 				   &flow->key, &acts);
 	if (err)
 		goto err_flow_free;
 
 	rcu_assign_pointer(flow->sf_acts, acts);
-
 	OVS_CB(packet)->egress_tun_info = NULL;
 	packet->priority = flow->key.phy.priority;
 	packet->mark = flow->key.phy.skb_mark;
@@ -872,16 +866,11 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
 	ovs_flow_mask_key(&new_flow->key, &new_flow->unmasked_key, &mask);
 
 	/* Validate actions. */
-	acts = ovs_nla_alloc_flow_actions(nla_len(a[OVS_FLOW_ATTR_ACTIONS]));
-	error = PTR_ERR(acts);
-	if (IS_ERR(acts))
-		goto err_kfree_flow;
-
 	error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], &new_flow->key,
 				     &acts);
 	if (error) {
 		OVS_NLERR("Flow actions may not be safe on all matching packets.\n");
-		goto err_kfree_acts;
+		goto err_kfree_flow;
 	}
 
 	reply = ovs_flow_cmd_alloc_info(acts, info, false);
@@ -972,6 +961,7 @@ error:
 	return error;
 }
 
+/* Factor out action copy to avoid "Wframe-larger-than=1024" warning. */
 static struct sw_flow_actions *get_flow_actions(const struct nlattr *a,
 						const struct sw_flow_key *key,
 						const struct sw_flow_mask *mask)
@@ -980,15 +970,10 @@ static struct sw_flow_actions *get_flow_actions(const struct nlattr *a,
 	struct sw_flow_key masked_key;
 	int error;
 
-	acts = ovs_nla_alloc_flow_actions(nla_len(a));
-	if (IS_ERR(acts))
-		return acts;
-
 	ovs_flow_mask_key(&masked_key, key, mask);
 	error = ovs_nla_copy_actions(a, &masked_key, &acts);
 	if (error) {
-		OVS_NLERR("Flow actions may not be safe on all matching packets.\n");
-		kfree(acts);
+		OVS_NLERR("Actions may not be safe on all matching packets.\n");
 		return ERR_PTR(error);
 	}
 
@@ -1028,10 +1013,8 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
 			error = PTR_ERR(acts);
 			goto error;
 		}
-	}
 
-	/* Can allocate before locking if have acts. */
-	if (acts) {
+		/* Can allocate before locking if have acts. */
 		reply = ovs_flow_cmd_alloc_info(acts, info, false);
 		if (IS_ERR(reply)) {
 			error = PTR_ERR(reply);
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 1050b28..482a0cb 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -1284,7 +1284,7 @@ nla_put_failure:
 
 #define MAX_ACTIONS_BUFSIZE	(32 * 1024)
 
-struct sw_flow_actions *ovs_nla_alloc_flow_actions(int size)
+static struct sw_flow_actions *nla_alloc_flow_actions(int size)
 {
 	struct sw_flow_actions *sfa;
 
@@ -1329,7 +1329,7 @@ static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa,
 		new_acts_size = MAX_ACTIONS_BUFSIZE;
 	}
 
-	acts = ovs_nla_alloc_flow_actions(new_acts_size);
+	acts = nla_alloc_flow_actions(new_acts_size);
 	if (IS_ERR(acts))
 		return (void *)acts;
 
@@ -1396,7 +1396,7 @@ static inline void add_nested_action_end(struct sw_flow_actions *sfa,
 	a->nla_len = sfa->actions_len - st_offset;
 }
 
-static int ovs_nla_copy_actions__(const struct nlattr *attr,
+static int __ovs_nla_copy_actions(const struct nlattr *attr,
 				  const struct sw_flow_key *key,
 				  int depth, struct sw_flow_actions **sfa,
 				  __be16 eth_type, __be16 vlan_tci);
@@ -1441,7 +1441,7 @@ static int validate_and_copy_sample(const struct nlattr *attr,
 	if (st_acts < 0)
 		return st_acts;
 
-	err = ovs_nla_copy_actions__(actions, key, depth + 1, sfa,
+	err = __ovs_nla_copy_actions(actions, key, depth + 1, sfa,
 				     eth_type, vlan_tci);
 	if (err)
 		return err;
@@ -1684,7 +1684,7 @@ static int copy_action(const struct nlattr *from,
 	return 0;
 }
 
-static int ovs_nla_copy_actions__(const struct nlattr *attr,
+static int __ovs_nla_copy_actions(const struct nlattr *attr,
 				  const struct sw_flow_key *key,
 				  int depth, struct sw_flow_actions **sfa,
 				  __be16 eth_type, __be16 vlan_tci)
@@ -1846,8 +1846,18 @@ int ovs_nla_copy_actions(const struct nlattr *attr,
 			 const struct sw_flow_key *key,
 			 struct sw_flow_actions **sfa)
 {
-	return ovs_nla_copy_actions__(attr, key, 0, sfa, key->eth.type,
-				      key->eth.tci);
+	int err;
+
+	*sfa = nla_alloc_flow_actions(nla_len(attr));
+	if (IS_ERR(*sfa))
+		return PTR_ERR(*sfa);
+
+	err = __ovs_nla_copy_actions(attr, key, 0, sfa, key->eth.type,
+				     key->eth.tci);
+	if (err)
+		kfree(*sfa);
+
+	return err;
 }
 
 static int sample_action_to_attr(const struct nlattr *attr, struct sk_buff *skb)
diff --git a/net/openvswitch/flow_netlink.h b/net/openvswitch/flow_netlink.h
index 4f03706..eb0b177 100644
--- a/net/openvswitch/flow_netlink.h
+++ b/net/openvswitch/flow_netlink.h
@@ -56,7 +56,6 @@ int ovs_nla_copy_actions(const struct nlattr *attr,
 int ovs_nla_put_actions(const struct nlattr *attr,
 			int len, struct sk_buff *skb);
 
-struct sw_flow_actions *ovs_nla_alloc_flow_actions(int actions_len);
 void ovs_nla_free_flow_actions(struct sw_flow_actions *);
 
 #endif /* flow_netlink.h */
-- 
1.9.3

^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox