Netdev List
 help / color / mirror / Atom feed
* [PATCH v2] VSOCK: add loopback to virtio_transport
From: Stefan Hajnoczi @ 2016-11-21 13:56 UTC (permalink / raw)
  To: netdev
  Cc: cavery, Claudio Imbrenda, Jorgen Hansen, David S . Miller,
	Stefan Hajnoczi

The VMware VMCI transport supports loopback inside virtual machines.
This patch implements loopback for virtio-vsock.

Flow control is handled by the virtio-vsock protocol as usual.  The
sending process stops transmitting on a connection when the peer's
receive buffer space is exhausted.

Cathy Avery <cavery@redhat.com> noticed this difference between VMCI and
virtio-vsock when a test case using loopback failed.  Although loopback
isn't the main point of AF_VSOCK, it is useful for testing and
virtio-vsock must match VMCI semantics so that userspace programs run
regardless of the underlying transport.

My understanding is that loopback is not supported on the host side with
VMCI.  Follow that by implementing it only in the guest driver, not the
vhost host driver.

Cc: Jorgen Hansen <jhansen@vmware.com>
Reported-by: Cathy Avery <cavery@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
v2:
 * Fixed checkpatch.pl warnings [DaveM]

 net/vmw_vsock/virtio_transport.c | 56 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 56 insertions(+)

diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index 936d7ee..2e47f9f0 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -44,6 +44,10 @@ struct virtio_vsock {
 	spinlock_t send_pkt_list_lock;
 	struct list_head send_pkt_list;
 
+	struct work_struct loopback_work;
+	spinlock_t loopback_list_lock; /* protects loopback_list */
+	struct list_head loopback_list;
+
 	atomic_t queued_replies;
 
 	/* The following fields are protected by rx_lock.  vqs[VSOCK_VQ_RX]
@@ -74,6 +78,42 @@ static u32 virtio_transport_get_local_cid(void)
 	return vsock->guest_cid;
 }
 
+static void virtio_transport_loopback_work(struct work_struct *work)
+{
+	struct virtio_vsock *vsock =
+		container_of(work, struct virtio_vsock, loopback_work);
+	LIST_HEAD(pkts);
+
+	spin_lock_bh(&vsock->loopback_list_lock);
+	list_splice_init(&vsock->loopback_list, &pkts);
+	spin_unlock_bh(&vsock->loopback_list_lock);
+
+	mutex_lock(&vsock->rx_lock);
+	while (!list_empty(&pkts)) {
+		struct virtio_vsock_pkt *pkt;
+
+		pkt = list_first_entry(&pkts, struct virtio_vsock_pkt, list);
+		list_del_init(&pkt->list);
+
+		virtio_transport_recv_pkt(pkt);
+	}
+	mutex_unlock(&vsock->rx_lock);
+}
+
+static int virtio_transport_send_pkt_loopback(struct virtio_vsock *vsock,
+					      struct virtio_vsock_pkt *pkt)
+{
+	int len = pkt->len;
+
+	spin_lock_bh(&vsock->loopback_list_lock);
+	list_add_tail(&pkt->list, &vsock->loopback_list);
+	spin_unlock_bh(&vsock->loopback_list_lock);
+
+	queue_work(virtio_vsock_workqueue, &vsock->loopback_work);
+
+	return len;
+}
+
 static void
 virtio_transport_send_pkt_work(struct work_struct *work)
 {
@@ -159,6 +199,9 @@ virtio_transport_send_pkt(struct virtio_vsock_pkt *pkt)
 		return -ENODEV;
 	}
 
+	if (le32_to_cpu(pkt->hdr.dst_cid) == vsock->guest_cid)
+		return virtio_transport_send_pkt_loopback(vsock, pkt);
+
 	if (pkt->reply)
 		atomic_inc(&vsock->queued_replies);
 
@@ -510,10 +553,13 @@ static int virtio_vsock_probe(struct virtio_device *vdev)
 	mutex_init(&vsock->event_lock);
 	spin_lock_init(&vsock->send_pkt_list_lock);
 	INIT_LIST_HEAD(&vsock->send_pkt_list);
+	spin_lock_init(&vsock->loopback_list_lock);
+	INIT_LIST_HEAD(&vsock->loopback_list);
 	INIT_WORK(&vsock->rx_work, virtio_transport_rx_work);
 	INIT_WORK(&vsock->tx_work, virtio_transport_tx_work);
 	INIT_WORK(&vsock->event_work, virtio_transport_event_work);
 	INIT_WORK(&vsock->send_pkt_work, virtio_transport_send_pkt_work);
+	INIT_WORK(&vsock->loopback_work, virtio_transport_loopback_work);
 
 	mutex_lock(&vsock->rx_lock);
 	virtio_vsock_rx_fill(vsock);
@@ -539,6 +585,7 @@ static void virtio_vsock_remove(struct virtio_device *vdev)
 	struct virtio_vsock *vsock = vdev->priv;
 	struct virtio_vsock_pkt *pkt;
 
+	flush_work(&vsock->loopback_work);
 	flush_work(&vsock->rx_work);
 	flush_work(&vsock->tx_work);
 	flush_work(&vsock->event_work);
@@ -565,6 +612,15 @@ static void virtio_vsock_remove(struct virtio_device *vdev)
 	}
 	spin_unlock_bh(&vsock->send_pkt_list_lock);
 
+	spin_lock_bh(&vsock->loopback_list_lock);
+	while (!list_empty(&vsock->loopback_list)) {
+		pkt = list_first_entry(&vsock->loopback_list,
+				       struct virtio_vsock_pkt, list);
+		list_del(&pkt->list);
+		virtio_transport_free_pkt(pkt);
+	}
+	spin_unlock_bh(&vsock->loopback_list_lock);
+
 	mutex_lock(&the_virtio_vsock_mutex);
 	the_virtio_vsock = NULL;
 	vsock_core_exit();
-- 
2.7.4

^ permalink raw reply related

* Re: [PATCH 4.9.0-rc5] AR9300 calibration problems with antenna selected
From: Krzysztof Hałasa @ 2016-11-21 13:54 UTC (permalink / raw)
  To: miaoqing
  Cc: ath9k-devel, Kalle Valo, linux-wireless, ath9k-devel, netdev,
	linux-kernel
In-Reply-To: <2ea016e563f5db553f201a7d4205c2c0@codeaurora.org>

miaoqing@codeaurora.org writes:

>> rmmod ath9k
>> modprobe ath9k
>> iw dev wlan0 set type ibss
>> iw phy phyX set antenna 2
>
> 2 is a bad mask. We use bitmap, the valid masks are 1, 3, 7.

Thanks for your response.

I have two antenna connections (and a single antenna). Is it possible to
select the secondary antenna connector only? How?
-- 
Krzysztof Halasa

Industrial Research Institute for Automation and Measurements PIAP
Al. Jerozolimskie 202, 02-486 Warsaw, Poland

^ permalink raw reply

* Re: Synopsys Ethernet QoS Driver
From: Joao Pinto @ 2016-11-21 13:48 UTC (permalink / raw)
  To: Giuseppe CAVALLARO, Joao Pinto, Rayagond Kokatanur, Rabin Vincent
  Cc: mued dib, David Miller, Jeff Kirsher, jiri, saeedm, idosch,
	netdev, linux-kernel, CARLOS.PALMINHA, andreas.irestal,
	alexandre.torgue, lars.persson,
	linux-arm-kernel@lists.infradead.org
In-Reply-To: <87c8a24b-0812-7850-cb3f-7be691bab432@st.com>

Hello Peppe,

On 21-11-2016 12:52, Giuseppe CAVALLARO wrote:
> Hello Joao
> 
> On 11/21/2016 1:32 PM, Joao Pinto wrote:
>> Hello,
>>
>> On 21-11-2016 05:29, Rayagond Kokatanur wrote:
>>> On Sat, Nov 19, 2016 at 7:26 PM, Rabin Vincent <rabin@rab.in> wrote:
>>>> On Fri, Nov 18, 2016 at 02:20:27PM +0000, Joao Pinto wrote:
>>>>> For now we are interesting in improving the synopsys QoS driver under
>>>>> /nect/ethernet/synopsys. For now the driver structure consists of a single
>>>>> file
>>>>> called dwc_eth_qos.c, containing synopsys ethernet qos common ops and platform
>>>>> related stuff.
>>>>>
>>>>> Our strategy would be:
>>>>>
>>>>> a) Implement a platform glue driver (dwc_eth_qos_pltfm.c)
>>>>> b) Implement a pci glue driver (dwc_eth_qos_pci.c)
>>>>> c) Implement a "core driver" (dwc_eth_qos.c) that would only have Ethernet QoS
>>>>> related stuff to be reused by the platform / pci drivers
>>>>> d) Add a set of features to the "core driver" that we have available
>>>>> internally
>>>>
>>>> Note that there are actually two drivers in mainline for this hardware:
>>>>
>>>>  drivers/net/ethernet/synopsis/
>>>>  drivers/net/ethernet/stmicro/stmmac/
>>>
>>> Yes the later driver (drivers/net/ethernet/stmicro/stmmac/) supports
>>> both 3.x and 4.x. It has glue layer for pci, platform, core etc,
>>> please refer this driver once before you start.
>>>
>>> You can start adding missing feature of 4.x in stmmac driver.
>>
>> Thanks you all for all the info.
>> Well, I think we are in a good position to organize the ethernet drivers
>> concerning Synopsys IPs.
>>
>> First of all, in my opinion, it does not make sense to have a ethernet/synopsis
>> (typo :)) when ethernet/stmicro is also for a synopsys IP. If we have another
>> vendor using the same IP it should be able to reuse the commonn operations. But
>> I would put that discussion for later :)
>>
>> For now I suggest that for we create ethernet/qos and create there a folder
>> called dwc (designware controller) where all the synopsys qos IP specific code
>> in order to be reused for example by ethernet/stmicro/stmmac/. We just have to
>> figure out a clean interface for "client drivers" like stmmac to interact with
>> the new qos driver.
>>
>> What do you think about this approach?
> 
> The stmmac drivers run since many years on several platforms
> (sh4, stm32, arm, x86, mips ...) and it supports an huge of amount of
> configurations starting from 3.1x to 3.7x databooks.

Great!

> 
> It also supports QoS hardware; for example, 4.00a, 4.10a and 4.20a
> are fully working.

Synopsys QoS IP is a separated hardware component, so it should be reusable by
all implementations using it and so have its own "core driver" and platform +
pci glue drivers. This is necessary for example in hardware validation, where
you prototype an IP and instantiate its drivers and test it.

Was there a strong reason to integrate QoS features directly in stmmac and not
in synopsys/dwc_eth_qos.*?

> 
> Also the stmmac has platform, device-tree and pcie supports and
> a lot of maintained glue-logic files.
> 
> It is fully documented inside the kernel tree.

Thanks for the information, I am going to check it out.

> 
> I am happy to have new enhancements from other developers.
> So, on my side, if you want to spend your time on improving it on your
> platforms please feel free to do it!
> 
> Concerning the stmicro/stmmac naming, these come from a really old
> story and have no issue to adopt new folder/file names.

Thank you for your availability!

> 
> I am also open to merge fixes and changes from ethernet/synopsis.
> I want to point you on some benchmarks made by Alex some months ago
> (IIRC) that showed an stmmac winner (due to the several optimizations
> analyzed and reviewed in this mailing list).
> 
> Peppe
> 

Thanks
Joao

^ permalink raw reply

* Re: [PATCH] VSOCK: add loopback to virtio_transport
From: Stefan Hajnoczi @ 2016-11-21 13:40 UTC (permalink / raw)
  To: David Miller; +Cc: john.fastabend, netdev, cavery, imbrenda, jhansen
In-Reply-To: <20161119.221643.2303914168886226654.davem@davemloft.net>

[-- Attachment #1: Type: text/plain, Size: 865 bytes --]

On Sat, Nov 19, 2016 at 10:16:43PM -0500, David Miller wrote:
> From: John Fastabend <john.fastabend@gmail.com>
> Date: Sat, 19 Nov 2016 19:14:15 -0800
> 
> > On 16-11-19 07:08 PM, David Miller wrote:
> >> From: Stefan Hajnoczi <stefanha@redhat.com>
> >> Date: Thu, 17 Nov 2016 15:49:23 +0000
> >> 
> >>> @@ -159,6 +199,10 @@ virtio_transport_send_pkt(struct virtio_vsock_pkt *pkt)
> >>>  		return -ENODEV;
> >>>  	}
> >>>  
> >>> +	if (le32_to_cpu(pkt->hdr.dst_cid) == vsock->guest_cid) {
> >>> +		return virtio_transport_send_pkt_loopback(vsock, pkt);
> >>> +	}
> >>> +
> >> 
> >> A single-statement basic block should get surrounding curly
> >> braces.
> >> 
> > 
> > Should _not_ get curly braces in case it wasn't obvious ;)
> 
> Indeed, thanks for the correction :)

Thanks, I'll remember to run checkpatch.pl and send v2.

Stefan

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 455 bytes --]

^ permalink raw reply

* Re: [PATCH net v2 1/1] net: batman-adv: Treat NET_XMIT_CN as transmit successfully
From: Gao Feng @ 2016-11-21 13:31 UTC (permalink / raw)
  To: Florian Westphal
  Cc: mareklindner, Simon Wunderlich, a, David S. Miller, b.a.t.m.a.n,
	Linux Kernel Network Developers
In-Reply-To: <CA+6hz4rVCL2oxeQUojcqptNfcZdtLZdgEA2_j7iU1-B=s12SHg@mail.gmail.com>

Hi Florian,


On Mon, Nov 21, 2016 at 9:24 PM, Gao Feng <fgao@ikuai8.com> wrote:
> Hi Florian,
>
> On Mon, Nov 21, 2016 at 7:09 PM, Florian Westphal <fw@strlen.de> wrote:
>> fgao@ikuai8.com <fgao@ikuai8.com> wrote:
>>> From: Gao Feng <fgao@ikuai8.com>
>>>
>>> The tc could return NET_XMIT_CN as one congestion notification, but
>>> it does not mean the packet is lost. Other modules like ipvlan,
>>> macvlan, and others treat NET_XMIT_CN as success too.
>>>
>>> So batman-adv should add the NET_XMIT_CN check.
>>
>> "The tc could return NET_XMIT_CN as one congestion notification, but
>> it means another packet got dropped. Other modules like batman do not
>> treat NET_XMIT_CN as success, so modules like ipvlan, macvlan, ..
>> should ignore it as well."

I didn't get you in the last email

You mean the NET_XMIT_CN should be treated as one error?
But the comment of NET_XMIT_CN says "It indicates that the device will
soon be dropping packets, or already drops some packets of the same
priority". It is not sure another packet is dropped.

BTW, the macro "net_xmit_eval" does not treat NET_XMIT_CN as one error.

Regards
Feng

>>
>> What I am asking is:
>> Are you sure adding NET_XMIT_CN handling everywhere is the right way to
>> resolve the inconsistency?
>
> Or create one new macro to handle this case like net_xmit_eval.
> For example,
> #define net_xmit_ok(ret)    (ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)
>
> Is it ok? But it should be done for net-next.
>
> Best Regards
> Feng

^ permalink raw reply

* Re: Synopsys Ethernet QoS Driver
From: Lars Persson @ 2016-11-21 13:28 UTC (permalink / raw)
  To: Giuseppe CAVALLARO
  Cc: Joao Pinto, Rayagond Kokatanur, Rabin Vincent, mued dib,
	David Miller, Jeff Kirsher, jiri@mellanox.com,
	saeedm@mellanox.com, idosch@mellanox.com, netdev,
	linux-kernel@vger.kernel.org, CARLOS.PALMINHA@synopsys.com,
	Andreas Irestål, alexandre.torgue@st.com,
	linux-arm-kernel@lists.infradead.org
In-Reply-To: <87c8a24b-0812-7850-cb3f-7be691bab432@st.com>



> 21 nov. 2016 kl. 13:53 skrev Giuseppe CAVALLARO <peppe.cavallaro@st.com>:
> 
> Hello Joao
> 
>> On 11/21/2016 1:32 PM, Joao Pinto wrote:
>> Hello,
>> 
>>> On 21-11-2016 05:29, Rayagond Kokatanur wrote:
>>>> On Sat, Nov 19, 2016 at 7:26 PM, Rabin Vincent <rabin@rab.in> wrote:
>>>>> On Fri, Nov 18, 2016 at 02:20:27PM +0000, Joao Pinto wrote:
>>>>> For now we are interesting in improving the synopsys QoS driver under
>>>>> /nect/ethernet/synopsys. For now the driver structure consists of a single file
>>>>> called dwc_eth_qos.c, containing synopsys ethernet qos common ops and platform
>>>>> related stuff.
>>>>> 
>>>>> Our strategy would be:
>>>>> 
>>>>> a) Implement a platform glue driver (dwc_eth_qos_pltfm.c)
>>>>> b) Implement a pci glue driver (dwc_eth_qos_pci.c)
>>>>> c) Implement a "core driver" (dwc_eth_qos.c) that would only have Ethernet QoS
>>>>> related stuff to be reused by the platform / pci drivers
>>>>> d) Add a set of features to the "core driver" that we have available internally
>>>> 
>>>> Note that there are actually two drivers in mainline for this hardware:
>>>> 
>>>> drivers/net/ethernet/synopsis/
>>>> drivers/net/ethernet/stmicro/stmmac/
>>> 
>>> Yes the later driver (drivers/net/ethernet/stmicro/stmmac/) supports
>>> both 3.x and 4.x. It has glue layer for pci, platform, core etc,
>>> please refer this driver once before you start.
>>> 
>>> You can start adding missing feature of 4.x in stmmac driver.
>> 
>> Thanks you all for all the info.
>> Well, I think we are in a good position to organize the ethernet drivers
>> concerning Synopsys IPs.
>> 
>> First of all, in my opinion, it does not make sense to have a ethernet/synopsis
>> (typo :)) when ethernet/stmicro is also for a synopsys IP. If we have another
>> vendor using the same IP it should be able to reuse the commonn operations. But
>> I would put that discussion for later :)
>> 
>> For now I suggest that for we create ethernet/qos and create there a folder
>> called dwc (designware controller) where all the synopsys qos IP specific code
>> in order to be reused for example by ethernet/stmicro/stmmac/. We just have to
>> figure out a clean interface for "client drivers" like stmmac to interact with
>> the new qos driver.
>> 
>> What do you think about this approach?
> 
> The stmmac drivers run since many years on several platforms
> (sh4, stm32, arm, x86, mips ...) and it supports an huge of amount of
> configurations starting from 3.1x to 3.7x databooks.
> 
> It also supports QoS hardware; for example, 4.00a, 4.10a and 4.20a
> are fully working.
> 
> Also the stmmac has platform, device-tree and pcie supports and
> a lot of maintained glue-logic files.
> 
> It is fully documented inside the kernel tree.
> 
> I am happy to have new enhancements from other developers.
> So, on my side, if you want to spend your time on improving it on your
> platforms please feel free to do it!
> 
> Concerning the stmicro/stmmac naming, these come from a really old
> story and have no issue to adopt new folder/file names.
> 
> I am also open to merge fixes and changes from ethernet/synopsis.
> I want to point you on some benchmarks made by Alex some months ago
> (IIRC) that showed an stmmac winner (due to the several optimizations
> analyzed and reviewed in this mailing list).
> 
> Peppe
> 

Hello Joao and others,

As the maintainer of dwc_eth_qos.c I prefer also that we put efforts on the most mature driver, the stmmac.

I hope that the code can migrate into an ethernet/synopsys folder to keep the convention of naming the folder after the vendor. This makes it easy for others to find the driver. 

The dwc_eth_qos.c will eventually be removed and its DT binding interface can then be implemented in the stmmac driver.

- Lars

>> 
>> 
>>> 
>>>> 
>>>> (See http://lists.openwall.net/netdev/2016/02/29/127)
>>>> 
>>>> The former only supports 4.x of the hardware.
>>>> 
>>>> The later supports 4.x and 3.x and already has a platform glue driver
>>>> with support for several platforms, a PCI glue driver, and a core driver
>>>> with several features not present in the former (for example: TX/RX
>>>> interrupt coalescing, EEE, PTP).
>>>> 
>>>> Have you evaluated both drivers?  Why have you decided to work on the
>>>> former rather than the latter?
>>> 
>>> 
>> 
>> Thanks.
>> 
>> 
>> 
>> 
> 

^ permalink raw reply

* Re: [PATCH net v2 1/1] net: batman-adv: Treat NET_XMIT_CN as transmit successfully
From: Gao Feng @ 2016-11-21 13:24 UTC (permalink / raw)
  To: Florian Westphal
  Cc: mareklindner, Simon Wunderlich, a, David S. Miller, b.a.t.m.a.n,
	Linux Kernel Network Developers
In-Reply-To: <20161121110915.GB22120@breakpoint.cc>

Hi Florian,

On Mon, Nov 21, 2016 at 7:09 PM, Florian Westphal <fw@strlen.de> wrote:
> fgao@ikuai8.com <fgao@ikuai8.com> wrote:
>> From: Gao Feng <fgao@ikuai8.com>
>>
>> The tc could return NET_XMIT_CN as one congestion notification, but
>> it does not mean the packet is lost. Other modules like ipvlan,
>> macvlan, and others treat NET_XMIT_CN as success too.
>>
>> So batman-adv should add the NET_XMIT_CN check.
>
> "The tc could return NET_XMIT_CN as one congestion notification, but
> it means another packet got dropped. Other modules like batman do not
> treat NET_XMIT_CN as success, so modules like ipvlan, macvlan, ..
> should ignore it as well."
>
> What I am asking is:
> Are you sure adding NET_XMIT_CN handling everywhere is the right way to
> resolve the inconsistency?

Or create one new macro to handle this case like net_xmit_eval.
For example,
#define net_xmit_ok(ret)    (ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)

Is it ok? But it should be done for net-next.

Best Regards
Feng

^ permalink raw reply

* Re: [PATCH v7 2/2] mac80211: multicast to unicast conversion
From: Johannes Berg @ 2016-11-21 13:24 UTC (permalink / raw)
  To: Michael Braun
  Cc: linux-wireless-u79uwXL29TY76Z2rM5mHXA,
	netdev-u79uwXL29TY76Z2rM5mHXA,
	projekt-wlan-3kN+8DYepx7zMJDuovMtMLNAH6kLmebB
In-Reply-To: <1477921260-24556-2-git-send-email-michael-dev-1SGGS//iJ+Y38rf8aCqVIw@public.gmane.org>

On Mon, 2016-10-31 at 14:41 +0100, Michael Braun wrote:
 
> +static int ieee80211_set_multicast_to_unicast(struct wiphy *wiphy,
> +					      struct net_device
> *dev,
> +					      const bool enabled)
> +{
> +	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
> +
> +	/* not supported with P2P_GO for now */
> +	if (sdata->vif.type != NL80211_IFTYPE_AP)
> +		return -EOPNOTSUPP;

This check is completely inefficient, since vif.type is == AP for GO.

Also, I see no reason to restrict it that way, why not remove the
check? There's no difference.

> +static inline int
> +ieee80211_multicast_to_unicast(struct sk_buff *skb, struct
> net_device *dev)

use bool return?

johannes

^ permalink raw reply

* [PATCH next v2 2/2] tcp: make undo_cwnd mandatory for congestion modules
From: Florian Westphal @ 2016-11-21 13:18 UTC (permalink / raw)
  To: netdev; +Cc: Florian Westphal, Eric Dumazet, Yuchung Cheng, Neal Cardwell
In-Reply-To: <1479734318-30607-1-git-send-email-fw@strlen.de>

The undo_cwnd fallback in the stack doubles cwnd based on ssthresh,
which un-does reno halving behaviour.

It seems more appropriate to let congctl algorithms pair .ssthresh
and .undo_cwnd properly. Add a 'tcp_reno_undo_cwnd' function and wire it
up for all congestion algorithms that used to rely on the fallback.

Cc: Eric Dumazet <edumazet@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Cc: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
---
 Changes since v1:
  - only use tcp_reno_undo_cwnd in algorithms that use
  tcp_reno_ssthresh.  The other algorithms that did not
  meet this requirement were changed in patch 1 of this
  series.

 include/net/tcp.h       |  1 +
 net/ipv4/tcp_cong.c     | 14 ++++++++++++--
 net/ipv4/tcp_dctcp.c    |  1 +
 net/ipv4/tcp_hybla.c    |  1 +
 net/ipv4/tcp_input.c    |  5 +----
 net/ipv4/tcp_lp.c       |  1 +
 net/ipv4/tcp_vegas.c    |  1 +
 net/ipv4/tcp_westwood.c |  1 +
 8 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 123979fe12bf..7de80739adab 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -958,6 +958,7 @@ u32 tcp_slow_start(struct tcp_sock *tp, u32 acked);
 void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked);
 
 u32 tcp_reno_ssthresh(struct sock *sk);
+u32 tcp_reno_undo_cwnd(struct sock *sk);
 void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked);
 extern struct tcp_congestion_ops tcp_reno;
 
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index f9038d6b109e..79c4817abc94 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -68,8 +68,9 @@ int tcp_register_congestion_control(struct tcp_congestion_ops *ca)
 {
 	int ret = 0;
 
-	/* all algorithms must implement ssthresh and cong_avoid ops */
-	if (!ca->ssthresh || !(ca->cong_avoid || ca->cong_control)) {
+	/* all algorithms must implement these */
+	if (!ca->ssthresh || !ca->undo_cwnd ||
+	    !(ca->cong_avoid || ca->cong_control)) {
 		pr_err("%s does not implement required ops\n", ca->name);
 		return -EINVAL;
 	}
@@ -443,10 +444,19 @@ u32 tcp_reno_ssthresh(struct sock *sk)
 }
 EXPORT_SYMBOL_GPL(tcp_reno_ssthresh);
 
+u32 tcp_reno_undo_cwnd(struct sock *sk)
+{
+	const struct tcp_sock *tp = tcp_sk(sk);
+
+	return max(tp->snd_cwnd, tp->snd_ssthresh << 1);
+}
+EXPORT_SYMBOL_GPL(tcp_reno_undo_cwnd);
+
 struct tcp_congestion_ops tcp_reno = {
 	.flags		= TCP_CONG_NON_RESTRICTED,
 	.name		= "reno",
 	.owner		= THIS_MODULE,
 	.ssthresh	= tcp_reno_ssthresh,
 	.cong_avoid	= tcp_reno_cong_avoid,
+	.undo_cwnd	= tcp_reno_undo_cwnd,
 };
diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c
index 51139175bf61..bde22ebb92a8 100644
--- a/net/ipv4/tcp_dctcp.c
+++ b/net/ipv4/tcp_dctcp.c
@@ -342,6 +342,7 @@ static struct tcp_congestion_ops dctcp __read_mostly = {
 static struct tcp_congestion_ops dctcp_reno __read_mostly = {
 	.ssthresh	= tcp_reno_ssthresh,
 	.cong_avoid	= tcp_reno_cong_avoid,
+	.undo_cwnd	= tcp_reno_undo_cwnd,
 	.get_info	= dctcp_get_info,
 	.owner		= THIS_MODULE,
 	.name		= "dctcp-reno",
diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c
index 083831e359df..0f7175c3338e 100644
--- a/net/ipv4/tcp_hybla.c
+++ b/net/ipv4/tcp_hybla.c
@@ -166,6 +166,7 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 acked)
 static struct tcp_congestion_ops tcp_hybla __read_mostly = {
 	.init		= hybla_init,
 	.ssthresh	= tcp_reno_ssthresh,
+	.undo_cwnd	= tcp_reno_undo_cwnd,
 	.cong_avoid	= hybla_cong_avoid,
 	.set_state	= hybla_state,
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index a70046fea0e8..22e6a2097ff6 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2394,10 +2394,7 @@ static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss)
 	if (tp->prior_ssthresh) {
 		const struct inet_connection_sock *icsk = inet_csk(sk);
 
-		if (icsk->icsk_ca_ops->undo_cwnd)
-			tp->snd_cwnd = icsk->icsk_ca_ops->undo_cwnd(sk);
-		else
-			tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh << 1);
+		tp->snd_cwnd = icsk->icsk_ca_ops->undo_cwnd(sk);
 
 		if (tp->prior_ssthresh > tp->snd_ssthresh) {
 			tp->snd_ssthresh = tp->prior_ssthresh;
diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c
index c67ece1390c2..046fd3910873 100644
--- a/net/ipv4/tcp_lp.c
+++ b/net/ipv4/tcp_lp.c
@@ -316,6 +316,7 @@ static void tcp_lp_pkts_acked(struct sock *sk, const struct ack_sample *sample)
 static struct tcp_congestion_ops tcp_lp __read_mostly = {
 	.init = tcp_lp_init,
 	.ssthresh = tcp_reno_ssthresh,
+	.undo_cwnd = tcp_reno_undo_cwnd,
 	.cong_avoid = tcp_lp_cong_avoid,
 	.pkts_acked = tcp_lp_pkts_acked,
 
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index 4c4bac1b5eab..218cfcc77650 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -307,6 +307,7 @@ EXPORT_SYMBOL_GPL(tcp_vegas_get_info);
 static struct tcp_congestion_ops tcp_vegas __read_mostly = {
 	.init		= tcp_vegas_init,
 	.ssthresh	= tcp_reno_ssthresh,
+	.undo_cwnd	= tcp_reno_undo_cwnd,
 	.cong_avoid	= tcp_vegas_cong_avoid,
 	.pkts_acked	= tcp_vegas_pkts_acked,
 	.set_state	= tcp_vegas_state,
diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c
index 4b03a2e2a050..fed66dc0e0f5 100644
--- a/net/ipv4/tcp_westwood.c
+++ b/net/ipv4/tcp_westwood.c
@@ -278,6 +278,7 @@ static struct tcp_congestion_ops tcp_westwood __read_mostly = {
 	.init		= tcp_westwood_init,
 	.ssthresh	= tcp_reno_ssthresh,
 	.cong_avoid	= tcp_reno_cong_avoid,
+	.undo_cwnd      = tcp_reno_undo_cwnd,
 	.cwnd_event	= tcp_westwood_event,
 	.in_ack_event	= tcp_westwood_ack,
 	.get_info	= tcp_westwood_info,
-- 
2.7.3

^ permalink raw reply related

* [PATCH next 1/2] tcp: add cwnd_undo functions to various tcp cc algorithms
From: Florian Westphal @ 2016-11-21 13:18 UTC (permalink / raw)
  To: netdev; +Cc: Florian Westphal
In-Reply-To: <1479734318-30607-1-git-send-email-fw@strlen.de>

congestion control algorithms that do not halve cwnd in their .ssthresh
should provide a .cwnd_undo rather than rely on current fallback which
assumes reno halving (and thus doubles the cwnd).

All of these do 'something else' in their .ssthresh implementation, thus
store the cwnd on loss and provide .undo_cwnd to restore it again.

A followup patch will remove the fallback and all algorithms will
need to provide a .cwnd_undo function.

Signed-off-by: Florian Westphal <fw@strlen.de>
---

 net/ipv4/tcp_highspeed.c | 11 ++++++++++-
 net/ipv4/tcp_illinois.c  | 10 ++++++++++
 net/ipv4/tcp_scalable.c  | 15 +++++++++++++++
 net/ipv4/tcp_veno.c      | 10 ++++++++++
 net/ipv4/tcp_yeah.c      | 10 ++++++++++
 5 files changed, 55 insertions(+), 1 deletion(-)

diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c
index db7842495a64..6d9879e93648 100644
--- a/net/ipv4/tcp_highspeed.c
+++ b/net/ipv4/tcp_highspeed.c
@@ -94,6 +94,7 @@ static const struct hstcp_aimd_val {
 
 struct hstcp {
 	u32	ai;
+	u32	loss_cwnd;
 };
 
 static void hstcp_init(struct sock *sk)
@@ -150,16 +151,24 @@ static void hstcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
 static u32 hstcp_ssthresh(struct sock *sk)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
-	const struct hstcp *ca = inet_csk_ca(sk);
+	struct hstcp *ca = inet_csk_ca(sk);
 
+	ca->loss_cwnd = tp->snd_cwnd;
 	/* Do multiplicative decrease */
 	return max(tp->snd_cwnd - ((tp->snd_cwnd * hstcp_aimd_vals[ca->ai].md) >> 8), 2U);
 }
 
+static u32 hstcp_cwnd_undo(struct sock *sk)
+{
+	const struct hstcp *ca = inet_csk_ca(sk);
+
+	return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd);
+}
 
 static struct tcp_congestion_ops tcp_highspeed __read_mostly = {
 	.init		= hstcp_init,
 	.ssthresh	= hstcp_ssthresh,
+	.undo_cwnd	= hstcp_cwnd_undo,
 	.cong_avoid	= hstcp_cong_avoid,
 
 	.owner		= THIS_MODULE,
diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c
index c8e6d86be114..60352ff4f5a8 100644
--- a/net/ipv4/tcp_illinois.c
+++ b/net/ipv4/tcp_illinois.c
@@ -48,6 +48,7 @@ struct illinois {
 	u32	end_seq;	/* right edge of current RTT */
 	u32	alpha;		/* Additive increase */
 	u32	beta;		/* Muliplicative decrease */
+	u32	loss_cwnd;	/* cwnd on loss */
 	u16	acked;		/* # packets acked by current ACK */
 	u8	rtt_above;	/* average rtt has gone above threshold */
 	u8	rtt_low;	/* # of rtts measurements below threshold */
@@ -296,10 +297,18 @@ static u32 tcp_illinois_ssthresh(struct sock *sk)
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct illinois *ca = inet_csk_ca(sk);
 
+	ca->loss_cwnd = tp->snd_cwnd;
 	/* Multiplicative decrease */
 	return max(tp->snd_cwnd - ((tp->snd_cwnd * ca->beta) >> BETA_SHIFT), 2U);
 }
 
+static u32 tcp_illinois_cwnd_undo(struct sock *sk)
+{
+	const struct illinois *ca = inet_csk_ca(sk);
+
+	return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd);
+}
+
 /* Extract info for Tcp socket info provided via netlink. */
 static size_t tcp_illinois_info(struct sock *sk, u32 ext, int *attr,
 				union tcp_cc_info *info)
@@ -327,6 +336,7 @@ static size_t tcp_illinois_info(struct sock *sk, u32 ext, int *attr,
 static struct tcp_congestion_ops tcp_illinois __read_mostly = {
 	.init		= tcp_illinois_init,
 	.ssthresh	= tcp_illinois_ssthresh,
+	.undo_cwnd	= tcp_illinois_cwnd_undo,
 	.cong_avoid	= tcp_illinois_cong_avoid,
 	.set_state	= tcp_illinois_state,
 	.get_info	= tcp_illinois_info,
diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c
index bf5ea9e9bbc1..f2123075ce6e 100644
--- a/net/ipv4/tcp_scalable.c
+++ b/net/ipv4/tcp_scalable.c
@@ -15,6 +15,10 @@
 #define TCP_SCALABLE_AI_CNT	50U
 #define TCP_SCALABLE_MD_SCALE	3
 
+struct scalable {
+	u32 loss_cwnd;
+};
+
 static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 acked)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
@@ -32,12 +36,23 @@ static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 acked)
 static u32 tcp_scalable_ssthresh(struct sock *sk)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
+	struct scalable *ca = inet_csk_ca(sk);
+
+	ca->loss_cwnd = tp->snd_cwnd;
 
 	return max(tp->snd_cwnd - (tp->snd_cwnd>>TCP_SCALABLE_MD_SCALE), 2U);
 }
 
+static u32 tcp_scalable_cwnd_undo(struct sock *sk)
+{
+	const struct scalable *ca = inet_csk_ca(sk);
+
+	return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd);
+}
+
 static struct tcp_congestion_ops tcp_scalable __read_mostly = {
 	.ssthresh	= tcp_scalable_ssthresh,
+	.undo_cwnd	= tcp_scalable_cwnd_undo,
 	.cong_avoid	= tcp_scalable_cong_avoid,
 
 	.owner		= THIS_MODULE,
diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c
index 40171e163cff..76005d4b8dfc 100644
--- a/net/ipv4/tcp_veno.c
+++ b/net/ipv4/tcp_veno.c
@@ -30,6 +30,7 @@ struct veno {
 	u32 basertt;		/* the min of all Veno rtt measurements seen (in usec) */
 	u32 inc;		/* decide whether to increase cwnd */
 	u32 diff;		/* calculate the diff rate */
+	u32 loss_cwnd;		/* cwnd when loss occured */
 };
 
 /* There are several situations when we must "re-start" Veno:
@@ -193,6 +194,7 @@ static u32 tcp_veno_ssthresh(struct sock *sk)
 	const struct tcp_sock *tp = tcp_sk(sk);
 	struct veno *veno = inet_csk_ca(sk);
 
+	veno->loss_cwnd = tp->snd_cwnd;
 	if (veno->diff < beta)
 		/* in "non-congestive state", cut cwnd by 1/5 */
 		return max(tp->snd_cwnd * 4 / 5, 2U);
@@ -201,9 +203,17 @@ static u32 tcp_veno_ssthresh(struct sock *sk)
 		return max(tp->snd_cwnd >> 1U, 2U);
 }
 
+static u32 tcp_veno_cwnd_undo(struct sock *sk)
+{
+	const struct veno *veno = inet_csk_ca(sk);
+
+	return max(tcp_sk(sk)->snd_cwnd, veno->loss_cwnd);
+}
+
 static struct tcp_congestion_ops tcp_veno __read_mostly = {
 	.init		= tcp_veno_init,
 	.ssthresh	= tcp_veno_ssthresh,
+	.undo_cwnd	= tcp_veno_cwnd_undo,
 	.cong_avoid	= tcp_veno_cong_avoid,
 	.pkts_acked	= tcp_veno_pkts_acked,
 	.set_state	= tcp_veno_state,
diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c
index 9c5fc973267f..e6ff99c4bd3b 100644
--- a/net/ipv4/tcp_yeah.c
+++ b/net/ipv4/tcp_yeah.c
@@ -37,6 +37,7 @@ struct yeah {
 	u32 fast_count;
 
 	u32 pkts_acked;
+	u32 loss_cwnd;
 };
 
 static void tcp_yeah_init(struct sock *sk)
@@ -219,13 +220,22 @@ static u32 tcp_yeah_ssthresh(struct sock *sk)
 
 	yeah->fast_count = 0;
 	yeah->reno_count = max(yeah->reno_count>>1, 2U);
+	yeah->loss_cwnd = tp->snd_cwnd;
 
 	return max_t(int, tp->snd_cwnd - reduction, 2);
 }
 
+static u32 tcp_yeah_cwnd_undo(struct sock *sk)
+{
+	const struct yeah *yeah = inet_csk_ca(sk);
+
+	return max(tcp_sk(sk)->snd_cwnd, yeah->loss_cwnd);
+}
+
 static struct tcp_congestion_ops tcp_yeah __read_mostly = {
 	.init		= tcp_yeah_init,
 	.ssthresh	= tcp_yeah_ssthresh,
+	.undo_cwnd      = tcp_yeah_cwnd_undo,
 	.cong_avoid	= tcp_yeah_cong_avoid,
 	.set_state	= tcp_vegas_state,
 	.cwnd_event	= tcp_vegas_cwnd_event,
-- 
2.7.3

^ permalink raw reply related

* [PATCH v2 next 0/2] tcp: make undo_cwnd mandatory for congestion modules
From: Florian Westphal @ 2016-11-21 13:18 UTC (permalink / raw)
  To: netdev

highspeed, illinois, scalable, veno and yeah congestion control algorithms
don't provide a 'cwnd_undo' function.  This makes the stack default to a
'reno undo' which doubles cwnd.  However, the ssthresh implementation of
these algorithms do not halve the slowstart threshold. This causes similar
issue as the one fixed for dctcp in ce6dd23329b1e ("dctcp: avoid bogus
doubling of cwnd after loss").

In light of this it seems better to remove the fallback and make undo_cwnd
mandatory.

First patch fixes those spots where reno undo seems incorrect by providing
.cwnd_undo functions, second patch removes the fallback.

^ permalink raw reply

* [PATCH net-next 6/7] net/mlx5: E-Switch, Add control for inline mode
From: Saeed Mahameed @ 2016-11-21 13:06 UTC (permalink / raw)
  To: David S. Miller; +Cc: netdev, Or Gerlitz, Roi Dayan, Saeed Mahameed
In-Reply-To: <1479733561-26601-1-git-send-email-saeedm@mellanox.com>

From: Roi Dayan <roid@mellanox.com>

Implement devlink show and set of HW inline-mode.
The supported modes: none, link, network, transport.
We currently support one mode for all vports so set is done on all vports.
When eswitch is first initialized the inline-mode is queried from the FW.

Signed-off-by: Roi Dayan <roid@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c  |   1 +
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h  |   4 +
 .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 141 +++++++++++++++++++++
 drivers/net/ethernet/mellanox/mlx5/core/main.c     |   2 +
 4 files changed, 148 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 9734ac8..d6807c3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -1798,6 +1798,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
 	esw->total_vports = total_vports;
 	esw->enabled_vports = 0;
 	esw->mode = SRIOV_NONE;
+	esw->offloads.inline_mode = MLX5_INLINE_MODE_NONE;
 
 	dev->priv.eswitch = esw;
 	return 0;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index 40482e8..cf1aa56 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -200,6 +200,7 @@ struct mlx5_esw_offload {
 	struct mlx5_flow_group *vport_rx_group;
 	struct mlx5_eswitch_rep *vport_reps;
 	DECLARE_HASHTABLE(encap_tbl, 8);
+	u8 inline_mode;
 };
 
 struct mlx5_eswitch {
@@ -309,6 +310,9 @@ void mlx5_eswitch_sqs2vport_stop(struct mlx5_eswitch *esw,
 
 int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode);
 int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode);
+int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode);
+int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode);
+int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, int nvfs, u8 *mode);
 void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw,
 				     int vport_index,
 				     struct mlx5_eswitch_rep *rep);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 731f286..5c01550 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -657,6 +657,14 @@ static int esw_offloads_start(struct mlx5_eswitch *esw)
 		if (err1)
 			esw_warn(esw->dev, "Failed setting eswitch back to legacy, err %d\n", err);
 	}
+	if (esw->offloads.inline_mode == MLX5_INLINE_MODE_NONE) {
+		if (mlx5_eswitch_inline_mode_get(esw,
+						 num_vfs,
+						 &esw->offloads.inline_mode)) {
+			esw->offloads.inline_mode = MLX5_INLINE_MODE_L2;
+			esw_warn(esw->dev, "Inline mode is different between vports\n");
+		}
+	}
 	return err;
 }
 
@@ -771,6 +779,50 @@ static int esw_mode_to_devlink(u16 mlx5_mode, u16 *mode)
 	return 0;
 }
 
+static int esw_inline_mode_from_devlink(u8 mode, u8 *mlx5_mode)
+{
+	switch (mode) {
+	case DEVLINK_ESWITCH_INLINE_MODE_NONE:
+		*mlx5_mode = MLX5_INLINE_MODE_NONE;
+		break;
+	case DEVLINK_ESWITCH_INLINE_MODE_LINK:
+		*mlx5_mode = MLX5_INLINE_MODE_L2;
+		break;
+	case DEVLINK_ESWITCH_INLINE_MODE_NETWORK:
+		*mlx5_mode = MLX5_INLINE_MODE_IP;
+		break;
+	case DEVLINK_ESWITCH_INLINE_MODE_TRANSPORT:
+		*mlx5_mode = MLX5_INLINE_MODE_TCP_UDP;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int esw_inline_mode_to_devlink(u8 mlx5_mode, u8 *mode)
+{
+	switch (mlx5_mode) {
+	case MLX5_INLINE_MODE_NONE:
+		*mode = DEVLINK_ESWITCH_INLINE_MODE_NONE;
+		break;
+	case MLX5_INLINE_MODE_L2:
+		*mode = DEVLINK_ESWITCH_INLINE_MODE_LINK;
+		break;
+	case MLX5_INLINE_MODE_IP:
+		*mode = DEVLINK_ESWITCH_INLINE_MODE_NETWORK;
+		break;
+	case MLX5_INLINE_MODE_TCP_UDP:
+		*mode = DEVLINK_ESWITCH_INLINE_MODE_TRANSPORT;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode)
 {
 	struct mlx5_core_dev *dev;
@@ -815,6 +867,95 @@ int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode)
 	return esw_mode_to_devlink(dev->priv.eswitch->mode, mode);
 }
 
+int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode)
+{
+	struct mlx5_core_dev *dev = devlink_priv(devlink);
+	struct mlx5_eswitch *esw = dev->priv.eswitch;
+	int num_vports = esw->enabled_vports;
+	int err;
+	int vport;
+	u8 mlx5_mode;
+
+	if (!MLX5_CAP_GEN(dev, vport_group_manager))
+		return -EOPNOTSUPP;
+
+	if (esw->mode == SRIOV_NONE)
+		return -EOPNOTSUPP;
+
+	if (MLX5_CAP_ETH(dev, wqe_inline_mode) !=
+	    MLX5_CAP_INLINE_MODE_VPORT_CONTEXT)
+		return -EOPNOTSUPP;
+
+	err = esw_inline_mode_from_devlink(mode, &mlx5_mode);
+	if (err)
+		goto out;
+
+	for (vport = 1; vport < num_vports; vport++) {
+		err = mlx5_modify_nic_vport_min_inline(dev, vport, mlx5_mode);
+		if (err) {
+			esw_warn(dev, "Failed to set min inline on vport %d\n",
+				 vport);
+			goto revert_inline_mode;
+		}
+	}
+
+	esw->offloads.inline_mode = mlx5_mode;
+	return 0;
+
+revert_inline_mode:
+	while (--vport > 0)
+		mlx5_modify_nic_vport_min_inline(dev,
+						 vport,
+						 esw->offloads.inline_mode);
+out:
+	return err;
+}
+
+int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode)
+{
+	struct mlx5_core_dev *dev = devlink_priv(devlink);
+	struct mlx5_eswitch *esw = dev->priv.eswitch;
+
+	if (!MLX5_CAP_GEN(dev, vport_group_manager))
+		return -EOPNOTSUPP;
+
+	if (esw->mode == SRIOV_NONE)
+		return -EOPNOTSUPP;
+
+	if (MLX5_CAP_ETH(dev, wqe_inline_mode) !=
+	    MLX5_CAP_INLINE_MODE_VPORT_CONTEXT)
+		return -EOPNOTSUPP;
+
+	return esw_inline_mode_to_devlink(esw->offloads.inline_mode, mode);
+}
+
+int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, int nvfs, u8 *mode)
+{
+	struct mlx5_core_dev *dev = esw->dev;
+	int vport;
+	u8 prev_mlx5_mode, mlx5_mode = MLX5_INLINE_MODE_L2;
+
+	if (!MLX5_CAP_GEN(dev, vport_group_manager))
+		return -EOPNOTSUPP;
+
+	if (esw->mode == SRIOV_NONE)
+		return -EOPNOTSUPP;
+
+	if (MLX5_CAP_ETH(dev, wqe_inline_mode) !=
+	    MLX5_CAP_INLINE_MODE_VPORT_CONTEXT)
+		return -EOPNOTSUPP;
+
+	for (vport = 1; vport <= nvfs; vport++) {
+		mlx5_query_nic_vport_min_inline(dev, vport, &mlx5_mode);
+		if (vport > 1 && prev_mlx5_mode != mlx5_mode)
+			return -EINVAL;
+		prev_mlx5_mode = mlx5_mode;
+	}
+
+	*mode = mlx5_mode;
+	return 0;
+}
+
 void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw,
 				     int vport_index,
 				     struct mlx5_eswitch_rep *__rep)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index f28df33..b440a16 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1239,6 +1239,8 @@ static const struct devlink_ops mlx5_devlink_ops = {
 #ifdef CONFIG_MLX5_CORE_EN
 	.eswitch_mode_set = mlx5_devlink_eswitch_mode_set,
 	.eswitch_mode_get = mlx5_devlink_eswitch_mode_get,
+	.eswitch_inline_mode_set = mlx5_devlink_eswitch_inline_mode_set,
+	.eswitch_inline_mode_get = mlx5_devlink_eswitch_inline_mode_get,
 #endif
 };
 
-- 
2.7.4

^ permalink raw reply related

* [PATCH net-next 7/7] net/mlx5e: Enforce min inline mode when offloading flows
From: Saeed Mahameed @ 2016-11-21 13:06 UTC (permalink / raw)
  To: David S. Miller; +Cc: netdev, Or Gerlitz, Roi Dayan, Saeed Mahameed
In-Reply-To: <1479733561-26601-1-git-send-email-saeedm@mellanox.com>

From: Roi Dayan <roid@mellanox.com>

A flow should be offloaded only if the matches are
allowed according to min inline mode.

Signed-off-by: Roi Dayan <roid@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 46 +++++++++++++++++++++++--
 1 file changed, 44 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 4b99112..4d06fab 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -279,8 +279,10 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv,
 	return 0;
 }
 
-static int parse_cls_flower(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec,
-			    struct tc_cls_flower_offload *f)
+static int __parse_cls_flower(struct mlx5e_priv *priv,
+			      struct mlx5_flow_spec *spec,
+			      struct tc_cls_flower_offload *f,
+			      u8 *min_inline)
 {
 	void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
 				       outer_headers);
@@ -289,6 +291,8 @@ static int parse_cls_flower(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec
 	u16 addr_type = 0;
 	u8 ip_proto = 0;
 
+	*min_inline = MLX5_INLINE_MODE_L2;
+
 	if (f->dissector->used_keys &
 	    ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) |
 	      BIT(FLOW_DISSECTOR_KEY_BASIC) |
@@ -362,6 +366,9 @@ static int parse_cls_flower(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec
 			 mask->ip_proto);
 		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
 			 key->ip_proto);
+
+		if (mask->ip_proto)
+			*min_inline = MLX5_INLINE_MODE_IP;
 	}
 
 	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
@@ -432,6 +439,9 @@ static int parse_cls_flower(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec
 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
 				    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
 		       &key->dst, sizeof(key->dst));
+
+		if (mask->src || mask->dst)
+			*min_inline = MLX5_INLINE_MODE_IP;
 	}
 
 	if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
@@ -457,6 +467,10 @@ static int parse_cls_flower(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec
 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
 				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
 		       &key->dst, sizeof(key->dst));
+
+		if (ipv6_addr_type(&mask->src) != IPV6_ADDR_ANY ||
+		    ipv6_addr_type(&mask->dst) != IPV6_ADDR_ANY)
+			*min_inline = MLX5_INLINE_MODE_IP;
 	}
 
 	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_PORTS)) {
@@ -497,11 +511,39 @@ static int parse_cls_flower(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec
 				   "Only UDP and TCP transport are supported\n");
 			return -EINVAL;
 		}
+
+		if (mask->src || mask->dst)
+			*min_inline = MLX5_INLINE_MODE_TCP_UDP;
 	}
 
 	return 0;
 }
 
+static int parse_cls_flower(struct mlx5e_priv *priv,
+			    struct mlx5_flow_spec *spec,
+			    struct tc_cls_flower_offload *f)
+{
+	struct mlx5_core_dev *dev = priv->mdev;
+	struct mlx5_eswitch *esw = dev->priv.eswitch;
+	struct mlx5_eswitch_rep *rep = priv->ppriv;
+	u8 min_inline;
+	int err;
+
+	err = __parse_cls_flower(priv, spec, f, &min_inline);
+
+	if (!err && esw->mode == SRIOV_OFFLOADS &&
+	    rep->vport != FDB_UPLINK_VPORT) {
+		if (min_inline > esw->offloads.inline_mode) {
+			netdev_warn(priv->netdev,
+				    "Flow is not offloaded due to min inline setting, required %d actual %d\n",
+				    min_inline, esw->offloads.inline_mode);
+			return -EOPNOTSUPP;
+		}
+	}
+
+	return err;
+}
+
 static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
 				u32 *action, u32 *flow_tag)
 {
-- 
2.7.4

^ permalink raw reply related

* [PATCH net-next 2/7] net/mlx5e: Support HW (offloaded) and SW counters for SRIOV switchdev mode
From: Saeed Mahameed @ 2016-11-21 13:05 UTC (permalink / raw)
  To: David S. Miller; +Cc: netdev, Or Gerlitz, Roi Dayan, Saeed Mahameed
In-Reply-To: <1479733561-26601-1-git-send-email-saeedm@mellanox.com>

From: Or Gerlitz <ogerlitz@mellanox.com>

Switchdev driver net-device port statistics should follow the model introduced
in commit a5ea31f57309 'Merge branch net-offloaded-stats'.

For VF reps we return the SRIOV eswitch vport stats as the usual ones and SW stats
if asked. For the PF, if we're in the switchdev mode, we return the uplink stats
and SW stats if asked, otherwise as before. The uplink stats are implemented using
the PPCNT 802_3 counters which are already being read/cached by the driver.

Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h       |   9 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  |  31 +++---
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c   | 111 +++++++++++++++++++--
 drivers/net/ethernet/mellanox/mlx5/core/en_stats.h |   1 +
 4 files changed, 128 insertions(+), 24 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index ac09767..ebf5dbc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -874,6 +874,7 @@ int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv);
 void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv);
 int mlx5e_attr_get(struct net_device *dev, struct switchdev_attr *attr);
 void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
+void mlx5e_update_hw_rep_counters(struct mlx5e_priv *priv);
 
 int mlx5e_create_direct_rqts(struct mlx5e_priv *priv);
 void mlx5e_destroy_rqt(struct mlx5e_priv *priv, struct mlx5e_rqt *rqt);
@@ -890,12 +891,16 @@ struct net_device *mlx5e_create_netdev(struct mlx5_core_dev *mdev,
 void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, struct mlx5e_priv *priv);
 int mlx5e_attach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev);
 void mlx5e_detach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev);
-struct rtnl_link_stats64 *
-mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats);
 u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout);
 void mlx5e_add_vxlan_port(struct net_device *netdev,
 			  struct udp_tunnel_info *ti);
 void mlx5e_del_vxlan_port(struct net_device *netdev,
 			  struct udp_tunnel_info *ti);
 
+int mlx5e_get_offload_stats(int attr_id, const struct net_device *dev,
+			    void *sp);
+bool mlx5e_has_offload_stats(const struct net_device *dev, int attr_id);
+
+bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv);
+bool mlx5e_is_vf_vport_rep(struct mlx5e_priv *priv);
 #endif /* __MLX5_EN_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index bd0732d..480875f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -470,16 +470,6 @@ static void mlx5e_rq_free_mpwqe_info(struct mlx5e_rq *rq)
 	kfree(rq->mpwqe.info);
 }
 
-static bool mlx5e_is_vf_vport_rep(struct mlx5e_priv *priv)
-{
-	struct mlx5_eswitch_rep *rep = (struct mlx5_eswitch_rep *)priv->ppriv;
-
-	if (rep && rep->vport != FDB_UPLINK_VPORT)
-		return true;
-
-	return false;
-}
-
 static int mlx5e_create_rq(struct mlx5e_channel *c,
 			   struct mlx5e_rq_param *param,
 			   struct mlx5e_rq *rq)
@@ -2659,7 +2649,7 @@ static int mlx5e_ndo_setup_tc(struct net_device *dev, u32 handle,
 	return mlx5e_setup_tc(dev, tc->tc);
 }
 
-struct rtnl_link_stats64 *
+static struct rtnl_link_stats64 *
 mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
 {
 	struct mlx5e_priv *priv = netdev_priv(dev);
@@ -2667,13 +2657,20 @@ mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
 	struct mlx5e_vport_stats *vstats = &priv->stats.vport;
 	struct mlx5e_pport_stats *pstats = &priv->stats.pport;
 
-	stats->rx_packets = sstats->rx_packets;
-	stats->rx_bytes   = sstats->rx_bytes;
-	stats->tx_packets = sstats->tx_packets;
-	stats->tx_bytes   = sstats->tx_bytes;
+	if (mlx5e_is_uplink_rep(priv)) {
+		stats->rx_packets = PPORT_802_3_GET(pstats, a_frames_received_ok);
+		stats->rx_bytes   = PPORT_802_3_GET(pstats, a_octets_received_ok);
+		stats->tx_packets = PPORT_802_3_GET(pstats, a_frames_transmitted_ok);
+		stats->tx_bytes   = PPORT_802_3_GET(pstats, a_octets_transmitted_ok);
+	} else {
+		stats->rx_packets = sstats->rx_packets;
+		stats->rx_bytes   = sstats->rx_bytes;
+		stats->tx_packets = sstats->tx_packets;
+		stats->tx_bytes   = sstats->tx_bytes;
+		stats->tx_dropped = sstats->tx_queue_dropped;
+	}
 
 	stats->rx_dropped = priv->stats.qcnt.rx_out_of_buffer;
-	stats->tx_dropped = sstats->tx_queue_dropped;
 
 	stats->rx_length_errors =
 		PPORT_802_3_GET(pstats, a_in_range_length_errors) +
@@ -3276,6 +3273,8 @@ static const struct net_device_ops mlx5e_netdev_ops_sriov = {
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	.ndo_poll_controller     = mlx5e_netpoll,
 #endif
+	.ndo_has_offload_stats	 = mlx5e_has_offload_stats,
+	.ndo_get_offload_stats	 = mlx5e_get_offload_stats,
 };
 
 static int mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index a84825d..e0d1a56 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -72,7 +72,29 @@ static void mlx5e_rep_get_strings(struct net_device *dev,
 	}
 }
 
-static void mlx5e_update_sw_rep_counters(struct mlx5e_priv *priv)
+static void mlx5e_rep_update_hw_counters(struct mlx5e_priv *priv)
+{
+	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+	struct mlx5_eswitch_rep *rep = priv->ppriv;
+	struct rtnl_link_stats64 *vport_stats;
+	struct ifla_vf_stats vf_stats;
+	int err;
+
+	err = mlx5_eswitch_get_vport_stats(esw, rep->vport, &vf_stats);
+	if (err) {
+		pr_warn("vport %d error %d reading stats\n", rep->vport, err);
+		return;
+	}
+
+	vport_stats = &priv->stats.vf_vport;
+	/* flip tx/rx as we are reporting the counters for the switch vport */
+	vport_stats->rx_packets = vf_stats.tx_packets;
+	vport_stats->rx_bytes   = vf_stats.tx_bytes;
+	vport_stats->tx_packets = vf_stats.rx_packets;
+	vport_stats->tx_bytes   = vf_stats.rx_bytes;
+}
+
+static void mlx5e_rep_update_sw_counters(struct mlx5e_priv *priv)
 {
 	struct mlx5e_sw_stats *s = &priv->stats.sw;
 	struct mlx5e_rq_stats *rq_stats;
@@ -95,6 +117,12 @@ static void mlx5e_update_sw_rep_counters(struct mlx5e_priv *priv)
 	}
 }
 
+static void mlx5e_rep_update_stats(struct mlx5e_priv *priv)
+{
+	mlx5e_rep_update_sw_counters(priv);
+	mlx5e_rep_update_hw_counters(priv);
+}
+
 static void mlx5e_rep_get_ethtool_stats(struct net_device *dev,
 					struct ethtool_stats *stats, u64 *data)
 {
@@ -106,7 +134,7 @@ static void mlx5e_rep_get_ethtool_stats(struct net_device *dev,
 
 	mutex_lock(&priv->state_lock);
 	if (test_bit(MLX5E_STATE_OPENED, &priv->state))
-		mlx5e_update_sw_rep_counters(priv);
+		mlx5e_rep_update_sw_counters(priv);
 	mutex_unlock(&priv->state_lock);
 
 	for (i = 0; i < NUM_VPORT_REP_COUNTERS; i++)
@@ -245,6 +273,77 @@ static int mlx5e_rep_ndo_setup_tc(struct net_device *dev, u32 handle,
 	}
 }
 
+bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv)
+{
+	struct mlx5_eswitch_rep *rep = (struct mlx5_eswitch_rep *)priv->ppriv;
+	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+
+	if (rep && rep->vport == FDB_UPLINK_VPORT && esw->mode == SRIOV_OFFLOADS)
+		return true;
+
+	return false;
+}
+
+bool mlx5e_is_vf_vport_rep(struct mlx5e_priv *priv)
+{
+	struct mlx5_eswitch_rep *rep = (struct mlx5_eswitch_rep *)priv->ppriv;
+
+	if (rep && rep->vport != FDB_UPLINK_VPORT)
+		return true;
+
+	return false;
+}
+
+bool mlx5e_has_offload_stats(const struct net_device *dev, int attr_id)
+{
+	struct mlx5e_priv *priv = netdev_priv(dev);
+
+	switch (attr_id) {
+	case IFLA_OFFLOAD_XSTATS_CPU_HIT:
+		if (mlx5e_is_vf_vport_rep(priv) || mlx5e_is_uplink_rep(priv))
+			return true;
+	}
+
+	return false;
+}
+
+static int
+mlx5e_get_sw_stats64(const struct net_device *dev,
+		     struct rtnl_link_stats64 *stats)
+{
+	struct mlx5e_priv *priv = netdev_priv(dev);
+	struct mlx5e_sw_stats *sstats = &priv->stats.sw;
+
+	stats->rx_packets = sstats->rx_packets;
+	stats->rx_bytes   = sstats->rx_bytes;
+	stats->tx_packets = sstats->tx_packets;
+	stats->tx_bytes   = sstats->tx_bytes;
+
+	stats->tx_dropped = sstats->tx_queue_dropped;
+
+	return 0;
+}
+
+int mlx5e_get_offload_stats(int attr_id, const struct net_device *dev,
+			    void *sp)
+{
+	switch (attr_id) {
+	case IFLA_OFFLOAD_XSTATS_CPU_HIT:
+		return mlx5e_get_sw_stats64(dev, sp);
+	}
+
+	return -EINVAL;
+}
+
+static struct rtnl_link_stats64 *
+mlx5e_rep_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
+{
+	struct mlx5e_priv *priv = netdev_priv(dev);
+
+	memcpy(stats, &priv->stats.vf_vport, sizeof(*stats));
+	return stats;
+}
+
 static const struct switchdev_ops mlx5e_rep_switchdev_ops = {
 	.switchdev_port_attr_get	= mlx5e_attr_get,
 };
@@ -255,9 +354,9 @@ static const struct net_device_ops mlx5e_netdev_ops_rep = {
 	.ndo_start_xmit          = mlx5e_xmit,
 	.ndo_get_phys_port_name  = mlx5e_rep_get_phys_port_name,
 	.ndo_setup_tc            = mlx5e_rep_ndo_setup_tc,
-	.ndo_get_stats64         = mlx5e_get_stats,
-	.ndo_udp_tunnel_add	 = mlx5e_add_vxlan_port,
-	.ndo_udp_tunnel_del	 = mlx5e_del_vxlan_port,
+	.ndo_get_stats64         = mlx5e_rep_get_stats,
+	.ndo_has_offload_stats	 = mlx5e_has_offload_stats,
+	.ndo_get_offload_stats	 = mlx5e_get_offload_stats,
 };
 
 static void mlx5e_build_rep_netdev_priv(struct mlx5_core_dev *mdev,
@@ -407,7 +506,7 @@ static struct mlx5e_profile mlx5e_rep_profile = {
 	.cleanup_rx		= mlx5e_cleanup_rep_rx,
 	.init_tx		= mlx5e_init_rep_tx,
 	.cleanup_tx		= mlx5e_cleanup_nic_tx,
-	.update_stats           = mlx5e_update_sw_rep_counters,
+	.update_stats           = mlx5e_rep_update_stats,
 	.max_nch		= mlx5e_get_rep_max_num_channels,
 	.max_tc			= 1,
 };
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
index 5da6a1c..f202f87 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
@@ -407,6 +407,7 @@ struct mlx5e_stats {
 	struct mlx5e_vport_stats vport;
 	struct mlx5e_pport_stats pport;
 	struct mlx5e_pcie_stats pcie;
+	struct rtnl_link_stats64 vf_vport;
 };
 
 static const struct counter_desc mlx5e_pme_status_desc[] = {
-- 
2.7.4

^ permalink raw reply related

* [PATCH net-next 0/7] Mellanox 100G mlx5 SRIOV switchdev update
From: Saeed Mahameed @ 2016-11-21 13:05 UTC (permalink / raw)
  To: David S. Miller; +Cc: netdev, Or Gerlitz, Roi Dayan, Saeed Mahameed

Hi Dave,

This series from Roi and Or further enhances the new SRIOV switchdev mode.

Roi's patches deal with allowing users to configure though devlink
the level of inline headers that the VF should be setting in order for
the eswitch HW to do proper matching. We also enforce that the matching
required for offloaded TC rules is aligned with that level on the PF driver.

Or's patches deals with allowing the user to control on the VF operational
link state through admin directives on the mlx5 VF rep link. Also in this series
is implementation of HW and SW counters for the mlx5 VF rep which is aligned
with the design set by commit a5ea31f57309 'Merge branch net-offloaded-stats'.

This series was generated against commit
f81a8a02bb3b ("Merge branch 'mV88e6xxx-interrupt-fixes'")

Thanks,
Saeed.

Or Gerlitz (3):
  net: Add net-device param to the get offloaded stats ndo
  net/mlx5e: Support HW (offloaded) and SW counters for SRIOV switchdev
    mode
  net/mlx5e: Support VF vport link state control for SRIOV switchdev
    mode

Roi Dayan (4):
  devlink: Add E-Switch inline mode control
  net/mlx5: Enable to query min inline for a specific vport
  net/mlx5: E-Switch, Add control for inline mode
  net/mlx5e: Enforce min inline mode when offloading flows

 drivers/net/ethernet/mellanox/mlx5/core/en.h       |  15 +--
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  |  42 +++---
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c   | 144 +++++++++++++++++++--
 drivers/net/ethernet/mellanox/mlx5/core/en_stats.h |   1 +
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c    |  46 ++++++-
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c  |   1 +
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h  |   4 +
 .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 141 ++++++++++++++++++++
 drivers/net/ethernet/mellanox/mlx5/core/main.c     |   2 +
 drivers/net/ethernet/mellanox/mlx5/core/vport.c    |  14 +-
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c     |   2 +-
 include/linux/mlx5/vport.h                         |  10 +-
 include/linux/netdevice.h                          |   4 +-
 include/net/devlink.h                              |   2 +
 include/uapi/linux/devlink.h                       |   8 ++
 net/core/devlink.c                                 |  70 +++++++---
 net/core/rtnetlink.c                               |   4 +-
 17 files changed, 438 insertions(+), 72 deletions(-)

-- 
2.7.4

^ permalink raw reply

* [PATCH net-next 4/7] devlink: Add E-Switch inline mode control
From: Saeed Mahameed @ 2016-11-21 13:05 UTC (permalink / raw)
  To: David S. Miller; +Cc: netdev, Or Gerlitz, Roi Dayan, Saeed Mahameed
In-Reply-To: <1479733561-26601-1-git-send-email-saeedm@mellanox.com>

From: Roi Dayan <roid@mellanox.com>

Some HWs need the VF driver to put part of the packet headers on the
TX descriptor so the e-switch can do proper matching and steering.

The supported modes: none, link, network, transport.

Signed-off-by: Roi Dayan <roid@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/net/devlink.h        |  2 ++
 include/uapi/linux/devlink.h |  8 +++++
 net/core/devlink.c           | 70 ++++++++++++++++++++++++++++++++------------
 3 files changed, 61 insertions(+), 19 deletions(-)

diff --git a/include/net/devlink.h b/include/net/devlink.h
index 211bd3c..d29e5fc 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -92,6 +92,8 @@ struct devlink_ops {
 
 	int (*eswitch_mode_get)(struct devlink *devlink, u16 *p_mode);
 	int (*eswitch_mode_set)(struct devlink *devlink, u16 mode);
+	int (*eswitch_inline_mode_get)(struct devlink *devlink, u8 *p_inline_mode);
+	int (*eswitch_inline_mode_set)(struct devlink *devlink, u8 inline_mode);
 };
 
 static inline void *devlink_priv(struct devlink *devlink)
diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index 915bfa7..9014c33 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -102,6 +102,13 @@ enum devlink_eswitch_mode {
 	DEVLINK_ESWITCH_MODE_SWITCHDEV,
 };
 
+enum devlink_eswitch_inline_mode {
+	DEVLINK_ESWITCH_INLINE_MODE_NONE,
+	DEVLINK_ESWITCH_INLINE_MODE_LINK,
+	DEVLINK_ESWITCH_INLINE_MODE_NETWORK,
+	DEVLINK_ESWITCH_INLINE_MODE_TRANSPORT,
+};
+
 enum devlink_attr {
 	/* don't change the order or add anything between, this is ABI! */
 	DEVLINK_ATTR_UNSPEC,
@@ -133,6 +140,7 @@ enum devlink_attr {
 	DEVLINK_ATTR_SB_OCC_CUR,		/* u32 */
 	DEVLINK_ATTR_SB_OCC_MAX,		/* u32 */
 	DEVLINK_ATTR_ESWITCH_MODE,		/* u16 */
+	DEVLINK_ATTR_ESWITCH_INLINE_MODE,	/* u8 */
 
 	/* add new attributes above here, update the policy in devlink.c */
 
diff --git a/net/core/devlink.c b/net/core/devlink.c
index c14f8b6..2b5bf9e 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -1394,26 +1394,45 @@ static int devlink_nl_cmd_sb_occ_max_clear_doit(struct sk_buff *skb,
 
 static int devlink_eswitch_fill(struct sk_buff *msg, struct devlink *devlink,
 				enum devlink_command cmd, u32 portid,
-				u32 seq, int flags, u16 mode)
+				u32 seq, int flags)
 {
+	const struct devlink_ops *ops = devlink->ops;
 	void *hdr;
+	int err = 0;
+	u16 mode;
+	u8 inline_mode;
 
 	hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
 	if (!hdr)
 		return -EMSGSIZE;
 
-	if (devlink_nl_put_handle(msg, devlink))
-		goto nla_put_failure;
+	err = devlink_nl_put_handle(msg, devlink);
+	if (err)
+		goto out;
 
-	if (nla_put_u16(msg, DEVLINK_ATTR_ESWITCH_MODE, mode))
-		goto nla_put_failure;
+	err = ops->eswitch_mode_get(devlink, &mode);
+	if (err)
+		goto out;
+	err = nla_put_u16(msg, DEVLINK_ATTR_ESWITCH_MODE, mode);
+	if (err)
+		goto out;
+
+	if (ops->eswitch_inline_mode_get) {
+		err = ops->eswitch_inline_mode_get(devlink, &inline_mode);
+		if (err)
+			goto out;
+		err = nla_put_u8(msg, DEVLINK_ATTR_ESWITCH_INLINE_MODE,
+				 inline_mode);
+		if (err)
+			goto out;
+	}
 
 	genlmsg_end(msg, hdr);
 	return 0;
 
-nla_put_failure:
+out:
 	genlmsg_cancel(msg, hdr);
-	return -EMSGSIZE;
+	return err;
 }
 
 static int devlink_nl_cmd_eswitch_mode_get_doit(struct sk_buff *skb,
@@ -1422,22 +1441,17 @@ static int devlink_nl_cmd_eswitch_mode_get_doit(struct sk_buff *skb,
 	struct devlink *devlink = info->user_ptr[0];
 	const struct devlink_ops *ops = devlink->ops;
 	struct sk_buff *msg;
-	u16 mode;
 	int err;
 
 	if (!ops || !ops->eswitch_mode_get)
 		return -EOPNOTSUPP;
 
-	err = ops->eswitch_mode_get(devlink, &mode);
-	if (err)
-		return err;
-
 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
 	if (!msg)
 		return -ENOMEM;
 
 	err = devlink_eswitch_fill(msg, devlink, DEVLINK_CMD_ESWITCH_MODE_GET,
-				   info->snd_portid, info->snd_seq, 0, mode);
+				   info->snd_portid, info->snd_seq, 0);
 
 	if (err) {
 		nlmsg_free(msg);
@@ -1453,15 +1467,32 @@ static int devlink_nl_cmd_eswitch_mode_set_doit(struct sk_buff *skb,
 	struct devlink *devlink = info->user_ptr[0];
 	const struct devlink_ops *ops = devlink->ops;
 	u16 mode;
+	u8 inline_mode;
+	int err = 0;
 
-	if (!info->attrs[DEVLINK_ATTR_ESWITCH_MODE])
-		return -EINVAL;
+	if (!ops)
+		return -EOPNOTSUPP;
 
-	mode = nla_get_u16(info->attrs[DEVLINK_ATTR_ESWITCH_MODE]);
+	if (info->attrs[DEVLINK_ATTR_ESWITCH_MODE]) {
+		if (!ops->eswitch_mode_set)
+			return -EOPNOTSUPP;
+		mode = nla_get_u16(info->attrs[DEVLINK_ATTR_ESWITCH_MODE]);
+		err = ops->eswitch_mode_set(devlink, mode);
+		if (err)
+			return err;
+	}
 
-	if (ops && ops->eswitch_mode_set)
-		return ops->eswitch_mode_set(devlink, mode);
-	return -EOPNOTSUPP;
+	if (info->attrs[DEVLINK_ATTR_ESWITCH_INLINE_MODE]) {
+		if (!ops->eswitch_inline_mode_set)
+			return -EOPNOTSUPP;
+		inline_mode = nla_get_u8(
+				info->attrs[DEVLINK_ATTR_ESWITCH_INLINE_MODE]);
+		err = ops->eswitch_inline_mode_set(devlink, inline_mode);
+		if (err)
+			return err;
+	}
+
+	return 0;
 }
 
 static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
@@ -1478,6 +1509,7 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
 	[DEVLINK_ATTR_SB_THRESHOLD] = { .type = NLA_U32 },
 	[DEVLINK_ATTR_SB_TC_INDEX] = { .type = NLA_U16 },
 	[DEVLINK_ATTR_ESWITCH_MODE] = { .type = NLA_U16 },
+	[DEVLINK_ATTR_ESWITCH_INLINE_MODE] = { .type = NLA_U8 },
 };
 
 static const struct genl_ops devlink_nl_ops[] = {
-- 
2.7.4

^ permalink raw reply related

* [PATCH net-next 5/7] net/mlx5: Enable to query min inline for a specific vport
From: Saeed Mahameed @ 2016-11-21 13:05 UTC (permalink / raw)
  To: David S. Miller; +Cc: netdev, Or Gerlitz, Roi Dayan, Saeed Mahameed
In-Reply-To: <1479733561-26601-1-git-send-email-saeedm@mellanox.com>

From: Roi Dayan <roid@mellanox.com>

Also move the inline capablities enum to a shared header vport.h

Signed-off-by: Roi Dayan <roid@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h      |  6 ------
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 11 +++++------
 drivers/net/ethernet/mellanox/mlx5/core/vport.c   | 14 ++++++++------
 include/linux/mlx5/vport.h                        | 10 ++++++++--
 4 files changed, 21 insertions(+), 20 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index ebf5dbc..a2b32ed 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -150,12 +150,6 @@ static inline int mlx5_max_log_rq_size(int wq_type)
 	}
 }
 
-enum {
-	MLX5E_INLINE_MODE_L2,
-	MLX5E_INLINE_MODE_VPORT_CONTEXT,
-	MLX5_INLINE_MODE_NOT_REQUIRED,
-};
-
 struct mlx5e_tx_wqe {
 	struct mlx5_wqe_ctrl_seg ctrl;
 	struct mlx5_wqe_eth_seg  eth;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 480875f..d25bc0f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -952,7 +952,7 @@ static int mlx5e_create_sq(struct mlx5e_channel *c,
 	sq->bf_buf_size = (1 << MLX5_CAP_GEN(mdev, log_bf_reg_size)) / 2;
 	sq->max_inline  = param->max_inline;
 	sq->min_inline_mode =
-		MLX5_CAP_ETH(mdev, wqe_inline_mode) == MLX5E_INLINE_MODE_VPORT_CONTEXT ?
+		MLX5_CAP_ETH(mdev, wqe_inline_mode) == MLX5_CAP_INLINE_MODE_VPORT_CONTEXT ?
 		param->min_inline_mode : 0;
 
 	err = mlx5e_alloc_sq_db(sq, cpu_to_node(c->cpu));
@@ -3403,14 +3403,13 @@ static void mlx5e_query_min_inline(struct mlx5_core_dev *mdev,
 				   u8 *min_inline_mode)
 {
 	switch (MLX5_CAP_ETH(mdev, wqe_inline_mode)) {
-	case MLX5E_INLINE_MODE_L2:
+	case MLX5_CAP_INLINE_MODE_L2:
 		*min_inline_mode = MLX5_INLINE_MODE_L2;
 		break;
-	case MLX5E_INLINE_MODE_VPORT_CONTEXT:
-		mlx5_query_nic_vport_min_inline(mdev,
-						min_inline_mode);
+	case MLX5_CAP_INLINE_MODE_VPORT_CONTEXT:
+		mlx5_query_nic_vport_min_inline(mdev, 0, min_inline_mode);
 		break;
-	case MLX5_INLINE_MODE_NOT_REQUIRED:
+	case MLX5_CAP_INLINE_MODE_NOT_REQUIRED:
 		*min_inline_mode = MLX5_INLINE_MODE_NONE;
 		break;
 	}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
index 525f17a..269e440 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
@@ -113,15 +113,17 @@ static int mlx5_modify_nic_vport_context(struct mlx5_core_dev *mdev, void *in,
 	return mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
 }
 
-void mlx5_query_nic_vport_min_inline(struct mlx5_core_dev *mdev,
-				     u8 *min_inline_mode)
+int mlx5_query_nic_vport_min_inline(struct mlx5_core_dev *mdev,
+				    u16 vport, u8 *min_inline)
 {
 	u32 out[MLX5_ST_SZ_DW(query_nic_vport_context_out)] = {0};
+	int err;
 
-	mlx5_query_nic_vport_context(mdev, 0, out, sizeof(out));
-
-	*min_inline_mode = MLX5_GET(query_nic_vport_context_out, out,
-				    nic_vport_context.min_wqe_inline_mode);
+	err = mlx5_query_nic_vport_context(mdev, vport, out, sizeof(out));
+	if (!err)
+		*min_inline = MLX5_GET(query_nic_vport_context_out, out,
+				       nic_vport_context.min_wqe_inline_mode);
+	return err;
 }
 EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_min_inline);
 
diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h
index 451b0bd..ec35157 100644
--- a/include/linux/mlx5/vport.h
+++ b/include/linux/mlx5/vport.h
@@ -36,6 +36,12 @@
 #include <linux/mlx5/driver.h>
 #include <linux/mlx5/device.h>
 
+enum {
+	MLX5_CAP_INLINE_MODE_L2,
+	MLX5_CAP_INLINE_MODE_VPORT_CONTEXT,
+	MLX5_CAP_INLINE_MODE_NOT_REQUIRED,
+};
+
 u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport);
 u8 mlx5_query_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod,
 				u16 vport);
@@ -43,8 +49,8 @@ int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod,
 				  u16 vport, u8 state);
 int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev,
 				     u16 vport, u8 *addr);
-void mlx5_query_nic_vport_min_inline(struct mlx5_core_dev *mdev,
-				     u8 *min_inline);
+int mlx5_query_nic_vport_min_inline(struct mlx5_core_dev *mdev,
+				    u16 vport, u8 *min_inline);
 int mlx5_modify_nic_vport_min_inline(struct mlx5_core_dev *mdev,
 				     u16 vport, u8 min_inline);
 int mlx5_modify_nic_vport_mac_address(struct mlx5_core_dev *dev,
-- 
2.7.4

^ permalink raw reply related

* [PATCH net-next 3/7] net/mlx5e: Support VF vport link state control for SRIOV switchdev mode
From: Saeed Mahameed @ 2016-11-21 13:05 UTC (permalink / raw)
  To: David S. Miller; +Cc: netdev, Or Gerlitz, Roi Dayan, Saeed Mahameed
In-Reply-To: <1479733561-26601-1-git-send-email-saeedm@mellanox.com>

From: Or Gerlitz <ogerlitz@mellanox.com>

Reflect the administative link changes done on the VF representor to the
VF e-switch vport. This means that doing ip link set down/up commands on
the VF rep will modify the e-switch vport state which in turn will make
proper VF drivers to set their carrier accordingly.

Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 33 ++++++++++++++++++++++--
 1 file changed, 31 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index e0d1a56..5e33f6b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -236,6 +236,35 @@ void mlx5e_nic_rep_unload(struct mlx5_eswitch *esw,
 	mlx5e_tc_init(priv);
 }
 
+static int mlx5e_rep_open(struct net_device *dev)
+{
+	struct mlx5e_priv *priv = netdev_priv(dev);
+	struct mlx5_eswitch_rep *rep = priv->ppriv;
+	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+	int err;
+
+	err = mlx5e_open(dev);
+	if (err)
+		return err;
+
+	err = mlx5_eswitch_set_vport_state(esw, rep->vport, MLX5_ESW_VPORT_ADMIN_STATE_UP);
+	if (!err)
+		netif_carrier_on(dev);
+
+	return 0;
+}
+
+static int mlx5e_rep_close(struct net_device *dev)
+{
+	struct mlx5e_priv *priv = netdev_priv(dev);
+	struct mlx5_eswitch_rep *rep = priv->ppriv;
+	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+
+	(void)mlx5_eswitch_set_vport_state(esw, rep->vport, MLX5_ESW_VPORT_ADMIN_STATE_DOWN);
+
+	return mlx5e_close(dev);
+}
+
 static int mlx5e_rep_get_phys_port_name(struct net_device *dev,
 					char *buf, size_t len)
 {
@@ -349,8 +378,8 @@ static const struct switchdev_ops mlx5e_rep_switchdev_ops = {
 };
 
 static const struct net_device_ops mlx5e_netdev_ops_rep = {
-	.ndo_open                = mlx5e_open,
-	.ndo_stop                = mlx5e_close,
+	.ndo_open                = mlx5e_rep_open,
+	.ndo_stop                = mlx5e_rep_close,
 	.ndo_start_xmit          = mlx5e_xmit,
 	.ndo_get_phys_port_name  = mlx5e_rep_get_phys_port_name,
 	.ndo_setup_tc            = mlx5e_rep_ndo_setup_tc,
-- 
2.7.4

^ permalink raw reply related

* [PATCH net-next 1/7] net: Add net-device param to the get offloaded stats ndo
From: Saeed Mahameed @ 2016-11-21 13:05 UTC (permalink / raw)
  To: David S. Miller; +Cc: netdev, Or Gerlitz, Roi Dayan, Saeed Mahameed
In-Reply-To: <1479733561-26601-1-git-send-email-saeedm@mellanox.com>

From: Or Gerlitz <ogerlitz@mellanox.com>

Some drivers would need to check few internal matters for
that. To be used in downstream mlx5 commit.

Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 2 +-
 include/linux/netdevice.h                      | 4 ++--
 net/core/rtnetlink.c                           | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 4a1f9d5..be32f47 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -857,7 +857,7 @@ mlxsw_sp_port_get_sw_stats64(const struct net_device *dev,
 	return 0;
 }
 
-static bool mlxsw_sp_port_has_offload_stats(int attr_id)
+static bool mlxsw_sp_port_has_offload_stats(struct net_device *dev, int attr_id)
 {
 	switch (attr_id) {
 	case IFLA_OFFLOAD_XSTATS_CPU_HIT:
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index e84800e..ae32a27 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -925,7 +925,7 @@ struct netdev_xdp {
  *	3. Update dev->stats asynchronously and atomically, and define
  *	   neither operation.
  *
- * bool (*ndo_has_offload_stats)(int attr_id)
+ * bool (*ndo_has_offload_stats)(const struct net_device *dev, int attr_id)
  *	Return true if this device supports offload stats of this attr_id.
  *
  * int (*ndo_get_offload_stats)(int attr_id, const struct net_device *dev,
@@ -1165,7 +1165,7 @@ struct net_device_ops {
 
 	struct rtnl_link_stats64* (*ndo_get_stats64)(struct net_device *dev,
 						     struct rtnl_link_stats64 *storage);
-	bool			(*ndo_has_offload_stats)(int attr_id);
+	bool			(*ndo_has_offload_stats)(const struct net_device *dev, int attr_id);
 	int			(*ndo_get_offload_stats)(int attr_id,
 							 const struct net_device *dev,
 							 void *attr_data);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index db313ec..f5a8d8a 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -3665,7 +3665,7 @@ static int rtnl_get_offload_stats(struct sk_buff *skb, struct net_device *dev,
 		if (!size)
 			continue;
 
-		if (!dev->netdev_ops->ndo_has_offload_stats(attr_id))
+		if (!dev->netdev_ops->ndo_has_offload_stats(dev, attr_id))
 			continue;
 
 		attr = nla_reserve_64bit(skb, attr_id, size,
@@ -3706,7 +3706,7 @@ static int rtnl_get_offload_stats_size(const struct net_device *dev)
 
 	for (attr_id = IFLA_OFFLOAD_XSTATS_FIRST;
 	     attr_id <= IFLA_OFFLOAD_XSTATS_MAX; attr_id++) {
-		if (!dev->netdev_ops->ndo_has_offload_stats(attr_id))
+		if (!dev->netdev_ops->ndo_has_offload_stats(dev, attr_id))
 			continue;
 		size = rtnl_get_offload_stats_attr_size(attr_id);
 		nla_size += nla_total_size_64bit(size);
-- 
2.7.4

^ permalink raw reply related

* Re: ip -s macsec show: Statistics for OutOctets... and OutPkts... switched
From: Rami Rosen @ 2016-11-21 13:02 UTC (permalink / raw)
  To: Daniel.Hopf; +Cc: Netdev
In-Reply-To: <OF4B44CECE.BF9C7A12-ONC1258072.0037C56C-C1258072.003F5731@continental-corporation.com>

+1
Rami Rosen

^ permalink raw reply

* Re: Synopsys Ethernet QoS Driver
From: Giuseppe CAVALLARO @ 2016-11-21 12:52 UTC (permalink / raw)
  To: Joao Pinto, Rayagond Kokatanur, Rabin Vincent
  Cc: andreas.irestal, Alexandre TORGUE, saeedm, netdev, linux-kernel,
	CARLOS.PALMINHA, idosch, mued dib, jiri, Jeff Kirsher,
	David Miller, linux-arm-kernel@lists.infradead.org, lars.persson
In-Reply-To: <1248f4ce-4859-10e6-fef9-342ea543f8d4@synopsys.com>

Hello Joao

On 11/21/2016 1:32 PM, Joao Pinto wrote:
> Hello,
>
> On 21-11-2016 05:29, Rayagond Kokatanur wrote:
>> On Sat, Nov 19, 2016 at 7:26 PM, Rabin Vincent <rabin@rab.in> wrote:
>>> On Fri, Nov 18, 2016 at 02:20:27PM +0000, Joao Pinto wrote:
>>>> For now we are interesting in improving the synopsys QoS driver under
>>>> /nect/ethernet/synopsys. For now the driver structure consists of a single file
>>>> called dwc_eth_qos.c, containing synopsys ethernet qos common ops and platform
>>>> related stuff.
>>>>
>>>> Our strategy would be:
>>>>
>>>> a) Implement a platform glue driver (dwc_eth_qos_pltfm.c)
>>>> b) Implement a pci glue driver (dwc_eth_qos_pci.c)
>>>> c) Implement a "core driver" (dwc_eth_qos.c) that would only have Ethernet QoS
>>>> related stuff to be reused by the platform / pci drivers
>>>> d) Add a set of features to the "core driver" that we have available internally
>>>
>>> Note that there are actually two drivers in mainline for this hardware:
>>>
>>>  drivers/net/ethernet/synopsis/
>>>  drivers/net/ethernet/stmicro/stmmac/
>>
>> Yes the later driver (drivers/net/ethernet/stmicro/stmmac/) supports
>> both 3.x and 4.x. It has glue layer for pci, platform, core etc,
>> please refer this driver once before you start.
>>
>> You can start adding missing feature of 4.x in stmmac driver.
>
> Thanks you all for all the info.
> Well, I think we are in a good position to organize the ethernet drivers
> concerning Synopsys IPs.
>
> First of all, in my opinion, it does not make sense to have a ethernet/synopsis
> (typo :)) when ethernet/stmicro is also for a synopsys IP. If we have another
> vendor using the same IP it should be able to reuse the commonn operations. But
> I would put that discussion for later :)
>
> For now I suggest that for we create ethernet/qos and create there a folder
> called dwc (designware controller) where all the synopsys qos IP specific code
> in order to be reused for example by ethernet/stmicro/stmmac/. We just have to
> figure out a clean interface for "client drivers" like stmmac to interact with
> the new qos driver.
>
> What do you think about this approach?

The stmmac drivers run since many years on several platforms
(sh4, stm32, arm, x86, mips ...) and it supports an huge of amount of
configurations starting from 3.1x to 3.7x databooks.

It also supports QoS hardware; for example, 4.00a, 4.10a and 4.20a
are fully working.

Also the stmmac has platform, device-tree and pcie supports and
a lot of maintained glue-logic files.

It is fully documented inside the kernel tree.

I am happy to have new enhancements from other developers.
So, on my side, if you want to spend your time on improving it on your
platforms please feel free to do it!

Concerning the stmicro/stmmac naming, these come from a really old
story and have no issue to adopt new folder/file names.

I am also open to merge fixes and changes from ethernet/synopsis.
I want to point you on some benchmarks made by Alex some months ago
(IIRC) that showed an stmmac winner (due to the several optimizations
analyzed and reviewed in this mailing list).

Peppe

>
>
>>
>>>
>>> (See http://lists.openwall.net/netdev/2016/02/29/127)
>>>
>>> The former only supports 4.x of the hardware.
>>>
>>> The later supports 4.x and 3.x and already has a platform glue driver
>>> with support for several platforms, a PCI glue driver, and a core driver
>>> with several features not present in the former (for example: TX/RX
>>> interrupt coalescing, EEE, PTP).
>>>
>>> Have you evaluated both drivers?  Why have you decided to work on the
>>> former rather than the latter?
>>
>>
>
> Thanks.
>
>
>
>

^ permalink raw reply

* Re: [PATCH v7 1/2] nl80211: multicast_to_unicast can be changed while IFF_UP
From: Johannes Berg @ 2016-11-21 12:50 UTC (permalink / raw)
  To: Michael Braun
  Cc: linux-wireless-u79uwXL29TY76Z2rM5mHXA,
	netdev-u79uwXL29TY76Z2rM5mHXA,
	projekt-wlan-3kN+8DYepx7zMJDuovMtMLNAH6kLmebB
In-Reply-To: <1477921260-24556-1-git-send-email-michael-dev-1SGGS//iJ+Y38rf8aCqVIw@public.gmane.org>

On Mon, 2016-10-31 at 14:40 +0100, Michael Braun wrote:
> There is no need to prevent toggling multicast_to_unicast while
> interface is already up. This change simplifies reconfiguration
> from hostapd.

Applied. This check never should've been there anyway, if desired,
NEED_NETDEV_UP should've been used.

johannes

^ permalink raw reply

* Re: Synopsys Ethernet QoS Driver
From: Joao Pinto @ 2016-11-21 12:32 UTC (permalink / raw)
  To: Rayagond Kokatanur, Rabin Vincent, Giuseppe Cavallaro
  Cc: Joao Pinto, mued dib, David Miller, Jeff Kirsher, jiri, saeedm,
	idosch, netdev, linux-kernel, CARLOS.PALMINHA, andreas.irestal,
	alexandre.torgue, lars.persson
In-Reply-To: <CAJ3bTp7PpUJcz=1JohX=A54UP6vVAafWnB0PGUBSxb5JJiO8Lg@mail.gmail.com>

Hello,

On 21-11-2016 05:29, Rayagond Kokatanur wrote:
> On Sat, Nov 19, 2016 at 7:26 PM, Rabin Vincent <rabin@rab.in> wrote:
>> On Fri, Nov 18, 2016 at 02:20:27PM +0000, Joao Pinto wrote:
>>> For now we are interesting in improving the synopsys QoS driver under
>>> /nect/ethernet/synopsys. For now the driver structure consists of a single file
>>> called dwc_eth_qos.c, containing synopsys ethernet qos common ops and platform
>>> related stuff.
>>>
>>> Our strategy would be:
>>>
>>> a) Implement a platform glue driver (dwc_eth_qos_pltfm.c)
>>> b) Implement a pci glue driver (dwc_eth_qos_pci.c)
>>> c) Implement a "core driver" (dwc_eth_qos.c) that would only have Ethernet QoS
>>> related stuff to be reused by the platform / pci drivers
>>> d) Add a set of features to the "core driver" that we have available internally
>>
>> Note that there are actually two drivers in mainline for this hardware:
>>
>>  drivers/net/ethernet/synopsis/
>>  drivers/net/ethernet/stmicro/stmmac/
> 
> Yes the later driver (drivers/net/ethernet/stmicro/stmmac/) supports
> both 3.x and 4.x. It has glue layer for pci, platform, core etc,
> please refer this driver once before you start.
> 
> You can start adding missing feature of 4.x in stmmac driver.

Thanks you all for all the info.
Well, I think we are in a good position to organize the ethernet drivers
concerning Synopsys IPs.

First of all, in my opinion, it does not make sense to have a ethernet/synopsis
(typo :)) when ethernet/stmicro is also for a synopsys IP. If we have another
vendor using the same IP it should be able to reuse the commonn operations. But
I would put that discussion for later :)

For now I suggest that for we create ethernet/qos and create there a folder
called dwc (designware controller) where all the synopsys qos IP specific code
in order to be reused for example by ethernet/stmicro/stmmac/. We just have to
figure out a clean interface for "client drivers" like stmmac to interact with
the new qos driver.

What do you think about this approach?


> 
>>
>> (See http://lists.openwall.net/netdev/2016/02/29/127)
>>
>> The former only supports 4.x of the hardware.
>>
>> The later supports 4.x and 3.x and already has a platform glue driver
>> with support for several platforms, a PCI glue driver, and a core driver
>> with several features not present in the former (for example: TX/RX
>> interrupt coalescing, EEE, PTP).
>>
>> Have you evaluated both drivers?  Why have you decided to work on the
>> former rather than the latter?
> 
> 

Thanks.

^ permalink raw reply

* Re: [PATCH v9 0/8] thunderbolt: Introducing Thunderbolt(TM) Networking
From: gregkh @ 2016-11-21 12:22 UTC (permalink / raw)
  To: Levy, Amir (Jer)
  Cc: Simon Guinot, andreas.noever@gmail.com, bhelgaas@google.com,
	corbet@lwn.net, linux-kernel@vger.kernel.org,
	linux-pci@vger.kernel.org, netdev@vger.kernel.org,
	linux-doc@vger.kernel.org, mario_limonciello@dell.com,
	thunderbolt-linux, Westerberg, Mika, Winkler, Tomas,
	Zhang, Xiong Y, Jamet, Michael, remi.rerolle@seagate.com
In-Reply-To: <E607265CB020454880711A6F96C05A03BE2214E2@hasmsx107.ger.corp.intel.com>

On Sun, Nov 20, 2016 at 06:30:19AM +0000, Levy, Amir (Jer) wrote:
> On Fri, Nov 18 2016, 12:07 PM, gregkh@linuxfoundation.org wrote:
> > On Fri, Nov 18, 2016 at 08:48:36AM +0000, Levy, Amir (Jer) wrote:
> > > > BTW, it is quite a shame that the Thunderbolt firmware version can't
> > > > be read from Linux.
> > > >
> > >
> > > This is WIP, once this patch will be upstream, we will be able to
> > > focus more on aligning Linux with the Thunderbolt features that we have
> > for windows.
> > 
> > Why is this patch somehow holding that work back?  You aren't just sitting
> > around waiting for people to review this and not doing anything else, right?
> > Is there some basic building block in these patches that your firmware
> > download code is going to rely on?
> > 
> > confused,
> > 
> > greg k-h
> 
> All the Thunderbolt SW features (including networking and FW update) depend 
> on the communication with FW, which is patch 3/8 in the series.
> The patch also sets up a generic netlink for user space communication.

It's that "generic netlink" connection that I really want a whole lot of
revewers to read over as it's very unusual and "different" from all
other driver subsystems.

thanks,

greg k-h

^ permalink raw reply

* [PATCH net-next 2/2] bridge: mcast: add MLDv2 querier support
From: Nikolay Aleksandrov @ 2016-11-21 12:03 UTC (permalink / raw)
  To: netdev; +Cc: roopa, sashok, stephen, davem, liuhangbin, Nikolay Aleksandrov
In-Reply-To: <1479729805-23108-1-git-send-email-nikolay@cumulusnetworks.com>

This patch adds basic support for MLDv2 queries, the default is MLDv1
as before. A new multicast option - multicast_mld_version, adds the
ability to change it between 1 and 2 via netlink and sysfs.
The MLD option is disabled if CONFIG_IPV6 is disabled.

Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
---
 include/uapi/linux/if_link.h |  1 +
 net/bridge/br_multicast.c    | 89 +++++++++++++++++++++++++++++++++-----------
 net/bridge/br_netlink.c      | 19 +++++++++-
 net/bridge/br_private.h      |  4 ++
 net/bridge/br_sysfs_br.c     | 22 +++++++++++
 5 files changed, 113 insertions(+), 22 deletions(-)

diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 325d2601150d..92b2d4928bf1 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -276,6 +276,7 @@ enum {
 	IFLA_BR_VLAN_STATS_ENABLED,
 	IFLA_BR_MCAST_STATS_ENABLED,
 	IFLA_BR_MCAST_IGMP_VERSION,
+	IFLA_BR_MCAST_MLD_VERSION,
 	__IFLA_BR_MAX,
 };
 
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 66192c11aa45..b30e77e8427c 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -459,15 +459,20 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
 						    const struct in6_addr *grp,
 						    u8 *igmp_type)
 {
-	struct sk_buff *skb;
+	struct mld2_query *mld2q;
+	unsigned long interval;
 	struct ipv6hdr *ip6h;
 	struct mld_msg *mldq;
+	size_t mld_hdr_size;
+	struct sk_buff *skb;
 	struct ethhdr *eth;
 	u8 *hopopt;
-	unsigned long interval;
 
+	mld_hdr_size = sizeof(*mldq);
+	if (br->multicast_mld_version == 2)
+		mld_hdr_size = sizeof(*mld2q);
 	skb = netdev_alloc_skb_ip_align(br->dev, sizeof(*eth) + sizeof(*ip6h) +
-						 8 + sizeof(*mldq));
+						 8 + mld_hdr_size);
 	if (!skb)
 		goto out;
 
@@ -486,7 +491,7 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
 	ip6h = ipv6_hdr(skb);
 
 	*(__force __be32 *)ip6h = htonl(0x60000000);
-	ip6h->payload_len = htons(8 + sizeof(*mldq));
+	ip6h->payload_len = htons(8 + mld_hdr_size);
 	ip6h->nexthdr = IPPROTO_HOPOPTS;
 	ip6h->hop_limit = 1;
 	ipv6_addr_set(&ip6h->daddr, htonl(0xff020000), 0, 0, htonl(1));
@@ -514,26 +519,47 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
 
 	/* ICMPv6 */
 	skb_set_transport_header(skb, skb->len);
-	mldq = (struct mld_msg *) icmp6_hdr(skb);
-
 	interval = ipv6_addr_any(grp) ?
 			br->multicast_query_response_interval :
 			br->multicast_last_member_interval;
-
 	*igmp_type = ICMPV6_MGM_QUERY;
-	mldq->mld_type = ICMPV6_MGM_QUERY;
-	mldq->mld_code = 0;
-	mldq->mld_cksum = 0;
-	mldq->mld_maxdelay = htons((u16)jiffies_to_msecs(interval));
-	mldq->mld_reserved = 0;
-	mldq->mld_mca = *grp;
-
-	/* checksum */
-	mldq->mld_cksum = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
-					  sizeof(*mldq), IPPROTO_ICMPV6,
-					  csum_partial(mldq,
-						       sizeof(*mldq), 0));
-	skb_put(skb, sizeof(*mldq));
+	switch (br->multicast_mld_version) {
+	case 1:
+		mldq = (struct mld_msg *)icmp6_hdr(skb);
+		mldq->mld_type = ICMPV6_MGM_QUERY;
+		mldq->mld_code = 0;
+		mldq->mld_cksum = 0;
+		mldq->mld_maxdelay = htons((u16)jiffies_to_msecs(interval));
+		mldq->mld_reserved = 0;
+		mldq->mld_mca = *grp;
+		mldq->mld_cksum = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
+						  sizeof(*mldq), IPPROTO_ICMPV6,
+						  csum_partial(mldq,
+							       sizeof(*mldq),
+							       0));
+		break;
+	case 2:
+		mld2q = (struct mld2_query *)icmp6_hdr(skb);
+		mld2q->mld2q_mrc = ntohs((u16)jiffies_to_msecs(interval));
+		mld2q->mld2q_type = ICMPV6_MGM_QUERY;
+		mld2q->mld2q_code = 0;
+		mld2q->mld2q_cksum = 0;
+		mld2q->mld2q_resv1 = 0;
+		mld2q->mld2q_resv2 = 0;
+		mld2q->mld2q_suppress = 0;
+		mld2q->mld2q_qrv = 2;
+		mld2q->mld2q_nsrcs = 0;
+		mld2q->mld2q_qqic = br->multicast_query_interval / HZ;
+		mld2q->mld2q_mca = *grp;
+		mld2q->mld2q_cksum = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
+						     sizeof(*mld2q),
+						     IPPROTO_ICMPV6,
+						     csum_partial(mld2q,
+								  sizeof(*mld2q),
+								  0));
+		break;
+	}
+	skb_put(skb, mld_hdr_size);
 
 	__skb_pull(skb, sizeof(*eth));
 
@@ -1842,7 +1868,6 @@ void br_multicast_init(struct net_bridge *br)
 	br->hash_elasticity = 4;
 	br->hash_max = 512;
 
-	br->multicast_igmp_version = 2;
 	br->multicast_router = MDB_RTR_TYPE_TEMP_QUERY;
 	br->multicast_querier = 0;
 	br->multicast_query_use_ifaddr = 0;
@@ -1858,7 +1883,9 @@ void br_multicast_init(struct net_bridge *br)
 
 	br->ip4_other_query.delay_time = 0;
 	br->ip4_querier.port = NULL;
+	br->multicast_igmp_version = 2;
 #if IS_ENABLED(CONFIG_IPV6)
+	br->multicast_mld_version = 1;
 	br->ip6_other_query.delay_time = 0;
 	br->ip6_querier.port = NULL;
 #endif
@@ -2177,6 +2204,26 @@ int br_multicast_set_igmp_version(struct net_bridge *br, unsigned long val)
 	return 0;
 }
 
+#if IS_ENABLED(CONFIG_IPV6)
+int br_multicast_set_mld_version(struct net_bridge *br, unsigned long val)
+{
+	/* Currently we support version 1 and 2 */
+	switch (val) {
+	case 1:
+	case 2:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	spin_lock_bh(&br->multicast_lock);
+	br->multicast_mld_version = val;
+	spin_unlock_bh(&br->multicast_lock);
+
+	return 0;
+}
+#endif
+
 /**
  * br_multicast_list_adjacent - Returns snooped multicast addresses
  * @dev:	The bridge port adjacent to which to retrieve addresses
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 10b9b80f778f..71c7453268c1 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -859,6 +859,7 @@ static const struct nla_policy br_policy[IFLA_BR_MAX + 1] = {
 	[IFLA_BR_VLAN_STATS_ENABLED] = { .type = NLA_U8 },
 	[IFLA_BR_MCAST_STATS_ENABLED] = { .type = NLA_U8 },
 	[IFLA_BR_MCAST_IGMP_VERSION] = { .type = NLA_U8 },
+	[IFLA_BR_MCAST_MLD_VERSION] = { .type = NLA_U8 },
 };
 
 static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
@@ -1079,6 +1080,17 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
 		if (err)
 			return err;
 	}
+
+#if IS_ENABLED(CONFIG_IPV6)
+	if (data[IFLA_BR_MCAST_MLD_VERSION]) {
+		__u8 mld_version;
+
+		mld_version = nla_get_u8(data[IFLA_BR_MCAST_MLD_VERSION]);
+		err = br_multicast_set_mld_version(br, mld_version);
+		if (err)
+			return err;
+	}
+#endif
 #endif
 #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
 	if (data[IFLA_BR_NF_CALL_IPTABLES]) {
@@ -1146,6 +1158,7 @@ static size_t br_get_size(const struct net_device *brdev)
 	       nla_total_size_64bit(sizeof(u64)) + /* IFLA_BR_MCAST_QUERY_RESPONSE_INTVL */
 	       nla_total_size_64bit(sizeof(u64)) + /* IFLA_BR_MCAST_STARTUP_QUERY_INTVL */
 	       nla_total_size(sizeof(u8)) +	/* IFLA_BR_MCAST_IGMP_VERSION */
+	       nla_total_size(sizeof(u8)) +	/* IFLA_BR_MCAST_MLD_VERSION */
 #endif
 #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
 	       nla_total_size(sizeof(u8)) +     /* IFLA_BR_NF_CALL_IPTABLES */
@@ -1225,7 +1238,11 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev)
 	    nla_put_u8(skb, IFLA_BR_MCAST_IGMP_VERSION,
 		       br->multicast_igmp_version))
 		return -EMSGSIZE;
-
+#if IS_ENABLED(CONFIG_IPV6)
+	if (nla_put_u8(skb, IFLA_BR_MCAST_MLD_VERSION,
+		       br->multicast_mld_version))
+		return -EMSGSIZE;
+#endif
 	clockval = jiffies_to_clock_t(br->multicast_last_member_interval);
 	if (nla_put_u64_64bit(skb, IFLA_BR_MCAST_LAST_MEMBER_INTVL, clockval,
 			      IFLA_BR_PAD))
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 3d207d92d899..26aec2366bc3 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -355,6 +355,7 @@ struct net_bridge
 	struct bridge_mcast_other_query	ip6_other_query;
 	struct bridge_mcast_own_query	ip6_own_query;
 	struct bridge_mcast_querier	ip6_querier;
+	u8				multicast_mld_version;
 #endif /* IS_ENABLED(CONFIG_IPV6) */
 #endif
 
@@ -585,6 +586,9 @@ int br_multicast_toggle(struct net_bridge *br, unsigned long val);
 int br_multicast_set_querier(struct net_bridge *br, unsigned long val);
 int br_multicast_set_hash_max(struct net_bridge *br, unsigned long val);
 int br_multicast_set_igmp_version(struct net_bridge *br, unsigned long val);
+#if IS_ENABLED(CONFIG_IPV6)
+int br_multicast_set_mld_version(struct net_bridge *br, unsigned long val);
+#endif
 struct net_bridge_mdb_entry *
 br_mdb_ip_get(struct net_bridge_mdb_htable *mdb, struct br_ip *dst);
 struct net_bridge_mdb_entry *
diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
index f00d1690658c..c9d2e0abfb89 100644
--- a/net/bridge/br_sysfs_br.c
+++ b/net/bridge/br_sysfs_br.c
@@ -659,6 +659,25 @@ static ssize_t multicast_stats_enabled_store(struct device *d,
 	return store_bridge_parm(d, buf, len, set_stats_enabled);
 }
 static DEVICE_ATTR_RW(multicast_stats_enabled);
+
+#if IS_ENABLED(CONFIG_IPV6)
+static ssize_t multicast_mld_version_show(struct device *d,
+					  struct device_attribute *attr,
+					  char *buf)
+{
+	struct net_bridge *br = to_bridge(d);
+
+	return sprintf(buf, "%u\n", br->multicast_mld_version);
+}
+
+static ssize_t multicast_mld_version_store(struct device *d,
+					   struct device_attribute *attr,
+					   const char *buf, size_t len)
+{
+	return store_bridge_parm(d, buf, len, br_multicast_set_mld_version);
+}
+static DEVICE_ATTR_RW(multicast_mld_version);
+#endif
 #endif
 #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
 static ssize_t nf_call_iptables_show(
@@ -827,6 +846,9 @@ static struct attribute *bridge_attrs[] = {
 	&dev_attr_multicast_startup_query_interval.attr,
 	&dev_attr_multicast_stats_enabled.attr,
 	&dev_attr_multicast_igmp_version.attr,
+#if IS_ENABLED(CONFIG_IPV6)
+	&dev_attr_multicast_mld_version.attr,
+#endif
 #endif
 #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
 	&dev_attr_nf_call_iptables.attr,
-- 
2.1.4

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox