Netdev List
 help / color / mirror / Atom feed
* [PATCH v4] net: dev_weight: TX/RX orthogonality
From: Matthias Tafelmeier @ 2016-12-29 19:23 UTC (permalink / raw)
  To: netdev; +Cc: hagen, fw, edumazet, daniel
In-Reply-To: <20161229.140854.1456743873101323068.davem@davemloft.net>

Oftenly, introducing side effects on packet processing on the other half
of the stack by adjusting one of TX/RX via sysctl is not desirable.
There are cases of demand for asymmetric, orthogonal configurability.

This holds true especially for nodes where RPS for RFS usage on top is
configured and therefore use the 'old dev_weight'. This is quite a
common base configuration setup nowadays, even with NICs of superior processing
support (e.g. aRFS).

A good example use case are nodes acting as noSQL data bases with a
large number of tiny requests and rather fewer but large packets as responses.
It's affordable to have large budget and rx dev_weights for the
requests. But as a side effect having this large a number on TX
processed in one run can overwhelm drivers.

This patch therefore introduces an independent configurability via sysctl to
userland.

Signed-off-by: Matthias Tafelmeier <matthias.tafelmeier@gmx.net>
---
 Documentation/sysctl/net.txt | 21 +++++++++++++++++++++
 include/linux/netdevice.h    |  4 ++++
 net/core/dev.c               |  6 +++++-
 net/core/sysctl_net_core.c   | 31 ++++++++++++++++++++++++++++++-
 net/sched/sch_generic.c      |  2 +-
 5 files changed, 61 insertions(+), 3 deletions(-)

diff --git a/Documentation/sysctl/net.txt b/Documentation/sysctl/net.txt
index f0480f7..53cef32 100644
--- a/Documentation/sysctl/net.txt
+++ b/Documentation/sysctl/net.txt
@@ -61,6 +61,27 @@ The maximum number of packets that kernel can handle on a NAPI interrupt,
 it's a Per-CPU variable.
 Default: 64
 
+dev_weight_rx_bias
+--------------
+
+RPS (e.g. RFS, aRFS) processing is competing with the registered NAPI poll function
+of the driver for the per softirq cycle netdev_budget. This parameter influences
+the proportion of the configured netdev_budget that is spent on RPS based packet
+processing during RX softirq cycles. It is further meant for making current
+dev_weight adaptable for asymmetric CPU needs on RX/TX side of the network stack.
+(see dev_weight_tx_bias) It is effective on a per CPU basis. Determination is based
+on dev_weight and is calculated multiplicative (dev_weight * dev_weight_rx_bias).
+Default: 1
+
+dev_weight_tx_bias
+--------------
+
+Scales the maximum number of packets that can be processed during a TX softirq cycle.
+Effective on a per CPU basis. Allows scaling of current dev_weight for asymmetric
+net stack processing needs. Be careful to avoid making TX softirq processing a CPU hog.
+Calculation is based on dev_weight (dev_weight * dev_weight_tx_bias).
+Default: 1
+
 default_qdisc
 --------------
 
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 994f742..ecd78b3 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3795,6 +3795,10 @@ void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64,
 extern int		netdev_max_backlog;
 extern int		netdev_tstamp_prequeue;
 extern int		weight_p;
+extern int		dev_weight_rx_bias;
+extern int		dev_weight_tx_bias;
+extern int		dev_rx_weight;
+extern int		dev_tx_weight;
 
 bool netdev_has_upper_dev(struct net_device *dev, struct net_device *upper_dev);
 struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev,
diff --git a/net/core/dev.c b/net/core/dev.c
index 8db5a0b..f2fe98b 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3428,6 +3428,10 @@ EXPORT_SYMBOL(netdev_max_backlog);
 int netdev_tstamp_prequeue __read_mostly = 1;
 int netdev_budget __read_mostly = 300;
 int weight_p __read_mostly = 64;            /* old backlog weight */
+int dev_weight_rx_bias __read_mostly = 1;            /* bias for backlog weight */
+int dev_weight_tx_bias __read_mostly = 1;            /* bias for output_queue quota */
+int dev_rx_weight __read_mostly = weight_p;
+int dev_tx_weight __read_mostly = weight_p;
 
 /* Called with irq disabled */
 static inline void ____napi_schedule(struct softnet_data *sd,
@@ -4833,7 +4837,7 @@ static int process_backlog(struct napi_struct *napi, int quota)
 		net_rps_action_and_irq_enable(sd);
 	}
 
-	napi->weight = weight_p;
+	napi->weight = dev_rx_weight;
 	while (again) {
 		struct sk_buff *skb;
 
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 2a46e40..698ddd7 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -222,6 +222,21 @@ static int set_default_qdisc(struct ctl_table *table, int write,
 }
 #endif
 
+static int proc_do_dev_weight(struct ctl_table *table, int write,
+			   void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	int ret;
+
+	ret = proc_dointvec(table, write, buffer, lenp, ppos);
+	if (ret != 0)
+		return ret;
+
+	dev_rx_weight = weight_p * dev_weight_rx_bias;
+	dev_tx_weight = weight_p * dev_weight_tx_bias;
+
+	return ret;
+}
+
 static int proc_do_rss_key(struct ctl_table *table, int write,
 			   void __user *buffer, size_t *lenp, loff_t *ppos)
 {
@@ -273,7 +288,21 @@ static struct ctl_table net_core_table[] = {
 		.data		= &weight_p,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec
+		.proc_handler	= proc_do_dev_weight,
+	},
+	{
+		.procname	= "dev_weight_rx_bias",
+		.data		= &dev_weight_rx_bias,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_do_dev_weight,
+	},
+	{
+		.procname	= "dev_weight_tx_bias",
+		.data		= &dev_weight_tx_bias,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_do_dev_weight,
 	},
 	{
 		.procname	= "netdev_max_backlog",
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 6eb9c8e..b052b27 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -247,7 +247,7 @@ static inline int qdisc_restart(struct Qdisc *q, int *packets)
 
 void __qdisc_run(struct Qdisc *q)
 {
-	int quota = weight_p;
+	int quota = dev_tx_weight;
 	int packets;
 
 	while (qdisc_restart(q, &packets)) {
-- 
2.7.4

^ permalink raw reply related

* Re: [PATCH net-next 00/14] bnxt_en: updates for net-next.
From: David Miller @ 2016-12-29 19:42 UTC (permalink / raw)
  To: michael.chan; +Cc: netdev
In-Reply-To: <1483031624-20076-1-git-send-email-michael.chan@broadcom.com>

From: Michael Chan <michael.chan@broadcom.com>
Date: Thu, 29 Dec 2016 12:13:30 -0500

> This patch series for net-next contains cleanups, new features and minor
> fixes.  The driver specific busy polling code is removed to use busy
> polling support in core networking.  Hardware RFS support is enhanced with
> added ipv6 flows support and VF support.  A new scheme to allocate TX
> rings from the firmware is implemented for newer chips and firmware.  Plus
> some misc. cleanups, minor fixes, and to add the maintainer entry.  Please
> review.

Looks good, series applied, thanks Michael.

^ permalink raw reply

* Re: [PATCH net-next] sctp: refactor sctp_datamsg_from_user
From: David Miller @ 2016-12-29 19:44 UTC (permalink / raw)
  To: marcelo.leitner; +Cc: netdev, linux-sctp, nhorman, vyasevich
In-Reply-To: <ba007bd30f61decbdae5c23541cbdb397c030b68.1483033905.git.marcelo.leitner@gmail.com>

From: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Date: Thu, 29 Dec 2016 15:53:28 -0200

> This patch refactors sctp_datamsg_from_user() in an attempt to make it
> better to read and avoid code duplication for handling the last
> fragment.
> 
> It also avoids doing division and remaining operations. Even though, it
> should still operate similarly as before this patch.
> 
> Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>

Applied.

^ permalink raw reply

* Re: [PATCH v4] net: dev_weight: TX/RX orthogonality
From: David Miller @ 2016-12-29 19:44 UTC (permalink / raw)
  To: matthias.tafelmeier; +Cc: netdev, hagen, fw, edumazet, daniel
In-Reply-To: <1483039398-28017-1-git-send-email-matthias.tafelmeier@gmx.net>

From: Matthias Tafelmeier <matthias.tafelmeier@gmx.net>
Date: Thu, 29 Dec 2016 20:23:18 +0100

> Oftenly, introducing side effects on packet processing on the other half
> of the stack by adjusting one of TX/RX via sysctl is not desirable.
> There are cases of demand for asymmetric, orthogonal configurability.
> 
> This holds true especially for nodes where RPS for RFS usage on top is
> configured and therefore use the 'old dev_weight'. This is quite a
> common base configuration setup nowadays, even with NICs of superior processing
> support (e.g. aRFS).
> 
> A good example use case are nodes acting as noSQL data bases with a
> large number of tiny requests and rather fewer but large packets as responses.
> It's affordable to have large budget and rx dev_weights for the
> requests. But as a side effect having this large a number on TX
> processed in one run can overwhelm drivers.
> 
> This patch therefore introduces an independent configurability via sysctl to
> userland.
> 
> Signed-off-by: Matthias Tafelmeier <matthias.tafelmeier@gmx.net>

Applied.

^ permalink raw reply

* Re: [PATCH v4] net: dev_weight: TX/RX orthogonality
From: David Miller @ 2016-12-29 19:45 UTC (permalink / raw)
  To: matthias.tafelmeier; +Cc: netdev, hagen, fw, edumazet, daniel
In-Reply-To: <20161229.144456.2187353715676829840.davem@davemloft.net>


Actually, reverted, you didn't even build test this:

net/core/dev.c:3433:35: error: initializer element is not constant
 int dev_rx_weight __read_mostly = weight_p;
                                   ^~~~~~~~
net/core/dev.c:3434:35: error: initializer element is not constant
 int dev_tx_weight __read_mostly = weight_p;
                                   ^~~~~~~~

^ permalink raw reply

* Re: [PATCH v4] net: dev_weight: TX/RX orthogonality
From: Matthias Tafelmeier @ 2016-12-29 19:53 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, hagen, fw, edumazet, daniel
In-Reply-To: <20161229.144555.1740958763290967121.davem@davemloft.net>


[-- Attachment #1.1.1: Type: text/plain, Size: 435 bytes --]


> Actually, reverted, you didn't even build test this:
>
> net/core/dev.c:3433:35: error: initializer element is not constant
>  int dev_rx_weight __read_mostly = weight_p;
>                                    ^~~~~~~~
> net/core/dev.c:3434:35: error: initializer element is not constant
>  int dev_tx_weight __read_mostly = weight_p;
>                                    ^~~~~~~~

Thought I would have ... let me check.


[-- Attachment #1.1.2: 0x8ADF343B.asc --]
[-- Type: application/pgp-keys, Size: 4806 bytes --]

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 538 bytes --]

^ permalink raw reply

* [PATCH] wlcore: fix spelling mistake in wl1271_warning
From: Colin King @ 2016-12-29 20:14 UTC (permalink / raw)
  To: Kalle Valo, Shahar Patury, Guy Mishol, linux-wireless, netdev
  Cc: linux-kernel

From: Colin Ian King <colin.king@canonical.com>

trivial fix to spelling mistake of function name in wl1271_warning,
should be dynamic_ps_timeout instead of dyanmic_ps_timeout.

Signed-off-by: Colin Ian King <colin.king@canonical.com>
---
 drivers/net/wireless/ti/wlcore/debugfs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/ti/wlcore/debugfs.c b/drivers/net/wireless/ti/wlcore/debugfs.c
index 7f672f6..58e148d 100644
--- a/drivers/net/wireless/ti/wlcore/debugfs.c
+++ b/drivers/net/wireless/ti/wlcore/debugfs.c
@@ -281,7 +281,7 @@ static ssize_t dynamic_ps_timeout_write(struct file *file,
 	}
 
 	if (value < 1 || value > 65535) {
-		wl1271_warning("dyanmic_ps_timeout is not in valid range");
+		wl1271_warning("dynamic_ps_timeout is not in valid range");
 		return -ERANGE;
 	}
 
-- 
2.10.2

^ permalink raw reply related

* Re: [PATCH] stmmac: adding EEE to GMAC4
From: David Miller @ 2016-12-29 20:14 UTC (permalink / raw)
  To: Joao.Pinto
  Cc: peppe.cavallaro, alexandre.torgue, rayagond, linux-kernel, netdev
In-Reply-To: <f89aaac8024e2a92834088545ad72fbe682b32ed.1483030665.git.jpinto@synopsys.com>

From: Joao Pinto <Joao.Pinto@synopsys.com>
Date: Thu, 29 Dec 2016 17:10:27 +0000

> This patch adds Energy Efficiency Ethernet to GMAC4.
> 
> Signed-off-by: Joao Pinto <jpinto@synopsys.com>

Applied, thanks.

^ permalink raw reply

* [PATCH] [media] gp8psk: fix spelling mistake: "firmare" -> "firmware"
From: Colin King @ 2016-12-29 20:29 UTC (permalink / raw)
  To: Mauro Carvalho Chehab, Larry Finger, Chaoming Li, Kalle Valo,
	linux-media, linux-wireless, netdev
  Cc: linux-kernel

From: Colin Ian King <colin.king@canonical.com>

trivial fix to spelling mistake in err message

Signed-off-by: Colin Ian King <colin.king@canonical.com>
---
 drivers/media/usb/dvb-usb/gp8psk.c          | 2 +-
 drivers/net/wireless/realtek/rtlwifi/core.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/media/usb/dvb-usb/gp8psk.c b/drivers/media/usb/dvb-usb/gp8psk.c
index 2360e7e..26461f2 100644
--- a/drivers/media/usb/dvb-usb/gp8psk.c
+++ b/drivers/media/usb/dvb-usb/gp8psk.c
@@ -161,7 +161,7 @@ static int gp8psk_load_bcm4500fw(struct dvb_usb_device *d)
 			goto out_free;
 		}
 		if (buflen > 64) {
-			err("firmare chunk size bigger than 64 bytes.");
+			err("firmware chunk size bigger than 64 bytes.");
 			goto out_free;
 		}
 
diff --git a/drivers/net/wireless/realtek/rtlwifi/core.c b/drivers/net/wireless/realtek/rtlwifi/core.c
index ded1493..732de0a 100644
--- a/drivers/net/wireless/realtek/rtlwifi/core.c
+++ b/drivers/net/wireless/realtek/rtlwifi/core.c
@@ -1532,7 +1532,7 @@ static int rtl_op_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd,
 		key_type = AESCMAC_ENCRYPTION;
 		RT_TRACE(rtlpriv, COMP_SEC, DBG_DMESG, "alg:CMAC\n");
 		RT_TRACE(rtlpriv, COMP_SEC, DBG_DMESG,
-			 "HW don't support CMAC encrypiton, use software CMAC encrypiton\n");
+			 "HW don't support CMAC encryption, use software CMAC encryption\n");
 		err = -EOPNOTSUPP;
 		goto out_unlock;
 	default:
-- 
2.10.2

^ permalink raw reply related

* [PATCH v5] net: dev_weight: TX/RX orthogonality
From: Matthias Tafelmeier @ 2016-12-29 20:37 UTC (permalink / raw)
  To: netdev; +Cc: hagen, fw, edumazet, daniel
In-Reply-To: <20161229.144555.1740958763290967121.davem@davemloft.net>

Oftenly, introducing side effects on packet processing on the other half
of the stack by adjusting one of TX/RX via sysctl is not desirable.
There are cases of demand for asymmetric, orthogonal configurability.

This holds true especially for nodes where RPS for RFS usage on top is
configured and therefore use the 'old dev_weight'. This is quite a
common base configuration setup nowadays, even with NICs of superior processing
support (e.g. aRFS).

A good example use case are nodes acting as noSQL data bases with a
large number of tiny requests and rather fewer but large packets as responses.
It's affordable to have large budget and rx dev_weights for the
requests. But as a side effect having this large a number on TX
processed in one run can overwhelm drivers.

This patch therefore introduces an independent configurability via sysctl to
userland.

Signed-off-by: Matthias Tafelmeier <matthias.tafelmeier@gmx.net>
---
 Documentation/sysctl/net.txt | 21 +++++++++++++++++++++
 include/linux/netdevice.h    |  4 ++++
 net/core/dev.c               |  8 ++++++--
 net/core/sysctl_net_core.c   | 31 ++++++++++++++++++++++++++++++-
 net/sched/sch_generic.c      |  2 +-
 5 files changed, 62 insertions(+), 4 deletions(-)

diff --git a/Documentation/sysctl/net.txt b/Documentation/sysctl/net.txt
index f0480f7..53cef32 100644
--- a/Documentation/sysctl/net.txt
+++ b/Documentation/sysctl/net.txt
@@ -61,6 +61,27 @@ The maximum number of packets that kernel can handle on a NAPI interrupt,
 it's a Per-CPU variable.
 Default: 64
 
+dev_weight_rx_bias
+--------------
+
+RPS (e.g. RFS, aRFS) processing is competing with the registered NAPI poll function
+of the driver for the per softirq cycle netdev_budget. This parameter influences
+the proportion of the configured netdev_budget that is spent on RPS based packet
+processing during RX softirq cycles. It is further meant for making current
+dev_weight adaptable for asymmetric CPU needs on RX/TX side of the network stack.
+(see dev_weight_tx_bias) It is effective on a per CPU basis. Determination is based
+on dev_weight and is calculated multiplicative (dev_weight * dev_weight_rx_bias).
+Default: 1
+
+dev_weight_tx_bias
+--------------
+
+Scales the maximum number of packets that can be processed during a TX softirq cycle.
+Effective on a per CPU basis. Allows scaling of current dev_weight for asymmetric
+net stack processing needs. Be careful to avoid making TX softirq processing a CPU hog.
+Calculation is based on dev_weight (dev_weight * dev_weight_tx_bias).
+Default: 1
+
 default_qdisc
 --------------
 
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 994f742..ecd78b3 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3795,6 +3795,10 @@ void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64,
 extern int		netdev_max_backlog;
 extern int		netdev_tstamp_prequeue;
 extern int		weight_p;
+extern int		dev_weight_rx_bias;
+extern int		dev_weight_tx_bias;
+extern int		dev_rx_weight;
+extern int		dev_tx_weight;
 
 bool netdev_has_upper_dev(struct net_device *dev, struct net_device *upper_dev);
 struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev,
diff --git a/net/core/dev.c b/net/core/dev.c
index 8db5a0b..0d34e1c 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3427,7 +3427,11 @@ EXPORT_SYMBOL(netdev_max_backlog);
 
 int netdev_tstamp_prequeue __read_mostly = 1;
 int netdev_budget __read_mostly = 300;
-int weight_p __read_mostly = 64;            /* old backlog weight */
+int weight_p __read_mostly = 64;           /* old backlog weight */
+int dev_weight_rx_bias __read_mostly = 1;  /* bias for backlog weight */
+int dev_weight_tx_bias __read_mostly = 1;  /* bias for output_queue quota */
+int dev_rx_weight __read_mostly = 64;
+int dev_tx_weight __read_mostly = 64;
 
 /* Called with irq disabled */
 static inline void ____napi_schedule(struct softnet_data *sd,
@@ -4833,7 +4837,7 @@ static int process_backlog(struct napi_struct *napi, int quota)
 		net_rps_action_and_irq_enable(sd);
 	}
 
-	napi->weight = weight_p;
+	napi->weight = dev_rx_weight;
 	while (again) {
 		struct sk_buff *skb;
 
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 2a46e40..698ddd7 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -222,6 +222,21 @@ static int set_default_qdisc(struct ctl_table *table, int write,
 }
 #endif
 
+static int proc_do_dev_weight(struct ctl_table *table, int write,
+			   void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	int ret;
+
+	ret = proc_dointvec(table, write, buffer, lenp, ppos);
+	if (ret != 0)
+		return ret;
+
+	dev_rx_weight = weight_p * dev_weight_rx_bias;
+	dev_tx_weight = weight_p * dev_weight_tx_bias;
+
+	return ret;
+}
+
 static int proc_do_rss_key(struct ctl_table *table, int write,
 			   void __user *buffer, size_t *lenp, loff_t *ppos)
 {
@@ -273,7 +288,21 @@ static struct ctl_table net_core_table[] = {
 		.data		= &weight_p,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec
+		.proc_handler	= proc_do_dev_weight,
+	},
+	{
+		.procname	= "dev_weight_rx_bias",
+		.data		= &dev_weight_rx_bias,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_do_dev_weight,
+	},
+	{
+		.procname	= "dev_weight_tx_bias",
+		.data		= &dev_weight_tx_bias,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_do_dev_weight,
 	},
 	{
 		.procname	= "netdev_max_backlog",
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 6eb9c8e..b052b27 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -247,7 +247,7 @@ static inline int qdisc_restart(struct Qdisc *q, int *packets)
 
 void __qdisc_run(struct Qdisc *q)
 {
-	int quota = weight_p;
+	int quota = dev_tx_weight;
 	int packets;
 
 	while (qdisc_restart(q, &packets)) {
-- 
2.7.4

^ permalink raw reply related

* Re: [PATCH] rtlwifi: fix spelling mistake: "contry" -> "country"
From: Larry Finger @ 2016-12-29 20:58 UTC (permalink / raw)
  To: Colin King, Chaoming Li, Kalle Valo, linux-wireless, netdev; +Cc: linux-kernel
In-Reply-To: <20161229160029.22117-1-colin.king@canonical.com>

On 12/29/2016 10:00 AM, Colin King wrote:
> From: Colin Ian King <colin.king@canonical.com>
>
> trivial fix to spelling mistake in RT_TRACE message
>
> Signed-off-by: Colin Ian King <colin.king@canonical.com>

Acked-by: Larry Finger <Larry.Finger@lwfinger.net>

Larry

> ---
>  drivers/net/wireless/realtek/rtlwifi/regd.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/net/wireless/realtek/rtlwifi/regd.c b/drivers/net/wireless/realtek/rtlwifi/regd.c
> index 6ee6bf8..558c31b 100644
> --- a/drivers/net/wireless/realtek/rtlwifi/regd.c
> +++ b/drivers/net/wireless/realtek/rtlwifi/regd.c
> @@ -440,7 +440,7 @@ int rtl_regd_init(struct ieee80211_hw *hw,
>
>  	if (rtlpriv->regd.country_code >= COUNTRY_CODE_MAX) {
>  		RT_TRACE(rtlpriv, COMP_REGD, DBG_DMESG,
> -			 "rtl: EEPROM indicates invalid contry code, world wide 13 should be used\n");
> +			 "rtl: EEPROM indicates invalid country code, world wide 13 should be used\n");
>
>  		rtlpriv->regd.country_code = COUNTRY_CODE_WORLD_WIDE_13;
>  	}
>

^ permalink raw reply

* [PATCH] sh_eth: fix branch prediction in sh_eth_interrupt()
From: Sergei Shtylyov @ 2016-12-29 21:07 UTC (permalink / raw)
  To: netdev, linux-renesas-soc; +Cc: ben.hutchings

IIUC, likely()/unlikely() should apply to the whole *if* statement's
expression, not a part of it  -- fix such expression in  sh_eth_interrupt()
accordingly...

Fixes: 283e38db65e7 ("sh_eth: Fix serialisation of interrupt disable with interrupt & NAPI handlers")
Signed-off-by: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>

---
The patch is against DaveM's 'net-next.git' repo; I'm not sure if it should
be  targeted to the 'net.git' repo instead...

 drivers/net/ethernet/renesas/sh_eth.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

Index: net-next/drivers/net/ethernet/renesas/sh_eth.c
===================================================================
--- net-next.orig/drivers/net/ethernet/renesas/sh_eth.c
+++ net-next/drivers/net/ethernet/renesas/sh_eth.c
@@ -1656,7 +1656,7 @@ static irqreturn_t sh_eth_interrupt(int
 	else
 		goto out;
 
-	if (!likely(mdp->irq_enabled)) {
+	if (unlikely(!mdp->irq_enabled)) {
 		sh_eth_write(ndev, 0, EESIPR);
 		goto out;
 	}

^ permalink raw reply

* Re: [PATCH] [media] gp8psk: fix spelling mistake: "firmare" -> "firmware"
From: VDR User @ 2016-12-29 21:23 UTC (permalink / raw)
  To: Colin King
  Cc: Mauro Carvalho Chehab, Larry Finger, Chaoming Li, Kalle Valo,
	mailing list: linux-media, linux-wireless, netdev,
	Linux Kernel Mailing List
In-Reply-To: <20161229202952.27448-1-colin.king@canonical.com>

> -                       err("firmare chunk size bigger than 64 bytes.");
> +                       err("firmware chunk size bigger than 64 bytes.");

Yup.

> -                        "HW don't support CMAC encrypiton, use software CMAC encrypiton\n");
> +                        "HW don't support CMAC encryption, use software CMAC encryption\n");

Should be: "HW doesn't support CMAC encryption, use software CMAC
encryption\n");

^ permalink raw reply

* [net-next PATCH 0/6] i40e: Add VF port representator support or SR-IOV VFs
From: Sridhar Samudrala @ 2016-12-30  6:20 UTC (permalink / raw)
  To: alexander.h.duyck, john.r.fastabend, anjali.singhai,
	jakub.kicinski, intel-wired-lan, netdev

- Patch 1 introduces devlink interface to get/set the mode of SRIOV switch.
- Patch 2 adds support to create VF port representor(VFPR) netdevs associated
  with SR-IOV VFs that can be used to control/configure VFs from PF name space.
- Patch 3 enables syncing link state between VFs and VFPRs.
- Patch 4 adds a new type to metadata_dst to allow passing VF id to lower device.
- Patch 5 adds TX and RX support to VFPR netdevs.
- Patch 6 enables HW and SW VFPR statistics to be exposed via netlink on VFPR
  netdevs.

Jakub Kicinski (1):
  net: store port/representator id in metadata_dst

Sridhar Samudrala (5):
  i40e: Introduce devlink interface.
  i40e: Introduce VF Port Representator(VFPR) netdevs.
  i40e: Sync link state between VFs and VFPRs
  i40e: Add TX and RX support in switchdev mode.
  i40e: Add support for exposing VF port statistics via VFPR netdev on
    the host.

 drivers/net/ethernet/intel/Kconfig                 |   1 +
 drivers/net/ethernet/intel/i40e/i40e.h             |   4 +
 drivers/net/ethernet/intel/i40e/i40e_main.c        |  96 +++++-
 drivers/net/ethernet/intel/i40e/i40e_txrx.c        | 132 ++++++++-
 drivers/net/ethernet/intel/i40e/i40e_txrx.h        |   1 +
 drivers/net/ethernet/intel/i40e/i40e_type.h        |   3 +
 drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 321 ++++++++++++++++++++-
 drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h |  25 ++
 include/net/dst_metadata.h                         |  35 ++-
 net/core/dst.c                                     |  15 +-
 net/core/filter.c                                  |   1 +
 net/ipv4/ip_tunnel_core.c                          |   6 +-
 net/openvswitch/flow_netlink.c                     |   4 +-
 13 files changed, 610 insertions(+), 34 deletions(-)

-- 
2.5.5

^ permalink raw reply

* [net-next PATCH 1/6] i40e: Introduce devlink interface.
From: Sridhar Samudrala @ 2016-12-30  6:20 UTC (permalink / raw)
  To: alexander.h.duyck, john.r.fastabend, anjali.singhai,
	jakub.kicinski, intel-wired-lan, netdev
In-Reply-To: <1483078863-22026-1-git-send-email-sridhar.samudrala@intel.com>

Add initial devlink support to get/set the mode of SRIOV switch.
This patch sets the default mode as 'legacy' and enables getting the mode
and and setting it to 'legacy'.

The switch mode can be get/set via following 'devlink' commands.

# devlink dev eswitch show pci/0000:05:00.0
pci/0000:05:00.0: mode legacy
# devlink dev eswitch set pci/0000:05:00.0 mode switchdev
devlink answers: Operation not supported
# devlink dev eswitch set pci/0000:05:00.0 mode legacy
# devlink dev eswitch show pci/0000:05:00.0
pci/0000:05:00.0: mode legacy

Signed-off-by: Sridhar Samudrala <sridhar.samudrala@intel.com>
---
 drivers/net/ethernet/intel/Kconfig          |  1 +
 drivers/net/ethernet/intel/i40e/i40e.h      |  3 ++
 drivers/net/ethernet/intel/i40e/i40e_main.c | 80 ++++++++++++++++++++++++++---
 3 files changed, 76 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/intel/Kconfig b/drivers/net/ethernet/intel/Kconfig
index 1349b45..0dbb87e 100644
--- a/drivers/net/ethernet/intel/Kconfig
+++ b/drivers/net/ethernet/intel/Kconfig
@@ -215,6 +215,7 @@ config I40E
 	tristate "Intel(R) Ethernet Controller XL710 Family support"
 	imply PTP_1588_CLOCK
 	depends on PCI
+	depends on MAY_USE_DEVLINK
 	---help---
 	  This driver supports Intel(R) Ethernet Controller XL710 Family of
 	  devices.  For more information on how to identify your adapter, go
diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
index ba8d309..410f83d 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -54,6 +54,8 @@
 #include <linux/clocksource.h>
 #include <linux/net_tstamp.h>
 #include <linux/ptp_clock_kernel.h>
+#include <net/devlink.h>
+
 #include "i40e_type.h"
 #include "i40e_prototype.h"
 #ifdef I40E_FCOE
@@ -448,6 +450,7 @@ struct i40e_pf {
 	u32 ioremap_len;
 	u32 fd_inv;
 	u16 phy_led_val;
+	enum devlink_eswitch_mode eswitch_mode;
 };
 
 /**
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index ad4cf63..c01a620 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -10910,6 +10910,57 @@ static void i40e_get_platform_mac_addr(struct pci_dev *pdev, struct i40e_pf *pf)
 }
 
 /**
+ * i40e_devlink_eswitch_mode_get
+ *
+ * @devlink: pointer to devlink struct
+ * @mode: sr-iov switch mode pointer
+ *
+ * Returns the switch mode of the associated PF in the @mode pointer.
+ */
+static int i40e_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode)
+{
+	struct i40e_pf *pf = devlink_priv(devlink);
+
+	*mode = pf->eswitch_mode;
+
+	return 0;
+}
+
+/**
+ * i40e_devlink_eswitch_mode_set
+ *
+ * @devlink: pointer to devlink struct
+ * @mode: sr-iov switch mode
+ *
+ * Set the switch mode of the associated PF.
+ * Returns 0 on success and -EOPNOTSUPP on error.
+ */
+static int i40e_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode)
+{
+	struct i40e_pf *pf = devlink_priv(devlink);
+	int err = 0;
+
+	if (mode == pf->eswitch_mode)
+		goto done;
+
+	switch (mode) {
+	case DEVLINK_ESWITCH_MODE_LEGACY:
+		pf->eswitch_mode = mode;
+		break;
+	default:
+		err = -EOPNOTSUPP;
+		break;
+	}
+done:
+	return err;
+}
+
+static const struct devlink_ops i40e_devlink_ops = {
+	.eswitch_mode_get = i40e_devlink_eswitch_mode_get,
+	.eswitch_mode_set = i40e_devlink_eswitch_mode_set,
+};
+
+/**
  * i40e_probe - Device initialization routine
  * @pdev: PCI device information struct
  * @ent: entry in i40e_pci_tbl
@@ -10926,6 +10977,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	struct i40e_pf *pf;
 	struct i40e_hw *hw;
 	static u16 pfs_found;
+	struct devlink *devlink;
 	u16 wol_nvm_bits;
 	u16 link_status;
 	int err;
@@ -10959,20 +11011,28 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	pci_enable_pcie_error_reporting(pdev);
 	pci_set_master(pdev);
 
+	devlink = devlink_alloc(&i40e_devlink_ops, sizeof(*pf));
+	if (!devlink) {
+		dev_err(&pdev->dev, "devlink_alloc failed\n");
+		err = -ENOMEM;
+		goto err_devlink_alloc;
+	}
+
 	/* Now that we have a PCI connection, we need to do the
 	 * low level device setup.  This is primarily setting up
 	 * the Admin Queue structures and then querying for the
 	 * device's current profile information.
 	 */
-	pf = kzalloc(sizeof(*pf), GFP_KERNEL);
-	if (!pf) {
-		err = -ENOMEM;
-		goto err_pf_alloc;
-	}
+	pf = devlink_priv(devlink);
 	pf->next_vsi = 0;
 	pf->pdev = pdev;
 	set_bit(__I40E_DOWN, &pf->state);
 
+	pf->eswitch_mode = DEVLINK_ESWITCH_MODE_LEGACY;
+	err = devlink_register(devlink, &pdev->dev);
+	if (err)
+		goto err_devlink_register;
+
 	hw = &pf->hw;
 	hw->back = pf;
 
@@ -11445,8 +11505,10 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 err_pf_reset:
 	iounmap(hw->hw_addr);
 err_ioremap:
-	kfree(pf);
-err_pf_alloc:
+	devlink_unregister(devlink);
+err_devlink_register:
+	devlink_free(devlink);
+err_devlink_alloc:
 	pci_disable_pcie_error_reporting(pdev);
 	pci_release_mem_regions(pdev);
 err_pci_reg:
@@ -11468,6 +11530,7 @@ static void i40e_remove(struct pci_dev *pdev)
 {
 	struct i40e_pf *pf = pci_get_drvdata(pdev);
 	struct i40e_hw *hw = &pf->hw;
+	struct devlink *devlink = priv_to_devlink(pf);
 	i40e_status ret_code;
 	int i;
 
@@ -11554,7 +11617,8 @@ static void i40e_remove(struct pci_dev *pdev)
 	kfree(pf->vsi);
 
 	iounmap(hw->hw_addr);
-	kfree(pf);
+	devlink_unregister(devlink);
+	devlink_free(devlink);
 	pci_release_mem_regions(pdev);
 
 	pci_disable_pcie_error_reporting(pdev);
-- 
2.5.5

^ permalink raw reply related

* [net-next PATCH 2/6] i40e: Introduce VF Port Representator(VFPR) netdevs.
From: Sridhar Samudrala @ 2016-12-30  6:20 UTC (permalink / raw)
  To: alexander.h.duyck, john.r.fastabend, anjali.singhai,
	jakub.kicinski, intel-wired-lan, netdev
In-Reply-To: <1483078863-22026-1-git-send-email-sridhar.samudrala@intel.com>

VF Port Representator netdevs are created for each VF if the switch mode
is set to 'switchdev'. These netdevs can be used to control and configure
VFs from PFs namespace. They enable exposing VF statistics, configure and
monitor link state, mtu, filters, fdb/vlan entries etc. of VFs.
Broadcast filters are not enabled in switchdev mode.

Sample script to create VF port representors
# rmmod i40e; modprobe i40e
# devlink dev eswitch set pci/0000:05:00.0 mode switchdev
# echo 2 > /sys/class/net/enp5s0f0/device/sriov_numvfs
# ip l show
297: enp5s0f0: <BROADCAST,MULTICAST> mtu 1500 qdisc noop portid 6805ca2e7268 state DOWN mode DEFAULT group default qlen 1000
     link/ether 68:05:ca:2e:72:68 brd ff:ff:ff:ff:ff:ff
     vf 0 MAC 00:00:00:00:00:00, spoof checking on, link-state auto, trust off
     vf 1 MAC 00:00:00:00:00:00, spoof checking on, link-state auto, trust off
299: enp5s0f0-vf0: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000
     link/ether 00:00:00:00:00:00 brd ff:ff:ff:ff:ff:ff
300: enp5s0f0-vf1: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000
     link/ether 00:00:00:00:00:00 brd ff:ff:ff:ff:ff:ff

Signed-off-by: Sridhar Samudrala <sridhar.samudrala@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_main.c        |  14 +-
 drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 151 ++++++++++++++++++++-
 drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h |  14 ++
 3 files changed, 172 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index c01a620..03d07dd 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -10938,15 +10938,27 @@ static int i40e_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode)
 static int i40e_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode)
 {
 	struct i40e_pf *pf = devlink_priv(devlink);
-	int err = 0;
+	struct i40e_vf *vf;
+	int i, err = 0;
 
 	if (mode == pf->eswitch_mode)
 		goto done;
 
 	switch (mode) {
 	case DEVLINK_ESWITCH_MODE_LEGACY:
+		for (i = 0; i < pf->num_alloc_vfs; i++) {
+			vf = &(pf->vf[i]);
+			i40e_free_vfpr_netdev(vf);
+		}
 		pf->eswitch_mode = mode;
 		break;
+	case DEVLINK_ESWITCH_MODE_SWITCHDEV:
+		for (i = 0; i < pf->num_alloc_vfs; i++) {
+			vf = &(pf->vf[i]);
+			i40e_alloc_vfpr_netdev(vf, i);
+		}
+                pf->eswitch_mode = mode;
+		break;
 	default:
 		err = -EOPNOTSUPP;
 		break;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index a6198b7..6c5b296 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -697,12 +697,16 @@ static int i40e_alloc_vsi_res(struct i40e_vf *vf, enum i40e_vsi_type type)
 					 "Could not add MAC filter %pM for VF %d\n",
 					vf->default_lan_addr.addr, vf->vf_id);
 		}
-		eth_broadcast_addr(broadcast);
-		f = i40e_add_filter(vsi, broadcast,
-				    vf->port_vlan_id ? vf->port_vlan_id : -1);
-		if (!f)
-			dev_info(&pf->pdev->dev,
-				 "Could not allocate VF broadcast filter\n");
+
+		/* Add VF broadcast filter only in 'legacy' mode */
+		if (vsi->back->eswitch_mode == DEVLINK_ESWITCH_MODE_LEGACY) {
+			eth_broadcast_addr(broadcast);
+			f = i40e_add_filter(vsi, broadcast,
+					    vf->port_vlan_id ? vf->port_vlan_id : -1);
+			if (!f)
+				dev_info(&pf->pdev->dev,
+					 "Could not allocate VF broadcast filter\n");
+		}
 		spin_unlock_bh(&vsi->mac_filter_hash_lock);
 		i40e_write_rx_ctl(&pf->hw, I40E_VFQF_HENA1(0, vf->vf_id),
 				  (u32)hena);
@@ -1020,6 +1024,136 @@ void i40e_reset_vf(struct i40e_vf *vf, bool flr)
 }
 
 /**
+ * i40e_vfpr_netdev_open
+ * @dev: network interface device structure
+ *
+ * Called when vfpr netdevice is brought up.
+ **/
+static int i40e_vfpr_netdev_open(struct net_device *dev)
+{
+	return 0;
+}
+
+/**
+ * i40e_vfpr_netdev_stop
+ * @dev: network interface device structure
+ *
+ * Called when vfpr netdevice is brought down.
+ **/
+static int i40e_vfpr_netdev_stop(struct net_device *dev)
+{
+	return 0;
+}
+
+static const struct net_device_ops i40e_vfpr_netdev_ops = {
+	.ndo_open       	= i40e_vfpr_netdev_open,
+	.ndo_stop       	= i40e_vfpr_netdev_stop,
+};
+
+/**
+ * i40e_update_vf_broadcast_filter
+ * @vf: pointer to the VF structure
+ * @enable: boolean flag indicating add/delete
+ *
+ * add/delete VFs broadcast filter
+ **/
+void i40e_update_vf_broadcast_filter(struct i40e_vf *vf, bool enable)
+{
+	struct i40e_pf *pf = vf->pf;
+	struct i40e_vsi *vsi = pf->vsi[vf->lan_vsi_idx];
+	u8 broadcast[ETH_ALEN];
+	int err;
+
+	spin_lock_bh(&vsi->mac_filter_hash_lock);
+	eth_broadcast_addr(broadcast);
+	if (enable)
+		i40e_add_filter(vsi, broadcast, vf->port_vlan_id ? vf->port_vlan_id : -1);
+	else
+		i40e_del_filter(vsi, broadcast, vf->port_vlan_id ? vf->port_vlan_id : -1);
+	spin_unlock_bh(&vsi->mac_filter_hash_lock);
+
+	/* update broadcast filter */
+	err = i40e_sync_vsi_filters(vsi);
+	if (err)
+		dev_err(&pf->pdev->dev, "Unable to program bcast filter\n");
+}
+
+/**
+ * i40e_alloc_vfpr_netdev
+ * @vf: pointer to the VF structure
+ * @vf_num: VF number
+ *
+ * Create VF Port representor netdev
+ **/
+int i40e_alloc_vfpr_netdev(struct i40e_vf *vf, u16 vf_num)
+{
+	struct net_device *vfpr_netdev;
+	char netdev_name[IFNAMSIZ];
+	struct i40e_vfpr_netdev_priv *priv;
+	struct i40e_pf *pf = vf->pf;
+	struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi];
+	int err;
+
+	snprintf(netdev_name, IFNAMSIZ, "%s-vf%d", vsi->netdev->name, vf_num);
+	vfpr_netdev = alloc_netdev(sizeof(struct i40e_vfpr_netdev_priv),
+				   netdev_name, NET_NAME_UNKNOWN, ether_setup);
+	if (!vfpr_netdev) {
+		dev_err(&pf->pdev->dev, "alloc_netdev failed for vf:%d\n",
+			vf_num);
+		return -ENOMEM;
+	}
+
+	pf->vf[vf_num].vfpr_netdev = vfpr_netdev;
+
+	priv = netdev_priv(vfpr_netdev);
+	priv->vf = &(pf->vf[vf_num]);
+
+	vfpr_netdev->netdev_ops = &i40e_vfpr_netdev_ops;
+
+	netif_carrier_off(vfpr_netdev);
+	netif_tx_disable(vfpr_netdev);
+
+	err = register_netdev(vfpr_netdev);
+	if (err) {
+		dev_err(&pf->pdev->dev, "register_netdev failed for vf: %s\n",
+			vf->vfpr_netdev->name);
+		free_netdev(vfpr_netdev);
+		return err;
+	}
+
+	dev_info(&pf->pdev->dev, "VF Port representor(%s) created for VF %d\n",
+		 vf->vfpr_netdev->name, vf_num);
+
+	/* Delete broadcast filter for VF */
+	i40e_update_vf_broadcast_filter(vf, false);
+
+	return 0;
+}
+
+/**
+ * i40e_free_vfpr_netdev
+ * @vf: pointer to the VF structure
+ *
+ * Free VF Port representor netdev
+ **/
+void i40e_free_vfpr_netdev(struct i40e_vf *vf)
+{
+	struct i40e_pf *pf = vf->pf;
+
+	if (!vf->vfpr_netdev)
+		return;
+
+	dev_info(&pf->pdev->dev, "Freeing VF Port representor(%s)\n",
+		 vf->vfpr_netdev->name);
+
+	unregister_netdev(vf->vfpr_netdev);
+	free_netdev(vf->vfpr_netdev);
+
+	/* Add broadcast filter to VF */
+	i40e_update_vf_broadcast_filter(vf, true);
+}
+
+/**
  * i40e_free_vfs
  * @pf: pointer to the PF structure
  *
@@ -1060,6 +1194,9 @@ void i40e_free_vfs(struct i40e_pf *pf)
 			i40e_free_vf_res(&pf->vf[i]);
 		/* disable qp mappings */
 		i40e_disable_vf_mappings(&pf->vf[i]);
+
+		if (pf->eswitch_mode == DEVLINK_ESWITCH_MODE_SWITCHDEV)
+			i40e_free_vfpr_netdev(&pf->vf[i]);
 	}
 
 	kfree(pf->vf);
@@ -1127,6 +1264,8 @@ int i40e_alloc_vfs(struct i40e_pf *pf, u16 num_alloc_vfs)
 		/* VF resources get allocated during reset */
 		i40e_reset_vf(&vfs[i], false);
 
+		if (pf->eswitch_mode == DEVLINK_ESWITCH_MODE_SWITCHDEV)
+			i40e_alloc_vfpr_netdev(&vfs[i], i);
 	}
 	pf->num_alloc_vfs = num_alloc_vfs;
 
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
index 4012d06..1e5def1 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
@@ -72,10 +72,21 @@ enum i40e_vf_capabilities {
 	I40E_VIRTCHNL_VF_CAP_IWARP,
 };
 
+/* VF Port representor netdev private structure */
+struct i40e_vfpr_netdev_priv {
+	struct i40e_vf *vf;
+};
+
 /* VF information structure */
 struct i40e_vf {
 	struct i40e_pf *pf;
 
+	/* VF Port representor netdev that allows control and configuration
+	 * of VFs from the host. Enables returning VF stats, configuring link
+	 * state, mtu, fdb/vlans etc.
+	 */
+	struct net_device *vfpr_netdev;
+
 	/* VF id in the PF space */
 	s16 vf_id;
 	/* all VF vsis connect to the same parent */
@@ -142,4 +153,7 @@ int i40e_ndo_set_vf_spoofchk(struct net_device *netdev, int vf_id, bool enable);
 void i40e_vc_notify_link_state(struct i40e_pf *pf);
 void i40e_vc_notify_reset(struct i40e_pf *pf);
 
+int i40e_alloc_vfpr_netdev(struct i40e_vf *vf, u16 vf_num);
+void i40e_free_vfpr_netdev(struct i40e_vf *vf);
+
 #endif /* _I40E_VIRTCHNL_PF_H_ */
-- 
2.5.5

^ permalink raw reply related

* [net-next PATCH 3/6] i40e: Sync link state between VFs and VFPRs
From: Sridhar Samudrala @ 2016-12-30  6:21 UTC (permalink / raw)
  To: alexander.h.duyck, john.r.fastabend, anjali.singhai,
	jakub.kicinski, intel-wired-lan, netdev
In-Reply-To: <1483078863-22026-1-git-send-email-sridhar.samudrala@intel.com>

This patch enables
- reflecting the link state of VFPR based on VF admin state & link state
  of VF based on admin state of VFPR.
- bringing up/down the VFPR sends a notification to update VF link state.
- bringing up/down the VF will cause the link state update of VFPR.
- enable/disable VF link state via ndo_set_vf_link_state will update the
  admin state of associated VFPR.

PF: enp5s0f0, VFs: enp5s2,enp5s2f1 VFPRs:enp5s0f0-vf0, enp5s0f0-vf1
# rmmod i40e; modprobe i40e
# devlink dev eswitch set pci/0000:05:00.0 mode switchdev
# echo 2 > /sys/class/net/enp5s0f0/device/sriov_numvfs

# ip link set enp5s2 up
# ip link set enp5s0f0-vf0 up
# ip link set enp5s0f0-vf1 up

/* enp5s2 UP -> enp5s0f0-vf0 CARRIER ON */
# ip link show enp5s0f0-vf0
215: enp5s0f0-vf0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc fq_codel state UP mode DEFAULT group default qlen 1000
     link/ether 00:00:00:00:00:00 brd ff:ff:ff:ff:ff:ff

/* enp5s0f0-vf0 UP -> enp5s2 CARRIER ON */
# ip link show enp5s2
218: enp5s2: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc mq state UP mode DEFAULT group default qlen 1000
     link/ether ea:4d:60:bc:6f:85 brd ff:ff:ff:ff:ff:ff

/* enp5s2f1 DOWN -> enp5s0f0-vf1 NO-CARRIER */
# ip link show enp5s0f0-vf1
216: enp5s0f0-vf1: <NO-CARRIER,BROADCAST,MULTICAST,UP> mtu 1500 qdisc fq_codel state DOWN mode DEFAULT group default qlen 1000
     link/ether 00:00:00:00:00:00 brd ff:ff:ff:ff:ff:ff

# ip link set enp5s0f0-vf0 down
# ip link set enp5s2f1 up

/* enp5s0-vf0 DOWN -> enp5s2 NO_CARRIER */
# ip link show enp5s2
218: enp5s2: <NO-CARRIER,BROADCAST,MULTICAST,UP> mtu 1500 qdisc mq state DOWN mode DEFAULT group default qlen 1000
     link/ether ea:4d:60:bc:6f:85 brd ff:ff:ff:ff:ff:ff

# ip -d link show enp5s0f0
213: enp5s0f0: <BROADCAST,MULTICAST> mtu 1500 qdisc noop portid 6805ca27268 state DOWN mode DEFAULT group default qlen 1000
     link/ether 68:05:ca:2e:72:68 brd ff:ff:ff:ff:ff:ff promiscuity 0 addrgenmode eui64
     vf 0 MAC 00:00:00:00:00:00, spoof checking on, link-state disable, trust off
     vf 1 MAC 00:00:00:00:00:00, spoof checking on, link-state enable, trust off

Signed-off-by: Sridhar Samudrala <sridhar.samudrala@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 44 ++++++++++++++++++++++
 1 file changed, 44 insertions(+)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index 6c5b296..3ea7235 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -1031,6 +1031,13 @@ void i40e_reset_vf(struct i40e_vf *vf, bool flr)
  **/
 static int i40e_vfpr_netdev_open(struct net_device *dev)
 {
+	struct i40e_vfpr_netdev_priv *priv = netdev_priv(dev);
+	struct i40e_vf *vf = priv->vf;
+
+	vf->link_forced = true;
+	vf->link_up = true;
+	i40e_vc_notify_vf_link_state(vf);
+
 	return 0;
 }
 
@@ -1042,6 +1049,13 @@ static int i40e_vfpr_netdev_open(struct net_device *dev)
  **/
 static int i40e_vfpr_netdev_stop(struct net_device *dev)
 {
+	struct i40e_vfpr_netdev_priv *priv = netdev_priv(dev);
+	struct i40e_vf *vf = priv->vf;
+
+	vf->link_forced = true;
+	vf->link_up = false;
+	i40e_vc_notify_vf_link_state(vf);
+
 	return 0;
 }
 
@@ -1127,6 +1141,13 @@ int i40e_alloc_vfpr_netdev(struct i40e_vf *vf, u16 vf_num)
 	/* Delete broadcast filter for VF */
 	i40e_update_vf_broadcast_filter(vf, false);
 
+	/* Reset VF link as we are changing the mode to 'switchdev'. VFPR netdev
+	 * needs to be brought up to enable VF link.
+	 */
+	vf->link_forced = true;
+	vf->link_up = false;
+	i40e_vc_notify_vf_link_state(vf);
+
 	return 0;
 }
 
@@ -1151,6 +1172,10 @@ void i40e_free_vfpr_netdev(struct i40e_vf *vf)
 
 	/* Add broadcast filter to VF */
 	i40e_update_vf_broadcast_filter(vf, true);
+
+	/* In legacy mode, VF link is not controlled by VFPR */
+	vf->link_forced = false;
+	i40e_vc_notify_vf_link_state(vf);
 }
 
 /**
@@ -1907,6 +1932,10 @@ static int i40e_vc_enable_queues_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
 
 	if (i40e_vsi_start_rings(pf->vsi[vf->lan_vsi_idx]))
 		aq_ret = I40E_ERR_TIMEOUT;
+
+	if ((0 == aq_ret) && vf->vfpr_netdev)
+		netif_carrier_on(vf->vfpr_netdev);
+
 error_param:
 	/* send the response to the VF */
 	return i40e_vc_send_resp_to_vf(vf, I40E_VIRTCHNL_OP_ENABLE_QUEUES,
@@ -1946,6 +1975,9 @@ static int i40e_vc_disable_queues_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
 
 	i40e_vsi_stop_rings(pf->vsi[vf->lan_vsi_idx]);
 
+	if ((0 == aq_ret) && vf->vfpr_netdev)
+		netif_carrier_off(vf->vfpr_netdev);
+
 error_param:
 	/* send the response to the VF */
 	return i40e_vc_send_resp_to_vf(vf, I40E_VIRTCHNL_OP_DISABLE_QUEUES,
@@ -3186,6 +3218,7 @@ int i40e_ndo_set_vf_link_state(struct net_device *netdev, int vf_id, int link)
 	struct i40e_netdev_priv *np = netdev_priv(netdev);
 	struct i40e_pf *pf = np->vsi->back;
 	struct i40e_virtchnl_pf_event pfe;
+	struct net_device *vfpr_netdev;
 	struct i40e_hw *hw = &pf->hw;
 	struct i40e_vf *vf;
 	int abs_vf_id;
@@ -3228,6 +3261,17 @@ int i40e_ndo_set_vf_link_state(struct net_device *netdev, int vf_id, int link)
 		ret = -EINVAL;
 		goto error_out;
 	}
+
+	vfpr_netdev = vf->vfpr_netdev;
+	if (vfpr_netdev) {
+		unsigned int flags = vfpr_netdev->flags;
+
+		if (vf->link_up)
+			dev_change_flags(vfpr_netdev, flags | IFF_UP);
+		else
+			dev_change_flags(vfpr_netdev, flags & ~IFF_UP);
+	}
+
 	/* Notify the VF of its new link state */
 	i40e_aq_send_msg_to_vf(hw, abs_vf_id, I40E_VIRTCHNL_OP_EVENT,
 			       0, (u8 *)&pfe, sizeof(pfe), NULL);
-- 
2.5.5

^ permalink raw reply related

* [net-next PATCH 5/6] i40e: Add TX and RX support in switchdev mode.
From: Sridhar Samudrala @ 2016-12-30  6:21 UTC (permalink / raw)
  To: alexander.h.duyck, john.r.fastabend, anjali.singhai,
	jakub.kicinski, intel-wired-lan, netdev
In-Reply-To: <1483078863-22026-1-git-send-email-sridhar.samudrala@intel.com>

In switchdev mode, broadcast filter is not enabled on VFs, The broadcasts and
unknown frames from VFs are received by the PF and passed to corresponding VF
port representator netdev.
A host based switching entity like a linux bridge or OVS redirects these frames
to the right VFs via VFPR netdevs. Any frames sent via VFPR netdevs are sent as
directed transmits to the corresponding VFs. To enable directed transmit, skb
metadata dst is used to pass the VF id and the frame is requeued to call the PFs
transmit routine.

Small script to demonstrate inter VF pings in switchdev mode.
PF: enp5s0f0, VFs: enp5s2,enp5s2f1 VFPRs:enp5s0f0-vf0, enp5s0f0-vf1

# rmmod i40e; modprobe i40e
# devlink dev eswitch set pci/0000:05:00.0 mode switchdev
# echo 2 > /sys/class/net/enp5s0f0/device/sriov_numvfs
# ip link set enp5s0f0 vf 0 mac 00:11:22:33:44:55
# ip link set enp5s0f0 vf 1 mac 00:11:22:33:44:56
# rmmod i40evf; modprobe i40evf

/* Create 2 namespaces and move the VFs to
# ip netns add ns0
# ip link set enp5s2 netns ns0
# ip netns exec ns0 ip addr add 192.168.1.10/24 dev enp5s2
# ip netns exec ns0 ip link set enp5s2 up
# ip netns add ns1
# ip link set enp5s2f1 netns ns1
# ip netns exec ns1 ip addr add 192.168.1.11/24 dev enp5s2f1
# ip netns exec ns1 ip link set enp5s2f1 up

/* bring up pf and vfpr netdevs */
# ip link set enp5s0f0 up
# ip link set enp5s0f0-vf0 up
# ip link set enp5s0f0-vf1 up

/* Create a linux bridge and add vfpr netdevs to it. */
# ip link add vfpr-br type bridge
# ip link set enp5s0f0-vf0 master vfpr-br
# ip link set enp5s0f0-vf1 master vfpr-br
# ip addr add 192.168.1.1/24 dev vfpr-br
# ip link set vfpr-br up

# ip netns exec ns0 ping -c3 192.168.1.11
# ip netns exec ns1 ping -c3 192.168.1.10

Signed-off-by: Sridhar Samudrala <sridhar.samudrala@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e.h             |  1 +
 drivers/net/ethernet/intel/i40e/i40e_main.c        |  4 +
 drivers/net/ethernet/intel/i40e/i40e_txrx.c        | 92 ++++++++++++++++++++--
 drivers/net/ethernet/intel/i40e/i40e_txrx.h        |  1 +
 drivers/net/ethernet/intel/i40e/i40e_type.h        |  3 +
 drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 19 ++++-
 drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h |  1 +
 7 files changed, 114 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
index 410f83d..1c88db5 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -55,6 +55,7 @@
 #include <linux/net_tstamp.h>
 #include <linux/ptp_clock_kernel.h>
 #include <net/devlink.h>
+#include <net/dst_metadata.h>
 
 #include "i40e_type.h"
 #include "i40e_prototype.h"
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 03d07dd..431694e 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -10938,6 +10938,7 @@ static int i40e_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode)
 static int i40e_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode)
 {
 	struct i40e_pf *pf = devlink_priv(devlink);
+	struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi];
 	struct i40e_vf *vf;
 	int i, err = 0;
 
@@ -10951,6 +10952,8 @@ static int i40e_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode)
 			i40e_free_vfpr_netdev(vf);
 		}
 		pf->eswitch_mode = mode;
+		vsi->netdev->priv_flags |=
+			(IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM);
 		break;
 	case DEVLINK_ESWITCH_MODE_SWITCHDEV:
 		for (i = 0; i < pf->num_alloc_vfs; i++) {
@@ -10958,6 +10961,7 @@ static int i40e_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode)
 			i40e_alloc_vfpr_netdev(vf, i);
 		}
                 pf->eswitch_mode = mode;
+		netif_keep_dst(vsi->netdev);
 		break;
 	default:
 		err = -EOPNOTSUPP;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 352cf7c..b46ddaa 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -1176,16 +1176,37 @@ static bool i40e_alloc_mapped_page(struct i40e_ring *rx_ring,
  * @rx_ring:  rx ring in play
  * @skb: packet to send up
  * @vlan_tag: vlan tag for packet
+ * @lpbk: is it a loopback frame?
  **/
 static void i40e_receive_skb(struct i40e_ring *rx_ring,
-			     struct sk_buff *skb, u16 vlan_tag)
+			     struct sk_buff *skb, u16 vlan_tag, bool lpbk)
 {
 	struct i40e_q_vector *q_vector = rx_ring->q_vector;
+	struct i40e_pf *pf = rx_ring->vsi->back;
+	struct i40e_vf *vf;
+	struct ethhdr *eth;
+	int vf_id;
 
 	if ((rx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_RX) &&
 	    (vlan_tag & VLAN_VID_MASK))
 		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag);
 
+	if ((pf->eswitch_mode == DEVLINK_ESWITCH_MODE_LEGACY) || !lpbk)
+		goto gro_receive;
+
+	/* If a loopback packet is received from a VF in switchdev mode, pass the
+	 * frame to the corresponding VFPR netdev based on the source MAC in the frame.
+	 */
+	eth = (struct ethhdr *)skb_mac_header(skb);
+	for (vf_id = 0; vf_id < pf->num_alloc_vfs; vf_id++) {
+		vf = &pf->vf[vf_id];
+		if (ether_addr_equal(eth->h_source, vf->default_lan_addr.addr)) {
+			skb->dev = vf->vfpr_netdev;
+			break;
+		}
+	}
+
+gro_receive:
 	napi_gro_receive(&q_vector->napi, skb);
 }
 
@@ -1394,6 +1415,7 @@ static inline void i40e_rx_hash(struct i40e_ring *ring,
  * @rx_desc: pointer to the EOP Rx descriptor
  * @skb: pointer to current skb being populated
  * @rx_ptype: the packet type decoded by hardware
+ * @lpbk: is it a loopback frame?
  *
  * This function checks the ring, descriptor, and packet information in
  * order to populate the hash, checksum, VLAN, protocol, and
@@ -1402,7 +1424,7 @@ static inline void i40e_rx_hash(struct i40e_ring *ring,
 static inline
 void i40e_process_skb_fields(struct i40e_ring *rx_ring,
 			     union i40e_rx_desc *rx_desc, struct sk_buff *skb,
-			     u8 rx_ptype)
+			     u8 rx_ptype, bool *lpbk)
 {
 	u64 qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
 	u32 rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >>
@@ -1411,6 +1433,9 @@ void i40e_process_skb_fields(struct i40e_ring *rx_ring,
 	u32 tsyn = (rx_status & I40E_RXD_QW1_STATUS_TSYNINDX_MASK) >>
 		   I40E_RXD_QW1_STATUS_TSYNINDX_SHIFT;
 
+	*lpbk = !!((rx_status & I40E_RXD_QW1_STATUS_LPBK_MASK) >>
+		   I40E_RXD_QW1_STATUS_LPBK_SHIFT);
+
 	if (unlikely(tsynvalid))
 		i40e_ptp_rx_hwtstamp(rx_ring->vsi->back, skb, tsyn);
 
@@ -1736,6 +1761,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
 	while (likely(total_rx_packets < budget)) {
 		union i40e_rx_desc *rx_desc;
 		struct sk_buff *skb;
+		bool lpbk;
 		u16 vlan_tag;
 		u8 rx_ptype;
 		u64 qword;
@@ -1794,7 +1820,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
 			   I40E_RXD_QW1_PTYPE_SHIFT;
 
 		/* populate checksum, VLAN, and protocol */
-		i40e_process_skb_fields(rx_ring, rx_desc, skb, rx_ptype);
+		i40e_process_skb_fields(rx_ring, rx_desc, skb, rx_ptype, &lpbk);
 
 #ifdef I40E_FCOE
 		if (unlikely(
@@ -1808,7 +1834,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
 		vlan_tag = (qword & BIT(I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)) ?
 			   le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1) : 0;
 
-		i40e_receive_skb(rx_ring, skb, vlan_tag);
+		i40e_receive_skb(rx_ring, skb, vlan_tag, lpbk);
 
 		/* update budget accounting */
 		total_rx_packets++;
@@ -2346,6 +2372,27 @@ static int i40e_tso(struct sk_buff *skb, u8 *hdr_len, u64 *cd_type_cmd_tso_mss)
 }
 
 /**
+ * i40e_tvsi - set up the target vsi in TX context descriptor
+ * @tx_ring:  ptr to the target vsi
+ * @cd_type_cmd_tso_mss: Quad Word 1
+ *
+ * Returns 0
+ **/
+static int i40e_tvsi(struct i40e_vsi *tvsi, u64 *cd_type_cmd_tso_mss)
+{
+	u64 cd_cmd, cd_tvsi;
+
+	cd_cmd = I40E_TX_CTX_DESC_SWTCH_VSI;
+	cd_tvsi = tvsi->id;
+	cd_tvsi = (cd_tvsi << I40E_TXD_CTX_QW1_VSI_SHIFT) &
+		   I40E_TXD_CTX_QW1_VSI_MASK;
+	*cd_type_cmd_tso_mss |= (cd_cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) |
+				 cd_tvsi;
+
+	return 0;
+}
+
+/**
  * i40e_tsyn - set up the tsyn context descriptor
  * @tx_ring:  ptr to the ring to send
  * @skb:      ptr to the skb we're sending
@@ -2887,8 +2934,12 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb,
 					struct i40e_ring *tx_ring)
 {
 	u64 cd_type_cmd_tso_mss = I40E_TX_DESC_DTYPE_CONTEXT;
+	struct metadata_dst *md_dst = skb_metadata_dst(skb);
 	u32 cd_tunneling = 0, cd_l2tag2 = 0;
 	struct i40e_tx_buffer *first;
+	struct i40e_vsi *t_vsi = NULL;
+	struct i40e_vf *t_vf;
+	struct i40e_pf *pf;
 	u32 td_offset = 0;
 	u32 tx_flags = 0;
 	__be16 protocol;
@@ -2935,7 +2986,22 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb,
 	else if (protocol == htons(ETH_P_IPV6))
 		tx_flags |= I40E_TX_FLAGS_IPV6;
 
-	tso = i40e_tso(skb, &hdr_len, &cd_type_cmd_tso_mss);
+	/* If skb metadata dst points to a VF id, do a directed transmit to
+	 * that VSI. TSO is mutually exclusive with this option. So TSO is not
+	 * enabled when doing a directed transmit.
+	 */
+	if (md_dst && (md_dst->type == METADATA_HW_PORT_MUX)) {
+		pf = tx_ring->vsi->back;
+		if (md_dst->u.port_info.port_id < pf->num_alloc_vfs) {
+			t_vf = &pf->vf[md_dst->u.port_info.port_id];
+			t_vsi = pf->vsi[t_vf->lan_vsi_idx];
+		}
+	}
+
+	if (t_vsi)
+		tso = i40e_tvsi(t_vsi, &cd_type_cmd_tso_mss);
+	else
+		tso = i40e_tso(skb, &hdr_len, &cd_type_cmd_tso_mss);
 
 	if (tso < 0)
 		goto out_drop;
@@ -2998,3 +3064,19 @@ netdev_tx_t i40e_lan_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
 
 	return i40e_xmit_frame_ring(skb, tx_ring);
 }
+
+netdev_tx_t i40e_vfpr_netdev_start_xmit(struct sk_buff *skb,
+				        struct net_device *dev)
+{
+	struct i40e_vfpr_netdev_priv *priv = netdev_priv(dev);
+	struct i40e_vf *vf = priv->vf;
+	struct i40e_pf *pf = vf->pf;
+	struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi];
+
+	skb_dst_drop(skb);
+	dst_hold(&priv->vfpr_dst->dst);
+	skb_dst_set(skb, &priv->vfpr_dst->dst);
+	skb->dev = vsi->netdev;
+
+	return dev_queue_xmit(skb);
+}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
index e065321..850723f 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
@@ -366,6 +366,7 @@ struct i40e_ring_container {
 
 bool i40e_alloc_rx_buffers(struct i40e_ring *rxr, u16 cleaned_count);
 netdev_tx_t i40e_lan_xmit_frame(struct sk_buff *skb, struct net_device *netdev);
+netdev_tx_t i40e_vfpr_netdev_start_xmit(struct sk_buff *skb, struct net_device *netdev);
 void i40e_clean_tx_ring(struct i40e_ring *tx_ring);
 void i40e_clean_rx_ring(struct i40e_ring *rx_ring);
 int i40e_setup_tx_descriptors(struct i40e_ring *tx_ring);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_type.h b/drivers/net/ethernet/intel/i40e/i40e_type.h
index edc0abd..c136cc9 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_type.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_type.h
@@ -728,6 +728,9 @@ enum i40e_rx_desc_status_bits {
 #define I40E_RXD_QW1_STATUS_TSYNVALID_SHIFT  I40E_RX_DESC_STATUS_TSYNVALID_SHIFT
 #define I40E_RXD_QW1_STATUS_TSYNVALID_MASK \
 				    BIT_ULL(I40E_RXD_QW1_STATUS_TSYNVALID_SHIFT)
+#define I40E_RXD_QW1_STATUS_LPBK_SHIFT  I40E_RX_DESC_STATUS_LPBK_SHIFT
+#define I40E_RXD_QW1_STATUS_LPBK_MASK \
+				BIT_ULL(I40E_RXD_QW1_STATUS_LPBK_SHIFT)
 
 enum i40e_rx_desc_fltstat_values {
 	I40E_RX_DESC_FLTSTAT_NO_DATA	= 0,
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index 3ea7235..e3fded1 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -1062,6 +1062,7 @@ static int i40e_vfpr_netdev_stop(struct net_device *dev)
 static const struct net_device_ops i40e_vfpr_netdev_ops = {
 	.ndo_open       	= i40e_vfpr_netdev_open,
 	.ndo_stop       	= i40e_vfpr_netdev_stop,
+	.ndo_start_xmit		= i40e_vfpr_netdev_start_xmit,
 };
 
 /**
@@ -1121,16 +1122,22 @@ int i40e_alloc_vfpr_netdev(struct i40e_vf *vf, u16 vf_num)
 
 	priv = netdev_priv(vfpr_netdev);
 	priv->vf = &(pf->vf[vf_num]);
+	priv->vfpr_dst = metadata_dst_alloc(0, METADATA_HW_PORT_MUX, GFP_KERNEL);
+	priv->vfpr_dst->u.port_info.lower_dev = vsi->netdev;
+	priv->vfpr_dst->u.port_info.port_id = vf->vf_id;
 
 	vfpr_netdev->netdev_ops = &i40e_vfpr_netdev_ops;
 
 	netif_carrier_off(vfpr_netdev);
 	netif_tx_disable(vfpr_netdev);
 
+	eth_hw_addr_inherit(vfpr_netdev, vsi->netdev);
+
 	err = register_netdev(vfpr_netdev);
 	if (err) {
 		dev_err(&pf->pdev->dev, "register_netdev failed for vf: %s\n",
 			vf->vfpr_netdev->name);
+		dst_release((struct dst_entry *)priv->vfpr_dst);
 		free_netdev(vfpr_netdev);
 		return err;
 	}
@@ -1159,14 +1166,18 @@ int i40e_alloc_vfpr_netdev(struct i40e_vf *vf, u16 vf_num)
  **/
 void i40e_free_vfpr_netdev(struct i40e_vf *vf)
 {
+	struct i40e_vfpr_netdev_priv *priv;
 	struct i40e_pf *pf = vf->pf;
 
 	if (!vf->vfpr_netdev)
 		return;
 
+	priv = netdev_priv(vf->vfpr_netdev);
+
 	dev_info(&pf->pdev->dev, "Freeing VF Port representor(%s)\n",
 		 vf->vfpr_netdev->name);
 
+	dst_release((struct dst_entry *)priv->vfpr_dst);
 	unregister_netdev(vf->vfpr_netdev);
 	free_netdev(vf->vfpr_netdev);
 
@@ -1933,8 +1944,10 @@ static int i40e_vc_enable_queues_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
 	if (i40e_vsi_start_rings(pf->vsi[vf->lan_vsi_idx]))
 		aq_ret = I40E_ERR_TIMEOUT;
 
-	if ((0 == aq_ret) && vf->vfpr_netdev)
+	if ((0 == aq_ret) && vf->vfpr_netdev) {
+		netif_tx_start_all_queues(vf->vfpr_netdev);
 		netif_carrier_on(vf->vfpr_netdev);
+	}
 
 error_param:
 	/* send the response to the VF */
@@ -1975,8 +1988,10 @@ static int i40e_vc_disable_queues_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
 
 	i40e_vsi_stop_rings(pf->vsi[vf->lan_vsi_idx]);
 
-	if ((0 == aq_ret) && vf->vfpr_netdev)
+	if ((0 == aq_ret) && vf->vfpr_netdev) {
+		netif_tx_stop_all_queues(vf->vfpr_netdev);
 		netif_carrier_off(vf->vfpr_netdev);
+	}
 
 error_param:
 	/* send the response to the VF */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
index 1e5def1..3c3db8a 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
@@ -74,6 +74,7 @@ enum i40e_vf_capabilities {
 
 /* VF Port representor netdev private structure */
 struct i40e_vfpr_netdev_priv {
+	struct metadata_dst *vfpr_dst;
 	struct i40e_vf *vf;
 };
 
-- 
2.5.5

^ permalink raw reply related

* [net-next PATCH 6/6] i40e: Add support for exposing VF port statistics via VFPR netdev on the host.
From: Sridhar Samudrala @ 2016-12-30  6:21 UTC (permalink / raw)
  To: alexander.h.duyck, john.r.fastabend, anjali.singhai,
	jakub.kicinski, intel-wired-lan, netdev
In-Reply-To: <1483078863-22026-1-git-send-email-sridhar.samudrala@intel.com>

By default stats counted by HW are returned via the original ndo_get_stats64()
api. Stats counted in SW are returned via ndo_get_offload_stats() api.

Small script to demonstrate vfpr stats in switchdev mode.
PF: enp5s0f0, VFs: enp5s2,enp5s2f1 VFPRs:enp5s0f0-vf0, enp5s0f0-vf1

# rmmod i40e; modprobe i40e
# devlink dev eswitch set pci/0000:05:00.0 mode switchdev
# echo 2 > /sys/class/net/enp5s0f0/device/sriov_numvfs
# ip link set enp5s0f0 vf 0 mac 00:11:22:33:44:55
# ip link set enp5s0f0 vf 1 mac 00:11:22:33:44:56
# rmmod i40evf; modprobe i40evf

/* Create 2 namespaces and move the VFs to
# ip netns add ns0
# ip link set enp5s2 netns ns0
# ip netns exec ns0 ip addr add 192.168.1.10/24 dev enp5s2
# ip netns exec ns0 ip link set enp5s2 up
# ip netns add ns1
# ip link set enp5s2f1 netns ns1
# ip netns exec ns1 ip addr add 192.168.1.11/24 dev enp5s2f1
# ip netns exec ns1 ip link set enp5s2f1 up

/* bring up pf and vfpr netdevs */
# ip link set enp5s0f0 up
# ip link set enp5s0f0-vf0 up
# ip link set enp5s0f0-vf1 up

/* Create a linux bridge and add vfpr netdevs to it. */
# ip link add vfpr-br type bridge
# ip link set enp5s0f0-vf0 master vfpr-br
# ip link set enp5s0f0-vf1 master vfpr-br
# ip addr add 192.168.1.1/24 dev vfpr-br
# ip link set vfpr-br up

# ip netns exec ns0 ping -c3 192.168.1.11
# ip netns exec ns1 ping -c3 192.168.1.10

# ip netns exec ns0 ip -s l show enp5s2
56: enp5s2: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc mq state UP mode DEFAULT group default qlen 1000
    link/ether 00:11:22:33:44:55 brd ff:ff:ff:ff:ff:ff
    RX: bytes  packets  errors  dropped overrun mcast
    1468       18       0       0       0       0
    TX: bytes  packets  errors  dropped carrier collsns
    1398       17       0       0       0       0
# ip -s l show enp5s0f0-vf0
52: enp5s0f0-vf0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc fq_codel master vfpr-br state UP mode DEFAULT group default qlen 1000
    link/ether 68:05:ca:2e:72:68 brd ff:ff:ff:ff:ff:ff
    RX: bytes  packets  errors  dropped overrun mcast
    1398       17       0       0       0       0
    TX: bytes  packets  errors  dropped carrier collsns
    1468       18       0       0       0       0
# ip netns exec ns1 ip -s l show enp5s2f1
57: enp5s2f1: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc mq state UP mode DEFAULT group default qlen 1000
    link/ether 00:11:22:33:44:56 brd ff:ff:ff:ff:ff:ff
    RX: bytes  packets  errors  dropped overrun mcast
    1486       18       0       0       0       0
    TX: bytes  packets  errors  dropped carrier collsns
    1538       19       0       0       0       0
# ip -s l show enp5s0f0-vf1
53: enp5s0f0-vf1: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc fq_codel master vfpr-br state UP mode DEFAULT group default qlen 1000
    link/ether 68:05:ca:2e:72:68 brd ff:ff:ff:ff:ff:ff
    RX: bytes  packets  errors  dropped overrun mcast
    1538       19       0       0       0       0
    TX: bytes  packets  errors  dropped carrier collsns
    1486       18       0       0       0       0

Signed-off-by: Sridhar Samudrala <sridhar.samudrala@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_txrx.c        |  44 +++++++-
 drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 111 +++++++++++++++++++++
 drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h |  10 ++
 3 files changed, 163 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index b46ddaa..9f04337 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -1172,6 +1172,32 @@ static bool i40e_alloc_mapped_page(struct i40e_ring *rx_ring,
 }
 
 /**
+ * i40e_vfpr_receive_skb
+ * @vf: pointer to VF
+ * @skb: packet to send up
+ *
+ * Update skb dev to vfpr netdev and rx stats.
+ **/
+static void i40e_vfpr_receive_skb(struct i40e_vf *vf, struct sk_buff *skb)
+{
+	struct i40e_vfpr_netdev_priv *priv;
+	struct vfpr_pcpu_stats *vfpr_stats;
+
+	if (!vf->vfpr_netdev)
+		return;
+
+	skb->dev = vf->vfpr_netdev;
+
+	priv = netdev_priv(vf->vfpr_netdev);
+	vfpr_stats = this_cpu_ptr(priv->vfpr_stats);
+
+	u64_stats_update_begin(&vfpr_stats->syncp);
+	vfpr_stats->rx_packets++;
+	vfpr_stats->rx_bytes += skb->len;
+	u64_stats_update_end(&vfpr_stats->syncp);
+}
+
+/**
  * i40e_receive_skb - Send a completed packet up the stack
  * @rx_ring:  rx ring in play
  * @skb: packet to send up
@@ -1201,7 +1227,7 @@ static void i40e_receive_skb(struct i40e_ring *rx_ring,
 	for (vf_id = 0; vf_id < pf->num_alloc_vfs; vf_id++) {
 		vf = &pf->vf[vf_id];
 		if (ether_addr_equal(eth->h_source, vf->default_lan_addr.addr)) {
-			skb->dev = vf->vfpr_netdev;
+			i40e_vfpr_receive_skb(vf, skb);
 			break;
 		}
 	}
@@ -3072,11 +3098,25 @@ netdev_tx_t i40e_vfpr_netdev_start_xmit(struct sk_buff *skb,
 	struct i40e_vf *vf = priv->vf;
 	struct i40e_pf *pf = vf->pf;
 	struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi];
+	int ret;
 
 	skb_dst_drop(skb);
 	dst_hold(&priv->vfpr_dst->dst);
 	skb_dst_set(skb, &priv->vfpr_dst->dst);
 	skb->dev = vsi->netdev;
 
-	return dev_queue_xmit(skb);
+	ret = dev_queue_xmit(skb);
+
+	if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) {
+		struct vfpr_pcpu_stats *vfpr_stats = this_cpu_ptr(priv->vfpr_stats);
+
+		u64_stats_update_begin(&vfpr_stats->syncp);
+		vfpr_stats->tx_packets++;
+		vfpr_stats->tx_bytes += skb->len;
+		u64_stats_update_end(&vfpr_stats->syncp);
+	} else {
+		this_cpu_inc(priv->vfpr_stats->tx_drops);
+	}
+
+	return ret;
 }
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index e3fded1..d4a1763 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -1059,10 +1059,113 @@ static int i40e_vfpr_netdev_stop(struct net_device *dev)
 	return 0;
 }
 
+/**
+ * i40e_vfpr_netdev_get_stats64
+ * @dev: network interface device structure
+ * @stats: netlink stats structure
+ *
+ * Returns the hw statistics from the VSI corresponding to the associated VFPR
+ **/
+static struct rtnl_link_stats64 *
+i40e_vfpr_netdev_get_stats64(struct net_device *netdev,
+			     struct rtnl_link_stats64 *stats)
+{
+	struct i40e_vfpr_netdev_priv *priv = netdev_priv(netdev);
+	struct i40e_vf *vf = priv->vf;
+	struct i40e_pf *pf = vf->pf;
+	struct i40e_vsi *vsi;
+	struct i40e_eth_stats *estats;
+
+	vsi = pf->vsi[vf->lan_vsi_idx];
+	i40e_update_stats(vsi);
+
+	estats = &vsi->eth_stats;
+
+	/* TX and RX stats are flipped as we are returning the stats as seen
+	 * at the switch port correspoding to the VF.
+	 */
+	stats->rx_packets = estats->tx_unicast + estats->tx_multicast +
+			    estats->tx_broadcast;
+	stats->tx_packets = estats->rx_unicast + estats->rx_multicast +
+			    estats->rx_broadcast;
+	stats->rx_bytes = estats->tx_bytes;
+	stats->tx_bytes = estats->rx_bytes;
+	stats->rx_dropped = estats->tx_discards;
+	stats->tx_dropped = estats->rx_discards;
+
+	return stats;
+}
+
+
+/**
+ * i40e_vfpr_get_cpu_hit_stats64
+ * @dev: network interface device structure
+ * @stats: netlink stats structure
+ *
+ * stats are filled from the priv structure. correspond to the packets
+ * that are seen by the cpu and sent/received via vfpr netdev.
+ **/
+static int
+i40e_vfpr_get_cpu_hit_stats64(const struct net_device *dev,
+			      struct rtnl_link_stats64 *stats)
+{
+	struct i40e_vfpr_netdev_priv *priv = netdev_priv(dev);
+	int i;
+
+	for_each_possible_cpu(i) {
+		struct vfpr_pcpu_stats *vfpr_stats;
+		u64 tbytes, tpkts, tdrops, rbytes, rpkts;
+		unsigned int start;
+
+		vfpr_stats = per_cpu_ptr(priv->vfpr_stats, i);
+		do {
+			start = u64_stats_fetch_begin_irq(&vfpr_stats->syncp);
+			tbytes = vfpr_stats->tx_bytes;
+			tpkts = vfpr_stats->tx_packets;
+			tdrops = vfpr_stats->tx_drops;
+			rbytes = vfpr_stats->rx_bytes;
+			rpkts = vfpr_stats->rx_packets;
+		} while (u64_stats_fetch_retry_irq(&vfpr_stats->syncp, start));
+		stats->tx_bytes += tbytes;
+		stats->tx_packets += tpkts;
+		stats->tx_dropped += tdrops;
+		stats->rx_bytes += rbytes;
+		stats->rx_packets += rpkts;
+	}
+
+	return 0;
+}
+
+static bool
+i40e_vfpr_netdev_has_offload_stats(const struct net_device *dev, int attr_id)
+{
+	switch (attr_id) {
+	case IFLA_OFFLOAD_XSTATS_CPU_HIT:
+		return true;
+	}
+
+	return false;
+}
+
+static int
+i40e_vfpr_netdev_get_offload_stats(int attr_id, const struct net_device *dev,
+				   void *sp)
+{
+	switch (attr_id) {
+	case IFLA_OFFLOAD_XSTATS_CPU_HIT:
+		return i40e_vfpr_get_cpu_hit_stats64(dev, sp);
+	}
+
+	return -EINVAL;
+}
+
 static const struct net_device_ops i40e_vfpr_netdev_ops = {
 	.ndo_open       	= i40e_vfpr_netdev_open,
 	.ndo_stop       	= i40e_vfpr_netdev_stop,
 	.ndo_start_xmit		= i40e_vfpr_netdev_start_xmit,
+	.ndo_get_stats64	= i40e_vfpr_netdev_get_stats64,
+	.ndo_has_offload_stats	= i40e_vfpr_netdev_has_offload_stats,
+	.ndo_get_offload_stats	= i40e_vfpr_netdev_get_offload_stats,
 };
 
 /**
@@ -1121,6 +1224,13 @@ int i40e_alloc_vfpr_netdev(struct i40e_vf *vf, u16 vf_num)
 	pf->vf[vf_num].vfpr_netdev = vfpr_netdev;
 
 	priv = netdev_priv(vfpr_netdev);
+	priv->vfpr_stats = netdev_alloc_pcpu_stats(struct vfpr_pcpu_stats);
+	if (!priv->vfpr_stats) {
+		dev_err(&pf->pdev->dev, "alloc_pcpu_stats failed for vf:%d\n",
+			vf_num);
+		free_netdev(vfpr_netdev);
+		return -ENOMEM;
+	}
 	priv->vf = &(pf->vf[vf_num]);
 	priv->vfpr_dst = metadata_dst_alloc(0, METADATA_HW_PORT_MUX, GFP_KERNEL);
 	priv->vfpr_dst->u.port_info.lower_dev = vsi->netdev;
@@ -1178,6 +1288,7 @@ void i40e_free_vfpr_netdev(struct i40e_vf *vf)
 		 vf->vfpr_netdev->name);
 
 	dst_release((struct dst_entry *)priv->vfpr_dst);
+	free_percpu(priv->vfpr_stats);
 	unregister_netdev(vf->vfpr_netdev);
 	free_netdev(vf->vfpr_netdev);
 
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
index 3c3db8a..faa6ea7 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
@@ -72,10 +72,20 @@ enum i40e_vf_capabilities {
 	I40E_VIRTCHNL_VF_CAP_IWARP,
 };
 
+struct vfpr_pcpu_stats {
+	u64			tx_packets;
+	u64			tx_bytes;
+	u64			tx_drops;
+	u64			rx_packets;
+	u64 			rx_bytes;
+	struct u64_stats_sync	syncp;
+};
+
 /* VF Port representor netdev private structure */
 struct i40e_vfpr_netdev_priv {
 	struct metadata_dst *vfpr_dst;
 	struct i40e_vf *vf;
+	struct vfpr_pcpu_stats *vfpr_stats;
 };
 
 /* VF information structure */
-- 
2.5.5

^ permalink raw reply related

* Re: [PATCH] [media] gp8psk: fix spelling mistake: "firmare" -> "firmware"
From: Colin Ian King @ 2016-12-29 21:35 UTC (permalink / raw)
  To: VDR User
  Cc: Mauro Carvalho Chehab, Larry Finger, Chaoming Li, Kalle Valo,
	mailing list: linux-media, linux-wireless, netdev,
	Linux Kernel Mailing List
In-Reply-To: <CAA7C2qjAk6LqTru2zimRr4_JUYXK+4d8VENpyYXjyE0-eJ+RKQ@mail.gmail.com>

On 29/12/16 21:23, VDR User wrote:
>> -                       err("firmare chunk size bigger than 64 bytes.");
>> +                       err("firmware chunk size bigger than 64 bytes.");
> 
> Yup.
> 
>> -                        "HW don't support CMAC encrypiton, use software CMAC encrypiton\n");
>> +                        "HW don't support CMAC encryption, use software CMAC encryption\n");
> 
> Should be: "HW doesn't support CMAC encryption, use software CMAC
> encryption\n");
> 
Very true, I was so focused on the spelling I overlooked the grammar.
I'll re-send with that fixed.

Colin

^ permalink raw reply

* [PATCH][V2] [media] gp8psk: fix spelling mistake: "firmare" -> "firmware"
From: Colin King @ 2016-12-29 21:38 UTC (permalink / raw)
  To: Mauro Carvalho Chehab, Larry Finger, Chaoming Li, Kalle Valo,
	linux-media, linux-wireless, netdev
  Cc: linux-kernel

From: Colin Ian King <colin.king@canonical.com>

Trivial fix to spelling mistake in err message. Also change "don't" to
"does not".

Signed-off-by: Colin Ian King <colin.king@canonical.com>
---
 drivers/media/usb/dvb-usb/gp8psk.c          | 2 +-
 drivers/net/wireless/realtek/rtlwifi/core.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/media/usb/dvb-usb/gp8psk.c b/drivers/media/usb/dvb-usb/gp8psk.c
index 2360e7e..26461f2 100644
--- a/drivers/media/usb/dvb-usb/gp8psk.c
+++ b/drivers/media/usb/dvb-usb/gp8psk.c
@@ -161,7 +161,7 @@ static int gp8psk_load_bcm4500fw(struct dvb_usb_device *d)
 			goto out_free;
 		}
 		if (buflen > 64) {
-			err("firmare chunk size bigger than 64 bytes.");
+			err("firmware chunk size bigger than 64 bytes.");
 			goto out_free;
 		}
 
diff --git a/drivers/net/wireless/realtek/rtlwifi/core.c b/drivers/net/wireless/realtek/rtlwifi/core.c
index ded1493..732de0a 100644
--- a/drivers/net/wireless/realtek/rtlwifi/core.c
+++ b/drivers/net/wireless/realtek/rtlwifi/core.c
@@ -1532,7 +1532,7 @@ static int rtl_op_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd,
 		key_type = AESCMAC_ENCRYPTION;
 		RT_TRACE(rtlpriv, COMP_SEC, DBG_DMESG, "alg:CMAC\n");
 		RT_TRACE(rtlpriv, COMP_SEC, DBG_DMESG,
-			 "HW don't support CMAC encrypiton, use software CMAC encrypiton\n");
+			 "HW does not support CMAC encryption, use software CMAC encryption\n");
 		err = -EOPNOTSUPP;
 		goto out_unlock;
 	default:
-- 
2.10.2

^ permalink raw reply related

* [PATCH net-next] net: dsa: Implement ndo_get_phys_port_id
From: Florian Fainelli @ 2016-12-29 22:20 UTC (permalink / raw)
  To: netdev
  Cc: Florian Fainelli, Andrew Lunn, Vivien Didelot, David S. Miller,
	open list

Implement ndo_get_phys_port_id() by returning the physical port number
of the switch this per-port DSA created network interface corresponds
to.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
---
 net/dsa/slave.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 68c9eea00518..ffd91969b830 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -984,6 +984,17 @@ static void dsa_slave_poll_controller(struct net_device *dev)
 }
 #endif
 
+static int dsa_slave_get_phys_port_id(struct net_device *dev,
+				      struct netdev_phys_item_id *ppid)
+{
+	struct dsa_slave_priv *p = netdev_priv(dev);
+
+	ppid->id_len = sizeof(p->port);
+	memcpy(ppid->id, &p->port, ppid->id_len);
+
+	return 0;
+}
+
 void dsa_cpu_port_ethtool_init(struct ethtool_ops *ops)
 {
 	ops->get_sset_count = dsa_cpu_port_get_sset_count;
@@ -1031,6 +1042,7 @@ static const struct net_device_ops dsa_slave_netdev_ops = {
 	.ndo_bridge_getlink	= switchdev_port_bridge_getlink,
 	.ndo_bridge_setlink	= switchdev_port_bridge_setlink,
 	.ndo_bridge_dellink	= switchdev_port_bridge_dellink,
+	.ndo_get_phys_port_id	= dsa_slave_get_phys_port_id,
 };
 
 static const struct switchdev_ops dsa_slave_switchdev_ops = {
-- 
2.9.3

^ permalink raw reply related

* [PATCH net] net: ipv4: dst for local input routes should use l3mdev if relevant
From: David Ahern @ 2016-12-29 23:29 UTC (permalink / raw)
  To: netdev; +Cc: David Ahern

IPv4 output routes already use l3mdev device instead of loopback for dst's
if it is applicable. Change local input routes to do the same.

This fixes icmp responses for unreachable UDP ports which are directed
to the wrong table after commit 9d1a6c4ea43e4 because local_input
routes use the loopback device. Moving from ingress device to loopback
loses the L3 domain causing responses based on the dst to get to lost.

Fixes: 9d1a6c4ea43e4 ("net: icmp_route_lookup should use rt dev to
		       determine L3 domain")
Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
---
 net/ipv4/route.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index a82a11747b3f..0fcac8e7a2b2 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1914,7 +1914,8 @@ out:	return err;
 		}
 	}
 
-	rth = rt_dst_alloc(net->loopback_dev, flags | RTCF_LOCAL, res.type,
+	rth = rt_dst_alloc(l3mdev_master_dev_rcu(dev) ? : net->loopback_dev,
+			   flags | RTCF_LOCAL, res.type,
 			   IN_DEV_CONF_GET(in_dev, NOPOLICY), false, do_cache);
 	if (!rth)
 		goto e_nobufs;
-- 
2.1.4

^ permalink raw reply related

* [PATCH net-next] net: Allow IP_MULTICAST_IF to set index to L3 slave
From: David Ahern @ 2016-12-29 23:39 UTC (permalink / raw)
  To: netdev; +Cc: darwin.dingel, Mark.Tomlinson, David Ahern

IP_MULTICAST_IF fails if sk_bound_dev_if is already set and the new index
does not match it. e.g.,

    ntpd[15381]: setsockopt IP_MULTICAST_IF 192.168.1.23 fails: Invalid argument

Relax the check in setsockopt to allow setting mc_index to an L3 slave if
sk_bound_dev_if points to an L3 master.

Make a similar change for IPv6. In this case change the device lookup to
take the rcu_read_lock avoiding a refcnt. The rcu lock is also needed for
the lookup of a potential L3 master device.

This really only silences a setsockopt failure since uses of mc_index are
secondary to sk_bound_dev_if if it is set. In both cases, if either index
is an L3 slave or master, lookups are directed to the same FIB table so
relaxing the check at setsockopt time causes no harm.

Patch is based on a suggested change by Darwin for a problem noted in
their code base.

Suggested-by: Darwin Dingel <darwin.dingel@alliedtelesis.co.nz>
Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
---
 net/ipv4/ip_sockglue.c   |  7 ++++++-
 net/ipv6/ipv6_sockglue.c | 16 ++++++++++++----
 2 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 8b13881ed064..72071a9a348d 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -843,6 +843,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
 	{
 		struct ip_mreqn mreq;
 		struct net_device *dev = NULL;
+		int midx;
 
 		if (sk->sk_type == SOCK_STREAM)
 			goto e_inval;
@@ -887,11 +888,15 @@ static int do_ip_setsockopt(struct sock *sk, int level,
 		err = -EADDRNOTAVAIL;
 		if (!dev)
 			break;
+
+		midx = l3mdev_master_ifindex(dev);
+
 		dev_put(dev);
 
 		err = -EINVAL;
 		if (sk->sk_bound_dev_if &&
-		    mreq.imr_ifindex != sk->sk_bound_dev_if)
+		    mreq.imr_ifindex != sk->sk_bound_dev_if &&
+		    (!midx || midx != sk->sk_bound_dev_if))
 			break;
 
 		inet->mc_index = mreq.imr_ifindex;
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 3ba530373560..bdf1227fd433 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -595,16 +595,24 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
 
 		if (val) {
 			struct net_device *dev;
+			int midx;
 
-			if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != val)
-				goto e_inval;
+			rcu_read_lock();
 
-			dev = dev_get_by_index(net, val);
+			dev = dev_get_by_index_rcu(net, val);
 			if (!dev) {
+				rcu_read_unlock();
 				retv = -ENODEV;
 				break;
 			}
-			dev_put(dev);
+			midx = l3mdev_master_ifindex_rcu(dev);
+
+			rcu_read_unlock();
+
+			if (sk->sk_bound_dev_if &&
+			    sk->sk_bound_dev_if != val &&
+			    (!midx || midx != sk->sk_bound_dev_if))
+				goto e_inval;
 		}
 		np->mcast_oif = val;
 		retv = 0;
-- 
2.1.4

^ permalink raw reply related

* [PATCH net-next] liquidio: optimize reads from Octeon PCI console
From: Felix Manlunas @ 2016-12-30  1:04 UTC (permalink / raw)
  To: davem; +Cc: netdev, raghu.vatsavayi, derek.chickles, satananda.burla

Reads from Octeon PCI console are inefficient because before each read
operation, a dynamic mapping to Octeon DRAM is set up.  This patch replaces
the repeated setup of a dynamic mapping with a one-time setup of a static
mapping.

Signed-off-by: Felix Manlunas <felix.manlunas@cavium.com>
Signed-off-by: Raghu Vatsavayi <raghu.vatsavayi@cavium.com>
Signed-off-by: Derek Chickles <derek.chickles@cavium.com>
Signed-off-by: Satanand Burla <satananda.burla@cavium.com>
---
 .../net/ethernet/cavium/liquidio/octeon_config.h    | 10 +++-------
 .../net/ethernet/cavium/liquidio/octeon_console.c   | 10 ++++++++++
 .../net/ethernet/cavium/liquidio/octeon_device.h    |  6 ++++++
 .../net/ethernet/cavium/liquidio/octeon_mem_ops.c   | 21 ++++++++++++++++++++-
 4 files changed, 39 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_config.h b/drivers/net/ethernet/cavium/liquidio/octeon_config.h
index 1cb3514..b3dc2e9 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_config.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_config.h
@@ -429,15 +429,11 @@ struct octeon_config {
 
 /* The following config values are fixed and should not be modified. */
 
-/* Maximum address space to be mapped for Octeon's BAR1 index-based access. */
-#define  MAX_BAR1_MAP_INDEX                     2
+#define  BAR1_INDEX_DYNAMIC_MAP          2
+#define  BAR1_INDEX_STATIC_MAP          15
 #define  OCTEON_BAR1_ENTRY_SIZE         (4 * 1024 * 1024)
 
-/* BAR1 Index 0 to (MAX_BAR1_MAP_INDEX - 1) for normal mapped memory access.
- * Bar1 register at MAX_BAR1_MAP_INDEX used by driver for dynamic access.
- */
-#define  MAX_BAR1_IOREMAP_SIZE  ((MAX_BAR1_MAP_INDEX + 1) * \
-				 OCTEON_BAR1_ENTRY_SIZE)
+#define  MAX_BAR1_IOREMAP_SIZE  (16 * OCTEON_BAR1_ENTRY_SIZE)
 
 /* Response lists - 1 ordered, 1 unordered-blocking, 1 unordered-nonblocking
  * NoResponse Lists are now maintained with each IQ. (Dec' 2007).
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_console.c b/drivers/net/ethernet/cavium/liquidio/octeon_console.c
index 3265e0b..42b673d 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_console.c
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_console.c
@@ -549,6 +549,16 @@ int octeon_init_consoles(struct octeon_device *oct)
 		return ret;
 	}
 
+	/* Dedicate one of Octeon's BAR1 index registers to create a static
+	 * mapping to a region of Octeon DRAM that contains the PCI console
+	 * named block.
+	 */
+	oct->console_nb_info.bar1_index = BAR1_INDEX_STATIC_MAP;
+	oct->fn_list.bar1_idx_setup(oct, addr, oct->console_nb_info.bar1_index,
+				    true);
+	oct->console_nb_info.dram_region_base = addr
+		& ~(OCTEON_BAR1_ENTRY_SIZE - 1ULL);
+
 	/* num_consoles > 0, is an indication that the consoles
 	 * are accessible
 	 */
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.h b/drivers/net/ethernet/cavium/liquidio/octeon_device.h
index 18f6836..c301a38 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_device.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.h
@@ -477,6 +477,12 @@ struct octeon_device {
 	/* Console caches */
 	struct octeon_console console[MAX_OCTEON_MAPS];
 
+	/* Console named block info */
+	struct {
+		u64 dram_region_base;
+		int bar1_index;
+	} console_nb_info;
+
 	/* Coprocessor clock rate. */
 	u64 coproc_clock_rate;
 
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_mem_ops.c b/drivers/net/ethernet/cavium/liquidio/octeon_mem_ops.c
index 13a18c9..5cd96e7 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_mem_ops.c
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_mem_ops.c
@@ -23,7 +23,7 @@
 #include "response_manager.h"
 #include "octeon_device.h"
 
-#define MEMOPS_IDX   MAX_BAR1_MAP_INDEX
+#define MEMOPS_IDX   BAR1_INDEX_DYNAMIC_MAP
 
 #ifdef __BIG_ENDIAN_BITFIELD
 static inline void
@@ -96,6 +96,25 @@ __octeon_pci_rw_core_mem(struct octeon_device *oct, u64 addr,
 	u32 copy_len = 0, index_reg_val = 0;
 	unsigned long flags;
 	u8 __iomem *mapped_addr;
+	u64 static_mapping_base;
+
+	static_mapping_base = oct->console_nb_info.dram_region_base;
+
+	if (static_mapping_base &&
+	    static_mapping_base == (addr & ~(OCTEON_BAR1_ENTRY_SIZE - 1ULL))) {
+		int bar1_index = oct->console_nb_info.bar1_index;
+
+		mapped_addr = oct->mmio[1].hw_addr
+			+ (bar1_index << ilog2(OCTEON_BAR1_ENTRY_SIZE))
+			+ (addr & (OCTEON_BAR1_ENTRY_SIZE - 1ULL));
+
+		if (op)
+			octeon_pci_fastread(oct, mapped_addr, hostbuf, len);
+		else
+			octeon_pci_fastwrite(oct, mapped_addr, hostbuf, len);
+
+		return;
+	}
 
 	spin_lock_irqsave(&oct->mem_access_lock, flags);
 

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox