Netdev List
 help / color / mirror / Atom feed
* [PATCH net-next] sctp: refactor sctp_datamsg_from_user
From: Marcelo Ricardo Leitner @ 2016-12-29 17:53 UTC (permalink / raw)
  To: netdev; +Cc: linux-sctp, Neil Horman, Vlad Yasevich

This patch refactors sctp_datamsg_from_user() in an attempt to make it
better to read and avoid code duplication for handling the last
fragment.

It also avoids doing division and remainder operations. Even so, it
should still operate similarly to how it did before this patch.

Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
---
 net/sctp/chunk.c | 107 +++++++++++++++++--------------------------------------
 1 file changed, 32 insertions(+), 75 deletions(-)

diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c
index 615f0ddd41dfb1ff46a9d4e564716de8e7b60ea6..e3621cb4827fadb5f5cb41ebe8455dfa3300a765 100644
--- a/net/sctp/chunk.c
+++ b/net/sctp/chunk.c
@@ -165,14 +165,12 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
 					    struct sctp_sndrcvinfo *sinfo,
 					    struct iov_iter *from)
 {
-	int max, whole, i, offset, over, err;
-	int len, first_len;
-	int max_data;
+	size_t len, first_len, max_data, remaining;
+	size_t msg_len = iov_iter_count(from);
+	struct list_head *pos, *temp;
 	struct sctp_chunk *chunk;
 	struct sctp_datamsg *msg;
-	struct list_head *pos, *temp;
-	size_t msg_len = iov_iter_count(from);
-	__u8 frag;
+	int err;
 
 	msg = sctp_datamsg_new(GFP_KERNEL);
 	if (!msg)
@@ -185,7 +183,7 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
 	    (SCTP_PR_TTL_ENABLED(sinfo->sinfo_flags) ||
 	     !SCTP_PR_POLICY(sinfo->sinfo_flags)))
 		msg->expires_at = jiffies +
-				    msecs_to_jiffies(sinfo->sinfo_timetolive);
+				  msecs_to_jiffies(sinfo->sinfo_timetolive);
 
 	/* This is the biggest possible DATA chunk that can fit into
 	 * the packet
@@ -195,7 +193,6 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
 		   sizeof(struct sctphdr) - sizeof(struct sctp_data_chunk);
 	max_data = SCTP_TRUNC4(max_data);
 
-	max = asoc->frag_point;
 	/* If the the peer requested that we authenticate DATA chunks
 	 * we need to account for bundling of the AUTH chunks along with
 	 * DATA.
@@ -208,12 +205,11 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
 					      hmac_desc->hmac_len);
 	}
 
-	/* Now, check if we need to reduce our max */
-	if (max > max_data)
-		max = max_data;
+	/* Check what's our max considering the above */
+	max_data = min_t(size_t, max_data, asoc->frag_point);
 
-	whole = 0;
-	first_len = max;
+	/* Set first_len and then account for possible bundles on first frag */
+	first_len = max_data;
 
 	/* Check to see if we have a pending SACK and try to let it be bundled
 	 * with this message.  Do this if we don't have any data queued already.
@@ -224,40 +220,38 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
 	if (timer_pending(&asoc->timers[SCTP_EVENT_TIMEOUT_SACK]) &&
 	    asoc->outqueue.out_qlen == 0 &&
 	    list_empty(&asoc->outqueue.retransmit) &&
-	    msg_len > max)
-		max_data -= SCTP_PAD4(sizeof(sctp_sack_chunk_t));
+	    msg_len > max_data)
+		first_len -= SCTP_PAD4(sizeof(sctp_sack_chunk_t));
 
 	/* Encourage Cookie-ECHO bundling. */
 	if (asoc->state < SCTP_STATE_COOKIE_ECHOED)
-		max_data -= SCTP_ARBITRARY_COOKIE_ECHO_LEN;
-
-	/* Now that we adjusted completely, reset first_len */
-	if (first_len > max_data)
-		first_len = max_data;
+		first_len -= SCTP_ARBITRARY_COOKIE_ECHO_LEN;
 
 	/* Account for a different sized first fragment */
 	if (msg_len >= first_len) {
-		msg_len -= first_len;
-		whole = 1;
 		msg->can_delay = 0;
-	}
-
-	/* How many full sized?  How many bytes leftover? */
-	whole += msg_len / max;
-	over = msg_len % max;
-	offset = 0;
-
-	if ((whole > 1) || (whole && over))
 		SCTP_INC_STATS(sock_net(asoc->base.sk), SCTP_MIB_FRAGUSRMSGS);
+	} else {
+		/* Which may be the only one... */
+		first_len = msg_len;
+	}
 
-	/* Create chunks for all the full sized DATA chunks. */
-	for (i = 0, len = first_len; i < whole; i++) {
-		frag = SCTP_DATA_MIDDLE_FRAG;
+	/* Create chunks for all DATA chunks. */
+	for (remaining = msg_len; remaining; remaining -= len) {
+		u8 frag = SCTP_DATA_MIDDLE_FRAG;
 
-		if (0 == i)
+		if (remaining == msg_len) {
+			/* First frag, which may also be the last */
 			frag |= SCTP_DATA_FIRST_FRAG;
+			len = first_len;
+		} else {
+			/* Middle frags */
+			len = max_data;
+		}
 
-		if ((i == (whole - 1)) && !over) {
+		if (len >= remaining) {
+			/* Last frag, which may also be the first */
+			len = remaining;
 			frag |= SCTP_DATA_LAST_FRAG;
 
 			/* The application requests to set the I-bit of the
@@ -271,7 +265,6 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
 
 		chunk = sctp_make_datafrag_empty(asoc, sinfo, len, frag,
 						 0, GFP_KERNEL);
-
 		if (!chunk) {
 			err = -ENOMEM;
 			goto errout;
@@ -282,45 +275,8 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
 			goto errout_chunk_free;
 
 		/* Put the chunk->skb back into the form expected by send.  */
-		__skb_pull(chunk->skb, (__u8 *)chunk->chunk_hdr
-			   - (__u8 *)chunk->skb->data);
-
-		sctp_datamsg_assign(msg, chunk);
-		list_add_tail(&chunk->frag_list, &msg->chunks);
-
-		/* The first chunk, the first chunk was likely short
-		 * to allow bundling, so reset to full size.
-		 */
-		if (0 == i)
-			len = max;
-	}
-
-	/* .. now the leftover bytes. */
-	if (over) {
-		if (!whole)
-			frag = SCTP_DATA_NOT_FRAG;
-		else
-			frag = SCTP_DATA_LAST_FRAG;
-
-		if ((sinfo->sinfo_flags & SCTP_EOF) ||
-		    (sinfo->sinfo_flags & SCTP_SACK_IMMEDIATELY))
-			frag |= SCTP_DATA_SACK_IMM;
-
-		chunk = sctp_make_datafrag_empty(asoc, sinfo, over, frag,
-						 0, GFP_KERNEL);
-
-		if (!chunk) {
-			err = -ENOMEM;
-			goto errout;
-		}
-
-		err = sctp_user_addto_chunk(chunk, over, from);
-
-		/* Put the chunk->skb back into the form expected by send.  */
-		__skb_pull(chunk->skb, (__u8 *)chunk->chunk_hdr
-			   - (__u8 *)chunk->skb->data);
-		if (err < 0)
-			goto errout_chunk_free;
+		__skb_pull(chunk->skb, (__u8 *)chunk->chunk_hdr -
+				       chunk->skb->data);
 
 		sctp_datamsg_assign(msg, chunk);
 		list_add_tail(&chunk->frag_list, &msg->chunks);
@@ -338,6 +294,7 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
 		sctp_chunk_free(chunk);
 	}
 	sctp_datamsg_put(msg);
+
 	return ERR_PTR(err);
 }
 
-- 
2.9.3

^ permalink raw reply related

* Re: [PATCH iproute2 net-next] tc: flower: support matching flags
From: Stephen Hemminger @ 2016-12-29 18:43 UTC (permalink / raw)
  To: Paul Blakey
  Cc: netdev, David S. Miller, Hadar Hen Zion, Or Gerlitz, Roi Dayan
In-Reply-To: <1482930409-55059-1-git-send-email-paulb@mellanox.com>

On Wed, 28 Dec 2016 15:06:49 +0200
Paul Blakey <paulb@mellanox.com> wrote:

> Enhance flower to support matching on flags.
> 
> The 1st flag allows to match on whether the packet is
> an IP fragment.
> 
> Example:
> 
> 	# add a flower filter that will drop fragmented packets
> 	# (bit 0 of control flags)
> 	tc filter add dev ens4f0 protocol ip parent ffff: \
> 		flower \
> 		src_mac e4:1d:2d:fd:8b:01 \
> 		dst_mac e4:1d:2d:fd:8b:02 \
> 		indev ens4f0 \
> 		matching_flags 0x1/0x1 \
> 	action drop
> 
> Signed-off-by: Paul Blakey <paulb@mellanox.com>
> Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
> Reviewed-by: Roi Dayan <roid@mellanox.com>

Applied. Had to manually fixup merge conflicts with other flower changes.

^ permalink raw reply

* Re: [PATCH iproute2] fix typo in ip-xfrm man page, rmd610 -> rmd160
From: Stephen Hemminger @ 2016-12-29 18:44 UTC (permalink / raw)
  To: Alexey Kodanev; +Cc: netdev, Vasily Isaenko
In-Reply-To: <1482490996-17048-1-git-send-email-alexey.kodanev@oracle.com>

On Fri, 23 Dec 2016 14:03:16 +0300
Alexey Kodanev <alexey.kodanev@oracle.com> wrote:

> Signed-off-by: Alexey Kodanev <alexey.kodanev@oracle.com>


Applied.

^ permalink raw reply

* Re: [PATCH] tc: add missing limits.h header
From: Stephen Hemminger @ 2016-12-29 18:44 UTC (permalink / raw)
  To: Baruch Siach; +Cc: netdev
In-Reply-To: <c18609c5d906bbba8176138cfafd88c4a38c8190.1482432767.git.baruch@tkos.co.il>

On Thu, 22 Dec 2016 20:52:48 +0200
Baruch Siach <baruch@tkos.co.il> wrote:

> This fixes under musl build issues like:
> 
> f_matchall.c: In function ‘matchall_parse_opt’:
> f_matchall.c:48:12: error: ‘LONG_MIN’ undeclared (first use in this function)
>    if (h == LONG_MIN || h == LONG_MAX) {
>             ^
> f_matchall.c:48:12: note: each undeclared identifier is reported only once for each function it appears in
> f_matchall.c:48:29: error: ‘LONG_MAX’ undeclared (first use in this function)
>    if (h == LONG_MIN || h == LONG_MAX) {
>                              ^
> 
> Signed-off-by: Baruch Siach <baruch@tkos.co.il>

Sure, applied

^ permalink raw reply

* Re: [PATCH net] rtnl: stats - add missing netlink message size checks
From: David Miller @ 2016-12-29 19:06 UTC (permalink / raw)
  To: minipli; +Cc: netdev, roopa
In-Reply-To: <1482943935-18052-1-git-send-email-minipli@googlemail.com>

From: Mathias Krause <minipli@googlemail.com>
Date: Wed, 28 Dec 2016 17:52:15 +0100

> We miss to check if the netlink message is actually big enough to contain
> a struct if_stats_msg.
> 
> Add a check to prevent userland from sending us short messages that would
> make us access memory beyond the end of the message.
> 
> Fixes: 10c9ead9f3c6 ("rtnetlink: add new RTM_GETSTATS message to dump...")
> Signed-off-by: Mathias Krause <minipli@googlemail.com>

Looks good, applied and queued up for -stable.

^ permalink raw reply

* Re: [PATCH v2] net: fix incorrect original ingress device index in PKTINFO
From: David Miller @ 2016-12-29 19:08 UTC (permalink / raw)
  To: asuka.com; +Cc: kuznet, jmorris, yoshfuji, kaber, dsa, netdev, linux-kernel
In-Reply-To: <1483001104-17614-1-git-send-email-asuka.com@163.com>

From: Wei Zhang <asuka.com@163.com>
Date: Thu, 29 Dec 2016 16:45:04 +0800

> When we send a packet for our own local address on a non-loopback
> interface (e.g. eth0), due to the change had been introduced from
> commit 0b922b7a829c ("net: original ingress device index in PKTINFO"), the
> original ingress device index would be set as the loopback interface.
> However, the packet should be considered as if it is being arrived via the
> sending interface (eth0), otherwise it would break the expectation of the
> userspace application (e.g. the DHCPRELEASE message from dhcp_release
> binary would be ignored by the dnsmasq daemon, since it come from lo which
> is not the interface dnsmasq bind to)
> 
> Fixes: 0b922b7a829c ("net: original ingress device index in PKTINFO")
> Acked-by: David Ahern <dsa@cumulusnetworks.com>
> Signed-off-by: Wei Zhang <asuka.com@163.com>

Applied and queued up for -stable.

^ permalink raw reply

* Re: [PATCH v4] net: dev_weight: TX/RX orthogonality
From: David Miller @ 2016-12-29 19:08 UTC (permalink / raw)
  To: matthias.tafelmeier; +Cc: netdev, hagen, fw, edumazet, daniel
In-Reply-To: <1483005521-27799-1-git-send-email-matthias.tafelmeier@gmx.net>

From: Matthias Tafelmeier <matthias.tafelmeier@gmx.net>
Date: Thu, 29 Dec 2016 10:58:41 +0100

> Oftenly, introducing side effects on packet processing on the other half
> of the stack by adjusting one of TX/RX via sysctl is not desirable.
> There are cases of demand for asymmetric, orthogonal configurability.
> 
> This holds true especially for nodes where RPS for RFS usage on top is
> configured and therefore use the 'old dev_weight'. This is quite a
> common base configuration setup nowadays, even with NICs of superior processing
> support (e.g. aRFS).
> 
> A good example use case are nodes acting as noSQL data bases with a
> large number of tiny requests and rather fewer but large packets as responses.
> It's affordable to have large budget and rx dev_weights for the
> requests. But as a side effect having this large a number on TX
> processed in one run can overwhelm drivers.
> 
> This patch therefore introduces an independent configurability via sysctl to
> userland.

This is missing a signoff.

^ permalink raw reply

* Re: [PATCH net 0/5] mlx4 misc fixes
From: David Miller @ 2016-12-29 19:18 UTC (permalink / raw)
  To: tariqt; +Cc: netdev, eranbe
In-Reply-To: <1483029433-3624-1-git-send-email-tariqt@mellanox.com>

From: Tariq Toukan <tariqt@mellanox.com>
Date: Thu, 29 Dec 2016 18:37:08 +0200

> This patchset contains several bug fixes from the team to the
> mlx4 Eth and Core drivers.
> 
> Series generated against net commit:
> 60133867f1f1 'net: wan: slic_ds26522: fix spelling mistake: "configurated" -> "configured"'

Series applied, thank you.

^ permalink raw reply

* [PATCH v4] net: dev_weight: TX/RX orthogonality
From: Matthias Tafelmeier @ 2016-12-29 19:23 UTC (permalink / raw)
  To: netdev; +Cc: hagen, fw, edumazet, daniel
In-Reply-To: <20161229.140854.1456743873101323068.davem@davemloft.net>

Often, introducing side effects on packet processing on the other half
of the stack by adjusting one of TX/RX via sysctl is not desirable.
There are cases of demand for asymmetric, orthogonal configurability.

This holds true especially for nodes where RPS for RFS usage on top is
configured and therefore use the 'old dev_weight'. This is quite a
common base configuration setup nowadays, even with NICs of superior processing
support (e.g. aRFS).

A good example use case is nodes acting as NoSQL databases with a
large number of tiny requests and rather fewer but large packets as responses.
It's affordable to have large budget and rx dev_weights for the
requests. But as a side effect having this large a number on TX
processed in one run can overwhelm drivers.

This patch therefore introduces an independent configurability via sysctl to
userland.

Signed-off-by: Matthias Tafelmeier <matthias.tafelmeier@gmx.net>
---
 Documentation/sysctl/net.txt | 21 +++++++++++++++++++++
 include/linux/netdevice.h    |  4 ++++
 net/core/dev.c               |  6 +++++-
 net/core/sysctl_net_core.c   | 31 ++++++++++++++++++++++++++++++-
 net/sched/sch_generic.c      |  2 +-
 5 files changed, 61 insertions(+), 3 deletions(-)

diff --git a/Documentation/sysctl/net.txt b/Documentation/sysctl/net.txt
index f0480f7..53cef32 100644
--- a/Documentation/sysctl/net.txt
+++ b/Documentation/sysctl/net.txt
@@ -61,6 +61,27 @@ The maximum number of packets that kernel can handle on a NAPI interrupt,
 it's a Per-CPU variable.
 Default: 64
 
+dev_weight_rx_bias
+--------------
+
+RPS (e.g. RFS, aRFS) processing is competing with the registered NAPI poll function
+of the driver for the per softirq cycle netdev_budget. This parameter influences
+the proportion of the configured netdev_budget that is spent on RPS based packet
+processing during RX softirq cycles. It is further meant for making current
+dev_weight adaptable for asymmetric CPU needs on RX/TX side of the network stack.
+(see dev_weight_tx_bias) It is effective on a per CPU basis. Determination is based
+on dev_weight and is calculated multiplicative (dev_weight * dev_weight_rx_bias).
+Default: 1
+
+dev_weight_tx_bias
+--------------
+
+Scales the maximum number of packets that can be processed during a TX softirq cycle.
+Effective on a per CPU basis. Allows scaling of current dev_weight for asymmetric
+net stack processing needs. Be careful to avoid making TX softirq processing a CPU hog.
+Calculation is based on dev_weight (dev_weight * dev_weight_tx_bias).
+Default: 1
+
 default_qdisc
 --------------
 
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 994f742..ecd78b3 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3795,6 +3795,10 @@ void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64,
 extern int		netdev_max_backlog;
 extern int		netdev_tstamp_prequeue;
 extern int		weight_p;
+extern int		dev_weight_rx_bias;
+extern int		dev_weight_tx_bias;
+extern int		dev_rx_weight;
+extern int		dev_tx_weight;
 
 bool netdev_has_upper_dev(struct net_device *dev, struct net_device *upper_dev);
 struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev,
diff --git a/net/core/dev.c b/net/core/dev.c
index 8db5a0b..f2fe98b 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3428,6 +3428,10 @@ EXPORT_SYMBOL(netdev_max_backlog);
 int netdev_tstamp_prequeue __read_mostly = 1;
 int netdev_budget __read_mostly = 300;
 int weight_p __read_mostly = 64;            /* old backlog weight */
+int dev_weight_rx_bias __read_mostly = 1;            /* bias for backlog weight */
+int dev_weight_tx_bias __read_mostly = 1;            /* bias for output_queue quota */
+int dev_rx_weight __read_mostly = weight_p;
+int dev_tx_weight __read_mostly = weight_p;
 
 /* Called with irq disabled */
 static inline void ____napi_schedule(struct softnet_data *sd,
@@ -4833,7 +4837,7 @@ static int process_backlog(struct napi_struct *napi, int quota)
 		net_rps_action_and_irq_enable(sd);
 	}
 
-	napi->weight = weight_p;
+	napi->weight = dev_rx_weight;
 	while (again) {
 		struct sk_buff *skb;
 
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 2a46e40..698ddd7 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -222,6 +222,21 @@ static int set_default_qdisc(struct ctl_table *table, int write,
 }
 #endif
 
+static int proc_do_dev_weight(struct ctl_table *table, int write,
+			   void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	int ret;
+
+	ret = proc_dointvec(table, write, buffer, lenp, ppos);
+	if (ret != 0)
+		return ret;
+
+	dev_rx_weight = weight_p * dev_weight_rx_bias;
+	dev_tx_weight = weight_p * dev_weight_tx_bias;
+
+	return ret;
+}
+
 static int proc_do_rss_key(struct ctl_table *table, int write,
 			   void __user *buffer, size_t *lenp, loff_t *ppos)
 {
@@ -273,7 +288,21 @@ static struct ctl_table net_core_table[] = {
 		.data		= &weight_p,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec
+		.proc_handler	= proc_do_dev_weight,
+	},
+	{
+		.procname	= "dev_weight_rx_bias",
+		.data		= &dev_weight_rx_bias,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_do_dev_weight,
+	},
+	{
+		.procname	= "dev_weight_tx_bias",
+		.data		= &dev_weight_tx_bias,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_do_dev_weight,
 	},
 	{
 		.procname	= "netdev_max_backlog",
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 6eb9c8e..b052b27 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -247,7 +247,7 @@ static inline int qdisc_restart(struct Qdisc *q, int *packets)
 
 void __qdisc_run(struct Qdisc *q)
 {
-	int quota = weight_p;
+	int quota = dev_tx_weight;
 	int packets;
 
 	while (qdisc_restart(q, &packets)) {
-- 
2.7.4

^ permalink raw reply related

* Re: [PATCH net-next 00/14] bnxt_en: updates for net-next.
From: David Miller @ 2016-12-29 19:42 UTC (permalink / raw)
  To: michael.chan; +Cc: netdev
In-Reply-To: <1483031624-20076-1-git-send-email-michael.chan@broadcom.com>

From: Michael Chan <michael.chan@broadcom.com>
Date: Thu, 29 Dec 2016 12:13:30 -0500

> This patch series for net-next contains cleanups, new features and minor
> fixes.  The driver specific busy polling code is removed to use busy
> polling support in core networking.  Hardware RFS support is enhanced with
> added ipv6 flows support and VF support.  A new scheme to allocate TX
> rings from the firmware is implemented for newer chips and firmware.  Plus
> some misc. cleanups, minor fixes, and to add the maintainer entry.  Please
> review.

Looks good, series applied, thanks Michael.

^ permalink raw reply

* Re: [PATCH net-next] sctp: refactor sctp_datamsg_from_user
From: David Miller @ 2016-12-29 19:44 UTC (permalink / raw)
  To: marcelo.leitner; +Cc: netdev, linux-sctp, nhorman, vyasevich
In-Reply-To: <ba007bd30f61decbdae5c23541cbdb397c030b68.1483033905.git.marcelo.leitner@gmail.com>

From: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Date: Thu, 29 Dec 2016 15:53:28 -0200

> This patch refactors sctp_datamsg_from_user() in an attempt to make it
> better to read and avoid code duplication for handling the last
> fragment.
> 
> It also avoids doing division and remaining operations. Even though, it
> should still operate similarly as before this patch.
> 
> Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>

Applied.

^ permalink raw reply

* Re: [PATCH v4] net: dev_weight: TX/RX orthogonality
From: David Miller @ 2016-12-29 19:44 UTC (permalink / raw)
  To: matthias.tafelmeier; +Cc: netdev, hagen, fw, edumazet, daniel
In-Reply-To: <1483039398-28017-1-git-send-email-matthias.tafelmeier@gmx.net>

From: Matthias Tafelmeier <matthias.tafelmeier@gmx.net>
Date: Thu, 29 Dec 2016 20:23:18 +0100

> Oftenly, introducing side effects on packet processing on the other half
> of the stack by adjusting one of TX/RX via sysctl is not desirable.
> There are cases of demand for asymmetric, orthogonal configurability.
> 
> This holds true especially for nodes where RPS for RFS usage on top is
> configured and therefore use the 'old dev_weight'. This is quite a
> common base configuration setup nowadays, even with NICs of superior processing
> support (e.g. aRFS).
> 
> A good example use case are nodes acting as noSQL data bases with a
> large number of tiny requests and rather fewer but large packets as responses.
> It's affordable to have large budget and rx dev_weights for the
> requests. But as a side effect having this large a number on TX
> processed in one run can overwhelm drivers.
> 
> This patch therefore introduces an independent configurability via sysctl to
> userland.
> 
> Signed-off-by: Matthias Tafelmeier <matthias.tafelmeier@gmx.net>

Applied.

^ permalink raw reply

* Re: [PATCH v4] net: dev_weight: TX/RX orthogonality
From: David Miller @ 2016-12-29 19:45 UTC (permalink / raw)
  To: matthias.tafelmeier; +Cc: netdev, hagen, fw, edumazet, daniel
In-Reply-To: <20161229.144456.2187353715676829840.davem@davemloft.net>


Actually, reverted, you didn't even build test this:

net/core/dev.c:3433:35: error: initializer element is not constant
 int dev_rx_weight __read_mostly = weight_p;
                                   ^~~~~~~~
net/core/dev.c:3434:35: error: initializer element is not constant
 int dev_tx_weight __read_mostly = weight_p;
                                   ^~~~~~~~

^ permalink raw reply

* Re: [PATCH v4] net: dev_weight: TX/RX orthogonality
From: Matthias Tafelmeier @ 2016-12-29 19:53 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, hagen, fw, edumazet, daniel
In-Reply-To: <20161229.144555.1740958763290967121.davem@davemloft.net>


[-- Attachment #1.1.1: Type: text/plain, Size: 435 bytes --]


> Actually, reverted, you didn't even build test this:
>
> net/core/dev.c:3433:35: error: initializer element is not constant
>  int dev_rx_weight __read_mostly = weight_p;
>                                    ^~~~~~~~
> net/core/dev.c:3434:35: error: initializer element is not constant
>  int dev_tx_weight __read_mostly = weight_p;
>                                    ^~~~~~~~

Thought I would have ... let me check.


[-- Attachment #1.1.2: 0x8ADF343B.asc --]
[-- Type: application/pgp-keys, Size: 4806 bytes --]

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 538 bytes --]

^ permalink raw reply

* [PATCH] wlcore: fix spelling mistake in wl1271_warning
From: Colin King @ 2016-12-29 20:14 UTC (permalink / raw)
  To: Kalle Valo, Shahar Patury, Guy Mishol, linux-wireless, netdev
  Cc: linux-kernel

From: Colin Ian King <colin.king@canonical.com>

trivial fix to spelling mistake of function name in wl1271_warning,
should be dynamic_ps_timeout instead of dyanmic_ps_timeout.

Signed-off-by: Colin Ian King <colin.king@canonical.com>
---
 drivers/net/wireless/ti/wlcore/debugfs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/ti/wlcore/debugfs.c b/drivers/net/wireless/ti/wlcore/debugfs.c
index 7f672f6..58e148d 100644
--- a/drivers/net/wireless/ti/wlcore/debugfs.c
+++ b/drivers/net/wireless/ti/wlcore/debugfs.c
@@ -281,7 +281,7 @@ static ssize_t dynamic_ps_timeout_write(struct file *file,
 	}
 
 	if (value < 1 || value > 65535) {
-		wl1271_warning("dyanmic_ps_timeout is not in valid range");
+		wl1271_warning("dynamic_ps_timeout is not in valid range");
 		return -ERANGE;
 	}
 
-- 
2.10.2

^ permalink raw reply related

* Re: [PATCH] stmmac: adding EEE to GMAC4
From: David Miller @ 2016-12-29 20:14 UTC (permalink / raw)
  To: Joao.Pinto
  Cc: peppe.cavallaro, alexandre.torgue, rayagond, linux-kernel, netdev
In-Reply-To: <f89aaac8024e2a92834088545ad72fbe682b32ed.1483030665.git.jpinto@synopsys.com>

From: Joao Pinto <Joao.Pinto@synopsys.com>
Date: Thu, 29 Dec 2016 17:10:27 +0000

> This patch adds Energy Efficiency Ethernet to GMAC4.
> 
> Signed-off-by: Joao Pinto <jpinto@synopsys.com>

Applied, thanks.

^ permalink raw reply

* [PATCH] [media] gp8psk: fix spelling mistake: "firmare" -> "firmware"
From: Colin King @ 2016-12-29 20:29 UTC (permalink / raw)
  To: Mauro Carvalho Chehab, Larry Finger, Chaoming Li, Kalle Valo,
	linux-media, linux-wireless, netdev
  Cc: linux-kernel

From: Colin Ian King <colin.king@canonical.com>

trivial fix to spelling mistake in err message

Signed-off-by: Colin Ian King <colin.king@canonical.com>
---
 drivers/media/usb/dvb-usb/gp8psk.c          | 2 +-
 drivers/net/wireless/realtek/rtlwifi/core.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/media/usb/dvb-usb/gp8psk.c b/drivers/media/usb/dvb-usb/gp8psk.c
index 2360e7e..26461f2 100644
--- a/drivers/media/usb/dvb-usb/gp8psk.c
+++ b/drivers/media/usb/dvb-usb/gp8psk.c
@@ -161,7 +161,7 @@ static int gp8psk_load_bcm4500fw(struct dvb_usb_device *d)
 			goto out_free;
 		}
 		if (buflen > 64) {
-			err("firmare chunk size bigger than 64 bytes.");
+			err("firmware chunk size bigger than 64 bytes.");
 			goto out_free;
 		}
 
diff --git a/drivers/net/wireless/realtek/rtlwifi/core.c b/drivers/net/wireless/realtek/rtlwifi/core.c
index ded1493..732de0a 100644
--- a/drivers/net/wireless/realtek/rtlwifi/core.c
+++ b/drivers/net/wireless/realtek/rtlwifi/core.c
@@ -1532,7 +1532,7 @@ static int rtl_op_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd,
 		key_type = AESCMAC_ENCRYPTION;
 		RT_TRACE(rtlpriv, COMP_SEC, DBG_DMESG, "alg:CMAC\n");
 		RT_TRACE(rtlpriv, COMP_SEC, DBG_DMESG,
-			 "HW don't support CMAC encrypiton, use software CMAC encrypiton\n");
+			 "HW don't support CMAC encryption, use software CMAC encryption\n");
 		err = -EOPNOTSUPP;
 		goto out_unlock;
 	default:
-- 
2.10.2

^ permalink raw reply related

* [PATCH v5] net: dev_weight: TX/RX orthogonality
From: Matthias Tafelmeier @ 2016-12-29 20:37 UTC (permalink / raw)
  To: netdev; +Cc: hagen, fw, edumazet, daniel
In-Reply-To: <20161229.144555.1740958763290967121.davem@davemloft.net>

Often, introducing side effects on packet processing on the other half
of the stack by adjusting one of TX/RX via sysctl is not desirable.
There are cases of demand for asymmetric, orthogonal configurability.

This holds true especially for nodes where RPS for RFS usage on top is
configured and therefore use the 'old dev_weight'. This is quite a
common base configuration setup nowadays, even with NICs of superior processing
support (e.g. aRFS).

A good example use case is nodes acting as NoSQL databases with a
large number of tiny requests and rather fewer but large packets as responses.
It's affordable to have large budget and rx dev_weights for the
requests. But as a side effect having this large a number on TX
processed in one run can overwhelm drivers.

This patch therefore introduces an independent configurability via sysctl to
userland.

Signed-off-by: Matthias Tafelmeier <matthias.tafelmeier@gmx.net>
---
 Documentation/sysctl/net.txt | 21 +++++++++++++++++++++
 include/linux/netdevice.h    |  4 ++++
 net/core/dev.c               |  8 ++++++--
 net/core/sysctl_net_core.c   | 31 ++++++++++++++++++++++++++++++-
 net/sched/sch_generic.c      |  2 +-
 5 files changed, 62 insertions(+), 4 deletions(-)

diff --git a/Documentation/sysctl/net.txt b/Documentation/sysctl/net.txt
index f0480f7..53cef32 100644
--- a/Documentation/sysctl/net.txt
+++ b/Documentation/sysctl/net.txt
@@ -61,6 +61,27 @@ The maximum number of packets that kernel can handle on a NAPI interrupt,
 it's a Per-CPU variable.
 Default: 64
 
+dev_weight_rx_bias
+--------------
+
+RPS (e.g. RFS, aRFS) processing is competing with the registered NAPI poll function
+of the driver for the per softirq cycle netdev_budget. This parameter influences
+the proportion of the configured netdev_budget that is spent on RPS based packet
+processing during RX softirq cycles. It is further meant for making current
+dev_weight adaptable for asymmetric CPU needs on RX/TX side of the network stack.
+(see dev_weight_tx_bias) It is effective on a per CPU basis. Determination is based
+on dev_weight and is calculated multiplicative (dev_weight * dev_weight_rx_bias).
+Default: 1
+
+dev_weight_tx_bias
+--------------
+
+Scales the maximum number of packets that can be processed during a TX softirq cycle.
+Effective on a per CPU basis. Allows scaling of current dev_weight for asymmetric
+net stack processing needs. Be careful to avoid making TX softirq processing a CPU hog.
+Calculation is based on dev_weight (dev_weight * dev_weight_tx_bias).
+Default: 1
+
 default_qdisc
 --------------
 
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 994f742..ecd78b3 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3795,6 +3795,10 @@ void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64,
 extern int		netdev_max_backlog;
 extern int		netdev_tstamp_prequeue;
 extern int		weight_p;
+extern int		dev_weight_rx_bias;
+extern int		dev_weight_tx_bias;
+extern int		dev_rx_weight;
+extern int		dev_tx_weight;
 
 bool netdev_has_upper_dev(struct net_device *dev, struct net_device *upper_dev);
 struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev,
diff --git a/net/core/dev.c b/net/core/dev.c
index 8db5a0b..0d34e1c 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3427,7 +3427,11 @@ EXPORT_SYMBOL(netdev_max_backlog);
 
 int netdev_tstamp_prequeue __read_mostly = 1;
 int netdev_budget __read_mostly = 300;
-int weight_p __read_mostly = 64;            /* old backlog weight */
+int weight_p __read_mostly = 64;           /* old backlog weight */
+int dev_weight_rx_bias __read_mostly = 1;  /* bias for backlog weight */
+int dev_weight_tx_bias __read_mostly = 1;  /* bias for output_queue quota */
+int dev_rx_weight __read_mostly = 64;
+int dev_tx_weight __read_mostly = 64;
 
 /* Called with irq disabled */
 static inline void ____napi_schedule(struct softnet_data *sd,
@@ -4833,7 +4837,7 @@ static int process_backlog(struct napi_struct *napi, int quota)
 		net_rps_action_and_irq_enable(sd);
 	}
 
-	napi->weight = weight_p;
+	napi->weight = dev_rx_weight;
 	while (again) {
 		struct sk_buff *skb;
 
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 2a46e40..698ddd7 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -222,6 +222,21 @@ static int set_default_qdisc(struct ctl_table *table, int write,
 }
 #endif
 
+static int proc_do_dev_weight(struct ctl_table *table, int write,
+			   void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	int ret;
+
+	ret = proc_dointvec(table, write, buffer, lenp, ppos);
+	if (ret != 0)
+		return ret;
+
+	dev_rx_weight = weight_p * dev_weight_rx_bias;
+	dev_tx_weight = weight_p * dev_weight_tx_bias;
+
+	return ret;
+}
+
 static int proc_do_rss_key(struct ctl_table *table, int write,
 			   void __user *buffer, size_t *lenp, loff_t *ppos)
 {
@@ -273,7 +288,21 @@ static struct ctl_table net_core_table[] = {
 		.data		= &weight_p,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec
+		.proc_handler	= proc_do_dev_weight,
+	},
+	{
+		.procname	= "dev_weight_rx_bias",
+		.data		= &dev_weight_rx_bias,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_do_dev_weight,
+	},
+	{
+		.procname	= "dev_weight_tx_bias",
+		.data		= &dev_weight_tx_bias,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_do_dev_weight,
 	},
 	{
 		.procname	= "netdev_max_backlog",
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 6eb9c8e..b052b27 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -247,7 +247,7 @@ static inline int qdisc_restart(struct Qdisc *q, int *packets)
 
 void __qdisc_run(struct Qdisc *q)
 {
-	int quota = weight_p;
+	int quota = dev_tx_weight;
 	int packets;
 
 	while (qdisc_restart(q, &packets)) {
-- 
2.7.4

^ permalink raw reply related

* Re: [PATCH] rtlwifi: fix spelling mistake: "contry" -> "country"
From: Larry Finger @ 2016-12-29 20:58 UTC (permalink / raw)
  To: Colin King, Chaoming Li, Kalle Valo, linux-wireless, netdev; +Cc: linux-kernel
In-Reply-To: <20161229160029.22117-1-colin.king@canonical.com>

On 12/29/2016 10:00 AM, Colin King wrote:
> From: Colin Ian King <colin.king@canonical.com>
>
> trivial fix to spelling mistake in RT_TRACE message
>
> Signed-off-by: Colin Ian King <colin.king@canonical.com>

Acked-by: Larry Finger <Larry.Finger@lwfinger.net>

Larry

> ---
>  drivers/net/wireless/realtek/rtlwifi/regd.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/net/wireless/realtek/rtlwifi/regd.c b/drivers/net/wireless/realtek/rtlwifi/regd.c
> index 6ee6bf8..558c31b 100644
> --- a/drivers/net/wireless/realtek/rtlwifi/regd.c
> +++ b/drivers/net/wireless/realtek/rtlwifi/regd.c
> @@ -440,7 +440,7 @@ int rtl_regd_init(struct ieee80211_hw *hw,
>
>  	if (rtlpriv->regd.country_code >= COUNTRY_CODE_MAX) {
>  		RT_TRACE(rtlpriv, COMP_REGD, DBG_DMESG,
> -			 "rtl: EEPROM indicates invalid contry code, world wide 13 should be used\n");
> +			 "rtl: EEPROM indicates invalid country code, world wide 13 should be used\n");
>
>  		rtlpriv->regd.country_code = COUNTRY_CODE_WORLD_WIDE_13;
>  	}
>

^ permalink raw reply

* [PATCH] sh_eth: fix branch prediction in sh_eth_interrupt()
From: Sergei Shtylyov @ 2016-12-29 21:07 UTC (permalink / raw)
  To: netdev, linux-renesas-soc; +Cc: ben.hutchings

IIUC, likely()/unlikely() should apply to the whole *if* statement's
expression, not a part of it -- fix such expression in sh_eth_interrupt()
accordingly...

Fixes: 283e38db65e7 ("sh_eth: Fix serialisation of interrupt disable with interrupt & NAPI handlers")
Signed-off-by: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>

---
The patch is against DaveM's 'net-next.git' repo; I'm not sure if it should
be targeted to the 'net.git' repo instead...

 drivers/net/ethernet/renesas/sh_eth.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

Index: net-next/drivers/net/ethernet/renesas/sh_eth.c
===================================================================
--- net-next.orig/drivers/net/ethernet/renesas/sh_eth.c
+++ net-next/drivers/net/ethernet/renesas/sh_eth.c
@@ -1656,7 +1656,7 @@ static irqreturn_t sh_eth_interrupt(int
 	else
 		goto out;
 
-	if (!likely(mdp->irq_enabled)) {
+	if (unlikely(!mdp->irq_enabled)) {
 		sh_eth_write(ndev, 0, EESIPR);
 		goto out;
 	}

^ permalink raw reply

* Re: [PATCH] [media] gp8psk: fix spelling mistake: "firmare" -> "firmware"
From: VDR User @ 2016-12-29 21:23 UTC (permalink / raw)
  To: Colin King
  Cc: Mauro Carvalho Chehab, Larry Finger, Chaoming Li, Kalle Valo,
	mailing list: linux-media, linux-wireless, netdev,
	Linux Kernel Mailing List
In-Reply-To: <20161229202952.27448-1-colin.king@canonical.com>

> -                       err("firmare chunk size bigger than 64 bytes.");
> +                       err("firmware chunk size bigger than 64 bytes.");

Yup.

> -                        "HW don't support CMAC encrypiton, use software CMAC encrypiton\n");
> +                        "HW don't support CMAC encryption, use software CMAC encryption\n");

Should be: "HW doesn't support CMAC encryption, use software CMAC
encryption\n");

^ permalink raw reply

* [net-next PATCH 0/6] i40e: Add VF port representator support for SR-IOV VFs
From: Sridhar Samudrala @ 2016-12-30  6:20 UTC (permalink / raw)
  To: alexander.h.duyck, john.r.fastabend, anjali.singhai,
	jakub.kicinski, intel-wired-lan, netdev

- Patch 1 introduces devlink interface to get/set the mode of SRIOV switch.
- Patch 2 adds support to create VF port representor(VFPR) netdevs associated
  with SR-IOV VFs that can be used to control/configure VFs from PF name space.
- Patch 3 enables syncing link state between VFs and VFPRs.
- Patch 4 adds a new type to metadata_dst to allow passing VF id to lower device.
- Patch 5 adds TX and RX support to VFPR netdevs.
- Patch 6 enables HW and SW VFPR statistics to be exposed via netlink on VFPR
  netdevs.

Jakub Kicinski (1):
  net: store port/representator id in metadata_dst

Sridhar Samudrala (5):
  i40e: Introduce devlink interface.
  i40e: Introduce VF Port Representator(VFPR) netdevs.
  i40e: Sync link state between VFs and VFPRs
  i40e: Add TX and RX support in switchdev mode.
  i40e: Add support for exposing VF port statistics via VFPR netdev on
    the host.

 drivers/net/ethernet/intel/Kconfig                 |   1 +
 drivers/net/ethernet/intel/i40e/i40e.h             |   4 +
 drivers/net/ethernet/intel/i40e/i40e_main.c        |  96 +++++-
 drivers/net/ethernet/intel/i40e/i40e_txrx.c        | 132 ++++++++-
 drivers/net/ethernet/intel/i40e/i40e_txrx.h        |   1 +
 drivers/net/ethernet/intel/i40e/i40e_type.h        |   3 +
 drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 321 ++++++++++++++++++++-
 drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h |  25 ++
 include/net/dst_metadata.h                         |  35 ++-
 net/core/dst.c                                     |  15 +-
 net/core/filter.c                                  |   1 +
 net/ipv4/ip_tunnel_core.c                          |   6 +-
 net/openvswitch/flow_netlink.c                     |   4 +-
 13 files changed, 610 insertions(+), 34 deletions(-)

-- 
2.5.5

^ permalink raw reply

* [net-next PATCH 1/6] i40e: Introduce devlink interface.
From: Sridhar Samudrala @ 2016-12-30  6:20 UTC (permalink / raw)
  To: alexander.h.duyck, john.r.fastabend, anjali.singhai,
	jakub.kicinski, intel-wired-lan, netdev
In-Reply-To: <1483078863-22026-1-git-send-email-sridhar.samudrala@intel.com>

Add initial devlink support to get/set the mode of SRIOV switch.
This patch sets the default mode as 'legacy' and enables getting the mode
and setting it to 'legacy'.

The switch mode can be queried and set via the following 'devlink' commands.

# devlink dev eswitch show pci/0000:05:00.0
pci/0000:05:00.0: mode legacy
# devlink dev eswitch set pci/0000:05:00.0 mode switchdev
devlink answers: Operation not supported
# devlink dev eswitch set pci/0000:05:00.0 mode legacy
# devlink dev eswitch show pci/0000:05:00.0
pci/0000:05:00.0: mode legacy

Signed-off-by: Sridhar Samudrala <sridhar.samudrala@intel.com>
---
 drivers/net/ethernet/intel/Kconfig          |  1 +
 drivers/net/ethernet/intel/i40e/i40e.h      |  3 ++
 drivers/net/ethernet/intel/i40e/i40e_main.c | 80 ++++++++++++++++++++++++++---
 3 files changed, 76 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/intel/Kconfig b/drivers/net/ethernet/intel/Kconfig
index 1349b45..0dbb87e 100644
--- a/drivers/net/ethernet/intel/Kconfig
+++ b/drivers/net/ethernet/intel/Kconfig
@@ -215,6 +215,7 @@ config I40E
 	tristate "Intel(R) Ethernet Controller XL710 Family support"
 	imply PTP_1588_CLOCK
 	depends on PCI
+	depends on MAY_USE_DEVLINK
 	---help---
 	  This driver supports Intel(R) Ethernet Controller XL710 Family of
 	  devices.  For more information on how to identify your adapter, go
diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
index ba8d309..410f83d 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -54,6 +54,8 @@
 #include <linux/clocksource.h>
 #include <linux/net_tstamp.h>
 #include <linux/ptp_clock_kernel.h>
+#include <net/devlink.h>
+
 #include "i40e_type.h"
 #include "i40e_prototype.h"
 #ifdef I40E_FCOE
@@ -448,6 +450,7 @@ struct i40e_pf {
 	u32 ioremap_len;
 	u32 fd_inv;
 	u16 phy_led_val;
+	enum devlink_eswitch_mode eswitch_mode;
 };
 
 /**
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index ad4cf63..c01a620 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -10910,6 +10910,57 @@ static void i40e_get_platform_mac_addr(struct pci_dev *pdev, struct i40e_pf *pf)
 }
 
 /**
+ * i40e_devlink_eswitch_mode_get
+ *
+ * @devlink: pointer to devlink struct
+ * @mode: sr-iov switch mode pointer
+ *
+ * Returns the switch mode of the associated PF in the @mode pointer.
+ */
+static int i40e_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode)
+{
+	struct i40e_pf *pf = devlink_priv(devlink);
+
+	*mode = pf->eswitch_mode;
+
+	return 0;
+}
+
+/**
+ * i40e_devlink_eswitch_mode_set
+ *
+ * @devlink: pointer to devlink struct
+ * @mode: sr-iov switch mode
+ *
+ * Set the switch mode of the associated PF.
+ * Returns 0 on success and -EOPNOTSUPP on error.
+ */
+static int i40e_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode)
+{
+	struct i40e_pf *pf = devlink_priv(devlink);
+	int err = 0;
+
+	if (mode == pf->eswitch_mode)
+		goto done;
+
+	switch (mode) {
+	case DEVLINK_ESWITCH_MODE_LEGACY:
+		pf->eswitch_mode = mode;
+		break;
+	default:
+		err = -EOPNOTSUPP;
+		break;
+	}
+done:
+	return err;
+}
+
+static const struct devlink_ops i40e_devlink_ops = {
+	.eswitch_mode_get = i40e_devlink_eswitch_mode_get,
+	.eswitch_mode_set = i40e_devlink_eswitch_mode_set,
+};
+
+/**
  * i40e_probe - Device initialization routine
  * @pdev: PCI device information struct
  * @ent: entry in i40e_pci_tbl
@@ -10926,6 +10977,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	struct i40e_pf *pf;
 	struct i40e_hw *hw;
 	static u16 pfs_found;
+	struct devlink *devlink;
 	u16 wol_nvm_bits;
 	u16 link_status;
 	int err;
@@ -10959,20 +11011,28 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	pci_enable_pcie_error_reporting(pdev);
 	pci_set_master(pdev);
 
+	devlink = devlink_alloc(&i40e_devlink_ops, sizeof(*pf));
+	if (!devlink) {
+		dev_err(&pdev->dev, "devlink_alloc failed\n");
+		err = -ENOMEM;
+		goto err_devlink_alloc;
+	}
+
 	/* Now that we have a PCI connection, we need to do the
 	 * low level device setup.  This is primarily setting up
 	 * the Admin Queue structures and then querying for the
 	 * device's current profile information.
 	 */
-	pf = kzalloc(sizeof(*pf), GFP_KERNEL);
-	if (!pf) {
-		err = -ENOMEM;
-		goto err_pf_alloc;
-	}
+	pf = devlink_priv(devlink);
 	pf->next_vsi = 0;
 	pf->pdev = pdev;
 	set_bit(__I40E_DOWN, &pf->state);
 
+	pf->eswitch_mode = DEVLINK_ESWITCH_MODE_LEGACY;
+	err = devlink_register(devlink, &pdev->dev);
+	if (err)
+		goto err_devlink_register;
+
 	hw = &pf->hw;
 	hw->back = pf;
 
@@ -11445,8 +11505,10 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 err_pf_reset:
 	iounmap(hw->hw_addr);
 err_ioremap:
-	kfree(pf);
-err_pf_alloc:
+	devlink_unregister(devlink);
+err_devlink_register:
+	devlink_free(devlink);
+err_devlink_alloc:
 	pci_disable_pcie_error_reporting(pdev);
 	pci_release_mem_regions(pdev);
 err_pci_reg:
@@ -11468,6 +11530,7 @@ static void i40e_remove(struct pci_dev *pdev)
 {
 	struct i40e_pf *pf = pci_get_drvdata(pdev);
 	struct i40e_hw *hw = &pf->hw;
+	struct devlink *devlink = priv_to_devlink(pf);
 	i40e_status ret_code;
 	int i;
 
@@ -11554,7 +11617,8 @@ static void i40e_remove(struct pci_dev *pdev)
 	kfree(pf->vsi);
 
 	iounmap(hw->hw_addr);
-	kfree(pf);
+	devlink_unregister(devlink);
+	devlink_free(devlink);
 	pci_release_mem_regions(pdev);
 
 	pci_disable_pcie_error_reporting(pdev);
-- 
2.5.5

^ permalink raw reply related

* [net-next PATCH 2/6] i40e: Introduce VF Port Representator(VFPR) netdevs.
From: Sridhar Samudrala @ 2016-12-30  6:20 UTC (permalink / raw)
  To: alexander.h.duyck, john.r.fastabend, anjali.singhai,
	jakub.kicinski, intel-wired-lan, netdev
In-Reply-To: <1483078863-22026-1-git-send-email-sridhar.samudrala@intel.com>

VF Port Representor netdevs are created for each VF if the switch mode
is set to 'switchdev'. These netdevs can be used to control and configure
VFs from the PF's namespace. They enable exposing VF statistics, configuring
and monitoring link state, mtu, filters, fdb/vlan entries etc. of VFs.
Broadcast filters are not enabled in switchdev mode.

Sample script to create VF port representors
# rmmod i40e; modprobe i40e
# devlink dev eswitch set pci/0000:05:00.0 mode switchdev
# echo 2 > /sys/class/net/enp5s0f0/device/sriov_numvfs
# ip l show
297: enp5s0f0: <BROADCAST,MULTICAST> mtu 1500 qdisc noop portid 6805ca2e7268 state DOWN mode DEFAULT group default qlen 1000
     link/ether 68:05:ca:2e:72:68 brd ff:ff:ff:ff:ff:ff
     vf 0 MAC 00:00:00:00:00:00, spoof checking on, link-state auto, trust off
     vf 1 MAC 00:00:00:00:00:00, spoof checking on, link-state auto, trust off
299: enp5s0f0-vf0: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000
     link/ether 00:00:00:00:00:00 brd ff:ff:ff:ff:ff:ff
300: enp5s0f0-vf1: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000
     link/ether 00:00:00:00:00:00 brd ff:ff:ff:ff:ff:ff

Signed-off-by: Sridhar Samudrala <sridhar.samudrala@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_main.c        |  14 +-
 drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 151 ++++++++++++++++++++-
 drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h |  14 ++
 3 files changed, 172 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index c01a620..03d07dd 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -10938,15 +10938,27 @@ static int i40e_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode)
 static int i40e_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode)
 {
 	struct i40e_pf *pf = devlink_priv(devlink);
-	int err = 0;
+	struct i40e_vf *vf;
+	int i, err = 0;
 
 	if (mode == pf->eswitch_mode)
 		goto done;
 
 	switch (mode) {
 	case DEVLINK_ESWITCH_MODE_LEGACY:
+		for (i = 0; i < pf->num_alloc_vfs; i++) {
+			vf = &(pf->vf[i]);
+			i40e_free_vfpr_netdev(vf);
+		}
 		pf->eswitch_mode = mode;
 		break;
+	case DEVLINK_ESWITCH_MODE_SWITCHDEV:
+		for (i = 0; i < pf->num_alloc_vfs; i++) {
+			vf = &(pf->vf[i]);
+			i40e_alloc_vfpr_netdev(vf, i);
+		}
+                pf->eswitch_mode = mode;
+		break;
 	default:
 		err = -EOPNOTSUPP;
 		break;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index a6198b7..6c5b296 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -697,12 +697,16 @@ static int i40e_alloc_vsi_res(struct i40e_vf *vf, enum i40e_vsi_type type)
 					 "Could not add MAC filter %pM for VF %d\n",
 					vf->default_lan_addr.addr, vf->vf_id);
 		}
-		eth_broadcast_addr(broadcast);
-		f = i40e_add_filter(vsi, broadcast,
-				    vf->port_vlan_id ? vf->port_vlan_id : -1);
-		if (!f)
-			dev_info(&pf->pdev->dev,
-				 "Could not allocate VF broadcast filter\n");
+
+		/* Add VF broadcast filter only in 'legacy' mode */
+		if (vsi->back->eswitch_mode == DEVLINK_ESWITCH_MODE_LEGACY) {
+			eth_broadcast_addr(broadcast);
+			f = i40e_add_filter(vsi, broadcast,
+					    vf->port_vlan_id ? vf->port_vlan_id : -1);
+			if (!f)
+				dev_info(&pf->pdev->dev,
+					 "Could not allocate VF broadcast filter\n");
+		}
 		spin_unlock_bh(&vsi->mac_filter_hash_lock);
 		i40e_write_rx_ctl(&pf->hw, I40E_VFQF_HENA1(0, vf->vf_id),
 				  (u32)hena);
@@ -1020,6 +1024,136 @@ void i40e_reset_vf(struct i40e_vf *vf, bool flr)
 }
 
 /**
+ * i40e_vfpr_netdev_open
+ * @dev: network interface device structure
+ *
+ * Called when vfpr netdevice is brought up.
+ **/
+static int i40e_vfpr_netdev_open(struct net_device *dev)
+{
+	return 0;
+}
+
+/**
+ * i40e_vfpr_netdev_stop
+ * @dev: network interface device structure
+ *
+ * Called when vfpr netdevice is brought down.
+ **/
+static int i40e_vfpr_netdev_stop(struct net_device *dev)
+{
+	return 0;
+}
+
+static const struct net_device_ops i40e_vfpr_netdev_ops = {
+	.ndo_open       	= i40e_vfpr_netdev_open,
+	.ndo_stop       	= i40e_vfpr_netdev_stop,
+};
+
+/**
+ * i40e_update_vf_broadcast_filter
+ * @vf: pointer to the VF structure
+ * @enable: boolean flag indicating add/delete
+ *
+ * add/delete VFs broadcast filter
+ **/
+void i40e_update_vf_broadcast_filter(struct i40e_vf *vf, bool enable)
+{
+	struct i40e_pf *pf = vf->pf;
+	struct i40e_vsi *vsi = pf->vsi[vf->lan_vsi_idx];
+	u8 broadcast[ETH_ALEN];
+	int err;
+
+	spin_lock_bh(&vsi->mac_filter_hash_lock);
+	eth_broadcast_addr(broadcast);
+	if (enable)
+		i40e_add_filter(vsi, broadcast, vf->port_vlan_id ? vf->port_vlan_id : -1);
+	else
+		i40e_del_filter(vsi, broadcast, vf->port_vlan_id ? vf->port_vlan_id : -1);
+	spin_unlock_bh(&vsi->mac_filter_hash_lock);
+
+	/* update broadcast filter */
+	err = i40e_sync_vsi_filters(vsi);
+	if (err)
+		dev_err(&pf->pdev->dev, "Unable to program bcast filter\n");
+}
+
+/**
+ * i40e_alloc_vfpr_netdev
+ * @vf: pointer to the VF structure
+ * @vf_num: VF number
+ *
+ * Create VF Port representor netdev
+ **/
+int i40e_alloc_vfpr_netdev(struct i40e_vf *vf, u16 vf_num)
+{
+	struct net_device *vfpr_netdev;
+	char netdev_name[IFNAMSIZ];
+	struct i40e_vfpr_netdev_priv *priv;
+	struct i40e_pf *pf = vf->pf;
+	struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi];
+	int err;
+
+	snprintf(netdev_name, IFNAMSIZ, "%s-vf%d", vsi->netdev->name, vf_num);
+	vfpr_netdev = alloc_netdev(sizeof(struct i40e_vfpr_netdev_priv),
+				   netdev_name, NET_NAME_UNKNOWN, ether_setup);
+	if (!vfpr_netdev) {
+		dev_err(&pf->pdev->dev, "alloc_netdev failed for vf:%d\n",
+			vf_num);
+		return -ENOMEM;
+	}
+
+	pf->vf[vf_num].vfpr_netdev = vfpr_netdev;
+
+	priv = netdev_priv(vfpr_netdev);
+	priv->vf = &(pf->vf[vf_num]);
+
+	vfpr_netdev->netdev_ops = &i40e_vfpr_netdev_ops;
+
+	netif_carrier_off(vfpr_netdev);
+	netif_tx_disable(vfpr_netdev);
+
+	err = register_netdev(vfpr_netdev);
+	if (err) {
+		dev_err(&pf->pdev->dev, "register_netdev failed for vf: %s\n",
+			vf->vfpr_netdev->name);
+		free_netdev(vfpr_netdev);
+		return err;
+	}
+
+	dev_info(&pf->pdev->dev, "VF Port representor(%s) created for VF %d\n",
+		 vf->vfpr_netdev->name, vf_num);
+
+	/* Delete broadcast filter for VF */
+	i40e_update_vf_broadcast_filter(vf, false);
+
+	return 0;
+}
+
+/**
+ * i40e_free_vfpr_netdev
+ * @vf: pointer to the VF structure
+ *
+ * Free VF Port representor netdev
+ **/
+void i40e_free_vfpr_netdev(struct i40e_vf *vf)
+{
+	struct i40e_pf *pf = vf->pf;
+
+	if (!vf->vfpr_netdev)
+		return;
+
+	dev_info(&pf->pdev->dev, "Freeing VF Port representor(%s)\n",
+		 vf->vfpr_netdev->name);
+
+	unregister_netdev(vf->vfpr_netdev);
+	free_netdev(vf->vfpr_netdev);
+
+	/* Add broadcast filter to VF */
+	i40e_update_vf_broadcast_filter(vf, true);
+}
+
+/**
  * i40e_free_vfs
  * @pf: pointer to the PF structure
  *
@@ -1060,6 +1194,9 @@ void i40e_free_vfs(struct i40e_pf *pf)
 			i40e_free_vf_res(&pf->vf[i]);
 		/* disable qp mappings */
 		i40e_disable_vf_mappings(&pf->vf[i]);
+
+		if (pf->eswitch_mode == DEVLINK_ESWITCH_MODE_SWITCHDEV)
+			i40e_free_vfpr_netdev(&pf->vf[i]);
 	}
 
 	kfree(pf->vf);
@@ -1127,6 +1264,8 @@ int i40e_alloc_vfs(struct i40e_pf *pf, u16 num_alloc_vfs)
 		/* VF resources get allocated during reset */
 		i40e_reset_vf(&vfs[i], false);
 
+		if (pf->eswitch_mode == DEVLINK_ESWITCH_MODE_SWITCHDEV)
+			i40e_alloc_vfpr_netdev(&vfs[i], i);
 	}
 	pf->num_alloc_vfs = num_alloc_vfs;
 
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
index 4012d06..1e5def1 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
@@ -72,10 +72,21 @@ enum i40e_vf_capabilities {
 	I40E_VIRTCHNL_VF_CAP_IWARP,
 };
 
+/* VF Port representor netdev private structure */
+struct i40e_vfpr_netdev_priv {
+	struct i40e_vf *vf;
+};
+
 /* VF information structure */
 struct i40e_vf {
 	struct i40e_pf *pf;
 
+	/* VF Port representor netdev that allows control and configuration
+	 * of VFs from the host. Enables returning VF stats, configuring link
+	 * state, mtu, fdb/vlans etc.
+	 */
+	struct net_device *vfpr_netdev;
+
 	/* VF id in the PF space */
 	s16 vf_id;
 	/* all VF vsis connect to the same parent */
@@ -142,4 +153,7 @@ int i40e_ndo_set_vf_spoofchk(struct net_device *netdev, int vf_id, bool enable);
 void i40e_vc_notify_link_state(struct i40e_pf *pf);
 void i40e_vc_notify_reset(struct i40e_pf *pf);
 
+int i40e_alloc_vfpr_netdev(struct i40e_vf *vf, u16 vf_num);
+void i40e_free_vfpr_netdev(struct i40e_vf *vf);
+
 #endif /* _I40E_VIRTCHNL_PF_H_ */
-- 
2.5.5

^ permalink raw reply related

* [net-next PATCH 3/6] i40e: Sync link state between VFs and VFPRs
From: Sridhar Samudrala @ 2016-12-30  6:21 UTC (permalink / raw)
  To: alexander.h.duyck, john.r.fastabend, anjali.singhai,
	jakub.kicinski, intel-wired-lan, netdev
In-Reply-To: <1483078863-22026-1-git-send-email-sridhar.samudrala@intel.com>

This patch enables
- reflecting the link state of VFPR based on VF admin state & link state
  of VF based on admin state of VFPR.
- bringing up/down the VFPR sends a notification to update VF link state.
- bringing up/down the VF will cause the link state update of VFPR.
- enable/disable VF link state via ndo_set_vf_link_state will update the
  admin state of associated VFPR.

PF: enp5s0f0, VFs: enp5s2,enp5s2f1 VFPRs:enp5s0f0-vf0, enp5s0f0-vf1
# rmmod i40e; modprobe i40e
# devlink dev eswitch set pci/0000:05:00.0 mode switchdev
# echo 2 > /sys/class/net/enp5s0f0/device/sriov_numvfs

# ip link set enp5s2 up
# ip link set enp5s0f0-vf0 up
# ip link set enp5s0f0-vf1 up

/* enp5s2 UP -> enp5s0f0-vf0 CARRIER ON */
# ip link show enp5s0f0-vf0
215: enp5s0f0-vf0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc fq_codel state UP mode DEFAULT group default qlen 1000
     link/ether 00:00:00:00:00:00 brd ff:ff:ff:ff:ff:ff

/* enp5s0f0-vf0 UP -> enp5s2 CARRIER ON */
# ip link show enp5s2
218: enp5s2: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc mq state UP mode DEFAULT group default qlen 1000
     link/ether ea:4d:60:bc:6f:85 brd ff:ff:ff:ff:ff:ff

/* enp5s2f1 DOWN -> enp5s0f0-vf1 NO-CARRIER */
# ip link show enp5s0f0-vf1
216: enp5s0f0-vf1: <NO-CARRIER,BROADCAST,MULTICAST,UP> mtu 1500 qdisc fq_codel state DOWN mode DEFAULT group default qlen 1000
     link/ether 00:00:00:00:00:00 brd ff:ff:ff:ff:ff:ff

# ip link set enp5s0f0-vf0 down
# ip link set enp5s2f1 up

/* enp5s0f0-vf0 DOWN -> enp5s2 NO-CARRIER */
# ip link show enp5s2
218: enp5s2: <NO-CARRIER,BROADCAST,MULTICAST,UP> mtu 1500 qdisc mq state DOWN mode DEFAULT group default qlen 1000
     link/ether ea:4d:60:bc:6f:85 brd ff:ff:ff:ff:ff:ff

# ip -d link show enp5s0f0
213: enp5s0f0: <BROADCAST,MULTICAST> mtu 1500 qdisc noop portid 6805ca27268 state DOWN mode DEFAULT group default qlen 1000
     link/ether 68:05:ca:2e:72:68 brd ff:ff:ff:ff:ff:ff promiscuity 0 addrgenmode eui64
     vf 0 MAC 00:00:00:00:00:00, spoof checking on, link-state disable, trust off
     vf 1 MAC 00:00:00:00:00:00, spoof checking on, link-state enable, trust off

Signed-off-by: Sridhar Samudrala <sridhar.samudrala@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 44 ++++++++++++++++++++++
 1 file changed, 44 insertions(+)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index 6c5b296..3ea7235 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -1031,6 +1031,13 @@ void i40e_reset_vf(struct i40e_vf *vf, bool flr)
  **/
 static int i40e_vfpr_netdev_open(struct net_device *dev)
 {
+	struct i40e_vfpr_netdev_priv *priv = netdev_priv(dev);
+	struct i40e_vf *vf = priv->vf;
+
+	vf->link_forced = true;
+	vf->link_up = true;
+	i40e_vc_notify_vf_link_state(vf);
+
 	return 0;
 }
 
@@ -1042,6 +1049,13 @@ static int i40e_vfpr_netdev_open(struct net_device *dev)
  **/
 static int i40e_vfpr_netdev_stop(struct net_device *dev)
 {
+	struct i40e_vfpr_netdev_priv *priv = netdev_priv(dev);
+	struct i40e_vf *vf = priv->vf;
+
+	vf->link_forced = true;
+	vf->link_up = false;
+	i40e_vc_notify_vf_link_state(vf);
+
 	return 0;
 }
 
@@ -1127,6 +1141,13 @@ int i40e_alloc_vfpr_netdev(struct i40e_vf *vf, u16 vf_num)
 	/* Delete broadcast filter for VF */
 	i40e_update_vf_broadcast_filter(vf, false);
 
+	/* Reset VF link as we are changing the mode to 'switchdev'. VFPR netdev
+	 * needs to be brought up to enable VF link.
+	 */
+	vf->link_forced = true;
+	vf->link_up = false;
+	i40e_vc_notify_vf_link_state(vf);
+
 	return 0;
 }
 
@@ -1151,6 +1172,10 @@ void i40e_free_vfpr_netdev(struct i40e_vf *vf)
 
 	/* Add broadcast filter to VF */
 	i40e_update_vf_broadcast_filter(vf, true);
+
+	/* In legacy mode, VF link is not controlled by VFPR */
+	vf->link_forced = false;
+	i40e_vc_notify_vf_link_state(vf);
 }
 
 /**
@@ -1907,6 +1932,10 @@ static int i40e_vc_enable_queues_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
 
 	if (i40e_vsi_start_rings(pf->vsi[vf->lan_vsi_idx]))
 		aq_ret = I40E_ERR_TIMEOUT;
+
+	if ((0 == aq_ret) && vf->vfpr_netdev)
+		netif_carrier_on(vf->vfpr_netdev);
+
 error_param:
 	/* send the response to the VF */
 	return i40e_vc_send_resp_to_vf(vf, I40E_VIRTCHNL_OP_ENABLE_QUEUES,
@@ -1946,6 +1975,9 @@ static int i40e_vc_disable_queues_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
 
 	i40e_vsi_stop_rings(pf->vsi[vf->lan_vsi_idx]);
 
+	if ((0 == aq_ret) && vf->vfpr_netdev)
+		netif_carrier_off(vf->vfpr_netdev);
+
 error_param:
 	/* send the response to the VF */
 	return i40e_vc_send_resp_to_vf(vf, I40E_VIRTCHNL_OP_DISABLE_QUEUES,
@@ -3186,6 +3218,7 @@ int i40e_ndo_set_vf_link_state(struct net_device *netdev, int vf_id, int link)
 	struct i40e_netdev_priv *np = netdev_priv(netdev);
 	struct i40e_pf *pf = np->vsi->back;
 	struct i40e_virtchnl_pf_event pfe;
+	struct net_device *vfpr_netdev;
 	struct i40e_hw *hw = &pf->hw;
 	struct i40e_vf *vf;
 	int abs_vf_id;
@@ -3228,6 +3261,17 @@ int i40e_ndo_set_vf_link_state(struct net_device *netdev, int vf_id, int link)
 		ret = -EINVAL;
 		goto error_out;
 	}
+
+	vfpr_netdev = vf->vfpr_netdev;
+	if (vfpr_netdev) {
+		unsigned int flags = vfpr_netdev->flags;
+
+		if (vf->link_up)
+			dev_change_flags(vfpr_netdev, flags | IFF_UP);
+		else
+			dev_change_flags(vfpr_netdev, flags & ~IFF_UP);
+	}
+
 	/* Notify the VF of its new link state */
 	i40e_aq_send_msg_to_vf(hw, abs_vf_id, I40E_VIRTCHNL_OP_EVENT,
 			       0, (u8 *)&pfe, sizeof(pfe), NULL);
-- 
2.5.5

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox