Netdev List
 help / color / mirror / Atom feed
* Re: [PATCH] net: ftgmac100: Request clock and set speed
From: Florian Fainelli @ 2017-10-10  5:04 UTC (permalink / raw)
  To: Joel Stanley, David S . Miller, Benjamin Herrenschmidt
  Cc: netdev, linux-kernel, Andrew Jeffery
In-Reply-To: <20171010044925.21078-1-joel@jms.id.au>



On 10/09/2017 09:49 PM, Joel Stanley wrote:
> According to the ASPEED datasheet, gigabit speeds require a clock of
> 100MHz or higher. Other speeds require 25MHz or higher.
> 
> Signed-off-by: Joel Stanley <joel@jms.id.au>
> ---

> @@ -161,6 +170,9 @@ static int ftgmac100_reset_and_config_mac(struct ftgmac100 *priv)
>  		break;
>  	}
>  
> +	if (freq && priv->clk)
> +		clk_set_rate(priv->clk, freq);

Checking for priv->clk should not be necessary all public clk APIs can
deal with a NULL clock pointer.

> +
>  	/* (Re)initialize the queue pointers */
>  	priv->rx_pointer = 0;
>  	priv->tx_clean_pointer = 0;
> @@ -1775,6 +1787,13 @@ static int ftgmac100_probe(struct platform_device *pdev)
>  	priv->dev = &pdev->dev;
>  	INIT_WORK(&priv->reset_task, ftgmac100_reset_task);
>  
> +	/* Enable clock if present */
> +	priv->clk = devm_clk_get(&pdev->dev, NULL);
> +	if (!IS_ERR(priv->clk))
> +		clk_prepare_enable(priv->clk);
> +	else
> +		priv->clk = NULL;

Same here.

> +
>  	/* map io memory */
>  	priv->res = request_mem_region(res->start, resource_size(res),
>  				       dev_name(&pdev->dev));
> @@ -1883,6 +1902,9 @@ static int ftgmac100_remove(struct platform_device *pdev)
>  
>  	unregister_netdev(netdev);
>  
> +	if (priv->clk)
> +		clk_disable_unprepare(priv->clk);

And here.

> +
>  	/* There's a small chance the reset task will have been re-queued,
>  	 * during stop, make sure it's gone before we free the structure.
>  	 */
> 

-- 
Florian

^ permalink raw reply

* Re: [PATCH] net: ftgmac100: Request clock and set speed
From: Joel Stanley @ 2017-10-10  5:08 UTC (permalink / raw)
  To: Florian Fainelli
  Cc: David S . Miller, Benjamin Herrenschmidt, netdev,
	Linux Kernel Mailing List, Andrew Jeffery
In-Reply-To: <0150f64f-6093-1cca-465b-8ec158abe7e6@gmail.com>

On Tue, Oct 10, 2017 at 2:34 PM, Florian Fainelli <f.fainelli@gmail.com> wrote:
>
>
> On 10/09/2017 09:49 PM, Joel Stanley wrote:
>> According to the ASPEED datasheet, gigabit speeds require a clock of
>> 100MHz or higher. Other speeds require 25MHz or higher.
>>
>> Signed-off-by: Joel Stanley <joel@jms.id.au>
>> ---
>
>> @@ -161,6 +170,9 @@ static int ftgmac100_reset_and_config_mac(struct ftgmac100 *priv)
>>               break;
>>       }
>>
>> +     if (freq && priv->clk)
>> +             clk_set_rate(priv->clk, freq);
>
> Checking for priv->clk should not be necessary all public clk APIs can
> deal with a NULL clock pointer.

The intention was to set ->clk to NULL to indicate that there was no
clk, and therefore there is no reason to attempt to set the rate or
call prepare/unprepare.

If the we prefer to call the clk apis unconditionally I will send a v2
with the checks removed.

Thanks for the review.

Cheers,

Joel

^ permalink raw reply

* Re: [net-next 2/3] ip_gre: fix erspan tunnel mtu calculation
From: Xin Long @ 2017-10-10  5:27 UTC (permalink / raw)
  To: William Tu; +Cc: network dev, therbert, davem
In-Reply-To: <1507582067-36718-3-git-send-email-u9012063@gmail.com>

On Tue, Oct 10, 2017 at 4:47 AM, William Tu <u9012063@gmail.com> wrote:
> Remove the unnecessary -4 and +4 bytes at mtu and headroom calculation.
> In addition, erspan uses fixed 8-byte gre header, so add ERSPAN_GREHDR_LEN
> macro for better readability.
>
> Now tunnel->hlen = grehdr(8) + erspanhdr(8) = 16 byte.
> The mtu should be ETH_DATA_LEN - 16 - iph(20) = 1464.
> After the ip_tunnel_bind_dev(), the mtu is adjusted to
> 1464 - 14 (dev->hard_header_len) = 1450.
> The maximum skb->len the erspan tunnel can carry without
> being truncated is 1450 + 14 = 1464 byte.
>
> Signed-off-by: William Tu <u9012063@gmail.com>
> Cc: Xin Long <lucien.xin@gmail.com>
> ---
>  include/net/erspan.h |  1 +
>  net/ipv4/ip_gre.c    | 11 +++++------
>  2 files changed, 6 insertions(+), 6 deletions(-)
>
> diff --git a/include/net/erspan.h b/include/net/erspan.h
> index ca94fc86865e..e28294e248d0 100644
> --- a/include/net/erspan.h
> +++ b/include/net/erspan.h
> @@ -28,6 +28,7 @@
>   */
>
>  #define ERSPAN_VERSION 0x1
> +#define ERSPAN_GREHDR_LEN 8    /* ERSPAN has fixed 8-byte GRE header */
>
>  #define VER_MASK       0xf000
>  #define VLAN_MASK      0x0fff
> diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
> index 286065c35959..6e6e4c4811cc 100644
> --- a/net/ipv4/ip_gre.c
> +++ b/net/ipv4/ip_gre.c
> @@ -569,8 +569,7 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev,
>
>         key = &tun_info->key;
>
> -       /* ERSPAN has fixed 8 byte GRE header */
> -       tunnel_hlen = 8 + sizeof(struct erspanhdr);
> +       tunnel_hlen = ERSPAN_GREHDR_LEN + sizeof(struct erspanhdr);
>
>         rt = prepare_fb_xmit(skb, dev, &fl, tunnel_hlen);
>         if (!rt)
> @@ -591,7 +590,7 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev,
>         erspan_build_header(skb, tunnel_id_to_key32(key->tun_id),
>                             ntohl(md->index), truncate);
>
> -       gre_build_header(skb, 8, TUNNEL_SEQ,
> +       gre_build_header(skb, ERSPAN_GREHDR_LEN, TUNNEL_SEQ,
>                          htons(ETH_P_ERSPAN), 0, htonl(tunnel->o_seqno++));
>
>         df = key->tun_flags & TUNNEL_DONT_FRAGMENT ?  htons(IP_DF) : 0;
> @@ -1242,14 +1241,14 @@ static int erspan_tunnel_init(struct net_device *dev)
>         struct ip_tunnel *tunnel = netdev_priv(dev);
>         int t_hlen;
>
> -       tunnel->tun_hlen = 8;
> +       tunnel->tun_hlen = ERSPAN_GREHDR_LEN;
>         tunnel->parms.iph.protocol = IPPROTO_GRE;
>         tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen +
>                        sizeof(struct erspanhdr);
>         t_hlen = tunnel->hlen + sizeof(struct iphdr);
>
> -       dev->needed_headroom = LL_MAX_HEADER + t_hlen + 4;
> -       dev->mtu = ETH_DATA_LEN - t_hlen - 4;
> +       dev->needed_headroom = LL_MAX_HEADER + t_hlen;
> +       dev->mtu = ETH_DATA_LEN - t_hlen;
1. I guess '+4-4' stuff was copied from __gre_tunnel_init(), I'm thinking
it may be there for some reason.

2. 'dev->needed_headroom =' and 'dev->mtu =' are really needed ?
As I've seen both will be updated in .newlink:
ipgre_newlink() -> ip_tunnel_newlink() -> ip_tunnel_bind_dev()

>         dev->features           |= GRE_FEATURES;
>         dev->hw_features        |= GRE_FEATURES;
>         dev->priv_flags         |= IFF_LIVE_ADDR_CHANGE;
> --
> 2.7.4
>

^ permalink raw reply

* [Patch net-next] tcp: add a tracepoint for tcp_retransmit_skb()
From: Cong Wang @ 2017-10-10  5:35 UTC (permalink / raw)
  To: netdev; +Cc: Cong Wang, Eric Dumazet, Yuchung Cheng, Neal Cardwell

We need a real-time notification for tcp retransmission
for monitoring.

Of course we could use ftrace to dynamically instrument this
kernel function too, however we can't retrieve the connection
information at the same time, for example perf-tools [1] reads
/proc/net/tcp for socket details, which is slow when we have
a lots of connections.

Therefore, this patch adds a tracepoint for tcp_retransmit_skb()
and exposes src/dst IP addresses and ports of the connection.
This also makes it easier to integrate into perf.

Note, I expose both IPv4 and IPv6 addresses at the same time:
for a IPv4 socket, v4 mapped address is used as IPv6 addresses,
for a IPv6 socket, LOOPBACK4_IPV6 is already filled by kernel.

Perhaps there are other interfaces to use (for example netlink),
but tracepoint is the quickest way I can think of.

1. https://github.com/brendangregg/perf-tools/blob/master/net/tcpretrans

Cc: Eric Dumazet <edumazet@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Cc: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
---
 include/trace/events/tcp.h | 63 ++++++++++++++++++++++++++++++++++++++++++++++
 net/core/net-traces.c      |  1 +
 net/ipv4/tcp_output.c      |  3 +++
 3 files changed, 67 insertions(+)
 create mode 100644 include/trace/events/tcp.h

diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h
new file mode 100644
index 000000000000..cb22acc8aacd
--- /dev/null
+++ b/include/trace/events/tcp.h
@@ -0,0 +1,63 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM tcp
+
+#if !defined(_TRACE_TCP_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_TCP_H
+
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+#include <linux/tracepoint.h>
+#include <net/ipv6.h>
+
+TRACE_EVENT(tcp_retransmit_skb,
+
+	TP_PROTO(const struct sock *sk, struct sk_buff *skb, int segs),
+
+	TP_ARGS(sk, skb, segs),
+
+	TP_STRUCT__entry(
+		__field(__u16, sport)
+		__field(__u16, dport)
+		__array(__u8, saddr, 4)
+		__array(__u8, daddr, 4)
+		__array(__u8, saddr_v6, 16)
+		__array(__u8, daddr_v6, 16)
+	),
+
+	TP_fast_assign(
+		struct ipv6_pinfo *np = inet6_sk(sk);
+		struct inet_sock *inet = inet_sk(sk);
+		struct in6_addr *pin6;
+		__be32 *p32;
+
+		__entry->sport = ntohs(inet->inet_sport);
+		__entry->dport = ntohs(inet->inet_dport);
+
+		p32 = (__be32 *) __entry->saddr;
+		*p32 = inet->inet_saddr;
+
+		p32 = (__be32 *) __entry->daddr;
+		*p32 =  inet->inet_daddr;
+
+		if (np) {
+			pin6 = (struct in6_addr *)__entry->saddr_v6;
+			*pin6 = np->saddr;
+			pin6 = (struct in6_addr *)__entry->daddr_v6;
+			*pin6 = *(np->daddr_cache);
+		} else {
+			pin6 = (struct in6_addr *)__entry->saddr_v6;
+			ipv6_addr_set_v4mapped(inet->inet_saddr, pin6);
+			pin6 = (struct in6_addr *)__entry->daddr_v6;
+			ipv6_addr_set_v4mapped(inet->inet_daddr, pin6);
+		}
+	),
+
+	TP_printk("sport=%hu, dport=%hu, saddr=%pI4, daddr=%pI4, saddrv6=%pI6, daddrv6=%pI6",
+		  __entry->sport, __entry->dport, __entry->saddr, __entry->daddr,
+		  __entry->saddr_v6, __entry->daddr_v6)
+);
+
+#endif /* _TRACE_TCP_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/net/core/net-traces.c b/net/core/net-traces.c
index 1132820c8e62..f4e4fa2db505 100644
--- a/net/core/net-traces.c
+++ b/net/core/net-traces.c
@@ -31,6 +31,7 @@
 #include <trace/events/napi.h>
 #include <trace/events/sock.h>
 #include <trace/events/udp.h>
+#include <trace/events/tcp.h>
 #include <trace/events/fib.h>
 #include <trace/events/qdisc.h>
 #if IS_ENABLED(CONFIG_IPV6)
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 696b0a168f16..e6d6e1393578 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -42,6 +42,8 @@
 #include <linux/gfp.h>
 #include <linux/module.h>
 
+#include <trace/events/tcp.h>
+
 /* People can turn this off for buggy TCP's found in printers etc. */
 int sysctl_tcp_retrans_collapse __read_mostly = 1;
 
@@ -2899,6 +2901,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
 		if (!tp->retrans_stamp)
 			tp->retrans_stamp = tcp_skb_timestamp(skb);
 
+		trace_tcp_retransmit_skb(sk, skb, segs);
 	}
 
 	if (tp->undo_retrans < 0)
-- 
2.13.0

^ permalink raw reply related

* Re: [PATCHv4 iproute2 2/2] lib/libnetlink: update rtnl_talk to support malloc buff at run time
From: Michal Kubecek @ 2017-10-10  6:41 UTC (permalink / raw)
  To: Phil Sutter, Stephen Hemminger, Hangbin Liu, netdev, Hangbin Liu
In-Reply-To: <20171009202525.GR32278@orbyte.nwl.cc>

On Mon, Oct 09, 2017 at 10:25:25PM +0200, Phil Sutter wrote:
> Hi Stephen,
> 
> On Mon, Oct 02, 2017 at 10:37:08AM -0700, Stephen Hemminger wrote:
> > On Thu, 28 Sep 2017 21:33:46 +0800
> > Hangbin Liu <haliu@redhat.com> wrote:
> > 
> > > From: Hangbin Liu <liuhangbin@gmail.com>
> > > 
> > > This is an update for 460c03f3f3cc ("iplink: double the buffer size also in
> > > iplink_get()"). After update, we will not need to double the buffer size
> > > every time when VFs number increased.
> > > 
> > > With call like rtnl_talk(&rth, &req.n, NULL, 0), we can simply remove the
> > > length parameter.
> > > 
> > > With call like rtnl_talk(&rth, nlh, nlh, sizeof(req), I add a new variable
> > > answer to avoid overwrite data in nlh, because it may has more info after
> > > nlh. also this will avoid nlh buffer not enough issue.
> > > 
> > > We need to free answer after using.
> > > 
> > > Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
> > > Signed-off-by: Phil Sutter <phil@nwl.cc>
> > > ---
> > 
> > Most of the uses of rtnl_talk() don't need to this peek and dynamic sizing.
> > Can only those places that need that be targeted?
> 
> We could probably do that, by having a buffer on stack in __rtnl_talk()
> which will be used instead of the allocated one if 'answer' is NULL. Or
> maybe even introduce a dedicated API call for the dynamically allocated
> receive buffer. But I really doubt that's feasible: AFAICT, that stack
> buffer still needs to be reasonably sized since the reply might be
> larger than the request (reusing the request buffer would be the most
> simple way to tackle this), also there is support for extack which may
> bloat the response to arbitrary size. Hangbin has shown in his benchmark
> that the overhead of the second syscall is negligible, so why care about
> that and increase code complexity even further?
> 
> Not saying it's not possible, but I just doubt it's worth the effort.

Agreed. Current code is based on the assumption that we can estimate the
maximum reply length in advance and the reason for this series is that
this assumption turned out to be wrong. I'm afraid that if we replace
it by an assumption that we can estimate the maximum reply length for
most requests with only few exceptions, it's only matter of time for us
to be proven wrong again.

Michal Kubecek

^ permalink raw reply

* [PATCH net-next] cxgb4: Add support for new flash parts
From: Ganesh Goudar @ 2017-10-10  7:14 UTC (permalink / raw)
  To: netdev, davem; +Cc: nirranjan, leedom, indranil, venkatesh, Ganesh Goudar

Add support for new flash parts identification, and
also cleanup the flash Part identifying and decoding
code.

Based on the original work of Casey Leedom <leedom@chelsio.com>

Signed-off-by: Ganesh Goudar <ganeshgr@chelsio.com>
---
 drivers/net/ethernet/chelsio/cxgb4/t4_hw.c | 136 ++++++++++++++++++++++++-----
 1 file changed, 116 insertions(+), 20 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
index b65ce26..b3fd1f4 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
@@ -8205,7 +8205,7 @@ struct flash_desc {
 	u32 size_mb;
 };
 
-static int get_flash_params(struct adapter *adap)
+static int t4_get_flash_params(struct adapter *adap)
 {
 	/* Table for non-Numonix supported flash parts.  Numonix parts are left
 	 * to the preexisting code.  All flash parts have 64KB sectors.
@@ -8214,40 +8214,136 @@ static int get_flash_params(struct adapter *adap)
 		{ 0x150201, 4 << 20 },       /* Spansion 4MB S25FL032P */
 	};
 
+	unsigned int part, manufacturer;
+	unsigned int density, size;
+	u32 flashid = 0;
 	int ret;
-	u32 info;
+
+	/* Issue a Read ID Command to the Flash part.  We decode supported
+	 * Flash parts and their sizes from this.  There's a newer Query
+	 * Command which can retrieve detailed geometry information but many
+	 * Flash parts don't support it.
+	 */
 
 	ret = sf1_write(adap, 1, 1, 0, SF_RD_ID);
 	if (!ret)
-		ret = sf1_read(adap, 3, 0, 1, &info);
+		ret = sf1_read(adap, 3, 0, 1, &flashid);
 	t4_write_reg(adap, SF_OP_A, 0);                    /* unlock SF */
 	if (ret)
 		return ret;
 
-	for (ret = 0; ret < ARRAY_SIZE(supported_flash); ++ret)
-		if (supported_flash[ret].vendor_and_model_id == info) {
-			adap->params.sf_size = supported_flash[ret].size_mb;
+	/* Check to see if it's one of our non-standard supported Flash parts.
+	 */
+	for (part = 0; part < ARRAY_SIZE(supported_flash); part++)
+		if (supported_flash[part].vendor_and_model_id == flashid) {
+			adap->params.sf_size = supported_flash[part].size_mb;
 			adap->params.sf_nsec =
 				adap->params.sf_size / SF_SEC_SIZE;
-			return 0;
+			goto found;
 		}
 
-	if ((info & 0xff) != 0x20)             /* not a Numonix flash */
+	/* Decode Flash part size.  The code below looks repetative with
+	 * common encodings, but that's not guaranteed in the JEDEC
+	 * specification for the Read JADEC ID command.  The only thing that
+	 * we're guaranteed by the JADEC specification is where the
+	 * Manufacturer ID is in the returned result.  After that each
+	 * Manufacturer ~could~ encode things completely differently.
+	 * Note, all Flash parts must have 64KB sectors.
+	 */
+	manufacturer = flashid & 0xff;
+	switch (manufacturer) {
+	case 0x20: { /* Micron/Numonix */
+		/* This Density -> Size decoding table is taken from Micron
+		 * Data Sheets.
+		 */
+		density = (flashid >> 16) & 0xff;
+		switch (density) {
+		case 0x14: /* 1MB */
+			size = 1 << 20;
+			break;
+		case 0x15: /* 2MB */
+			size = 1 << 21;
+			break;
+		case 0x16: /* 4MB */
+			size = 1 << 22;
+			break;
+		case 0x17: /* 8MB */
+			size = 1 << 23;
+			break;
+		case 0x18: /* 16MB */
+			size = 1 << 24;
+			break;
+		case 0x19: /* 32MB */
+			size = 1 << 25;
+			break;
+		case 0x20: /* 64MB */
+			size = 1 << 26;
+			break;
+		case 0x21: /* 128MB */
+			size = 1 << 27;
+			break;
+		case 0x22: /* 256MB */
+			size = 1 << 28;
+			break;
+
+		default:
+			dev_err(adap->pdev_dev, "Micron Flash Part has bad size, ID = %#x, Density code = %#x\n",
+				flashid, density);
 		return -EINVAL;
-	info >>= 16;                           /* log2 of size */
-	if (info >= 0x14 && info < 0x18)
-		adap->params.sf_nsec = 1 << (info - 16);
-	else if (info == 0x18)
-		adap->params.sf_nsec = 64;
-	else
+		}
+		break;
+	}
+	case 0xc2: { /* Macronix */
+		/* This Density -> Size decoding table is taken from Macronix
+		 * Data Sheets.
+		 */
+		density = (flashid >> 16) & 0xff;
+		switch (density) {
+		case 0x17: /* 8MB */
+			size = 1 << 23;
+			break;
+		case 0x18: /* 16MB */
+			size = 1 << 24;
+			break;
+		default:
+			dev_err(adap->pdev_dev, "Macronix Flash Part has bad size, ID = %#x, Density code = %#x\n",
+				flashid, density);
+		return -EINVAL;
+		}
+	}
+	case 0xef: { /* Winbond */
+		/* This Density -> Size decoding table is taken from Winbond
+		 * Data Sheets.
+		 */
+		density = (flashid >> 16) & 0xff;
+		switch (density) {
+		case 0x17: /* 8MB */
+			size = 1 << 23;
+			break;
+		case 0x18: /* 16MB */
+			size = 1 << 24;
+			break;
+		default:
+			dev_err(adap->pdev_dev, "Winbond Flash Part has bad size, ID = %#x, Density code = %#x\n",
+				flashid, density);
 		return -EINVAL;
-	adap->params.sf_size = 1 << info;
-	adap->params.sf_fw_start =
-		t4_read_reg(adap, CIM_BOOT_CFG_A) & BOOTADDR_M;
+		}
+		break;
+	}
+	default:
+		dev_err(adap->pdev_dev, "Unsupported Flash Part, ID = %#x\n",
+			flashid);
+		return -EINVAL;
+	}
+
+	/* Store decoded Flash size and fall through into vetting code. */
+	adap->params.sf_size = size;
+	adap->params.sf_nsec = size / SF_SEC_SIZE;
 
+found:
 	if (adap->params.sf_size < FLASH_MIN_SIZE)
-		dev_warn(adap->pdev_dev, "WARNING!!! FLASH size %#x < %#x!!!\n",
-			 adap->params.sf_size, FLASH_MIN_SIZE);
+		dev_warn(adap->pdev_dev, "WARNING: Flash Part ID %#x, size %#x < %#x\n",
+			 flashid, adap->params.sf_size, FLASH_MIN_SIZE);
 	return 0;
 }
 
@@ -8285,7 +8381,7 @@ int t4_prep_adapter(struct adapter *adapter)
 	get_pci_mode(adapter, &adapter->params.pci);
 	pl_rev = REV_G(t4_read_reg(adapter, PL_REV_A));
 
-	ret = get_flash_params(adapter);
+	ret = t4_get_flash_params(adapter);
 	if (ret < 0) {
 		dev_err(adapter->pdev_dev, "error %d identifying flash\n", ret);
 		return ret;
-- 
2.1.0

^ permalink raw reply related

* [PATCH net-next] cxgb4: add new T5 pci device id's
From: Ganesh Goudar @ 2017-10-10  7:15 UTC (permalink / raw)
  To: netdev, davem; +Cc: nirranjan, indranil, venkatesh, Ganesh Goudar

Add 0x50aa and 0x50ab T5 device id's.

Signed-off-by: Ganesh Goudar <ganeshgr@chelsio.com>
---
 drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h b/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h
index 37d90d6..c660c1a 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h
@@ -181,6 +181,8 @@ CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN
 	CH_PCI_ID_TABLE_FENTRY(0x50a7), /* Custom T580-CR */
 	CH_PCI_ID_TABLE_FENTRY(0x50a8), /* Custom T580-KR */
 	CH_PCI_ID_TABLE_FENTRY(0x50a9), /* Custom T580-KR */
+	CH_PCI_ID_TABLE_FENTRY(0x50aa), /* Custom T580-CR */
+	CH_PCI_ID_TABLE_FENTRY(0x50ab), /* Custom T520-CR */
 
 	/* T6 adapters:
 	 */
-- 
2.1.0

^ permalink raw reply related

* [PATCH v2] XDP Program for Ip forward
From: Christina Jacob @ 2017-10-10  7:28 UTC (permalink / raw)
  To: netdev
  Cc: linux-kernel, linux-arm-kernel, brouer, Sunil.Goutham, daniel,
	dsahern, Christina Jacob

The patch below implements port to port forwarding through route table and arp
table lookup for ipv4 packets using bpf_redirect helper function and lpm_trie
map.  This has an improved performance over the normal kernel stack ip forward.

Implementation details.
-----------------------
The program uses one map each for arp table, route table and packet count.
The number of entries the program can process is limited by the size of the
map used.

In the xdp3_user.c,

initially, the routing table is read and is stored in an lpm trie map.
The arp table is read and stored in an array map There are two netlink sockets
that listens to any change in the route table  and arp table.
There are two types of changes to the route table.
	1.New
	
	The new entries are added to the lpm_trie with proper key and prefix
	length If there is a another entry in the route table with a different
	metric(only metric is considered). Then the values are compared and the
	one with lowest metric is added to the node.
	
	2.Deletion 

	On deletion from the route table, The particular node is removed and the
	entire route table is again read to check if there is another entry with
	a different metric.  

This implementation depends on  bpf: Implement map_delete_elem for
BPF_MAP_TYPE_LPM_TRIE which is not yet upstreamed.

There are two types of changes to the route table

	1.New
	
	The new arp entries are added in the in the array map directly with the
	ip address as the key and the destination mac address as the value.
	
	2.Delete 
	
	The entry corresponding to the particular ip is deleted from the 
	arp table map.

Another map is maintained for entries in the route table having 32 bit mask.
such entries can have a corresponding  arp entry which if  stored together with
the route entry in an array map and can be accessed in O(1) time. Eliminating
the trie lookup and arp lookup.

In the xdp3_kern.c,

The array map for the 32 bit mask entries checked to see if there is a key that
exactly matches with the destination ip. If it has a non zero destination mac
entry then the xdp data is updated accordingly Otherwise a proper route and 
arp table lookup is done using the lpm_trie and the arp table array map.
	
	Usage: as ./xdp3 -S <ifindex1...ifindexn> (-S for
	generic xdp implementation ifindex- the index of the interface to which
	the xdp program has to be attached.) in 4.14-rc3 kernel.

Changes from v1 to v2
---------------------
 
* As suggested by Jesper Dangaard Brouer
	1. Changed the program name to  list xdp_router_ipv4
	2. Changed the commandline arguments from ifindex list to interface name
		Usage : ./xdp_router_ipv4 [-S] <interface name list>
		-S for generic xdp implementation
		-interface name list is the list of interfaces to which
		the xdp program should attach to

* As suggested by Daniel Borkmann
	1. Using __builin_memcpy to update source and destination mac in the bpf
	  kernel program. 
	
	2. Started using __be32 in the kernel program to be inline with the data
	   type used in user program

	3. Rectified few style issues.

* Corrected the copyright issue pointed out by David Ahern 

* Fixed the bug: The already attached interfaces are not detached from the 
  xdp program if the program fails to attach to an interface later in the list.


Christina Jacob (1):
  xdp: Sample xdp program implementing ip forward

 samples/bpf/Makefile               |    4 +
 samples/bpf/xdp_router_ipv4_kern.c |  189 +++++++++++
 samples/bpf/xdp_router_ipv4_user.c |  655 ++++++++++++++++++++++++++++++++++++
 3 files changed, 848 insertions(+), 0 deletions(-)
 create mode 100644 samples/bpf/xdp_router_ipv4_kern.c
 create mode 100644 samples/bpf/xdp_router_ipv4_user.c

^ permalink raw reply

* [PATCH v2] xdp: Sample xdp program implementing ip forward
From: Christina Jacob @ 2017-10-10  7:28 UTC (permalink / raw)
  To: netdev
  Cc: linux-kernel, linux-arm-kernel, brouer, Sunil.Goutham, daniel,
	dsahern, Christina Jacob
In-Reply-To: <1507620532-25804-1-git-send-email-Christina.Jacob@cavium.com>

Implements port to port forwarding with route table and arp table
lookup for ipv4 packets using bpf_redirect helper function and
lpm_trie  map.

Signed-off-by: Christina Jacob <Christina.Jacob@cavium.com>
---
 samples/bpf/Makefile               |    4 +
 samples/bpf/xdp_router_ipv4_kern.c |  189 +++++++++++
 samples/bpf/xdp_router_ipv4_user.c |  655 ++++++++++++++++++++++++++++++++++++
 3 files changed, 848 insertions(+), 0 deletions(-)
 create mode 100644 samples/bpf/xdp_router_ipv4_kern.c
 create mode 100644 samples/bpf/xdp_router_ipv4_user.c

diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index cf17c79..8504ebb 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -28,6 +28,7 @@ hostprogs-y += test_cgrp2_sock
 hostprogs-y += test_cgrp2_sock2
 hostprogs-y += xdp1
 hostprogs-y += xdp2
+hostprogs-y += xdp_router_ipv4
 hostprogs-y += test_current_task_under_cgroup
 hostprogs-y += trace_event
 hostprogs-y += sampleip
@@ -73,6 +74,7 @@ test_cgrp2_sock2-objs := bpf_load.o $(LIBBPF) test_cgrp2_sock2.o
 xdp1-objs := bpf_load.o $(LIBBPF) xdp1_user.o
 # reuse xdp1 source intentionally
 xdp2-objs := bpf_load.o $(LIBBPF) xdp1_user.o
+xdp_router_ipv4-objs := bpf_load.o $(LIBBPF) xdp_router_ipv4_user.o
 test_current_task_under_cgroup-objs := bpf_load.o $(LIBBPF) cgroup_helpers.o \
 				       test_current_task_under_cgroup_user.o
 trace_event-objs := bpf_load.o $(LIBBPF) trace_event_user.o
@@ -114,6 +116,7 @@ always += parse_varlen.o parse_simple.o parse_ldabs.o
 always += test_cgrp2_tc_kern.o
 always += xdp1_kern.o
 always += xdp2_kern.o
+always += xdp_router_ipv4_kern.o
 always += test_current_task_under_cgroup_kern.o
 always += trace_event_kern.o
 always += sampleip_kern.o
@@ -160,6 +163,7 @@ HOSTLOADLIBES_map_perf_test += -lelf -lrt
 HOSTLOADLIBES_test_overhead += -lelf -lrt
 HOSTLOADLIBES_xdp1 += -lelf
 HOSTLOADLIBES_xdp2 += -lelf
+HOSTLOADLIBES_xdp_router_ipv4 += -lelf
 HOSTLOADLIBES_test_current_task_under_cgroup += -lelf
 HOSTLOADLIBES_trace_event += -lelf
 HOSTLOADLIBES_sampleip += -lelf
diff --git a/samples/bpf/xdp_router_ipv4_kern.c b/samples/bpf/xdp_router_ipv4_kern.c
new file mode 100644
index 0000000..c2bfe40
--- /dev/null
+++ b/samples/bpf/xdp_router_ipv4_kern.c
@@ -0,0 +1,189 @@
+/*
+ * Copyright (C) 2017 Cavium, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License
+ * as published by the Free Software Foundation.
+ */
+#define KBUILD_MODNAME "foo"
+#include <uapi/linux/bpf.h>
+#include <linux/in.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/if_vlan.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include "bpf_helpers.h"
+#include <linux/slab.h>
+#include <net/ip_fib.h>
+
+struct trie_value {
+	__u8 prefix[4];
+	long value;
+	int gw;
+	int ifindex;
+	int metric;
+};
+
+/*Key for lpm_trie*/
+union key_4 {
+	u32 b32[2];
+	u8 b8[8];
+};
+
+struct arp_entry {
+	int dst;
+	long mac;
+};
+
+struct direct_map {
+	long mac;
+	int ifindex;
+	struct arp_entry arp;
+};
+
+/* Map for trie implementation*/
+struct bpf_map_def SEC("maps") lpm_map = {
+	.type = BPF_MAP_TYPE_LPM_TRIE,
+	.key_size = 8,
+	.value_size = sizeof(struct trie_value),
+	.max_entries = 50,
+	.map_flags = BPF_F_NO_PREALLOC,
+};
+
+/* Map for counter*/
+struct bpf_map_def SEC("maps") rxcnt = {
+	.type = BPF_MAP_TYPE_PERCPU_ARRAY,
+	.key_size = sizeof(u32),
+	.value_size = sizeof(long),
+	.max_entries = 256,
+};
+
+/* Map for ARP table*/
+struct bpf_map_def SEC("maps") arp_table = {
+	.type = BPF_MAP_TYPE_HASH,
+	.key_size = sizeof(int),
+	.value_size = sizeof(long),
+	.max_entries = 50,
+};
+
+/* Map to keep the exact match entries in the route table*/
+struct bpf_map_def SEC("maps") exact_match = {
+	.type = BPF_MAP_TYPE_HASH,
+	.key_size = sizeof(int),
+	.value_size = sizeof(struct direct_map),
+	.max_entries = 50,
+};
+
+/* Function to set source and destination mac of the packet */
+static inline void set_src_dst_mac(void *data, void *src, void *dst)
+{
+	unsigned short *p      = data;
+	unsigned short *dest   = dst;
+	unsigned short *source = src;
+
+	__builtin_memcpy(p, dest, 3);
+	__builtin_memcpy(p + 3, source, 3);
+}
+
+/* Parse IPV4 packet to get SRC, DST IP and protocol */
+static inline int parse_ipv4(void *data, u64 nh_off, void *data_end,
+			     __be32 *src, __be32 *dest)
+{
+	struct iphdr *iph = data + nh_off;
+
+	if (iph + 1 > data_end)
+		return 0;
+	*src = (__be32)iph->saddr;
+	*dest = (__be32)iph->daddr;
+	return iph->protocol;
+}
+
+SEC("xdp3")
+int xdp_prog3(struct xdp_md *ctx)
+{
+	void *data_end = (void *)(long)ctx->data_end;
+	void *data = (void *)(long)ctx->data;
+	struct ethhdr *eth = data;
+	int rc = XDP_DROP, forward_to;
+	long *value;
+	struct trie_value *prefix_value;
+	long *dest_mac = NULL, *src_mac = NULL;
+	u16 h_proto;
+	u64 nh_off;
+	u32 ipproto;
+	union key_4 key4;
+
+	nh_off = sizeof(*eth);
+	if (data + nh_off > data_end)
+		return rc;
+
+	h_proto = eth->h_proto;
+
+	if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) {
+		struct vlan_hdr *vhdr;
+
+		vhdr = data + nh_off;
+		nh_off += sizeof(struct vlan_hdr);
+		if (data + nh_off > data_end)
+			return rc;
+		h_proto = vhdr->h_vlan_encapsulated_proto;
+	}
+	if (h_proto == htons(ETH_P_ARP)) {
+		return XDP_PASS;
+	} else if (h_proto == htons(ETH_P_IP)) {
+		int src_ip = 0, dest_ip = 0;
+		struct direct_map *direct_entry;
+
+		ipproto = parse_ipv4(data, nh_off, data_end, &src_ip, &dest_ip);
+		direct_entry = (struct direct_map *)bpf_map_lookup_elem
+			(&exact_match, &dest_ip);
+		/*check for exact match, this would give a faster lookup*/
+		if (direct_entry && direct_entry->mac && direct_entry->arp.mac) {
+			src_mac = &direct_entry->mac;
+			dest_mac = &direct_entry->arp.mac;
+			forward_to = direct_entry->ifindex;
+		} else {
+			/*Look up in the trie for lpm*/
+			key4.b32[0] = 32;
+			key4.b8[4] = dest_ip % 0x100;
+			key4.b8[5] = (dest_ip >> 8) % 0x100;
+			key4.b8[6] = (dest_ip >> 16) % 0x100;
+			key4.b8[7] = (dest_ip >> 24) % 0x100;
+			prefix_value = ((struct trie_value *)bpf_map_lookup_elem
+					(&lpm_map, &key4));
+			if (!prefix_value) {
+				return XDP_DROP;
+			} else {
+				src_mac = &prefix_value->value;
+				if (src_mac) {
+					dest_mac = (long *)bpf_map_lookup_elem
+						(&arp_table, &dest_ip);
+					if (!dest_mac) {
+						if (prefix_value->gw) {
+							dest_ip = *(__be32 *)&prefix_value->gw;
+							dest_mac = (long *)bpf_map_lookup_elem(&arp_table, &dest_ip);
+						} else {
+							return XDP_DROP;
+						}
+					}
+					forward_to = prefix_value->ifindex;
+				} else {
+					return XDP_DROP;
+				}
+			}
+		}
+	} else {
+		ipproto = 0;
+	}
+	if (src_mac && dest_mac) {
+		set_src_dst_mac(data, src_mac, dest_mac);
+		value = bpf_map_lookup_elem(&rxcnt, &ipproto);
+		if (value)
+			*value += 1;
+		return  bpf_redirect(forward_to, 0);
+	}
+	return rc;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/xdp_router_ipv4_user.c b/samples/bpf/xdp_router_ipv4_user.c
new file mode 100644
index 0000000..32cc6b9
--- /dev/null
+++ b/samples/bpf/xdp_router_ipv4_user.c
@@ -0,0 +1,655 @@
+/*
+ * Copyright (C) 2017 Cavium, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License
+ * as published by the Free Software Foundation.
+ */
+#include <linux/bpf.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+#include <assert.h>
+#include <errno.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <unistd.h>
+#include "bpf_load.h"
+#include "libbpf.h"
+#include <arpa/inet.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <net/if.h>
+#include <netdb.h>
+#include <sys/ioctl.h>
+#include <sys/syscall.h>
+#include "bpf_util.h"
+
+int sock, sock_arp, flags = 0;
+char buf[8192];
+static int total_ifindex;
+int *ifindex_list;
+
+static int get_route_table(int rtm_family);
+static void int_exit(int sig)
+{
+	int i = 0;
+
+	for (i = 0; i < total_ifindex; i++)
+		set_link_xdp_fd(ifindex_list[i], -1, flags);
+	exit(0);
+}
+
+static void close_and_exit(int sig)
+{
+	int i = 0;
+
+	close(sock);
+	close(sock_arp);
+
+	for (i = 0; i < total_ifindex; i++)
+		set_link_xdp_fd(ifindex_list[i], -1, flags);
+	exit(0);
+}
+
+/* Get the mac address of the interface given interface name */
+static long *getmac(char *iface)
+{
+	int fd;
+	struct ifreq ifr;
+	long *mac = NULL;
+
+	fd = socket(AF_INET, SOCK_DGRAM, 0);
+	ifr.ifr_addr.sa_family = AF_INET;
+	strncpy(ifr.ifr_name, iface, IFNAMSIZ - 1);
+	ioctl(fd, SIOCGIFHWADDR, &ifr);
+	mac = (long *)ifr.ifr_hwaddr.sa_data;
+	close(fd);
+	return mac;
+}
+
+static int recv_msg(struct sockaddr_nl sock_addr, int sock)
+{
+	char *buf_ptr;
+	struct nlmsghdr *nh;
+	int len, nll = 0;
+
+	buf_ptr = buf;
+	while (1) {
+		len = recv(sock, buf_ptr, sizeof(buf) - nll, 0);
+		if (len < 0)
+			return len;
+
+		nh = (struct nlmsghdr *)buf_ptr;
+
+		if (nh->nlmsg_type == NLMSG_DONE)
+			break;
+		buf_ptr += len;
+		nll += len;
+		if ((sock_addr.nl_groups & RTMGRP_NEIGH) == RTMGRP_NEIGH)
+			break;
+
+		if ((sock_addr.nl_groups & RTMGRP_IPV4_ROUTE) == RTMGRP_IPV4_ROUTE)
+			break;
+	}
+	return nll;
+}
+
+/* Function to parse the route entry returned by netlink
+ * Updates the route entry related map entries
+ */
+static void read_route(struct nlmsghdr *nh, int nll)
+{
+	struct route_table {
+		int dst, gw, dst_len, iface, metric;
+		long *mac;
+		char *iface_name;
+	} route;
+	struct arp_table {
+		int dst;
+		long mac;
+	};
+
+	struct direct_map {
+		long mac;
+		int ifindex;
+		struct arp_table arp;
+	} direct_entry;
+	int i;
+	int rtm_family;
+	struct bpf_lpm_trie_key *prefix_key;
+	char dsts[24], gws[24], ifs[16], dsts_len[24], metrics[24];
+	struct rtmsg *rt_msg;
+	int rtl;
+	struct rtattr *rt_attr;
+
+	if (nh->nlmsg_type == RTM_DELROUTE)
+		printf("DELETING Route entry\n");
+	else if (nh->nlmsg_type == RTM_GETROUTE)
+		printf("READING Route entry\n");
+	else if (nh->nlmsg_type == RTM_NEWROUTE)
+		printf("NEW Route entry\n");
+	else
+		printf("%d\n", nh->nlmsg_type);
+
+	bzero(&route, sizeof(route));
+	printf("Destination\t\tGateway\t\tGenmask\t\tMetric\t\tIface\n");
+	for (; NLMSG_OK(nh, nll); nh = NLMSG_NEXT(nh, nll)) {
+		rt_msg = (struct rtmsg *)NLMSG_DATA(nh);
+		rtm_family = rt_msg->rtm_family;
+		if (rtm_family == AF_INET)
+			if (rt_msg->rtm_table != RT_TABLE_MAIN)
+				continue;
+		rt_attr = (struct rtattr *)RTM_RTA(rt_msg);
+		rtl = RTM_PAYLOAD(nh);
+
+		for (; RTA_OK(rt_attr, rtl); rt_attr = RTA_NEXT(rt_attr, rtl)) {
+			switch (rt_attr->rta_type) {
+			case NDA_DST:
+				sprintf(dsts, "%d",
+					*((int *)RTA_DATA(rt_attr)));
+				break;
+			case RTA_GATEWAY:
+				sprintf(gws, "%d", *((int *)RTA_DATA(rt_attr)));
+				break;
+			case RTA_OIF:
+				sprintf(ifs, "%d", *((int *)RTA_DATA(rt_attr)));
+				break;
+			case RTA_METRICS:
+				sprintf(metrics, "%d",
+					*((int *)RTA_DATA(rt_attr)));
+			default:
+				break;
+			}
+		}
+		sprintf(dsts_len, "%d", rt_msg->rtm_dst_len);
+
+		route.dst = atoi(dsts);
+		route.dst_len = atoi(dsts_len);
+		route.gw = atoi(gws);
+		route.iface = atoi(ifs);
+		route.metric = atoi(metrics);
+		route.iface_name = alloca(sizeof(char *) * IFNAMSIZ);
+		route.iface_name = if_indextoname(route.iface, route.iface_name);
+		route.mac = getmac(route.iface_name);
+		if (rtm_family == AF_INET) {
+			struct trie_value {
+				__u8 prefix[4];
+				long value;
+				int gw;
+				int ifindex;
+				int metric;
+			} *prefix_value;
+
+			prefix_key = alloca(sizeof(*prefix_key) + 3);
+			prefix_value = alloca(sizeof(*prefix_value));
+
+			prefix_key->prefixlen = 32;
+			prefix_key->prefixlen = route.dst_len;
+			direct_entry.mac = *route.mac & 0xffffffffffff;
+			direct_entry.ifindex = route.iface;
+			direct_entry.arp.mac = 0;
+			direct_entry.arp.dst = 0;
+			if (route.dst_len == 32) {
+				if (nh->nlmsg_type == RTM_DELROUTE) {
+					assert(bpf_map_delete_elem(
+								   map_fd[3],
+								   &route.dst
+								   ) == 0);
+				} else {
+					if (bpf_map_lookup_elem(map_fd[2],
+								&route.dst,
+								&direct_entry.arp.mac
+								) == 0)
+						direct_entry.arp.dst = route.dst;
+
+					assert(bpf_map_update_elem(map_fd[3],
+								   &route.dst,
+								   &direct_entry,
+								   0) == 0);
+				}
+			}
+			for (i = 0; i < 4; i++)
+				prefix_key->data[i] =
+					(route.dst >> i * 8) % 0x100;
+			printf("%3d.%d.%d.%d\t\t%3x\t\t%d\t\t%d\t\t%s\n",
+			       (int)prefix_key->data[0], (int)prefix_key->data[1],
+			       (int)prefix_key->data[2], (int)prefix_key->data[3],
+			       route.gw, route.dst_len, route.metric,
+			       route.iface_name);
+			if (bpf_map_lookup_elem(map_fd[0], prefix_key,
+						prefix_value) < 0) {
+				for (i = 0; i < 4; i++)
+					prefix_value->prefix[i] = prefix_key->data[i];
+				prefix_value->value = *route.mac & 0xffffffffffff;
+				prefix_value->ifindex = route.iface;
+				prefix_value->gw = route.gw;
+				prefix_value->metric = route.metric;
+
+				assert(bpf_map_update_elem(map_fd[0],
+							   prefix_key,
+							   prefix_value, 0
+							   ) == 0);
+			} else {
+				if (nh->nlmsg_type == RTM_DELROUTE) {
+					printf("deleting entry\n");
+					printf("prefix key=%d.%d.%d.%d/%d",
+					       prefix_key->data[0],
+					       prefix_key->data[1],
+					       prefix_key->data[2],
+					       prefix_key->data[3],
+					       prefix_key->prefixlen);
+					assert(bpf_map_delete_elem(map_fd[0],
+								   prefix_key
+								   ) == 0);
+					/* Rereading the route table to check if
+					 * there is an entry with the same
+					 * prefix but a different metric as the
+					 * deleted enty.
+					 */
+					get_route_table(AF_INET);
+				} else if (prefix_key->data[0] ==
+					   prefix_value->prefix[0] &&
+					   prefix_key->data[1] ==
+					   prefix_value->prefix[1] &&
+					   prefix_key->data[2] ==
+					   prefix_value->prefix[2] &&
+					   prefix_key->data[3] ==
+					   prefix_value->prefix[3] &&
+					   route.metric >= prefix_value->metric) {
+					continue;
+				} else {
+					for (i = 0; i < 4; i++)
+						prefix_value->prefix[i] =
+							prefix_key->data[i];
+					prefix_value->value =
+						*route.mac & 0xffffffffffff;
+					prefix_value->ifindex = route.iface;
+					prefix_value->gw = route.gw;
+					prefix_value->metric = route.metric;
+					assert(bpf_map_update_elem(
+								   map_fd[0],
+								   prefix_key,
+								   prefix_value,
+								   0) == 0);
+				}
+			}
+		}
+		bzero(&route, sizeof(route));
+		bzero(dsts, sizeof(dsts));
+		bzero(dsts_len, sizeof(dsts_len));
+		bzero(gws, sizeof(gws));
+		bzero(ifs, sizeof(ifs));
+		bzero(&route, sizeof(route));
+	}
+}
+
+/* Function to read the existing route table  when the process is launched*/
+static int get_route_table(int rtm_family)
+{
+	struct {
+		struct nlmsghdr nl;
+		struct rtmsg rt;
+		char buf[8192];
+	} req;
+
+	int sock, seq = 0;
+	struct sockaddr_nl sa;
+	struct msghdr msg;
+	struct iovec iov;
+	int ret = 0;
+	struct nlmsghdr *nh;
+	int nll;
+
+	sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
+	if (sock < 0) {
+		printf("open netlink socket: %s\n", strerror(errno));
+		return -1;
+	}
+	bzero(&sa, sizeof(sa));
+	sa.nl_family = AF_NETLINK;
+	if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
+		printf("bind to netlink: %s\n", strerror(errno));
+		ret = -1;
+		goto cleanup;
+	}
+	bzero(&req, sizeof(req));
+	req.nl.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
+	req.nl.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
+	req.nl.nlmsg_type = RTM_GETROUTE;
+
+	req.rt.rtm_family = rtm_family;
+	req.rt.rtm_table = RT_TABLE_MAIN;
+	req.nl.nlmsg_pid = 0;
+	req.nl.nlmsg_seq = ++seq;
+	bzero(&msg, sizeof(msg));
+	iov.iov_base = (void *)&req.nl;
+	iov.iov_len = req.nl.nlmsg_len;
+	msg.msg_iov = &iov;
+	msg.msg_iovlen = 1;
+	ret = sendmsg(sock, &msg, 0);
+	if (ret < 0) {
+		printf("send to netlink: %s\n", strerror(errno));
+		ret = -1;
+		goto cleanup;
+	}
+	bzero(buf, sizeof(buf));
+	nll = recv_msg(sa, sock);
+	if (nll < 0) {
+		printf("recv from netlink: %s\n", strerror(nll));
+		ret = -1;
+		goto cleanup;
+	}
+	nh = (struct nlmsghdr *)buf;
+	read_route(nh, nll);
+cleanup:
+	close(sock);
+	return ret;
+}
+
+/* Function to parse the arp entry returned by netlink
+ * Updates the arp entry related map entries
+ */
+static void read_arp(struct nlmsghdr *nh, int nll)
+{
+	struct arp_table {
+		int dst;
+		long mac;
+	} arp_entry;
+	struct direct_map {
+		long mac;
+		int ifindex;
+		struct arp_table arp;
+	} direct_entry;
+
+	char dsts[24], mac[24];
+	struct ndmsg *rt_msg;
+	int rtl, i = 0, ndm_family;
+	struct rtattr *rt_attr;
+
+	if (nh->nlmsg_type == RTM_GETNEIGH)
+		printf("READING arp entry\n");
+	printf("Address\tHwAddress\n");
+	for (; NLMSG_OK(nh, nll); nh = NLMSG_NEXT(nh, nll)) {
+		i++;
+		rt_msg = (struct ndmsg *)NLMSG_DATA(nh);
+		rt_attr = (struct rtattr *)RTM_RTA(rt_msg);
+		ndm_family = rt_msg->ndm_family;
+		rtl = RTM_PAYLOAD(nh);
+		for (; RTA_OK(rt_attr, rtl); rt_attr = RTA_NEXT(rt_attr, rtl)) {
+			switch (rt_attr->rta_type) {
+			case NDA_DST:
+				sprintf(dsts, "%d",
+					*((int *)RTA_DATA(rt_attr)));
+				break;
+			case NDA_LLADDR:
+				sprintf(mac, "%ld",
+					*((long *)RTA_DATA(rt_attr)));
+				break;
+			default:
+				break;
+			}
+		}
+		arp_entry.dst = atoi(dsts);
+		arp_entry.mac = atol(mac);
+		printf("%x\t\t%lx\n", arp_entry.dst, arp_entry.mac);
+		if (ndm_family == AF_INET) {
+			if (bpf_map_lookup_elem(map_fd[3], &arp_entry.dst,
+						&direct_entry) == 0) {
+				if (nh->nlmsg_type == RTM_DELNEIGH) {
+					direct_entry.arp.dst = 0;
+					direct_entry.arp.mac = 0;
+				} else if (nh->nlmsg_type == RTM_NEWNEIGH) {
+					direct_entry.arp.dst = arp_entry.dst;
+					direct_entry.arp.mac = arp_entry.mac;
+				}
+				assert(bpf_map_update_elem(map_fd[3],
+							   &arp_entry.dst,
+							   &direct_entry, 0
+							   ) == 0);
+				bzero(&direct_entry, sizeof(direct_entry));
+			}
+			if (nh->nlmsg_type == RTM_DELNEIGH) {
+				assert(bpf_map_delete_elem(map_fd[2],
+							   &arp_entry.dst) == 0);
+			} else if (nh->nlmsg_type == RTM_NEWNEIGH) {
+				assert(bpf_map_update_elem(map_fd[2],
+							   &arp_entry.dst,
+							   &arp_entry.mac, 0
+							   ) == 0);
+			}
+		}
+		bzero(&arp_entry, sizeof(arp_entry));
+		bzero(dsts, sizeof(dsts));
+	}
+}
+
+/* Function to read the existing arp table  when the process is launched*/
+static int get_arp_table(int rtm_family)
+{
+	struct {
+		struct nlmsghdr nl;
+		struct ndmsg rt;
+		char buf[8192];
+	} req;
+
+	int sock, seq = 0;
+	struct sockaddr_nl sa;
+	struct msghdr msg;
+	struct iovec iov;
+	int ret = 0;
+	struct nlmsghdr *nh;
+	int nll;
+
+	sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
+	if (sock < 0) {
+		printf("open netlink socket: %s\n", strerror(errno));
+		return -1;
+	}
+	bzero(&sa, sizeof(sa));
+	sa.nl_family = AF_NETLINK;
+	if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
+		printf("bind to netlink: %s\n", strerror(errno));
+		ret = -1;
+		goto cleanup;
+	}
+	bzero(&req, sizeof(req));
+	req.nl.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
+	req.nl.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
+	req.nl.nlmsg_type = RTM_GETNEIGH;
+	req.rt.ndm_state = NUD_REACHABLE;
+	req.rt.ndm_family = rtm_family;
+	req.nl.nlmsg_pid = 0;
+	req.nl.nlmsg_seq = ++seq;
+	bzero(&msg, sizeof(msg));
+	iov.iov_base = (void *)&req.nl;
+	iov.iov_len = req.nl.nlmsg_len;
+	msg.msg_iov = &iov;
+	msg.msg_iovlen = 1;
+	ret = sendmsg(sock, &msg, 0);
+	if (ret < 0) {
+		printf("send to netlink: %s\n", strerror(errno));
+		ret = -1;
+		goto cleanup;
+	}
+	bzero(buf, sizeof(buf));
+	nll = recv_msg(sa, sock);
+	if (nll < 0) {
+		printf("recv from netlink: %s\n", strerror(nll));
+		ret = -1;
+		goto cleanup;
+	}
+	nh = (struct nlmsghdr *)buf;
+	read_arp(nh, nll);
+cleanup:
+	close(sock);
+	return ret;
+}
+
+/* Function to keep track and update changes in route and arp table
+ * Give regular statistics of packets forwarded
+ */
+static int monitor_route(void)
+{
+	struct sockaddr_nl la, lr;
+	struct nlmsghdr *nh;
+	int nll, ret = 0;
+	const unsigned int nr_keys = 256;
+	int interval = 5;
+	unsigned int nr_cpus = bpf_num_possible_cpus();
+	__u64 values[nr_cpus], prev[nr_keys][nr_cpus];
+	__u32 key;
+	int i;
+	struct pollfd fds_route, fds_arp;
+
+	sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
+	if (sock < 0) {
+		printf("open netlink socket: %s\n", strerror(errno));
+		return -1;
+	}
+
+	fcntl(sock, F_SETFL, O_NONBLOCK);
+	bzero(&lr, sizeof(lr));
+	lr.nl_family = AF_NETLINK;
+	lr.nl_groups = RTMGRP_IPV6_ROUTE | RTMGRP_IPV4_ROUTE | RTMGRP_NOTIFY;
+	if (bind(sock, (struct sockaddr *)&lr, sizeof(lr)) < 0) {
+		printf("bind to netlink: %s\n", strerror(errno));
+		ret = -1;
+		goto cleanup;
+	}
+	fds_route.fd = sock;
+	fds_route.events = POLL_IN;
+
+	sock_arp = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
+	if (sock_arp < 0) {
+		printf("open netlink socket: %s\n", strerror(errno));
+		return -1;
+	}
+
+	fcntl(sock_arp, F_SETFL, O_NONBLOCK);
+	bzero(&la, sizeof(la));
+	la.nl_family = AF_NETLINK;
+	la.nl_groups = RTMGRP_NEIGH | RTMGRP_NOTIFY;
+	if (bind(sock_arp, (struct sockaddr *)&la, sizeof(la)) < 0) {
+		printf("bind to netlink: %s\n", strerror(errno));
+		ret = -1;
+		goto cleanup;
+	}
+	fds_arp.fd = sock_arp;
+	fds_arp.events = POLL_IN;
+
+	memset(prev, 0, sizeof(prev));
+	do {
+		signal(SIGINT, close_and_exit);
+		signal(SIGTERM, close_and_exit);
+
+		sleep(interval);
+		for (key = 0; key < nr_keys; key++) {
+			__u64 sum = 0;
+
+			assert(bpf_map_lookup_elem(map_fd[1], &key, values) == 0);
+			for (i = 0; i < nr_cpus; i++)
+				sum += (values[i] - prev[key][i]);
+			if (sum)
+				printf("proto %u: %10llu pkt/s\n",
+				       key, sum / interval);
+			memcpy(prev[key], values, sizeof(values));
+		}
+
+		bzero(buf, sizeof(buf));
+		if (poll(&fds_route, 1, 3) == POLL_IN) {
+			nll = recv_msg(lr, sock);
+			if (nll < 0) {
+				printf("recv from netlink: %s\n", strerror(nll));
+				ret = -1;
+				goto cleanup;
+			}
+
+			nh = (struct nlmsghdr *)buf;
+			printf("Routing table updated.\n");
+			read_route(nh, nll);
+		}
+		bzero(buf, sizeof(buf));
+		if (poll(&fds_arp, 1, 3) == POLL_IN) {
+			nll = recv_msg(la, sock_arp);
+			if (nll < 0) {
+				printf("recv from netlink: %s\n", strerror(nll));
+				ret = -1;
+				goto cleanup;
+			}
+
+			nh = (struct nlmsghdr *)buf;
+			read_arp(nh, nll);
+		}
+
+	} while (1);
+cleanup:
+	close(sock);
+	return ret;
+}
+
+int main(int ac, char **argv)
+{
+	char filename[256];
+	int i = 1;
+	char **ifname_list;
+
+	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+	if (ac < 2) {
+		printf("usage: %s [-S] Interface name list\n", argv[0]);
+		return 1;
+	}
+	if (!strcmp(argv[1], "-S")) {
+		flags = XDP_FLAGS_SKB_MODE;
+		total_ifindex = ac - 2;
+		ifname_list = (argv + 2);
+	} else {
+		flags = 0;
+		total_ifindex = ac - 1;
+		ifname_list = (argv + 1);
+	}
+	if (load_bpf_file(filename)) {
+		printf("%s", bpf_log_buf);
+		return 1;
+	}
+	printf("\n**************loading bpf file*********************\n\n\n");
+	if (!prog_fd[0]) {
+		printf("load_bpf_file: %s\n", strerror(errno));
+		return 1;
+	}
+	ifindex_list = (int *)malloc(total_ifindex * sizeof(int *));
+	for (i = 0; i < total_ifindex; i++) {
+		ifindex_list[i] = if_nametoindex(ifname_list[i]);
+		if (!ifindex_list[i]) {
+			printf("Couldn't translate interface name: %s", strerror(errno));
+			return 1;
+		}
+	}
+	for (i = 0; i < total_ifindex; i++) {
+		if (set_link_xdp_fd(ifindex_list[i], prog_fd[0], flags) < 0) {
+			printf("link set xdp fd failed\n");
+			int recovery_index = i;
+
+			for (i = 0; i < recovery_index; i++)
+				set_link_xdp_fd(ifindex_list[i], -1, flags);
+
+			return 1;
+		}
+		printf("Attached to %d\n", ifindex_list[i]);
+	}
+	signal(SIGINT, int_exit);
+	signal(SIGTERM, int_exit);
+
+	printf("*******************ROUTE TABLE*************************\n\n\n");
+	get_route_table(AF_INET);
+	printf("*******************ARP TABLE***************************\n\n\n");
+	get_arp_table(AF_INET);
+	if (monitor_route() < 0) {
+		printf("Error in receiving route update");
+		return 1;
+	}
+
+	return 0;
+}
-- 
1.7.1

^ permalink raw reply related

* [patch net-next 0/4] net: sched: get rid of cls_flower->egress_dev
From: Jiri Pirko @ 2017-10-10  7:30 UTC (permalink / raw)
  To: netdev; +Cc: davem, jhs, xiyou.wangcong, saeedm, matanb, leonro, mlxsw

From: Jiri Pirko <jiri@mellanox.com>

Introduction of cls_flower->egress_dev was a workaround. Turned out
to be a bit ugly hack. So replace it with more generic and reusable
infrastructure.

This is a dependency of shared block introduction that will be send as
a follow-up patchsets group.

Jiri Pirko (4):
  net: sched: make tc_action_ops->get_dev return dev and avoid passing
    net
  net: sched: introduce per-egress action device callbacks
  net: sched: convert cls_flower->egress_dev users to tc_setup_cb_egdev
    infra
  net: sched: remove unused tcf_exts_get_dev helper and
    cls_flower->egress_dev

 drivers/net/ethernet/mellanox/mlx5/core/en.h      |   3 +
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c |   4 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c  |  31 ++--
 include/net/act_api.h                             |  37 +++-
 include/net/pkt_cls.h                             |   6 +-
 include/net/tc_act/tc_mirred.h                    |   1 +
 net/sched/act_api.c                               | 203 ++++++++++++++++++++++
 net/sched/act_mirred.c                            |  13 +-
 net/sched/cls_api.c                               |  35 ++--
 net/sched/cls_flower.c                            |  63 +++----
 10 files changed, 331 insertions(+), 65 deletions(-)

-- 
2.9.5

^ permalink raw reply

* [patch net-next 1/4] net: sched: make tc_action_ops->get_dev return dev and avoid passing net
From: Jiri Pirko @ 2017-10-10  7:30 UTC (permalink / raw)
  To: netdev; +Cc: davem, jhs, xiyou.wangcong, saeedm, matanb, leonro, mlxsw
In-Reply-To: <20171010073016.3682-1-jiri@resnulli.us>

From: Jiri Pirko <jiri@mellanox.com>

Return dev directly, NULL if not possible. That is enough.

Makes no sense to pass struct net * to get_dev op, as there is only one
net possible, the one the action was created in. So just store it in
mirred priv and use directly.

Rename the mirred op callback function.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
---
 include/net/act_api.h          |  3 +--
 include/net/tc_act/tc_mirred.h |  1 +
 net/sched/act_mirred.c         | 13 +++++--------
 net/sched/cls_api.c            |  6 ++----
 4 files changed, 9 insertions(+), 14 deletions(-)

diff --git a/include/net/act_api.h b/include/net/act_api.h
index b944e0e..900168a 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -93,8 +93,7 @@ struct tc_action_ops {
 	int     (*walk)(struct net *, struct sk_buff *,
 			struct netlink_callback *, int, const struct tc_action_ops *);
 	void	(*stats_update)(struct tc_action *, u64, u32, u64);
-	int	(*get_dev)(const struct tc_action *a, struct net *net,
-			   struct net_device **mirred_dev);
+	struct net_device *(*get_dev)(const struct tc_action *a);
 };
 
 struct tc_action_net {
diff --git a/include/net/tc_act/tc_mirred.h b/include/net/tc_act/tc_mirred.h
index 604bc31..21a6565 100644
--- a/include/net/tc_act/tc_mirred.h
+++ b/include/net/tc_act/tc_mirred.h
@@ -10,6 +10,7 @@ struct tcf_mirred {
 	int			tcfm_ifindex;
 	bool			tcfm_mac_header_xmit;
 	struct net_device __rcu	*tcfm_dev;
+	struct net		*net;
 	struct list_head	tcfm_list;
 };
 #define to_mirred(a) ((struct tcf_mirred *)a)
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 416627c..8b3e5938 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -140,6 +140,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
 	m->tcfm_eaction = parm->eaction;
 	if (dev != NULL) {
 		m->tcfm_ifindex = parm->ifindex;
+		m->net = net;
 		if (ret != ACT_P_CREATED)
 			dev_put(rcu_dereference_protected(m->tcfm_dev, 1));
 		dev_hold(dev);
@@ -313,15 +314,11 @@ static struct notifier_block mirred_device_notifier = {
 	.notifier_call = mirred_device_event,
 };
 
-static int tcf_mirred_device(const struct tc_action *a, struct net *net,
-			     struct net_device **mirred_dev)
+static struct net_device *tcf_mirred_get_dev(const struct tc_action *a)
 {
-	int ifindex = tcf_mirred_ifindex(a);
+	struct tcf_mirred *m = to_mirred(a);
 
-	*mirred_dev = __dev_get_by_index(net, ifindex);
-	if (!*mirred_dev)
-		return -EINVAL;
-	return 0;
+	return __dev_get_by_index(m->net, m->tcfm_ifindex);
 }
 
 static struct tc_action_ops act_mirred_ops = {
@@ -336,7 +333,7 @@ static struct tc_action_ops act_mirred_ops = {
 	.walk		=	tcf_mirred_walker,
 	.lookup		=	tcf_mirred_search,
 	.size		=	sizeof(struct tcf_mirred),
-	.get_dev	=	tcf_mirred_device,
+	.get_dev	=	tcf_mirred_get_dev,
 };
 
 static __net_init int mirred_init_net(struct net *net)
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 0b2219a..450873b 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -1016,10 +1016,8 @@ int tcf_exts_get_dev(struct net_device *dev, struct tcf_exts *exts,
 
 	tcf_exts_to_list(exts, &actions);
 	list_for_each_entry(a, &actions, list) {
-		if (a->ops->get_dev) {
-			a->ops->get_dev(a, dev_net(dev), hw_dev);
-			break;
-		}
+		if (a->ops->get_dev)
+			*hw_dev = a->ops->get_dev(a);
 	}
 	if (*hw_dev)
 		return 0;
-- 
2.9.5

^ permalink raw reply related

* [patch net-next 2/4] net: sched: introduce per-egress action device callbacks
From: Jiri Pirko @ 2017-10-10  7:30 UTC (permalink / raw)
  To: netdev; +Cc: davem, jhs, xiyou.wangcong, saeedm, matanb, leonro, mlxsw
In-Reply-To: <20171010073016.3682-1-jiri@resnulli.us>

From: Jiri Pirko <jiri@mellanox.com>

Introduce infrastructure that allows drivers to register callbacks that
are called whenever tc would offload inserted rule and specified device
acts as tc action egress device.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
---
 include/net/act_api.h |  34 +++++++++
 include/net/pkt_cls.h |   2 +
 net/sched/act_api.c   | 203 ++++++++++++++++++++++++++++++++++++++++++++++++++
 net/sched/cls_api.c   |  30 ++++++++
 4 files changed, 269 insertions(+)

diff --git a/include/net/act_api.h b/include/net/act_api.h
index 900168a..f5e8c90 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -174,4 +174,38 @@ static inline void tcf_action_stats_update(struct tc_action *a, u64 bytes,
 #endif
 }
 
+typedef int tc_setup_cb_t(enum tc_setup_type type,
+			  void *type_data, void *cb_priv);
+
+#ifdef CONFIG_NET_CLS_ACT
+int tc_setup_cb_egdev_register(const struct net_device *dev,
+			       tc_setup_cb_t *cb, void *cb_priv);
+void tc_setup_cb_egdev_unregister(const struct net_device *dev,
+				  tc_setup_cb_t *cb, void *cb_priv);
+int tc_setup_cb_egdev_call(const struct net_device *dev,
+			   enum tc_setup_type type, void *type_data,
+			   bool err_stop);
+#else
+static inline
+int tc_setup_cb_egdev_register(const struct net_device *dev,
+			       tc_setup_cb_t *cb, void *cb_priv)
+{
+	return 0;
+}
+
+static inline
+void tc_setup_cb_egdev_unregister(const struct net_device *dev,
+				  tc_setup_cb_t *cb, void *cb_priv)
+{
+}
+
+static inline
+int tc_setup_cb_egdev_call(const struct net_device *dev,
+			   enum tc_setup_type type, void *type_data,
+			   bool err_stop)
+{
+	return 0;
+}
+#endif
+
 #endif
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index e80edd8..6f8149c 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -206,6 +206,8 @@ int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts);
 int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts);
 int tcf_exts_get_dev(struct net_device *dev, struct tcf_exts *exts,
 		     struct net_device **hw_dev);
+int tcf_exts_egdev_cb_call(struct tcf_exts *exts, enum tc_setup_type type,
+			   void *type_data, bool err_stop);
 
 /**
  * struct tcf_pkt_info - packet information
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index da6fa82..92a9efb 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -21,6 +21,8 @@
 #include <linux/kmod.h>
 #include <linux/err.h>
 #include <linux/module.h>
+#include <linux/rhashtable.h>
+#include <linux/list.h>
 #include <net/net_namespace.h>
 #include <net/sock.h>
 #include <net/sch_generic.h>
@@ -1249,8 +1251,209 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
 	return skb->len;
 }
 
+struct tcf_action_net {
+	struct rhashtable egdev_ht;
+};
+
+static unsigned int tcf_action_net_id;
+
+struct tcf_action_egdev_cb {
+	struct list_head list;
+	tc_setup_cb_t *cb;
+	void *cb_priv;
+};
+
+struct tcf_action_egdev {
+	struct rhash_head ht_node;
+	const struct net_device *dev;
+	unsigned int refcnt;
+	struct list_head cb_list;
+};
+
+static const struct rhashtable_params tcf_action_egdev_ht_params = {
+	.key_offset = offsetof(struct tcf_action_egdev, dev),
+	.head_offset = offsetof(struct tcf_action_egdev, ht_node),
+	.key_len = sizeof(const struct net_device *),
+};
+
+static struct tcf_action_egdev *
+tcf_action_egdev_lookup(const struct net_device *dev)
+{
+	struct net *net = dev_net(dev);
+	struct tcf_action_net *tan = net_generic(net, tcf_action_net_id);
+
+	return rhashtable_lookup_fast(&tan->egdev_ht, &dev,
+				      tcf_action_egdev_ht_params);
+}
+
+static struct tcf_action_egdev *
+tcf_action_egdev_get(const struct net_device *dev)
+{
+	struct tcf_action_egdev *egdev;
+	struct tcf_action_net *tan;
+
+	egdev = tcf_action_egdev_lookup(dev);
+	if (egdev)
+		goto inc_ref;
+
+	egdev = kzalloc(sizeof(*egdev), GFP_KERNEL);
+	if (!egdev)
+		return NULL;
+	INIT_LIST_HEAD(&egdev->cb_list);
+	tan = net_generic(dev_net(dev), tcf_action_net_id);
+	rhashtable_insert_fast(&tan->egdev_ht, &egdev->ht_node,
+			       tcf_action_egdev_ht_params);
+
+inc_ref:
+	egdev->refcnt++;
+	return egdev;
+}
+
+static void tcf_action_egdev_put(struct tcf_action_egdev *egdev)
+{
+	struct tcf_action_net *tan;
+
+	if (--egdev->refcnt)
+		return;
+	tan = net_generic(dev_net(egdev->dev), tcf_action_net_id);
+	rhashtable_remove_fast(&tan->egdev_ht, &egdev->ht_node,
+			       tcf_action_egdev_ht_params);
+	kfree(egdev);
+}
+
+static struct tcf_action_egdev_cb *
+tcf_action_egdev_cb_lookup(struct tcf_action_egdev *egdev,
+			   tc_setup_cb_t *cb, void *cb_priv)
+{
+	struct tcf_action_egdev_cb *egdev_cb;
+
+	list_for_each_entry(egdev_cb, &egdev->cb_list, list)
+		if (egdev_cb->cb == cb && egdev_cb->cb_priv == cb_priv)
+			return egdev_cb;
+	return NULL;
+}
+
+static int tcf_action_egdev_cb_call(struct tcf_action_egdev *egdev,
+				    enum tc_setup_type type,
+				    void *type_data, bool err_stop)
+{
+	struct tcf_action_egdev_cb *egdev_cb;
+	int ok_count = 0;
+	int err;
+
+	list_for_each_entry(egdev_cb, &egdev->cb_list, list) {
+		err = egdev_cb->cb(type, type_data, egdev_cb->cb_priv);
+		if (err) {
+			if (err_stop)
+				return err;
+		} else {
+			ok_count++;
+		}
+	}
+	return ok_count;
+}
+
+static int tcf_action_egdev_cb_add(struct tcf_action_egdev *egdev,
+				   tc_setup_cb_t *cb, void *cb_priv)
+{
+	struct tcf_action_egdev_cb *egdev_cb;
+
+	egdev_cb = tcf_action_egdev_cb_lookup(egdev, cb, cb_priv);
+	if (WARN_ON(egdev_cb))
+		return -EEXIST;
+	egdev_cb = kzalloc(sizeof(*egdev_cb), GFP_KERNEL);
+	if (!egdev_cb)
+		return -ENOMEM;
+	egdev_cb->cb = cb;
+	egdev_cb->cb_priv = cb_priv;
+	list_add(&egdev_cb->list, &egdev->cb_list);
+	return 0;
+}
+
+static void tcf_action_egdev_cb_del(struct tcf_action_egdev *egdev,
+				    tc_setup_cb_t *cb, void *cb_priv)
+{
+	struct tcf_action_egdev_cb *egdev_cb;
+
+	egdev_cb = tcf_action_egdev_cb_lookup(egdev, cb, cb_priv);
+	if (WARN_ON(!egdev_cb))
+		return;
+	list_del(&egdev_cb->list);
+	kfree(egdev_cb);
+}
+
+int tc_setup_cb_egdev_register(const struct net_device *dev,
+			       tc_setup_cb_t *cb, void *cb_priv)
+{
+	struct tcf_action_egdev *egdev = tcf_action_egdev_get(dev);
+	int err;
+
+	if (!egdev)
+		return -ENOMEM;
+	err = tcf_action_egdev_cb_add(egdev, cb, cb_priv);
+	if (err)
+		goto err_cb_add;
+	return 0;
+
+err_cb_add:
+	tcf_action_egdev_put(egdev);
+	return err;
+}
+EXPORT_SYMBOL_GPL(tc_setup_cb_egdev_register);
+
+void tc_setup_cb_egdev_unregister(const struct net_device *dev,
+				  tc_setup_cb_t *cb, void *cb_priv)
+{
+	struct tcf_action_egdev *egdev = tcf_action_egdev_lookup(dev);
+
+	if (WARN_ON(!egdev))
+		return;
+	tcf_action_egdev_cb_del(egdev, cb, cb_priv);
+	tcf_action_egdev_put(egdev);
+}
+EXPORT_SYMBOL_GPL(tc_setup_cb_egdev_unregister);
+
+int tc_setup_cb_egdev_call(const struct net_device *dev,
+			   enum tc_setup_type type, void *type_data,
+			   bool err_stop)
+{
+	struct tcf_action_egdev *egdev = tcf_action_egdev_lookup(dev);
+
+	if (!egdev)
+		return 0;
+	return tcf_action_egdev_cb_call(egdev, type, type_data, err_stop);
+}
+EXPORT_SYMBOL_GPL(tc_setup_cb_egdev_call);
+
+static __net_init int tcf_action_net_init(struct net *net)
+{
+	struct tcf_action_net *tan = net_generic(net, tcf_action_net_id);
+
+	return rhashtable_init(&tan->egdev_ht, &tcf_action_egdev_ht_params);
+}
+
+static void __net_exit tcf_action_net_exit(struct net *net)
+{
+	struct tcf_action_net *tan = net_generic(net, tcf_action_net_id);
+
+	rhashtable_destroy(&tan->egdev_ht);
+}
+
+static struct pernet_operations tcf_action_net_ops = {
+	.init = tcf_action_net_init,
+	.exit = tcf_action_net_exit,
+	.id = &tcf_action_net_id,
+	.size = sizeof(struct tcf_action_net),
+};
+
 static int __init tc_action_init(void)
 {
+	int err;
+
+	err = register_pernet_subsys(&tcf_action_net_ops);
+	if (err)
+		return err;
+
 	rtnl_register(PF_UNSPEC, RTM_NEWACTION, tc_ctl_action, NULL, 0);
 	rtnl_register(PF_UNSPEC, RTM_DELACTION, tc_ctl_action, NULL, 0);
 	rtnl_register(PF_UNSPEC, RTM_GETACTION, tc_ctl_action, tc_dump_action,
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 450873b..99f9432 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -1026,6 +1026,36 @@ int tcf_exts_get_dev(struct net_device *dev, struct tcf_exts *exts,
 }
 EXPORT_SYMBOL(tcf_exts_get_dev);
 
+int tcf_exts_egdev_cb_call(struct tcf_exts *exts, enum tc_setup_type type,
+			   void *type_data, bool err_stop)
+{
+	int ok_count = 0;
+#ifdef CONFIG_NET_CLS_ACT
+	const struct tc_action *a;
+	struct net_device *dev;
+	LIST_HEAD(actions);
+	int ret;
+
+	if (!tcf_exts_has_actions(exts))
+		return 0;
+
+	tcf_exts_to_list(exts, &actions);
+	list_for_each_entry(a, &actions, list) {
+		if (!a->ops->get_dev)
+			continue;
+		dev = a->ops->get_dev(a);
+		if (!dev || !tc_can_offload(dev))
+			continue;
+		ret = tc_setup_cb_egdev_call(dev, type, type_data, err_stop);
+		if (ret < 0)
+			return ret;
+		ok_count += ret;
+	}
+#endif
+	return ok_count;
+}
+EXPORT_SYMBOL(tcf_exts_egdev_cb_call);
+
 static int __init tc_filter_init(void)
 {
 	rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_ctl_tfilter, NULL, 0);
-- 
2.9.5

^ permalink raw reply related

* [patch net-next 3/4] net: sched: convert cls_flower->egress_dev users to tc_setup_cb_egdev infra
From: Jiri Pirko @ 2017-10-10  7:30 UTC (permalink / raw)
  To: netdev; +Cc: davem, jhs, xiyou.wangcong, saeedm, matanb, leonro, mlxsw
In-Reply-To: <20171010073016.3682-1-jiri@resnulli.us>

From: Jiri Pirko <jiri@mellanox.com>

The only user of cls_flower->egress_dev is mlx5. So do the conversion
there alongside with the code originating the call in cls_flower
function fl_hw_replace_filter to the newly introduced egress device
callback infrastucture.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h      |  3 ++
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c |  4 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c  | 31 +++++++----
 include/net/pkt_cls.h                             |  5 +-
 net/sched/cls_api.c                               | 13 +++--
 net/sched/cls_flower.c                            | 63 ++++++++++++-----------
 6 files changed, 73 insertions(+), 46 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index cc13d3d..5ec6d3e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -1081,6 +1081,9 @@ int mlx5e_ethtool_get_ts_info(struct mlx5e_priv *priv,
 int mlx5e_ethtool_flash_device(struct mlx5e_priv *priv,
 			       struct ethtool_flash *flash);
 
+int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type,
+		   void *type_data);
+
 /* mlx5e generic netdev management API */
 struct net_device*
 mlx5e_create_netdev(struct mlx5_core_dev *mdev, const struct mlx5e_profile *profile,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index cc11bbb..2a32102 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -3108,8 +3108,8 @@ static int mlx5e_setup_tc_cls_flower(struct net_device *dev,
 }
 #endif
 
-static int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type,
-			  void *type_data)
+int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type,
+		   void *type_data)
 {
 	switch (type) {
 #ifdef CONFIG_MLX5_ESWITCH
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 45e03c4..765fc74 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -34,6 +34,7 @@
 #include <linux/mlx5/fs.h>
 #include <net/switchdev.h>
 #include <net/pkt_cls.h>
+#include <net/act_api.h>
 #include <net/netevent.h>
 #include <net/arp.h>
 
@@ -667,14 +668,6 @@ mlx5e_rep_setup_tc_cls_flower(struct net_device *dev,
 	    cls_flower->common.chain_index)
 		return -EOPNOTSUPP;
 
-	if (cls_flower->egress_dev) {
-		struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
-
-		dev = mlx5_eswitch_get_uplink_netdev(esw);
-		return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSFLOWER,
-						     cls_flower);
-	}
-
 	switch (cls_flower->command) {
 	case TC_CLSFLOWER_REPLACE:
 		return mlx5e_configure_flower(priv, cls_flower);
@@ -698,6 +691,14 @@ static int mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type,
 	}
 }
 
+static int mlx5e_rep_setup_tc_cb(enum tc_setup_type type, void *type_data,
+				 void *cb_priv)
+{
+	struct net_device *dev = cb_priv;
+
+	return mlx5e_setup_tc(dev, type, type_data);
+}
+
 bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv)
 {
 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
@@ -1017,15 +1018,24 @@ mlx5e_vport_rep_load(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep)
 		goto err_detach_netdev;
 	}
 
+	err = tc_setup_cb_egdev_register(netdev, mlx5e_rep_setup_tc_cb,
+					 mlx5_eswitch_get_uplink_netdev(esw));
+	if (err)
+		goto err_neigh_cleanup;
+
 	err = register_netdev(netdev);
 	if (err) {
 		pr_warn("Failed to register representor netdev for vport %d\n",
 			rep->vport);
-		goto err_neigh_cleanup;
+		goto err_egdev_cleanup;
 	}
 
 	return 0;
 
+err_egdev_cleanup:
+	tc_setup_cb_egdev_unregister(netdev, mlx5e_rep_setup_tc_cb,
+				     mlx5_eswitch_get_uplink_netdev(esw));
+
 err_neigh_cleanup:
 	mlx5e_rep_neigh_cleanup(rpriv);
 
@@ -1047,7 +1057,8 @@ mlx5e_vport_rep_unload(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep)
 	void *ppriv = priv->ppriv;
 
 	unregister_netdev(rep->netdev);
-
+	tc_setup_cb_egdev_unregister(netdev, mlx5e_rep_setup_tc_cb,
+				     mlx5_eswitch_get_uplink_netdev(esw));
 	mlx5e_rep_neigh_cleanup(rpriv);
 	mlx5e_detach_netdev(priv);
 	mlx5e_destroy_netdev(priv);
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 6f8149c..c0bdf5c 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -206,8 +206,6 @@ int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts);
 int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts);
 int tcf_exts_get_dev(struct net_device *dev, struct tcf_exts *exts,
 		     struct net_device **hw_dev);
-int tcf_exts_egdev_cb_call(struct tcf_exts *exts, enum tc_setup_type type,
-			   void *type_data, bool err_stop);
 
 /**
  * struct tcf_pkt_info - packet information
@@ -407,6 +405,9 @@ tcf_match_indev(struct sk_buff *skb, int ifindex)
 }
 #endif /* CONFIG_NET_CLS_IND */
 
+int tc_setup_cb_call(struct tcf_exts *exts, enum tc_setup_type type,
+		     void *type_data, bool err_stop);
+
 struct tc_cls_common_offload {
 	u32 chain_index;
 	__be16 protocol;
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 99f9432..51994a2 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -1026,8 +1026,9 @@ int tcf_exts_get_dev(struct net_device *dev, struct tcf_exts *exts,
 }
 EXPORT_SYMBOL(tcf_exts_get_dev);
 
-int tcf_exts_egdev_cb_call(struct tcf_exts *exts, enum tc_setup_type type,
-			   void *type_data, bool err_stop)
+static int tc_exts_setup_cb_egdev_call(struct tcf_exts *exts,
+				       enum tc_setup_type type,
+				       void *type_data, bool err_stop)
 {
 	int ok_count = 0;
 #ifdef CONFIG_NET_CLS_ACT
@@ -1054,7 +1055,13 @@ int tcf_exts_egdev_cb_call(struct tcf_exts *exts, enum tc_setup_type type,
 #endif
 	return ok_count;
 }
-EXPORT_SYMBOL(tcf_exts_egdev_cb_call);
+
+int tc_setup_cb_call(struct tcf_exts *exts, enum tc_setup_type type,
+		     void *type_data, bool err_stop)
+{
+	return tc_exts_setup_cb_egdev_call(exts, type, type_data, err_stop);
+}
+EXPORT_SYMBOL(tc_setup_cb_call);
 
 static int __init tc_filter_init(void)
 {
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index db831ac..5b7bb96 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -88,7 +88,6 @@ struct cls_fl_filter {
 	u32 handle;
 	u32 flags;
 	struct rcu_head	rcu;
-	struct net_device *hw_dev;
 };
 
 static unsigned short int fl_mask_range(const struct fl_flow_mask *mask)
@@ -201,16 +200,17 @@ static void fl_destroy_filter(struct rcu_head *head)
 static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f)
 {
 	struct tc_cls_flower_offload cls_flower = {};
-	struct net_device *dev = f->hw_dev;
-
-	if (!tc_can_offload(dev))
-		return;
+	struct net_device *dev = tp->q->dev_queue->dev;
 
 	tc_cls_common_offload_init(&cls_flower.common, tp);
 	cls_flower.command = TC_CLSFLOWER_DESTROY;
 	cls_flower.cookie = (unsigned long) f;
 
-	dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSFLOWER, &cls_flower);
+	if (tc_can_offload(dev))
+		dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSFLOWER,
+					      &cls_flower);
+	tc_setup_cb_call(&f->exts, TC_SETUP_CLSFLOWER,
+			 &cls_flower, false);
 }
 
 static int fl_hw_replace_filter(struct tcf_proto *tp,
@@ -220,20 +220,9 @@ static int fl_hw_replace_filter(struct tcf_proto *tp,
 {
 	struct net_device *dev = tp->q->dev_queue->dev;
 	struct tc_cls_flower_offload cls_flower = {};
+	bool skip_sw = tc_skip_sw(f->flags);
 	int err;
 
-	if (!tc_can_offload(dev)) {
-		if (tcf_exts_get_dev(dev, &f->exts, &f->hw_dev) ||
-		    (f->hw_dev && !tc_can_offload(f->hw_dev))) {
-			f->hw_dev = dev;
-			return tc_skip_sw(f->flags) ? -EINVAL : 0;
-		}
-		dev = f->hw_dev;
-		cls_flower.egress_dev = true;
-	} else {
-		f->hw_dev = dev;
-	}
-
 	tc_cls_common_offload_init(&cls_flower.common, tp);
 	cls_flower.command = TC_CLSFLOWER_REPLACE;
 	cls_flower.cookie = (unsigned long) f;
@@ -242,31 +231,47 @@ static int fl_hw_replace_filter(struct tcf_proto *tp,
 	cls_flower.key = &f->mkey;
 	cls_flower.exts = &f->exts;
 
-	err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSFLOWER,
-					    &cls_flower);
-	if (!err)
-		f->flags |= TCA_CLS_FLAGS_IN_HW;
+	if (tc_can_offload(dev)) {
+		err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSFLOWER,
+						    &cls_flower);
+		if (err) {
+			if (skip_sw)
+				return err;
+		} else {
+			f->flags |= TCA_CLS_FLAGS_IN_HW;
+		}
+	}
 
-	if (tc_skip_sw(f->flags))
+	err = tc_setup_cb_call(&f->exts, TC_SETUP_CLSFLOWER,
+			       &cls_flower, skip_sw);
+	if (err < 0) {
+		fl_hw_destroy_filter(tp, f);
 		return err;
+	} else if (err > 0) {
+		f->flags |= TCA_CLS_FLAGS_IN_HW;
+	}
+
+	if (skip_sw && !(f->flags & TCA_CLS_FLAGS_IN_HW))
+		return -EINVAL;
+
 	return 0;
 }
 
 static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f)
 {
 	struct tc_cls_flower_offload cls_flower = {};
-	struct net_device *dev = f->hw_dev;
-
-	if (!tc_can_offload(dev))
-		return;
+	struct net_device *dev = tp->q->dev_queue->dev;
 
 	tc_cls_common_offload_init(&cls_flower.common, tp);
 	cls_flower.command = TC_CLSFLOWER_STATS;
 	cls_flower.cookie = (unsigned long) f;
 	cls_flower.exts = &f->exts;
 
-	dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSFLOWER,
-				      &cls_flower);
+	if (tc_can_offload(dev))
+		dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSFLOWER,
+					      &cls_flower);
+	tc_setup_cb_call(&f->exts, TC_SETUP_CLSFLOWER,
+			 &cls_flower, false);
 }
 
 static void __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f)
-- 
2.9.5

^ permalink raw reply related

* [patch net-next 4/4] net: sched: remove unused tcf_exts_get_dev helper and cls_flower->egress_dev
From: Jiri Pirko @ 2017-10-10  7:30 UTC (permalink / raw)
  To: netdev; +Cc: davem, jhs, xiyou.wangcong, saeedm, matanb, leonro, mlxsw
In-Reply-To: <20171010073016.3682-1-jiri@resnulli.us>

From: Jiri Pirko <jiri@mellanox.com>

The helper and the struct field ares no longer used by any code,
so remove them.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
---
 include/net/pkt_cls.h |  3 ---
 net/sched/cls_api.c   | 22 ----------------------
 2 files changed, 25 deletions(-)

diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index c0bdf5c..f526374 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -204,8 +204,6 @@ void tcf_exts_destroy(struct tcf_exts *exts);
 void tcf_exts_change(struct tcf_exts *dst, struct tcf_exts *src);
 int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts);
 int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts);
-int tcf_exts_get_dev(struct net_device *dev, struct tcf_exts *exts,
-		     struct net_device **hw_dev);
 
 /**
  * struct tcf_pkt_info - packet information
@@ -517,7 +515,6 @@ struct tc_cls_flower_offload {
 	struct fl_flow_key *mask;
 	struct fl_flow_key *key;
 	struct tcf_exts *exts;
-	bool egress_dev;
 };
 
 enum tc_matchall_command {
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 51994a2..2977b8a 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -1004,28 +1004,6 @@ int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts)
 }
 EXPORT_SYMBOL(tcf_exts_dump_stats);
 
-int tcf_exts_get_dev(struct net_device *dev, struct tcf_exts *exts,
-		     struct net_device **hw_dev)
-{
-#ifdef CONFIG_NET_CLS_ACT
-	const struct tc_action *a;
-	LIST_HEAD(actions);
-
-	if (!tcf_exts_has_actions(exts))
-		return -EINVAL;
-
-	tcf_exts_to_list(exts, &actions);
-	list_for_each_entry(a, &actions, list) {
-		if (a->ops->get_dev)
-			*hw_dev = a->ops->get_dev(a);
-	}
-	if (*hw_dev)
-		return 0;
-#endif
-	return -EOPNOTSUPP;
-}
-EXPORT_SYMBOL(tcf_exts_get_dev);
-
 static int tc_exts_setup_cb_egdev_call(struct tcf_exts *exts,
 				       enum tc_setup_type type,
 				       void *type_data, bool err_stop)
-- 
2.9.5

^ permalink raw reply related

* Re: Patch "udp: perform source validation for mcast early demux" has been added to the 4.13-stable tree
From: Greg KH @ 2017-10-10  7:31 UTC (permalink / raw)
  To: Paolo Abeni, davem; +Cc: stable, stable-commits, netdev
In-Reply-To: <1507619766.2719.14.camel@redhat.com>

On Tue, Oct 10, 2017 at 09:16:06AM +0200, Paolo Abeni wrote:
> On Mon, 2017-10-09 at 10:54 +0200, Greg KH wrote:
> > On Mon, Oct 09, 2017 at 10:02:14AM +0200, Paolo Abeni wrote:
> > > On Mon, 2017-10-09 at 09:57 +0200, Greg KH wrote:
> > > > On Mon, Oct 09, 2017 at 09:37:31AM +0200, Paolo Abeni wrote:
> > > > > On Mon, 2017-10-09 at 09:35 +0200, gregkh@linuxfoundation.org wrote:
> > > > > > This is a note to let you know that I've just added the patch titled
> > > > > > 
> > > > > >     udp: perform source validation for mcast early demux
> > > > > > 
> > > > > > to the 4.13-stable tree which can be found at:
> > > > > >     http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary
> > > > > > 
> > > > > > The filename of the patch is:
> > > > > >      udp-perform-source-validation-for-mcast-early-demux.patch
> > > > > > and it can be found in the queue-4.13 subdirectory.
> > > > > > 
> > > > > > If you, or anyone else, feels it should not be added to the stable tree,
> > > > > > please let <stable@vger.kernel.org> know about it.
> > > > > 
> > > > > Please, keep this one on-hold. It needs a relevant follow-up I'm going
> > > > > to post soon!
> > > > 
> > > > Can I keep the patch before this one in the series "IPv4: early demux
> > > > can return an error code"?  Or should I hold off on both of these for
> > > > now?
> > > 
> > > AFAIK the patch "IPv4: early demux can return an error code" does not
> > > have any issue - it's just useless without this one - I guess it can
> > > stay in.
> > 
> > Ok, I've now moved this one out, thanks for letting me know.
> > 
> > And if you happen to remember when/if a fix for this goes into the tree,
> > that would be most helpful :)
> 
> Sure! the fix just entered Linus's tree: commit 996b44fcef8f ("udp: fix
> bcast packet reception")

Great!  Dave, mind if I take this now, or do you want me to wait for the
next round of networking patches.

thanks,

greg k-h

^ permalink raw reply

* Re: [jkirsher/net-queue PATCH] i40e: Fix memory leak related filter programming status
From: Anders K. Pedersen | Cohaesio @ 2017-10-10  8:01 UTC (permalink / raw)
  To: netdev@vger.kernel.org, intel-wired-lan@lists.osuosl.org,
	alexander.duyck@gmail.com
In-Reply-To: <20171004153903.13100.38029.stgit@localhost.localdomain>

On ons, 2017-10-04 at 08:44 -0700, Alexander Duyck wrote:
> From: Alexander Duyck <alexander.h.duyck@intel.com>
> 
> It looks like we weren't correctly placing the pages from buffers
> that had
> been used to return a filter programming status back on the ring. As
> a
> result they were being overwritten and tracking of the pages was
> lost.
> 
> This change works to correct that by incorporating part of
> i40e_put_rx_buffer into the programming status handler code. As a
> result we
> should now be correctly placing the pages for those buffers on the
> re-allocation list instead of letting them stay in place.
> 
> Fixes: 0e626ff7ccbf ("i40e: Fix support for flow director programming
> status")
> Reported-by: Anders K. Pedersen <akp@cohaesio.com>
> Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>

We rebooted the router that had memory leaks on 4.12 and 4.13 kernels 
with 4.13.5 plus this patch this morning, and the memory leak is gone,
so

Tested-by: Anders K. Pedersen <akp@cohaesio.com>

Thanks,
Anders

> ---
> 
> I'm submitting this for Jeff's net queue to undergo some additional
> testing
> before being submitted for net or stable to address the memory leak
> isue. The
> testing for this should be pretty straight forward since ATR filters
> seem
> to cause the issue it should be possible to trigger a pretty
> signficant
> amount of memory loss running something like a netperf TCP_CRR test
> for an
> extended period of time which will trigger multiple socket
> creation/destruction.
> 
> Anders, feel free to test this patch. If you need to grab a copy of
> the
> diff instead of trying to work with applying the patch through email
> you
> should be able to find a copy at the following URL shortly after I
> submit
> this to intel-wired-lan:
> 	http://patchwork.ozlabs.org/project/intel-wired-lan/list/
> 
>  drivers/net/ethernet/intel/i40e/i40e_txrx.c |   63 +++++++++++++++
> ------------
>  1 file changed, 36 insertions(+), 27 deletions(-)
> 
> diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
> b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
> index 94311e3e4f43..d4ae24674a70 100644
> --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
> +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
> @@ -1038,6 +1038,32 @@ static bool i40e_set_new_dynamic_itr(struct
> i40e_ring_container *rc)
>  }
>  
>  /**
> + * i40e_reuse_rx_page - page flip buffer and store it back on the
> ring
> + * @rx_ring: rx descriptor ring to store buffers on
> + * @old_buff: donor buffer to have page reused
> + *
> + * Synchronizes page for reuse by the adapter
> + **/
> +static void i40e_reuse_rx_page(struct i40e_ring *rx_ring,
> +			       struct i40e_rx_buffer *old_buff)
> +{
> +	struct i40e_rx_buffer *new_buff;
> +	u16 nta = rx_ring->next_to_alloc;
> +
> +	new_buff = &rx_ring->rx_bi[nta];
> +
> +	/* update, and store next to alloc */
> +	nta++;
> +	rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
> +
> +	/* transfer page from old buffer to new buffer */
> +	new_buff->dma		= old_buff->dma;
> +	new_buff->page		= old_buff->page;
> +	new_buff->page_offset	= old_buff->page_offset;
> +	new_buff->pagecnt_bias	= old_buff->pagecnt_bias;
> +}
> +
> +/**
>   * i40e_rx_is_programming_status - check for programming status
> descriptor
>   * @qw: qword representing status_error_len in CPU ordering
>   *
> @@ -1071,15 +1097,24 @@ static void
> i40e_clean_programming_status(struct i40e_ring *rx_ring,
>  					  union i40e_rx_desc
> *rx_desc,
>  					  u64 qw)
>  {
> -	u32 ntc = rx_ring->next_to_clean + 1;
> +	struct i40e_rx_buffer *rx_buffer;
> +	u32 ntc = rx_ring->next_to_clean;
>  	u8 id;
>  
>  	/* fetch, update, and store next to clean */
> +	rx_buffer = &rx_ring->rx_bi[ntc++];
>  	ntc = (ntc < rx_ring->count) ? ntc : 0;
>  	rx_ring->next_to_clean = ntc;
>  
>  	prefetch(I40E_RX_DESC(rx_ring, ntc));
>  
> +	/* place unused page back on the ring */
> +	i40e_reuse_rx_page(rx_ring, rx_buffer);
> +	rx_ring->rx_stats.page_reuse_count++;
> +
> +	/* clear contents of buffer_info */
> +	rx_buffer->page = NULL;
> +
>  	id = (qw & I40E_RX_PROG_STATUS_DESC_QW1_PROGID_MASK) >>
>  		  I40E_RX_PROG_STATUS_DESC_QW1_PROGID_SHIFT;
>  
> @@ -1648,32 +1683,6 @@ static bool i40e_cleanup_headers(struct
> i40e_ring *rx_ring, struct sk_buff *skb,
>  }
>  
>  /**
> - * i40e_reuse_rx_page - page flip buffer and store it back on the
> ring
> - * @rx_ring: rx descriptor ring to store buffers on
> - * @old_buff: donor buffer to have page reused
> - *
> - * Synchronizes page for reuse by the adapter
> - **/
> -static void i40e_reuse_rx_page(struct i40e_ring *rx_ring,
> -			       struct i40e_rx_buffer *old_buff)
> -{
> -	struct i40e_rx_buffer *new_buff;
> -	u16 nta = rx_ring->next_to_alloc;
> -
> -	new_buff = &rx_ring->rx_bi[nta];
> -
> -	/* update, and store next to alloc */
> -	nta++;
> -	rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
> -
> -	/* transfer page from old buffer to new buffer */
> -	new_buff->dma		= old_buff->dma;
> -	new_buff->page		= old_buff->page;
> -	new_buff->page_offset	= old_buff->page_offset;
> -	new_buff->pagecnt_bias	= old_buff->pagecnt_bias;
> -}
> -
> -/**
>   * i40e_page_is_reusable - check if any reuse is possible
>   * @page: page struct to check
>   *
> 

^ permalink raw reply

* Re: [PATCH] net: ftgmac100: Request clock and set speed
From: Benjamin Herrenschmidt @ 2017-10-10  8:14 UTC (permalink / raw)
  To: Joel Stanley, David S . Miller; +Cc: netdev, linux-kernel, Andrew Jeffery
In-Reply-To: <20171010044925.21078-1-joel@jms.id.au>

On Tue, 2017-10-10 at 15:19 +1030, Joel Stanley wrote:
> According to the ASPEED datasheet, gigabit speeds require a clock of
> 100MHz or higher. Other speeds require 25MHz or higher.

Did you try "live" changing by either using ethtool or plugging into
switches/hubs at different speed ?

Also this is aspeed'isms, we should probably keep that under an
is_aspeed test.

My assumption is that we wouldn't bother, and just leave the freq
set based on whether there's a physical gigabit capable connection or
not (ie, real gigabit PHY vs. NC-SI really). But if it can help save a
few milliwatts..

> Signed-off-by: Joel Stanley <joel@jms.id.au>
> ---
>  drivers/net/ethernet/faraday/ftgmac100.c | 22 ++++++++++++++++++++++
>  1 file changed, 22 insertions(+)
> 
> diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c
> index 9ed8e4b81530..870ebd857978 100644
> --- a/drivers/net/ethernet/faraday/ftgmac100.c
> +++ b/drivers/net/ethernet/faraday/ftgmac100.c
> @@ -21,6 +21,7 @@
>  
>  #define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
>  
> +#include <linux/clk.h>
>  #include <linux/dma-mapping.h>
>  #include <linux/etherdevice.h>
>  #include <linux/ethtool.h>
> @@ -59,6 +60,9 @@
>  /* Min number of tx ring entries before stopping queue */
>  #define TX_THRESHOLD		(MAX_SKB_FRAGS + 1)
>  
> +#define FTGMAC_100MHZ		100000000
> +#define FTGMAC_25MHZ		25000000
> +
>  struct ftgmac100 {
>  	/* Registers */
>  	struct resource *res;
> @@ -96,6 +100,7 @@ struct ftgmac100 {
>  	struct napi_struct napi;
>  	struct work_struct reset_task;
>  	struct mii_bus *mii_bus;
> +	struct clk *clk;
>  
>  	/* Link management */
>  	int cur_speed;
> @@ -142,18 +147,22 @@ static int ftgmac100_reset_mac(struct ftgmac100 *priv, u32 maccr)
>  static int ftgmac100_reset_and_config_mac(struct ftgmac100 *priv)
>  {
>  	u32 maccr = 0;
> +	int freq = 0;
>  
>  	switch (priv->cur_speed) {
>  	case SPEED_10:
>  	case 0: /* no link */
> +		freq = FTGMAC_25MHZ;
>  		break;
>  
>  	case SPEED_100:
>  		maccr |= FTGMAC100_MACCR_FAST_MODE;
> +		freq = FTGMAC_25MHZ;
>  		break;
>  
>  	case SPEED_1000:
>  		maccr |= FTGMAC100_MACCR_GIGA_MODE;
> +		freq = FTGMAC_100MHZ;
>  		break;
>  	default:
>  		netdev_err(priv->netdev, "Unknown speed %d !\n",
> @@ -161,6 +170,9 @@ static int ftgmac100_reset_and_config_mac(struct ftgmac100 *priv)
>  		break;
>  	}
>  
> +	if (freq && priv->clk)
> +		clk_set_rate(priv->clk, freq);
> +
>  	/* (Re)initialize the queue pointers */
>  	priv->rx_pointer = 0;
>  	priv->tx_clean_pointer = 0;
> @@ -1775,6 +1787,13 @@ static int ftgmac100_probe(struct platform_device *pdev)
>  	priv->dev = &pdev->dev;
>  	INIT_WORK(&priv->reset_task, ftgmac100_reset_task);
>  
> +	/* Enable clock if present */
> +	priv->clk = devm_clk_get(&pdev->dev, NULL);
> +	if (!IS_ERR(priv->clk))
> +		clk_prepare_enable(priv->clk);
> +	else
> +		priv->clk = NULL;
> +
>  	/* map io memory */
>  	priv->res = request_mem_region(res->start, resource_size(res),
>  				       dev_name(&pdev->dev));
> @@ -1883,6 +1902,9 @@ static int ftgmac100_remove(struct platform_device *pdev)
>  
>  	unregister_netdev(netdev);
>  
> +	if (priv->clk)
> +		clk_disable_unprepare(priv->clk);
> +
>  	/* There's a small chance the reset task will have been re-queued,
>  	 * during stop, make sure it's gone before we free the structure.
>  	 */

^ permalink raw reply

* [PATCH] i40e: mark PM functions as __maybe_unused
From: Arnd Bergmann @ 2017-10-10  8:17 UTC (permalink / raw)
  To: Jeff Kirsher
  Cc: Arnd Bergmann, Jacob Keller, Mitch Williams, Alexander Duyck,
	David S. Miller, Alan Brady, intel-wired-lan, netdev,
	linux-kernel

A cleanup of the PM code left an incorrect #ifdef in place, leading
to a harmless build warning:

drivers/net/ethernet/intel/i40e/i40e_main.c:12223:12: error: 'i40e_resume' defined but not used [-Werror=unused-function]
drivers/net/ethernet/intel/i40e/i40e_main.c:12185:12: error: 'i40e_suspend' defined but not used [-Werror=unused-function]

It's easier to use __maybe_unused attributes here, since you
can't pick the wrong one.

Fixes: 0e5d3da40055 ("i40e: use newer generic PM support instead of legacy PM callbacks")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 drivers/net/ethernet/intel/i40e/i40e_main.c | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 60b11fdeca2d..eb091268bc3c 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -8370,7 +8370,6 @@ static int i40e_init_interrupt_scheme(struct i40e_pf *pf)
 	return 0;
 }
 
-#ifdef CONFIG_PM
 /**
  * i40e_restore_interrupt_scheme - Restore the interrupt scheme
  * @pf: private board data structure
@@ -8419,7 +8418,6 @@ static int i40e_restore_interrupt_scheme(struct i40e_pf *pf)
 
 	return err;
 }
-#endif /* CONFIG_PM */
 
 /**
  * i40e_setup_misc_vector - Setup the misc vector to handle non queue events
@@ -12177,12 +12175,11 @@ static void i40e_shutdown(struct pci_dev *pdev)
 	}
 }
 
-#ifdef CONFIG_PM
 /**
  * i40e_suspend - PM callback for moving to D3
  * @dev: generic device information structure
  **/
-static int i40e_suspend(struct device *dev)
+static int __maybe_unused i40e_suspend(struct device *dev)
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
 	struct i40e_pf *pf = pci_get_drvdata(pdev);
@@ -12220,7 +12217,7 @@ static int i40e_suspend(struct device *dev)
  * i40e_resume - PM callback for waking up from D3
  * @dev: generic device information structure
  **/
-static int i40e_resume(struct device *dev)
+static int __maybe_unused i40e_resume(struct device *dev)
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
 	struct i40e_pf *pf = pci_get_drvdata(pdev);
@@ -12252,8 +12249,6 @@ static int i40e_resume(struct device *dev)
 	return 0;
 }
 
-#endif /* CONFIG_PM */
-
 static const struct pci_error_handlers i40e_err_handler = {
 	.error_detected = i40e_pci_error_detected,
 	.slot_reset = i40e_pci_error_slot_reset,
@@ -12269,11 +12264,9 @@ static struct pci_driver i40e_driver = {
 	.id_table = i40e_pci_tbl,
 	.probe    = i40e_probe,
 	.remove   = i40e_remove,
-#ifdef CONFIG_PM
 	.driver   = {
 		.pm = &i40e_pm_ops,
 	},
-#endif /* CONFIG_PM */
 	.shutdown = i40e_shutdown,
 	.err_handler = &i40e_err_handler,
 	.sriov_configure = i40e_pci_sriov_configure,
-- 
2.9.0

^ permalink raw reply related

* Re: [v2] p54: don't unregister leds when they are not initialized
From: Kalle Valo @ 2017-10-10  8:18 UTC (permalink / raw)
  To: Andrey Konovalov
  Cc: Christian Lamparter, linux-wireless, netdev, linux-kernel,
	Dmitry Vyukov, Kostya Serebryany, Andrey Konovalov
In-Reply-To: <17c60ebcc8ce7f20de41a55087d24dfdfca09c67.1506438620.git.andreyknvl@google.com>

Andrey Konovalov <andreyknvl@google.com> wrote:

> ieee80211_register_hw() in p54_register_common() may fail and leds won't
> get initialized. Currently p54_unregister_common() doesn't check that and
> always calls p54_unregister_leds(). The fix is to check priv->registered
> flag before calling p54_unregister_leds().
> 
> Found by syzkaller.
> 
> INFO: trying to register non-static key.
> the code is fine but needs lockdep annotation.
> turning off the locking correctness validator.
> CPU: 1 PID: 1404 Comm: kworker/1:1 Not tainted
> 4.14.0-rc1-42251-gebb2c2437d80-dirty #205
> Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
> Workqueue: usb_hub_wq hub_event
> Call Trace:
>  __dump_stack lib/dump_stack.c:16
>  dump_stack+0x292/0x395 lib/dump_stack.c:52
>  register_lock_class+0x6c4/0x1a00 kernel/locking/lockdep.c:769
>  __lock_acquire+0x27e/0x4550 kernel/locking/lockdep.c:3385
>  lock_acquire+0x259/0x620 kernel/locking/lockdep.c:4002
>  flush_work+0xf0/0x8c0 kernel/workqueue.c:2886
>  __cancel_work_timer+0x51d/0x870 kernel/workqueue.c:2961
>  cancel_delayed_work_sync+0x1f/0x30 kernel/workqueue.c:3081
>  p54_unregister_leds+0x6c/0xc0 drivers/net/wireless/intersil/p54/led.c:160
>  p54_unregister_common+0x3d/0xb0 drivers/net/wireless/intersil/p54/main.c:856
>  p54u_disconnect+0x86/0x120 drivers/net/wireless/intersil/p54/p54usb.c:1073
>  usb_unbind_interface+0x21c/0xa90 drivers/usb/core/driver.c:423
>  __device_release_driver drivers/base/dd.c:861
>  device_release_driver_internal+0x4f4/0x5c0 drivers/base/dd.c:893
>  device_release_driver+0x1e/0x30 drivers/base/dd.c:918
>  bus_remove_device+0x2f4/0x4b0 drivers/base/bus.c:565
>  device_del+0x5c4/0xab0 drivers/base/core.c:1985
>  usb_disable_device+0x1e9/0x680 drivers/usb/core/message.c:1170
>  usb_disconnect+0x260/0x7a0 drivers/usb/core/hub.c:2124
>  hub_port_connect drivers/usb/core/hub.c:4754
>  hub_port_connect_change drivers/usb/core/hub.c:5009
>  port_event drivers/usb/core/hub.c:5115
>  hub_event+0x1318/0x3740 drivers/usb/core/hub.c:5195
>  process_one_work+0xc7f/0x1db0 kernel/workqueue.c:2119
>  process_scheduled_works kernel/workqueue.c:2179
>  worker_thread+0xb2b/0x1850 kernel/workqueue.c:2255
>  kthread+0x3a1/0x470 kernel/kthread.c:231
>  ret_from_fork+0x2a/0x40 arch/x86/entry/entry_64.S:431
> 
> Cc: stable@vger.kernel.org
> Signed-off-by: Andrey Konovalov <andreyknvl@google.com>
> Acked-by: Christian Lamparter <chunkeey@googlemail.com>

Patch applied to wireless-drivers-next.git, thanks.

fc09785de0a3 p54: don't unregister leds when they are not initialized

-- 
https://patchwork.kernel.org/patch/9972281/

https://wireless.wiki.kernel.org/en/developers/documentation/submittingpatches

^ permalink raw reply

* Re: net/wireless/ray_cs: Convert timers to use
From: Kalle Valo @ 2017-10-10  8:26 UTC (permalink / raw)
  To: Kees Cook; +Cc: linux-kernel, linux-wireless, netdev, Thomas Gleixner
In-Reply-To: <20171005005020.GA23219@beast>

Kees Cook <keescook@chromium.org> wrote:

> In preparation for unconditionally passing the struct timer_list pointer to
> all timer callbacks, switch to using the new timer_setup() and from_timer()
> to pass the timer pointer explicitly.
> 
> Cc: Kalle Valo <kvalo@codeaurora.org>
> Cc: linux-wireless@vger.kernel.org
> Cc: netdev@vger.kernel.org
> Cc: Thomas Gleixner <tglx@linutronix.de>
> Signed-off-by: Kees Cook <keescook@chromium.org>

I'll apply this once I have fast forwarded wireless-drivers-next to
-rc3. I'll also fix the title, what was it supposed to say?

Patch set to Awaiting Upstream.

-- 
https://patchwork.kernel.org/patch/9986253/

https://wireless.wiki.kernel.org/en/developers/documentation/submittingpatches

^ permalink raw reply

* [PATCH net-next 0/5] Support set_ringparam and {set|get}_rxnfc ethtool commands
From: Lipeng @ 2017-10-10  8:42 UTC (permalink / raw)
  To: davem; +Cc: netdev, linux-kernel, linuxarm, yisen.zhuang, salil.mehta,
	lipeng321

1, Patch [1/5,2/5] add support for ethtool ops set_ringparam
   (ethtool -G) and fix related bug.
2, Patch [3/5,4/5, 5/5] add support for ethtool ops
   set_rxnfc/get_rxnfc (-n/-N) and fix related bug. 

Lipeng (5):
  net: hns3: fixes the ring index in hns3_fini_ring
  net: hns3: add support for set_ringparam
  net: hns3: add support for set_rxnfc
  net: hns3: add support for ETHTOOL_GRXFH
  net: hns3: fix the ring count for ETHTOOL_GRXRINGS

 drivers/net/ethernet/hisilicon/hns3/hnae3.h        |   4 +
 .../net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c |   9 ++
 .../net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h |   1 +
 .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c    | 159 ++++++++++++++++++++-
 .../ethernet/hisilicon/hns3/hns3pf/hclge_main.h    |   8 ++
 .../net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c |   6 +-
 .../net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.h |   4 +
 .../ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c  |  95 +++++++++++-
 8 files changed, 280 insertions(+), 6 deletions(-)

-- 
1.9.1

^ permalink raw reply

* [PATCH net-next 1/5] net: hns3: fixes the ring index in hns3_fini_ring
From: Lipeng @ 2017-10-10  8:42 UTC (permalink / raw)
  To: davem; +Cc: netdev, linux-kernel, linuxarm, yisen.zhuang, salil.mehta,
	lipeng321
In-Reply-To: <1507624927-98008-1-git-send-email-lipeng321@huawei.com>

This patch fixes the ring index in hns3_fini_ring.

Signed-off-by: Lipeng <lipeng321@huawei.com>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c
index 26bbc91..acb82cf 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c
@@ -2661,7 +2661,7 @@ static int hns3_init_all_ring(struct hns3_nic_priv *priv)
 
 out_when_alloc_ring_memory:
 	for (j = i - 1; j >= 0; j--)
-		hns3_fini_ring(priv->ring_data[i].ring);
+		hns3_fini_ring(priv->ring_data[j].ring);
 
 	return -ENOMEM;
 }
-- 
1.9.1

^ permalink raw reply related

* [PATCH net-next 2/5] net: hns3: add support for set_ringparam
From: Lipeng @ 2017-10-10  8:42 UTC (permalink / raw)
  To: davem; +Cc: netdev, linux-kernel, linuxarm, yisen.zhuang, salil.mehta,
	lipeng321
In-Reply-To: <1507624927-98008-1-git-send-email-lipeng321@huawei.com>

This patch supports the ethtool's set_ringparam().

Signed-off-by: Lipeng <lipeng321@huawei.com>
---
 .../net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c |  4 +-
 .../net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.h |  4 ++
 .../ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c  | 75 ++++++++++++++++++++++
 3 files changed, 81 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c
index acb82cf..ba550c1 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c
@@ -2637,7 +2637,7 @@ static void hns3_init_ring_hw(struct hns3_enet_ring *ring)
 	}
 }
 
-static int hns3_init_all_ring(struct hns3_nic_priv *priv)
+int hns3_init_all_ring(struct hns3_nic_priv *priv)
 {
 	struct hnae3_handle *h = priv->ae_handle;
 	int ring_num = h->kinfo.num_tqps * 2;
@@ -2666,7 +2666,7 @@ static int hns3_init_all_ring(struct hns3_nic_priv *priv)
 	return -ENOMEM;
 }
 
-static int hns3_uninit_all_ring(struct hns3_nic_priv *priv)
+int hns3_uninit_all_ring(struct hns3_nic_priv *priv)
 {
 	struct hnae3_handle *h = priv->ae_handle;
 	int i;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.h
index dd8d40c..6659989 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.h
@@ -76,6 +76,8 @@ enum hns3_nic_state {
 #define HNS3_RING_NAME_LEN			16
 #define HNS3_BUFFER_SIZE_2048			2048
 #define HNS3_RING_MAX_PENDING			32768
+#define HNS3_RING_MIN_PENDING			8
+#define HNS3_RING_BD_MULTIPLE			8
 #define HNS3_MAX_MTU				9728
 
 #define HNS3_BD_SIZE_512_TYPE			0
@@ -593,6 +595,8 @@ static inline void hns3_write_reg(void __iomem *base, u32 reg, u32 value)
 void hns3_ethtool_set_ops(struct net_device *netdev);
 
 int hns3_clean_tx_ring(struct hns3_enet_ring *ring, int budget);
+int hns3_init_all_ring(struct hns3_nic_priv *priv);
+int hns3_uninit_all_ring(struct hns3_nic_priv *priv);
 
 #ifdef CONFIG_HNS3_DCB
 void hns3_dcbnl_setup(struct hnae3_handle *handle);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c
index 060bace..1c5d003 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c
@@ -459,10 +459,85 @@ static int hns3_get_rxnfc(struct net_device *netdev,
 	return 0;
 }
 
+int hns3_change_all_ring_bd_num(struct hns3_nic_priv *priv, u32 new_desc_num)
+{
+	struct hnae3_handle *h = priv->ae_handle;
+	int i;
+
+	h->kinfo.num_desc = new_desc_num;
+
+	for (i = 0; i < h->kinfo.num_tqps * 2; i++)
+		priv->ring_data[i].ring->desc_num = new_desc_num;
+
+	return hns3_init_all_ring(priv);
+}
+
+int hns3_set_ringparam(struct net_device *ndev, struct ethtool_ringparam *param)
+{
+	struct hns3_nic_priv *priv = netdev_priv(ndev);
+	struct hnae3_handle *h = priv->ae_handle;
+	bool if_running = netif_running(ndev);
+	u32 old_desc_num, new_desc_num;
+	int ret;
+
+	if (param->rx_mini_pending || param->rx_jumbo_pending)
+		return -EINVAL;
+
+	if (param->tx_pending != param->rx_pending) {
+		netdev_err(ndev,
+			   "Descriptors of tx and rx must be equal");
+		return -EINVAL;
+	}
+
+	if (param->tx_pending > HNS3_RING_MAX_PENDING ||
+	    param->tx_pending < HNS3_RING_MIN_PENDING) {
+		netdev_err(ndev,
+			   "Descriptors requested (Tx/Rx: %d) out of range [%d-%d]\n",
+			   param->tx_pending, HNS3_RING_MIN_PENDING,
+			   HNS3_RING_MAX_PENDING);
+		return -EINVAL;
+	}
+
+	new_desc_num = param->tx_pending;
+
+	/* Hardware requires that its descriptors must be multiple of eight */
+	new_desc_num = ALIGN(new_desc_num, HNS3_RING_BD_MULTIPLE);
+	old_desc_num = h->kinfo.num_desc;
+	if (old_desc_num == new_desc_num)
+		return 0;
+
+	netdev_info(ndev,
+		    "Changing descriptor count from %d to %d.\n",
+		    old_desc_num, new_desc_num);
+
+	if (if_running)
+		dev_close(ndev);
+
+	ret = hns3_uninit_all_ring(priv);
+	if (ret)
+		return ret;
+
+	ret = hns3_change_all_ring_bd_num(priv, new_desc_num);
+	if (ret) {
+		ret = hns3_change_all_ring_bd_num(priv, old_desc_num);
+		if (ret) {
+			netdev_err(ndev,
+				   "Revert to old bd num fail, ret=%d.\n", ret);
+			return ret;
+		}
+	}
+
+	if (if_running)
+		ret = dev_open(ndev);
+
+	return ret;
+}
+
 static const struct ethtool_ops hns3_ethtool_ops = {
 	.get_drvinfo = hns3_get_drvinfo,
 	.get_link = hns3_get_link,
 	.get_ringparam = hns3_get_ringparam,
+	.set_ringparam = hns3_set_ringparam,
 	.get_pauseparam = hns3_get_pauseparam,
 	.get_strings = hns3_get_strings,
 	.get_ethtool_stats = hns3_get_stats,
-- 
1.9.1

^ permalink raw reply related

* [PATCH net-next 3/5] net: hns3: add support for set_rxnfc
From: Lipeng @ 2017-10-10  8:42 UTC (permalink / raw)
  To: davem; +Cc: netdev, linux-kernel, linuxarm, yisen.zhuang, salil.mehta,
	lipeng321
In-Reply-To: <1507624927-98008-1-git-send-email-lipeng321@huawei.com>

This patch supports the ethtool's set_rxnfc().

Signed-off-by: Lipeng <lipeng321@huawei.com>
---
 drivers/net/ethernet/hisilicon/hns3/hnae3.h        |  2 +
 .../net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c |  9 ++
 .../net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h |  1 +
 .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c    | 95 +++++++++++++++++++++-
 .../ethernet/hisilicon/hns3/hns3pf/hclge_main.h    |  8 ++
 .../ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c  | 16 ++++
 6 files changed, 129 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index c677530..d952d62 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -339,6 +339,8 @@ struct hnae3_ae_ops {
 		       u8 *hfunc);
 	int (*set_rss)(struct hnae3_handle *handle, const u32 *indir,
 		       const u8 *key, const u8 hfunc);
+	int (*set_rss_tuple)(struct hnae3_handle *handle,
+			     struct ethtool_rxnfc *cmd);
 
 	int (*get_tc_size)(struct hnae3_handle *handle);
 
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
index 8ecd807..60960e5 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
@@ -85,6 +85,15 @@ static int hclge_init_cmd_queue(struct hclge_dev *hdev, int ring_type)
 	return 0;
 }
 
+void hclge_cmd_reuse_desc(struct hclge_desc *desc, bool is_read)
+{
+	desc->flag = cpu_to_le16(HCLGE_CMD_FLAG_NO_INTR | HCLGE_CMD_FLAG_IN);
+	if (is_read)
+		desc->flag |= cpu_to_le16(HCLGE_CMD_FLAG_WR);
+	else
+		desc->flag &= cpu_to_le16(~HCLGE_CMD_FLAG_WR);
+}
+
 void hclge_cmd_setup_basic_desc(struct hclge_desc *desc,
 				enum hclge_opcode_type opcode, bool is_read)
 {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
index 8f3ba02a..b437334 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
@@ -739,6 +739,7 @@ static inline u32 hclge_read_reg(u8 __iomem *base, u32 reg)
 int hclge_cmd_send(struct hclge_hw *hw, struct hclge_desc *desc, int num);
 void hclge_cmd_setup_basic_desc(struct hclge_desc *desc,
 				enum hclge_opcode_type opcode, bool is_read);
+void hclge_cmd_reuse_desc(struct hclge_desc *desc, bool is_read);
 
 int hclge_cmd_set_promisc_mode(struct hclge_dev *hdev,
 			       struct hclge_promisc_param *param);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index c91c779..5b5e52c 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -2595,8 +2595,6 @@ static int hclge_set_rss_tc_mode(struct hclge_dev *hdev, u16 *tc_valid,
 
 static int hclge_set_rss_input_tuple(struct hclge_dev *hdev)
 {
-#define HCLGE_RSS_INPUT_TUPLE_OTHER		0xf
-#define HCLGE_RSS_INPUT_TUPLE_SCTP		0x1f
 	struct hclge_rss_input_tuple_cmd *req;
 	struct hclge_desc desc;
 	int ret;
@@ -2677,6 +2675,98 @@ static int hclge_set_rss(struct hnae3_handle *handle, const u32 *indir,
 	return ret;
 }
 
+static u8 hclge_get_rss_hash_bits(struct ethtool_rxnfc *nfc)
+{
+	u8 hash_sets = nfc->data & RXH_L4_B_0_1 ? HCLGE_S_PORT_BIT : 0;
+
+	if (nfc->data & RXH_L4_B_2_3)
+		hash_sets |= HCLGE_D_PORT_BIT;
+	else
+		hash_sets &= ~HCLGE_D_PORT_BIT;
+
+	if (nfc->data & RXH_IP_SRC)
+		hash_sets |= HCLGE_S_IP_BIT;
+	else
+		hash_sets &= ~HCLGE_S_IP_BIT;
+
+	if (nfc->data & RXH_IP_DST)
+		hash_sets |= HCLGE_D_IP_BIT;
+	else
+		hash_sets &= ~HCLGE_D_IP_BIT;
+
+	if (nfc->flow_type == SCTP_V4_FLOW || nfc->flow_type == SCTP_V6_FLOW)
+		hash_sets |= HCLGE_V_TAG_BIT;
+
+	return hash_sets;
+}
+
+static int hclge_set_rss_tuple(struct hnae3_handle *handle,
+			       struct ethtool_rxnfc *nfc)
+{
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_dev *hdev = vport->back;
+	struct hclge_rss_input_tuple_cmd *req;
+	struct hclge_desc desc;
+	u8 tuple_sets;
+	int ret;
+
+	if (nfc->data & ~(RXH_IP_SRC | RXH_IP_DST |
+			  RXH_L4_B_0_1 | RXH_L4_B_2_3))
+		return -EINVAL;
+
+	req = (struct hclge_rss_input_tuple_cmd *)desc.data;
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_RSS_INPUT_TUPLE, true);
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"Read rss tuple fail, status = %d\n", ret);
+		return ret;
+	}
+
+	hclge_cmd_reuse_desc(&desc, false);
+
+	tuple_sets = hclge_get_rss_hash_bits(nfc);
+	switch (nfc->flow_type) {
+	case TCP_V4_FLOW:
+		req->ipv4_tcp_en = tuple_sets;
+		break;
+	case TCP_V6_FLOW:
+		req->ipv6_tcp_en = tuple_sets;
+		break;
+	case UDP_V4_FLOW:
+		req->ipv4_udp_en = tuple_sets;
+		break;
+	case UDP_V6_FLOW:
+		req->ipv6_udp_en = tuple_sets;
+		break;
+	case SCTP_V4_FLOW:
+		req->ipv4_sctp_en = tuple_sets;
+		break;
+	case SCTP_V6_FLOW:
+		if ((nfc->data & RXH_L4_B_0_1) ||
+		    (nfc->data & RXH_L4_B_2_3))
+			return -EINVAL;
+
+		req->ipv6_sctp_en = tuple_sets;
+		break;
+	case IPV4_FLOW:
+		req->ipv4_fragment_en = HCLGE_RSS_INPUT_TUPLE_OTHER;
+		break;
+	case IPV6_FLOW:
+		req->ipv6_fragment_en = HCLGE_RSS_INPUT_TUPLE_OTHER;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret)
+		dev_err(&hdev->pdev->dev,
+			"Set rss tuple fail, status = %d\n", ret);
+
+	return ret;
+}
+
 static int hclge_get_tc_size(struct hnae3_handle *handle)
 {
 	struct hclge_vport *vport = hclge_get_vport(handle);
@@ -4344,6 +4434,7 @@ static void hclge_uninit_ae_dev(struct hnae3_ae_dev *ae_dev)
 	.get_rss_indir_size = hclge_get_rss_indir_size,
 	.get_rss = hclge_get_rss,
 	.set_rss = hclge_set_rss,
+	.set_rss_tuple = hclge_set_rss_tuple,
 	.get_tc_size = hclge_get_tc_size,
 	.get_mac_addr = hclge_get_mac_addr,
 	.set_mac_addr = hclge_set_mac_addr,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
index 79c1a06..a7c018c 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
@@ -41,6 +41,14 @@
 #define HCLGE_RSS_CFG_TBL_NUM \
 	(HCLGE_RSS_IND_TBL_SIZE / HCLGE_RSS_CFG_TBL_SIZE)
 
+#define HCLGE_RSS_INPUT_TUPLE_OTHER	GENMASK(3, 0)
+#define HCLGE_RSS_INPUT_TUPLE_SCTP	GENMASK(4, 0)
+#define HCLGE_D_PORT_BIT		BIT(0)
+#define HCLGE_S_PORT_BIT		BIT(1)
+#define HCLGE_D_IP_BIT			BIT(2)
+#define HCLGE_S_IP_BIT			BIT(3)
+#define HCLGE_V_TAG_BIT			BIT(4)
+
 #define HCLGE_RSS_TC_SIZE_0		1
 #define HCLGE_RSS_TC_SIZE_1		2
 #define HCLGE_RSS_TC_SIZE_2		4
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c
index 1c5d003..f0e88e0 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c
@@ -533,6 +533,21 @@ int hns3_set_ringparam(struct net_device *ndev, struct ethtool_ringparam *param)
 	return ret;
 }
 
+static int hns3_set_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd)
+{
+	struct hnae3_handle *h = hns3_get_handle(netdev);
+
+	if (!h->ae_algo || !h->ae_algo->ops || !h->ae_algo->ops->set_rss_tuple)
+		return -EOPNOTSUPP;
+
+	switch (cmd->cmd) {
+	case ETHTOOL_SRXFH:
+		return h->ae_algo->ops->set_rss_tuple(h, cmd);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
 static const struct ethtool_ops hns3_ethtool_ops = {
 	.get_drvinfo = hns3_get_drvinfo,
 	.get_link = hns3_get_link,
@@ -543,6 +558,7 @@ int hns3_set_ringparam(struct net_device *ndev, struct ethtool_ringparam *param)
 	.get_ethtool_stats = hns3_get_stats,
 	.get_sset_count = hns3_get_sset_count,
 	.get_rxnfc = hns3_get_rxnfc,
+	.set_rxnfc = hns3_set_rxnfc,
 	.get_rxfh_key_size = hns3_get_rss_key_size,
 	.get_rxfh_indir_size = hns3_get_rss_indir_size,
 	.get_rxfh = hns3_get_rss,
-- 
1.9.1

^ permalink raw reply related

* [PATCH net-next 4/5] net: hns3: add support for ETHTOOL_GRXFH
From: Lipeng @ 2017-10-10  8:42 UTC (permalink / raw)
  To: davem; +Cc: netdev, linux-kernel, linuxarm, yisen.zhuang, salil.mehta,
	lipeng321
In-Reply-To: <1507624927-98008-1-git-send-email-lipeng321@huawei.com>

This patch add support for ethtool's ETHTOOL_GRXFH in hns3_get_rxnfc().

Signed-off-by: Lipeng <lipeng321@huawei.com>
---
 drivers/net/ethernet/hisilicon/hns3/hnae3.h        |  2 +
 .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c    | 64 ++++++++++++++++++++++
 .../ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c  |  2 +
 3 files changed, 68 insertions(+)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index d952d62..575f50d 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -341,6 +341,8 @@ struct hnae3_ae_ops {
 		       const u8 *key, const u8 hfunc);
 	int (*set_rss_tuple)(struct hnae3_handle *handle,
 			     struct ethtool_rxnfc *cmd);
+	int (*get_rss_tuple)(struct hnae3_handle *handle,
+			     struct ethtool_rxnfc *cmd);
 
 	int (*get_tc_size)(struct hnae3_handle *handle);
 
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 5b5e52c..c322b45 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -2767,6 +2767,69 @@ static int hclge_set_rss_tuple(struct hnae3_handle *handle,
 	return ret;
 }
 
+static int hclge_get_rss_tuple(struct hnae3_handle *handle,
+			       struct ethtool_rxnfc *nfc)
+{
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_dev *hdev = vport->back;
+	struct hclge_rss_input_tuple_cmd *req;
+	struct hclge_desc desc;
+	u8 tuple_sets;
+	int ret;
+
+	nfc->data = 0;
+
+	req = (struct hclge_rss_input_tuple_cmd *)desc.data;
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_RSS_INPUT_TUPLE, true);
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"Read rss tuple fail, status = %d\n", ret);
+		return ret;
+	}
+
+	switch (nfc->flow_type) {
+	case TCP_V4_FLOW:
+		tuple_sets = req->ipv4_tcp_en;
+		break;
+	case UDP_V4_FLOW:
+		tuple_sets = req->ipv4_udp_en;
+		break;
+	case TCP_V6_FLOW:
+		tuple_sets = req->ipv6_tcp_en;
+		break;
+	case UDP_V6_FLOW:
+		tuple_sets = req->ipv6_udp_en;
+		break;
+	case SCTP_V4_FLOW:
+		tuple_sets = req->ipv4_sctp_en;
+		break;
+	case SCTP_V6_FLOW:
+		tuple_sets = req->ipv6_sctp_en;
+		break;
+	case IPV4_FLOW:
+	case IPV6_FLOW:
+		tuple_sets = HCLGE_S_IP_BIT | HCLGE_D_IP_BIT;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (!tuple_sets)
+		return 0;
+
+	if (tuple_sets & HCLGE_D_PORT_BIT)
+		nfc->data |= RXH_L4_B_2_3;
+	if (tuple_sets & HCLGE_S_PORT_BIT)
+		nfc->data |= RXH_L4_B_0_1;
+	if (tuple_sets & HCLGE_D_IP_BIT)
+		nfc->data |= RXH_IP_DST;
+	if (tuple_sets & HCLGE_S_IP_BIT)
+		nfc->data |= RXH_IP_SRC;
+
+	return 0;
+}
+
 static int hclge_get_tc_size(struct hnae3_handle *handle)
 {
 	struct hclge_vport *vport = hclge_get_vport(handle);
@@ -4435,6 +4498,7 @@ static void hclge_uninit_ae_dev(struct hnae3_ae_dev *ae_dev)
 	.get_rss = hclge_get_rss,
 	.set_rss = hclge_set_rss,
 	.set_rss_tuple = hclge_set_rss_tuple,
+	.get_rss_tuple = hclge_get_rss_tuple,
 	.get_tc_size = hclge_get_tc_size,
 	.get_mac_addr = hclge_get_mac_addr,
 	.set_mac_addr = hclge_set_mac_addr,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c
index f0e88e0..b64fbd3 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c
@@ -452,6 +452,8 @@ static int hns3_get_rxnfc(struct net_device *netdev,
 	case ETHTOOL_GRXRINGS:
 		cmd->data = h->ae_algo->ops->get_tc_size(h);
 		break;
+	case ETHTOOL_GRXFH:
+		return h->ae_algo->ops->get_rss_tuple(h, cmd);
 	default:
 		return -EOPNOTSUPP;
 	}
-- 
1.9.1

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox