[RFC V1 net-next 0/1] Introduce xdp to ena

All of lore.kernel.org
 help / color / mirror / Atom feed

* [RFC V1 net-next 0/1] Introduce xdp to ena
@ 2019-06-23  7:06 sameehj
  2019-06-23  7:06 ` [RFC V1 net-next 1/1] net: ena: implement XDP drop support sameehj
  0 siblings, 1 reply; 28+ messages in thread
From: sameehj @ 2019-06-23  7:06 UTC (permalink / raw)
  To: davem, netdev
  Cc: Sameeh Jubran, dwmw, zorik, matua, saeedb, msw, aliguori, nafea,
	gtzalik, netanel, alisaidi, benh, akiyano

From: Sameeh Jubran <sameehj@amazon.com>

This patch set has one patch which implements xdp drop support in the
ena driver.

Sameeh Jubran (1):
  net: ena: implement XDP drop support

 drivers/net/ethernet/amazon/ena/ena_netdev.c | 83 +++++++++++++++++++-
 drivers/net/ethernet/amazon/ena/ena_netdev.h | 29 +++++++
 2 files changed, 111 insertions(+), 1 deletion(-)

-- 
2.17.1


^ permalink raw reply	[flat|nested] 28+ messages in thread

* [RFC V1 net-next 1/1] net: ena: implement XDP drop support
  2019-06-23  7:06 [RFC V1 net-next 0/1] Introduce xdp to ena sameehj
@ 2019-06-23  7:06 ` sameehj
  2019-06-23 14:09   ` Jesper Dangaard Brouer
                     ` (3 more replies)
  0 siblings, 4 replies; 28+ messages in thread
From: sameehj @ 2019-06-23  7:06 UTC (permalink / raw)
  To: davem, netdev
  Cc: Sameeh Jubran, dwmw, zorik, matua, saeedb, msw, aliguori, nafea,
	gtzalik, netanel, alisaidi, benh, akiyano

From: Sameeh Jubran <sameehj@amazon.com>

This commit implements the basic functionality of drop/pass logic in the
ena driver.

Signed-off-by: Sameeh Jubran <sameehj@amazon.com>
---
 drivers/net/ethernet/amazon/ena/ena_netdev.c | 83 +++++++++++++++++++-
 drivers/net/ethernet/amazon/ena/ena_netdev.h | 29 +++++++
 2 files changed, 111 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index 20ec8ff03..3d65c0771 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -33,10 +33,10 @@
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #ifdef CONFIG_RFS_ACCEL
+#include <linux/bpf_trace.h>
 #include <linux/cpu_rmap.h>
 #endif /* CONFIG_RFS_ACCEL */
 #include <linux/ethtool.h>
-#include <linux/if_vlan.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/numa.h>
@@ -105,11 +105,82 @@ static void update_rx_ring_mtu(struct ena_adapter *adapter, int mtu)
 		adapter->rx_ring[i].mtu = mtu;
 }
 
+static int ena_xdp_execute(struct ena_ring *rx_ring, struct xdp_buff *xdp)
+{
+	struct bpf_prog *xdp_prog = rx_ring->xdp_bpf_prog;
+	u32 verdict = XDP_PASS;
+
+	rcu_read_lock();
+
+	if (!xdp_prog)
+		goto out;
+
+	verdict = bpf_prog_run_xdp(xdp_prog, xdp);
+
+	if (unlikely(verdict == XDP_ABORTED))
+		trace_xdp_exception(rx_ring->netdev, xdp_prog, verdict);
+	else if (unlikely(verdict > XDP_REDIRECT))
+		bpf_warn_invalid_xdp_action(verdict);
+out:
+	rcu_read_unlock();
+	return verdict;
+}
+
+static int ena_xdp_set(struct net_device *netdev, struct bpf_prog *prog)
+{
+	struct ena_adapter *adapter = netdev_priv(netdev);
+	struct bpf_prog *old_bpf_prog;
+	int i;
+
+	if (ena_xdp_allowed(adapter)) {
+		old_bpf_prog = xchg(&adapter->xdp_bpf_prog, prog);
+
+		for (i = 0; i < adapter->num_queues; i++)
+			xchg(&adapter->rx_ring[i].xdp_bpf_prog, prog);
+
+		if (old_bpf_prog)
+			bpf_prog_put(old_bpf_prog);
+
+	} else {
+		netif_err(adapter, drv, adapter->netdev, "Failed to set xdp program, the current MTU (%d) is larger than the maximal allowed MTU (%lu) while xdp is on",
+			  netdev->mtu, ENA_XDP_MAX_MTU);
+		return -EFAULT;
+	}
+
+	return 0;
+}
+
+/* This is the main xdp callback, it's used by the kernel to set/unset the xdp
+ * program as well as to query the current xdp program id.
+ */
+static int ena_xdp(struct net_device *netdev, struct netdev_bpf *bpf)
+{
+	struct ena_adapter *adapter = netdev_priv(netdev);
+
+	switch (bpf->command) {
+	case XDP_SETUP_PROG:
+		return ena_xdp_set(netdev, bpf->prog);
+	case XDP_QUERY_PROG:
+		bpf->prog_id = adapter->xdp_bpf_prog ?
+			adapter->xdp_bpf_prog->aux->id : 0;
+		break;
+	default:
+		return -EINVAL;
+	}
+	return 0;
+}
+
 static int ena_change_mtu(struct net_device *dev, int new_mtu)
 {
 	struct ena_adapter *adapter = netdev_priv(dev);
 	int ret;
 
+	if (new_mtu > ENA_XDP_MAX_MTU && ena_xdp_present(adapter)) {
+		netif_err(adapter, drv, dev,
+			  "Requested MTU value is not valid while xdp is enabled new_mtu: %d max mtu: %lu min mtu: %d\n",
+			  new_mtu, ENA_XDP_MAX_MTU, ENA_MIN_MTU);
+		return -EINVAL;
+	}
 	ret = ena_com_set_dev_mtu(adapter->ena_dev, new_mtu);
 	if (!ret) {
 		netif_dbg(adapter, drv, dev, "set MTU to %d\n", new_mtu);
@@ -888,6 +959,15 @@ static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring,
 	va = page_address(rx_info->page) + rx_info->page_offset;
 	prefetch(va + NET_IP_ALIGN);
 
+	if (ena_xdp_present_ring(rx_ring)) {
+		rx_ring->xdp.data = va;
+		rx_ring->xdp.data_meta = rx_ring->xdp.data;
+		rx_ring->xdp.data_hard_start = rx_ring->xdp.data -
+			rx_info->page_offset;
+		rx_ring->xdp.data_end = rx_ring->xdp.data + len;
+		if (ena_xdp_execute(rx_ring, &rx_ring->xdp) != XDP_PASS)
+			return NULL;
+	}
 	if (len <= rx_ring->rx_copybreak) {
 		skb = ena_alloc_skb(rx_ring, false);
 		if (unlikely(!skb))
@@ -2549,6 +2629,7 @@ static const struct net_device_ops ena_netdev_ops = {
 	.ndo_change_mtu		= ena_change_mtu,
 	.ndo_set_mac_address	= NULL,
 	.ndo_validate_addr	= eth_validate_addr,
+	.ndo_bpf		= ena_xdp,
 };
 
 static int ena_device_validate_params(struct ena_adapter *adapter,
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h
index f2b6e2e05..e17965f7a 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.h
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h
@@ -35,6 +35,7 @@
 
 #include <linux/bitops.h>
 #include <linux/etherdevice.h>
+#include <linux/if_vlan.h>
 #include <linux/inetdevice.h>
 #include <linux/interrupt.h>
 #include <linux/netdevice.h>
@@ -139,6 +140,14 @@
 
 #define ENA_MMIO_DISABLE_REG_READ	BIT(0)
 
+/* The max MTU size is configured to be the ethernet frame without the overhead
+ * of the ethernet header, which can have VLAN header, and the frame check
+ * sequence (FCS).
+ * The buffer sizes we share with the device are defined to be ENA_PAGE_SIZE
+ */
+#define ENA_XDP_MAX_MTU (ENA_PAGE_SIZE - ETH_HLEN - ETH_FCS_LEN - \
+				VLAN_HLEN)
+
 struct ena_irq {
 	irq_handler_t handler;
 	void *data;
@@ -288,6 +297,8 @@ struct ena_ring {
 
 	u8 *push_buf_intermediate_buf;
 	int empty_rx_queue;
+	struct bpf_prog *xdp_bpf_prog;
+	struct xdp_buff xdp;
 } ____cacheline_aligned;
 
 struct ena_stats_dev {
@@ -380,6 +391,9 @@ struct ena_adapter {
 	enum ena_regs_reset_reason_types reset_reason;
 
 	u8 ena_extra_properties_count;
+
+	/* XDP structures */
+	struct bpf_prog *xdp_bpf_prog;
 };
 
 void ena_set_ethtool_ops(struct net_device *netdev);
@@ -394,4 +408,19 @@ int ena_update_queue_sizes(struct ena_adapter *adapter,
 
 int ena_get_sset_count(struct net_device *netdev, int sset);
 
+static inline bool ena_xdp_present(struct ena_adapter *adapter)
+{
+	return !!adapter->xdp_bpf_prog;
+}
+
+static inline bool ena_xdp_present_ring(struct ena_ring *ring)
+{
+	return !!ring->xdp_bpf_prog;
+}
+
+static inline bool ena_xdp_allowed(struct ena_adapter *adapter)
+{
+	return adapter->netdev->mtu <= ENA_XDP_MAX_MTU;
+}
+
 #endif /* !(ENA_H) */
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 28+ messages in thread

* Re: [RFC V1 net-next 1/1] net: ena: implement XDP drop support
  2019-06-23  7:06 ` [RFC V1 net-next 1/1] net: ena: implement XDP drop support sameehj
@ 2019-06-23 14:09   ` Jesper Dangaard Brouer
  2019-06-23 14:21   ` Jesper Dangaard Brouer
                     ` (2 subsequent siblings)
  3 siblings, 0 replies; 28+ messages in thread
From: Jesper Dangaard Brouer @ 2019-06-23 14:09 UTC (permalink / raw)
  To: sameehj
  Cc: brouer, davem, netdev, dwmw, Machulsky, Zorik, matua, saeedb, msw,
	aliguori, nafea, gtzalik, netanel, alisaidi, benh, akiyano,
	Daniel Borkmann


I'm very happy to see progress with XDP for the ENA driver.

On Sun, 23 Jun 2019 10:06:49 +0300 <sameehj@amazon.com> wrote:

> @@ -888,6 +959,15 @@ static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring,
>  	va = page_address(rx_info->page) + rx_info->page_offset;
>  	prefetch(va + NET_IP_ALIGN);
>  
> +	if (ena_xdp_present_ring(rx_ring)) {
> +		rx_ring->xdp.data = va;
> +		rx_ring->xdp.data_meta = rx_ring->xdp.data;
> +		rx_ring->xdp.data_hard_start = rx_ring->xdp.data -
> +			rx_info->page_offset;
> +		rx_ring->xdp.data_end = rx_ring->xdp.data + len;
> +		if (ena_xdp_execute(rx_ring, &rx_ring->xdp) != XDP_PASS)
> +			return NULL;
> +	}

It looks like you forgot to initialize the xdp.rxq pointer.

You can use the samples/bpf/xdp_rxq_info* programs to access this
field... and this driver will likely crash if you run them against it...

-- 
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Principal Kernel Engineer at Red Hat
  LinkedIn: http://www.linkedin.com/in/brouer

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [RFC V1 net-next 1/1] net: ena: implement XDP drop support
  2019-06-23  7:06 ` [RFC V1 net-next 1/1] net: ena: implement XDP drop support sameehj
  2019-06-23 14:09   ` Jesper Dangaard Brouer
@ 2019-06-23 14:21   ` Jesper Dangaard Brouer
  2019-06-25  3:19     ` Machulsky, Zorik
  2019-06-23 14:28   ` [RFC V1 net-next 1/1] net: ena: implement XDP drop support David Ahern
  2019-06-23 14:51   ` Maciej Fijalkowski
  3 siblings, 1 reply; 28+ messages in thread
From: Jesper Dangaard Brouer @ 2019-06-23 14:21 UTC (permalink / raw)
  To: sameehj
  Cc: brouer, davem, netdev, dwmw, Machulsky, Zorik, matua, saeedb, msw,
	aliguori, nafea, gtzalik, netanel, alisaidi, benh, akiyano,
	Daniel Borkmann

On Sun, 23 Jun 2019 10:06:49 +0300 <sameehj@amazon.com> wrote:

> This commit implements the basic functionality of drop/pass logic in the
> ena driver.

Usually we require a driver to implement all the XDP return codes
before we accept it.  But as Daniel and I discussed with Zorik during
NetConf[1], we are going to make an exception and accept the driver
if you also implement XDP_TX.

We are making this exception because we trust that Zorik/Amazon will
follow up and implement XDP_REDIRECT later, given that you want AF_XDP
support, which requires XDP_REDIRECT.

[1] http://vger.kernel.org/netconf2019.html
-- 
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Principal Kernel Engineer at Red Hat
  LinkedIn: http://www.linkedin.com/in/brouer

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [RFC V1 net-next 1/1] net: ena: implement XDP drop support
  2019-06-23 14:21   ` Jesper Dangaard Brouer
@ 2019-06-25  3:19     ` Machulsky, Zorik
  2019-06-26  8:38       ` XDP multi-buffer incl. jumbo-frames (Was: [RFC V1 net-next 1/1] net: ena: implement XDP drop support) Jesper Dangaard Brouer
  0 siblings, 1 reply; 28+ messages in thread
From: Machulsky, Zorik @ 2019-06-25  3:19 UTC (permalink / raw)
  To: Jesper Dangaard Brouer, Jubran, Samih
  Cc: davem@davemloft.net, netdev@vger.kernel.org, Woodhouse, David,
	Matushevsky, Alexander, Bshara, Saeed, Wilson, Matt,
	Liguori, Anthony, Bshara, Nafea, Tzalik, Guy, Belgazal, Netanel,
	Saidi, Ali, Herrenschmidt, Benjamin, Kiyanovski, Arthur,
	Daniel Borkmann



On 6/23/19, 7:21 AM, "Jesper Dangaard Brouer" <brouer@redhat.com> wrote:

    On Sun, 23 Jun 2019 10:06:49 +0300 <sameehj@amazon.com> wrote:
    
    > This commit implements the basic functionality of drop/pass logic in the
    > ena driver.
    
    Usually we require a driver to implement all the XDP return codes,
    before we accept it.  But as Daniel and I discussed with Zorik during
    NetConf[1], we are going to make an exception and accept the driver
    if you also implement XDP_TX.
    
    As we trust that Zorik/Amazon will follow and implement XDP_REDIRECT
    later, given he/you wants AF_XDP support which requires XDP_REDIRECT.

Jesper, thanks for your comments and the very helpful discussion during NetConf!
That's the plan, as we agreed. From our side, I would like to reiterate the
importance of multi-buffer support in the xdp frame. We would really prefer not
to see our MTU shrink because of xdp support.

    
    [1] http://vger.kernel.org/netconf2019.html
    -- 
    Best regards,
      Jesper Dangaard Brouer
      MSc.CS, Principal Kernel Engineer at Red Hat
      LinkedIn: http://www.linkedin.com/in/brouer
    


^ permalink raw reply	[flat|nested] 28+ messages in thread

* XDP multi-buffer incl. jumbo-frames (Was: [RFC V1 net-next 1/1] net: ena: implement XDP drop support)
  2019-06-25  3:19     ` Machulsky, Zorik
@ 2019-06-26  8:38       ` Jesper Dangaard Brouer
  2019-06-26 11:52           ` Toke Høiland-Jørgensen
  2019-06-28  7:14         ` Eelco Chaudron
  0 siblings, 2 replies; 28+ messages in thread
From: Jesper Dangaard Brouer @ 2019-06-26  8:38 UTC (permalink / raw)
  To: Machulsky, Zorik
  Cc: Jubran, Samih, davem@davemloft.net, netdev@vger.kernel.org,
	Woodhouse, David, Matushevsky, Alexander, Bshara, Saeed,
	Wilson, Matt, Liguori, Anthony, Bshara, Nafea, Tzalik, Guy,
	Belgazal, Netanel, Saidi, Ali, Herrenschmidt, Benjamin,
	Kiyanovski, Arthur, Daniel Borkmann, brouer,
	Toke Høiland-Jørgensen, Ilias Apalodimas,
	Alexei Starovoitov, Jakub Kicinski, xdp-newbies@vger.kernel.org

On Tue, 25 Jun 2019 03:19:22 +0000
"Machulsky, Zorik" <zorik@amazon.com> wrote:

> On 6/23/19, 7:21 AM, "Jesper Dangaard Brouer" <brouer@redhat.com> wrote:
> 
>     On Sun, 23 Jun 2019 10:06:49 +0300 <sameehj@amazon.com> wrote:
>     
>     > This commit implements the basic functionality of drop/pass logic in the
>     > ena driver.  
>     
>     Usually we require a driver to implement all the XDP return codes,
>     before we accept it.  But as Daniel and I discussed with Zorik during
>     NetConf[1], we are going to make an exception and accept the driver
>     if you also implement XDP_TX.
>     
>     As we trust that Zorik/Amazon will follow and implement XDP_REDIRECT
>     later, given he/you wants AF_XDP support which requires XDP_REDIRECT.
> 
> Jesper, thanks for your comments and very helpful discussion during
> NetConf! That's the plan, as we agreed. From our side I would like to
> reiterate again the importance of multi-buffer support by xdp frame.
> We would really prefer not to see our MTU shrinking because of xdp
> support.   

Okay, we really need to make a serious attempt to find a way to support
multi-buffer packets with XDP, with the important criterion of not
hurting the performance of the single-buffer-per-packet design.

I've created a design document[2], that I will update based on our
discussions: [2] https://github.com/xdp-project/xdp-project/blob/master/areas/core/xdp-multi-buffer01-design.org

The use-case that really convinced me was Eric's packet header-split.

Let's refresh why XDP doesn't have multi-buffer support:

XDP is designed for maximum performance, which is why certain driver-level
use-cases were not supported, like multi-buffer packets (e.g. jumbo-frames),
as they complicate the driver RX-loop and memory model handling.

The single-buffer-per-packet design is also tied into eBPF Direct-Access
(DA) to packet data, which can only be allowed if the packet data is in
contiguous memory.  This DA feature is essential for XDP performance.

One way forward is to define that XDP only gets access to the first
packet buffer and cannot see subsequent buffers.  For XDP_TX and
XDP_REDIRECT to work, XDP still needs to carry pointers (plus
len+offset) to the other buffers, which is 16 bytes per extra buffer.

>     [1] http://vger.kernel.org/netconf2019.html
-- 
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Principal Kernel Engineer at Red Hat
  LinkedIn: http://www.linkedin.com/in/brouer

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: XDP multi-buffer incl. jumbo-frames (Was: [RFC V1 net-next 1/1] net: ena: implement XDP drop support)
  2019-06-26  8:38       ` XDP multi-buffer incl. jumbo-frames (Was: [RFC V1 net-next 1/1] net: ena: implement XDP drop support) Jesper Dangaard Brouer
@ 2019-06-26 11:52           ` Toke Høiland-Jørgensen
  2019-06-28  7:14         ` Eelco Chaudron
  1 sibling, 0 replies; 28+ messages in thread
From: Toke Høiland-Jørgensen @ 2019-06-26 11:52 UTC (permalink / raw)
  To: Jesper Dangaard Brouer, Machulsky, Zorik
  Cc: Jubran, Samih, davem@davemloft.net, netdev@vger.kernel.org,
	Woodhouse, David, Matushevsky, Alexander, Bshara, Saeed,
	Wilson, Matt, Liguori, Anthony, Bshara, Nafea, Tzalik, Guy,
	Belgazal, Netanel, Saidi, Ali, Herrenschmidt, Benjamin,
	Kiyanovski, Arthur, Daniel Borkmann, Ilias Apalodimas,
	Alexei Starovoitov, Jakub Kicinski, xdp-newbies@vger.kernel.org

Jesper Dangaard Brouer <brouer@redhat.com> writes:

> On Tue, 25 Jun 2019 03:19:22 +0000
> "Machulsky, Zorik" <zorik@amazon.com> wrote:
>
>> On 6/23/19, 7:21 AM, "Jesper Dangaard Brouer" <brouer@redhat.com> wrote:
>> 
>>     On Sun, 23 Jun 2019 10:06:49 +0300 <sameehj@amazon.com> wrote:
>>     
>>     > This commit implements the basic functionality of drop/pass logic in the
>>     > ena driver.  
>>     
>>     Usually we require a driver to implement all the XDP return codes,
>>     before we accept it.  But as Daniel and I discussed with Zorik during
>>     NetConf[1], we are going to make an exception and accept the driver
>>     if you also implement XDP_TX.
>>     
>>     As we trust that Zorik/Amazon will follow and implement XDP_REDIRECT
>>     later, given he/you wants AF_XDP support which requires XDP_REDIRECT.
>> 
>> Jesper, thanks for your comments and very helpful discussion during
>> NetConf! That's the plan, as we agreed. From our side I would like to
>> reiterate again the importance of multi-buffer support by xdp frame.
>> We would really prefer not to see our MTU shrinking because of xdp
>> support.   
>
> Okay we really need to make a serious attempt to find a way to support
> multi-buffer packets with XDP. With the important criteria of not
> hurting performance of the single-buffer per packet design.
>
> I've created a design document[2], that I will update based on our
> discussions: [2] https://github.com/xdp-project/xdp-project/blob/master/areas/core/xdp-multi-buffer01-design.org
>
> The use-case that really convinced me was Eric's packet header-split.
>
>
> Lets refresh: Why XDP don't have multi-buffer support:
>
> XDP is designed for maximum performance, which is why certain driver-level
> use-cases were not supported, like multi-buffer packets (like jumbo-frames).
> As it e.g. complicated the driver RX-loop and memory model handling.
>
> The single buffer per packet design, is also tied into eBPF Direct-Access
> (DA) to packet data, which can only be allowed if the packet memory is in
> contiguous memory.  This DA feature is essential for XDP performance.
>
>
> One way forward is to define that XDP only get access to the first
> packet buffer, and it cannot see subsequent buffers. For XDP_TX and
> XDP_REDIRECT to work then XDP still need to carry pointers (plus
> len+offset) to the other buffers, which is 16 bytes per extra buffer.

Yeah, I think this would be reasonable. As long as we can have a
metadata field with the full length + still give XDP programs the
ability to truncate the packet (i.e., discard the subsequent pages) I
think many (most?) use cases will work fine without having access to the
full packet data...

-Toke

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: XDP multi-buffer incl. jumbo-frames (Was: [RFC V1 net-next 1/1] net: ena: implement XDP drop support)
@ 2019-06-26 11:52           ` Toke Høiland-Jørgensen
  0 siblings, 0 replies; 28+ messages in thread
From: Toke Høiland-Jørgensen @ 2019-06-26 11:52 UTC (permalink / raw)
  To: Jesper Dangaard Brouer, Machulsky, Zorik
  Cc: Jubran, Samih, davem@davemloft.net, netdev@vger.kernel.org,
	Woodhouse, David, Matushevsky, Alexander, Bshara, Saeed,
	Wilson, Matt, Liguori, Anthony, Bshara, Nafea, Tzalik, Guy,
	Belgazal, Netanel, Saidi, Ali, Herrenschmidt, Benjamin,
	Kiyanovski, Arthur, Daniel Borkmann, brouer, Ilias Apalodimas,
	Alexei Starovoitov, Jakub Kicinski, xdp-newbies@vger.kernel.org

Jesper Dangaard Brouer <brouer@redhat.com> writes:

> On Tue, 25 Jun 2019 03:19:22 +0000
> "Machulsky, Zorik" <zorik@amazon.com> wrote:
>
>> On 6/23/19, 7:21 AM, "Jesper Dangaard Brouer" <brouer@redhat.com> wrote:
>> 
>>     On Sun, 23 Jun 2019 10:06:49 +0300 <sameehj@amazon.com> wrote:
>>     
>>     > This commit implements the basic functionality of drop/pass logic in the
>>     > ena driver.  
>>     
>>     Usually we require a driver to implement all the XDP return codes,
>>     before we accept it.  But as Daniel and I discussed with Zorik during
>>     NetConf[1], we are going to make an exception and accept the driver
>>     if you also implement XDP_TX.
>>     
>>     As we trust that Zorik/Amazon will follow and implement XDP_REDIRECT
>>     later, given he/you wants AF_XDP support which requires XDP_REDIRECT.
>> 
>> Jesper, thanks for your comments and very helpful discussion during
>> NetConf! That's the plan, as we agreed. From our side I would like to
>> reiterate again the importance of multi-buffer support by xdp frame.
>> We would really prefer not to see our MTU shrinking because of xdp
>> support.   
>
> Okay we really need to make a serious attempt to find a way to support
> multi-buffer packets with XDP. With the important criteria of not
> hurting performance of the single-buffer per packet design.
>
> I've created a design document[2], that I will update based on our
> discussions: [2] https://github.com/xdp-project/xdp-project/blob/master/areas/core/xdp-multi-buffer01-design.org
>
> The use-case that really convinced me was Eric's packet header-split.
>
>
> Lets refresh: Why XDP don't have multi-buffer support:
>
> XDP is designed for maximum performance, which is why certain driver-level
> use-cases were not supported, like multi-buffer packets (like jumbo-frames).
> As it e.g. complicated the driver RX-loop and memory model handling.
>
> The single buffer per packet design, is also tied into eBPF Direct-Access
> (DA) to packet data, which can only be allowed if the packet memory is in
> contiguous memory.  This DA feature is essential for XDP performance.
>
>
> One way forward is to define that XDP only get access to the first
> packet buffer, and it cannot see subsequent buffers. For XDP_TX and
> XDP_REDIRECT to work then XDP still need to carry pointers (plus
> len+offset) to the other buffers, which is 16 bytes per extra buffer.

Yeah, I think this would be reasonable. As long as we can have a
metadata field with the full length + still give XDP programs the
ability to truncate the packet (i.e., discard the subsequent pages) I
think many (most?) use cases will work fine without having access to the
full packet data...

-Toke

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: XDP multi-buffer incl. jumbo-frames (Was: [RFC V1 net-next 1/1] net: ena: implement XDP drop support)
  2019-06-26 11:52           ` Toke Høiland-Jørgensen
@ 2019-06-26 14:40             ` Jesper Dangaard Brouer
  -1 siblings, 0 replies; 28+ messages in thread
From: Jesper Dangaard Brouer @ 2019-06-26 14:40 UTC (permalink / raw)
  To: Toke Høiland-Jørgensen
  Cc: Machulsky, Zorik, Jubran, Samih, davem@davemloft.net,
	netdev@vger.kernel.org, Woodhouse, David, Matushevsky, Alexander,
	Bshara, Saeed, Wilson, Matt, Liguori, Anthony, Bshara, Nafea,
	Tzalik, Guy, Belgazal, Netanel, Saidi, Ali,
	Herrenschmidt, Benjamin, Kiyanovski, Arthur, Daniel Borkmann,
	Ilias Apalodimas, Alexei Starovoitov, Jakub Kicinski,
	xdp-newbies@vger.kernel.org, brouer

On Wed, 26 Jun 2019 13:52:16 +0200
Toke Høiland-Jørgensen <toke@redhat.com> wrote:

> Jesper Dangaard Brouer <brouer@redhat.com> writes:
> 
> > On Tue, 25 Jun 2019 03:19:22 +0000
> > "Machulsky, Zorik" <zorik@amazon.com> wrote:
> >  
> >> On 6/23/19, 7:21 AM, "Jesper Dangaard Brouer" <brouer@redhat.com> wrote:
> >> 
> >>     On Sun, 23 Jun 2019 10:06:49 +0300 <sameehj@amazon.com> wrote:
> >>       
> >>     > This commit implements the basic functionality of drop/pass logic in the
> >>     > ena driver.    
> >>     
> >>     Usually we require a driver to implement all the XDP return codes,
> >>     before we accept it.  But as Daniel and I discussed with Zorik during
> >>     NetConf[1], we are going to make an exception and accept the driver
> >>     if you also implement XDP_TX.
> >>     
> >>     As we trust that Zorik/Amazon will follow and implement XDP_REDIRECT
> >>     later, given he/you wants AF_XDP support which requires XDP_REDIRECT.
> >> 
> >> Jesper, thanks for your comments and very helpful discussion during
> >> NetConf! That's the plan, as we agreed. From our side I would like to
> >> reiterate again the importance of multi-buffer support by xdp frame.
> >> We would really prefer not to see our MTU shrinking because of xdp
> >> support.     
> >
> > Okay we really need to make a serious attempt to find a way to support
> > multi-buffer packets with XDP. With the important criteria of not
> > hurting performance of the single-buffer per packet design.
> >
> > I've created a design document[2], that I will update based on our
> > discussions: [2] https://github.com/xdp-project/xdp-project/blob/master/areas/core/xdp-multi-buffer01-design.org
> >
> > The use-case that really convinced me was Eric's packet header-split.
> >
> >
> > Lets refresh: Why XDP don't have multi-buffer support:
> >
> > XDP is designed for maximum performance, which is why certain driver-level
> > use-cases were not supported, like multi-buffer packets (like jumbo-frames).
> > As it e.g. complicated the driver RX-loop and memory model handling.
> >
> > The single buffer per packet design, is also tied into eBPF Direct-Access
> > (DA) to packet data, which can only be allowed if the packet memory is in
> > contiguous memory.  This DA feature is essential for XDP performance.
> >
> >
> > One way forward is to define that XDP only get access to the first
> > packet buffer, and it cannot see subsequent buffers. For XDP_TX and
> > XDP_REDIRECT to work then XDP still need to carry pointers (plus
> > len+offset) to the other buffers, which is 16 bytes per extra buffer.  
> 
> Yeah, I think this would be reasonable. As long as we can have a
> metadata field with the full length + still give XDP programs the
> ability to truncate the packet (i.e., discard the subsequent pages)

You touch upon some interesting complications already:

1. Is it valuable for the XDP bpf_prog to know the "full" length?
   (If so, then we need to extend the xdp ctx with this info.)

 But if we need to know the full length when the first buffer is
 processed, then realize that this affects the driver's RX-loop, because
 we then need to "collect" all the buffers before we can know the
 length (although some HW provides this in the first descriptor).

 We likely have to change the drivers' RX-loops anyhow, as XDP_TX and
 XDP_REDIRECT will also need to "collect" all buffers before the packet
 can be forwarded (although this could potentially happen later in the
 driver loop, when it finds the End-Of-Packet descriptor bit).
 

2. Can we even allow the helper bpf_xdp_adjust_tail()?

 Wouldn't it be easier to disallow a BPF-prog with this helper when the
 driver has configured multi-buffer?  Or would that be too restrictive,
 if jumbo-frames are very uncommon and only enabled because the switch
 infra could not be changed (like in the Amazon case)?

 Perhaps it is better to let bpf_xdp_adjust_tail() fail at runtime?


> I  think many (most?) use cases will work fine without having access
> to the full packet data...

I agree.  Other people should voice their concerns if they don't
agree...

-- 
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Principal Kernel Engineer at Red Hat
  LinkedIn: http://www.linkedin.com/in/brouer

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: XDP multi-buffer incl. jumbo-frames (Was: [RFC V1 net-next 1/1] net: ena: implement XDP drop support)
@ 2019-06-26 14:40             ` Jesper Dangaard Brouer
  0 siblings, 0 replies; 28+ messages in thread
From: Jesper Dangaard Brouer @ 2019-06-26 14:40 UTC (permalink / raw)
  To: Toke Høiland-Jørgensen
  Cc: Machulsky, Zorik, Jubran, Samih, davem@davemloft.net,
	netdev@vger.kernel.org, Woodhouse, David, Matushevsky, Alexander,
	Bshara, Saeed, Wilson, Matt, Liguori, Anthony, Bshara, Nafea,
	Tzalik, Guy, Belgazal, Netanel, Saidi, Ali,
	Herrenschmidt, Benjamin, Kiyanovski, Arthur, Daniel Borkmann,
	Ilias Apalodimas, Alexei Starovoitov, Jakub Kicinski,
	xdp-newbies@vger.kernel.org, brouer

On Wed, 26 Jun 2019 13:52:16 +0200
Toke Høiland-Jørgensen <toke@redhat.com> wrote:

> Jesper Dangaard Brouer <brouer@redhat.com> writes:
> 
> > On Tue, 25 Jun 2019 03:19:22 +0000
> > "Machulsky, Zorik" <zorik@amazon.com> wrote:
> >  
> >> On 6/23/19, 7:21 AM, "Jesper Dangaard Brouer" <brouer@redhat.com> wrote:
> >> 
> >>     On Sun, 23 Jun 2019 10:06:49 +0300 <sameehj@amazon.com> wrote:
> >>       
> >>     > This commit implements the basic functionality of drop/pass logic in the
> >>     > ena driver.    
> >>     
> >>     Usually we require a driver to implement all the XDP return codes,
> >>     before we accept it.  But as Daniel and I discussed with Zorik during
> >>     NetConf[1], we are going to make an exception and accept the driver
> >>     if you also implement XDP_TX.
> >>     
> >>     As we trust that Zorik/Amazon will follow and implement XDP_REDIRECT
> >>     later, given he/you wants AF_XDP support which requires XDP_REDIRECT.
> >> 
> >> Jesper, thanks for your comments and very helpful discussion during
> >> NetConf! That's the plan, as we agreed. From our side I would like to
> >> reiterate again the importance of multi-buffer support by xdp frame.
> >> We would really prefer not to see our MTU shrinking because of xdp
> >> support.     
> >
> > Okay we really need to make a serious attempt to find a way to support
> > multi-buffer packets with XDP. With the important criteria of not
> > hurting performance of the single-buffer per packet design.
> >
> > I've created a design document[2], that I will update based on our
> > discussions: [2] https://github.com/xdp-project/xdp-project/blob/master/areas/core/xdp-multi-buffer01-design.org
> >
> > The use-case that really convinced me was Eric's packet header-split.
> >
> >
> > Lets refresh: Why XDP don't have multi-buffer support:
> >
> > XDP is designed for maximum performance, which is why certain driver-level
> > use-cases were not supported, like multi-buffer packets (like jumbo-frames).
> > As it e.g. complicated the driver RX-loop and memory model handling.
> >
> > The single buffer per packet design, is also tied into eBPF Direct-Access
> > (DA) to packet data, which can only be allowed if the packet memory is in
> > contiguous memory.  This DA feature is essential for XDP performance.
> >
> >
> > One way forward is to define that XDP only get access to the first
> > packet buffer, and it cannot see subsequent buffers. For XDP_TX and
> > XDP_REDIRECT to work then XDP still need to carry pointers (plus
> > len+offset) to the other buffers, which is 16 bytes per extra buffer.  
> 
> Yeah, I think this would be reasonable. As long as we can have a
> metadata field with the full length + still give XDP programs the
> ability to truncate the packet (i.e., discard the subsequent pages)

You touch upon some interesting complications already:

1. Is it valuable for the XDP bpf_prog to know the "full" length?
   (If so, then we need to extend the xdp ctx with this info.)

 But if we need to know the full length when the first buffer is
 processed, then realize that this affects the driver's RX-loop, because
 we then need to "collect" all the buffers before we can know the
 length (although some HW provides this in the first descriptor).

 We likely have to change the drivers' RX-loops anyhow, as XDP_TX and
 XDP_REDIRECT will also need to "collect" all buffers before the packet
 can be forwarded (although this could potentially happen later in the
 driver loop, when it finds the End-Of-Packet descriptor bit).
 

2. Can we even allow the helper bpf_xdp_adjust_tail()?

 Wouldn't it be easier to disallow a BPF-prog with this helper when the
 driver has configured multi-buffer?  Or would that be too restrictive,
 if jumbo-frames are very uncommon and only enabled because the switch
 infra could not be changed (like in the Amazon case)?

 Perhaps it is better to let bpf_xdp_adjust_tail() fail at runtime?


> I  think many (most?) use cases will work fine without having access
> to the full packet data...

I agree.  Other people should voice their concerns if they don't
agree...

-- 
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Principal Kernel Engineer at Red Hat
  LinkedIn: http://www.linkedin.com/in/brouer

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: XDP multi-buffer incl. jumbo-frames (Was: [RFC V1 net-next 1/1] net: ena: implement XDP drop support)
  2019-06-26 14:40             ` Jesper Dangaard Brouer
@ 2019-06-26 15:01               ` Toke Høiland-Jørgensen
  -1 siblings, 0 replies; 28+ messages in thread
From: Toke Høiland-Jørgensen @ 2019-06-26 15:01 UTC (permalink / raw)
  To: Jesper Dangaard Brouer
  Cc: Machulsky, Zorik, Jubran, Samih, davem@davemloft.net,
	netdev@vger.kernel.org, Woodhouse, David, Matushevsky, Alexander,
	Bshara, Saeed, Wilson, Matt, Liguori, Anthony, Bshara, Nafea,
	Tzalik, Guy, Belgazal, Netanel, Saidi, Ali,
	Herrenschmidt, Benjamin, Kiyanovski, Arthur, Daniel Borkmann,
	Ilias Apalodimas, Alexei Starovoitov, Jakub Kicinski,
	xdp-newbies@vger.kernel.org

Jesper Dangaard Brouer <brouer@redhat.com> writes:

> On Wed, 26 Jun 2019 13:52:16 +0200
> Toke Høiland-Jørgensen <toke@redhat.com> wrote:
>
>> Jesper Dangaard Brouer <brouer@redhat.com> writes:
>> 
>> > On Tue, 25 Jun 2019 03:19:22 +0000
>> > "Machulsky, Zorik" <zorik@amazon.com> wrote:
>> >  
>> >> On 6/23/19, 7:21 AM, "Jesper Dangaard Brouer" <brouer@redhat.com> wrote:
>> >> 
>> >>     On Sun, 23 Jun 2019 10:06:49 +0300 <sameehj@amazon.com> wrote:
>> >>       
>> >>     > This commit implements the basic functionality of drop/pass logic in the
>> >>     > ena driver.    
>> >>     
>> >>     Usually we require a driver to implement all the XDP return codes,
>> >>     before we accept it.  But as Daniel and I discussed with Zorik during
>> >>     NetConf[1], we are going to make an exception and accept the driver
>> >>     if you also implement XDP_TX.
>> >>     
>> >>     As we trust that Zorik/Amazon will follow and implement XDP_REDIRECT
>> >>     later, given he/you wants AF_XDP support which requires XDP_REDIRECT.
>> >> 
>> >> Jesper, thanks for your comments and very helpful discussion during
>> >> NetConf! That's the plan, as we agreed. From our side I would like to
>> >> reiterate again the importance of multi-buffer support by xdp frame.
>> >> We would really prefer not to see our MTU shrinking because of xdp
>> >> support.     
>> >
>> > Okay we really need to make a serious attempt to find a way to support
>> > multi-buffer packets with XDP. With the important criteria of not
>> > hurting performance of the single-buffer per packet design.
>> >
>> > I've created a design document[2], that I will update based on our
>> > discussions: [2] https://github.com/xdp-project/xdp-project/blob/master/areas/core/xdp-multi-buffer01-design.org
>> >
>> > The use-case that really convinced me was Eric's packet header-split.
>> >
>> >
>> > Lets refresh: Why XDP don't have multi-buffer support:
>> >
>> > XDP is designed for maximum performance, which is why certain driver-level
>> > use-cases were not supported, like multi-buffer packets (like jumbo-frames).
>> > As it e.g. complicated the driver RX-loop and memory model handling.
>> >
>> > The single buffer per packet design, is also tied into eBPF Direct-Access
>> > (DA) to packet data, which can only be allowed if the packet memory is in
>> > contiguous memory.  This DA feature is essential for XDP performance.
>> >
>> >
>> > One way forward is to define that XDP only get access to the first
>> > packet buffer, and it cannot see subsequent buffers. For XDP_TX and
>> > XDP_REDIRECT to work then XDP still need to carry pointers (plus
>> > len+offset) to the other buffers, which is 16 bytes per extra buffer.  
>> 
>> Yeah, I think this would be reasonable. As long as we can have a
>> metadata field with the full length + still give XDP programs the
>> ability to truncate the packet (i.e., discard the subsequent pages)
>
> You touch upon some interesting complications already:
>
> 1. It is valuable for XDP bpf_prog to know "full" length?
>    (if so, then we need to extend xdp ctx with info)

Valuable, quite likely. A hard requirement, probably not (for all use
cases).

>  But if we need to know the full length, when the first-buffer is
>  processed. Then realize that this affect the drivers RX-loop, because
>  then we need to "collect" all the buffers before we can know the
>  length (although some HW provide this in first descriptor).
>
>  We likely have to change drivers RX-loop anyhow, as XDP_TX and
>  XDP_REDIRECT will also need to "collect" all buffers before the packet
>  can be forwarded. (Although this could potentially happen later in
>  driver loop when it meet/find the End-Of-Packet descriptor bit).
>  
>
> 2. Can we even allow helper bpf_xdp_adjust_tail() ?
>
>  Wouldn't it be easier to disallow a BPF-prog with this helper, when
>  driver have configured multi-buffer?

Easier, certainly. But then it's even easier to not implement this at
all ;)

>  Or will it be too restrictive, if jumbo-frame is very uncommon and
>  only enabled because switch infra could not be changed (like Amazon
>  case).

I think it would be preferable to support it; but maybe we can let that
depend on how difficult it actually turns out to be to allow it?

>  Perhaps it is better to let bpf_xdp_adjust_tail() fail runtime?

If we do disallow it, I think I'd lean towards failing the call at
runtime...

-Toke

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: XDP multi-buffer incl. jumbo-frames (Was: [RFC V1 net-next 1/1] net: ena: implement XDP drop support)
@ 2019-06-26 15:01               ` Toke Høiland-Jørgensen
  0 siblings, 0 replies; 28+ messages in thread
From: Toke Høiland-Jørgensen @ 2019-06-26 15:01 UTC (permalink / raw)
  To: Jesper Dangaard Brouer
  Cc: Machulsky, Zorik, Jubran, Samih, davem@davemloft.net,
	netdev@vger.kernel.org, Woodhouse, David, Matushevsky, Alexander,
	Bshara, Saeed, Wilson, Matt, Liguori, Anthony, Bshara, Nafea,
	Tzalik, Guy, Belgazal, Netanel, Saidi, Ali,
	Herrenschmidt, Benjamin, Kiyanovski, Arthur, Daniel Borkmann,
	Ilias Apalodimas, Alexei Starovoitov, Jakub Kicinski,
	xdp-newbies@vger.kernel.org, brouer

Jesper Dangaard Brouer <brouer@redhat.com> writes:

> On Wed, 26 Jun 2019 13:52:16 +0200
> Toke Høiland-Jørgensen <toke@redhat.com> wrote:
>
>> Jesper Dangaard Brouer <brouer@redhat.com> writes:
>> 
>> > On Tue, 25 Jun 2019 03:19:22 +0000
>> > "Machulsky, Zorik" <zorik@amazon.com> wrote:
>> >  
>> >> On 6/23/19, 7:21 AM, "Jesper Dangaard Brouer" <brouer@redhat.com> wrote:
>> >> 
>> >>     On Sun, 23 Jun 2019 10:06:49 +0300 <sameehj@amazon.com> wrote:
>> >>       
>> >>     > This commit implements the basic functionality of drop/pass logic in the
>> >>     > ena driver.    
>> >>     
>> >>     Usually we require a driver to implement all the XDP return codes,
>> >>     before we accept it.  But as Daniel and I discussed with Zorik during
>> >>     NetConf[1], we are going to make an exception and accept the driver
>> >>     if you also implement XDP_TX.
>> >>     
>> >>     As we trust that Zorik/Amazon will follow and implement XDP_REDIRECT
>> >>     later, given he/you wants AF_XDP support which requires XDP_REDIRECT.
>> >> 
>> >> Jesper, thanks for your comments and very helpful discussion during
>> >> NetConf! That's the plan, as we agreed. From our side I would like to
>> >> reiterate again the importance of multi-buffer support by xdp frame.
>> >> We would really prefer not to see our MTU shrinking because of xdp
>> >> support.     
>> >
>> > Okay we really need to make a serious attempt to find a way to support
>> > multi-buffer packets with XDP. With the important criteria of not
>> > hurting performance of the single-buffer per packet design.
>> >
>> > I've created a design document[2], that I will update based on our
>> > discussions: [2] https://github.com/xdp-project/xdp-project/blob/master/areas/core/xdp-multi-buffer01-design.org
>> >
>> > The use-case that really convinced me was Eric's packet header-split.
>> >
>> >
>> > Lets refresh: Why XDP don't have multi-buffer support:
>> >
>> > XDP is designed for maximum performance, which is why certain driver-level
>> > use-cases were not supported, like multi-buffer packets (like jumbo-frames).
>> > As it e.g. complicated the driver RX-loop and memory model handling.
>> >
>> > The single buffer per packet design, is also tied into eBPF Direct-Access
>> > (DA) to packet data, which can only be allowed if the packet memory is in
>> > contiguous memory.  This DA feature is essential for XDP performance.
>> >
>> >
>> > One way forward is to define that XDP only get access to the first
>> > packet buffer, and it cannot see subsequent buffers. For XDP_TX and
>> > XDP_REDIRECT to work then XDP still need to carry pointers (plus
>> > len+offset) to the other buffers, which is 16 bytes per extra buffer.  
>> 
>> Yeah, I think this would be reasonable. As long as we can have a
>> metadata field with the full length + still give XDP programs the
>> ability to truncate the packet (i.e., discard the subsequent pages)
>
> You touch upon some interesting complications already:
>
> 1. It is valuable for XDP bpf_prog to know "full" length?
>    (if so, then we need to extend xdp ctx with info)

Valuable, quite likely. A hard requirement, probably not (for all use
cases).

>  But if we need to know the full length, when the first-buffer is
>  processed. Then realize that this affect the drivers RX-loop, because
>  then we need to "collect" all the buffers before we can know the
>  length (although some HW provide this in first descriptor).
>
>  We likely have to change drivers RX-loop anyhow, as XDP_TX and
>  XDP_REDIRECT will also need to "collect" all buffers before the packet
>  can be forwarded. (Although this could potentially happen later in
>  driver loop when it meet/find the End-Of-Packet descriptor bit).
>  
>
> 2. Can we even allow helper bpf_xdp_adjust_tail() ?
>
>  Wouldn't it be easier to disallow a BPF-prog with this helper, when
>  driver have configured multi-buffer?

Easier, certainly. But then it's even easier to not implement this at
all ;)

>  Or will it be too restrictive, if jumbo-frame is very uncommon and
>  only enabled because switch infra could not be changed (like Amazon
>  case).

I think it would be preferable to support it; but maybe we can let that
depend on how difficult it actually turns out to be to allow it?

>  Perhaps it is better to let bpf_xdp_adjust_tail() fail runtime?

If we do disallow it, I think I'd lean towards failing the call at
runtime...

-Toke

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: XDP multi-buffer incl. jumbo-frames (Was: [RFC V1 net-next 1/1] net: ena: implement XDP drop support)
  2019-06-26 15:01               ` Toke Høiland-Jørgensen
  (?)
@ 2019-06-26 15:20               ` Willem de Bruijn
  2019-06-26 16:42                 ` Jonathan Lemon
  2019-06-28  8:46                 ` Jesper Dangaard Brouer
  -1 siblings, 2 replies; 28+ messages in thread
From: Willem de Bruijn @ 2019-06-26 15:20 UTC (permalink / raw)
  To: Toke Høiland-Jørgensen
  Cc: Jesper Dangaard Brouer, Machulsky, Zorik, Jubran, Samih,
	davem@davemloft.net, netdev@vger.kernel.org, Woodhouse, David,
	Matushevsky, Alexander, Bshara, Saeed, Wilson, Matt,
	Liguori, Anthony, Bshara, Nafea, Tzalik, Guy, Belgazal, Netanel,
	Saidi, Ali, Herrenschmidt, Benjamin, Kiyanovski, Arthur,
	Daniel Borkmann, Ilias Apalodimas, Alexei Starovoitov,
	Jakub Kicinski, xdp-newbies@vger.kernel.org

On Wed, Jun 26, 2019 at 11:01 AM Toke Høiland-Jørgensen <toke@redhat.com> wrote:
>
> Jesper Dangaard Brouer <brouer@redhat.com> writes:
>
> > On Wed, 26 Jun 2019 13:52:16 +0200
> > Toke Høiland-Jørgensen <toke@redhat.com> wrote:
> >
> >> Jesper Dangaard Brouer <brouer@redhat.com> writes:
> >>
> >> > On Tue, 25 Jun 2019 03:19:22 +0000
> >> > "Machulsky, Zorik" <zorik@amazon.com> wrote:
> >> >
> >> >> On 6/23/19, 7:21 AM, "Jesper Dangaard Brouer" <brouer@redhat.com> wrote:
> >> >>
> >> >>     On Sun, 23 Jun 2019 10:06:49 +0300 <sameehj@amazon.com> wrote:
> >> >>
> >> >>     > This commit implements the basic functionality of drop/pass logic in the
> >> >>     > ena driver.
> >> >>
> >> >>     Usually we require a driver to implement all the XDP return codes,
> >> >>     before we accept it.  But as Daniel and I discussed with Zorik during
> >> >>     NetConf[1], we are going to make an exception and accept the driver
> >> >>     if you also implement XDP_TX.
> >> >>
> >> >>     As we trust that Zorik/Amazon will follow and implement XDP_REDIRECT
> >> >>     later, given he/you wants AF_XDP support which requires XDP_REDIRECT.
> >> >>
> >> >> Jesper, thanks for your comments and very helpful discussion during
> >> >> NetConf! That's the plan, as we agreed. From our side I would like to
> >> >> reiterate again the importance of multi-buffer support by xdp frame.
> >> >> We would really prefer not to see our MTU shrinking because of xdp
> >> >> support.
> >> >
> >> > Okay we really need to make a serious attempt to find a way to support
> >> > multi-buffer packets with XDP. With the important criteria of not
> >> > hurting performance of the single-buffer per packet design.
> >> >
> >> > I've created a design document[2], that I will update based on our
> >> > discussions: [2] https://github.com/xdp-project/xdp-project/blob/master/areas/core/xdp-multi-buffer01-design.org
> >> >
> >> > The use-case that really convinced me was Eric's packet header-split.

Thanks for starting this discussion Jesper!

> >> >
> >> >
> >> > Lets refresh: Why XDP don't have multi-buffer support:
> >> >
> >> > XDP is designed for maximum performance, which is why certain driver-level
> >> > use-cases were not supported, like multi-buffer packets (like jumbo-frames).
> >> > As it e.g. complicated the driver RX-loop and memory model handling.
> >> >
> >> > The single buffer per packet design, is also tied into eBPF Direct-Access
> >> > (DA) to packet data, which can only be allowed if the packet memory is in
> >> > contiguous memory.  This DA feature is essential for XDP performance.
> >> >
> >> >
> >> > One way forward is to define that XDP only get access to the first
> >> > packet buffer, and it cannot see subsequent buffers. For XDP_TX and
> >> > XDP_REDIRECT to work then XDP still need to carry pointers (plus
> >> > len+offset) to the other buffers, which is 16 bytes per extra buffer.
> >>
> >> Yeah, I think this would be reasonable. As long as we can have a
> >> metadata field with the full length + still give XDP programs the
> >> ability to truncate the packet (i.e., discard the subsequent pages)
> >
> > You touch upon some interesting complications already:
> >
> > 1. It is valuable for XDP bpf_prog to know "full" length?
> >    (if so, then we need to extend xdp ctx with info)
>
> Valuable, quite likely. A hard requirement, probably not (for all use
> cases).

Agreed.

One common validation use would be to drop any packets whose header
length disagrees with the actual packet length.

> >  But if we need to know the full length, when the first-buffer is
> >  processed. Then realize that this affect the drivers RX-loop, because
> >  then we need to "collect" all the buffers before we can know the
> >  length (although some HW provide this in first descriptor).
> >
> >  We likely have to change drivers RX-loop anyhow, as XDP_TX and
> >  XDP_REDIRECT will also need to "collect" all buffers before the packet
> >  can be forwarded. (Although this could potentially happen later in
> >  driver loop when it meet/find the End-Of-Packet descriptor bit).

Yes, this might be quite a bit of refactoring of device driver code.

Should we move forward with some initial constraints, e.g., no
XDP_REDIRECT, no "full" length and no bpf_xdp_adjust_tail?

That already allows many useful programs.

As long as we don't arrive at a design that cannot be extended with
those features later.

> >
> >
> > 2. Can we even allow helper bpf_xdp_adjust_tail() ?
> >
> >  Wouldn't it be easier to disallow a BPF-prog with this helper, when
> >  driver have configured multi-buffer?
>
> Easier, certainly. But then it's even easier to not implement this at
> all ;)
>
> >  Or will it be too restrictive, if jumbo-frame is very uncommon and
> >  only enabled because switch infra could not be changed (like Amazon
> >  case).

Header-split, LRO and jumbo frames are certainly not limited to the Amazon case.

> I think it would be preferable to support it; but maybe we can let that
> depend on how difficult it actually turns out to be to allow it?
>
> >  Perhaps it is better to let bpf_xdp_adjust_tail() fail runtime?
>
> If we do disallow it, I think I'd lean towards failing the call at
> runtime...

Disagree. I'd rather have a program fail at load if it depends on
multi-frag support while the (driver) implementation does not yet
support it.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: XDP multi-buffer incl. jumbo-frames (Was: [RFC V1 net-next 1/1] net: ena: implement XDP drop support)
  2019-06-26 15:20               ` Willem de Bruijn
@ 2019-06-26 16:42                 ` Jonathan Lemon
  2019-06-26 20:00                   ` Jesper Dangaard Brouer
  2019-06-28  8:46                 ` Jesper Dangaard Brouer
  1 sibling, 1 reply; 28+ messages in thread
From: Jonathan Lemon @ 2019-06-26 16:42 UTC (permalink / raw)
  To: Willem de Bruijn
  Cc: Toke Høiland-Jørgensen, Jesper Dangaard Brouer,
	Machulsky, Zorik, Jubran, Samih, davem, netdev, Woodhouse, David,
	Matushevsky, Alexander, Bshara, Saeed, Wilson, Matt,
	Liguori, Anthony, Bshara, Nafea, Tzalik, Guy, Belgazal, Netanel,
	Saidi, Ali, Herrenschmidt, Benjamin, Kiyanovski, Arthur,
	Daniel Borkmann, Ilias Apalodimas, Alexei Starovoitov,
	Jakub Kicinski, xdp-newbies



On 26 Jun 2019, at 8:20, Willem de Bruijn wrote:

> On Wed, Jun 26, 2019 at 11:01 AM Toke Høiland-Jørgensen 
> <toke@redhat.com> wrote:
>>
>> Jesper Dangaard Brouer <brouer@redhat.com> writes:
>>
>>> On Wed, 26 Jun 2019 13:52:16 +0200
>>> Toke Høiland-Jørgensen <toke@redhat.com> wrote:
>>>
>>>> Jesper Dangaard Brouer <brouer@redhat.com> writes:
>>>>
>>>>> On Tue, 25 Jun 2019 03:19:22 +0000
>>>>> "Machulsky, Zorik" <zorik@amazon.com> wrote:
>>>>>
>>>>>> On 6/23/19, 7:21 AM, "Jesper Dangaard Brouer" 
>>>>>> <brouer@redhat.com> wrote:
>>>>>>
>>>>>>     On Sun, 23 Jun 2019 10:06:49 +0300 <sameehj@amazon.com> 
>>>>>> wrote:
>>>>>>
>>>>>>     > This commit implements the basic functionality of drop/pass 
>>>>>> logic in the
>>>>>>     > ena driver.
>>>>>>
>>>>>>     Usually we require a driver to implement all the XDP return 
>>>>>> codes,
>>>>>>     before we accept it.  But as Daniel and I discussed with 
>>>>>> Zorik during
>>>>>>     NetConf[1], we are going to make an exception and accept the 
>>>>>> driver
>>>>>>     if you also implement XDP_TX.
>>>>>>
>>>>>>     As we trust that Zorik/Amazon will follow and implement 
>>>>>> XDP_REDIRECT
>>>>>>     later, given he/you wants AF_XDP support which requires 
>>>>>> XDP_REDIRECT.
>>>>>>
>>>>>> Jesper, thanks for your comments and very helpful discussion 
>>>>>> during
>>>>>> NetConf! That's the plan, as we agreed. From our side I would 
>>>>>> like to
>>>>>> reiterate again the importance of multi-buffer support by xdp 
>>>>>> frame.
>>>>>> We would really prefer not to see our MTU shrinking because of 
>>>>>> xdp
>>>>>> support.
>>>>>
>>>>> Okay we really need to make a serious attempt to find a way to 
>>>>> support
>>>>> multi-buffer packets with XDP. With the important criteria of not
>>>>> hurting performance of the single-buffer per packet design.
>>>>>
>>>>> I've created a design document[2], that I will update based on our
>>>>> discussions: [2] 
>>>>> https://github.com/xdp-project/xdp-project/blob/master/areas/core/xdp-multi-buffer01-design.org
>>>>>
>>>>> The use-case that really convinced me was Eric's packet 
>>>>> header-split.
>
> Thanks for starting this discussion Jesper!
>
>>>>>
>>>>>
>>>>> Lets refresh: Why XDP don't have multi-buffer support:
>>>>>
>>>>> XDP is designed for maximum performance, which is why certain 
>>>>> driver-level
>>>>> use-cases were not supported, like multi-buffer packets (like 
>>>>> jumbo-frames).
>>>>> As it e.g. complicated the driver RX-loop and memory model 
>>>>> handling.
>>>>>
>>>>> The single buffer per packet design, is also tied into eBPF 
>>>>> Direct-Access
>>>>> (DA) to packet data, which can only be allowed if the packet 
>>>>> memory is in
>>>>> contiguous memory.  This DA feature is essential for XDP 
>>>>> performance.
>>>>>
>>>>>
>>>>> One way forward is to define that XDP only get access to the first
>>>>> packet buffer, and it cannot see subsequent buffers. For XDP_TX 
>>>>> and
>>>>> XDP_REDIRECT to work then XDP still need to carry pointers (plus
>>>>> len+offset) to the other buffers, which is 16 bytes per extra 
>>>>> buffer.
>>>>
>>>> Yeah, I think this would be reasonable. As long as we can have a
>>>> metadata field with the full length + still give XDP programs the
>>>> ability to truncate the packet (i.e., discard the subsequent pages)
>>>
>>> You touch upon some interesting complications already:
>>>
>>> 1. It is valuable for XDP bpf_prog to know "full" length?
>>>    (if so, then we need to extend xdp ctx with info)
>>
>> Valuable, quite likely. A hard requirement, probably not (for all use
>> cases).
>
> Agreed.
>
> One common validation use would be to drop any packets whose header
> length disagrees with the actual packet length.
>
>>>  But if we need to know the full length, when the first-buffer is
>>>  processed. Then realize that this affect the drivers RX-loop, 
>>> because
>>>  then we need to "collect" all the buffers before we can know the
>>>  length (although some HW provide this in first descriptor).
>>>
>>>  We likely have to change drivers RX-loop anyhow, as XDP_TX and
>>>  XDP_REDIRECT will also need to "collect" all buffers before the 
>>> packet
>>>  can be forwarded. (Although this could potentially happen later in
>>>  driver loop when it meet/find the End-Of-Packet descriptor bit).
>
> Yes, this might be quite a bit of refactoring of device driver code.
>
> Should we move forward with some initial constraints, e.g., no
> XDP_REDIRECT, no "full" length and no bpf_xdp_adjust_tail?
>
> That already allows many useful programs.
>
> As long as we don't arrive at a design that cannot be extended with
> those features later.

I think collecting all frames until EOP and then processing them
at once sounds reasonable.



>>> 2. Can we even allow helper bpf_xdp_adjust_tail() ?
>>>
>>>  Wouldn't it be easier to disallow a BPF-prog with this helper, when
>>>  driver have configured multi-buffer?
>>
>> Easier, certainly. But then it's even easier to not implement this at
>> all ;)
>>
>>>  Or will it be too restrictive, if jumbo-frame is very uncommon and
>>>  only enabled because switch infra could not be changed (like Amazon
>>>  case).
>
> Header-split, LRO and jumbo frame are certainly not limited to the 
> Amazon case.
>
>> I think it would be preferable to support it; but maybe we can let 
>> that
>> depend on how difficult it actually turns out to be to allow it?
>>
>>>  Perhaps it is better to let bpf_xdp_adjust_tail() fail runtime?
>>
>> If we do disallow it, I think I'd lean towards failing the call at
>> runtime...
>
> Disagree. I'd rather have a program fail at load if it depends on
> multi-frag support while the (driver) implementation does not yet
> support it.

If all packets are collected together (like the bulk queue does), and
then passed to XDP, this could easily be made backwards compatible.
If the XDP program isn't 'multi-frag' aware, then each packet is just
passed in individually.

Of course, passing in the equivalent of an iovec requires some form of
loop support on the BPF side, doesn't it?
-- 
Jonathan

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: XDP multi-buffer incl. jumbo-frames (Was: [RFC V1 net-next 1/1] net: ena: implement XDP drop support)
  2019-06-26 16:42                 ` Jonathan Lemon
@ 2019-06-26 20:00                   ` Jesper Dangaard Brouer
  2019-06-27 22:07                     ` Jonathan Lemon
  0 siblings, 1 reply; 28+ messages in thread
From: Jesper Dangaard Brouer @ 2019-06-26 20:00 UTC (permalink / raw)
  To: Jonathan Lemon
  Cc: Willem de Bruijn, Toke Høiland-Jørgensen,
	Machulsky, Zorik, Jubran, Samih, davem, netdev, Woodhouse, David,
	Matushevsky, Alexander, Bshara, Saeed, Wilson, Matt,
	Liguori, Anthony, Bshara, Nafea, Tzalik, Guy, Belgazal, Netanel,
	Saidi, Ali, Herrenschmidt, Benjamin, Kiyanovski, Arthur,
	Daniel Borkmann, Ilias Apalodimas, Alexei Starovoitov,
	Jakub Kicinski, xdp-newbies, brouer


On Wed, 26 Jun 2019 09:42:07 -0700 "Jonathan Lemon" <jonathan.lemon@gmail.com> wrote:

> If all packets are collected together (like the bulk queue does), and 
> then passed to XDP, this could easily be made backwards compatible.
> If the XDP program isn't 'multi-frag' aware, then each packet is just
> passed in individually.

My proposal #1 is that XDP only accesses the first buffer[1], as this simplifies things.

(AFAIK) What you are proposing is that all the buffers are passed to
the XDP prog (in the form of an iovec).  I need some more details about
your suggestion.

Specifically:

- What are the semantics when a 3-buffer packet is input and the XDP
prog chooses to return XDP_DROP for packet #2?

- Same situation if packet #2 wants an XDP_TX or redirect?


> Of course, passing in the equivalent of a iovec requires some form of 
> loop support on the BPF side, doesn't it?

The data structure used for holding these packet buffers/segments also
needs to be discussed.  I would either use an array of bio_vec[2] or
skb_frag_t (aka skb_frag_struct).  The skb_frag_t would be most
obvious, as we already have to write this when creating an SKB, in
skb_shared_info area. (Structs listed below signature).

Another problem is the size of these structs (16 bytes per
buffer/segment), and we likely need to support 17 segments, as this
needs to be compatible with SKBs (size 272 bytes).

My idea here is that we simply use the same memory area that we have to
store skb_shared_info into, as this allows us to get the SKB setup for
free when doing XDP_PASS or when doing SKB alloc after XDP_REDIRECT.


[1] https://github.com/xdp-project/xdp-project/blob/master/areas/core/xdp-multi-buffer01-design.org#proposal1-xdp-only-access-first-buffer

[2] https://lore.kernel.org/netdev/20190501041757.8647-1-willy@infradead.org/
-- 
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Principal Kernel Engineer at Red Hat
  LinkedIn: http://www.linkedin.com/in/brouer


$ pahole -C skb_frag_struct vmlinux
struct skb_frag_struct {
	struct {
		struct page * p;                         /*     0     8 */
	} page;                                          /*     0     8 */
	__u32                      page_offset;          /*     8     4 */
	__u32                      size;                 /*    12     4 */

	/* size: 16, cachelines: 1, members: 3 */
	/* last cacheline: 16 bytes */
};

$ pahole -C bio_vec vmlinux
struct bio_vec {
	struct page        * bv_page;                    /*     0     8 */
	unsigned int               bv_len;               /*     8     4 */
	unsigned int               bv_offset;            /*    12     4 */

	/* size: 16, cachelines: 1, members: 3 */
	/* last cacheline: 16 bytes */
};

$ pahole -C skb_shared_info vmlinux
struct skb_shared_info {
	__u8                       __unused;             /*     0     1 */
	__u8                       meta_len;             /*     1     1 */
	__u8                       nr_frags;             /*     2     1 */
	__u8                       tx_flags;             /*     3     1 */
	short unsigned int         gso_size;             /*     4     2 */
	short unsigned int         gso_segs;             /*     6     2 */
	struct sk_buff     * frag_list;                  /*     8     8 */
	struct skb_shared_hwtstamps hwtstamps;           /*    16     8 */
	unsigned int               gso_type;             /*    24     4 */
	u32                        tskey;                /*    28     4 */
	atomic_t                   dataref;              /*    32     0 */

	/* XXX 8 bytes hole, try to pack */

	void *                     destructor_arg;       /*    40     8 */
	skb_frag_t                 frags[17];            /*    48   272 */

	/* size: 320, cachelines: 5, members: 13 */
	/* sum members: 312, holes: 1, sum holes: 8 */
};

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: XDP multi-buffer incl. jumbo-frames (Was: [RFC V1 net-next 1/1] net: ena: implement XDP drop support)
  2019-06-26 20:00                   ` Jesper Dangaard Brouer
@ 2019-06-27 22:07                     ` Jonathan Lemon
  0 siblings, 0 replies; 28+ messages in thread
From: Jonathan Lemon @ 2019-06-27 22:07 UTC (permalink / raw)
  To: Jesper Dangaard Brouer
  Cc: Willem de Bruijn, Toke Høiland-Jørgensen,
	Machulsky, Zorik, Jubran, Samih, davem, netdev, Woodhouse, David,
	Matushevsky, Alexander, Bshara, Saeed, Wilson, Matt,
	Liguori, Anthony, Bshara, Nafea, Tzalik, Guy, Belgazal, Netanel,
	Saidi, Ali, Herrenschmidt, Benjamin, Kiyanovski, Arthur,
	Daniel Borkmann, Ilias Apalodimas, Alexei Starovoitov,
	Jakub Kicinski, xdp-newbies

On 26 Jun 2019, at 13:00, Jesper Dangaard Brouer wrote:

> On Wed, 26 Jun 2019 09:42:07 -0700 "Jonathan Lemon" 
> <jonathan.lemon@gmail.com> wrote:
>
>> If all packets are collected together (like the bulk queue does), and
>> then passed to XDP, this could easily be made backwards compatible.
>> If the XDP program isn't 'multi-frag' aware, then each packet is just
>> passed in individually.
>
> My proposal#1 is XDP only access first-buffer[1], as this simplifies 
> things.
>
> (AFAIK) What you are proposing is that all the buffers are passed to
> the XDP prog (in form of a iovec).  I need some more details about 
> your
> suggestion.

I was thinking this over yesterday - and was probably conflating packets
and buffers a bit.  Suppose that, for the purposes of this discussion,
we're talking about a single packet that is split over multiple buffer
areas.

Say, on RX, with header split:
    buf[0] = header
    buf[1] = data

For LRO (hw recv) and jumbo frames (and TSO):
    buf[0] = hdr + data
    buf[1] = data
    buf[n] = data

GRO cases, where individual packets are reassembled by software, aren't
handled here.


> Specifically:
>
> - What is the semantic when a 3 buffer packet is input and XDP prog
> choose to return XDP_DROP for packet #2 ?
>
> - Same situation of packet #2 wants a XDP_TX or redirect?

The collection of buffers represents a single packet, so this isn't
applicable here, right?

However, just thinking about incomplete data words (aka: pullup) gives
me a headache - it seems this would complicate the BPF/verifier quite a
bit.

So perhaps just restricting things to the first entry would do for now?

As far as the exact data structure used to hold the buffers, it would
be nice if it had the same layout as a bio_vec, in case someone wanted
to get clever and start transferring things over directly.
-- 
Jonathan


>> Of course, passing in the equivalent of a iovec requires some form of
>> loop support on the BPF side, doesn't it?
>
> The data structure used for holding these packet buffers/segments also
> needs to be discussed.  I would either use an array of bio_vec[2] or
> skb_frag_t (aka skb_frag_struct).  The skb_frag_t would be most
> obvious, as we already have to write this when creating an SKB, in
> skb_shared_info area. (Structs listed below signature).
>
> The problem is also that size of these structs (16 bytes) per
> buffer/segment, and we likely need to support 17 segments, as this 
> need
> to be compatible with SKBs (size 272 bytes).
>
> My idea here is that we simply use the same memory area, that we have 
> to
> store skb_shared_info into.  As this allow us to get the SKB setup for
> free, when doing XDP_PASS or when doing SKB alloc after XDP_REDIRECT.
>
>
> [1] 
> https://github.com/xdp-project/xdp-project/blob/master/areas/core/xdp-multi-buffer01-design.org#proposal1-xdp-only-access-first-buffer
>
> [2] 
> https://lore.kernel.org/netdev/20190501041757.8647-1-willy@infradead.org/
> -- 
> Best regards,
>   Jesper Dangaard Brouer
>   MSc.CS, Principal Kernel Engineer at Red Hat
>   LinkedIn: http://www.linkedin.com/in/brouer
>
>
> $ pahole -C skb_frag_struct vmlinux
> struct skb_frag_struct {
> 	struct {
> 		struct page * p;                         /*     0     8 */
> 	} page;                                          /*     0     8 */
> 	__u32                      page_offset;          /*     8     4 */
> 	__u32                      size;                 /*    12     4 */
>
> 	/* size: 16, cachelines: 1, members: 3 */
> 	/* last cacheline: 16 bytes */
> };
>
> $ pahole -C bio_vec vmlinux
> struct bio_vec {
> 	struct page        * bv_page;                    /*     0     8 */
> 	unsigned int               bv_len;               /*     8     4 */
> 	unsigned int               bv_offset;            /*    12     4 */
>
> 	/* size: 16, cachelines: 1, members: 3 */
> 	/* last cacheline: 16 bytes */
> };
>
> $ pahole -C skb_shared_info vmlinux
> struct skb_shared_info {
> 	__u8                       __unused;             /*     0     1 */
> 	__u8                       meta_len;             /*     1     1 */
> 	__u8                       nr_frags;             /*     2     1 */
> 	__u8                       tx_flags;             /*     3     1 */
> 	short unsigned int         gso_size;             /*     4     2 */
> 	short unsigned int         gso_segs;             /*     6     2 */
> 	struct sk_buff     * frag_list;                  /*     8     8 */
> 	struct skb_shared_hwtstamps hwtstamps;           /*    16     8 */
> 	unsigned int               gso_type;             /*    24     4 */
> 	u32                        tskey;                /*    28     4 */
> 	atomic_t                   dataref;              /*    32     0 */
>
> 	/* XXX 8 bytes hole, try to pack */
>
> 	void *                     destructor_arg;       /*    40     8 */
> 	skb_frag_t                 frags[17];            /*    48   272 */
>
> 	/* size: 320, cachelines: 5, members: 13 */
> 	/* sum members: 312, holes: 1, sum holes: 8 */
> };

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: XDP multi-buffer incl. jumbo-frames (Was: [RFC V1 net-next 1/1] net: ena: implement XDP drop support)
  2019-06-26 15:20               ` Willem de Bruijn
  2019-06-26 16:42                 ` Jonathan Lemon
@ 2019-06-28  8:46                 ` Jesper Dangaard Brouer
  1 sibling, 0 replies; 28+ messages in thread
From: Jesper Dangaard Brouer @ 2019-06-28  8:46 UTC (permalink / raw)
  To: Willem de Bruijn
  Cc: Toke Høiland-Jørgensen, Machulsky, Zorik, Jubran, Samih,
	davem@davemloft.net, netdev@vger.kernel.org, Woodhouse, David,
	Matushevsky, Alexander, Bshara, Saeed, Wilson, Matt,
	Liguori, Anthony, Bshara, Nafea, Tzalik, Guy, Belgazal, Netanel,
	Saidi, Ali, Herrenschmidt, Benjamin, Kiyanovski, Arthur,
	Daniel Borkmann, Ilias Apalodimas, Alexei Starovoitov,
	Jakub Kicinski, xdp-newbies@vger.kernel.org, brouer


On Wed, 26 Jun 2019 11:20:45 -0400 Willem de Bruijn <willemdebruijn.kernel@gmail.com> wrote:
> On Wed, Jun 26, 2019 at 11:01 AM Toke Høiland-Jørgensen <toke@redhat.com> wrote:
> > Jesper Dangaard Brouer <brouer@redhat.com> writes:
> > > On Wed, 26 Jun 2019 13:52:16 +0200
> > > Toke Høiland-Jørgensen <toke@redhat.com> wrote:
> > >  
> > >> Jesper Dangaard Brouer <brouer@redhat.com> writes:
> > >>  
[...]
> > >
> > > You touch upon some interesting complications already:
> > >
> > > 1. It is valuable for XDP bpf_prog to know "full" length?
> > >    (if so, then we need to extend xdp ctx with info)  
> >
> > Valuable, quite likely. A hard requirement, probably not (for all use
> > cases).  
> 
> Agreed.
> 
> One common validation use would be to drop any packets whose header
> length disagrees with the actual packet length.

That is a good point.

Added a section "XDP access to full packet length?" to capture this:
- https://github.com/xdp-project/xdp-project/commit/da5b84264b85b0d
- https://github.com/xdp-project/xdp-project/blob/master/areas/core/xdp-multi-buffer01-design.org#xdp-access-to-full-packet-length


> > >  But if we need to know the full length, when the first-buffer is
> > >  processed. Then realize that this affect the drivers RX-loop, because
> > >  then we need to "collect" all the buffers before we can know the
> > >  length (although some HW provide this in first descriptor).
> > >
> > >  We likely have to change drivers RX-loop anyhow, as XDP_TX and
> > >  XDP_REDIRECT will also need to "collect" all buffers before the packet
> > >  can be forwarded. (Although this could potentially happen later in
> > >  driver loop when it meet/find the End-Of-Packet descriptor bit).  
> 
> Yes, this might be quite a bit of refactoring of device driver code.
> 
> Should we move forward with some initial constraints, e.g., no
> XDP_REDIRECT, no "full" length and no bpf_xdp_adjust_tail?

I generally like this...

If we do not add "full" length, maybe we could add an indication to the
XDP-developer that this is a multi-buffer/multi-segment packet, such
that header length validation against the packet length (data_end-data)
is not possible.  This is user visible, so we would have to keep it
forever... I'm leaning towards adding "full" length from the beginning.

> That already allows many useful programs.
>
> As long as we don't arrive at a design that cannot be extended with
> those features later.

That is the important part...

 
> > > 2. Can we even allow helper bpf_xdp_adjust_tail() ?
[...]
> >  
> > >  Perhaps it is better to let bpf_xdp_adjust_tail() fail runtime?  
> >
> > If we do disallow it, I think I'd lean towards failing the call at
> > runtime...  
> 
> Disagree. I'd rather have a program fail at load if it depends on
> multi-frag support while the (driver) implementation does not yet
> support it.

I usually agree that we should fail the program, early at load time.
For XDP we are unfortunately missing some knobs to do this, see[1].

Specifically for bpf_xdp_adjust_tail(), it might be better to fail at
runtime, because the driver might have enabled TSO for TCP packets,
while your XDP use-case is for adjusting UDP-packets (and doing XDP level
replies), which will never see multi-buffer packets.  If we fail use of
bpf_xdp_adjust_tail() at load time, then you would have to disable TSO to
allow loading your XDP-prog, hurting the other TSO-TCP use-case.


[1] http://vger.kernel.org/netconf2019_files/xdp-feature-detection.pdf
-- 
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Principal Kernel Engineer at Red Hat
  LinkedIn: http://www.linkedin.com/in/brouer

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: XDP multi-buffer incl. jumbo-frames (Was: [RFC V1 net-next 1/1] net: ena: implement XDP drop support)
  2019-06-26 14:40             ` Jesper Dangaard Brouer
@ 2019-06-26 15:14               ` Toke Høiland-Jørgensen
  -1 siblings, 0 replies; 28+ messages in thread
From: Toke Høiland-Jørgensen @ 2019-06-26 15:14 UTC (permalink / raw)
  To: Jesper Dangaard Brouer
  Cc: Machulsky, Zorik, Jubran, Samih, davem@davemloft.net,
	netdev@vger.kernel.org, Woodhouse, David, Matushevsky, Alexander,
	Bshara, Saeed, Wilson, Matt, Liguori, Anthony, Bshara, Nafea,
	Tzalik, Guy, Belgazal, Netanel, Saidi, Ali,
	Herrenschmidt, Benjamin, Kiyanovski, Arthur, Daniel Borkmann,
	Ilias Apalodimas, Alexei Starovoitov, Jakub Kicinski,
	xdp-newbies@vger.kernel.org

Jesper Dangaard Brouer <brouer@redhat.com> writes:

> On Wed, 26 Jun 2019 13:52:16 +0200
> Toke Høiland-Jørgensen <toke@redhat.com> wrote:
>
>> Jesper Dangaard Brouer <brouer@redhat.com> writes:
>> 
>> > On Tue, 25 Jun 2019 03:19:22 +0000
>> > "Machulsky, Zorik" <zorik@amazon.com> wrote:
>> >  
>> >> On 6/23/19, 7:21 AM, "Jesper Dangaard Brouer" <brouer@redhat.com> wrote:
>> >> 
>> >>     On Sun, 23 Jun 2019 10:06:49 +0300 <sameehj@amazon.com> wrote:
>> >>       
>> >>     > This commit implements the basic functionality of drop/pass logic in the
>> >>     > ena driver.    
>> >>     
>> >>     Usually we require a driver to implement all the XDP return codes,
>> >>     before we accept it.  But as Daniel and I discussed with Zorik during
>> >>     NetConf[1], we are going to make an exception and accept the driver
>> >>     if you also implement XDP_TX.
>> >>     
>> >>     As we trust that Zorik/Amazon will follow and implement XDP_REDIRECT
>> >>     later, given he/you wants AF_XDP support which requires XDP_REDIRECT.
>> >> 
>> >> Jesper, thanks for your comments and very helpful discussion during
>> >> NetConf! That's the plan, as we agreed. From our side I would like to
>> >> reiterate again the importance of multi-buffer support by xdp frame.
>> >> We would really prefer not to see our MTU shrinking because of xdp
>> >> support.     
>> >
>> > Okay we really need to make a serious attempt to find a way to support
>> > multi-buffer packets with XDP. With the important criteria of not
>> > hurting performance of the single-buffer per packet design.
>> >
>> > I've created a design document[2], that I will update based on our
>> > discussions: [2] https://github.com/xdp-project/xdp-project/blob/master/areas/core/xdp-multi-buffer01-design.org
>> >
>> > The use-case that really convinced me was Eric's packet header-split.
>> >
>> >
>> > Lets refresh: Why XDP don't have multi-buffer support:
>> >
>> > XDP is designed for maximum performance, which is why certain driver-level
>> > use-cases were not supported, like multi-buffer packets (like jumbo-frames).
>> > As it e.g. complicated the driver RX-loop and memory model handling.
>> >
>> > The single buffer per packet design, is also tied into eBPF Direct-Access
>> > (DA) to packet data, which can only be allowed if the packet memory is in
>> > contiguous memory.  This DA feature is essential for XDP performance.
>> >
>> >
>> > One way forward is to define that XDP only get access to the first
>> > packet buffer, and it cannot see subsequent buffers. For XDP_TX and
>> > XDP_REDIRECT to work then XDP still need to carry pointers (plus
>> > len+offset) to the other buffers, which is 16 bytes per extra buffer.  
>> 
>> Yeah, I think this would be reasonable. As long as we can have a
>> metadata field with the full length + still give XDP programs the
>> ability to truncate the packet (i.e., discard the subsequent pages)
>
> You touch upon some interesting complications already:
>
> 1. It is valuable for XDP bpf_prog to know "full" length?
>    (if so, then we need to extend xdp ctx with info)
>
>  But if we need to know the full length, when the first-buffer is
>  processed. Then realize that this affect the drivers RX-loop, because
>  then we need to "collect" all the buffers before we can know the
>  length (although some HW provide this in first descriptor).
>
>  We likely have to change drivers RX-loop anyhow, as XDP_TX and
>  XDP_REDIRECT will also need to "collect" all buffers before the packet
>  can be forwarded. (Although this could potentially happen later in
>  driver loop when it meet/find the End-Of-Packet descriptor bit).

A few more points (mostly thinking out loud here):

- We probably need to loop through the subsequent descriptors in all
  cases, right? (i.e., if we run XDP on the first segment, and that
  returns DROP, the rest that are part of the packet still need to be
  discarded). So we may as well delay XDP execution until we have the
  whole packet?

- Will this allow us to run XDP on hardware-assembled GRO super-packets?

-Toke

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: XDP multi-buffer incl. jumbo-frames (Was: [RFC V1 net-next 1/1] net: ena: implement XDP drop support)
@ 2019-06-26 15:14               ` Toke Høiland-Jørgensen
  0 siblings, 0 replies; 28+ messages in thread
From: Toke Høiland-Jørgensen @ 2019-06-26 15:14 UTC (permalink / raw)
  To: Jesper Dangaard Brouer
  Cc: Machulsky, Zorik, Jubran, Samih, davem@davemloft.net,
	netdev@vger.kernel.org, Woodhouse, David, Matushevsky, Alexander,
	Bshara, Saeed, Wilson, Matt, Liguori, Anthony, Bshara, Nafea,
	Tzalik, Guy, Belgazal, Netanel, Saidi, Ali,
	Herrenschmidt, Benjamin, Kiyanovski, Arthur, Daniel Borkmann,
	Ilias Apalodimas, Alexei Starovoitov, Jakub Kicinski,
	xdp-newbies@vger.kernel.org, brouer

Jesper Dangaard Brouer <brouer@redhat.com> writes:

> On Wed, 26 Jun 2019 13:52:16 +0200
> Toke Høiland-Jørgensen <toke@redhat.com> wrote:
>
>> Jesper Dangaard Brouer <brouer@redhat.com> writes:
>> 
>> > On Tue, 25 Jun 2019 03:19:22 +0000
>> > "Machulsky, Zorik" <zorik@amazon.com> wrote:
>> >  
>> >> On 6/23/19, 7:21 AM, "Jesper Dangaard Brouer" <brouer@redhat.com> wrote:
>> >> 
>> >>     On Sun, 23 Jun 2019 10:06:49 +0300 <sameehj@amazon.com> wrote:
>> >>       
>> >>     > This commit implements the basic functionality of drop/pass logic in the
>> >>     > ena driver.    
>> >>     
>> >>     Usually we require a driver to implement all the XDP return codes,
>> >>     before we accept it.  But as Daniel and I discussed with Zorik during
>> >>     NetConf[1], we are going to make an exception and accept the driver
>> >>     if you also implement XDP_TX.
>> >>     
>> >>     As we trust that Zorik/Amazon will follow and implement XDP_REDIRECT
>> >>     later, given he/you wants AF_XDP support which requires XDP_REDIRECT.
>> >> 
>> >> Jesper, thanks for your comments and very helpful discussion during
>> >> NetConf! That's the plan, as we agreed. From our side I would like to
>> >> reiterate again the importance of multi-buffer support by xdp frame.
>> >> We would really prefer not to see our MTU shrinking because of xdp
>> >> support.     
>> >
>> > Okay we really need to make a serious attempt to find a way to support
>> > multi-buffer packets with XDP. With the important criteria of not
>> > hurting performance of the single-buffer per packet design.
>> >
>> > I've created a design document[2], that I will update based on our
>> > discussions: [2] https://github.com/xdp-project/xdp-project/blob/master/areas/core/xdp-multi-buffer01-design.org
>> >
>> > The use-case that really convinced me was Eric's packet header-split.
>> >
>> >
>> > Lets refresh: Why XDP don't have multi-buffer support:
>> >
>> > XDP is designed for maximum performance, which is why certain driver-level
>> > use-cases were not supported, like multi-buffer packets (like jumbo-frames).
>> > As it e.g. complicated the driver RX-loop and memory model handling.
>> >
>> > The single buffer per packet design, is also tied into eBPF Direct-Access
>> > (DA) to packet data, which can only be allowed if the packet memory is in
>> > contiguous memory.  This DA feature is essential for XDP performance.
>> >
>> >
>> > One way forward is to define that XDP only get access to the first
>> > packet buffer, and it cannot see subsequent buffers. For XDP_TX and
>> > XDP_REDIRECT to work then XDP still need to carry pointers (plus
>> > len+offset) to the other buffers, which is 16 bytes per extra buffer.  
>> 
>> Yeah, I think this would be reasonable. As long as we can have a
>> metadata field with the full length + still give XDP programs the
>> ability to truncate the packet (i.e., discard the subsequent pages)
>
> You touch upon some interesting complications already:
>
> 1. It is valuable for XDP bpf_prog to know "full" length?
>    (if so, then we need to extend xdp ctx with info)
>
>  But if we need to know the full length, when the first-buffer is
>  processed. Then realize that this affect the drivers RX-loop, because
>  then we need to "collect" all the buffers before we can know the
>  length (although some HW provide this in first descriptor).
>
>  We likely have to change drivers RX-loop anyhow, as XDP_TX and
>  XDP_REDIRECT will also need to "collect" all buffers before the packet
>  can be forwarded. (Although this could potentially happen later in
>  driver loop when it meet/find the End-Of-Packet descriptor bit).

A few more points (mostly thinking out loud here):

- We probably need to loop through the subsequent descriptors in all
  cases, right? (i.e., if we run XDP on the first segment, and that
  returns DROP, the rest that are part of the packet still need to be
  discarded). So we may as well delay XDP execution until we have the
  whole packet?

- Will this allow us to run XDP on hardware-assembled GRO super-packets?

-Toke

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: XDP multi-buffer incl. jumbo-frames (Was: [RFC V1 net-next 1/1] net: ena: implement XDP drop support)
  2019-06-26 15:14               ` Toke Høiland-Jørgensen
@ 2019-06-26 16:36                 ` Jesper Dangaard Brouer
  -1 siblings, 0 replies; 28+ messages in thread
From: Jesper Dangaard Brouer @ 2019-06-26 16:36 UTC (permalink / raw)
  To: Toke Høiland-Jørgensen
  Cc: Machulsky, Zorik, Jubran, Samih, davem@davemloft.net,
	netdev@vger.kernel.org, Woodhouse, David, Matushevsky, Alexander,
	Bshara, Saeed, Wilson, Matt, Liguori, Anthony, Bshara, Nafea,
	Tzalik, Guy, Belgazal, Netanel, Saidi, Ali,
	Herrenschmidt, Benjamin, Kiyanovski, Arthur, Daniel Borkmann,
	Ilias Apalodimas, Alexei Starovoitov, Jakub Kicinski,
	xdp-newbies@vger.kernel.org, brouer

On Wed, 26 Jun 2019 17:14:32 +0200
Toke Høiland-Jørgensen <toke@redhat.com> wrote:

> Jesper Dangaard Brouer <brouer@redhat.com> writes:
> 
> > On Wed, 26 Jun 2019 13:52:16 +0200
> > Toke Høiland-Jørgensen <toke@redhat.com> wrote:
> >  
> >> Jesper Dangaard Brouer <brouer@redhat.com> writes:
> >>   
> >> > On Tue, 25 Jun 2019 03:19:22 +0000
> >> > "Machulsky, Zorik" <zorik@amazon.com> wrote:
> >> >    
> >> >> On 6/23/19, 7:21 AM, "Jesper Dangaard Brouer" <brouer@redhat.com> wrote:
> >> >> 
> >> >>     On Sun, 23 Jun 2019 10:06:49 +0300 <sameehj@amazon.com> wrote:
> >> >>         
> >> >>     > This commit implements the basic functionality of drop/pass logic in the
> >> >>     > ena driver.      
> >> >>     
> >> >>     Usually we require a driver to implement all the XDP return codes,
> >> >>     before we accept it.  But as Daniel and I discussed with Zorik during
> >> >>     NetConf[1], we are going to make an exception and accept the driver
> >> >>     if you also implement XDP_TX.
> >> >>     
> >> >>     As we trust that Zorik/Amazon will follow and implement XDP_REDIRECT
> >> >>     later, given he/you wants AF_XDP support which requires XDP_REDIRECT.
> >> >> 
> >> >> Jesper, thanks for your comments and very helpful discussion during
> >> >> NetConf! That's the plan, as we agreed. From our side I would like to
> >> >> reiterate again the importance of multi-buffer support by xdp frame.
> >> >> We would really prefer not to see our MTU shrinking because of xdp
> >> >> support.       
> >> >
> >> > Okay we really need to make a serious attempt to find a way to support
> >> > multi-buffer packets with XDP. With the important criteria of not
> >> > hurting performance of the single-buffer per packet design.
> >> >
> >> > I've created a design document[2], that I will update based on our
> >> > discussions: [2] https://github.com/xdp-project/xdp-project/blob/master/areas/core/xdp-multi-buffer01-design.org
> >> >
> >> > The use-case that really convinced me was Eric's packet header-split.
> >> >
> >> >
> >> > Lets refresh: Why XDP don't have multi-buffer support:
> >> >
> >> > XDP is designed for maximum performance, which is why certain driver-level
> >> > use-cases were not supported, like multi-buffer packets (like jumbo-frames).
> >> > As it e.g. complicated the driver RX-loop and memory model handling.
> >> >
> >> > The single buffer per packet design, is also tied into eBPF Direct-Access
> >> > (DA) to packet data, which can only be allowed if the packet memory is in
> >> > contiguous memory.  This DA feature is essential for XDP performance.
> >> >
> >> >
> >> > One way forward is to define that XDP only get access to the first
> >> > packet buffer, and it cannot see subsequent buffers. For XDP_TX and
> >> > XDP_REDIRECT to work then XDP still need to carry pointers (plus
> >> > len+offset) to the other buffers, which is 16 bytes per extra buffer.    
> >> 
> >> Yeah, I think this would be reasonable. As long as we can have a
> >> metadata field with the full length + still give XDP programs the
> >> ability to truncate the packet (i.e., discard the subsequent pages)  
> >
> > You touch upon some interesting complications already:
> >
> > 1. It is valuable for XDP bpf_prog to know "full" length?
> >    (if so, then we need to extend xdp ctx with info)
> >
> >  But if we need to know the full length, when the first-buffer is
> >  processed. Then realize that this affect the drivers RX-loop, because
> >  then we need to "collect" all the buffers before we can know the
> >  length (although some HW provide this in first descriptor).
> >
> >  We likely have to change drivers RX-loop anyhow, as XDP_TX and
> >  XDP_REDIRECT will also need to "collect" all buffers before the packet
> >  can be forwarded. (Although this could potentially happen later in
> >  driver loop when it meet/find the End-Of-Packet descriptor bit).  
> 
> A few more points (mostly thinking out loud here):
> 
> - In any case we probably need to loop through the subsequent
>   descriptors in all cases, right? (i.e., if we run XDP on first
>   segment, and that returns DROP, the rest that are part of the packet
>   still need to be discarded). So we may as well delay XDP execution
>   until we have the whole packet?

For the XDP_DROP case, drivers usually have a way to discard the remaining
buffers/segments, to handle error cases.  But how tricky/convoluted this
code is depends heavily on the driver...

Generally I would say it would make sense to delay XDP execution until
all buffers/segments are "collected".  It would be the clean approach,
but will likely require refactoring of driver level code.

 
> - Will this allow us to run XDP on hardware-assembled GRO super-packets?

Big YES.  This is usually called LRO or TSO packets.  And yes, I also
want to support this use-case, which is also listed in [2].  If we go
down this road, this use-case is also important. (Especially related to
my alloc SKBs outside drivers[3], this is a hardware offload we must
support).


[2] https://github.com/xdp-project/xdp-project/blob/master/areas/core/xdp-multi-buffer01-design.org

[3] http://vger.kernel.org/netconf2019_files/xdp-metadata-discussion.pdf
-- 
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Principal Kernel Engineer at Red Hat
  LinkedIn: http://www.linkedin.com/in/brouer

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: XDP multi-buffer incl. jumbo-frames (Was: [RFC V1 net-next 1/1] net: ena: implement XDP drop support)
@ 2019-06-26 16:36                 ` Jesper Dangaard Brouer
  0 siblings, 0 replies; 28+ messages in thread
From: Jesper Dangaard Brouer @ 2019-06-26 16:36 UTC (permalink / raw)
  To: Toke Høiland-Jørgensen
  Cc: Machulsky, Zorik, Jubran, Samih, davem@davemloft.net,
	netdev@vger.kernel.org, Woodhouse, David, Matushevsky, Alexander,
	Bshara, Saeed, Wilson, Matt, Liguori, Anthony, Bshara, Nafea,
	Tzalik, Guy, Belgazal, Netanel, Saidi, Ali,
	Herrenschmidt, Benjamin, Kiyanovski, Arthur, Daniel Borkmann,
	Ilias Apalodimas, Alexei Starovoitov, Jakub Kicinski,
	xdp-newbies@vger.kernel.org, brouer

On Wed, 26 Jun 2019 17:14:32 +0200
Toke Høiland-Jørgensen <toke@redhat.com> wrote:

> Jesper Dangaard Brouer <brouer@redhat.com> writes:
> 
> > On Wed, 26 Jun 2019 13:52:16 +0200
> > Toke Høiland-Jørgensen <toke@redhat.com> wrote:
> >  
> >> Jesper Dangaard Brouer <brouer@redhat.com> writes:
> >>   
> >> > On Tue, 25 Jun 2019 03:19:22 +0000
> >> > "Machulsky, Zorik" <zorik@amazon.com> wrote:
> >> >    
> >> >> On 6/23/19, 7:21 AM, "Jesper Dangaard Brouer" <brouer@redhat.com> wrote:
> >> >> 
> >> >>     On Sun, 23 Jun 2019 10:06:49 +0300 <sameehj@amazon.com> wrote:
> >> >>         
> >> >>     > This commit implements the basic functionality of drop/pass logic in the
> >> >>     > ena driver.      
> >> >>     
> >> >>     Usually we require a driver to implement all the XDP return codes,
> >> >>     before we accept it.  But as Daniel and I discussed with Zorik during
> >> >>     NetConf[1], we are going to make an exception and accept the driver
> >> >>     if you also implement XDP_TX.
> >> >>     
> >> >>     As we trust that Zorik/Amazon will follow and implement XDP_REDIRECT
> >> >>     later, given he/you wants AF_XDP support which requires XDP_REDIRECT.
> >> >> 
> >> >> Jesper, thanks for your comments and very helpful discussion during
> >> >> NetConf! That's the plan, as we agreed. From our side I would like to
> >> >> reiterate again the importance of multi-buffer support by xdp frame.
> >> >> We would really prefer not to see our MTU shrinking because of xdp
> >> >> support.       
> >> >
> >> > Okay we really need to make a serious attempt to find a way to support
> >> > multi-buffer packets with XDP. With the important criteria of not
> >> > hurting performance of the single-buffer per packet design.
> >> >
> >> > I've created a design document[2], that I will update based on our
> >> > discussions: [2] https://github.com/xdp-project/xdp-project/blob/master/areas/core/xdp-multi-buffer01-design.org
> >> >
> >> > The use-case that really convinced me was Eric's packet header-split.
> >> >
> >> >
> >> > Lets refresh: Why XDP don't have multi-buffer support:
> >> >
> >> > XDP is designed for maximum performance, which is why certain driver-level
> >> > use-cases were not supported, like multi-buffer packets (like jumbo-frames).
> >> > As it e.g. complicated the driver RX-loop and memory model handling.
> >> >
> >> > The single buffer per packet design, is also tied into eBPF Direct-Access
> >> > (DA) to packet data, which can only be allowed if the packet memory is in
> >> > contiguous memory.  This DA feature is essential for XDP performance.
> >> >
> >> >
> >> > One way forward is to define that XDP only get access to the first
> >> > packet buffer, and it cannot see subsequent buffers. For XDP_TX and
> >> > XDP_REDIRECT to work then XDP still need to carry pointers (plus
> >> > len+offset) to the other buffers, which is 16 bytes per extra buffer.    
> >> 
> >> Yeah, I think this would be reasonable. As long as we can have a
> >> metadata field with the full length + still give XDP programs the
> >> ability to truncate the packet (i.e., discard the subsequent pages)  
> >
> > You touch upon some interesting complications already:
> >
> > 1. It is valuable for XDP bpf_prog to know "full" length?
> >    (if so, then we need to extend xdp ctx with info)
> >
> >  But if we need to know the full length, when the first-buffer is
> >  processed. Then realize that this affect the drivers RX-loop, because
> >  then we need to "collect" all the buffers before we can know the
> >  length (although some HW provide this in first descriptor).
> >
> >  We likely have to change drivers RX-loop anyhow, as XDP_TX and
> >  XDP_REDIRECT will also need to "collect" all buffers before the packet
> >  can be forwarded. (Although this could potentially happen later in
> >  driver loop when it meet/find the End-Of-Packet descriptor bit).  
> 
> A few more points (mostly thinking out loud here):
> 
> - In any case we probably need to loop through the subsequent
>   descriptors in all cases, right? (i.e., if we run XDP on first
>   segment, and that returns DROP, the rest that are part of the packet
>   still need to be discarded). So we may as well delay XDP execution
>   until we have the whole packet?

For the XDP_DROP case, drivers usually have a way to discard the remaining
buffers/segments, to handle error cases.  But how tricky/convoluted this
code is depends heavily on the driver...

Generally I would say it would make sense to delay XDP execution until
all buffers/segments are "collected".  It would be the clean approach,
but will likely require refactoring of driver level code.

 
> - Will this allow us to run XDP on hardware-assembled GRO super-packets?

Big YES.  This is usually called LRO or TSO packets.  And yes, I also
want to support this use-case, which is also listed in [2].  If we go
down this road, this use-case is also important. (Especially related to
my alloc SKBs outside drivers[3], this is a hardware offload we must
support).


[2] https://github.com/xdp-project/xdp-project/blob/master/areas/core/xdp-multi-buffer01-design.org

[3] http://vger.kernel.org/netconf2019_files/xdp-metadata-discussion.pdf
-- 
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Principal Kernel Engineer at Red Hat
  LinkedIn: http://www.linkedin.com/in/brouer

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: XDP multi-buffer incl. jumbo-frames (Was: [RFC V1 net-next 1/1] net: ena: implement XDP drop support)
  2019-06-26  8:38       ` XDP multi-buffer incl. jumbo-frames (Was: [RFC V1 net-next 1/1] net: ena: implement XDP drop support) Jesper Dangaard Brouer
  2019-06-26 11:52           ` Toke Høiland-Jørgensen
@ 2019-06-28  7:14         ` Eelco Chaudron
  2019-06-28  7:46             ` Toke Høiland-Jørgensen
  2019-06-28  8:22           ` Jesper Dangaard Brouer
  1 sibling, 2 replies; 28+ messages in thread
From: Eelco Chaudron @ 2019-06-28  7:14 UTC (permalink / raw)
  To: Jesper Dangaard Brouer
  Cc: Machulsky, Zorik, Jubran, Samih, davem, netdev, Woodhouse, David,
	Matushevsky, Alexander, Bshara, Saeed, Wilson, Matt,
	Liguori, Anthony, Bshara, Nafea, Tzalik, Guy, Belgazal, Netanel,
	Saidi, Ali, Herrenschmidt, Benjamin, Kiyanovski, Arthur,
	Daniel Borkmann, Toke Høiland-Jørgensen,
	Ilias Apalodimas, Alexei Starovoitov, Jakub Kicinski, xdp-newbies



On 26 Jun 2019, at 10:38, Jesper Dangaard Brouer wrote:

> On Tue, 25 Jun 2019 03:19:22 +0000
> "Machulsky, Zorik" <zorik@amazon.com> wrote:
>
>> On 6/23/19, 7:21 AM, "Jesper Dangaard Brouer" <brouer@redhat.com> 
>> wrote:
>>
>>     On Sun, 23 Jun 2019 10:06:49 +0300 <sameehj@amazon.com> wrote:
>>
>>     > This commit implements the basic functionality of drop/pass 
>> logic in the
>>     > ena driver.
>>
>>     Usually we require a driver to implement all the XDP return 
>> codes,
>>     before we accept it.  But as Daniel and I discussed with Zorik 
>> during
>>     NetConf[1], we are going to make an exception and accept the 
>> driver
>>     if you also implement XDP_TX.
>>
>>     As we trust that Zorik/Amazon will follow and implement 
>> XDP_REDIRECT
>>     later, given he/you wants AF_XDP support which requires 
>> XDP_REDIRECT.
>>
>> Jesper, thanks for your comments and very helpful discussion during
>> NetConf! That's the plan, as we agreed. From our side I would like to
>> reiterate again the importance of multi-buffer support by xdp frame.
>> We would really prefer not to see our MTU shrinking because of xdp
>> support.
>
> Okay we really need to make a serious attempt to find a way to support
> multi-buffer packets with XDP. With the important criteria of not
> hurting performance of the single-buffer per packet design.
>
> I've created a design document[2], that I will update based on our
> discussions: [2] 
> https://github.com/xdp-project/xdp-project/blob/master/areas/core/xdp-multi-buffer01-design.org
>
> The use-case that really convinced me was Eric's packet header-split.
>
>
> Lets refresh: Why XDP don't have multi-buffer support:
>
> XDP is designed for maximum performance, which is why certain 
> driver-level
> use-cases were not supported, like multi-buffer packets (like 
> jumbo-frames).
> As it e.g. complicated the driver RX-loop and memory model handling.
>
> The single buffer per packet design, is also tied into eBPF 
> Direct-Access
> (DA) to packet data, which can only be allowed if the packet memory is 
> in
> contiguous memory.  This DA feature is essential for XDP performance.
>
>
> One way forward is to define that XDP only get access to the first
> packet buffer, and it cannot see subsequent buffers.  For XDP_TX and
> XDP_REDIRECT to work then XDP still need to carry pointers (plus
> len+offset) to the other buffers, which is 16 bytes per extra buffer.


I’ve seen various network processor HW designs, and they normally get 
the first x bytes (128 - 512) which they can manipulate 
(append/prepend/insert/modify/delete).

There are designs where they can “page in” the additional fragments,
but it’s expensive as it requires additional memory transfers. But the
majority do not care about (cannot change) the remaining fragments. I also
cannot think of a reason why you might want to remove something at the end
of the frame (thinking about routing/forwarding needs here).

If we do want XDP to access other fragments we could do this through a 
helper which swaps the packet context?

//Eelco

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: XDP multi-buffer incl. jumbo-frames (Was: [RFC V1 net-next 1/1] net: ena: implement XDP drop support)
  2019-06-28  7:14         ` Eelco Chaudron
@ 2019-06-28  7:46             ` Toke Høiland-Jørgensen
  2019-06-28  8:22           ` Jesper Dangaard Brouer
  1 sibling, 0 replies; 28+ messages in thread
From: Toke Høiland-Jørgensen @ 2019-06-28  7:46 UTC (permalink / raw)
  To: Eelco Chaudron, Jesper Dangaard Brouer
  Cc: Machulsky, Zorik, Jubran, Samih, davem, netdev, Woodhouse, David,
	Matushevsky, Alexander, Bshara, Saeed, Wilson, Matt,
	Liguori, Anthony, Bshara, Nafea, Tzalik, Guy, Belgazal, Netanel,
	Saidi, Ali, Herrenschmidt, Benjamin, Kiyanovski, Arthur,
	Daniel Borkmann, Ilias Apalodimas, Alexei Starovoitov,
	Jakub Kicinski, xdp-newbies

"Eelco Chaudron" <echaudro@redhat.com> writes:

> On 26 Jun 2019, at 10:38, Jesper Dangaard Brouer wrote:
>
>> On Tue, 25 Jun 2019 03:19:22 +0000
>> "Machulsky, Zorik" <zorik@amazon.com> wrote:
>>
>>> On 6/23/19, 7:21 AM, "Jesper Dangaard Brouer" <brouer@redhat.com> 
>>> wrote:
>>>
>>>     On Sun, 23 Jun 2019 10:06:49 +0300 <sameehj@amazon.com> wrote:
>>>
>>>     > This commit implements the basic functionality of drop/pass 
>>> logic in the
>>>     > ena driver.
>>>
>>>     Usually we require a driver to implement all the XDP return 
>>> codes,
>>>     before we accept it.  But as Daniel and I discussed with Zorik 
>>> during
>>>     NetConf[1], we are going to make an exception and accept the 
>>> driver
>>>     if you also implement XDP_TX.
>>>
>>>     As we trust that Zorik/Amazon will follow and implement 
>>> XDP_REDIRECT
>>>     later, given he/you wants AF_XDP support which requires 
>>> XDP_REDIRECT.
>>>
>>> Jesper, thanks for your comments and very helpful discussion during
>>> NetConf! That's the plan, as we agreed. From our side I would like to
>>> reiterate again the importance of multi-buffer support by xdp frame.
>>> We would really prefer not to see our MTU shrinking because of xdp
>>> support.
>>
>> Okay we really need to make a serious attempt to find a way to support
>> multi-buffer packets with XDP. With the important criteria of not
>> hurting performance of the single-buffer per packet design.
>>
>> I've created a design document[2], that I will update based on our
>> discussions: [2] 
>> https://github.com/xdp-project/xdp-project/blob/master/areas/core/xdp-multi-buffer01-design.org
>>
>> The use-case that really convinced me was Eric's packet header-split.
>>
>>
>> Lets refresh: Why XDP don't have multi-buffer support:
>>
>> XDP is designed for maximum performance, which is why certain 
>> driver-level
>> use-cases were not supported, like multi-buffer packets (like 
>> jumbo-frames).
>> As it e.g. complicated the driver RX-loop and memory model handling.
>>
>> The single buffer per packet design, is also tied into eBPF 
>> Direct-Access
>> (DA) to packet data, which can only be allowed if the packet memory is 
>> in
>> contiguous memory.  This DA feature is essential for XDP performance.
>>
>>
>> One way forward is to define that XDP only get access to the first
>> packet buffer, and it cannot see subsequent buffers.  For XDP_TX and
>> XDP_REDIRECT to work then XDP still need to carry pointers (plus
>> len+offset) to the other buffers, which is 16 bytes per extra buffer.
>
>
> I’ve seen various network processor HW designs, and they normally get 
> the first x bytes (128 - 512) which they can manipulate 
> (append/prepend/insert/modify/delete).
>
> There are designs where they can “page in” the additional fragments 
> but it’s expensive as it requires additional memory transfers. But the 
> majority do not care (cannot change) the remaining fragments. Can also 
> not think of a reason why you might want to remove something at the end 
> of the frame (thinking about routing/forwarding needs here).
>
> If we do want XDP to access other fragments we could do this through a 
> helper which swaps the packet context?

Yeah, I was also going to suggest a helper for that. It doesn't
necessarily need to swap the packet context, it could just return a new
pointer?

-Toke

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: XDP multi-buffer incl. jumbo-frames (Was: [RFC V1 net-next 1/1] net: ena: implement XDP drop support)
@ 2019-06-28  7:46             ` Toke Høiland-Jørgensen
  0 siblings, 0 replies; 28+ messages in thread
From: Toke Høiland-Jørgensen @ 2019-06-28  7:46 UTC (permalink / raw)
  To: Eelco Chaudron, Jesper Dangaard Brouer
  Cc: Machulsky, Zorik, Jubran, Samih, davem, netdev, Woodhouse, David,
	Matushevsky, Alexander, Bshara, Saeed, Wilson, Matt,
	Liguori, Anthony, Bshara, Nafea, Tzalik, Guy, Belgazal, Netanel,
	Saidi, Ali, Herrenschmidt, Benjamin, Kiyanovski, Arthur,
	Daniel Borkmann, Ilias Apalodimas, Alexei Starovoitov,
	Jakub Kicinski, xdp-newbies

"Eelco Chaudron" <echaudro@redhat.com> writes:

> On 26 Jun 2019, at 10:38, Jesper Dangaard Brouer wrote:
>
>> On Tue, 25 Jun 2019 03:19:22 +0000
>> "Machulsky, Zorik" <zorik@amazon.com> wrote:
>>
>>> On 6/23/19, 7:21 AM, "Jesper Dangaard Brouer" <brouer@redhat.com> 
>>> wrote:
>>>
>>>     On Sun, 23 Jun 2019 10:06:49 +0300 <sameehj@amazon.com> wrote:
>>>
>>>     > This commit implements the basic functionality of drop/pass 
>>> logic in the
>>>     > ena driver.
>>>
>>>     Usually we require a driver to implement all the XDP return 
>>> codes,
>>>     before we accept it.  But as Daniel and I discussed with Zorik 
>>> during
>>>     NetConf[1], we are going to make an exception and accept the 
>>> driver
>>>     if you also implement XDP_TX.
>>>
>>>     As we trust that Zorik/Amazon will follow and implement 
>>> XDP_REDIRECT
>>>     later, given he/you wants AF_XDP support which requires 
>>> XDP_REDIRECT.
>>>
>>> Jesper, thanks for your comments and very helpful discussion during
>>> NetConf! That's the plan, as we agreed. From our side I would like to
>>> reiterate again the importance of multi-buffer support by xdp frame.
>>> We would really prefer not to see our MTU shrinking because of xdp
>>> support.
>>
>> Okay we really need to make a serious attempt to find a way to support
>> multi-buffer packets with XDP. With the important criteria of not
>> hurting performance of the single-buffer per packet design.
>>
>> I've created a design document[2], that I will update based on our
>> discussions: [2] 
>> https://github.com/xdp-project/xdp-project/blob/master/areas/core/xdp-multi-buffer01-design.org
>>
>> The use-case that really convinced me was Eric's packet header-split.
>>
>>
>> Lets refresh: Why XDP don't have multi-buffer support:
>>
>> XDP is designed for maximum performance, which is why certain 
>> driver-level
>> use-cases were not supported, like multi-buffer packets (like 
>> jumbo-frames).
>> As it e.g. complicated the driver RX-loop and memory model handling.
>>
>> The single buffer per packet design, is also tied into eBPF 
>> Direct-Access
>> (DA) to packet data, which can only be allowed if the packet memory is 
>> in
>> contiguous memory.  This DA feature is essential for XDP performance.
>>
>>
>> One way forward is to define that XDP only get access to the first
>> packet buffer, and it cannot see subsequent buffers.  For XDP_TX and
>> XDP_REDIRECT to work then XDP still need to carry pointers (plus
>> len+offset) to the other buffers, which is 16 bytes per extra buffer.
>
>
> I’ve seen various network processor HW designs, and they normally get 
> the first x bytes (128 - 512) which they can manipulate 
> (append/prepend/insert/modify/delete).
>
> There are designs where they can “page in” the additional fragments 
> but it’s expensive as it requires additional memory transfers. But the 
> majority do not care (cannot change) the remaining fragments. Can also 
> not think of a reason why you might want to remove something at the end 
> of the frame (thinking about routing/forwarding needs here).
>
> If we do want XDP to access other fragments we could do this through a 
> helper which swaps the packet context?

Yeah, I was also going to suggest a helper for that. It doesn't
necessarily need to swap the packet context, it could just return a new
pointer?

-Toke

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: XDP multi-buffer incl. jumbo-frames (Was: [RFC V1 net-next 1/1] net: ena: implement XDP drop support)
  2019-06-28  7:46             ` Toke Høiland-Jørgensen
  (?)
@ 2019-06-28 11:49             ` Eelco Chaudron
  -1 siblings, 0 replies; 28+ messages in thread
From: Eelco Chaudron @ 2019-06-28 11:49 UTC (permalink / raw)
  To: Toke Høiland-Jørgensen
  Cc: Jesper Dangaard Brouer, Machulsky, Zorik, Jubran, Samih, davem,
	netdev, Woodhouse, David, Matushevsky, Alexander, Bshara, Saeed,
	Wilson, Matt, Liguori, Anthony, Bshara, Nafea, Tzalik, Guy,
	Belgazal, Netanel, Saidi, Ali, Herrenschmidt, Benjamin,
	Kiyanovski, Arthur, Daniel Borkmann, Ilias Apalodimas,
	Alexei Starovoitov, Jakub Kicinski, xdp-newbies



On 28 Jun 2019, at 9:46, Toke Høiland-Jørgensen wrote:

> "Eelco Chaudron" <echaudro@redhat.com> writes:
>
>> On 26 Jun 2019, at 10:38, Jesper Dangaard Brouer wrote:
>>
>>> On Tue, 25 Jun 2019 03:19:22 +0000
>>> "Machulsky, Zorik" <zorik@amazon.com> wrote:
>>>
>>>> On 6/23/19, 7:21 AM, "Jesper Dangaard Brouer" 
>>>> <brouer@redhat.com>
>>>> wrote:
>>>>
>>>>     On Sun, 23 Jun 2019 10:06:49 +0300 <sameehj@amazon.com> wrote:
>>>>
>>>>     > This commit implements the basic functionality of drop/pass
>>>> logic in the
>>>>     > ena driver.
>>>>
>>>>     Usually we require a driver to implement all the XDP return
>>>> codes,
>>>>     before we accept it.  But as Daniel and I discussed with Zorik
>>>> during
>>>>     NetConf[1], we are going to make an exception and accept the
>>>> driver
>>>>     if you also implement XDP_TX.
>>>>
>>>>     As we trust that Zorik/Amazon will follow and implement
>>>> XDP_REDIRECT
>>>>     later, given he/you wants AF_XDP support which requires
>>>> XDP_REDIRECT.
>>>>
>>>> Jesper, thanks for your comments and very helpful discussion during
>>>> NetConf! That's the plan, as we agreed. From our side I would like 
>>>> to
>>>> reiterate again the importance of multi-buffer support by xdp 
>>>> frame.
>>>> We would really prefer not to see our MTU shrinking because of xdp
>>>> support.
>>>
>>> Okay we really need to make a serious attempt to find a way to 
>>> support
>>> multi-buffer packets with XDP. With the important criteria of not
>>> hurting performance of the single-buffer per packet design.
>>>
>>> I've created a design document[2], that I will update based on our
>>> discussions: [2]
>>> https://github.com/xdp-project/xdp-project/blob/master/areas/core/xdp-multi-buffer01-design.org
>>>
>>> The use-case that really convinced me was Eric's packet 
>>> header-split.
>>>
>>>
>>> Lets refresh: Why XDP don't have multi-buffer support:
>>>
>>> XDP is designed for maximum performance, which is why certain
>>> driver-level
>>> use-cases were not supported, like multi-buffer packets (like
>>> jumbo-frames).
>>> As it e.g. complicated the driver RX-loop and memory model handling.
>>>
>>> The single buffer per packet design, is also tied into eBPF
>>> Direct-Access
>>> (DA) to packet data, which can only be allowed if the packet memory 
>>> is
>>> in
>>> contiguous memory.  This DA feature is essential for XDP 
>>> performance.
>>>
>>>
>>> One way forward is to define that XDP only get access to the first
>>> packet buffer, and it cannot see subsequent buffers.  For XDP_TX and
>>> XDP_REDIRECT to work then XDP still need to carry pointers (plus
>>> len+offset) to the other buffers, which is 16 bytes per extra 
>>> buffer.
>>
>>
>> I’ve seen various network processor HW designs, and they normally 
>> get
>> the first x bytes (128 - 512) which they can manipulate
>> (append/prepend/insert/modify/delete).
>>
>> There are designs where they can “page in” the additional 
>> fragments
>> but it’s expensive as it requires additional memory transfers. But 
>> the
>> majority do not care (cannot change) the remaining fragments. Can 
>> also
>> not think of a reason why you might want to remove something at the 
>> end
>> of the frame (thinking about routing/forwarding needs here).
>>
>> If we do want XDP to access other fragments we could do this through 
>> a
>> helper which swaps the packet context?
>
> Yeah, I was also going to suggest a helper for that. It doesn't
> necessarily need to swap the packet context, it could just return a 
> new
> pointer?

Yes that will work, my head was still thinking ASICs where there is 
limited SRAM space…

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: XDP multi-buffer incl. jumbo-frames (Was: [RFC V1 net-next 1/1] net: ena: implement XDP drop support)
  2019-06-28  7:14         ` Eelco Chaudron
  2019-06-28  7:46             ` Toke Høiland-Jørgensen
@ 2019-06-28  8:22           ` Jesper Dangaard Brouer
  1 sibling, 0 replies; 28+ messages in thread
From: Jesper Dangaard Brouer @ 2019-06-28  8:22 UTC (permalink / raw)
  To: Eelco Chaudron
  Cc: Machulsky, Zorik, Jubran, Samih, davem, netdev, Woodhouse, David,
	Matushevsky, Alexander, Bshara, Saeed, Wilson, Matt,
	Liguori, Anthony, Bshara, Nafea, Tzalik, Guy, Belgazal, Netanel,
	Saidi, Ali, Herrenschmidt, Benjamin, Kiyanovski, Arthur,
	Daniel Borkmann, Toke Høiland-Jørgensen,
	Ilias Apalodimas, Alexei Starovoitov, Jakub Kicinski, xdp-newbies,
	brouer, Steffen Klassert

On Fri, 28 Jun 2019 09:14:39 +0200
"Eelco Chaudron" <echaudro@redhat.com> wrote:

> On 26 Jun 2019, at 10:38, Jesper Dangaard Brouer wrote:
> 
> > On Tue, 25 Jun 2019 03:19:22 +0000
> > "Machulsky, Zorik" <zorik@amazon.com> wrote:
> >  
> >> On 6/23/19, 7:21 AM, "Jesper Dangaard Brouer" <brouer@redhat.com> 
> >> wrote:
> >>
> >>     On Sun, 23 Jun 2019 10:06:49 +0300 <sameehj@amazon.com> wrote:
> >>  
> >>     > This commit implements the basic functionality of drop/pass logic in the  
> >>     > ena driver.  
> >>
> >>     Usually we require a driver to implement all the XDP return codes,
> >>     before we accept it.  But as Daniel and I discussed with Zorik during
> >>     NetConf[1], we are going to make an exception and accept the driver
> >>     if you also implement XDP_TX.
> >>
> >>     As we trust that Zorik/Amazon will follow and implement XDP_REDIRECT
> >>     later, given he/you wants AF_XDP support which requires XDP_REDIRECT.
> >>
> >> Jesper, thanks for your comments and very helpful discussion during
> >> NetConf! That's the plan, as we agreed. From our side I would like to
> >> reiterate again the importance of multi-buffer support by xdp frame.
> >> We would really prefer not to see our MTU shrinking because of xdp
> >> support.  
> >
> > Okay we really need to make a serious attempt to find a way to support
> > multi-buffer packets with XDP. With the important criteria of not
> > hurting performance of the single-buffer per packet design.
> >
> > I've created a design document[2], that I will update based on our
> > discussions: [2] 
> > https://github.com/xdp-project/xdp-project/blob/master/areas/core/xdp-multi-buffer01-design.org
> >
> > The use-case that really convinced me was Eric's packet header-split.
> >
> >
> > Lets refresh: Why XDP don't have multi-buffer support:
> >
> > XDP is designed for maximum performance, which is why certain 
> > driver-level
> > use-cases were not supported, like multi-buffer packets (like 
> > jumbo-frames).
> > As it e.g. complicated the driver RX-loop and memory model handling.
> >
> > The single buffer per packet design, is also tied into eBPF 
> > Direct-Access
> > (DA) to packet data, which can only be allowed if the packet memory is 
> > in
> > contiguous memory.  This DA feature is essential for XDP performance.
> >
> >
> > One way forward is to define that XDP only get access to the first
> > packet buffer, and it cannot see subsequent buffers.  For XDP_TX and
> > XDP_REDIRECT to work then XDP still need to carry pointers (plus
> > len+offset) to the other buffers, which is 16 bytes per extra buffer.  
> 
> 
> I’ve seen various network processor HW designs, and they normally get 
> the first x bytes (128 - 512) which they can manipulate 
> (append/prepend/insert/modify/delete).

Good data point, thank you!  It confirms that XDP only getting access to
the first packet-buffer makes sense, for most use-cases.

We also have to remember that XDP is not meant to handle every
use-case.  XDP is a software fast-path, that can accelerate certain
use-cases.  We have the existing network stack as a fall-back for
handling the corner-cases, that would otherwise slowdown our XDP
fast-path.


> There are designs where they can “page in” the additional fragments 
> but it’s expensive as it requires additional memory transfers. But
> the majority do not care (cannot change) the remaining fragments. Can
> also not think of a reason why you might want to remove something at
> the end of the frame (thinking about routing/forwarding needs here).

Use-cases that need to adjust tail of packet:

- ICMP replies directly from XDP[1] need to shorten packet tail, but
  this use-case doesn't use fragments.

- IPsec needs to add/extend packet tail for IPsec-trailer[2], again
  unlikely that this needs fragments(?). (This use-case convinced me
  that we need to add extend-tail support to bpf_xdp_adjust_tail)

- DNS or memcached replies directly from XDP, need to extend packet
  tail, to have room for reply. (It would be interesting to allow larger
  replies, but I'm not sure we should ever support that).


> If we do want XDP to access other fragments we could do this through
> a helper which swaps the packet context?

That might be a way forward.  If the XDP developer has to call a
helper, then they should realize and "buy into" an additional
overhead/cost.


[1] https://github.com/torvalds/linux/blob/master/samples/bpf/xdp_adjust_tail_kern.c
[2] http://vger.kernel.org/netconf2019_files/xfrm_xdp.pdf
-- 
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Principal Kernel Engineer at Red Hat
  LinkedIn: http://www.linkedin.com/in/brouer

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [RFC V1 net-next 1/1] net: ena: implement XDP drop support
  2019-06-23  7:06 ` [RFC V1 net-next 1/1] net: ena: implement XDP drop support sameehj
  2019-06-23 14:09   ` Jesper Dangaard Brouer
  2019-06-23 14:21   ` Jesper Dangaard Brouer
@ 2019-06-23 14:28   ` David Ahern
  2019-06-23 14:51   ` Maciej Fijalkowski
  3 siblings, 0 replies; 28+ messages in thread
From: David Ahern @ 2019-06-23 14:28 UTC (permalink / raw)
  To: sameehj, davem, netdev
  Cc: dwmw, zorik, matua, saeedb, msw, aliguori, nafea, gtzalik,
	netanel, alisaidi, benh, akiyano

On 6/23/19 1:06 AM, sameehj@amazon.com wrote:
> +static int ena_xdp_set(struct net_device *netdev, struct bpf_prog *prog)
> +{
> +	struct ena_adapter *adapter = netdev_priv(netdev);
> +	struct bpf_prog *old_bpf_prog;
> +	int i;
> +
> +	if (ena_xdp_allowed(adapter)) {
> +		old_bpf_prog = xchg(&adapter->xdp_bpf_prog, prog);
> +
> +		for (i = 0; i < adapter->num_queues; i++)
> +			xchg(&adapter->rx_ring[i].xdp_bpf_prog, prog);
> +
> +		if (old_bpf_prog)
> +			bpf_prog_put(old_bpf_prog);
> +
> +	} else {
> +		netif_err(adapter, drv, adapter->netdev, "Failed to set xdp program, the current MTU (%d) is larger than the maximal allowed MTU (%lu) while xdp is on",
> +			  netdev->mtu, ENA_XDP_MAX_MTU);
> +		return -EFAULT;

unfortunate that extack has not been plumbed to ndo_bpf handler so that
message is passed to the user.

And EFAULT seems like the wrong return code.


> +	}
> +
> +	return 0;
> +}
> +
> +/* This is the main xdp callback, it's used by the kernel to set/unset the xdp
> + * program as well as to query the current xdp program id.
> + */
> +static int ena_xdp(struct net_device *netdev, struct netdev_bpf *bpf)
> +{
> +	struct ena_adapter *adapter = netdev_priv(netdev);
> +
> +	switch (bpf->command) {
> +	case XDP_SETUP_PROG:
> +		return ena_xdp_set(netdev, bpf->prog);
> +	case XDP_QUERY_PROG:
> +		bpf->prog_id = adapter->xdp_bpf_prog ?
> +			adapter->xdp_bpf_prog->aux->id : 0;
> +		break;
> +	default:
> +		return -EINVAL;
> +	}
> +	return 0;
> +}
> +
>  static int ena_change_mtu(struct net_device *dev, int new_mtu)
>  {
>  	struct ena_adapter *adapter = netdev_priv(dev);
>  	int ret;
>  
> +	if (new_mtu > ENA_XDP_MAX_MTU && ena_xdp_present(adapter)) {
> +		netif_err(adapter, drv, dev,
> +			  "Requested MTU value is not valid while xdp is enabled new_mtu: %d max mtu: %lu min mtu: %d\n",
> +			  new_mtu, ENA_XDP_MAX_MTU, ENA_MIN_MTU);
> +		return -EINVAL;
> +	}

If you manage dev->max_mtu as an XDP program is installed / removed this
check is not needed. Instead it is handled by the core change_mtu logic
and has better user semantics: link list shows the new MTU and trying to
change it above the new max a message is sent to the user as opposed to
kernel log.

>  	ret = ena_com_set_dev_mtu(adapter->ena_dev, new_mtu);
>  	if (!ret) {
>  		netif_dbg(adapter, drv, dev, "set MTU to %d\n", new_mtu);


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [RFC V1 net-next 1/1] net: ena: implement XDP drop support
  2019-06-23  7:06 ` [RFC V1 net-next 1/1] net: ena: implement XDP drop support sameehj
                     ` (2 preceding siblings ...)
  2019-06-23 14:28   ` [RFC V1 net-next 1/1] net: ena: implement XDP drop support David Ahern
@ 2019-06-23 14:51   ` Maciej Fijalkowski
  3 siblings, 0 replies; 28+ messages in thread
From: Maciej Fijalkowski @ 2019-06-23 14:51 UTC (permalink / raw)
  To: sameehj
  Cc: davem, netdev, dwmw, zorik, matua, saeedb, msw, aliguori, nafea,
	gtzalik, netanel, alisaidi, benh, akiyano

On Sun, 23 Jun 2019 10:06:49 +0300
<sameehj@amazon.com> wrote:

> From: Sameeh Jubran <sameehj@amazon.com>
> 
> This commit implements the basic functionality of drop/pass logic in the
> ena driver.
> 
> Signed-off-by: Sameeh Jubran <sameehj@amazon.com>
> ---
>  drivers/net/ethernet/amazon/ena/ena_netdev.c | 83 +++++++++++++++++++-
>  drivers/net/ethernet/amazon/ena/ena_netdev.h | 29 +++++++
>  2 files changed, 111 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
> index 20ec8ff03..3d65c0771 100644
> --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
> +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
> @@ -33,10 +33,10 @@
>  #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
>  
>  #ifdef CONFIG_RFS_ACCEL
> +#include <linux/bpf_trace.h>
>  #include <linux/cpu_rmap.h>
>  #endif /* CONFIG_RFS_ACCEL */
>  #include <linux/ethtool.h>
> -#include <linux/if_vlan.h>
>  #include <linux/kernel.h>
>  #include <linux/module.h>
>  #include <linux/numa.h>
> @@ -105,11 +105,82 @@ static void update_rx_ring_mtu(struct ena_adapter *adapter, int mtu)
>  		adapter->rx_ring[i].mtu = mtu;
>  }
>  
> +static int ena_xdp_execute(struct ena_ring *rx_ring, struct xdp_buff *xdp)
> +{
> +	struct bpf_prog *xdp_prog = rx_ring->xdp_bpf_prog;
> +	u32 verdict = XDP_PASS;
> +
> +	rcu_read_lock();
> +
> +	if (!xdp_prog)
> +		goto out;
> +
> +	verdict = bpf_prog_run_xdp(xdp_prog, xdp);
> +
> +	if (unlikely(verdict == XDP_ABORTED))
> +		trace_xdp_exception(rx_ring->netdev, xdp_prog, verdict);
> +	else if (unlikely(verdict > XDP_REDIRECT))

Shouldn't this be verdict >= XDP_TX at this point? You're not providing XDP TX
resources in this patch and you're saying that only drop/pass actions are
supported.

> +		bpf_warn_invalid_xdp_action(verdict);
> +out:
> +	rcu_read_unlock();
> +	return verdict;
> +}
> +
> +static int ena_xdp_set(struct net_device *netdev, struct bpf_prog *prog)
> +{
> +	struct ena_adapter *adapter = netdev_priv(netdev);
> +	struct bpf_prog *old_bpf_prog;
> +	int i;
> +
> +	if (ena_xdp_allowed(adapter)) {
> +		old_bpf_prog = xchg(&adapter->xdp_bpf_prog, prog);
> +
> +		for (i = 0; i < adapter->num_queues; i++)
> +			xchg(&adapter->rx_ring[i].xdp_bpf_prog, prog);
> +
> +		if (old_bpf_prog)
> +			bpf_prog_put(old_bpf_prog);
> +
> +	} else {
> +		netif_err(adapter, drv, adapter->netdev, "Failed to set xdp program, the current MTU (%d) is larger than the maximal allowed MTU (%lu) while xdp is on",
> +			  netdev->mtu, ENA_XDP_MAX_MTU);

Consider using netlink's extack mechanism. Downside of this approach is that you
can't use format specifiers, so you won't tell user about the max allowed mtu
size for XDP case, but OTOH the message is printed right in front of user's
face in console, no need to look into dmesg.

> +		return -EFAULT;
> +	}
> +
> +	return 0;
> +}
> +
> +/* This is the main xdp callback, it's used by the kernel to set/unset the xdp
> + * program as well as to query the current xdp program id.
> + */
> +static int ena_xdp(struct net_device *netdev, struct netdev_bpf *bpf)
> +{
> +	struct ena_adapter *adapter = netdev_priv(netdev);
> +
> +	switch (bpf->command) {
> +	case XDP_SETUP_PROG:
> +		return ena_xdp_set(netdev, bpf->prog);
> +	case XDP_QUERY_PROG:
> +		bpf->prog_id = adapter->xdp_bpf_prog ?
> +			adapter->xdp_bpf_prog->aux->id : 0;
> +		break;
> +	default:

Again, consider the following:
NL_SET_ERR_MSG_MOD(bpf->extack, "Unsupported XDP command");

> +		return -EINVAL;
> +	}
> +	return 0;
> +}
> +
>  static int ena_change_mtu(struct net_device *dev, int new_mtu)
>  {
>  	struct ena_adapter *adapter = netdev_priv(dev);
>  	int ret;
>  
> +	if (new_mtu > ENA_XDP_MAX_MTU && ena_xdp_present(adapter)) {
> +		netif_err(adapter, drv, dev,
> +			  "Requested MTU value is not valid while xdp is enabled new_mtu: %d max mtu: %lu min mtu: %d\n",
> +			  new_mtu, ENA_XDP_MAX_MTU, ENA_MIN_MTU);
> +		return -EINVAL;
> +	}
>  	ret = ena_com_set_dev_mtu(adapter->ena_dev, new_mtu);
>  	if (!ret) {
>  		netif_dbg(adapter, drv, dev, "set MTU to %d\n", new_mtu);
> @@ -888,6 +959,15 @@ static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring,
>  	va = page_address(rx_info->page) + rx_info->page_offset;
>  	prefetch(va + NET_IP_ALIGN);
>  
> +	if (ena_xdp_present_ring(rx_ring)) {
> +		rx_ring->xdp.data = va;
> +		rx_ring->xdp.data_meta = rx_ring->xdp.data;
> +		rx_ring->xdp.data_hard_start = rx_ring->xdp.data -
> +			rx_info->page_offset;
> +		rx_ring->xdp.data_end = rx_ring->xdp.data + len;
> +		if (ena_xdp_execute(rx_ring, &rx_ring->xdp) != XDP_PASS)
> +			return NULL;
> +	}

Hmm, running the XDP program within the function that is supposed to handle skb
seems a bit unintuitive. Couldn't you have this logic before the ena_rx_skb()
call in ena_clean_rx_irq?

Another thing, from performance perspective IMO it would be better to operate on
local struct xdp_buff, rather than accessing it from rx_ring.

>  	if (len <= rx_ring->rx_copybreak) {
>  		skb = ena_alloc_skb(rx_ring, false);
>  		if (unlikely(!skb))
> @@ -2549,6 +2629,7 @@ static const struct net_device_ops ena_netdev_ops = {
>  	.ndo_change_mtu		= ena_change_mtu,
>  	.ndo_set_mac_address	= NULL,
>  	.ndo_validate_addr	= eth_validate_addr,
> +	.ndo_bpf		= ena_xdp,
>  };
>  
>  static int ena_device_validate_params(struct ena_adapter *adapter,
> diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h
> index f2b6e2e05..e17965f7a 100644
> --- a/drivers/net/ethernet/amazon/ena/ena_netdev.h
> +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h
> @@ -35,6 +35,7 @@
>  
>  #include <linux/bitops.h>
>  #include <linux/etherdevice.h>
> +#include <linux/if_vlan.h>
>  #include <linux/inetdevice.h>
>  #include <linux/interrupt.h>
>  #include <linux/netdevice.h>
> @@ -139,6 +140,14 @@
>  
>  #define ENA_MMIO_DISABLE_REG_READ	BIT(0)
>  
> +/* The max MTU size is configured to be the ethernet frame without the overhead
> + * of the ethernet header, which can have VLAN header, and the frame check
> + * sequence (FCS).
> + * The buffer sizes we share with the device are defined to be ENA_PAGE_SIZE
> + */
> +#define ENA_XDP_MAX_MTU (ENA_PAGE_SIZE - ETH_HLEN - ETH_FCS_LEN - \
> +				VLAN_HLEN)
> +
>  struct ena_irq {
>  	irq_handler_t handler;
>  	void *data;
> @@ -288,6 +297,8 @@ struct ena_ring {
>  
>  	u8 *push_buf_intermediate_buf;
>  	int empty_rx_queue;
> +	struct bpf_prog *xdp_bpf_prog;
> +	struct xdp_buff xdp;
>  } ____cacheline_aligned;
>  
>  struct ena_stats_dev {
> @@ -380,6 +391,9 @@ struct ena_adapter {
>  	enum ena_regs_reset_reason_types reset_reason;
>  
>  	u8 ena_extra_properties_count;
> +
> +	/* XDP structures */
> +	struct bpf_prog *xdp_bpf_prog;
>  };
>  
>  void ena_set_ethtool_ops(struct net_device *netdev);
> @@ -394,4 +408,19 @@ int ena_update_queue_sizes(struct ena_adapter *adapter,
>  
>  int ena_get_sset_count(struct net_device *netdev, int sset);
>  
> +static inline bool ena_xdp_present(struct ena_adapter *adapter)
> +{
> +	return !!adapter->xdp_bpf_prog;
> +}
> +
> +static inline bool ena_xdp_present_ring(struct ena_ring *ring)
> +{
> +	return !!ring->xdp_bpf_prog;
> +}
> +
> +static inline bool ena_xdp_allowed(struct ena_adapter *adapter)
> +{
> +	return adapter->netdev->mtu <= ENA_XDP_MAX_MTU;
> +}
> +
>  #endif /* !(ENA_H) */


^ permalink raw reply	[flat|nested] 28+ messages in thread

end of thread, other threads:[~2019-06-28 11:50 UTC | newest]

Thread overview: 28+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-06-23  7:06 [RFC V1 net-next 0/1] Introduce xdp to ena sameehj
2019-06-23  7:06 ` [RFC V1 net-next 1/1] net: ena: implement XDP drop support sameehj
2019-06-23 14:09   ` Jesper Dangaard Brouer
2019-06-23 14:21   ` Jesper Dangaard Brouer
2019-06-25  3:19     ` Machulsky, Zorik
2019-06-26  8:38       ` XDP multi-buffer incl. jumbo-frames (Was: [RFC V1 net-next 1/1] net: ena: implement XDP drop support) Jesper Dangaard Brouer
2019-06-26 11:52         ` Toke Høiland-Jørgensen
2019-06-26 11:52           ` Toke Høiland-Jørgensen
2019-06-26 14:40           ` Jesper Dangaard Brouer
2019-06-26 14:40             ` Jesper Dangaard Brouer
2019-06-26 15:01             ` Toke Høiland-Jørgensen
2019-06-26 15:01               ` Toke Høiland-Jørgensen
2019-06-26 15:20               ` Willem de Bruijn
2019-06-26 16:42                 ` Jonathan Lemon
2019-06-26 20:00                   ` Jesper Dangaard Brouer
2019-06-27 22:07                     ` Jonathan Lemon
2019-06-28  8:46                 ` Jesper Dangaard Brouer
2019-06-26 15:14             ` Toke Høiland-Jørgensen
2019-06-26 15:14               ` Toke Høiland-Jørgensen
2019-06-26 16:36               ` Jesper Dangaard Brouer
2019-06-26 16:36                 ` Jesper Dangaard Brouer
2019-06-28  7:14         ` Eelco Chaudron
2019-06-28  7:46           ` Toke Høiland-Jørgensen
2019-06-28  7:46             ` Toke Høiland-Jørgensen
2019-06-28 11:49             ` Eelco Chaudron
2019-06-28  8:22           ` Jesper Dangaard Brouer
2019-06-23 14:28   ` [RFC V1 net-next 1/1] net: ena: implement XDP drop support David Ahern
2019-06-23 14:51   ` Maciej Fijalkowski

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.