Netdev List
 help / color / mirror / Atom feed
* [PATCH v3 9/9] net: New ax88796 platform driver for Amiga X-Surf 100 Zorro board (m68k)
From: Michael Schmitz @ 2018-04-18  4:26 UTC (permalink / raw)
  To: netdev
  Cc: andrew, fthain, geert, f.fainelli, linux-m68k, Michael.Karcher,
	Michael Karcher, Michael Schmitz
In-Reply-To: <1523930895-6973-1-git-send-email-schmitzmic@gmail.com>

From: Michael Karcher <kernel@mkarcher.dialup.fu-berlin.de>

Add platform device driver to populate the ax88796 platform data from
information provided by the XSurf100 zorro device driver. The ax88796
module will be loaded through this module's probe function.

Signed-off-by: Michael Karcher <kernel@mkarcher.dialup.fu-berlin.de>
Signed-off-by: Michael Schmitz <schmitzmic@gmail.com>

---

Changes in v3:
Suggested by Geert Uytterhoeven:
- use ei_local->reset_8390() instead of duplicating ax_reset_8390()
- use %pR to format struct resource pointers
- assign pdev and xs100 pointers in declaration
- don't split error messages
- change Kconfig logic to only require XSURF100 set on Amiga

Suggested by Andrew Lunn:
- add COMPILE_TEST to ax88796 Kconfig options
- use new Asix PHY driver for X-Surf 100
---
 drivers/net/ethernet/8390/Kconfig    |   17 ++-
 drivers/net/ethernet/8390/Makefile   |    1 +
 drivers/net/ethernet/8390/xsurf100.c |  381 ++++++++++++++++++++++++++++++++++
 3 files changed, 397 insertions(+), 2 deletions(-)
 create mode 100644 drivers/net/ethernet/8390/xsurf100.c

diff --git a/drivers/net/ethernet/8390/Kconfig b/drivers/net/ethernet/8390/Kconfig
index fdc6734..607dc00 100644
--- a/drivers/net/ethernet/8390/Kconfig
+++ b/drivers/net/ethernet/8390/Kconfig
@@ -29,8 +29,8 @@ config PCMCIA_AXNET
 	  called axnet_cs.  If unsure, say N.
 
 config AX88796
-	tristate "ASIX AX88796 NE2000 clone support"
-	depends on (ARM || MIPS || SUPERH)
+	tristate "ASIX AX88796 NE2000 clone support" if !ZORRO
+	depends on (ARM || MIPS || SUPERH || ZORRO || COMPILE_TEST)
 	select CRC32
 	select PHYLIB
 	select MDIO_BITBANG
@@ -45,6 +45,19 @@ config AX88796_93CX6
 	---help---
 	  Select this if your platform comes with an external 93CX6 eeprom.
 
+config XSURF100
+	tristate "Amiga XSurf 100 AX88796/NE2000 clone support"
+	depends on ZORRO
+	select AX88796
+	select ASIX_PHY
+	---help---
+	  This driver is for the Individual Computers X-Surf 100 Ethernet
+	  card (based on the Asix AX88796 chip). If you have such a card,
+	  say Y. Otherwise, say N.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called xsurf100.
+
 config HYDRA
 	tristate "Hydra support"
 	depends on ZORRO
diff --git a/drivers/net/ethernet/8390/Makefile b/drivers/net/ethernet/8390/Makefile
index f975c2f..3715f8d 100644
--- a/drivers/net/ethernet/8390/Makefile
+++ b/drivers/net/ethernet/8390/Makefile
@@ -16,4 +16,5 @@ obj-$(CONFIG_PCMCIA_PCNET) += pcnet_cs.o 8390.o
 obj-$(CONFIG_STNIC) += stnic.o 8390.o
 obj-$(CONFIG_ULTRA) += smc-ultra.o 8390.o
 obj-$(CONFIG_WD80x3) += wd.o 8390.o
+obj-$(CONFIG_XSURF100) += xsurf100.o
 obj-$(CONFIG_ZORRO8390) += zorro8390.o 8390.o
diff --git a/drivers/net/ethernet/8390/xsurf100.c b/drivers/net/ethernet/8390/xsurf100.c
new file mode 100644
index 0000000..7ab5ca0
--- /dev/null
+++ b/drivers/net/ethernet/8390/xsurf100.c
@@ -0,0 +1,381 @@
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/platform_device.h>
+#include <linux/zorro.h>
+#include <net/ax88796.h>
+#include <asm/amigaints.h>
+
+#define ZORRO_PROD_INDIVIDUAL_COMPUTERS_X_SURF100 \
+		ZORRO_ID(INDIVIDUAL_COMPUTERS, 0x64, 0)
+
+#define XS100_IRQSTATUS_BASE 0x40
+#define XS100_8390_BASE 0x800
+
+/* Longword-access area. Translated to 2 16-bit access cycles by the
+ * X-Surf 100 FPGA
+ */
+#define XS100_8390_DATA32_BASE 0x8000
+#define XS100_8390_DATA32_SIZE 0x2000
+/* Sub-Areas for fast data register access; addresses relative to area begin */
+#define XS100_8390_DATA_READ32_BASE 0x0880
+#define XS100_8390_DATA_WRITE32_BASE 0x0C80
+#define XS100_8390_DATA_AREA_SIZE 0x80
+
+#define __NS8390_init ax_NS8390_init
+
+/* force unsigned long back to 'void __iomem *' */
+#define ax_convert_addr(_a) ((void __force __iomem *)(_a))
+
+#define ei_inb(_a) z_readb(ax_convert_addr(_a))
+#define ei_outb(_v, _a) z_writeb(_v, ax_convert_addr(_a))
+
+#define ei_inw(_a) z_readw(ax_convert_addr(_a))
+#define ei_outw(_v, _a) z_writew(_v, ax_convert_addr(_a))
+
+#define ei_inb_p(_a) ei_inb(_a)
+#define ei_outb_p(_v, _a) ei_outb(_v, _a)
+
+/* define EI_SHIFT() to take into account our register offsets */
+#define EI_SHIFT(x) (ei_local->reg_offset[(x)])
+
+/* Ensure we have our RCR base value */
+#define AX88796_PLATFORM
+
+static unsigned char version[] =
+		"ax88796.c: Copyright 2005,2007 Simtec Electronics\n";
+
+#include "lib8390.c"
+
+/* from ne.c */
+#define NE_CMD		EI_SHIFT(0x00)
+#define NE_RESET	EI_SHIFT(0x1f)
+#define NE_DATAPORT	EI_SHIFT(0x10)
+
+struct xsurf100_ax_plat_data {
+	struct ax_plat_data ax;
+	void __iomem *base_regs;
+	void __iomem *data_area;
+};
+
+static int is_xsurf100_network_irq(struct platform_device *pdev)
+{
+	struct xsurf100_ax_plat_data *xs100 = dev_get_platdata(&pdev->dev);
+
+	return (readw(xs100->base_regs + XS100_IRQSTATUS_BASE) & 0xaaaa) != 0;
+}
+
+/* These functions guarantee that the iomem is accessed with 32 bit
+ * cycles only. z_memcpy_fromio / z_memcpy_toio don't
+ */
+static void z_memcpy_fromio32(void *dst, const void __iomem *src, size_t bytes)
+{
+	while (bytes > 32) {
+		asm __volatile__
+		   ("movem.l (%0)+,%%d0-%%d7\n"
+		    "movem.l %%d0-%%d7,(%1)\n"
+		    "adda.l #32,%1" : "=a"(src), "=a"(dst)
+		    : "0"(src), "1"(dst) : "d0", "d1", "d2", "d3", "d4",
+					   "d5", "d6", "d7", "memory");
+		bytes -= 32;
+	}
+	while (bytes) {
+		*(uint32_t *)dst = z_readl(src);
+		src += 4;
+		dst += 4;
+		bytes -= 4;
+	}
+}
+
+static void z_memcpy_toio32(void __iomem *dst, const void *src, size_t bytes)
+{
+	while (bytes) {
+		z_writel(*(const uint32_t *)src, dst);
+		src += 4;
+		dst += 4;
+		bytes -= 4;
+	}
+}
+
+static void xs100_write(struct net_device *dev, const void *src,
+			unsigned int count)
+{
+	struct ei_device *ei_local = netdev_priv(dev);
+	struct platform_device *pdev = to_platform_device(dev->dev.parent);
+	struct xsurf100_ax_plat_data *xs100 = dev_get_platdata(&pdev->dev);
+
+	/* copy whole blocks */
+	while (count > XS100_8390_DATA_AREA_SIZE) {
+		z_memcpy_toio32(xs100->data_area +
+				XS100_8390_DATA_WRITE32_BASE, src,
+				XS100_8390_DATA_AREA_SIZE);
+		src += XS100_8390_DATA_AREA_SIZE;
+		count -= XS100_8390_DATA_AREA_SIZE;
+	}
+	/* copy whole dwords */
+	z_memcpy_toio32(xs100->data_area + XS100_8390_DATA_WRITE32_BASE,
+			src, count & ~3);
+	src += count & ~3;
+	if (count & 2) {
+		ei_outw(*(uint16_t *)src, ei_local->mem + NE_DATAPORT);
+		src += 2;
+	}
+	if (count & 1)
+		ei_outb(*(uint8_t *)src, ei_local->mem + NE_DATAPORT);
+}
+
+static void xs100_read(struct net_device *dev, void *dst, unsigned int count)
+{
+	struct ei_device *ei_local = netdev_priv(dev);
+	struct platform_device *pdev = to_platform_device(dev->dev.parent);
+	struct xsurf100_ax_plat_data *xs100 = dev_get_platdata(&pdev->dev);
+
+	/* copy whole blocks */
+	while (count > XS100_8390_DATA_AREA_SIZE) {
+		z_memcpy_fromio32(dst, xs100->data_area +
+				  XS100_8390_DATA_READ32_BASE,
+				  XS100_8390_DATA_AREA_SIZE);
+		dst += XS100_8390_DATA_AREA_SIZE;
+		count -= XS100_8390_DATA_AREA_SIZE;
+	}
+	/* copy whole dwords */
+	z_memcpy_fromio32(dst, xs100->data_area + XS100_8390_DATA_READ32_BASE,
+			  count & ~3);
+	dst += count & ~3;
+	if (count & 2) {
+		*(uint16_t *)dst = ei_inw(ei_local->mem + NE_DATAPORT);
+		dst += 2;
+	}
+	if (count & 1)
+		*(uint8_t *)dst = ei_inb(ei_local->mem + NE_DATAPORT);
+}
+
+/* Block input and output, similar to the Crynwr packet driver. If
+ * you are porting to a new ethercard, look at the packet driver
+ * source for hints. The NEx000 doesn't share the on-board packet
+ * memory -- you have to put the packet out through the "remote DMA"
+ * dataport using ei_outb.
+ */
+static void xs100_block_input(struct net_device *dev, int count,
+			      struct sk_buff *skb, int ring_offset)
+{
+	struct ei_device *ei_local = netdev_priv(dev);
+	void __iomem *nic_base = ei_local->mem;
+	char *buf = skb->data;
+
+	if (ei_local->dmaing) {
+		netdev_err(dev,
+			   "DMAing conflict in %s [DMAstat:%d][irqlock:%d]\n",
+			   __func__,
+			   ei_local->dmaing, ei_local->irqlock);
+		return;
+	}
+
+	ei_local->dmaing |= 0x01;
+
+	ei_outb(E8390_NODMA + E8390_PAGE0 + E8390_START, nic_base + NE_CMD);
+	ei_outb(count & 0xff, nic_base + EN0_RCNTLO);
+	ei_outb(count >> 8, nic_base + EN0_RCNTHI);
+	ei_outb(ring_offset & 0xff, nic_base + EN0_RSARLO);
+	ei_outb(ring_offset >> 8, nic_base + EN0_RSARHI);
+	ei_outb(E8390_RREAD + E8390_START, nic_base + NE_CMD);
+
+	xs100_read(dev, buf, count);
+
+	ei_local->dmaing &= ~1;
+}
+
+static void xs100_block_output(struct net_device *dev, int count,
+			       const unsigned char *buf, const int start_page)
+{
+	struct ei_device *ei_local = netdev_priv(dev);
+	void __iomem *nic_base = ei_local->mem;
+	unsigned long dma_start;
+
+	/* Round the count up for word writes. Do we need to do this?
+	 * What effect will an odd byte count have on the 8390?  I
+	 * should check someday.
+	 */
+	if (ei_local->word16 && (count & 0x01))
+		count++;
+
+	/* This *shouldn't* happen. If it does, it's the last thing
+	 * you'll see
+	 */
+	if (ei_local->dmaing) {
+		netdev_err(dev, 
+			   "DMAing conflict in %s [DMAstat:%d][irqlock:%d]\n",
+			   __func__,
+			   ei_local->dmaing, ei_local->irqlock);
+		return;
+	}
+
+	ei_local->dmaing |= 0x01;
+	/* We should already be in page 0, but to be safe... */
+	ei_outb(E8390_PAGE0 + E8390_START + E8390_NODMA, nic_base + NE_CMD);
+
+	ei_outb(ENISR_RDC, nic_base + EN0_ISR);
+
+	/* Now the normal output. */
+	ei_outb(count & 0xff, nic_base + EN0_RCNTLO);
+	ei_outb(count >> 8, nic_base + EN0_RCNTHI);
+	ei_outb(0x00, nic_base + EN0_RSARLO);
+	ei_outb(start_page, nic_base + EN0_RSARHI);
+
+	ei_outb(E8390_RWRITE + E8390_START, nic_base + NE_CMD);
+
+	xs100_write(dev, buf, count);
+
+	dma_start = jiffies;
+
+	while ((ei_inb(nic_base + EN0_ISR) & ENISR_RDC) == 0) {
+		if (jiffies - dma_start > 2 * HZ / 100) {	/* 20ms */
+			netdev_warn(dev, "timeout waiting for Tx RDC.\n");
+			ei_local->reset_8390(dev);
+			ax_NS8390_init(dev, 1);
+			break;
+		}
+	}
+
+	ei_outb(ENISR_RDC, nic_base + EN0_ISR);	/* Ack intr. */
+	ei_local->dmaing &= ~0x01;
+}
+
+static int xsurf100_probe(struct zorro_dev *zdev,
+			  const struct zorro_device_id *ent)
+{
+	struct platform_device *pdev;
+	struct xsurf100_ax_plat_data ax88796_data;
+	struct resource res[2] = {
+		DEFINE_RES_NAMED(IRQ_AMIGA_PORTS, 1, NULL,
+				 IORESOURCE_IRQ | IORESOURCE_IRQ_SHAREABLE),
+		DEFINE_RES_MEM(zdev->resource.start + XS100_8390_BASE,
+			       4 * 0x20)
+	};
+	int reg;
+	/* This table is referenced in the device structure, so it must
+	 * outlive the scope of xsurf100_probe.
+	 */
+	static u32 reg_offsets[32];
+	int ret = 0;
+
+	/* X-Surf 100 control and 32 bit ring buffer data access areas.
+	 * These resources are not used by the ax88796 driver, so must
+	 * be requested here and passed via platform data.
+	 */
+
+	if (!request_mem_region(zdev->resource.start, 0x100, zdev->name)) {
+		dev_err(&zdev->dev, "cannot reserve X-Surf 100 control registers\n");
+		return -ENXIO;
+	}
+
+	if (!request_mem_region(zdev->resource.start +
+				XS100_8390_DATA32_BASE,
+				XS100_8390_DATA32_SIZE,
+				"X-Surf 100 32-bit data access")) {
+		dev_err(&zdev->dev, "cannot reserve 32-bit area\n");
+		ret = -ENXIO;
+		goto exit_req;
+	}
+
+	for (reg = 0; reg < 0x20; reg++)
+		reg_offsets[reg] = 4 * reg;
+
+	memset(&ax88796_data, 0, sizeof(ax88796_data));
+	ax88796_data.ax.flags = AXFLG_HAS_EEPROM;
+	ax88796_data.ax.wordlength = 2;
+	ax88796_data.ax.dcr_val = 0x48;
+	ax88796_data.ax.rcr_val = 0x40;
+	ax88796_data.ax.reg_offsets = reg_offsets;
+	ax88796_data.ax.check_irq = is_xsurf100_network_irq;
+	ax88796_data.base_regs = ioremap(zdev->resource.start, 0x100);
+
+	/* error handling for ioremap regs */
+	if (!ax88796_data.base_regs) {
+		dev_err(&zdev->dev, "Cannot ioremap area %pR (registers)\n",
+			&zdev->resource);
+
+		ret = -ENXIO;
+		goto exit_req2;
+	}
+
+	ax88796_data.data_area = ioremap(zdev->resource.start +
+			XS100_8390_DATA32_BASE, XS100_8390_DATA32_SIZE);
+
+	/* error handling for ioremap data */
+	if (!ax88796_data.data_area) {
+		dev_err(&zdev->dev, 
+			"Cannot ioremap area %pR offset %x (32-bit access)\n",
+			&zdev->resource,  XS100_8390_DATA32_BASE);
+
+		ret = -ENXIO;
+		goto exit_mem;
+	}
+
+	ax88796_data.ax.block_output = xs100_block_output;
+	ax88796_data.ax.block_input = xs100_block_input;
+
+	pdev = platform_device_register_resndata(&zdev->dev, "ax88796",
+						 zdev->slotaddr, res, 2,
+						 &ax88796_data,
+						 sizeof(ax88796_data));
+
+	if (IS_ERR(pdev)) {
+		dev_err(&zdev->dev, "cannot register platform device\n");
+		ret = -ENXIO;
+		goto exit_mem2;
+	}
+
+	zorro_set_drvdata(zdev, pdev);
+
+	if (!ret)
+		return 0;
+
+ exit_mem2:
+	iounmap(ax88796_data.data_area);
+
+ exit_mem:
+	iounmap(ax88796_data.base_regs);
+
+ exit_req2:
+	release_mem_region(zdev->resource.start + XS100_8390_DATA32_BASE,
+			   XS100_8390_DATA32_SIZE);
+
+ exit_req:
+	release_mem_region(zdev->resource.start, 0x100);
+
+	return ret;
+}
+
+static void xsurf100_remove(struct zorro_dev *zdev)
+{
+	struct platform_device *pdev = zorro_get_drvdata(zdev);
+	struct xsurf100_ax_plat_data *xs100 = dev_get_platdata(&pdev->dev);
+
+	platform_device_unregister(pdev);
+
+	iounmap(xs100->base_regs);
+	release_mem_region(zdev->resource.start, 0x100);
+	iounmap(xs100->data_area);
+	release_mem_region(zdev->resource.start + XS100_8390_DATA32_BASE,
+			   XS100_8390_DATA32_SIZE);
+}
+
+static const struct zorro_device_id xsurf100_zorro_tbl[] = {
+	{ ZORRO_PROD_INDIVIDUAL_COMPUTERS_X_SURF100, },
+	{ 0 }
+};
+
+MODULE_DEVICE_TABLE(zorro, xsurf100_zorro_tbl);
+
+static struct zorro_driver xsurf100_driver = {
+	.name           = "xsurf100",
+	.id_table       = xsurf100_zorro_tbl,
+	.probe          = xsurf100_probe,
+	.remove         = xsurf100_remove,
+};
+
+module_driver(xsurf100_driver, zorro_register_driver, zorro_unregister_driver);
+
+MODULE_DESCRIPTION("X-Surf 100 driver");
+MODULE_AUTHOR("Michael Karcher <kernel@mkarcher.dialup.fu-berlin.de>");
+MODULE_LICENSE("GPL v2");
-- 
1.7.0.4

^ permalink raw reply related

* Re: [PATCH 10/10] net: New ax88796 platform driver for Amiga X-Surf 100 Zorro board (m68k)
From: Michael Schmitz @ 2018-04-18  4:32 UTC (permalink / raw)
  To: Geert Uytterhoeven; +Cc: netdev, Linux/m68k, Michael Karcher, Michael Karcher
In-Reply-To: <CAMuHMdUnonyL93AmF3TdPcUPj5ZEuTb59ZgArH5BjLjcx8LcvA@mail.gmail.com>

Hi Geert,

On Wed, Apr 18, 2018 at 1:53 AM, Geert Uytterhoeven
<geert@linux-m68k.org> wrote:
>> --- /dev/null
>> +++ b/drivers/net/ethernet/8390/xsurf100.c
>> @@ -0,0 +1,411 @@
>> +#include <linux/module.h>
>> +#include <linux/netdevice.h>
>> +#include <linux/platform_device.h>
>> +#include <linux/zorro.h>
>> +#include <net/ax88796.h>
>> +#include <asm/amigaints.h>
>> +
>> +#define ZORRO_PROD_INDIVIDUAL_COMPUTERS_X_SURF100 \
>> +               ZORRO_ID(INDIVIDUAL_COMPUTERS, 0x64, 0)
>
> Another long define to get rid of? ;-)

I decided to leave it that way - it doesn't stick out quite as badly
as the one in the ESP driver. Give me a yell if you insist.

Cheers,

  Michael

^ permalink raw reply

* Re: [PATCH net-next] net: introduce a new tracepoint for tcp_rcv_space_adjust
From: Yafang Shao @ 2018-04-18  4:42 UTC (permalink / raw)
  To: Alexei Starovoitov; +Cc: Eric Dumazet, David Miller, Song Liu, netdev, LKML
In-Reply-To: <20180417234455.q6fgn7oroehmxk6l@ast-mbp>

On Wed, Apr 18, 2018 at 7:44 AM, Alexei Starovoitov
<alexei.starovoitov@gmail.com> wrote:
> On Mon, Apr 16, 2018 at 08:43:31AM -0700, Eric Dumazet wrote:
>>
>>
>> On 04/16/2018 08:33 AM, Yafang Shao wrote:
>> > tcp_rcv_space_adjust is called every time data is copied to user space,
>> > so introducing a tcp tracepoint for it could show us when the packet is
>> > copied to user.
>> > This could help us figure out whether there's latency in the user process.
>> >
>> > When a tcp packet arrives, tcp_rcv_established() will be called and with
>> > the existing tracepoint tcp_probe we could get the time when this packet
>> > arrives.
>> > Then this packet will be copied to user, and tcp_rcv_space_adjust will
>> > be called and with this newly introduced tracepoint we could get the time
>> > when this packet is copied to user.
>> >
>> >     arrives time : user process time    => latency caused by user
>> >     tcp_probe      tcp_rcv_space_adjust
>> >
>> > Hence in the printk message, sk is printed as a key to connect these two
>> > tracepoints.
>> >
>>
>> socket pointer is not a key.
>>
>> TCP sockets can be reused pretty fast after free.
>>
>> I suggest you go for cookie instead, this is an unique 64bit identifier.
>> ( sock_gen_cookie() for details )
>
> I think it would be even better if the stack would do this sock_gen_cookie()
> on its own in some way that the user cannot infer the order.
> In many cases we wanted to use the socket cookie, but since it's not inited
> by default it's kinda useless.
> Turning this tracepoint on just to get the cookie would be an ugly workaround.
>

Could we init it in sk_alloc() ?
Then in other code paths, for example sock_getsockopt or tracepoints,
we only read the value through a new inline function named
sock_read_cookie().


Thanks
Yafang

^ permalink raw reply

* Re: [PATCH ipsec-next] selftests: add xfrm state-policy-monitor to rtnetlink.sh
From: Steffen Klassert @ 2018-04-18  4:43 UTC (permalink / raw)
  To: Shannon Nelson; +Cc: netdev
In-Reply-To: <1523573999-17411-1-git-send-email-shannon.nelson@oracle.com>

On Thu, Apr 12, 2018 at 03:59:59PM -0700, Shannon Nelson wrote:
> Add a simple set of tests for the IPsec xfrm commands.
> 
> Signed-off-by: Shannon Nelson <shannon.nelson@oracle.com>

Applied to ipsec-next, thanks Shannon!

^ permalink raw reply

* [PATCH iproute2 net-next] vxlan: fix ttl inherit behavior
From: Hangbin Liu @ 2018-04-18  5:05 UTC (permalink / raw)
  To: netdev; +Cc: Stephen Hemminger, Jiri Benc, Hangbin Liu

Like kernel net-next commit 72f6d71e491e6 ("vxlan: add ttl inherit support"),
vxlan ttl inherit should mean inheriting the inner protocol's ttl value.

But currently when we add vxlan with "ttl inherit", we only set ttl 0,
which actually means using whatever default value instead of inheriting the
inner protocol's ttl value.

To distinguish between ttl inherit and ttl == 0, we add an attribute
IFLA_VXLAN_TTL_INHERIT when "ttl inherit" is specified, and use "ttl auto"
to mean "use whatever default value", the same behavior as ttl == 0.

Reported-by: Jianlin Shi <jishi@redhat.com>
Suggested-by: Jiri Benc <jbenc@redhat.com>
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
---
 include/uapi/linux/if_link.h | 1 +
 ip/iplink_vxlan.c            | 8 ++++++--
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index dab5246..387f873 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -511,6 +511,7 @@ enum {
 	IFLA_VXLAN_COLLECT_METADATA,
 	IFLA_VXLAN_LABEL,
 	IFLA_VXLAN_GPE,
+	IFLA_VXLAN_TTL_INHERIT,
 	__IFLA_VXLAN_MAX
 };
 #define IFLA_VXLAN_MAX	(__IFLA_VXLAN_MAX - 1)
diff --git a/ip/iplink_vxlan.c b/ip/iplink_vxlan.c
index 661eaa7..5804db3 100644
--- a/ip/iplink_vxlan.c
+++ b/ip/iplink_vxlan.c
@@ -165,14 +165,18 @@ static int vxlan_parse_opt(struct link_util *lu, int argc, char **argv,
 
 			NEXT_ARG();
 			check_duparg(&attrs, IFLA_VXLAN_TTL, "ttl", *argv);
-			if (strcmp(*argv, "inherit") != 0) {
+			if (strcmp(*argv, "inherit") == 0) {
+				addattr_l(n, 1024, IFLA_VXLAN_TTL_INHERIT, NULL, 0);
+			} else if (strcmp(*argv, "auto") == 0) {
+				addattr8(n, 1024, IFLA_VXLAN_TTL, ttl);
+			} else {
 				if (get_unsigned(&uval, *argv, 0))
 					invarg("invalid TTL", *argv);
 				if (uval > 255)
 					invarg("TTL must be <= 255", *argv);
 				ttl = uval;
+				addattr8(n, 1024, IFLA_VXLAN_TTL, ttl);
 			}
-			addattr8(n, 1024, IFLA_VXLAN_TTL, ttl);
 		} else if (!matches(*argv, "tos") ||
 			   !matches(*argv, "dsfield")) {
 			__u32 uval;
-- 
2.5.5

^ permalink raw reply related

* Re: [Regression] net/phy/micrel.c v4.9.94
From: Chris Ruehl @ 2018-04-18  5:09 UTC (permalink / raw)
  To: f.fainelli, netdev
In-Reply-To: <3bd29bdd-b5ab-03d5-ea53-292f9150ee4c@gtsys.com.hk>

On Wednesday, April 18, 2018 09:34 AM, Chris Ruehl wrote:
> Hello,
> 
> I'd like to give you a heads-up about a regression introduced in 4.9.94.
> The commit leads to a kernel oops and makes the network unusable on my MX6DL
> customized board.
> 
> Race condition: resume is called on startup while the phy is not yet initialized.
> 
> [    7.313366] Unable to handle kernel NULL pointer dereference at virtual 
> address 00000008
> [    7.321602] pgd = ecfc0000
> 
> [    7.324950] [00000008] *pgd=8e901831
> 
> [    7.328652] Internal error: Oops: 17 [#1] PREEMPT SMP ARM
> 
> [    7.334061] Modules linked in:
> 
> [    7.337146] CPU: 0 PID: 269 Comm: ip Not tainted 4.9.94 #11
> 
> [    7.342725] Hardware name: Freescale i.MX6 Quad/DualLite (Device Tree)
> 
> [    7.349259] task: ece59900 task.stack: ec9ea000
> 
> [    7.353809] PC is at kszphy_config_reset+0x14/0x148
> 
> [    7.358703] LR is at kszphy_resume+0x1c/0x6c
> 
> [    7.362983] pc : [<c056a24c>]    lr : [<c056a4fc>]    psr: 60030013
> 
> [    7.362983] sp : ec9eb918  ip : ec9eb938  fp : ec9eb934
> 
> [    7.374467] r10: 00000007  r9 : 00000000  r8 : ee693c00
> 
> [    7.379700] r7 : 00000000  r6 : 00000000  r5 : 00000000  r4 : ee6fc000
> 
> [    7.386234] r3 : 00000001  r2 : 00000000  r1 : 00000110  r0 : ee6fc000
> 
> [    7.392768] Flags: nZCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment none
> 
> [    7.399911] Control: 10c5387d  Table: 3cfc004a  DAC: 00000051
> 
> [    7.405663] Process ip (pid: 269, stack limit = 0xec9ea210)
> 
> [    7.411244] Stack: (0xec9eb918 to 0xec9ec000)
> 
> [    7.415611] b900: ee6fc000 00000000
> [    7.423800] b920: ee031000 00000000 ec9eb94c ec9eb938 c056a4fc c056a244 
> ee6fc000 00000000
> [    7.431988] b940: ec9eb97c ec9eb950 c05681e4 c056a4ec 00000007 ee6fc000 
> ee6fc000 c056ce7c
> [    7.440174] b960: c056ce7c ee031000 ee55c818 00000000 ec9eb99c ec9eb980 
> c05683cc c0568134
> [    7.448364] b980: 00000007 ec9eba10 ee6fc000 00000007 ec9eb9c4 ec9eb9a0 
> c0568450 c05683bc
> [    7.456550] b9a0: 00000007 00000005 ee031000 ec9eb9d3 00000200 c1508da4 
> ec9eba6c ec9eb9c8
> [    7.464736] b9c0: c056ce24 c0568410 00000005 ee03162c 32000001 30383831 
> 652e3030 72656874
> [    7.472921] b9e0: 2d74656e 00000031 000003e8 000000c8 c01732ec c0172adc 
> 000003e8 000000c8
> [    7.481109] ba00: 024000c0 ee55c000 c150e454 024000c0 38383132 2e303030 
> 65687465 74656e72
> [    7.489296] ba20: 303a312d ee000035 ec9eba6c ec9eba38 c0224b50 c0175eb8 
> ec9eba6c c056eb44
> [    7.497482] ba40: c056bbe0 f0c16000 ee031000 ee55c000 00000200 f0c16000 
> ee031000 ee55c000
> [    7.505667] ba60: ec9ebaa4 ec9eba70 c056eba4 c056cd1c 00000001 ee03162c 
> ec9ebaa4 ee031000
> [    7.513855] ba80: 00000000 c09566ec ee031030 00000000 ec9ccd10 ecb39900 
> ec9ebacc ec9ebaa8
> [    7.522043] baa0: c06ad6e0 c056e92c ec9ebacc ee031000 ee031000 00000001 
> 00001003 00001002
> [    7.530229] bac0: ec9ebaf4 ec9ebad0 c06ad99c c06ad63c 00001002 ee031000 
> ee031148 00001002
> [    7.538414] bae0: 00000000 00000000 ec9ebb1c ec9ebaf8 c06ada6c c06ad90c 
> 00001002 00000000
> [    7.546601] bb00: ee031000 ec9ebc28 00000000 c09566ec ec9ebb94 ec9ebb20 
> c06c1034 c06ada58
> [    7.554787] bb20: c0c50df8 2e184000 ec9ebb44 ec9ebb38 c0173528 c0173320 
> ec9ebbd4 c0e82b6c
> [    7.562972] bb40: 00000000 ece59dc8 ebb4e9d0 c9eae3f3 ece59900 00000003 
> ece59900 0000005e
> [    7.571157] bb60: c14e30ec c0d1e51c ece59900 00000000 ee031000 ec9ccd00 
> 00000000 00000000
> [    7.579346] bb80: ec9ebb98 00000000 ec9ebd04 ec9ebb98 c06c30cc c06c0d68 
> ec9ebbc4 00000000
> [    7.587531] bba0: c01758bc ecb39900 c09eb3a0 ec9ccd20 00000000 ec9ccd10 
> 00000001 ece59900
> [    7.595715] bbc0: c01e0e64 00000000 00000000 00000001 ec9ebbfc 00000000 
> 00000000 00000000
> [    7.603900] bbe0: 00000000 00000000 00000000 ffffff00 ec9ebc0c ec9ebc00 
> c0173528 c0173320
> [    7.612084] bc00: ec9ebc9c ec9ebc10 c01e0e64 c0173520 00000000 0000000e 
> ece59900 00000096
> [    7.620269] bc20: c14e30ec c0d1e51c 00000000 00000000 00000000 00000000 
> 00000000 00000000
> [    7.628452] bc40: 00000000 00000000 00000000 00000000 00000000 00000000 
> 00000000 00000000
> [    7.636636] bc60: 00000000 00000000 00000000 00000000 00000000 00000000 
> 00000000 00000000
> [    7.644819] bc80: 00000000 00000000 00000000 00000000 00000000 00000000 
> 00000000 00000000
> [    7.653003] bca0: 00000000 00000000 00000000 00000000 00000000 00000000 
> 00000000 00000000
> [    7.661186] bcc0: 00000000 00000000 00000000 00000000 00000000 00000000 
> c06d3870 00000000
> [    7.669372] bce0: ec9ccd00 ecb39900 c15226e4 00000000 00000000 ecb39900 
> ec9ebd44 ec9ebd08
> [    7.677556] bd00: c06c343c c06c2bdc c0869c2c c0173520 00000001 00000000 
> c06c06e4 00000000
> [    7.685741] bd20: 00000000 ec9ccd00 c06c32b8 ecb39900 ecb39900 00000000 
> ec9ebd64 ec9ebd48
> [    7.693926] bd40: c06d86cc c06c32c4 00000000 ecb39900 00000020 ec970400 
> ec9ebd7c ec9ebd68
> [    7.702110] bd60: c06c06f4 c06d8630 c06c06c4 ee15f400 ec9ebdac ec9ebd80 
> c06d802c c06c06d0
> [    7.710294] bd80: ec9ebf50 7fffffff ec970400 ec9ebf48 ec970400 00000000 
> 00000020 00000000
> [    7.718477] bda0: ec9ebe0c ec9ebdb0 c06d84e8 c06d7ec8 0000000c ec9ebe48 
> 0000000c 00000000
> [    7.726661] bdc0: beee97bc 00000008 00000000 ee0cbd80 00000000 0000010d 
> 00000000 00000000
> [    7.734845] bde0: ec9ebe24 ec9ebf48 00000000 eda478c0 00000128 00000000 
> 00000000 ec9ebe28
> [    7.743029] be00: ec9ebe1c ec9ebe10 c068b124 c06d8180 ec9ebf34 ec9ebe20 
> c068bcac c068b114
> [    7.751213] be20: ec9ebe8c 00000000 c01759cc c01704b0 ec9ebea4 ec9ebe40 
> c0209960 c01187ac
> [    7.759396] be40: 00000000 00000000 beeed828 00000020 00000000 00000000 
> 600f0113 ec98ce70
> [    7.767580] be60: beee9000 ed7ac714 ece59900 ed7ac6c0 00000817 beee977c 
> ed7ac714 00000055
> [    7.775764] be80: ec9ebea4 ec9ebe90 00000010 00000000 00000000 ec9ebfb0 
> ec9ebefc ec9ebea8
> [    7.783948] bea0: c0115b74 c016ec18 00000000 c0d55a61 c025e178 ffffffea 
> ee022f10 00000000
> [    7.792131] bec0: 00000000 00000000 00000800 00000000 c025ddc0 c0d0a44c 
> 00000817 c0115990
> [    7.800315] bee0: beee977c ec9ebfb0 00054694 beeedec0 ec9ebfac ec9ebf00 
> c0101368 c011599c
> [    7.808500] bf00: ec9ebf1c ec9ebf10 c025a578 eda478c0 00000000 beee97a4 
> 00000128 c0107ee4
> [    7.816685] bf20: ec9ea000 00000000 ec9ebf94 ec9ebf38 c068ca54 c068bad4 
> 00000000 00000000
> [    7.824868] bf40: 00000000 fffffff7 ec9ebe88 0000000c 00000001 00000000 
> 00000000 ec9ebe50
> [    7.833052] bf60: 00000000 00000001 00000000 00000000 00000000 00000000 
> ec9ebf94 0000000c
> [    7.841235] bf80: 00000010 beee9790 ec9ebfa4 ec9ebf98 c068ca94 c068ca18 
> 00000000 ec9ebfa8
> [    7.849419] bfa0: c0107d20 c068ca90 0000000c 00000010 00000003 beee97a4 
> 00000000 00000005
> [    7.857604] bfc0: 0000000c 00000010 beee9790 00000128 00000000 00054694 
> beee97a4 beee97c0
> [    7.865789] bfe0: 00000000 beee9774 00034d1d b6eaaf16 400f0030 00000003 
> 3fffd861 3fffdc61
> [    7.873968] Backtrace:
> 
> [    7.876450] [<c056a238>] (kszphy_config_reset) from [<c056a4fc>] 
> (kszphy_resume+0x1c/0x6c)
> [    7.884723]  r7:00000000 r6:ee031000 r5:00000000 r4:ee6fc000
> 
> [    7.890392] [<c056a4e0>] (kszphy_resume) from [<c05681e4>] 
> (phy_attach_direct+0xbc/0x1bc)
> [    7.898575]  r5:00000000 r4:ee6fc000
> 
> [    7.902158] [<c0568128>] (phy_attach_direct) from [<c05683cc>] 
> (phy_connect_direct+0x1c/0x54)
> [    7.910691]  r10:00000000 r9:ee55c818 r8:ee031000 r7:c056ce7c r6:c056ce7c 
> r5:ee6fc000
> [    7.918525]  r4:ee6fc000 r3:00000007
> 
> [    7.922108] [<c05683b0>] (phy_connect_direct) from [<c0568450>] 
> (phy_connect+0x4c/0x80)
> [    7.930117]  r6:00000007 r5:ee6fc000 r4:ec9eba10 r3:00000007
> 
> [    7.935785] [<c0568404>] (phy_connect) from [<c056ce24>] 
> (fec_enet_mii_probe+0x114/0x16c)
> [    7.943969]  r8:c1508da4 r7:00000200 r6:ec9eb9d3 r5:ee031000 r4:00000005 
> r3:00000007
> [    7.951720] [<c056cd10>] (fec_enet_mii_probe) from [<c056eba4>] 
> (fec_enet_open+0x284/0x320)
> [    7.960076]  r6:ee55c000 r5:ee031000 r4:f0c16000
> 
> [    7.964704] [<c056e920>] (fec_enet_open) from [<c06ad6e0>] 
> (__dev_open+0xb0/0x114)
> [    7.972281]  r10:ecb39900 r9:ec9ccd10 r8:00000000 r7:ee031030 r6:c09566ec 
> r5:00000000
> [    7.980113]  r4:ee031000
> 
> [    7.982655] [<c06ad630>] (__dev_open) from [<c06ad99c>] 
> (__dev_change_flags+0x9c/0x14c)
> [    7.990665]  r7:00001002 r6:00001003 r5:00000001 r4:ee031000
> 
> [    7.996333] [<c06ad900>] (__dev_change_flags) from [<c06ada6c>] 
> (dev_change_flags+0x20/0x50)
> [    8.004777]  r8:00000000 r7:00000000 r6:00001002 r5:ee031148 r4:ee031000 
> r3:00001002
> [    8.012533] [<c06ada4c>] (dev_change_flags) from [<c06c1034>] 
> (do_setlink+0x2d8/0x838)
> [    8.020456]  r8:c09566ec r7:00000000 r6:ec9ebc28 r5:ee031000 r4:00000000 
> r3:00001002
> [    8.028207] [<c06c0d5c>] (do_setlink) from [<c06c30cc>] 
> (rtnl_newlink+0x4fc/0x6e8)
> [    8.035784]  r10:00000000 r9:ec9ebb98 r8:00000000 r7:00000000 r6:ec9ccd00 
> r5:ee031000
> [    8.043616]  r4:00000000
> 
> [    8.046159] [<c06c2bd0>] (rtnl_newlink) from [<c06c343c>] 
> (rtnetlink_rcv_msg+0x184/0x234)
> [    8.054343]  r10:ecb39900 r9:00000000 r8:00000000 r7:c15226e4 r6:ecb39900 
> r5:ec9ccd00
> [    8.062175]  r4:00000000
> 
> [    8.064720] [<c06c32b8>] (rtnetlink_rcv_msg) from [<c06d86cc>] 
> (netlink_rcv_skb+0xa8/0xc4)
> [    8.072990]  r8:00000000 r7:ecb39900 r6:ecb39900 r5:c06c32b8 r4:ec9ccd00
> [    8.079701] [<c06d8624>] (netlink_rcv_skb) from [<c06c06f4>] 
> (rtnetlink_rcv+0x30/0x38)
> [    8.087622]  r6:ec970400 r5:00000020 r4:ecb39900 r3:00000000
> [    8.093291] [<c06c06c4>] (rtnetlink_rcv) from [<c06d802c>] 
> (netlink_unicast+0x170/0x1f8)
> [    8.101384]  r4:ee15f400 r3:c06c06c4
> [    8.104968] [<c06d7ebc>] (netlink_unicast) from [<c06d84e8>] 
> (netlink_sendmsg+0x374/0x388)
> [    8.113238]  r8:00000000 r7:00000020 r6:00000000 r5:ec970400 r4:ec9ebf48
> [    8.119952] [<c06d8174>] (netlink_sendmsg) from [<c068b124>] 
> (sock_sendmsg+0x1c/0x2c)
> [    8.127789]  r10:ec9ebe28 r9:00000000 r8:00000000 r7:00000128 r6:eda478c0 
> r5:00000000
> [    8.135621]  r4:ec9ebf48
> [    8.138167] [<c068b108>] (sock_sendmsg) from [<c068bcac>] 
> (___sys_sendmsg+0x1e4/0x20c)
> [    8.146095] [<c068bac8>] (___sys_sendmsg) from [<c068ca54>] 
> (__sys_sendmsg+0x48/0x78)
> [    8.153932]  r10:00000000 r9:ec9ea000 r8:c0107ee4 r7:00000128 r6:beee97a4 
> r5:00000000
> [    8.161764]  r4:eda478c0
> [    8.164304] [<c068ca0c>] (__sys_sendmsg) from [<c068ca94>] 
> (SyS_sendmsg+0x10/0x14)
> [    8.171880]  r6:beee9790 r5:00000010 r4:0000000c
> [    8.176510] [<c068ca84>] (SyS_sendmsg) from [<c0107d20>] 
> (ret_fast_syscall+0x0/0x1c)
> [    8.184261] Code: e92dd8f0 e24cb004 e590628c e1a04000 (e5d63008)
> [    8.192713] ---[ end trace 07c02ee14784bc48 ]---
> 
> Kind regards
> Chris
> 
> 

If I look at the patch, I think it should call kszphy_config_init(), not
kszphy_config_reset(), in the resume function:


@@ -715,8 +723,14 @@ static int kszphy_suspend(struct phy_device *phydev)

  static int kszphy_resume(struct phy_device *phydev)
  {
+	int ret;
+
  	genphy_resume(phydev);

-	ret = kszphy_config_reset(phydev);
+       ret = kszphy_config_init(phydev);
+	if (ret)
+		return ret;
+

^ permalink raw reply

* Re: [PATCH v3 00/10] New network driver for Amiga X-Surf 100 (m68k)
From: Michael Schmitz @ 2018-04-18  5:10 UTC (permalink / raw)
  To: netdev
  Cc: Andrew Lunn, Finn Thain, Geert Uytterhoeven, Florian Fainelli,
	Linux/m68k, Michael Karcher
In-Reply-To: <1524025616-3722-1-git-send-email-schmitzmic@gmail.com>

All,

just noticed belatedly that the Makefile hunk of patch 9 no
longer applies cleanly to 4.17-rc1, sorry. My series was based on 4.16.
I'll resend that one, OK?

Cheers,

  Michael


On Wed, Apr 18, 2018 at 4:26 PM, Michael Schmitz <schmitzmic@gmail.com> wrote:
> This patch series adds support for the Individual Computers X-Surf 100
> network card for m68k Amiga, a network adapter based on the AX88796 chip set.
>
> The driver was originally written for kernel version 3.19 by Michael Karcher
> (see CC:), and adapted to 4.16 for submission to netdev by me. Questions
> regarding motivation for some of the changes are probably best directed at
> Michael Karcher.
>
> The driver has been tested by Adrian <glaubitz@physik.fu-berlin.de> who will
> send his Tested-by tag separately.
>
> A few changes to the ax88796 driver were required:
> - to read the MAC address, some setup of the ax88796 chip must be done,
> - attach to the MII bus only on device open to allow module unloading,
> - allow to supersede ax_block_input/ax_block_output by card-specific
>   optimized code,
> - use an optional interrupt status callback to allow easier sharing of the
>   card interrupt,
> - set IRQF_SHARED if platform IRQ resource is marked shareable,
>
> The Asix Electronics PHY used on the X-Surf 100 is buggy, and causes the
> software reset to hang if the previous command sent to the PHY was also
> a soft reset. This bug requires addition of a PHY driver for Asix PHYs
> to provide a fixed .soft_reset function, included in this series.
>
> Some additional cleanup:
> - do not attempt to free IRQ in ax_remove (complements 82533ad9a1c),
> - clear platform drvdata on probe fail and module remove.
>
> Changes since v1:
>
> Raised in review by Andrew Lunn:
> - move MII code around to avoid need for forward declaration
> - combine patches 2 and 7 to add cleanup in error path
>
> Changes since v2:
>
> - corrected authorship attribution to Michael Karcher
>
> Suggested by Geert Uytterhoeven:
> - use ei_local->reset_8390() instead of duplicating ax_reset_8390()
> - use %pR to format struct resource pointers
> - assign pdev and xs100 pointers in declaration
> - don't split error messages
> - change Kconfig logic to only require XSURF100 set on Amiga
>
> Suggested by Andrew Lunn:
> - add COMPILE_TEST to ax88796 Kconfig options
> - use new Asix PHY driver for X-Surf 100
>
> Suggested by Andrew Lunn/Finn Thain:
> - declare struct sk_buff in ax88796.h
> - correct whitespace error in ax88796.h
>
> This series' patches, in order:
>
> 1/9 net: phy: new Asix Electronics PHY driver
> 2/9 net: ax88796: Fix MAC address reading
> 3/9 net: ax88796: Attach MII bus only when open
> 4/9 net: ax88796: Do not free IRQ in ax_remove() (already freed in ax_close()).
> 5/9 net: ax88796: Add block_input/output hooks to ax_plat_data
> 6/9 net: ax88796: add interrupt status callback to platform data
> 7/9 net: ax88796: set IRQF_SHARED flag when IRQ resource is marked as shareable
> 8/9 net: ax88796: release platform device drvdata on probe error and module remove
> 9/9 net: New ax88796 platform driver for Amiga X-Surf 100 Zorro board (m68k)
>
>  drivers/net/ethernet/8390/Kconfig    |   17 ++-
>  drivers/net/ethernet/8390/Makefile   |    1 +
>  drivers/net/ethernet/8390/ax88796.c  |  228 ++++++++++++--------
>  drivers/net/ethernet/8390/xsurf100.c |  381 ++++++++++++++++++++++++++++++++++
>  drivers/net/phy/Kconfig              |    6 +
>  drivers/net/phy/Makefile             |    1 +
>  drivers/net/phy/asix.c               |   65 ++++++
>  drivers/net/phy/phy_device.c         |    3 +-
>  include/linux/phy.h                  |    1 +
>  include/net/ax88796.h                |   14 ++
>  10 files changed, 621 insertions(+), 96 deletions(-)
>
> Cheers,
>
>   Michael

^ permalink raw reply

* Re: [PATCH iproute2 net-next] vxlan: fix ttl inherit behavior
From: Hangbin Liu @ 2018-04-18  5:10 UTC (permalink / raw)
  To: network dev; +Cc: Stephen Hemminger, Jiri Benc, Hangbin Liu
In-Reply-To: <1524027948-5395-1-git-send-email-liuhangbin@gmail.com>

Hi Stephen,

The patch's subject contains fix. But the kernel feature is applied on net-next.
So I'm not sure if iproute2 net-next is suitable. If you are OK with the patch,
please feel free to apply it on the branch which you think is suitable.

Thanks
Hangbin

On 18 April 2018 at 13:05, Hangbin Liu <liuhangbin@gmail.com> wrote:
> Like kernel net-next commit 72f6d71e491e6 ("vxlan: add ttl inherit support"),
> vxlan ttl inherit should mean inheriting the inner protocol's ttl value.
>
> But currently when we add vxlan with "ttl inherit", we only set ttl 0,
> which actually means using whatever default value instead of inheriting the
> inner protocol's ttl value.
>
> To distinguish between ttl inherit and ttl == 0, we add an attribute
> IFLA_VXLAN_TTL_INHERIT when "ttl inherit" is specified, and use "ttl auto"
> to mean "use whatever default value", the same behavior as ttl == 0.
>
> Reported-by: Jianlin Shi <jishi@redhat.com>
> Suggested-by: Jiri Benc <jbenc@redhat.com>
> Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>

^ permalink raw reply

* Re: [PATCH v3 00/10] New network driver for Amiga X-Surf 100 (m68k)
From: Finn Thain @ 2018-04-18  5:45 UTC (permalink / raw)
  To: Michael Schmitz
  Cc: netdev, Andrew Lunn, Geert Uytterhoeven, Florian Fainelli,
	Linux/m68k, Michael Karcher
In-Reply-To: <CAOmrzk+zUTmSzXWU9WoXYauBx2Z4qkAh+Y4d49faA8Tu5RRQnQ@mail.gmail.com>

On Wed, 18 Apr 2018, Michael Schmitz wrote:

> All,
> 
> just noticed belatedly that the Makefile hunk of patch 9 does no
> longer apply cleanly in 4.17-rc1, sorry. My series was based on 4.16.
> I'll resend that one, OK?
> 

It might end up simpler to resend the whole series --

> Cheers,
> 
>   Michael
> 
> 
> > 1/9 net: phy: new Asix Electronics PHY driver
> > 2/9 net: ax88796: Fix MAC address reading
> > 3/9 net: ax88796: Attach MII bus only when open
> > 4/9 net: ax88796: Do not free IRQ in ax_remove() (already freed in ax_close()).
> > 5/9 net: ax88796: Add block_input/output hooks to ax_plat_data

I found that git am rejects this one, though 'patch' applies it with fuzz.

> > 6/9 net: ax88796: add interrupt status callback to platform data
> > 7/9 net: ax88796: set IRQF_SHARED flag when IRQ resource is marked as shareable
> > 8/9 net: ax88796: release platform device drvdata on probe error and module remove
> > 9/9 net: New ax88796 platform driver for Amiga X-Surf 100 Zorro board (m68k)

git am rejected this one and also complained about trailing whitespace.

I'd rebase on v4.17-rc1 and also run checkpatch over the results.

-- 

> >
> >  drivers/net/ethernet/8390/Kconfig    |   17 ++-
> >  drivers/net/ethernet/8390/Makefile   |    1 +
> >  drivers/net/ethernet/8390/ax88796.c  |  228 ++++++++++++--------
> >  drivers/net/ethernet/8390/xsurf100.c |  381 ++++++++++++++++++++++++++++++++++
> >  drivers/net/phy/Kconfig              |    6 +
> >  drivers/net/phy/Makefile             |    1 +
> >  drivers/net/phy/asix.c               |   65 ++++++
> >  drivers/net/phy/phy_device.c         |    3 +-
> >  include/linux/phy.h                  |    1 +
> >  include/net/ax88796.h                |   14 ++
> >  10 files changed, 621 insertions(+), 96 deletions(-)
> >
> > Cheers,
> >
> >   Michael

^ permalink raw reply

* Re: [PATCH 1/1] net/mlx4_core: avoid resetting HCA when accessing an offline device
From: Yanjun Zhu @ 2018-04-18  5:46 UTC (permalink / raw)
  To: Tariq Toukan, netdev, linux-rdma, haakon.bugge
In-Reply-To: <6dd17e45-e27e-8451-42ab-1a4551d3a651@mellanox.com>



On 2018/4/17 23:37, Tariq Toukan wrote:
>
>
> On 16/04/2018 4:02 AM, Zhu Yanjun wrote:
>> While a faulty cable is used or HCA firmware error, HCA device will
>> be offline. When the driver is accessing this offline device, the
>> following call trace will pop out.
>>
>> "
>> ...
>>    [<ffffffff816e4842>] dump_stack+0x63/0x81
>>    [<ffffffff816e459e>] panic+0xcc/0x21b
>>    [<ffffffffa03e5f8a>] mlx4_enter_error_state+0xba/0xf0 [mlx4_core]
>>    [<ffffffffa03e7298>] mlx4_cmd_reset_flow+0x38/0x60 [mlx4_core]
>>    [<ffffffffa03e7381>] mlx4_cmd_poll+0xc1/0x2e0 [mlx4_core]
>>    [<ffffffffa03e9f00>] __mlx4_cmd+0xb0/0x160 [mlx4_core]
>>    [<ffffffffa0406934>] mlx4_SENSE_PORT+0x54/0xd0 [mlx4_core]
>>    [<ffffffffa03f5f54>] mlx4_dev_cap+0x4a4/0xb50 [mlx4_core]
>> ...
>> "
>> In the above call trace, the function mlx4_cmd_poll calls the function
>> mlx4_cmd_post to access the HCA while HCA is offline. Then mlx4_cmd_post
>> returns an error -EIO. Per -EIO, the function mlx4_cmd_poll calls
>> mlx4_cmd_reset_flow to reset HCA. And the above call trace pops out.
>>
>> This is not reasonable. Since HCA device is offline when it is being
>> accessed, it should not be reset again.
>>
>> In this patch, since HCA is offline, the function mlx4_cmd_post returns
>> an error -EINVAL. Per -EINVAL, the function mlx4_cmd_poll directly 
>> returns
>> instead of resetting HCA.
>>
>> CC: Srinivas Eeda <srinivas.eeda@oracle.com>
>> CC: Junxiao Bi <junxiao.bi@oracle.com>
>> Suggested-by: Håkon Bugge <haakon.bugge@oracle.com>
>> Signed-off-by: Zhu Yanjun <yanjun.zhu@oracle.com>
>> ---
>>   drivers/net/ethernet/mellanox/mlx4/cmd.c | 8 ++++++++
>>   1 file changed, 8 insertions(+)
>>
>> diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c 
>> b/drivers/net/ethernet/mellanox/mlx4/cmd.c
>> index 6a9086d..f1c8c42 100644
>> --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
>> +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
>> @@ -451,6 +451,8 @@ static int mlx4_cmd_post(struct mlx4_dev *dev, 
>> u64 in_param, u64 out_param,
>>            * Device is going through error recovery
>>            * and cannot accept commands.
>>            */
>> +        mlx4_err(dev, "%s : Device is in error recovery.\n", __func__);
>> +        ret = -EINVAL;
>>           goto out;
>>       }
>>   @@ -657,6 +659,9 @@ static int mlx4_cmd_poll(struct mlx4_dev *dev, 
>> u64 in_param, u64 *out_param,
>>       }
>>     out_reset:
>> +    if (err == -EINVAL)
>> +        goto out;
>> +
>
> See below.
>
>>       if (err)
>>           err = mlx4_cmd_reset_flow(dev, op, op_modifier, err);
>>   out:
>> @@ -766,6 +771,9 @@ static int mlx4_cmd_wait(struct mlx4_dev *dev, 
>> u64 in_param, u64 *out_param,
>>           *out_param = context->out_param;
>>     out_reset:
>> +    if (err == -EINVAL)
>> +        goto out;
>> +
>>       if (err)
>
> Instead, just do here: if (err && err != -EINVAL)
>
>>           err = mlx4_cmd_reset_flow(dev, op, op_modifier, err);
>>   out:
>>
>
> I am not sure this does not mistakenly cover other cases that already 
> exist and have (err == -EINVAL).
>
> For example, this line is hard to predict:
> err = mlx4_status_to_errno
> and later on, we might get into
> if (mlx4_closing_cmd_fatal_error(op, stat))
> which leads to out_reset.
Thanks a lot.
Sure. I agree with you that "err = mlx4_status_to_errno" and "if 
(mlx4_closing_cmd_fatal_error(op, stat))" will also make "err=-EINVAL".
This will mistakenly go to out instead of resetting HCA device.

I will make a new patch to avoid the above error.

Zhu Yanjun
>
> We must have a deeper look at this.
> But a better option is, change the error indication to uniquely 
> indicate "already in error recovery".
>

^ permalink raw reply

* [PATCH bpf-next v2 00/11] introduction of bpf_xdp_adjust_tail
From: Nikita V. Shirokov @ 2018-04-18  4:29 UTC (permalink / raw)
  To: Alexei Starovoitov, Daniel Borkmann; +Cc: netdev, Nikita V. Shirokov

In this patch series I'm adding a new bpf helper which allows manipulating
xdp's data_end pointer. Right now only "shrinking" (reducing the packet's
size by moving the pointer) is supported (and I see no use case for
"growing"). The main use case for such a helper is to be able to generate
control (ICMP) messages from XDP context. Such messages usually contain the
first N bytes of the original packet as a payload, and this is exactly what
this helper would allow us to do (see patch 11 for a sample program, where
we generate an ICMP "packet too big" message). This helper could be useful
for load balancing applications where, after additional encapsulation, the
resulting packet could be bigger than the interface MTU.
Aside from the new helper, this patch series contains minor changes in
device drivers (for the ones which require them), so that they recalculate
the packet's length not only when the head pointer was adjusted, but also
when the tail pointer was.

v1->v2:
 * fixed kbuild warning
 * made offset == 0 invalid for bpf_xdp_adjust_tail
 * split bpf_prog_test_run fix and selftests into separate commits
 * added SPDX licence where applicable
 * some reshuffling in patches order (tests now in the end)


Nikita V. Shirokov (11):
  bpf: making bpf_prog_test run aware of possible data_end ptr change
  bpf: adding tests for bpf_xdp_adjust_tail
  bpf: adding bpf_xdp_adjust_tail helper
  bpf: make generic xdp compatible w/ bpf_xdp_adjust_tail
  bpf: make mlx4 compatible w/ bpf_xdp_adjust_tail
  bpf: make bnxt compatible w/ bpf_xdp_adjust_tail
  bpf: make cavium thunder compatible w/ bpf_xdp_adjust_tail
  bpf: make netronome nfp compatible w/ bpf_xdp_adjust_tail
  bpf: make tun compatible w/ bpf_xdp_adjust_tail
  bpf: make virtio compatible w/ bpf_xdp_adjust_tail
  bpf: add bpf_xdp_adjust_tail sample prog

 drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c      |   2 +-
 drivers/net/ethernet/cavium/thunder/nicvf_main.c   |   2 +-
 drivers/net/ethernet/mellanox/mlx4/en_rx.c         |   2 +-
 .../net/ethernet/netronome/nfp/nfp_net_common.c    |   2 +-
 drivers/net/tun.c                                  |   3 +-
 drivers/net/virtio_net.c                           |   7 +-
 include/uapi/linux/bpf.h                           |  10 +-
 net/bpf/test_run.c                                 |   3 +-
 net/core/dev.c                                     |  10 +-
 net/core/filter.c                                  |  29 +++-
 samples/bpf/Makefile                               |   4 +
 samples/bpf/xdp_adjust_tail_kern.c                 | 152 +++++++++++++++++++++
 samples/bpf/xdp_adjust_tail_user.c                 | 142 +++++++++++++++++++
 tools/include/uapi/linux/bpf.h                     |  10 +-
 tools/testing/selftests/bpf/Makefile               |   2 +-
 tools/testing/selftests/bpf/bpf_helpers.h          |   5 +
 tools/testing/selftests/bpf/test_adjust_tail.c     |  30 ++++
 tools/testing/selftests/bpf/test_progs.c           |  32 +++++
 18 files changed, 435 insertions(+), 12 deletions(-)
 create mode 100644 samples/bpf/xdp_adjust_tail_kern.c
 create mode 100644 samples/bpf/xdp_adjust_tail_user.c
 create mode 100644 tools/testing/selftests/bpf/test_adjust_tail.c

-- 
2.15.1

^ permalink raw reply

* [PATCH bpf-next v2 01/11] bpf: adding bpf_xdp_adjust_tail helper
From: Nikita V. Shirokov @ 2018-04-18  4:29 UTC (permalink / raw)
  To: Alexei Starovoitov, Daniel Borkmann; +Cc: netdev, Nikita V. Shirokov
In-Reply-To: <20180418042951.17183-1-tehnerd@tehnerd.com>

Adding a new bpf helper which would allow us to manipulate
xdp's data_end pointer, and allow us to reduce the packet's size.
Intended use case: to generate ICMP messages from XDP context,
where such a message would contain the truncated original packet.
---
 include/uapi/linux/bpf.h | 10 +++++++++-
 net/core/filter.c        | 29 ++++++++++++++++++++++++++++-
 2 files changed, 37 insertions(+), 2 deletions(-)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index c5ec89732a8d..9a2d1a04eb24 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -755,6 +755,13 @@ union bpf_attr {
  *     @addr: pointer to struct sockaddr to bind socket to
  *     @addr_len: length of sockaddr structure
  *     Return: 0 on success or negative error code
+ *
+ * int bpf_xdp_adjust_tail(xdp_md, delta)
+ *     Adjust the xdp_md.data_end by delta. Only shrinking of packet's
+ *     size is supported.
+ *     @xdp_md: pointer to xdp_md
+ *     @delta: A negative integer to be added to xdp_md.data_end
+ *     Return: 0 on success or negative on error
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -821,7 +828,8 @@ union bpf_attr {
 	FN(msg_apply_bytes),		\
 	FN(msg_cork_bytes),		\
 	FN(msg_pull_data),		\
-	FN(bind),
+	FN(bind),			\
+	FN(xdp_adjust_tail),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
diff --git a/net/core/filter.c b/net/core/filter.c
index a374b8560bc4..29318598fd60 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2725,6 +2725,30 @@ static const struct bpf_func_proto bpf_xdp_adjust_head_proto = {
 	.arg2_type	= ARG_ANYTHING,
 };
 
+BPF_CALL_2(bpf_xdp_adjust_tail, struct xdp_buff *, xdp, int, offset)
+{
+	void *data_end = xdp->data_end + offset;
+
+	/* only shrinking is allowed for now. */
+	if (unlikely(offset >= 0))
+		return -EINVAL;
+
+	if (unlikely(data_end < xdp->data + ETH_HLEN))
+		return -EINVAL;
+
+	xdp->data_end = data_end;
+
+	return 0;
+}
+
+static const struct bpf_func_proto bpf_xdp_adjust_tail_proto = {
+	.func		= bpf_xdp_adjust_tail,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_ANYTHING,
+};
+
 BPF_CALL_2(bpf_xdp_adjust_meta, struct xdp_buff *, xdp, int, offset)
 {
 	void *meta = xdp->data_meta + offset;
@@ -3074,7 +3098,8 @@ bool bpf_helper_changes_pkt_data(void *func)
 	    func == bpf_l4_csum_replace ||
 	    func == bpf_xdp_adjust_head ||
 	    func == bpf_xdp_adjust_meta ||
-	    func == bpf_msg_pull_data)
+	    func == bpf_msg_pull_data ||
+	    func == bpf_xdp_adjust_tail)
 		return true;
 
 	return false;
@@ -3888,6 +3913,8 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_xdp_redirect_proto;
 	case BPF_FUNC_redirect_map:
 		return &bpf_xdp_redirect_map_proto;
+	case BPF_FUNC_xdp_adjust_tail:
+		return &bpf_xdp_adjust_tail_proto;
 	default:
 		return bpf_base_func_proto(func_id);
 	}
-- 
2.15.1

^ permalink raw reply related

* [PATCH bpf-next v2 02/11] bpf: make generic xdp compatible w/ bpf_xdp_adjust_tail
From: Nikita V. Shirokov @ 2018-04-18  4:29 UTC (permalink / raw)
  To: Alexei Starovoitov, Daniel Borkmann, David S. Miller 
  Cc: netdev, Nikita V. Shirokov
In-Reply-To: <20180418042951.17183-1-tehnerd@tehnerd.com>

w/ bpf_xdp_adjust_tail helper xdp's data_end pointer could be changed as
well (only "decrease" of pointer's location is going to be supported).
changing of this pointer will change packet's size.
for generic XDP we need to reflect this packet's length change by
adjusting skb's tail pointer

Acked-by: Alexei Starovoitov <ast@kernel.org>
---
 net/core/dev.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index 969462ebb296..11c789231a03 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3996,9 +3996,9 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
 				     struct bpf_prog *xdp_prog)
 {
 	struct netdev_rx_queue *rxqueue;
+	void *orig_data, *orig_data_end;
 	u32 metalen, act = XDP_DROP;
 	struct xdp_buff xdp;
-	void *orig_data;
 	int hlen, off;
 	u32 mac_len;
 
@@ -4037,6 +4037,7 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
 	xdp.data_meta = xdp.data;
 	xdp.data_end = xdp.data + hlen;
 	xdp.data_hard_start = skb->data - skb_headroom(skb);
+	orig_data_end = xdp.data_end;
 	orig_data = xdp.data;
 
 	rxqueue = netif_get_rxqueue(skb);
@@ -4051,6 +4052,13 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
 		__skb_push(skb, -off);
 	skb->mac_header += off;
 
+	/* check if bpf_xdp_adjust_tail was used. it can only "shrink"
+	 * pckt.
+	 */
+	off = orig_data_end - xdp.data_end;
+	if (off != 0)
+		skb_set_tail_pointer(skb, xdp.data_end - xdp.data);
+
 	switch (act) {
 	case XDP_REDIRECT:
 	case XDP_TX:
-- 
2.15.1

^ permalink raw reply related

* [PATCH bpf-next v2 03/11] bpf: make mlx4 compatible w/ bpf_xdp_adjust_tail
From: Nikita V. Shirokov @ 2018-04-18  4:29 UTC (permalink / raw)
  To: Alexei Starovoitov, Daniel Borkmann, Tariq Toukan
  Cc: netdev, Nikita V. Shirokov
In-Reply-To: <20180418042951.17183-1-tehnerd@tehnerd.com>

w/ bpf_xdp_adjust_tail helper xdp's data_end pointer could be changed as
well (only "decrease" of pointer's location is going to be supported).
changing of this pointer will change packet's size.
for mlx4 driver we will just calculate packet's length unconditionally
(the same way as it's already being done in mlx5)

Acked-by: Alexei Starovoitov <ast@kernel.org>
---
 drivers/net/ethernet/mellanox/mlx4/en_rx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index 5c613c6663da..efc55feddc5c 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -775,8 +775,8 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
 
 			act = bpf_prog_run_xdp(xdp_prog, &xdp);
 
+			length = xdp.data_end - xdp.data;
 			if (xdp.data != orig_data) {
-				length = xdp.data_end - xdp.data;
 				frags[0].page_offset = xdp.data -
 					xdp.data_hard_start;
 				va = xdp.data;
-- 
2.15.1

^ permalink raw reply related

* [PATCH bpf-next v2 04/11] bpf: make bnxt compatible w/ bpf_xdp_adjust_tail
From: Nikita V. Shirokov @ 2018-04-18  4:29 UTC (permalink / raw)
  To: Alexei Starovoitov, Daniel Borkmann, Michael Chan
  Cc: netdev, Nikita V. Shirokov
In-Reply-To: <20180418042951.17183-1-tehnerd@tehnerd.com>

w/ bpf_xdp_adjust_tail helper xdp's data_end pointer could be changed as
well (only "decrease" of pointer's location is going to be supported).
changing of this pointer will change packet's size.
for bnxt driver we will just calculate packet's length unconditionally

Acked-by: Alexei Starovoitov <ast@kernel.org>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
index 1389ab5e05df..1f0e872d0667 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
@@ -113,10 +113,10 @@ bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons,
 	if (tx_avail != bp->tx_ring_size)
 		*event &= ~BNXT_RX_EVENT;
 
+	*len = xdp.data_end - xdp.data;
 	if (orig_data != xdp.data) {
 		offset = xdp.data - xdp.data_hard_start;
 		*data_ptr = xdp.data_hard_start + offset;
-		*len = xdp.data_end - xdp.data;
 	}
 	switch (act) {
 	case XDP_PASS:
-- 
2.15.1

^ permalink raw reply related

* [PATCH bpf-next v2 05/11] bpf: make cavium thunder compatible w/ bpf_xdp_adjust_tail
From: Nikita V. Shirokov @ 2018-04-18  4:29 UTC (permalink / raw)
  To: Alexei Starovoitov, Daniel Borkmann, Robert Richter,
	Sunil Goutham
  Cc: netdev, Nikita V. Shirokov
In-Reply-To: <20180418042951.17183-1-tehnerd@tehnerd.com>

w/ bpf_xdp_adjust_tail helper xdp's data_end pointer could be changed as
well (only "decrease" of pointer's location is going to be supported).
changing of this pointer will change packet's size.
for cavium's thunder driver we will just calculate packet's length
unconditionally

Acked-by: Alexei Starovoitov <ast@kernel.org>
---
 drivers/net/ethernet/cavium/thunder/nicvf_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
index 707db3304396..7135db45927e 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
@@ -538,9 +538,9 @@ static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog,
 	action = bpf_prog_run_xdp(prog, &xdp);
 	rcu_read_unlock();
 
+	len = xdp.data_end - xdp.data;
 	/* Check if XDP program has changed headers */
 	if (orig_data != xdp.data) {
-		len = xdp.data_end - xdp.data;
 		offset = orig_data - xdp.data;
 		dma_addr -= offset;
 	}
-- 
2.15.1

^ permalink raw reply related

* [PATCH bpf-next v2 06/11] bpf: make netronome nfp compatible w/ bpf_xdp_adjust_tail
From: Nikita V. Shirokov @ 2018-04-18  4:29 UTC (permalink / raw)
  To: Alexei Starovoitov, Daniel Borkmann, Jakub Kicinski
  Cc: netdev, Nikita V. Shirokov
In-Reply-To: <20180418042951.17183-1-tehnerd@tehnerd.com>

w/ bpf_xdp_adjust_tail helper xdp's data_end pointer could be changed as
well (only "decrease" of pointer's location is going to be supported).
changing of this pointer will change packet's size.
for nfp driver we will just calculate packet's length unconditionally

Acked-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
 drivers/net/ethernet/netronome/nfp/nfp_net_common.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index 1eb6549f2a54..d9111c077699 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -1722,7 +1722,7 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget)
 
 			act = bpf_prog_run_xdp(xdp_prog, &xdp);
 
-			pkt_len -= xdp.data - orig_data;
+			pkt_len = xdp.data_end - xdp.data;
 			pkt_off += xdp.data - orig_data;
 
 			switch (act) {
-- 
2.15.1

^ permalink raw reply related

* [PATCH bpf-next v2 07/11] bpf: make tun compatible w/ bpf_xdp_adjust_tail
From: Nikita V. Shirokov @ 2018-04-18  4:29 UTC (permalink / raw)
  To: Alexei Starovoitov, Daniel Borkmann, Jason Wang, David S. Miller,
	Michael S. Tsirkin
  Cc: netdev, Nikita V. Shirokov
In-Reply-To: <20180418042951.17183-1-tehnerd@tehnerd.com>

w/ bpf_xdp_adjust_tail helper xdp's data_end pointer could be changed as
well (only "decrease" of pointer's location is going to be supported).
changing of this pointer will change packet's size.
for tun driver we need to adjust XDP_PASS handling by recalculating
length of the packet if it was passed to the TCP/IP stack
(in case if after xdp's prog run data_end pointer was adjusted)

Reviewed-by: Jason Wang <jasowang@redhat.com>
---
 drivers/net/tun.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 1e58be152d5c..901351a6ed21 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1696,6 +1696,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
 			return NULL;
 		case XDP_PASS:
 			delta = orig_data - xdp.data;
+			len = xdp.data_end - xdp.data;
 			break;
 		default:
 			bpf_warn_invalid_xdp_action(act);
@@ -1716,7 +1717,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
 	}
 
 	skb_reserve(skb, pad - delta);
-	skb_put(skb, len + delta);
+	skb_put(skb, len);
 	get_page(alloc_frag->page);
 	alloc_frag->offset += buflen;
 
-- 
2.15.1

^ permalink raw reply related

* [PATCH bpf-next v2 08/11] bpf: make virtio compatible w/ bpf_xdp_adjust_tail
From: Nikita V. Shirokov @ 2018-04-18  4:29 UTC (permalink / raw)
  To: Alexei Starovoitov, Daniel Borkmann,
	Michael S. Tsirkin <mst@redhat.com>, Jason Wang
  Cc: netdev, Nikita V. Shirokov
In-Reply-To: <20180418042951.17183-1-tehnerd@tehnerd.com>

w/ bpf_xdp_adjust_tail helper xdp's data_end pointer could be changed as
well (only "decrease" of pointer's location is going to be supported).
changing of this pointer will change packet's size.
for virtio driver we need to adjust XDP_PASS handling by recalculating
length of the packet if it was passed to the TCP/IP stack

Reviewed-by: Jason Wang <jasowang@redhat.com>
---
 drivers/net/virtio_net.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 01694e26f03e..779a4f798522 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -606,6 +606,7 @@ static struct sk_buff *receive_small(struct net_device *dev,
 		case XDP_PASS:
 			/* Recalculate length in case bpf program changed it */
 			delta = orig_data - xdp.data;
+			len = xdp.data_end - xdp.data;
 			break;
 		case XDP_TX:
 			xdpf = convert_to_xdp_frame(&xdp);
@@ -642,7 +643,7 @@ static struct sk_buff *receive_small(struct net_device *dev,
 		goto err;
 	}
 	skb_reserve(skb, headroom - delta);
-	skb_put(skb, len + delta);
+	skb_put(skb, len);
 	if (!delta) {
 		buf += header_offset;
 		memcpy(skb_vnet_hdr(skb), buf, vi->hdr_len);
@@ -757,6 +758,10 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 			offset = xdp.data -
 					page_address(xdp_page) - vi->hdr_len;
 
+			/* recalculate len if xdp.data or xdp.data_end were
+			 * adjusted
+			 */
+			len = xdp.data_end - xdp.data;
 			/* We can only create skb based on xdp_page. */
 			if (unlikely(xdp_page != page)) {
 				rcu_read_unlock();
-- 
2.15.1

^ permalink raw reply related

* [PATCH bpf-next v2 09/11] bpf: making bpf_prog_test run aware of possible data_end ptr change
From: Nikita V. Shirokov @ 2018-04-18  4:29 UTC (permalink / raw)
  To: Alexei Starovoitov, Daniel Borkmann; +Cc: netdev, Nikita V. Shirokov
In-Reply-To: <20180418042951.17183-1-tehnerd@tehnerd.com>

after introduction of bpf_xdp_adjust_tail helper packet length
could be changed not only if xdp->data pointer has been changed
but xdp->data_end as well. making bpf_prog_test_run aware of this
possibility
---
 net/bpf/test_run.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 2ced48662c1f..68c3578343b4 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -170,7 +170,8 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
 	xdp.rxq = &rxqueue->xdp_rxq;
 
 	retval = bpf_test_run(prog, &xdp, repeat, &duration);
-	if (xdp.data != data + XDP_PACKET_HEADROOM + NET_IP_ALIGN)
+	if (xdp.data != data + XDP_PACKET_HEADROOM + NET_IP_ALIGN ||
+	    xdp.data_end != xdp.data + size)
 		size = xdp.data_end - xdp.data;
 	ret = bpf_test_finish(kattr, uattr, xdp.data, size, retval, duration);
 	kfree(data);
-- 
2.15.1

^ permalink raw reply related

* [PATCH bpf-next v2 10/11] bpf: adding tests for bpf_xdp_adjust_tail
From: Nikita V. Shirokov @ 2018-04-18  4:29 UTC (permalink / raw)
  To: Alexei Starovoitov, Daniel Borkmann; +Cc: netdev, Nikita V. Shirokov
In-Reply-To: <20180418042951.17183-1-tehnerd@tehnerd.com>

Adding selftests for the bpf_xdp_adjust_tail helper. In this synthetic test
we are testing that 1) if data_end < data, the helper will return EINVAL, and
2) for the normal use case, the packet's length is reduced.
---
 tools/include/uapi/linux/bpf.h                 | 10 +++++++-
 tools/testing/selftests/bpf/Makefile           |  2 +-
 tools/testing/selftests/bpf/bpf_helpers.h      |  3 +++
 tools/testing/selftests/bpf/test_adjust_tail.c | 30 ++++++++++++++++++++++++
 tools/testing/selftests/bpf/test_progs.c       | 32 ++++++++++++++++++++++++++
 5 files changed, 75 insertions(+), 2 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/test_adjust_tail.c

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 9d07465023a2..56bf493ba7ed 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -755,6 +755,13 @@ union bpf_attr {
  *     @addr: pointer to struct sockaddr to bind socket to
  *     @addr_len: length of sockaddr structure
  *     Return: 0 on success or negative error code
+ *
+ * int bpf_xdp_adjust_tail(xdp_md, delta)
+ *     Adjust the xdp_md.data_end by delta. Only shrinking of packet's
+ *     size is supported.
+ *     @xdp_md: pointer to xdp_md
+ *     @delta: A negative integer to be added to xdp_md.data_end
+ *     Return: 0 on success or negative on error
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -821,7 +828,8 @@ union bpf_attr {
 	FN(msg_apply_bytes),		\
 	FN(msg_cork_bytes),		\
 	FN(msg_pull_data),		\
-	FN(bind),
+	FN(bind),			\
+	FN(xdp_adjust_tail),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 0a315ddabbf4..3e819dc70bee 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -31,7 +31,7 @@ TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test
 	sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o test_tracepoint.o \
 	test_l4lb_noinline.o test_xdp_noinline.o test_stacktrace_map.o \
 	sample_map_ret0.o test_tcpbpf_kern.o test_stacktrace_build_id.o \
-	sockmap_tcp_msg_prog.o connect4_prog.o connect6_prog.o
+	sockmap_tcp_msg_prog.o connect4_prog.o connect6_prog.o test_adjust_tail.o
 
 # Order correspond to 'make run_tests' order
 TEST_PROGS := test_kmod.sh \
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
index d8223d99f96d..50c607014b22 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -96,6 +96,9 @@ static int (*bpf_msg_pull_data)(void *ctx, int start, int end, int flags) =
 	(void *) BPF_FUNC_msg_pull_data;
 static int (*bpf_bind)(void *ctx, void *addr, int addr_len) =
 	(void *) BPF_FUNC_bind;
+static int (*bpf_xdp_adjust_tail)(void *ctx, int offset) =
+	(void *) BPF_FUNC_xdp_adjust_tail;
+
 
 /* llvm builtin functions that eBPF C program may use to
  * emit BPF_LD_ABS and BPF_LD_IND instructions
diff --git a/tools/testing/selftests/bpf/test_adjust_tail.c b/tools/testing/selftests/bpf/test_adjust_tail.c
new file mode 100644
index 000000000000..4cd5e860c903
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_adjust_tail.c
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0
+ * Copyright (c) 2018 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include "bpf_helpers.h"
+
+int _version SEC("version") = 1;
+
+SEC("xdp_adjust_tail")
+int _xdp_adjust_tail(struct xdp_md *xdp)
+{
+	void *data_end = (void *)(long)xdp->data_end;
+	void *data = (void *)(long)xdp->data;
+	int offset = 0;
+
+	if (data_end - data == 54)
+		offset = 256;
+	else
+		offset = 20;
+	if (bpf_xdp_adjust_tail(xdp, 0 - offset))
+		return XDP_DROP;
+	return XDP_TX;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index faadbe233966..eedda98d7bb1 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -166,6 +166,37 @@ static void test_xdp(void)
 	bpf_object__close(obj);
 }
 
+static void test_xdp_adjust_tail(void)
+{
+	const char *file = "./test_adjust_tail.o";
+	struct bpf_object *obj;
+	char buf[128];
+	__u32 duration, retval, size;
+	int err, prog_fd;
+
+	err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
+	if (err) {
+		error_cnt++;
+		return;
+	}
+
+	err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
+				buf, &size, &retval, &duration);
+
+	CHECK(err || errno || retval != XDP_DROP,
+	      "ipv4", "err %d errno %d retval %d size %d\n",
+	      err, errno, retval, size);
+
+	err = bpf_prog_test_run(prog_fd, 1, &pkt_v6, sizeof(pkt_v6),
+				buf, &size, &retval, &duration);
+	CHECK(err || errno || retval != XDP_TX || size != 54,
+	      "ipv6", "err %d errno %d retval %d size %d\n",
+	      err, errno, retval, size);
+	bpf_object__close(obj);
+}
+
+
+
 #define MAGIC_VAL 0x1234
 #define NUM_ITER 100000
 #define VIP_NUM 5
@@ -1177,6 +1208,7 @@ int main(void)
 {
 	test_pkt_access();
 	test_xdp();
+	test_xdp_adjust_tail();
 	test_l4lb_all();
 	test_xdp_noinline();
 	test_tcp_estats();
-- 
2.15.1

^ permalink raw reply related

* [PATCH bpf-next v2 11/11] bpf: add bpf_xdp_adjust_tail sample prog
From: Nikita V. Shirokov @ 2018-04-18  4:29 UTC (permalink / raw)
  To: Alexei Starovoitov, Daniel Borkmann; +Cc: netdev, Nikita V. Shirokov
In-Reply-To: <20180418042951.17183-1-tehnerd@tehnerd.com>

Add a BPF sample program that uses the bpf_xdp_adjust_tail helper to
generate an ICMPv4 "packet too big" message if the ingress packet's size is
bigger than 600 bytes.
---
 samples/bpf/Makefile                      |   4 +
 samples/bpf/xdp_adjust_tail_kern.c        | 152 ++++++++++++++++++++++++++++++
 samples/bpf/xdp_adjust_tail_user.c        | 142 ++++++++++++++++++++++++++++
 tools/testing/selftests/bpf/bpf_helpers.h |   2 +
 4 files changed, 300 insertions(+)
 create mode 100644 samples/bpf/xdp_adjust_tail_kern.c
 create mode 100644 samples/bpf/xdp_adjust_tail_user.c

diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 4d6a6edd4bf6..aa8c392e2e52 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -44,6 +44,7 @@ hostprogs-y += xdp_monitor
 hostprogs-y += xdp_rxq_info
 hostprogs-y += syscall_tp
 hostprogs-y += cpustat
+hostprogs-y += xdp_adjust_tail
 
 # Libbpf dependencies
 LIBBPF := ../../tools/lib/bpf/bpf.o ../../tools/lib/bpf/nlattr.o
@@ -95,6 +96,7 @@ xdp_monitor-objs := bpf_load.o $(LIBBPF) xdp_monitor_user.o
 xdp_rxq_info-objs := bpf_load.o $(LIBBPF) xdp_rxq_info_user.o
 syscall_tp-objs := bpf_load.o $(LIBBPF) syscall_tp_user.o
 cpustat-objs := bpf_load.o $(LIBBPF) cpustat_user.o
+xdp_adjust_tail-objs := bpf_load.o $(LIBBPF) xdp_adjust_tail_user.o
 
 # Tell kbuild to always build the programs
 always := $(hostprogs-y)
@@ -148,6 +150,7 @@ always += xdp_rxq_info_kern.o
 always += xdp2skb_meta_kern.o
 always += syscall_tp_kern.o
 always += cpustat_kern.o
+always += xdp_adjust_tail_kern.o
 
 HOSTCFLAGS += -I$(objtree)/usr/include
 HOSTCFLAGS += -I$(srctree)/tools/lib/
@@ -193,6 +196,7 @@ HOSTLOADLIBES_xdp_monitor += -lelf
 HOSTLOADLIBES_xdp_rxq_info += -lelf
 HOSTLOADLIBES_syscall_tp += -lelf
 HOSTLOADLIBES_cpustat += -lelf
+HOSTLOADLIBES_xdp_adjust_tail += -lelf
 
 # Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline:
 #  make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang
diff --git a/samples/bpf/xdp_adjust_tail_kern.c b/samples/bpf/xdp_adjust_tail_kern.c
new file mode 100644
index 000000000000..411fdb21f8bc
--- /dev/null
+++ b/samples/bpf/xdp_adjust_tail_kern.c
@@ -0,0 +1,152 @@
+/* SPDX-License-Identifier: GPL-2.0
+ * Copyright (c) 2018 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program shows how to use bpf_xdp_adjust_tail() by
+ * generating ICMPv4 "packet too big" messages (to be more precise for v4:
+ * destination unreachable / fragmentation needed with the DF bit set) when
+ * receiving packets bigger than 600 bytes.
+ */
+#define KBUILD_MODNAME "foo"
+#include <uapi/linux/bpf.h>
+#include <linux/in.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/if_vlan.h>
+#include <linux/ip.h>
+#include <linux/icmp.h>
+#include "bpf_helpers.h"
+
+#define DEFAULT_TTL 64
+#define MAX_PCKT_SIZE 600
+#define ICMP_TOOBIG_SIZE 98
+#define ICMP_TOOBIG_PAYLOAD_SIZE 92
+
+struct bpf_map_def SEC("maps") icmpcnt = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(__u64),
+	.max_entries = 1,
+};
+
+static __always_inline void count_icmp(void)
+{
+	u64 key = 0;
+	u64 *icmp_count;
+
+	icmp_count = bpf_map_lookup_elem(&icmpcnt, &key);
+	if (icmp_count)
+		*icmp_count += 1;
+}
+
+static __always_inline void swap_mac(void *data, struct ethhdr *orig_eth)
+{
+	struct ethhdr *eth;
+
+	eth = data;
+	memcpy(eth->h_source, orig_eth->h_dest, ETH_ALEN);
+	memcpy(eth->h_dest, orig_eth->h_source, ETH_ALEN);
+	eth->h_proto = orig_eth->h_proto;
+}
+
+static __always_inline __u16 csum_fold_helper(__u32 csum)
+{
+	return ~((csum & 0xffff) + (csum >> 16));
+}
+
+static __always_inline void ipv4_csum(void *data_start, int data_size,
+				      __u32 *csum)
+{
+	*csum = bpf_csum_diff(0, 0, data_start, data_size, *csum);
+	*csum = csum_fold_helper(*csum);
+}
+
+static __always_inline int send_icmp4_too_big(struct xdp_md *xdp)
+{
+	int headroom = (int)sizeof(struct iphdr) + (int)sizeof(struct icmphdr);
+
+	if (bpf_xdp_adjust_head(xdp, 0 - headroom))
+		return XDP_DROP;
+	void *data = (void *)(long)xdp->data;
+	void *data_end = (void *)(long)xdp->data_end;
+
+	if (data + (ICMP_TOOBIG_SIZE + headroom) > data_end)
+		return XDP_DROP;
+
+	struct iphdr *iph, *orig_iph;
+	struct icmphdr *icmp_hdr;
+	struct ethhdr *orig_eth;
+	__u32 csum = 0;
+	__u64 off = 0;
+
+	orig_eth = data + headroom;
+	swap_mac(data, orig_eth);
+	off += sizeof(struct ethhdr);
+	iph = data + off;
+	off += sizeof(struct iphdr);
+	icmp_hdr = data + off;
+	off += sizeof(struct icmphdr);
+	orig_iph = data + off;
+	icmp_hdr->type = ICMP_DEST_UNREACH;
+	icmp_hdr->code = ICMP_FRAG_NEEDED;
+	icmp_hdr->un.frag.mtu = htons(MAX_PCKT_SIZE-sizeof(struct ethhdr));
+	icmp_hdr->checksum = 0;
+	ipv4_csum(icmp_hdr, ICMP_TOOBIG_PAYLOAD_SIZE, &csum);
+	icmp_hdr->checksum = csum;
+	iph->ttl = DEFAULT_TTL;
+	iph->daddr = orig_iph->saddr;
+	iph->saddr = orig_iph->daddr;
+	iph->version = 4;
+	iph->ihl = 5;
+	iph->protocol = IPPROTO_ICMP;
+	iph->tos = 0;
+	iph->tot_len = htons(
+		ICMP_TOOBIG_SIZE + headroom - sizeof(struct ethhdr));
+	iph->check = 0;
+	csum = 0;
+	ipv4_csum(iph, sizeof(struct iphdr), &csum);
+	iph->check = csum;
+	count_icmp();
+	return XDP_TX;
+}
+
+
+static __always_inline int handle_ipv4(struct xdp_md *xdp)
+{
+	void *data_end = (void *)(long)xdp->data_end;
+	void *data = (void *)(long)xdp->data;
+	int pckt_size = data_end - data;
+	int offset;
+
+	if (pckt_size > MAX_PCKT_SIZE) {
+		offset = pckt_size - ICMP_TOOBIG_SIZE;
+		if (bpf_xdp_adjust_tail(xdp, 0 - offset))
+			return XDP_PASS;
+		return send_icmp4_too_big(xdp);
+	}
+	return XDP_PASS;
+}
+
+SEC("xdp_icmp")
+int _xdp_icmp(struct xdp_md *xdp)
+{
+	void *data_end = (void *)(long)xdp->data_end;
+	void *data = (void *)(long)xdp->data;
+	struct ethhdr *eth = data;
+	__u16 h_proto;
+
+	if (eth + 1 > data_end)
+		return XDP_DROP;
+
+	h_proto = eth->h_proto;
+
+	if (h_proto == htons(ETH_P_IP))
+		return handle_ipv4(xdp);
+	else
+		return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/xdp_adjust_tail_user.c b/samples/bpf/xdp_adjust_tail_user.c
new file mode 100644
index 000000000000..f621a541b574
--- /dev/null
+++ b/samples/bpf/xdp_adjust_tail_user.c
@@ -0,0 +1,142 @@
+/* SPDX-License-Identifier: GPL-2.0
+ * Copyright (c) 2018 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <linux/bpf.h>
+#include <linux/if_link.h>
+#include <assert.h>
+#include <errno.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/resource.h>
+#include <arpa/inet.h>
+#include <netinet/ether.h>
+#include <unistd.h>
+#include <time.h>
+#include "bpf_load.h"
+#include "libbpf.h"
+#include "bpf_util.h"
+
+#define STATS_INTERVAL_S 2U
+
+static int ifindex = -1;
+static __u32 xdp_flags;
+
+static void int_exit(int sig)
+{
+	if (ifindex > -1)
+		bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
+	exit(0);
+}
+
+/* simple "icmp packet too big sent" counter
+ */
+static void poll_stats(unsigned int kill_after_s)
+{
+	time_t started_at = time(NULL);
+	__u64 value = 0;
+	int key = 0;
+
+
+	while (!kill_after_s || time(NULL) - started_at <= kill_after_s) {
+		sleep(STATS_INTERVAL_S);
+
+		assert(bpf_map_lookup_elem(map_fd[0], &key, &value) == 0);
+
+		printf("icmp \"packet too big\" sent: %10llu pkts\n", value);
+	}
+}
+
+static void usage(const char *cmd)
+{
+	printf("Start a XDP prog which send ICMP \"packet too big\" \n"
+		"messages if ingress packet is bigger then MAX_SIZE bytes\n");
+	printf("Usage: %s [...]\n", cmd);
+	printf("    -i <ifindex> Interface Index\n");
+	printf("    -T <stop-after-X-seconds> Default: 0 (forever)\n");
+	printf("    -S use skb-mode\n");
+	printf("    -N enforce native mode\n");
+	printf("    -h Display this help\n");
+}
+
+int main(int argc, char **argv)
+{
+	unsigned char opt_flags[256] = {};
+	unsigned int kill_after_s = 0;
+	const char *optstr = "i:T:SNh";
+	struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+	char filename[256];
+	int opt;
+	int i;
+
+
+	for (i = 0; i < strlen(optstr); i++)
+		if (optstr[i] != 'h' && 'a' <= optstr[i] && optstr[i] <= 'z')
+			opt_flags[(unsigned char)optstr[i]] = 1;
+
+	while ((opt = getopt(argc, argv, optstr)) != -1) {
+
+		switch (opt) {
+		case 'i':
+			ifindex = atoi(optarg);
+			break;
+		case 'T':
+			kill_after_s = atoi(optarg);
+			break;
+		case 'S':
+			xdp_flags |= XDP_FLAGS_SKB_MODE;
+			break;
+		case 'N':
+			xdp_flags |= XDP_FLAGS_DRV_MODE;
+			break;
+		default:
+			usage(argv[0]);
+			return 1;
+		}
+		opt_flags[opt] = 0;
+	}
+
+	for (i = 0; i < strlen(optstr); i++) {
+		if (opt_flags[(unsigned int)optstr[i]]) {
+			fprintf(stderr, "Missing argument -%c\n", optstr[i]);
+			usage(argv[0]);
+			return 1;
+		}
+	}
+
+	if (setrlimit(RLIMIT_MEMLOCK, &r)) {
+		perror("setrlimit(RLIMIT_MEMLOCK, RLIM_INFINITY)");
+		return 1;
+	}
+
+	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+	if (load_bpf_file(filename)) {
+		printf("%s", bpf_log_buf);
+		return 1;
+	}
+
+	if (!prog_fd[0]) {
+		printf("load_bpf_file: %s\n", strerror(errno));
+		return 1;
+	}
+
+	signal(SIGINT, int_exit);
+	signal(SIGTERM, int_exit);
+
+	if (bpf_set_link_xdp_fd(ifindex, prog_fd[0], xdp_flags) < 0) {
+		printf("link set xdp fd failed\n");
+		return 1;
+	}
+
+	poll_stats(kill_after_s);
+
+	bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
index 50c607014b22..9271576bdc8f 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -132,6 +132,8 @@ static int (*bpf_l3_csum_replace)(void *ctx, int off, int from, int to, int flag
 	(void *) BPF_FUNC_l3_csum_replace;
 static int (*bpf_l4_csum_replace)(void *ctx, int off, int from, int to, int flags) =
 	(void *) BPF_FUNC_l4_csum_replace;
+static int (*bpf_csum_diff)(void *from, int from_size, void *to, int to_size, int seed) =
+	(void *) BPF_FUNC_csum_diff;
 static int (*bpf_skb_under_cgroup)(void *ctx, void *map, int index) =
 	(void *) BPF_FUNC_skb_under_cgroup;
 static int (*bpf_skb_change_head)(void *, int len, int flags) =
-- 
2.15.1

^ permalink raw reply related

* Re: [PATCH net-next v4 0/3] kernel: add support to collect hardware logs in crash recovery kernel
From: Dave Young @ 2018-04-18  6:15 UTC (permalink / raw)
  To: Rahul Lakkireddy
  Cc: netdev, kexec, linux-fsdevel, linux-kernel, indranil, nirranjan,
	stephen, ganeshgr, ebiederm, akpm, torvalds, davem, viro
In-Reply-To: <cover.1523950321.git.rahul.lakkireddy@chelsio.com>

Hi Rahul,
On 04/17/18 at 01:14pm, Rahul Lakkireddy wrote:
> On production servers running variety of workloads over time, kernel
> panic can happen sporadically after days or even months. It is
> important to collect as much debug logs as possible to root cause
> and fix the problem, that may not be easy to reproduce. Snapshot of
> underlying hardware/firmware state (like register dump, firmware
> logs, adapter memory, etc.), at the time of kernel panic will be very
> helpful while debugging the culprit device driver.
> 
> This series of patches add new generic framework that enable device
> drivers to collect device specific snapshot of the hardware/firmware
> state of the underlying device in the crash recovery kernel. In crash
> recovery kernel, the collected logs are added as elf notes to
> /proc/vmcore, which is copied by user space scripts for post-analysis.
> 
> The sequence of actions done by device drivers to append their device
> specific hardware/firmware logs to /proc/vmcore are as follows:
> 
> 1. During probe (before hardware is initialized), device drivers
> register to the vmcore module (via vmcore_add_device_dump()), with
> callback function, along with buffer size and log name needed for
> firmware/hardware log collection.

I assumed the elf notes info should be prepared during the kexec_[file_]load
phase. But I did not read the earlier comments, so I am not sure whether this
has been discussed or not.

If this is done in the 2nd kernel, one question is that the driver can be
loaded later than vmcore init. How do you guarantee that the function works
if vmcore reading happens before the driver is loaded?

Also, it is possible that the kdump initramfs does not contain the driver
module.

Am I missing something?

> 
> 2. vmcore module allocates the buffer with requested size. It adds
> an elf note and invokes the device driver's registered callback
> function.
> 
> 3. Device driver collects all hardware/firmware logs into the buffer
> and returns control back to vmcore module.
> 
> The device specific hardware/firmware logs can be seen as elf notes:
> 
> # readelf -n /proc/vmcore
> 
> Displaying notes found at file offset 0x00001000 with length 0x04003288:
>   Owner                 Data size	Description
>   VMCOREDD_cxgb4_0000:02:00.4 0x02000fd8	Unknown note type: (0x00000700)
>   VMCOREDD_cxgb4_0000:04:00.4 0x02000fd8	Unknown note type: (0x00000700)
>   CORE                 0x00000150	NT_PRSTATUS (prstatus structure)
>   CORE                 0x00000150	NT_PRSTATUS (prstatus structure)
>   CORE                 0x00000150	NT_PRSTATUS (prstatus structure)
>   CORE                 0x00000150	NT_PRSTATUS (prstatus structure)
>   CORE                 0x00000150	NT_PRSTATUS (prstatus structure)
>   CORE                 0x00000150	NT_PRSTATUS (prstatus structure)
>   CORE                 0x00000150	NT_PRSTATUS (prstatus structure)
>   CORE                 0x00000150	NT_PRSTATUS (prstatus structure)
>   VMCOREINFO           0x0000074f	Unknown note type: (0x00000000)
> 
> Patch 1 adds API to vmcore module to allow drivers to register callback
> to collect the device specific hardware/firmware logs.  The logs will
> be added to /proc/vmcore as elf notes.
> 
> Patch 2 updates read and mmap logic to append device specific hardware/
> firmware logs as elf notes.
> 
> Patch 3 shows a cxgb4 driver example using the API to collect
> hardware/firmware logs in crash recovery kernel, before hardware is
> initialized.
> 
> Thanks,
> Rahul
> 
> RFC v1: https://lkml.org/lkml/2018/3/2/542
> RFC v2: https://lkml.org/lkml/2018/3/16/326
> 
> ---
> v4:
> - Made __vmcore_add_device_dump() static.
> - Moved compile check to define vmcore_add_device_dump() to
>   crash_dump.h to fix compilation when vmcore.c is not compiled in.
> - Convert ---help--- to help in Kconfig as indicated by checkpatch.
> - Rebased to tip.
> 
> v3:
> - Dropped sysfs crashdd module.
> - Exported dumps as elf notes. Suggested by Eric Biederman
>   <ebiederm@xmission.com>.  Added as patch 2 in this version.
> - Added CONFIG_PROC_VMCORE_DEVICE_DUMP to allow configuring device
>   dump support.
> - Moved logic related to adding dumps from crashdd to vmcore module.
> - Rename all crashdd* to vmcoredd*.
> - Updated comments.
> 
> v2:
> - Added ABI Documentation for crashdd.
> - Directly use octal permission instead of macro.
> 
> Changes since rfc v2:
> - Moved exporting crashdd from procfs to sysfs. Suggested by
>   Stephen Hemminger <stephen@networkplumber.org>
> - Moved code from fs/proc/crashdd.c to fs/crashdd/ directory.
> - Replaced all proc API with sysfs API and updated comments.
> - Calling driver callback before creating the binary file under
>   crashdd sysfs.
> - Changed binary dump file permission from S_IRUSR to S_IRUGO.
> - Changed module name from CRASH_DRIVER_DUMP to CRASH_DEVICE_DUMP.
> 
> rfc v2:
> - Collecting logs in 2nd kernel instead of during kernel panic.
>   Suggested by Eric Biederman <ebiederm@xmission.com>.
> - Added new crashdd module that exports /proc/crashdd/ containing
>   driver's registered hardware/firmware logs in patch 1.
> - Replaced the API to allow drivers to register their hardware/firmware
>   log collect routine in crash recovery kernel in patch 1.
> - Updated patch 2 to use the new API in patch 1.
> 
> Rahul Lakkireddy (3):
>   vmcore: add API to collect hardware dump in second kernel
>   vmcore: append device dumps to vmcore as elf notes
>   cxgb4: collect hardware dump in second kernel
> 
>  drivers/net/ethernet/chelsio/cxgb4/cxgb4.h       |   4 +
>  drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c |  25 ++
>  drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.h |   3 +
>  drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c  |  10 +
>  fs/proc/Kconfig                                  |  10 +
>  fs/proc/vmcore.c                                 | 399 ++++++++++++++++++++++-
>  include/linux/crash_core.h                       |   4 +
>  include/linux/crash_dump.h                       |  17 +
>  include/linux/kcore.h                            |   6 +
>  include/uapi/linux/elf.h                         |   1 +
>  10 files changed, 466 insertions(+), 13 deletions(-)
> 
> -- 
> 2.14.1
> 
> 
> _______________________________________________
> kexec mailing list
> kexec@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/kexec

Thanks
Dave

^ permalink raw reply

* [PATCH net-next 0/4] tracking TCP data delivery and ECN stats
From: Yuchung Cheng @ 2018-04-18  6:18 UTC (permalink / raw)
  To: davem; +Cc: netdev, edumazet, ncardwell, soheil, Yuchung Cheng

This patch series improves tracking of the data delivery status:
  1. minor improvement on SYN data
  2. accounting bytes delivered with CE marks
  3. exporting the delivery stats to applications

so that users can get a better sense of TCP performance at the per-host,
per-connection, and even per-application-message level.

Yuchung Cheng (4):
  tcp: better delivery accounting for SYN-ACK and SYN-data
  tcp: new helper to calculate newly delivered
  tcp: track total bytes delivered with ECN CE marks
  tcp: export packets delivery info

 include/linux/tcp.h       |  1 +
 include/uapi/linux/snmp.h |  2 ++
 include/uapi/linux/tcp.h  |  5 +++++
 net/ipv4/proc.c           |  2 ++
 net/ipv4/tcp.c            |  8 +++++++-
 net/ipv4/tcp_input.c      | 33 ++++++++++++++++++++++++++++-----
 6 files changed, 45 insertions(+), 6 deletions(-)

-- 
2.17.0.484.g0c8726318c-goog

^ permalink raw reply

* [PATCH net-next 1/4] tcp: better delivery accounting for SYN-ACK and SYN-data
From: Yuchung Cheng @ 2018-04-18  6:18 UTC (permalink / raw)
  To: davem; +Cc: netdev, edumazet, ncardwell, soheil, Yuchung Cheng
In-Reply-To: <20180418061849.220459-1-ycheng@google.com>

the tcp_sock:delivered has inconsistent accounting for SYN and FIN.
1. it counts pure FIN
2. it counts pure SYN
3. it counts SYN-data twice
4. it does not count SYN-ACK

From a congestion control perspective it does not matter much, as C.C. only
cares about the difference, not the absolute value. But the next patch
would export this field to user-space, so it's better to report the absolute
value w/o these caveats.

This patch counts SYN, SYN-ACK, or SYN-data delivery once always in
the "delivered" field.

Signed-off-by: Yuchung Cheng <ycheng@google.com>
Reviewed-by: Neal Cardwell <ncardwell@google.com>
Reviewed-by: Soheil Hassas Yeganeh <soheil@google.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
---
 net/ipv4/tcp_input.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index f93687f97d80..2499248d4a67 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5567,9 +5567,12 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
 		return true;
 	}
 	tp->syn_data_acked = tp->syn_data;
-	if (tp->syn_data_acked)
-		NET_INC_STATS(sock_net(sk),
-				LINUX_MIB_TCPFASTOPENACTIVE);
+	if (tp->syn_data_acked) {
+		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVE);
+		/* SYN-data is counted as two separate packets in tcp_ack() */
+		if (tp->delivered > 1)
+			--tp->delivered;
+	}
 
 	tcp_fastopen_add_skb(sk, synack);
 
@@ -5901,6 +5904,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
 	}
 	switch (sk->sk_state) {
 	case TCP_SYN_RECV:
+		tp->delivered++; /* SYN-ACK delivery isn't tracked in tcp_ack */
 		if (!tp->srtt_us)
 			tcp_synack_rtt_meas(sk, req);
 
-- 
2.17.0.484.g0c8726318c-goog

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox