Netdev List
 help / color / mirror / Atom feed
* [RFC PATCH 3/6] net: ethernet: ti: cpsw: add MQPRIO Qdisc offload
From: Ivan Khoronzhuk @ 2018-05-18 21:15 UTC (permalink / raw)
  To: grygorii.strashko, davem
  Cc: corbet, akpm, netdev, linux-doc, linux-kernel, linux-omap,
	vinicius.gomes, henrik, jesus.sanchez-palencia, Ivan Khoronzhuk
In-Reply-To: <20180518211510.13341-1-ivan.khoronzhuk@linaro.org>

It is possible to offload the vlan to tc priority mapping under the
assumption that sk_prio == L2 prio.

Example:
$ ethtool -L eth0 rx 1 tx 4

$ tc qdisc replace dev eth0 handle 100: parent root mqprio num_tc 3 \
map 2 2 1 0 2 2 2 2 2 2 2 2 2 2 2 2 queues 1@0 1@1 2@2 hw 1

$ tc -g class show dev eth0
+---(100:ffe2) mqprio
|    +---(100:3) mqprio
|    +---(100:4) mqprio
|    
+---(100:ffe1) mqprio
|    +---(100:2) mqprio
|    
+---(100:ffe0) mqprio
     +---(100:1) mqprio

Here, 100:1 is txq0, 100:2 is txq1, 100:3 is txq2, 100:4 is txq3
txq0 belongs to tc0, txq1 to tc1, txq2 and txq3 to tc2
The offload part only maps L2 prio to traffic classes, not to
transmit queues, so to direct traffic to a traffic class, a vlan has
to be created with an appropriate egress map.

Signed-off-by: Ivan Khoronzhuk <ivan.khoronzhuk@linaro.org>
---
 drivers/net/ethernet/ti/cpsw.c | 82 ++++++++++++++++++++++++++++++++++
 1 file changed, 82 insertions(+)

diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index 9bd615da04d3..4b232cda5436 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -39,6 +39,7 @@
 #include <linux/sys_soc.h>
 
 #include <linux/pinctrl/consumer.h>
+#include <net/pkt_cls.h>
 
 #include "cpsw.h"
 #include "cpsw_ale.h"
@@ -153,6 +154,8 @@ do {								\
 #define IRQ_NUM			2
 #define CPSW_MAX_QUEUES		8
 #define CPSW_CPDMA_DESCS_POOL_SIZE_DEFAULT 256
+#define CPSW_TC_NUM			4
+#define CPSW_FIFO_SHAPERS_NUM		(CPSW_TC_NUM - 1)
 
 #define CPSW_RX_VLAN_ENCAP_HDR_PRIO_SHIFT	29
 #define CPSW_RX_VLAN_ENCAP_HDR_PRIO_MSK		GENMASK(2, 0)
@@ -453,6 +456,7 @@ struct cpsw_priv {
 	u8				mac_addr[ETH_ALEN];
 	bool				rx_pause;
 	bool				tx_pause;
+	bool				mqprio_hw;
 	u32 emac_port;
 	struct cpsw_common *cpsw;
 };
@@ -1577,6 +1581,14 @@ static void cpsw_slave_stop(struct cpsw_slave *slave, struct cpsw_common *cpsw)
 	soft_reset_slave(slave);
 }
 
+static int cpsw_tc_to_fifo(int tc, int num_tc)
+{
+	if (tc == num_tc - 1)
+		return 0;
+
+	return CPSW_FIFO_SHAPERS_NUM - tc;
+}
+
 static int cpsw_ndo_open(struct net_device *ndev)
 {
 	struct cpsw_priv *priv = netdev_priv(ndev);
@@ -2190,6 +2202,75 @@ static int cpsw_ndo_set_tx_maxrate(struct net_device *ndev, int queue, u32 rate)
 	return ret;
 }
 
+static int cpsw_set_tc(struct net_device *ndev, void *type_data)
+{
+	struct tc_mqprio_qopt_offload *mqprio = type_data;
+	struct cpsw_priv *priv = netdev_priv(ndev);
+	struct cpsw_common *cpsw = priv->cpsw;
+	int fifo, num_tc, count, offset;
+	struct cpsw_slave *slave;
+	u32 tx_prio_map = 0;
+	int i, tc, ret;
+
+	num_tc = mqprio->qopt.num_tc;
+	if (num_tc > CPSW_TC_NUM)
+		return -EINVAL;
+
+	if (mqprio->mode != TC_MQPRIO_MODE_DCB)
+		return -EINVAL;
+
+	ret = pm_runtime_get_sync(cpsw->dev);
+	if (ret < 0) {
+		pm_runtime_put_noidle(cpsw->dev);
+		return ret;
+	}
+
+	if (num_tc) {
+		for (i = 0; i < 8; i++) {
+			tc = mqprio->qopt.prio_tc_map[i];
+			fifo = cpsw_tc_to_fifo(tc, num_tc);
+			tx_prio_map |= fifo << (4 * i);
+		}
+
+		netdev_set_num_tc(ndev, num_tc);
+		for (i = 0; i < num_tc; i++) {
+			count = mqprio->qopt.count[i];
+			offset = mqprio->qopt.offset[i];
+			netdev_set_tc_queue(ndev, i, count, offset);
+		}
+	}
+
+	if (!mqprio->qopt.hw) {
+		/* restore default configuration */
+		netdev_reset_tc(ndev);
+		tx_prio_map = TX_PRIORITY_MAPPING;
+	}
+
+	priv->mqprio_hw = mqprio->qopt.hw;
+
+	offset = cpsw->version == CPSW_VERSION_1 ?
+		 CPSW1_TX_PRI_MAP : CPSW2_TX_PRI_MAP;
+
+	slave = &cpsw->slaves[cpsw_slave_index(cpsw, priv)];
+	slave_write(slave, tx_prio_map, offset);
+
+	pm_runtime_put_sync(cpsw->dev);
+
+	return 0;
+}
+
+static int cpsw_ndo_setup_tc(struct net_device *ndev, enum tc_setup_type type,
+			     void *type_data)
+{
+	switch (type) {
+	case TC_SETUP_QDISC_MQPRIO:
+		return cpsw_set_tc(ndev, type_data);
+
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
 static const struct net_device_ops cpsw_netdev_ops = {
 	.ndo_open		= cpsw_ndo_open,
 	.ndo_stop		= cpsw_ndo_stop,
@@ -2205,6 +2286,7 @@ static const struct net_device_ops cpsw_netdev_ops = {
 #endif
 	.ndo_vlan_rx_add_vid	= cpsw_ndo_vlan_rx_add_vid,
 	.ndo_vlan_rx_kill_vid	= cpsw_ndo_vlan_rx_kill_vid,
+	.ndo_setup_tc           = cpsw_ndo_setup_tc,
 };
 
 static int cpsw_get_regs_len(struct net_device *ndev)
-- 
2.17.0

^ permalink raw reply related

* [RFC PATCH 5/6] net: ethernet: ti: cpsw: restore shaper configuration while down/up
From: Ivan Khoronzhuk @ 2018-05-18 21:15 UTC (permalink / raw)
  To: grygorii.strashko, davem
  Cc: corbet, akpm, netdev, linux-doc, linux-kernel, linux-omap,
	vinicius.gomes, henrik, jesus.sanchez-palencia, Ivan Khoronzhuk
In-Reply-To: <20180518211510.13341-1-ivan.khoronzhuk@linaro.org>

The shaper configuration needs to be restored after the interface has
gone down/up. This is needed as the appropriate configuration is still
replicated in the kernel settings. This restores only the shaper
context, so the vlan configuration should be restored by the user if
needed, especially for devices with one port where vlan frames are sent
via ALE.

Signed-off-by: Ivan Khoronzhuk <ivan.khoronzhuk@linaro.org>
---
 drivers/net/ethernet/ti/cpsw.c | 47 ++++++++++++++++++++++++++++++++++
 1 file changed, 47 insertions(+)

diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index c7710b0e1c17..c3e88be36c1b 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -1807,6 +1807,51 @@ static int cpsw_set_cbs(struct net_device *ndev,
 	return ret;
 }
 
+static void cpsw_cbs_resume(struct cpsw_slave *slave, struct cpsw_priv *priv)
+{
+	int fifo, bw;
+
+	for (fifo = CPSW_FIFO_SHAPERS_NUM; fifo > 0; fifo--) {
+		bw = priv->fifo_bw[fifo];
+		if (!bw)
+			continue;
+
+		cpsw_set_fifo_rlimit(priv, fifo, bw);
+	}
+}
+
+static void cpsw_mqprio_resume(struct cpsw_slave *slave, struct cpsw_priv *priv)
+{
+	struct cpsw_common *cpsw = priv->cpsw;
+	u32 tx_prio_map = 0;
+	int i, tc, fifo;
+	u32 tx_prio_rg;
+
+	if (!priv->mqprio_hw)
+		return;
+
+	for (i = 0; i < 8; i++) {
+		tc = netdev_get_prio_tc_map(priv->ndev, i);
+		fifo = CPSW_FIFO_SHAPERS_NUM - tc;
+		tx_prio_map |= fifo << (4 * i);
+	}
+
+	tx_prio_rg = cpsw->version == CPSW_VERSION_1 ?
+		     CPSW1_TX_PRI_MAP : CPSW2_TX_PRI_MAP;
+
+	slave_write(slave, tx_prio_map, tx_prio_rg);
+}
+
+/* restore resources after port reset */
+static void cpsw_restore(struct cpsw_priv *priv)
+{
+	/* restore MQPRIO offload */
+	for_each_slave(priv, cpsw_mqprio_resume, priv);
+
+	/* restore CBS offload */
+	for_each_slave(priv, cpsw_cbs_resume, priv);
+}
+
 static int cpsw_ndo_open(struct net_device *ndev)
 {
 	struct cpsw_priv *priv = netdev_priv(ndev);
@@ -1886,6 +1931,8 @@ static int cpsw_ndo_open(struct net_device *ndev)
 
 	}
 
+	cpsw_restore(priv);
+
 	/* Enable Interrupt pacing if configured */
 	if (cpsw->coal_intvl != 0) {
 		struct ethtool_coalesce coal;
-- 
2.17.0

^ permalink raw reply related

* Re: [patch net-next 0/5] devlink: introduce port flavours and common phys_port_name generation
From: Jakub Kicinski @ 2018-05-18 21:16 UTC (permalink / raw)
  To: Jiri Pirko
  Cc: netdev, davem, idosch, mlxsw, andrew, vivien.didelot, f.fainelli,
	michael.chan, ganeshgr, saeedm, simon.horman,
	pieter.jansenvanvuuren, john.hurley, dirk.vandermerwe,
	alexander.h.duyck, ogerlitz, dsahern, vijaya.guvva,
	satananda.burla, raghu.vatsavayi, felix.manlunas, gospo,
	sathya.perla, vasundhara-v.volam, tariqt, eranbe,
	jeffrey.t.kirsher, roopa
In-Reply-To: <20180518072904.29523-1-jiri@resnulli.us>

On Fri, 18 May 2018 09:28:59 +0200, Jiri Pirko wrote:
> From: Jiri Pirko <jiri@mellanox.com>
> 
> This patchset resolves 2 issues we have right now:
> 1) There are many netdevices / ports in the system, for port, pf, vf
>    representation but the user has no way to see which is which
> 2) The ndo_get_phys_port_name is implemented in each driver separately,
>    which may lead to inconsistent names between drivers.
> 
> This patchset introduces port flavours which should address the first
> problem. In this initial patchset, I focus on DSA and their port
> flavours. As a follow-up, I plan to add PF and VF representor flavours.
> However, that needs additional dependencies in drivers (nfp, mlx5).
> 
> The common phys_port_name generation is used by mlxsw. An example output
> for mlxsw looks like this:

FWIW this series LGTM!

^ permalink raw reply

* Re: [PATCH 05/15] mtd: nand: pxa3xx: remove the dmaengine compat need
From: Daniel Mack @ 2018-05-18 21:31 UTC (permalink / raw)
  To: Robert Jarzmik, Haojian Zhuang, Bartlomiej Zolnierkiewicz,
	Tejun Heo, Vinod Koul, Mauro Carvalho Chehab, Ulf Hansson,
	Ezequiel Garcia, Boris Brezillon, David Woodhouse, Brian Norris,
	Marek Vasut, Richard Weinberger, Cyrille Pitchen, Nicolas Pitre,
	Samuel Ortiz, Greg Kroah-Hartman, Jaroslav Kysela, Takashi Iwai,
	Liam Girdwood, Mark Brown, Arnd Bergmann
  Cc: devel, alsa-devel, netdev, linux-mmc, linux-kernel, linux-ide,
	linux-mtd, dmaengine, Robert Jarzmik, linux-arm-kernel,
	linux-media
In-Reply-To: <20180402142656.26815-6-robert.jarzmik@free.fr>

[-- Attachment #1: Type: text/plain, Size: 761 bytes --]

Hi Robert,

Thanks for this series.

On Monday, April 02, 2018 04:26 PM, Robert Jarzmik wrote:
> From: Robert Jarzmik <robert.jarzmik@renault.com>
> 
> As the pxa architecture switched towards the dmaengine slave map, the
> old compatibility mechanism to acquire the dma requestor line number and
> priority is not needed anymore.
> 
> This patch simplifies the dma resource acquisition, using the more
> generic function dma_request_slave_channel().
> 
> Signed-off-by: Robert Jarzmik <robert.jarzmik@free.fr>
> ---
>   drivers/mtd/nand/pxa3xx_nand.c | 10 +---------

This driver was replaced by drivers/mtd/nand/raw/marvell_nand.c 
recently, so this patch can be dropped. I attached a version for the new 
driver which you can pick instead.


Thanks,
Daniel

[-- Attachment #2: 0001-mtd-rawnand-marvell-remove-dmaengine-compat-code.patch --]
[-- Type: text/x-patch, Size: 1633 bytes --]

>From c63bc40bdfe2d596e42919235840109a2f1b2776 Mon Sep 17 00:00:00 2001
From: Daniel Mack <daniel@zonque.org>
Date: Sat, 12 May 2018 21:50:13 +0200
Subject: [PATCH] mtd: rawnand: marvell: remove dmaengine compat code

As the pxa architecture switched towards the dmaengine slave map, the
old compatibility mechanism to acquire the dma requestor line number and
priority is not needed anymore.

This patch simplifies the dma resource acquisition, using the more
generic function dma_request_slave_channel().

Signed-off-by: Daniel Mack <daniel@zonque.org>
---
 drivers/mtd/nand/raw/marvell_nand.c | 11 +----------
 1 file changed, 1 insertion(+), 10 deletions(-)

diff --git a/drivers/mtd/nand/raw/marvell_nand.c b/drivers/mtd/nand/raw/marvell_nand.c
index ebb1d141b900..30017cd7d91c 100644
--- a/drivers/mtd/nand/raw/marvell_nand.c
+++ b/drivers/mtd/nand/raw/marvell_nand.c
@@ -2612,8 +2612,6 @@ static int marvell_nfc_init_dma(struct marvell_nfc *nfc)
 						    dev);
 	struct dma_slave_config config = {};
 	struct resource *r;
-	dma_cap_mask_t mask;
-	struct pxad_param param;
 	int ret;
 
 	if (!IS_ENABLED(CONFIG_PXA_DMA)) {
@@ -2632,14 +2630,7 @@ static int marvell_nfc_init_dma(struct marvell_nfc *nfc)
 		return -ENXIO;
 	}
 
-	param.drcmr = r->start;
-	param.prio = PXAD_PRIO_LOWEST;
-	dma_cap_zero(mask);
-	dma_cap_set(DMA_SLAVE, mask);
-	nfc->dma_chan =
-		dma_request_slave_channel_compat(mask, pxad_filter_fn,
-						 &param, nfc->dev,
-						 "data");
+	nfc->dma_chan = dma_request_slave_channel(nfc->dev, "data");
 	if (!nfc->dma_chan) {
 		dev_err(nfc->dev,
 			"Unable to request data DMA channel\n");
-- 
2.14.3


[-- Attachment #3: Type: text/plain, Size: 176 bytes --]

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply related

* [PATCH v2] isdn: eicon: fix a missing-check bug
From: Wenwen Wang @ 2018-05-18 21:33 UTC (permalink / raw)
  To: Wenwen Wang
  Cc: Kangjie Lu, Armin Schindler, Karsten Keil,
	open list:ISDN SUBSYSTEM, open list

In divasmain.c, the function divas_write() firstly invokes the function
diva_xdi_open_adapter() to open the adapter that matches with the adapter
number provided by the user, and then invokes the function diva_xdi_write()
to perform the write operation using the matched adapter. The two functions
diva_xdi_open_adapter() and diva_xdi_write() are located in diva.c.

In diva_xdi_open_adapter(), the user command is copied to the object 'msg'
from the userspace pointer 'src' through the function pointer 'cp_fn',
which eventually calls copy_from_user() to do the copy. Then, the adapter
number 'msg.adapter' is used to find out a matched adapter from the
'adapter_queue'. A matched adapter will be returned if it is found.
Otherwise, NULL is returned to indicate the failure of the verification on
the adapter number.

As mentioned above, if a matched adapter is returned, the function
diva_xdi_write() is invoked to perform the write operation. In this
function, the user command is copied once again from the userspace pointer
'src', which is the same as the 'src' pointer in diva_xdi_open_adapter() as
both of them are from the 'buf' pointer in divas_write(). Similarly, the
copy is achieved through the function pointer 'cp_fn', which finally calls
copy_from_user(). After the successful copy, the corresponding command
processing handler of the matched adapter is invoked to perform the write
operation.

It is obvious that there are two copies here from userspace, one is in
diva_xdi_open_adapter(), and one is in diva_xdi_write(). Plus, both of
these two copies share the same source userspace pointer, i.e., the 'buf'
pointer in divas_write(). Given that a malicious userspace process can race
to change the content pointed to by the 'buf' pointer, this can pose potential
security issues. For example, in the first copy, the user provides a valid
adapter number to pass the verification process and a valid adapter can be
found. Then the user can modify the adapter number to an invalid number.
This way, the user can bypass the verification process of the adapter
number and inject inconsistent data.

This patch reuses the data copied in
diva_xdi_open_adapter() and passes it to diva_xdi_write(). This way, the
above issues can be avoided.

Signed-off-by: Wenwen Wang <wang6495@umn.edu>
---
 drivers/isdn/hardware/eicon/diva.c      | 20 +++++++++++++-------
 drivers/isdn/hardware/eicon/diva.h      |  5 +++--
 drivers/isdn/hardware/eicon/divasmain.c | 18 +++++++++++-------
 3 files changed, 27 insertions(+), 16 deletions(-)

diff --git a/drivers/isdn/hardware/eicon/diva.c b/drivers/isdn/hardware/eicon/diva.c
index 944a7f3..fa239d8 100644
--- a/drivers/isdn/hardware/eicon/diva.c
+++ b/drivers/isdn/hardware/eicon/diva.c
@@ -388,10 +388,9 @@ void divasa_xdi_driver_unload(void)
 **  Receive and process command from user mode utility
 */
 void *diva_xdi_open_adapter(void *os_handle, const void __user *src,
-			    int length,
+			    int length, diva_xdi_um_cfg_cmd_t *msg,
 			    divas_xdi_copy_from_user_fn_t cp_fn)
 {
-	diva_xdi_um_cfg_cmd_t msg;
 	diva_os_xdi_adapter_t *a = NULL;
 	diva_os_spin_lock_magic_t old_irql;
 	struct list_head *tmp;
@@ -401,21 +400,21 @@ void *diva_xdi_open_adapter(void *os_handle, const void __user *src,
 			 length, sizeof(diva_xdi_um_cfg_cmd_t)))
 			return NULL;
 	}
-	if ((*cp_fn) (os_handle, &msg, src, sizeof(msg)) <= 0) {
+	if ((*cp_fn) (os_handle, msg, src, sizeof(*msg)) <= 0) {
 		DBG_ERR(("A: A(?) open, write error"))
 			return NULL;
 	}
 	diva_os_enter_spin_lock(&adapter_lock, &old_irql, "open_adapter");
 	list_for_each(tmp, &adapter_queue) {
 		a = list_entry(tmp, diva_os_xdi_adapter_t, link);
-		if (a->controller == (int)msg.adapter)
+		if (a->controller == (int)msg->adapter)
 			break;
 		a = NULL;
 	}
 	diva_os_leave_spin_lock(&adapter_lock, &old_irql, "open_adapter");
 
 	if (!a) {
-		DBG_ERR(("A: A(%d) open, adapter not found", msg.adapter))
+		DBG_ERR(("A: A(%d) open, adapter not found", msg->adapter))
 			}
 
 	return (a);
@@ -437,7 +436,8 @@ void diva_xdi_close_adapter(void *adapter, void *os_handle)
 
 int
 diva_xdi_write(void *adapter, void *os_handle, const void __user *src,
-	       int length, divas_xdi_copy_from_user_fn_t cp_fn)
+	       int length, diva_xdi_um_cfg_cmd_t *msg,
+	       divas_xdi_copy_from_user_fn_t cp_fn)
 {
 	diva_os_xdi_adapter_t *a = (diva_os_xdi_adapter_t *) adapter;
 	void *data;
@@ -459,7 +459,13 @@ diva_xdi_write(void *adapter, void *os_handle, const void __user *src,
 			return (-2);
 	}
 
-	length = (*cp_fn) (os_handle, data, src, length);
+	if (msg) {
+		*(diva_xdi_um_cfg_cmd_t *)data = *msg;
+		length = (*cp_fn) (os_handle, (char *)data + sizeof(*msg),
+				   src + sizeof(*msg), length - sizeof(*msg));
+	} else {
+		length = (*cp_fn) (os_handle, data, src, length);
+	}
 	if (length > 0) {
 		if ((*(a->interface.cmd_proc))
 		    (a, (diva_xdi_um_cfg_cmd_t *) data, length)) {
diff --git a/drivers/isdn/hardware/eicon/diva.h b/drivers/isdn/hardware/eicon/diva.h
index b067032..eb454c5 100644
--- a/drivers/isdn/hardware/eicon/diva.h
+++ b/drivers/isdn/hardware/eicon/diva.h
@@ -20,10 +20,11 @@ int diva_xdi_read(void *adapter, void *os_handle, void __user *dst,
 		  int max_length, divas_xdi_copy_to_user_fn_t cp_fn);
 
 int diva_xdi_write(void *adapter, void *os_handle, const void __user *src,
-		   int length, divas_xdi_copy_from_user_fn_t cp_fn);
+		   int length, diva_xdi_um_cfg_cmd_t *msg,
+		   divas_xdi_copy_from_user_fn_t cp_fn);
 
 void *diva_xdi_open_adapter(void *os_handle, const void __user *src,
-			    int length,
+			    int length, diva_xdi_um_cfg_cmd_t *msg,
 			    divas_xdi_copy_from_user_fn_t cp_fn);
 
 void diva_xdi_close_adapter(void *adapter, void *os_handle);
diff --git a/drivers/isdn/hardware/eicon/divasmain.c b/drivers/isdn/hardware/eicon/divasmain.c
index b9980e8..b6a3950 100644
--- a/drivers/isdn/hardware/eicon/divasmain.c
+++ b/drivers/isdn/hardware/eicon/divasmain.c
@@ -591,19 +591,22 @@ static int divas_release(struct inode *inode, struct file *file)
 static ssize_t divas_write(struct file *file, const char __user *buf,
 			   size_t count, loff_t *ppos)
 {
+	diva_xdi_um_cfg_cmd_t msg;
 	int ret = -EINVAL;
 
 	if (!file->private_data) {
 		file->private_data = diva_xdi_open_adapter(file, buf,
-							   count,
+							   count, &msg,
 							   xdi_copy_from_user);
-	}
-	if (!file->private_data) {
-		return (-ENODEV);
+		if (!file->private_data)
+			return (-ENODEV);
+		ret = diva_xdi_write(file->private_data, file,
+				     buf, count, &msg, xdi_copy_from_user);
+	} else {
+		ret = diva_xdi_write(file->private_data, file,
+				     buf, count, NULL, xdi_copy_from_user);
 	}
 
-	ret = diva_xdi_write(file->private_data, file,
-			     buf, count, xdi_copy_from_user);
 	switch (ret) {
 	case -1:		/* Message should be removed from rx mailbox first */
 		ret = -EBUSY;
@@ -622,11 +625,12 @@ static ssize_t divas_write(struct file *file, const char __user *buf,
 static ssize_t divas_read(struct file *file, char __user *buf,
 			  size_t count, loff_t *ppos)
 {
+	diva_xdi_um_cfg_cmd_t msg;
 	int ret = -EINVAL;
 
 	if (!file->private_data) {
 		file->private_data = diva_xdi_open_adapter(file, buf,
-							   count,
+							   count, &msg,
 							   xdi_copy_from_user);
 	}
 	if (!file->private_data) {
-- 
2.7.4

^ permalink raw reply related

* Re: [PATCH iproute2] Allow to configure /var/run/netns directory
From: Stephen Hemminger @ 2018-05-18 21:53 UTC (permalink / raw)
  To: Pavel Maltsev; +Cc: netdev, lorenzo
In-Reply-To: <20180515214946.222797-1-pavelm@google.com>

On Tue, 15 May 2018 14:49:46 -0700
Pavel Maltsev <pavelm@google.com> wrote:

> Currently NETNS_RUN_DIR is hardcoded and refers to /var/run/netns.
> However, some systems (e.g. Android) don't have /var,
> which results in errors when attempting to create network namespaces on
> these systems.  This change makes NETNS_RUN_DIR configurable at build time
> by allowing an environment variable to be passed to the configure script.
> 
> For example: NETNS_RUN_DIR=/mnt/vendor/netns ./configure && make
> 
> Tested: verified that iproute2 with configuration mentioned above
> creates namespaces in /mnt/vendor/netns
> 
> Signed-off-by: Pavel Maltsev <pavelm@google.com>

The directory path should definitely be overridable at build time.
The configure script is already messy enough; let's instead do it the way
the other runtime directories, ARPDDIR and CONFDIR, are already done.

Something like?

diff --git a/Makefile b/Makefile
index b526d3b5b5c4..ab828669e711 100644
--- a/Makefile
+++ b/Makefile
@@ -16,6 +16,7 @@ PREFIX?=/usr
 LIBDIR?=$(PREFIX)/lib
 SBINDIR?=/sbin
 CONFDIR?=/etc/iproute2
+NETNS_RUN_DIR?=/var/run/netns
 DATADIR?=$(PREFIX)/share
 HDRDIR?=$(PREFIX)/include/iproute2
 DOCDIR?=$(DATADIR)/doc/iproute2
@@ -34,7 +35,7 @@ ifneq ($(SHARED_LIBS),y)
 DEFINES+= -DNO_SHARED_LIBS
 endif
 
-DEFINES+=-DCONFDIR=\"$(CONFDIR)\"
+DEFINES+=-DCONFDIR=\"$(CONFDIR)\" -DNETNS_RUN_DIR=\"$(NETNS_RUN_DIR)\"
 
 #options for decnet
 ADDLIB+=dnet_ntop.o dnet_pton.o
diff --git a/include/namespace.h b/include/namespace.h
index aed7ce08507f..e47f9b5d49d1 100644
--- a/include/namespace.h
+++ b/include/namespace.h
@@ -8,8 +8,13 @@
 #include <sys/syscall.h>
 #include <errno.h>
 
+#ifndef NETNS_RUN_DIR
 #define NETNS_RUN_DIR "/var/run/netns"
+#endif
+
+#ifndef NETNS_ETC_DIR
 #define NETNS_ETC_DIR "/etc/netns"
+#endif
 
 #ifndef CLONE_NEWNET
 #define CLONE_NEWNET 0x40000000	/* New network namespace (lo, device, names sockets, etc) */

^ permalink raw reply related

* Re: [PATCH net-next v2 1/3] net: ethernet: ti: Allow most drivers with COMPILE_TEST
From: kbuild test robot @ 2018-05-18 21:54 UTC (permalink / raw)
  To: Florian Fainelli
  Cc: kbuild-all, netdev, fugang.duan, Florian Fainelli,
	David S. Miller, Andrew Lunn, open list
In-Reply-To: <20180516185258.20508-2-f.fainelli@gmail.com>

[-- Attachment #1: Type: text/plain, Size: 19228 bytes --]

Hi Florian,

I love your patch! Perhaps something to improve:

[auto build test WARNING on net-next/master]

url:    https://github.com/0day-ci/linux/commits/Florian-Fainelli/net-ethernet-ti-Allow-most-drivers-with-COMPILE_TEST/20180519-043005
config: ia64-allmodconfig (attached as .config)
compiler: ia64-linux-gcc (GCC) 7.2.0
reproduce:
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        make.cross ARCH=ia64 

All warnings (new ones prefixed by >>):

   drivers/net/ethernet/ti/netcp_core.c: In function 'netcp_free_rx_desc_chain':
>> drivers/net/ethernet/ti/netcp_core.c:613:13: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast]
      buf_ptr = (void *)GET_SW_DATA0(ndesc);
                ^
   drivers/net/ethernet/ti/netcp_core.c:622:12: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast]
     buf_ptr = (void *)GET_SW_DATA0(desc);
               ^
   drivers/net/ethernet/ti/netcp_core.c: In function 'netcp_process_one_rx_packet':
   drivers/net/ethernet/ti/netcp_core.c:681:16: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast]
     org_buf_ptr = (void *)GET_SW_DATA0(desc);
                   ^
   drivers/net/ethernet/ti/netcp_core.c:718:10: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast]
      page = (struct page *)GET_SW_DATA0(ndesc);
             ^
   drivers/net/ethernet/ti/netcp_core.c: In function 'netcp_free_rx_buf':
   drivers/net/ethernet/ti/netcp_core.c:822:13: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast]
      buf_ptr = (void *)GET_SW_DATA0(desc);
                ^
   drivers/net/ethernet/ti/netcp_core.c: In function 'netcp_allocate_rx_buf':
>> drivers/net/ethernet/ti/netcp_core.c:906:16: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
      sw_data[0] = (u32)bufptr;
                   ^
   drivers/net/ethernet/ti/netcp_core.c:919:16: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
      sw_data[0] = (u32)page;
                   ^
   drivers/net/ethernet/ti/netcp_core.c: In function 'netcp_process_tx_compl_packets':
   drivers/net/ethernet/ti/netcp_core.c:1041:9: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast]
      skb = (struct sk_buff *)GET_SW_DATA0(desc);
            ^
   drivers/net/ethernet/ti/netcp_core.c: In function 'netcp_tx_submit_skb':
   drivers/net/ethernet/ti/netcp_core.c:1256:15: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
     SET_SW_DATA0((u32)skb, desc);
                  ^
   drivers/net/ethernet/ti/netcp_core.c:181:49: note: in definition of macro 'SET_SW_DATA0'
    #define SET_SW_DATA0(data, desc) set_sw_data(0, data, desc)
                                                    ^~~~

vim +613 drivers/net/ethernet/ti/netcp_core.c

84640e27 Karicheri, Muralidharan 2015-01-15  591  
84640e27 Karicheri, Muralidharan 2015-01-15  592  static void netcp_free_rx_desc_chain(struct netcp_intf *netcp,
84640e27 Karicheri, Muralidharan 2015-01-15  593  				     struct knav_dma_desc *desc)
84640e27 Karicheri, Muralidharan 2015-01-15  594  {
84640e27 Karicheri, Muralidharan 2015-01-15  595  	struct knav_dma_desc *ndesc;
84640e27 Karicheri, Muralidharan 2015-01-15  596  	dma_addr_t dma_desc, dma_buf;
84640e27 Karicheri, Muralidharan 2015-01-15  597  	unsigned int buf_len, dma_sz = sizeof(*ndesc);
84640e27 Karicheri, Muralidharan 2015-01-15  598  	void *buf_ptr;
958d104e Arnd Bergmann           2015-12-18  599  	u32 tmp;
84640e27 Karicheri, Muralidharan 2015-01-15  600  
84640e27 Karicheri, Muralidharan 2015-01-15  601  	get_words(&dma_desc, 1, &desc->next_desc);
84640e27 Karicheri, Muralidharan 2015-01-15  602  
84640e27 Karicheri, Muralidharan 2015-01-15  603  	while (dma_desc) {
84640e27 Karicheri, Muralidharan 2015-01-15  604  		ndesc = knav_pool_desc_unmap(netcp->rx_pool, dma_desc, dma_sz);
84640e27 Karicheri, Muralidharan 2015-01-15  605  		if (unlikely(!ndesc)) {
84640e27 Karicheri, Muralidharan 2015-01-15  606  			dev_err(netcp->ndev_dev, "failed to unmap Rx desc\n");
84640e27 Karicheri, Muralidharan 2015-01-15  607  			break;
84640e27 Karicheri, Muralidharan 2015-01-15  608  		}
958d104e Arnd Bergmann           2015-12-18  609  		get_pkt_info(&dma_buf, &tmp, &dma_desc, ndesc);
06324481 Karicheri, Muralidharan 2016-02-19  610  		/* warning!!!! We are retrieving the virtual ptr in the sw_data
06324481 Karicheri, Muralidharan 2016-02-19  611  		 * field as a 32bit value. Will not work on 64bit machines
06324481 Karicheri, Muralidharan 2016-02-19  612  		 */
06324481 Karicheri, Muralidharan 2016-02-19 @613  		buf_ptr = (void *)GET_SW_DATA0(ndesc);
06324481 Karicheri, Muralidharan 2016-02-19  614  		buf_len = (int)GET_SW_DATA1(desc);
84640e27 Karicheri, Muralidharan 2015-01-15  615  		dma_unmap_page(netcp->dev, dma_buf, PAGE_SIZE, DMA_FROM_DEVICE);
84640e27 Karicheri, Muralidharan 2015-01-15  616  		__free_page(buf_ptr);
84640e27 Karicheri, Muralidharan 2015-01-15  617  		knav_pool_desc_put(netcp->rx_pool, desc);
84640e27 Karicheri, Muralidharan 2015-01-15  618  	}
06324481 Karicheri, Muralidharan 2016-02-19  619  	/* warning!!!! We are retrieving the virtual ptr in the sw_data
06324481 Karicheri, Muralidharan 2016-02-19  620  	 * field as a 32bit value. Will not work on 64bit machines
06324481 Karicheri, Muralidharan 2016-02-19  621  	 */
06324481 Karicheri, Muralidharan 2016-02-19  622  	buf_ptr = (void *)GET_SW_DATA0(desc);
06324481 Karicheri, Muralidharan 2016-02-19  623  	buf_len = (int)GET_SW_DATA1(desc);
89907779 Arnd Bergmann           2015-12-08  624  
84640e27 Karicheri, Muralidharan 2015-01-15  625  	if (buf_ptr)
84640e27 Karicheri, Muralidharan 2015-01-15  626  		netcp_frag_free(buf_len <= PAGE_SIZE, buf_ptr);
84640e27 Karicheri, Muralidharan 2015-01-15  627  	knav_pool_desc_put(netcp->rx_pool, desc);
84640e27 Karicheri, Muralidharan 2015-01-15  628  }
84640e27 Karicheri, Muralidharan 2015-01-15  629  
84640e27 Karicheri, Muralidharan 2015-01-15  630  static void netcp_empty_rx_queue(struct netcp_intf *netcp)
84640e27 Karicheri, Muralidharan 2015-01-15  631  {
6a8162e9 Michael Scherban        2017-01-06  632  	struct netcp_stats *rx_stats = &netcp->stats;
84640e27 Karicheri, Muralidharan 2015-01-15  633  	struct knav_dma_desc *desc;
84640e27 Karicheri, Muralidharan 2015-01-15  634  	unsigned int dma_sz;
84640e27 Karicheri, Muralidharan 2015-01-15  635  	dma_addr_t dma;
84640e27 Karicheri, Muralidharan 2015-01-15  636  
84640e27 Karicheri, Muralidharan 2015-01-15  637  	for (; ;) {
84640e27 Karicheri, Muralidharan 2015-01-15  638  		dma = knav_queue_pop(netcp->rx_queue, &dma_sz);
84640e27 Karicheri, Muralidharan 2015-01-15  639  		if (!dma)
84640e27 Karicheri, Muralidharan 2015-01-15  640  			break;
84640e27 Karicheri, Muralidharan 2015-01-15  641  
84640e27 Karicheri, Muralidharan 2015-01-15  642  		desc = knav_pool_desc_unmap(netcp->rx_pool, dma, dma_sz);
84640e27 Karicheri, Muralidharan 2015-01-15  643  		if (unlikely(!desc)) {
84640e27 Karicheri, Muralidharan 2015-01-15  644  			dev_err(netcp->ndev_dev, "%s: failed to unmap Rx desc\n",
84640e27 Karicheri, Muralidharan 2015-01-15  645  				__func__);
6a8162e9 Michael Scherban        2017-01-06  646  			rx_stats->rx_errors++;
84640e27 Karicheri, Muralidharan 2015-01-15  647  			continue;
84640e27 Karicheri, Muralidharan 2015-01-15  648  		}
84640e27 Karicheri, Muralidharan 2015-01-15  649  		netcp_free_rx_desc_chain(netcp, desc);
6a8162e9 Michael Scherban        2017-01-06  650  		rx_stats->rx_dropped++;
84640e27 Karicheri, Muralidharan 2015-01-15  651  	}
84640e27 Karicheri, Muralidharan 2015-01-15  652  }
84640e27 Karicheri, Muralidharan 2015-01-15  653  
84640e27 Karicheri, Muralidharan 2015-01-15  654  static int netcp_process_one_rx_packet(struct netcp_intf *netcp)
84640e27 Karicheri, Muralidharan 2015-01-15  655  {
6a8162e9 Michael Scherban        2017-01-06  656  	struct netcp_stats *rx_stats = &netcp->stats;
84640e27 Karicheri, Muralidharan 2015-01-15  657  	unsigned int dma_sz, buf_len, org_buf_len;
84640e27 Karicheri, Muralidharan 2015-01-15  658  	struct knav_dma_desc *desc, *ndesc;
84640e27 Karicheri, Muralidharan 2015-01-15  659  	unsigned int pkt_sz = 0, accum_sz;
84640e27 Karicheri, Muralidharan 2015-01-15  660  	struct netcp_hook_list *rx_hook;
84640e27 Karicheri, Muralidharan 2015-01-15  661  	dma_addr_t dma_desc, dma_buff;
84640e27 Karicheri, Muralidharan 2015-01-15  662  	struct netcp_packet p_info;
84640e27 Karicheri, Muralidharan 2015-01-15  663  	struct sk_buff *skb;
84640e27 Karicheri, Muralidharan 2015-01-15  664  	void *org_buf_ptr;
69d707d0 Karicheri, Muralidharan 2017-01-06  665  	u32 tmp;
84640e27 Karicheri, Muralidharan 2015-01-15  666  
84640e27 Karicheri, Muralidharan 2015-01-15  667  	dma_desc = knav_queue_pop(netcp->rx_queue, &dma_sz);
84640e27 Karicheri, Muralidharan 2015-01-15  668  	if (!dma_desc)
84640e27 Karicheri, Muralidharan 2015-01-15  669  		return -1;
84640e27 Karicheri, Muralidharan 2015-01-15  670  
84640e27 Karicheri, Muralidharan 2015-01-15  671  	desc = knav_pool_desc_unmap(netcp->rx_pool, dma_desc, dma_sz);
84640e27 Karicheri, Muralidharan 2015-01-15  672  	if (unlikely(!desc)) {
84640e27 Karicheri, Muralidharan 2015-01-15  673  		dev_err(netcp->ndev_dev, "failed to unmap Rx desc\n");
84640e27 Karicheri, Muralidharan 2015-01-15  674  		return 0;
84640e27 Karicheri, Muralidharan 2015-01-15  675  	}
84640e27 Karicheri, Muralidharan 2015-01-15  676  
84640e27 Karicheri, Muralidharan 2015-01-15  677  	get_pkt_info(&dma_buff, &buf_len, &dma_desc, desc);
06324481 Karicheri, Muralidharan 2016-02-19  678  	/* warning!!!! We are retrieving the virtual ptr in the sw_data
06324481 Karicheri, Muralidharan 2016-02-19  679  	 * field as a 32bit value. Will not work on 64bit machines
06324481 Karicheri, Muralidharan 2016-02-19  680  	 */
06324481 Karicheri, Muralidharan 2016-02-19  681  	org_buf_ptr = (void *)GET_SW_DATA0(desc);
06324481 Karicheri, Muralidharan 2016-02-19  682  	org_buf_len = (int)GET_SW_DATA1(desc);
84640e27 Karicheri, Muralidharan 2015-01-15  683  
84640e27 Karicheri, Muralidharan 2015-01-15  684  	if (unlikely(!org_buf_ptr)) {
84640e27 Karicheri, Muralidharan 2015-01-15  685  		dev_err(netcp->ndev_dev, "NULL bufptr in desc\n");
84640e27 Karicheri, Muralidharan 2015-01-15  686  		goto free_desc;
84640e27 Karicheri, Muralidharan 2015-01-15  687  	}
84640e27 Karicheri, Muralidharan 2015-01-15  688  
84640e27 Karicheri, Muralidharan 2015-01-15  689  	pkt_sz &= KNAV_DMA_DESC_PKT_LEN_MASK;
84640e27 Karicheri, Muralidharan 2015-01-15  690  	accum_sz = buf_len;
84640e27 Karicheri, Muralidharan 2015-01-15  691  	dma_unmap_single(netcp->dev, dma_buff, buf_len, DMA_FROM_DEVICE);
84640e27 Karicheri, Muralidharan 2015-01-15  692  
84640e27 Karicheri, Muralidharan 2015-01-15  693  	/* Build a new sk_buff for the primary buffer */
84640e27 Karicheri, Muralidharan 2015-01-15  694  	skb = build_skb(org_buf_ptr, org_buf_len);
84640e27 Karicheri, Muralidharan 2015-01-15  695  	if (unlikely(!skb)) {
84640e27 Karicheri, Muralidharan 2015-01-15  696  		dev_err(netcp->ndev_dev, "build_skb() failed\n");
84640e27 Karicheri, Muralidharan 2015-01-15  697  		goto free_desc;
84640e27 Karicheri, Muralidharan 2015-01-15  698  	}
84640e27 Karicheri, Muralidharan 2015-01-15  699  
84640e27 Karicheri, Muralidharan 2015-01-15  700  	/* update data, tail and len */
84640e27 Karicheri, Muralidharan 2015-01-15  701  	skb_reserve(skb, NETCP_SOP_OFFSET);
84640e27 Karicheri, Muralidharan 2015-01-15  702  	__skb_put(skb, buf_len);
84640e27 Karicheri, Muralidharan 2015-01-15  703  
84640e27 Karicheri, Muralidharan 2015-01-15  704  	/* Fill in the page fragment list */
84640e27 Karicheri, Muralidharan 2015-01-15  705  	while (dma_desc) {
84640e27 Karicheri, Muralidharan 2015-01-15  706  		struct page *page;
84640e27 Karicheri, Muralidharan 2015-01-15  707  
84640e27 Karicheri, Muralidharan 2015-01-15  708  		ndesc = knav_pool_desc_unmap(netcp->rx_pool, dma_desc, dma_sz);
84640e27 Karicheri, Muralidharan 2015-01-15  709  		if (unlikely(!ndesc)) {
84640e27 Karicheri, Muralidharan 2015-01-15  710  			dev_err(netcp->ndev_dev, "failed to unmap Rx desc\n");
84640e27 Karicheri, Muralidharan 2015-01-15  711  			goto free_desc;
84640e27 Karicheri, Muralidharan 2015-01-15  712  		}
84640e27 Karicheri, Muralidharan 2015-01-15  713  
84640e27 Karicheri, Muralidharan 2015-01-15  714  		get_pkt_info(&dma_buff, &buf_len, &dma_desc, ndesc);
06324481 Karicheri, Muralidharan 2016-02-19  715  		/* warning!!!! We are retrieving the virtual ptr in the sw_data
06324481 Karicheri, Muralidharan 2016-02-19  716  		 * field as a 32bit value. Will not work on 64bit machines
06324481 Karicheri, Muralidharan 2016-02-19  717  		 */
5a717843 Rex Chang               2018-01-16 @718  		page = (struct page *)GET_SW_DATA0(ndesc);
84640e27 Karicheri, Muralidharan 2015-01-15  719  
84640e27 Karicheri, Muralidharan 2015-01-15  720  		if (likely(dma_buff && buf_len && page)) {
84640e27 Karicheri, Muralidharan 2015-01-15  721  			dma_unmap_page(netcp->dev, dma_buff, PAGE_SIZE,
84640e27 Karicheri, Muralidharan 2015-01-15  722  				       DMA_FROM_DEVICE);
84640e27 Karicheri, Muralidharan 2015-01-15  723  		} else {
89907779 Arnd Bergmann           2015-12-08  724  			dev_err(netcp->ndev_dev, "Bad Rx desc dma_buff(%pad), len(%d), page(%p)\n",
89907779 Arnd Bergmann           2015-12-08  725  				&dma_buff, buf_len, page);
84640e27 Karicheri, Muralidharan 2015-01-15  726  			goto free_desc;
84640e27 Karicheri, Muralidharan 2015-01-15  727  		}
84640e27 Karicheri, Muralidharan 2015-01-15  728  
84640e27 Karicheri, Muralidharan 2015-01-15  729  		skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
84640e27 Karicheri, Muralidharan 2015-01-15  730  				offset_in_page(dma_buff), buf_len, PAGE_SIZE);
84640e27 Karicheri, Muralidharan 2015-01-15  731  		accum_sz += buf_len;
84640e27 Karicheri, Muralidharan 2015-01-15  732  
84640e27 Karicheri, Muralidharan 2015-01-15  733  		/* Free the descriptor */
84640e27 Karicheri, Muralidharan 2015-01-15  734  		knav_pool_desc_put(netcp->rx_pool, ndesc);
84640e27 Karicheri, Muralidharan 2015-01-15  735  	}
84640e27 Karicheri, Muralidharan 2015-01-15  736  
84640e27 Karicheri, Muralidharan 2015-01-15  737  	/* check for packet len and warn */
84640e27 Karicheri, Muralidharan 2015-01-15  738  	if (unlikely(pkt_sz != accum_sz))
84640e27 Karicheri, Muralidharan 2015-01-15  739  		dev_dbg(netcp->ndev_dev, "mismatch in packet size(%d) & sum of fragments(%d)\n",
84640e27 Karicheri, Muralidharan 2015-01-15  740  			pkt_sz, accum_sz);
84640e27 Karicheri, Muralidharan 2015-01-15  741  
4cd85a61 Karicheri, Muralidharan 2017-01-06  742  	/* Newer version of the Ethernet switch can trim the Ethernet FCS
4cd85a61 Karicheri, Muralidharan 2017-01-06  743  	 * from the packet and is indicated in hw_cap. So trim it only for
4cd85a61 Karicheri, Muralidharan 2017-01-06  744  	 * older h/w
4cd85a61 Karicheri, Muralidharan 2017-01-06  745  	 */
4cd85a61 Karicheri, Muralidharan 2017-01-06  746  	if (!(netcp->hw_cap & ETH_SW_CAN_REMOVE_ETH_FCS))
84640e27 Karicheri, Muralidharan 2015-01-15  747  		__pskb_trim(skb, skb->len - ETH_FCS_LEN);
84640e27 Karicheri, Muralidharan 2015-01-15  748  
84640e27 Karicheri, Muralidharan 2015-01-15  749  	/* Call each of the RX hooks */
84640e27 Karicheri, Muralidharan 2015-01-15  750  	p_info.skb = skb;
6246168b WingMan Kwok            2016-12-08  751  	skb->dev = netcp->ndev;
84640e27 Karicheri, Muralidharan 2015-01-15  752  	p_info.rxtstamp_complete = false;
69d707d0 Karicheri, Muralidharan 2017-01-06  753  	get_desc_info(&tmp, &p_info.eflags, desc);
69d707d0 Karicheri, Muralidharan 2017-01-06  754  	p_info.epib = desc->epib;
69d707d0 Karicheri, Muralidharan 2017-01-06  755  	p_info.psdata = (u32 __force *)desc->psdata;
69d707d0 Karicheri, Muralidharan 2017-01-06  756  	p_info.eflags = ((p_info.eflags >> KNAV_DMA_DESC_EFLAGS_SHIFT) &
69d707d0 Karicheri, Muralidharan 2017-01-06  757  			 KNAV_DMA_DESC_EFLAGS_MASK);
84640e27 Karicheri, Muralidharan 2015-01-15  758  	list_for_each_entry(rx_hook, &netcp->rxhook_list_head, list) {
84640e27 Karicheri, Muralidharan 2015-01-15  759  		int ret;
84640e27 Karicheri, Muralidharan 2015-01-15  760  
84640e27 Karicheri, Muralidharan 2015-01-15  761  		ret = rx_hook->hook_rtn(rx_hook->order, rx_hook->hook_data,
84640e27 Karicheri, Muralidharan 2015-01-15  762  					&p_info);
84640e27 Karicheri, Muralidharan 2015-01-15  763  		if (unlikely(ret)) {
84640e27 Karicheri, Muralidharan 2015-01-15  764  			dev_err(netcp->ndev_dev, "RX hook %d failed: %d\n",
84640e27 Karicheri, Muralidharan 2015-01-15  765  				rx_hook->order, ret);
69d707d0 Karicheri, Muralidharan 2017-01-06  766  			/* Free the primary descriptor */
6a8162e9 Michael Scherban        2017-01-06  767  			rx_stats->rx_dropped++;
69d707d0 Karicheri, Muralidharan 2017-01-06  768  			knav_pool_desc_put(netcp->rx_pool, desc);
84640e27 Karicheri, Muralidharan 2015-01-15  769  			dev_kfree_skb(skb);
84640e27 Karicheri, Muralidharan 2015-01-15  770  			return 0;
84640e27 Karicheri, Muralidharan 2015-01-15  771  		}
84640e27 Karicheri, Muralidharan 2015-01-15  772  	}
69d707d0 Karicheri, Muralidharan 2017-01-06  773  	/* Free the primary descriptor */
69d707d0 Karicheri, Muralidharan 2017-01-06  774  	knav_pool_desc_put(netcp->rx_pool, desc);
84640e27 Karicheri, Muralidharan 2015-01-15  775  
6a8162e9 Michael Scherban        2017-01-06  776  	u64_stats_update_begin(&rx_stats->syncp_rx);
6a8162e9 Michael Scherban        2017-01-06  777  	rx_stats->rx_packets++;
6a8162e9 Michael Scherban        2017-01-06  778  	rx_stats->rx_bytes += skb->len;
6a8162e9 Michael Scherban        2017-01-06  779  	u64_stats_update_end(&rx_stats->syncp_rx);
84640e27 Karicheri, Muralidharan 2015-01-15  780  
84640e27 Karicheri, Muralidharan 2015-01-15  781  	/* push skb up the stack */
84640e27 Karicheri, Muralidharan 2015-01-15  782  	skb->protocol = eth_type_trans(skb, netcp->ndev);
84640e27 Karicheri, Muralidharan 2015-01-15  783  	netif_receive_skb(skb);
84640e27 Karicheri, Muralidharan 2015-01-15  784  	return 0;
84640e27 Karicheri, Muralidharan 2015-01-15  785  
84640e27 Karicheri, Muralidharan 2015-01-15  786  free_desc:
84640e27 Karicheri, Muralidharan 2015-01-15  787  	netcp_free_rx_desc_chain(netcp, desc);
6a8162e9 Michael Scherban        2017-01-06  788  	rx_stats->rx_errors++;
84640e27 Karicheri, Muralidharan 2015-01-15  789  	return 0;
84640e27 Karicheri, Muralidharan 2015-01-15  790  }
84640e27 Karicheri, Muralidharan 2015-01-15  791  

:::::: The code at line 613 was first introduced by commit
:::::: 0632448134d0ac1450a19d26f90948fde3b558ad net: netcp: rework the code for get/set sw_data in dma desc

:::::: TO: Karicheri, Muralidharan <m-karicheri2@ti.com>
:::::: CC: David S. Miller <davem@davemloft.net>

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 50018 bytes --]

^ permalink raw reply

* Re: [PATCH iproute2] ip link: Do not call ll_name_to_index when creating a new link
From: Stephen Hemminger @ 2018-05-18 22:08 UTC (permalink / raw)
  To: David Ahern; +Cc: netdev
In-Reply-To: <be2fd550-2276-9127-b481-9b8062e31b96@gmail.com>

On Thu, 17 May 2018 18:17:12 -0600
David Ahern <dsahern@gmail.com> wrote:

> On 5/17/18 4:36 PM, Stephen Hemminger wrote:
> > On Thu, 17 May 2018 16:22:37 -0600
> > dsahern@kernel.org wrote:
> >   
> >> From: David Ahern <dsahern@gmail.com>
> >>
> >> Using iproute2 to create a bridge and add 4094 vlans to it can take from
> >> 2 to 3 *minutes*. The reason is the extraneous call to ll_name_to_index.
> >> ll_name_to_index results in an ioctl(SIOCGIFINDEX) call which in turn
> >> invokes dev_load. If the index does not exist, which it won't when
> >> creating a new link, dev_load calls modprobe twice -- once for
> >> netdev-NAME and again for NAME. This is unnecessary overhead for each
> >> link create.
> >>
> >> When ip link is invoked for a new device, there is no reason to
> >> call ll_name_to_index for the new device. With this patch, creating
> >> a bridge and adding 4094 vlans takes less than 3 *seconds*.
> >>
> >> Signed-off-by: David Ahern <dsahern@gmail.com>  
> > 
> > Yes this looks like a real problem.
> > Isn't the cache supposed to reduce this?
> > 
> > Don't like to make lots of special case flags.
> >   
> 
> The device does not exist, so it won't be in any cache. ll_name_to_index
> already checks it though before calling if_nametoindex.

Good point. I just don't like adding more conditional paths in a function;
it is a common source of errors.

What about just pushing the lookup down to the leaf functions that need it?

diff --git a/ip/ip_common.h b/ip/ip_common.h
index 1b89795caa58..49eb7d7bed40 100644
--- a/ip/ip_common.h
+++ b/ip/ip_common.h
@@ -36,7 +36,7 @@ int print_addrlabel(const struct sockaddr_nl *who,
 int print_neigh(const struct sockaddr_nl *who,
 		struct nlmsghdr *n, void *arg);
 int ipaddr_list_link(int argc, char **argv);
-void ipaddr_get_vf_rate(int, int *, int *, int);
+void ipaddr_get_vf_rate(int, int *, int *, const char *);
 void iplink_usage(void) __attribute__((noreturn));
 
 void iproute_reset_filter(int ifindex);
@@ -145,7 +145,7 @@ int lwt_parse_encap(struct rtattr *rta, size_t len, int *argcp, char ***argvp);
 void lwt_print_encap(FILE *fp, struct rtattr *encap_type, struct rtattr *encap);
 
 /* iplink_xdp.c */
-int xdp_parse(int *argc, char ***argv, struct iplink_req *req, __u32 ifindex,
+int xdp_parse(int *argc, char ***argv, struct iplink_req *req, const char *ifname,
 	      bool generic, bool drv, bool offload);
 void xdp_dump(FILE *fp, struct rtattr *tb, bool link, bool details);
 
diff --git a/ip/ipaddress.c b/ip/ipaddress.c
index 75539e057f6a..00da14c6f97c 100644
--- a/ip/ipaddress.c
+++ b/ip/ipaddress.c
@@ -1967,14 +1967,20 @@ ipaddr_loop_each_vf(struct rtattr *tb[], int vfnum, int *min, int *max)
 	exit(1);
 }
 
-void ipaddr_get_vf_rate(int vfnum, int *min, int *max, int idx)
+void ipaddr_get_vf_rate(int vfnum, int *min, int *max, const char *dev)
 {
 	struct nlmsg_chain linfo = { NULL, NULL};
 	struct rtattr *tb[IFLA_MAX+1];
 	struct ifinfomsg *ifi;
 	struct nlmsg_list *l;
 	struct nlmsghdr *n;
-	int len;
+	int idx, len;
+
+	idx = ll_name_to_index(dev);
+	if (idx == 0) {
+		fprintf(stderr, "Device %s does not exist\n", dev);
+		exit(1);
+	}
 
 	if (rtnl_wilddump_request(&rth, AF_UNSPEC, RTM_GETLINK) < 0) {
 		perror("Cannot send dump request");
diff --git a/ip/iplink.c b/ip/iplink.c
index 22afe0221f3c..9ff5f692a1d4 100644
--- a/ip/iplink.c
+++ b/ip/iplink.c
@@ -242,9 +242,10 @@ static int iplink_have_newlink(void)
 }
 #endif /* ! IPLINK_IOCTL_COMPAT */
 
-static int nl_get_ll_addr_len(unsigned int dev_index)
+static int nl_get_ll_addr_len(const char *ifname)
 {
 	int len;
+	int dev_index = ll_name_to_index(ifname);
 	struct iplink_req req = {
 		.n = {
 			.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
@@ -259,6 +260,9 @@ static int nl_get_ll_addr_len(unsigned int dev_index)
 	struct nlmsghdr *answer;
 	struct rtattr *tb[IFLA_MAX+1];
 
+	if (dev_index == 0)
+		return -1;
+
 	if (rtnl_talk(&rth, &req.n, &answer) < 0)
 		return -1;
 
@@ -337,7 +341,7 @@ static void iplink_parse_vf_vlan_info(int vf, int *argcp, char ***argvp,
 }
 
 static int iplink_parse_vf(int vf, int *argcp, char ***argvp,
-			   struct iplink_req *req, int dev_index)
+			   struct iplink_req *req, const char *dev)
 {
 	char new_rate_api = 0, count = 0, override_legacy_rate = 0;
 	struct ifla_vf_rate tivt;
@@ -373,7 +377,7 @@ static int iplink_parse_vf(int vf, int *argcp, char ***argvp,
 		NEXT_ARG();
 		if (matches(*argv, "mac") == 0) {
 			struct ifla_vf_mac ivm = { 0 };
-			int halen = nl_get_ll_addr_len(dev_index);
+			int halen = nl_get_ll_addr_len(dev);
 
 			NEXT_ARG();
 			ivm.vf = vf;
@@ -542,7 +546,7 @@ static int iplink_parse_vf(int vf, int *argcp, char ***argvp,
 		int tmin, tmax;
 
 		if (tivt.min_tx_rate == -1 || tivt.max_tx_rate == -1) {
-			ipaddr_get_vf_rate(tivt.vf, &tmin, &tmax, dev_index);
+			ipaddr_get_vf_rate(tivt.vf, &tmin, &tmax, dev);
 			if (tivt.min_tx_rate == -1)
 				tivt.min_tx_rate = tmin;
 			if (tivt.max_tx_rate == -1)
@@ -583,7 +587,6 @@ int iplink_parse(int argc, char **argv, struct iplink_req *req, char **type)
 	int vf = -1;
 	int numtxqueues = -1;
 	int numrxqueues = -1;
-	int dev_index = 0;
 	int link_netnsid = -1;
 	int index = 0;
 	int group = -1;
@@ -605,10 +608,8 @@ int iplink_parse(int argc, char **argv, struct iplink_req *req, char **type)
 			if (check_ifname(*argv))
 				invarg("\"name\" not a valid ifname", *argv);
 			name = *argv;
-			if (!dev) {
+			if (!dev)
 				dev = name;
-				dev_index = ll_name_to_index(dev);
-			}
 		} else if (strcmp(*argv, "index") == 0) {
 			NEXT_ARG();
 			if (index)
@@ -660,7 +661,7 @@ int iplink_parse(int argc, char **argv, struct iplink_req *req, char **type)
 			bool offload = strcmp(*argv, "xdpoffload") == 0;
 
 			NEXT_ARG();
-			if (xdp_parse(&argc, &argv, req, dev_index,
+			if (xdp_parse(&argc, &argv, req, dev,
 				      generic, drv, offload))
 				exit(-1);
 
@@ -750,10 +751,10 @@ int iplink_parse(int argc, char **argv, struct iplink_req *req, char **type)
 
 			vflist = addattr_nest(&req->n, sizeof(*req),
 					      IFLA_VFINFO_LIST);
-			if (dev_index == 0)
+			if (!dev)
 				missarg("dev");
 
-			len = iplink_parse_vf(vf, &argc, &argv, req, dev_index);
+			len = iplink_parse_vf(vf, &argc, &argv, req, dev);
 			if (len < 0)
 				return -1;
 			addattr_nest_end(&req->n, vflist);
@@ -916,7 +917,6 @@ int iplink_parse(int argc, char **argv, struct iplink_req *req, char **type)
 			if (check_ifname(*argv))
 				invarg("\"dev\" not a valid ifname", *argv);
 			dev = *argv;
-			dev_index = ll_name_to_index(dev);
 		}
 		argc--; argv++;
 	}
@@ -931,8 +931,8 @@ int iplink_parse(int argc, char **argv, struct iplink_req *req, char **type)
 	else if (!strcmp(name, dev))
 		name = dev;
 
-	if (dev_index && addr_len) {
-		int halen = nl_get_ll_addr_len(dev_index);
+	if (dev && addr_len) {
+		int halen = nl_get_ll_addr_len(dev);
 
 		if (halen >= 0 && halen != addr_len) {
 			fprintf(stderr,
diff --git a/ip/iplink_xdp.c b/ip/iplink_xdp.c
index 83826358aa22..dd4fd1fd3a3b 100644
--- a/ip/iplink_xdp.c
+++ b/ip/iplink_xdp.c
@@ -48,8 +48,8 @@ static int xdp_delete(struct xdp_req *xdp)
 	return 0;
 }
 
-int xdp_parse(int *argc, char ***argv, struct iplink_req *req, __u32 ifindex,
-	      bool generic, bool drv, bool offload)
+int xdp_parse(int *argc, char ***argv, struct iplink_req *req,
+	      const char *ifname, bool generic, bool drv, bool offload)
 {
 	struct bpf_cfg_in cfg = {
 		.type = BPF_PROG_TYPE_XDP,
@@ -61,6 +61,8 @@ int xdp_parse(int *argc, char ***argv, struct iplink_req *req, __u32 ifindex,
 	};
 
 	if (offload) {
+		int ifindex = ll_name_to_index(ifname);
+
 		if (!ifindex)
 			incomplete_command();
 		cfg.ifindex = ifindex;

^ permalink raw reply related

* Re: [PATCH bpf-next v2 7/7] tools/bpftool: add perf subcommand
From: Y Song @ 2018-05-18 22:13 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: Yonghong Song, Quentin Monnet, peterz, Alexei Starovoitov,
	Daniel Borkmann, netdev, kernel-team
In-Reply-To: <20180518135127.1f886b67@cakuba>

On Fri, May 18, 2018 at 1:51 PM, Jakub Kicinski
<jakub.kicinski@netronome.com> wrote:
> On Thu, 17 May 2018 22:03:10 -0700, Yonghong Song wrote:
>> The new command "bpftool perf [show | list]" will traverse
>> all processes under /proc, and if any fd is associated
>> with a perf event, it will print out related perf event
>> information. Documentation is also added.
>
> Thanks for the changes, it looks good with some minor nits which can be
> addressed as follow up if there is no other need to respin.  Please
> consider it:
>
> Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>

Most likely will need respin. Will make suggested changes then.

>
>> Below is an example to show the results using bcc commands.
>> Running the following 4 bcc commands:
>>   kprobe:     trace.py '__x64_sys_nanosleep'
>>   kretprobe:  trace.py 'r::__x64_sys_nanosleep'
>>   tracepoint: trace.py 't:syscalls:sys_enter_nanosleep'
>>   uprobe:     trace.py 'p:/home/yhs/a.out:main'
>>
>> The bpftool command line and result:
>>
>>   $ bpftool perf
>>   pid 21711  fd 5: prog_id 5  kprobe  func __x64_sys_write  offset 0
>>   pid 21765  fd 5: prog_id 7  kretprobe  func __x64_sys_nanosleep  offset 0
>>   pid 21767  fd 5: prog_id 8  tracepoint  sys_enter_nanosleep
>>   pid 21800  fd 5: prog_id 9  uprobe  filename /home/yhs/a.out  offset 1159
>>
>>   $ bpftool -j perf
>>   {"pid":21711,"fd":5,"prog_id":5,"attach_info":"kprobe","func":"__x64_sys_write","offset":0}, \
>>   {"pid":21765,"fd":5,"prog_id":7,"attach_info":"kretprobe","func":"__x64_sys_nanosleep","offset":0}, \
>>   {"pid":21767,"fd":5,"prog_id":8,"attach_info":"tracepoint","tracepoint":"sys_enter_nanosleep"}, \
>>   {"pid":21800,"fd":5,"prog_id":9,"attach_info":"uprobe","filename":"/home/yhs/a.out","offset":1159}
>
> nit: this is now an array

Sorry, this was probably changed in the middle of my work. Will fix it in
the next revision.

>
>>   $ bpftool prog
>>   5: kprobe  name probe___x64_sys  tag e495a0c82f2c7a8d  gpl
>>         loaded_at 2018-05-15T04:46:37-0700  uid 0
>>         xlated 200B  not jited  memlock 4096B  map_ids 4
>>   7: kprobe  name probe___x64_sys  tag f2fdee479a503abf  gpl
>>         loaded_at 2018-05-15T04:48:32-0700  uid 0
>>         xlated 200B  not jited  memlock 4096B  map_ids 7
>>   8: tracepoint  name tracepoint__sys  tag 5390badef2395fcf  gpl
>>         loaded_at 2018-05-15T04:48:48-0700  uid 0
>>         xlated 200B  not jited  memlock 4096B  map_ids 8
>>   9: kprobe  name probe_main_1  tag 0a87bdc2e2953b6d  gpl
>>         loaded_at 2018-05-15T04:49:52-0700  uid 0
>>         xlated 200B  not jited  memlock 4096B  map_ids 9
>>
>>   $ ps ax | grep "python ./trace.py"
>>   21711 pts/0    T      0:03 python ./trace.py __x64_sys_write
>>   21765 pts/0    S+     0:00 python ./trace.py r::__x64_sys_nanosleep
>>   21767 pts/2    S+     0:00 python ./trace.py t:syscalls:sys_enter_nanosleep
>>   21800 pts/3    S+     0:00 python ./trace.py p:/home/yhs/a.out:main
>>   22374 pts/1    S+     0:00 grep --color=auto python ./trace.py
>>
>> Signed-off-by: Yonghong Song <yhs@fb.com>
>
>> diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
>> index b301c9b..3680ad4 100644
>> --- a/tools/bpf/bpftool/bash-completion/bpftool
>> +++ b/tools/bpf/bpftool/bash-completion/bpftool
>> @@ -448,6 +448,15 @@ _bpftool()
>>                      ;;
>>              esac
>>              ;;
>> +        cgroup)
>
> s/cgroup/perf/ :)

A mistake on my side when consolidating different versions of the code.
I did have "perf" in one of my versions and tested it properly.

>
>> +            case $command in
>> +                *)
>> +                    [[ $prev == $object ]] && \
>> +                        COMPREPLY=( $( compgen -W 'help \
>> +                            show list' -- "$cur" ) )
>> +                    ;;
>> +            esac
>> +            ;;
>>      esac
>>  } &&
>>  complete -F _bpftool bpftool
>
>> +static int show_proc(const char *fpath, const struct stat *sb,
>> +                  int tflag, struct FTW *ftwbuf)
>> +{
>> +     __u64 probe_offset, probe_addr;
>> +     __u32 prog_id, attach_info;
>> +     int err, pid = 0, fd = 0;
>> +     const char *pch;
>> +     char buf[4096];
>> +
>> +     /* prefix always /proc */
>> +     pch = fpath + 5;
>> +     if (*pch == '\0')
>> +             return 0;
>> +
>> +     /* pid should be all numbers */
>> +     pch++;
>> +     while (isdigit(*pch)) {
>> +             pid = pid * 10 + *pch - '0';
>> +             pch++;
>> +     }
>> +     if (*pch == '\0')
>> +             return 0;
>> +     if (*pch != '/')
>> +             return FTW_SKIP_SUBTREE;
>> +
>> +     /* check /proc/<pid>/fd directory */
>> +     pch++;
>> +     if (strncmp(pch, "fd", 2))
>> +             return FTW_SKIP_SUBTREE;
>> +     pch += 2;
>> +     if (*pch == '\0')
>> +             return 0;
>> +     if (*pch != '/')
>> +             return FTW_SKIP_SUBTREE;
>> +
>> +     /* check /proc/<pid>/fd/<fd_num> */
>> +     pch++;
>> +     while (isdigit(*pch)) {
>> +             fd = fd * 10 + *pch - '0';
>> +             pch++;
>> +     }
>> +     if (*pch != '\0')
>> +             return FTW_SKIP_SUBTREE;
>> +
>> +     /* query (pid, fd) for potential perf events */
>> +     err = bpf_task_fd_query(pid, fd, 0, buf, sizeof(buf), &prog_id,
>> +                             &attach_info, &probe_offset, &probe_addr);
>> +     if (err < 0)
>> +             return 0;
>
> nit: it could be nice from user perspective to detect whether kernel
>      supports the command and fail if not.  Otherwise user is not sure
>      if there is no output because kernel lacks support or because
>      there were really no attached progs.  Just a thought, not really
>      a requirement.

I agree with you. It is good to output an error if the kernel does not
support the syscall (e.g., either non-root or new subcommand is not
supported).

>
>> +     if (json_output)
>> +             print_perf_json(pid, fd, prog_id, attach_info, buf, probe_offset,
>> +                             probe_addr);
>> +     else
>> +             print_perf_plain(pid, fd, prog_id, attach_info, buf, probe_offset,
>> +                              probe_addr);
>> +
>> +     return 0;
>> +}
>> +
>> +static int do_show(int argc, char **argv)
>> +{
>> +     int err = 0, nopenfd = 16;
>> +     int flags = FTW_ACTIONRETVAL | FTW_PHYS;
>
> nit: reverse xmas tree

Will make the change in the next revision.

>
>> +     if (json_output)
>> +             jsonw_start_array(json_wtr);
>> +     if (nftw("/proc", show_proc, nopenfd, flags) == -1) {
>> +             p_err("%s", strerror(errno));
>> +             err = -1;
>> +     }
>> +     if (json_output)
>> +             jsonw_end_array(json_wtr);
>> +
>> +     return err;
>> +}

^ permalink raw reply

* [PATCH v2] selftests: net: reuseport_bpf_numa: don't fail if no numa support
From: Anders Roxell @ 2018-05-18 22:27 UTC (permalink / raw)
  To: davem, shuah; +Cc: netdev, linux-kselftest, linux-kernel, Anders Roxell
In-Reply-To: <20180306151004.31336-1-anders.roxell@linaro.org>

The reuseport_bpf_numa test case fails when there's no numa support.  The
test shouldn't fail if there's no support; it should be skipped instead.

Fixes: 3c2c3c16aaf6 ("reuseport, bpf: add test case for bpf_get_numa_node_id")
Signed-off-by: Anders Roxell <anders.roxell@linaro.org>
---
 tools/testing/selftests/net/reuseport_bpf_numa.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/net/reuseport_bpf_numa.c b/tools/testing/selftests/net/reuseport_bpf_numa.c
index 365c32e84189..c9f478b40996 100644
--- a/tools/testing/selftests/net/reuseport_bpf_numa.c
+++ b/tools/testing/selftests/net/reuseport_bpf_numa.c
@@ -23,6 +23,8 @@
 #include <unistd.h>
 #include <numa.h>
 
+#include "../kselftest.h"
+
 static const int PORT = 8888;
 
 static void build_rcv_group(int *rcv_fd, size_t len, int family, int proto)
@@ -229,7 +231,7 @@ int main(void)
 	int *rcv_fd, nodes;
 
 	if (numa_available() < 0)
-		error(1, errno, "no numa api support");
+		ksft_exit_skip("no numa api support\n");
 
 	nodes = numa_max_node() + 1;
 
-- 
2.17.0

^ permalink raw reply related

* Re: [PATCH net-next v2 1/3] net: ethernet: ti: Allow most drivers with COMPILE_TEST
From: kbuild test robot @ 2018-05-18 22:32 UTC (permalink / raw)
  To: Florian Fainelli
  Cc: kbuild-all, netdev, fugang.duan, Florian Fainelli,
	David S. Miller, Andrew Lunn, open list
In-Reply-To: <20180516185258.20508-2-f.fainelli@gmail.com>

[-- Attachment #1: Type: text/plain, Size: 1500 bytes --]

Hi Florian,

I love your patch! Yet something to improve:

[auto build test ERROR on net-next/master]

url:    https://github.com/0day-ci/linux/commits/Florian-Fainelli/net-ethernet-ti-Allow-most-drivers-with-COMPILE_TEST/20180519-043005
config: sparc64-allyesconfig (attached as .config)
compiler: sparc64-linux-gnu-gcc (Debian 7.2.0-11) 7.2.0
reproduce:
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        make.cross ARCH=sparc64 

All errors (new ones prefixed by >>):

   `.exit.data' referenced in section `.exit.text' of drivers/tty/n_hdlc.o: defined in discarded section `.exit.data' of drivers/tty/n_hdlc.o
   `.exit.data' referenced in section `.exit.text' of drivers/tty/n_hdlc.o: defined in discarded section `.exit.data' of drivers/tty/n_hdlc.o
   `.exit.data' referenced in section `.exit.text' of drivers/tty/n_hdlc.o: defined in discarded section `.exit.data' of drivers/tty/n_hdlc.o
   `.exit.data' referenced in section `.exit.text' of drivers/tty/n_hdlc.o: defined in discarded section `.exit.data' of drivers/tty/n_hdlc.o
   drivers/net/ethernet/ti/netcp_core.o: In function `netcp_txpipe_open':
>> netcp_core.c:(.text+0xc84): undefined reference to `knav_queue_open'

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 53402 bytes --]

^ permalink raw reply

* [PATCH iproute2] Allow to configure /var/run/netns directory
From: Pavel Maltsev @ 2018-05-18 22:44 UTC (permalink / raw)
  To: netdev, lorenzo, stephen, pavelm; +Cc: lorenzo, stephen, Pavel Maltsev

Currently NETNS_RUN_DIR is hardcoded and refers to /var/run/netns.
However, some systems (e.g. Android) don't have /var,
which results in errors when attempting to create network namespaces on
these systems.  This change makes NETNS_RUN_DIR configurable at build time
by allowing an environment variable to be passed to the make command.
This change also makes the /etc/netns directory configurable through the
NETNS_ETC_DIR environment variable.

For example: ./configure && NETNS_RUN_DIR=/mnt/vendor/netns make

Tested: verified that iproute2 with configuration mentioned above
creates namespaces in /mnt/vendor/netns

Signed-off-by: Pavel Maltsev <pavelm@google.com>
---
 Makefile            | 6 +++++-
 include/namespace.h | 5 +++++
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index b526d3b5..651d2a50 100644
--- a/Makefile
+++ b/Makefile
@@ -16,6 +16,8 @@ PREFIX?=/usr
 LIBDIR?=$(PREFIX)/lib
 SBINDIR?=/sbin
 CONFDIR?=/etc/iproute2
+NETNS_RUN_DIR?=/var/run/netns
+NETNS_ETC_DIR?=/etc/netns
 DATADIR?=$(PREFIX)/share
 HDRDIR?=$(PREFIX)/include/iproute2
 DOCDIR?=$(DATADIR)/doc/iproute2
@@ -34,7 +36,9 @@ ifneq ($(SHARED_LIBS),y)
 DEFINES+= -DNO_SHARED_LIBS
 endif
 
-DEFINES+=-DCONFDIR=\"$(CONFDIR)\"
+DEFINES+=-DCONFDIR=\"$(CONFDIR)\" \
+         -DNETNS_RUN_DIR=\"$(NETNS_RUN_DIR)\" \
+         -DNETNS_ETC_DIR=\"$(NETNS_ETC_DIR)\"
 
 #options for decnet
 ADDLIB+=dnet_ntop.o dnet_pton.o
diff --git a/include/namespace.h b/include/namespace.h
index aed7ce08..e47f9b5d 100644
--- a/include/namespace.h
+++ b/include/namespace.h
@@ -8,8 +8,13 @@
 #include <sys/syscall.h>
 #include <errno.h>
 
+#ifndef NETNS_RUN_DIR
 #define NETNS_RUN_DIR "/var/run/netns"
+#endif
+
+#ifndef NETNS_ETC_DIR
 #define NETNS_ETC_DIR "/etc/netns"
+#endif
 
 #ifndef CLONE_NEWNET
 #define CLONE_NEWNET 0x40000000	/* New network namespace (lo, device, names sockets, etc) */
-- 
2.17.0.441.gb46fe60e1d-goog

^ permalink raw reply related

* Re: [PATCH iproute2] Allow to configure /var/run/netns directory
From: Pavel Maltsev @ 2018-05-18 22:48 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: netdev, Lorenzo Colitti
In-Reply-To: <20180518145306.1e7632b0@xeon-e3>

Thanks, Stephen,

I've uploaded a new patch, as you suggested, that puts these
variables in the Makefile rather than the configure script.

On Fri, May 18, 2018 at 2:53 PM Stephen Hemminger
<stephen@networkplumber.org> wrote:
>
> On Tue, 15 May 2018 14:49:46 -0700
> Pavel Maltsev <pavelm@google.com> wrote:
>
> > Currently NETNS_RUN_DIR is hardcoded and refers to /var/run/netns.
> > However, some systems (e.g. Android) doesn't have /var
> > which results in error attempts to create network namespaces on these
> > systems.  This change makes NETNS_RUN_DIR configurable at build time
> > by allowing to pass environment variable to configre script.
> >
> > For example: NETNS_RUN_DIR=/mnt/vendor/netns ./configure && make
> >
> > Tested: verified that iproute2 with configuration mentioned above
> > creates namespaces in /mnt/vendor/netns
> >
> > Signed-off-by: Pavel Maltsev <pavelm@google.com>
>
> The directory path should definitely be overrideable on the build.
> The configure script is already messy enough, lets do it instead like
> the other runtime directories are already done ARPDDIR and CONFDIR.
>
> Something like?
>
> diff --git a/Makefile b/Makefile
> index b526d3b5b5c4..ab828669e711 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -16,6 +16,7 @@ PREFIX?=/usr
>  LIBDIR?=$(PREFIX)/lib
>  SBINDIR?=/sbin
>  CONFDIR?=/etc/iproute2
> +NETNS_RUN_DIR?=/var/run/netns
>  DATADIR?=$(PREFIX)/share
>  HDRDIR?=$(PREFIX)/include/iproute2
>  DOCDIR?=$(DATADIR)/doc/iproute2
> @@ -34,7 +35,7 @@ ifneq ($(SHARED_LIBS),y)
>  DEFINES+= -DNO_SHARED_LIBS
>  endif
>
> -DEFINES+=-DCONFDIR=\"$(CONFDIR)\"
> +DEFINES+=-DCONFDIR=\"$(CONFDIR)\" -DNETNS_RUN_DIR=\"$(NETNS_RUN_DIR)\"
>
>  #options for decnet
>  ADDLIB+=dnet_ntop.o dnet_pton.o
> diff --git a/include/namespace.h b/include/namespace.h
> index aed7ce08507f..e47f9b5d49d1 100644
> --- a/include/namespace.h
> +++ b/include/namespace.h
> @@ -8,8 +8,13 @@
>  #include <sys/syscall.h>
>  #include <errno.h>
>
> +#ifndef NETNS_RUN_DIR
>  #define NETNS_RUN_DIR "/var/run/netns"
> +#endif
> +
> +#ifndef NETNS_ETC_DIR
>  #define NETNS_ETC_DIR "/etc/netns"
> +#endif
>
>  #ifndef CLONE_NEWNET
>  #define CLONE_NEWNET 0x40000000        /* New network namespace (lo, device, names sockets, etc) */
>

^ permalink raw reply

* general protection fault in smc_ioctl
From: syzbot @ 2018-05-18 23:25 UTC (permalink / raw)
  To: davem, linux-kernel, linux-s390, netdev, syzkaller-bugs, ubraun

Hello,

syzbot found the following crash on:

HEAD commit:    1f7455c3912d tcp: tcp_rack_reo_wnd() can be static
git tree:       net-next
console output: https://syzkaller.appspot.com/x/log.txt?x=171a1337800000
kernel config:  https://syzkaller.appspot.com/x/.config?x=b632d8e2c2ab2c1
dashboard link: https://syzkaller.appspot.com/bug?extid=e6714328fda813fc670f
compiler:       gcc (GCC) 8.0.1 20180413 (experimental)
syzkaller repro:https://syzkaller.appspot.com/x/repro.syz?x=15782d57800000
C reproducer:   https://syzkaller.appspot.com/x/repro.c?x=108711a7800000

IMPORTANT: if you fix the bug, please add the following tag to the commit:
Reported-by: syzbot+e6714328fda813fc670f@syzkaller.appspotmail.com

random: sshd: uninitialized urandom read (32 bytes read)
random: sshd: uninitialized urandom read (32 bytes read)
random: sshd: uninitialized urandom read (32 bytes read)
kasan: CONFIG_KASAN_INLINE enabled
kasan: GPF could be caused by NULL-ptr deref or user memory access
general protection fault: 0000 [#1] SMP KASAN
Dumping ftrace buffer:
    (ftrace buffer empty)
Modules linked in:
CPU: 1 PID: 4559 Comm: syz-executor292 Not tainted 4.17.0-rc4+ #50
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS  
Google 01/01/2011
RIP: 0010:smc_ioctl+0x3dc/0x9f0 net/smc/af_smc.c:1499
RSP: 0018:ffff8801ad22f770 EFLAGS: 00010202
RAX: dffffc0000000000 RBX: ffff8801ad0df7c0 RCX: ffffffff8741188f
RDX: 0000000000000004 RSI: ffffffff8741189e RDI: 0000000000000020
RBP: ffff8801ad22f9d0 R08: ffff8801ae87e6c0 R09: ffffed00363e1818
R10: ffffed00363e1818 R11: ffff8801b1f0c0c3 R12: 1ffff10035a45ef1
R13: 0000000020000080 R14: 0000000000000000 R15: 0000000000000000
FS:  00000000017b7880(0000) GS:ffff8801daf00000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007ffd1f18f038 CR3: 00000001ad044000 CR4: 00000000001406e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
  sock_do_ioctl+0xe4/0x3e0 net/socket.c:957
  sock_ioctl+0x30d/0x680 net/socket.c:1081
  vfs_ioctl fs/ioctl.c:46 [inline]
  file_ioctl fs/ioctl.c:500 [inline]
  do_vfs_ioctl+0x1cf/0x16a0 fs/ioctl.c:684
  ksys_ioctl+0xa9/0xd0 fs/ioctl.c:701
  __do_sys_ioctl fs/ioctl.c:708 [inline]
  __se_sys_ioctl fs/ioctl.c:706 [inline]
  __x64_sys_ioctl+0x73/0xb0 fs/ioctl.c:706
  do_syscall_64+0x1b1/0x800 arch/x86/entry/common.c:287
  entry_SYSCALL_64_after_hwframe+0x49/0xbe
RIP: 0033:0x43fca9
RSP: 002b:00007ffd1f073588 EFLAGS: 00000213 ORIG_RAX: 0000000000000010
RAX: ffffffffffffffda RBX: 00000000004002c8 RCX: 000000000043fca9
RDX: 0000000020000080 RSI: 0000000000005411 RDI: 0000000000000003
RBP: 00000000006ca018 R08: 00000000004002c8 R09: 00000000004002c8
R10: 00000000004002c8 R11: 0000000000000213 R12: 00000000004015d0
R13: 0000000000401660 R14: 0000000000000000 R15: 0000000000000000
Code: fa 48 c1 ea 03 80 3c 02 00 0f 85 7d 05 00 00 4c 8b b3 90 04 00 00 48  
b8 00 00 00 00 00 fc ff df 49 8d 7e 20 48 89 fa 48 c1 ea 03 <0f> b6 04 02  
84 c0 74 08 3c 03 0f 8e 47 05 00 00 45 8b 7e 20 4c
RIP: smc_ioctl+0x3dc/0x9f0 net/smc/af_smc.c:1499 RSP: ffff8801ad22f770
---[ end trace b586e1eb098f7714 ]---


---
This bug is generated by a bot. It may contain errors.
See https://goo.gl/tpsmEJ for more information about syzbot.
syzbot engineers can be reached at syzkaller@googlegroups.com.

syzbot will keep track of this bug report. See:
https://goo.gl/tpsmEJ#bug-status-tracking for how to communicate with  
syzbot.
syzbot can test patches for this bug, for details see:
https://goo.gl/tpsmEJ#testing-patches

^ permalink raw reply

* Re: [PATCH iproute2] ip link: Do not call ll_name_to_index when creating a new link
From: David Ahern @ 2018-05-18 23:40 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: netdev
In-Reply-To: <20180518150816.70901564@xeon-e3>

On 5/18/18 4:08 PM, Stephen Hemminger wrote:
> 
> What about just pushing the lookup down to the leaf functions that need it?
> 

That should work as well. You want to re-send a formal patch?

^ permalink raw reply

* [PATCH bpf-next 4/7] bpf: btf: Remove unused bits from uapi/linux/btf.h
From: Martin KaFai Lau @ 2018-05-19  0:16 UTC (permalink / raw)
  To: netdev; +Cc: Alexei Starovoitov, Daniel Borkmann, kernel-team
In-Reply-To: <20180519001650.4043980-1-kafai@fb.com>

This patch does the following:
1. Limit BTF_MAX_TYPES and BTF_MAX_NAME_OFFSET to 64k.  We can
   raise it later.

2. Remove the BTF_TYPE_PARENT and BTF_STR_TBL_ELF_ID.  They are
   currently encoded at the highest bit of a u32.
   It is because the current use case does not require supporting
   parent type (i.e type_id referring to a type in another BTF file).
   It also does not support referring to a string in ELF.

   The BTF_TYPE_PARENT and BTF_STR_TBL_ELF_ID checks are replaced
   by BTF_TYPE_ID_CHECK and BTF_STR_OFFSET_CHECK which are
   defined in btf.c instead of uapi/linux/btf.h.

3. Limit the BTF_INFO_KIND from 5 bits to 4 bits, which is enough.
   The unused bits leave headroom if we ever need it later.

4. The root bit in BTF_INFO is also removed because it is not
   used in the current use case.

The above can be added back later because the verifier
ensures the unused bits are zeros.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
---
 include/uapi/linux/btf.h | 20 +++++---------------
 kernel/bpf/btf.c         | 34 +++++++++++++++++++++-------------
 2 files changed, 26 insertions(+), 28 deletions(-)

diff --git a/include/uapi/linux/btf.h b/include/uapi/linux/btf.h
index 4fa479741a02..b89b56f2b099 100644
--- a/include/uapi/linux/btf.h
+++ b/include/uapi/linux/btf.h
@@ -22,28 +22,19 @@ struct btf_header {
 };
 
 /* Max # of type identifier */
-#define BTF_MAX_TYPE	0x7fffffff
+#define BTF_MAX_TYPE	0x0000ffff
 /* Max offset into the string section */
-#define BTF_MAX_NAME_OFFSET	0x7fffffff
+#define BTF_MAX_NAME_OFFSET	0x0000ffff
 /* Max # of struct/union/enum members or func args */
 #define BTF_MAX_VLEN	0xffff
 
-/* The type id is referring to a parent BTF */
-#define BTF_TYPE_PARENT(id)	(((id) >> 31) & 0x1)
-#define BTF_TYPE_ID(id)		((id) & BTF_MAX_TYPE)
-
-/* String is in the ELF string section */
-#define BTF_STR_TBL_ELF_ID(ref)	(((ref) >> 31) & 0x1)
-#define BTF_STR_OFFSET(ref)	((ref) & BTF_MAX_NAME_OFFSET)
-
 struct btf_type {
 	__u32 name_off;
 	/* "info" bits arrangement
 	 * bits  0-15: vlen (e.g. # of struct's members)
 	 * bits 16-23: unused
-	 * bits 24-28: kind (e.g. int, ptr, array...etc)
-	 * bits 29-30: unused
-	 * bits    31: root
+	 * bits 24-27: kind (e.g. int, ptr, array...etc)
+	 * bits 28-31: unused
 	 */
 	__u32 info;
 	/* "size" is used by INT, ENUM, STRUCT and UNION.
@@ -58,8 +49,7 @@ struct btf_type {
 	};
 };
 
-#define BTF_INFO_KIND(info)	(((info) >> 24) & 0x1f)
-#define BTF_INFO_ISROOT(info)	(!!(((info) >> 24) & 0x80))
+#define BTF_INFO_KIND(info)	(((info) >> 24) & 0x0f)
 #define BTF_INFO_VLEN(info)	((info) & 0xffff)
 
 #define BTF_KIND_UNKN		0	/* Unknown	*/
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index b4e48dae2240..5d1967d4fb62 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -163,13 +163,15 @@
 #define BITS_ROUNDUP_BYTES(bits) \
 	(BITS_ROUNDDOWN_BYTES(bits) + !!BITS_PER_BYTE_MASKED(bits))
 
+#define BTF_INFO_MASK 0x0f00ffff
+#define BTF_TYPE_ID_CHECK(type_id) ((type_id) <= BTF_MAX_TYPE)
+#define BTF_STR_OFFSET_CHECK(name_off) ((name_off) <= BTF_MAX_NAME_OFFSET)
+
 /* 16MB for 64k structs and each has 16 members and
  * a few MB spaces for the string section.
  * The hard limit is S32_MAX.
  */
 #define BTF_MAX_SIZE (16 * 1024 * 1024)
-/* 64k. We can raise it later. The hard limit is S32_MAX. */
-#define BTF_MAX_NR_TYPES 65535
 
 #define for_each_member(i, struct_type, member)			\
 	for (i = 0, member = btf_type_member(struct_type);	\
@@ -422,16 +424,16 @@ static const struct btf_kind_operations *btf_type_ops(const struct btf_type *t)
 
 static bool btf_name_offset_valid(const struct btf *btf, u32 offset)
 {
-	return !BTF_STR_TBL_ELF_ID(offset) &&
-		BTF_STR_OFFSET(offset) < btf->hdr.str_len;
+	return BTF_STR_OFFSET_CHECK(offset) &&
+		offset < btf->hdr.str_len;
 }
 
 static const char *btf_name_by_offset(const struct btf *btf, u32 offset)
 {
-	if (!BTF_STR_OFFSET(offset))
+	if (!offset)
 		return "(anon)";
-	else if (BTF_STR_OFFSET(offset) < btf->hdr.str_len)
-		return &btf->strings[BTF_STR_OFFSET(offset)];
+	else if (offset < btf->hdr.str_len)
+		return &btf->strings[offset];
 	else
 		return "(invalid-name-offset)";
 }
@@ -599,13 +601,13 @@ static int btf_add_type(struct btf_verifier_env *env, struct btf_type *t)
 		struct btf_type **new_types;
 		u32 expand_by, new_size;
 
-		if (btf->types_size == BTF_MAX_NR_TYPES) {
+		if (btf->types_size == BTF_MAX_TYPE) {
 			btf_verifier_log(env, "Exceeded max num of types");
 			return -E2BIG;
 		}
 
 		expand_by = max_t(u32, btf->types_size >> 2, 16);
-		new_size = min_t(u32, BTF_MAX_NR_TYPES,
+		new_size = min_t(u32, BTF_MAX_TYPE,
 				 btf->types_size + expand_by);
 
 		new_types = kvzalloc(new_size * sizeof(*new_types),
@@ -1127,7 +1129,7 @@ static int btf_ref_type_check_meta(struct btf_verifier_env *env,
 		return -EINVAL;
 	}
 
-	if (BTF_TYPE_PARENT(t->type)) {
+	if (!BTF_TYPE_ID_CHECK(t->type)) {
 		btf_verifier_log_type(env, t, "Invalid type_id");
 		return -EINVAL;
 	}
@@ -1334,12 +1336,12 @@ static s32 btf_array_check_meta(struct btf_verifier_env *env,
 	/* Array elem type and index type cannot be in type void,
 	 * so !array->type and !array->index_type are not allowed.
 	 */
-	if (!array->type || BTF_TYPE_PARENT(array->type)) {
+	if (!array->type || !BTF_TYPE_ID_CHECK(array->type)) {
 		btf_verifier_log_type(env, t, "Invalid elem");
 		return -EINVAL;
 	}
 
-	if (!array->index_type || BTF_TYPE_PARENT(array->index_type)) {
+	if (!array->index_type || !BTF_TYPE_ID_CHECK(array->index_type)) {
 		btf_verifier_log_type(env, t, "Invalid index");
 		return -EINVAL;
 	}
@@ -1511,7 +1513,7 @@ static s32 btf_struct_check_meta(struct btf_verifier_env *env,
 		}
 
 		/* A member cannot be in type void */
-		if (!member->type || BTF_TYPE_PARENT(member->type)) {
+		if (!member->type || !BTF_TYPE_ID_CHECK(member->type)) {
 			btf_verifier_log_member(env, t, member,
 						"Invalid type_id");
 			return -EINVAL;
@@ -1764,6 +1766,12 @@ static s32 btf_check_meta(struct btf_verifier_env *env,
 	}
 	meta_left -= sizeof(*t);
 
+	if (t->info & ~BTF_INFO_MASK) {
+		btf_verifier_log(env, "[%u] Invalid btf_info:%x",
+				 env->log_type_id, t->info);
+		return -EINVAL;
+	}
+
 	if (BTF_INFO_KIND(t->info) > BTF_KIND_MAX ||
 	    BTF_INFO_KIND(t->info) == BTF_KIND_UNKN) {
 		btf_verifier_log(env, "[%u] Invalid kind:%u",
-- 
2.9.5

^ permalink raw reply related

* [PATCH bpf-next 5/7] bpf: btf: Rename btf_key_id and btf_value_id in bpf_map_info
From: Martin KaFai Lau @ 2018-05-19  0:16 UTC (permalink / raw)
  To: netdev; +Cc: Alexei Starovoitov, Daniel Borkmann, kernel-team
In-Reply-To: <20180519001650.4043980-1-kafai@fb.com>

In "struct bpf_map_info", the name "btf_id", "btf_key_id" and "btf_value_id"
could cause confusion because the "id" of "btf_id" means the BPF obj id
given to the BTF object while
"btf_key_id" and "btf_value_id" means the BTF type id within
that BTF object.

To make it clear, btf_key_id and btf_value_id are
renamed to btf_key_type_id and btf_value_type_id.

Suggested-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
---
 include/linux/bpf.h      |  4 ++--
 include/uapi/linux/bpf.h |  8 ++++----
 kernel/bpf/arraymap.c    |  2 +-
 kernel/bpf/syscall.c     | 18 +++++++++---------
 4 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index f6fe3c719ca8..1795eeee846c 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -69,8 +69,8 @@ struct bpf_map {
 	u32 pages;
 	u32 id;
 	int numa_node;
-	u32 btf_key_id;
-	u32 btf_value_id;
+	u32 btf_key_type_id;
+	u32 btf_value_type_id;
 	struct btf *btf;
 	bool unpriv_array;
 	/* 55 bytes hole */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index d94d333a8225..123ebe4b3662 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -284,8 +284,8 @@ union bpf_attr {
 		char	map_name[BPF_OBJ_NAME_LEN];
 		__u32	map_ifindex;	/* ifindex of netdev to create on */
 		__u32	btf_fd;		/* fd pointing to a BTF type data */
-		__u32	btf_key_id;	/* BTF type_id of the key */
-		__u32	btf_value_id;	/* BTF type_id of the value */
+		__u32	btf_key_type_id;	/* BTF type_id of the key */
+		__u32	btf_value_type_id;	/* BTF type_id of the value */
 	};
 
 	struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
@@ -2211,8 +2211,8 @@ struct bpf_map_info {
 	__u64 netns_dev;
 	__u64 netns_ino;
 	__u32 btf_id;
-	__u32 btf_key_id;
-	__u32 btf_value_id;
+	__u32 btf_key_type_id;
+	__u32 btf_value_type_id;
 } __attribute__((aligned(8)));
 
 struct bpf_btf_info {
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 0fd8d8f1a398..544e58f5f642 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -352,7 +352,7 @@ static void array_map_seq_show_elem(struct bpf_map *map, void *key,
 	}
 
 	seq_printf(m, "%u: ", *(u32 *)key);
-	btf_type_seq_show(map->btf, map->btf_value_id, value, m);
+	btf_type_seq_show(map->btf, map->btf_value_type_id, value, m);
 	seq_puts(m, "\n");
 
 	rcu_read_unlock();
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 2b29ef84ded3..0b4c94551001 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -422,7 +422,7 @@ static int bpf_obj_name_cpy(char *dst, const char *src)
 	return 0;
 }
 
-#define BPF_MAP_CREATE_LAST_FIELD btf_value_id
+#define BPF_MAP_CREATE_LAST_FIELD btf_value_type_id
 /* called via syscall */
 static int map_create(union bpf_attr *attr)
 {
@@ -457,10 +457,10 @@ static int map_create(union bpf_attr *attr)
 	atomic_set(&map->usercnt, 1);
 
 	if (bpf_map_support_seq_show(map) &&
-	    (attr->btf_key_id || attr->btf_value_id)) {
+	    (attr->btf_key_type_id || attr->btf_value_type_id)) {
 		struct btf *btf;
 
-		if (!attr->btf_key_id || !attr->btf_value_id) {
+		if (!attr->btf_key_type_id || !attr->btf_value_type_id) {
 			err = -EINVAL;
 			goto free_map_nouncharge;
 		}
@@ -471,16 +471,16 @@ static int map_create(union bpf_attr *attr)
 			goto free_map_nouncharge;
 		}
 
-		err = map->ops->map_check_btf(map, btf, attr->btf_key_id,
-					      attr->btf_value_id);
+		err = map->ops->map_check_btf(map, btf, attr->btf_key_type_id,
+					      attr->btf_value_type_id);
 		if (err) {
 			btf_put(btf);
 			goto free_map_nouncharge;
 		}
 
 		map->btf = btf;
-		map->btf_key_id = attr->btf_key_id;
-		map->btf_value_id = attr->btf_value_id;
+		map->btf_key_type_id = attr->btf_key_type_id;
+		map->btf_value_type_id = attr->btf_value_type_id;
 	}
 
 	err = security_bpf_map_alloc(map);
@@ -2013,8 +2013,8 @@ static int bpf_map_get_info_by_fd(struct bpf_map *map,
 
 	if (map->btf) {
 		info.btf_id = btf_id(map->btf);
-		info.btf_key_id = map->btf_key_id;
-		info.btf_value_id = map->btf_value_id;
+		info.btf_key_type_id = map->btf_key_type_id;
+		info.btf_value_type_id = map->btf_value_type_id;
 	}
 
 	if (bpf_map_is_dev_bound(map)) {
-- 
2.9.5

^ permalink raw reply related

* [PATCH bpf-next 2/7] bpf: btf: Change how section is supported in btf_header
From: Martin KaFai Lau @ 2018-05-19  0:16 UTC (permalink / raw)
  To: netdev; +Cc: Alexei Starovoitov, Daniel Borkmann, kernel-team
In-Reply-To: <20180519001650.4043980-1-kafai@fb.com>

There are currently unused section descriptions in the btf_header.  Those
sections are here to support future BTF use cases.  For example, the
func section (func_off) is to support function signature (e.g. the BPF
prog function signature).

Instead of spelling out all potential sections up-front in the btf_header,
this patch makes changes to btf_header such that extending it (e.g. adding
a section) is possible later.  The unused ones can be removed for now and
they can be added back later.

This patch:
1. adds a hdr_len to the btf_header.  It will allow adding
sections (and other info like parent_label and parent_name)
later.  The check is similar to the existing bpf_attr.
If a user passes in a longer hdr_len, the kernel
ensures the extra trailing bytes are 0.

2. allows the section order in the BTF object to be
different from its sec_off order in btf_header.

3. each sec_off is followed by a sec_len.  There must be no gaps or
overlaps among sections.

The string section is ensured to be at the end due to the 4 bytes
alignment requirement of the type section.

The above changes will allow enough flexibility to
add new sections (and other info) to the btf_header later.

This patch also removes an unnecessary !err check
at the end of btf_parse().

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
---
 include/uapi/linux/btf.h |   8 +-
 kernel/bpf/btf.c         | 207 +++++++++++++++++++++++++++++++++++------------
 2 files changed, 158 insertions(+), 57 deletions(-)

diff --git a/include/uapi/linux/btf.h b/include/uapi/linux/btf.h
index bcb56ee47014..4fa479741a02 100644
--- a/include/uapi/linux/btf.h
+++ b/include/uapi/linux/btf.h
@@ -12,15 +12,11 @@ struct btf_header {
 	__u16	magic;
 	__u8	version;
 	__u8	flags;
-
-	__u32	parent_label;
-	__u32	parent_name;
+	__u32	hdr_len;
 
 	/* All offsets are in bytes relative to the end of this header */
-	__u32	label_off;	/* offset of label section	*/
-	__u32	object_off;	/* offset of data object section*/
-	__u32	func_off;	/* offset of function section	*/
 	__u32	type_off;	/* offset of type section	*/
+	__u32	type_len;	/* length of type section	*/
 	__u32	str_off;	/* offset of string section	*/
 	__u32	str_len;	/* length of string section	*/
 };
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index ded10ab47b8a..536e5981ad8c 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -12,6 +12,7 @@
 #include <linux/uaccess.h>
 #include <linux/kernel.h>
 #include <linux/idr.h>
+#include <linux/sort.h>
 #include <linux/bpf_verifier.h>
 #include <linux/btf.h>
 
@@ -184,15 +185,13 @@ static DEFINE_IDR(btf_idr);
 static DEFINE_SPINLOCK(btf_idr_lock);
 
 struct btf {
-	union {
-		struct btf_header *hdr;
-		void *data;
-	};
+	void *data;
 	struct btf_type **types;
 	u32 *resolved_ids;
 	u32 *resolved_sizes;
 	const char *strings;
 	void *nohdr_data;
+	struct btf_header hdr;
 	u32 nr_types;
 	u32 types_size;
 	u32 data_size;
@@ -227,6 +226,12 @@ enum resolve_mode {
 };
 
 #define MAX_RESOLVE_DEPTH 32
+#define NR_SECS 2
+
+struct btf_sec_info {
+	u32 off;
+	u32 len;
+};
 
 struct btf_verifier_env {
 	struct btf *btf;
@@ -418,14 +423,14 @@ static const struct btf_kind_operations *btf_type_ops(const struct btf_type *t)
 static bool btf_name_offset_valid(const struct btf *btf, u32 offset)
 {
 	return !BTF_STR_TBL_ELF_ID(offset) &&
-		BTF_STR_OFFSET(offset) < btf->hdr->str_len;
+		BTF_STR_OFFSET(offset) < btf->hdr.str_len;
 }
 
 static const char *btf_name_by_offset(const struct btf *btf, u32 offset)
 {
 	if (!BTF_STR_OFFSET(offset))
 		return "(anon)";
-	else if (BTF_STR_OFFSET(offset) < btf->hdr->str_len)
+	else if (BTF_STR_OFFSET(offset) < btf->hdr.str_len)
 		return &btf->strings[BTF_STR_OFFSET(offset)];
 	else
 		return "(invalid-name-offset)";
@@ -536,7 +541,8 @@ static void btf_verifier_log_member(struct btf_verifier_env *env,
 	__btf_verifier_log(log, "\n");
 }
 
-static void btf_verifier_log_hdr(struct btf_verifier_env *env)
+static void btf_verifier_log_hdr(struct btf_verifier_env *env,
+				 u32 btf_data_size)
 {
 	struct bpf_verifier_log *log = &env->log;
 	const struct btf *btf = env->btf;
@@ -545,19 +551,16 @@ static void btf_verifier_log_hdr(struct btf_verifier_env *env)
 	if (!bpf_verifier_log_needed(log))
 		return;
 
-	hdr = btf->hdr;
+	hdr = &btf->hdr;
 	__btf_verifier_log(log, "magic: 0x%x\n", hdr->magic);
 	__btf_verifier_log(log, "version: %u\n", hdr->version);
 	__btf_verifier_log(log, "flags: 0x%x\n", hdr->flags);
-	__btf_verifier_log(log, "parent_label: %u\n", hdr->parent_label);
-	__btf_verifier_log(log, "parent_name: %u\n", hdr->parent_name);
-	__btf_verifier_log(log, "label_off: %u\n", hdr->label_off);
-	__btf_verifier_log(log, "object_off: %u\n", hdr->object_off);
-	__btf_verifier_log(log, "func_off: %u\n", hdr->func_off);
+	__btf_verifier_log(log, "hdr_len: %u\n", hdr->hdr_len);
 	__btf_verifier_log(log, "type_off: %u\n", hdr->type_off);
+	__btf_verifier_log(log, "type_len: %u\n", hdr->type_len);
 	__btf_verifier_log(log, "str_off: %u\n", hdr->str_off);
 	__btf_verifier_log(log, "str_len: %u\n", hdr->str_len);
-	__btf_verifier_log(log, "btf_total_size: %u\n", btf->data_size);
+	__btf_verifier_log(log, "btf_total_size: %u\n", btf_data_size);
 }
 
 static int btf_add_type(struct btf_verifier_env *env, struct btf_type *t)
@@ -1754,9 +1757,9 @@ static int btf_check_all_metas(struct btf_verifier_env *env)
 	struct btf_header *hdr;
 	void *cur, *end;
 
-	hdr = btf->hdr;
+	hdr = &btf->hdr;
 	cur = btf->nohdr_data + hdr->type_off;
-	end = btf->nohdr_data + hdr->str_off;
+	end = btf->nohdr_data + hdr->type_len;
 
 	env->log_type_id = 1;
 	while (cur < end) {
@@ -1866,8 +1869,20 @@ static int btf_check_all_types(struct btf_verifier_env *env)
 
 static int btf_parse_type_sec(struct btf_verifier_env *env)
 {
+	const struct btf_header *hdr = &env->btf->hdr;
 	int err;
 
+	/* Type section must align to 4 bytes */
+	if (hdr->type_off & (sizeof(u32) - 1)) {
+		btf_verifier_log(env, "Unaligned type_off");
+		return -EINVAL;
+	}
+
+	if (!hdr->type_len) {
+		btf_verifier_log(env, "No type found");
+		return -EINVAL;
+	}
+
 	err = btf_check_all_metas(env);
 	if (err)
 		return err;
@@ -1881,10 +1896,15 @@ static int btf_parse_str_sec(struct btf_verifier_env *env)
 	struct btf *btf = env->btf;
 	const char *start, *end;
 
-	hdr = btf->hdr;
+	hdr = &btf->hdr;
 	start = btf->nohdr_data + hdr->str_off;
 	end = start + hdr->str_len;
 
+	if (end != btf->data + btf->data_size) {
+		btf_verifier_log(env, "String section is not at the end");
+		return -EINVAL;
+	}
+
 	if (!hdr->str_len || hdr->str_len - 1 > BTF_MAX_NAME_OFFSET ||
 	    start[0] || end[-1]) {
 		btf_verifier_log(env, "Invalid string section");
@@ -1896,20 +1916,119 @@ static int btf_parse_str_sec(struct btf_verifier_env *env)
 	return 0;
 }
 
-static int btf_parse_hdr(struct btf_verifier_env *env)
+static const size_t btf_sec_info_offset[] = {
+	offsetof(struct btf_header, type_off),
+	offsetof(struct btf_header, str_off),
+};
+
+static int btf_sec_info_cmp(const void *a, const void *b)
 {
+	const struct btf_sec_info *x = a;
+	const struct btf_sec_info *y = b;
+
+	return (int)(x->off - y->off) ? : (int)(x->len - y->len);
+}
+
+static int btf_check_sec_info(struct btf_verifier_env *env,
+			      u32 btf_data_size)
+{
+	struct btf_sec_info secs[NR_SECS];
+	u32 total, expected_total, i;
 	const struct btf_header *hdr;
-	struct btf *btf = env->btf;
-	u32 meta_left;
+	const struct btf *btf;
+
+	BUILD_BUG_ON(ARRAY_SIZE(btf_sec_info_offset) != NR_SECS);
+
+	btf = env->btf;
+	hdr = &btf->hdr;
+
+	/* Populate the secs from hdr */
+	for (i = 0; i < NR_SECS; i++)
+		secs[i] = *(struct btf_sec_info *)((void *)hdr +
+						   btf_sec_info_offset[i]);
+
+	sort(secs, NR_SECS, sizeof(struct btf_sec_info),
+	     btf_sec_info_cmp, NULL);
+
+	/* Check for gaps and overlap among sections */
+	total = 0;
+	expected_total = btf_data_size - hdr->hdr_len;
+	for (i = 0; i < NR_SECS; i++) {
+		if (expected_total < secs[i].off) {
+			btf_verifier_log(env, "Invalid section offset");
+			return -EINVAL;
+		}
+		if (total < secs[i].off) {
+			/* gap */
+			btf_verifier_log(env, "Unsupported section found");
+			return -EINVAL;
+		}
+		if (total > secs[i].off) {
+			btf_verifier_log(env, "Section overlap found");
+			return -EINVAL;
+		}
+		if (expected_total - total < secs[i].len) {
+			btf_verifier_log(env,
+					 "Total section length too long");
+			return -EINVAL;
+		}
+		total += secs[i].len;
+	}
+
+	/* There is data other than hdr and known sections */
+	if (expected_total != total) {
+		btf_verifier_log(env, "Unsupported section found");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int btf_parse_hdr(struct btf_verifier_env *env, void __user *btf_data,
+			 u32 btf_data_size)
+{
+	const struct btf_header *hdr;
+	u32 hdr_len, hdr_copy;
+	struct btf_min_header {
+		u16	magic;
+		u8	version;
+		u8	flags;
+		u32	hdr_len;
+	} __user *min_hdr;
+	struct btf *btf;
+	int err;
+
+	btf = env->btf;
+	min_hdr = btf_data;
+
+	if (btf_data_size < sizeof(*min_hdr)) {
+		btf_verifier_log(env, "hdr_len not found");
+		return -EINVAL;
+	}
 
-	if (btf->data_size < sizeof(*hdr)) {
+	if (get_user(hdr_len, &min_hdr->hdr_len))
+		return -EFAULT;
+
+	if (btf_data_size < hdr_len) {
 		btf_verifier_log(env, "btf_header not found");
 		return -EINVAL;
 	}
 
-	btf_verifier_log_hdr(env);
+	err = bpf_check_uarg_tail_zero(btf_data, sizeof(btf->hdr), hdr_len);
+	if (err) {
+		if (err == -E2BIG)
+			btf_verifier_log(env, "Unsupported btf_header");
+		return err;
+	}
+
+	hdr_copy = min_t(u32, hdr_len, sizeof(btf->hdr));
+	if (copy_from_user(&btf->hdr, btf_data, hdr_copy))
+		return -EFAULT;
+
+	hdr = &btf->hdr;
+
+	btf_verifier_log_hdr(env, btf_data_size);
 
-	hdr = btf->hdr;
 	if (hdr->magic != BTF_MAGIC) {
 		btf_verifier_log(env, "Invalid magic");
 		return -EINVAL;
@@ -1925,26 +2044,14 @@ static int btf_parse_hdr(struct btf_verifier_env *env)
 		return -ENOTSUPP;
 	}
 
-	meta_left = btf->data_size - sizeof(*hdr);
-	if (!meta_left) {
+	if (btf_data_size == hdr->hdr_len) {
 		btf_verifier_log(env, "No data");
 		return -EINVAL;
 	}
 
-	if (meta_left < hdr->type_off || hdr->str_off <= hdr->type_off ||
-	    /* Type section must align to 4 bytes */
-	    hdr->type_off & (sizeof(u32) - 1)) {
-		btf_verifier_log(env, "Invalid type_off");
-		return -EINVAL;
-	}
-
-	if (meta_left < hdr->str_off ||
-	    meta_left - hdr->str_off < hdr->str_len) {
-		btf_verifier_log(env, "Invalid str_off or str_len");
-		return -EINVAL;
-	}
-
-	btf->nohdr_data = btf->hdr + 1;
+	err = btf_check_sec_info(env, btf_data_size);
+	if (err)
+		return err;
 
 	return 0;
 }
@@ -1987,6 +2094,11 @@ static struct btf *btf_parse(void __user *btf_data, u32 btf_data_size,
 		err = -ENOMEM;
 		goto errout;
 	}
+	env->btf = btf;
+
+	err = btf_parse_hdr(env, btf_data, btf_data_size);
+	if (err)
+		goto errout;
 
 	data = kvmalloc(btf_data_size, GFP_KERNEL | __GFP_NOWARN);
 	if (!data) {
@@ -1996,18 +2108,13 @@ static struct btf *btf_parse(void __user *btf_data, u32 btf_data_size,
 
 	btf->data = data;
 	btf->data_size = btf_data_size;
+	btf->nohdr_data = btf->data + btf->hdr.hdr_len;
 
 	if (copy_from_user(data, btf_data, btf_data_size)) {
 		err = -EFAULT;
 		goto errout;
 	}
 
-	env->btf = btf;
-
-	err = btf_parse_hdr(env);
-	if (err)
-		goto errout;
-
 	err = btf_parse_str_sec(env);
 	if (err)
 		goto errout;
@@ -2016,16 +2123,14 @@ static struct btf *btf_parse(void __user *btf_data, u32 btf_data_size,
 	if (err)
 		goto errout;
 
-	if (!err && log->level && bpf_verifier_log_full(log)) {
+	if (log->level && bpf_verifier_log_full(log)) {
 		err = -ENOSPC;
 		goto errout;
 	}
 
-	if (!err) {
-		btf_verifier_env_free(env);
-		refcount_set(&btf->refcnt, 1);
-		return btf;
-	}
+	btf_verifier_env_free(env);
+	refcount_set(&btf->refcnt, 1);
+	return btf;
 
 errout:
 	btf_verifier_env_free(env);
-- 
2.9.5

^ permalink raw reply related

* [PATCH bpf-next 1/7] bpf: Expose check_uarg_tail_zero()
From: Martin KaFai Lau @ 2018-05-19  0:16 UTC (permalink / raw)
  To: netdev; +Cc: Alexei Starovoitov, Daniel Borkmann, kernel-team
In-Reply-To: <20180519001650.4043980-1-kafai@fb.com>

This patch exposes check_uarg_tail_zero() which will
be reused by a later BTF patch.  Its name is changed to
bpf_check_uarg_tail_zero().

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
---
 include/linux/bpf.h  |  2 ++
 kernel/bpf/syscall.c | 14 +++++++-------
 2 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index ed0122b45b63..f6fe3c719ca8 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -463,6 +463,8 @@ int bpf_fd_htab_map_update_elem(struct bpf_map *map, struct file *map_file,
 int bpf_fd_htab_map_lookup_elem(struct bpf_map *map, void *key, u32 *value);
 
 int bpf_get_file_flag(int flags);
+int bpf_check_uarg_tail_zero(void __user *uaddr, size_t expected_size,
+			     size_t actual_size);
 
 /* memcpy that is used with 8-byte aligned pointers, power-of-8 size and
  * forced to use 'long' read/writes to try to atomically copy long counters.
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index bfcde949c7f8..2b29ef84ded3 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -65,9 +65,9 @@ static const struct bpf_map_ops * const bpf_map_types[] = {
  * copy_from_user() call. However, this is not a concern since this function is
  * meant to be a future-proofing of bits.
  */
-static int check_uarg_tail_zero(void __user *uaddr,
-				size_t expected_size,
-				size_t actual_size)
+int bpf_check_uarg_tail_zero(void __user *uaddr,
+			     size_t expected_size,
+			     size_t actual_size)
 {
 	unsigned char __user *addr;
 	unsigned char __user *end;
@@ -1899,7 +1899,7 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
 	u32 ulen;
 	int err;
 
-	err = check_uarg_tail_zero(uinfo, sizeof(info), info_len);
+	err = bpf_check_uarg_tail_zero(uinfo, sizeof(info), info_len);
 	if (err)
 		return err;
 	info_len = min_t(u32, sizeof(info), info_len);
@@ -1998,7 +1998,7 @@ static int bpf_map_get_info_by_fd(struct bpf_map *map,
 	u32 info_len = attr->info.info_len;
 	int err;
 
-	err = check_uarg_tail_zero(uinfo, sizeof(info), info_len);
+	err = bpf_check_uarg_tail_zero(uinfo, sizeof(info), info_len);
 	if (err)
 		return err;
 	info_len = min_t(u32, sizeof(info), info_len);
@@ -2038,7 +2038,7 @@ static int bpf_btf_get_info_by_fd(struct btf *btf,
 	u32 info_len = attr->info.info_len;
 	int err;
 
-	err = check_uarg_tail_zero(uinfo, sizeof(*uinfo), info_len);
+	err = bpf_check_uarg_tail_zero(uinfo, sizeof(*uinfo), info_len);
 	if (err)
 		return err;
 
@@ -2110,7 +2110,7 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
 	if (sysctl_unprivileged_bpf_disabled && !capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
-	err = check_uarg_tail_zero(uattr, sizeof(attr), size);
+	err = bpf_check_uarg_tail_zero(uattr, sizeof(attr), size);
 	if (err)
 		return err;
 	size = min_t(u32, size, sizeof(attr));
-- 
2.9.5

^ permalink raw reply related

* [PATCH bpf-next 0/7] BTF uapi cleanup
From: Martin KaFai Lau @ 2018-05-19  0:16 UTC (permalink / raw)
  To: netdev; +Cc: Alexei Starovoitov, Daniel Borkmann, kernel-team

This patch set makes some changes to clean up the unused
bits in BTF uapi.  It also makes the btf_header extensible.

Please see individual patches for details.

Martin KaFai Lau (7):
  bpf: Expose check_uarg_tail_zero()
  bpf: btf: Change how section is supported in btf_header
  bpf: btf: Check array->index_type
  bpf: btf: Remove unused bits from uapi/linux/btf.h
  bpf: btf: Rename btf_key_id and btf_value_id in bpf_map_info
  bpf: btf: Sync bpf.h and btf.h to tools/include/uapi/linux/
  bpf: btf: Add tests for the btf uapi changes

 include/linux/bpf.h                    |   6 +-
 include/uapi/linux/bpf.h               |   8 +-
 include/uapi/linux/btf.h               |  28 +-
 kernel/bpf/arraymap.c                  |   2 +-
 kernel/bpf/btf.c                       | 318 ++++++++++++++------
 kernel/bpf/syscall.c                   |  32 +-
 tools/include/uapi/linux/bpf.h         |   8 +-
 tools/include/uapi/linux/btf.h         |  28 +-
 tools/lib/bpf/bpf.c                    |   4 +-
 tools/lib/bpf/bpf.h                    |   4 +-
 tools/lib/bpf/btf.c                    |   5 +-
 tools/lib/bpf/libbpf.c                 |  34 +--
 tools/lib/bpf/libbpf.h                 |   4 +-
 tools/testing/selftests/bpf/test_btf.c | 528 ++++++++++++++++++++++++++-------
 14 files changed, 724 insertions(+), 285 deletions(-)

-- 
2.9.5

^ permalink raw reply

* [PATCH bpf-next 3/7] bpf: btf: Check array->index_type
From: Martin KaFai Lau @ 2018-05-19  0:16 UTC (permalink / raw)
  To: netdev; +Cc: Alexei Starovoitov, Daniel Borkmann, kernel-team
In-Reply-To: <20180519001650.4043980-1-kafai@fb.com>

Instead of ignoring the array->index_type field, enforce that
it must be an unsigned BTF_KIND_INT.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
---
 kernel/bpf/btf.c | 83 ++++++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 59 insertions(+), 24 deletions(-)

diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 536e5981ad8c..b4e48dae2240 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -444,6 +444,28 @@ static const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id)
 	return btf->types[type_id];
 }
 
+/*
+ * Regular int is not a bit field and it must be either
+ * u8/u16/u32/u64.
+ */
+static bool btf_type_int_is_regular(const struct btf_type *t)
+{
+	u16 nr_bits, nr_bytes;
+	u32 int_data;
+
+	int_data = btf_type_int(t);
+	nr_bits = BTF_INT_BITS(int_data);
+	nr_bytes = BITS_ROUNDUP_BYTES(nr_bits);
+	if (BITS_PER_BYTE_MASKED(nr_bits) ||
+	    BTF_INT_OFFSET(int_data) ||
+	    (nr_bytes != sizeof(u8) && nr_bytes != sizeof(u16) &&
+	     nr_bytes != sizeof(u32) && nr_bytes != sizeof(u64))) {
+		return false;
+	}
+
+	return true;
+}
+
 __printf(2, 3) static void __btf_verifier_log(struct bpf_verifier_log *log,
 					      const char *fmt, ...)
 {
@@ -1309,14 +1331,16 @@ static s32 btf_array_check_meta(struct btf_verifier_env *env,
 		return -EINVAL;
 	}
 
-	/* We are a little forgiving on array->index_type since
-	 * the kernel is not using it.
-	 */
-	/* Array elem cannot be in type void,
-	 * so !array->type is not allowed.
+	/* Array elem type and index type cannot be in type void,
+	 * so !array->type and !array->index_type are not allowed.
 	 */
 	if (!array->type || BTF_TYPE_PARENT(array->type)) {
-		btf_verifier_log_type(env, t, "Invalid type_id");
+		btf_verifier_log_type(env, t, "Invalid elem");
+		return -EINVAL;
+	}
+
+	if (!array->index_type || BTF_TYPE_PARENT(array->index_type)) {
+		btf_verifier_log_type(env, t, "Invalid index");
 		return -EINVAL;
 	}
 
@@ -1329,11 +1353,35 @@ static int btf_array_resolve(struct btf_verifier_env *env,
 			     const struct resolve_vertex *v)
 {
 	const struct btf_array *array = btf_type_array(v->t);
-	const struct btf_type *elem_type;
-	u32 elem_type_id = array->type;
+	const struct btf_type *elem_type, *index_type;
+	u32 elem_type_id, index_type_id;
 	struct btf *btf = env->btf;
 	u32 elem_size;
 
+	/* Check array->index_type */
+	index_type_id = array->index_type;
+	index_type = btf_type_by_id(btf, index_type_id);
+	if (btf_type_is_void_or_null(index_type)) {
+		btf_verifier_log_type(env, v->t, "Invalid index");
+		return -EINVAL;
+	}
+
+	if (!env_type_is_resolve_sink(env, index_type) &&
+	    !env_type_is_resolved(env, index_type_id))
+		return env_stack_push(env, index_type, index_type_id);
+
+	index_type = btf_type_id_size(btf, &index_type_id, NULL);
+	if (!index_type || !btf_type_is_int(index_type) ||
+	    /* bit field int is not allowed */
+	    !btf_type_int_is_regular(index_type) ||
+	    /* unsigned only */
+	    BTF_INT_ENCODING(btf_type_int(index_type))) {
+		btf_verifier_log_type(env, v->t, "Invalid index");
+		return -EINVAL;
+	}
+
+	/* Check array->type */
+	elem_type_id = array->type;
 	elem_type = btf_type_by_id(btf, elem_type_id);
 	if (btf_type_is_void_or_null(elem_type)) {
 		btf_verifier_log_type(env, v->t,
@@ -1351,22 +1399,9 @@ static int btf_array_resolve(struct btf_verifier_env *env,
 		return -EINVAL;
 	}
 
-	if (btf_type_is_int(elem_type)) {
-		int int_type_data = btf_type_int(elem_type);
-		u16 nr_bits = BTF_INT_BITS(int_type_data);
-		u16 nr_bytes = BITS_ROUNDUP_BYTES(nr_bits);
-
-		/* Put more restriction on array of int.  The int cannot
-		 * be a bit field and it must be either u8/u16/u32/u64.
-		 */
-		if (BITS_PER_BYTE_MASKED(nr_bits) ||
-		    BTF_INT_OFFSET(int_type_data) ||
-		    (nr_bytes != sizeof(u8) && nr_bytes != sizeof(u16) &&
-		     nr_bytes != sizeof(u32) && nr_bytes != sizeof(u64))) {
-			btf_verifier_log_type(env, v->t,
-					      "Invalid array of int");
-			return -EINVAL;
-		}
+	if (btf_type_is_int(elem_type) && !btf_type_int_is_regular(elem_type)) {
+		btf_verifier_log_type(env, v->t, "Invalid array of int");
+		return -EINVAL;
 	}
 
 	if (array->nelems && elem_size > U32_MAX / array->nelems) {
-- 
2.9.5

^ permalink raw reply related

* [PATCH bpf-next 7/7] bpf: btf: Add tests for the btf uapi changes
From: Martin KaFai Lau @ 2018-05-19  0:16 UTC (permalink / raw)
  To: netdev; +Cc: Alexei Starovoitov, Daniel Borkmann, kernel-team
In-Reply-To: <20180519001650.4043980-1-kafai@fb.com>

This patch does the following:
1. Modify libbpf and test_btf to reflect the uapi changes in btf
2. Add test for the btf_header changes
3. Add tests for array->index_type
4. Add err_str check to the tests
5. Fix a 4 bytes hole in "struct test #1" by swapping "m" and "n"

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
---
 tools/lib/bpf/bpf.c                    |   4 +-
 tools/lib/bpf/bpf.h                    |   4 +-
 tools/lib/bpf/btf.c                    |   5 +-
 tools/lib/bpf/libbpf.c                 |  34 +--
 tools/lib/bpf/libbpf.h                 |   4 +-
 tools/testing/selftests/bpf/test_btf.c | 528 ++++++++++++++++++++++++++-------
 6 files changed, 448 insertions(+), 131 deletions(-)

diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index 6a8a00097fd8..442b4cdfeb71 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -89,8 +89,8 @@ int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr)
 	       min(name_len, BPF_OBJ_NAME_LEN - 1));
 	attr.numa_node = create_attr->numa_node;
 	attr.btf_fd = create_attr->btf_fd;
-	attr.btf_key_id = create_attr->btf_key_id;
-	attr.btf_value_id = create_attr->btf_value_id;
+	attr.btf_key_type_id = create_attr->btf_key_type_id;
+	attr.btf_value_type_id = create_attr->btf_value_type_id;
 	attr.map_ifindex = create_attr->map_ifindex;
 
 	return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 15bff7728cf1..d12344f66d4e 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -36,8 +36,8 @@ struct bpf_create_map_attr {
 	__u32 max_entries;
 	__u32 numa_node;
 	__u32 btf_fd;
-	__u32 btf_key_id;
-	__u32 btf_value_id;
+	__u32 btf_key_type_id;
+	__u32 btf_value_type_id;
 	__u32 map_ifindex;
 };
 
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 2bac710e3194..8c54a4b6f187 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -35,9 +35,8 @@ struct btf {
 
 static const char *btf_name_by_offset(const struct btf *btf, uint32_t offset)
 {
-	if (!BTF_STR_TBL_ELF_ID(offset) &&
-	    BTF_STR_OFFSET(offset) < btf->hdr->str_len)
-		return &btf->strings[BTF_STR_OFFSET(offset)];
+	if (offset < btf->hdr->str_len)
+		return &btf->strings[offset];
 	else
 		return NULL;
 }
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 3dbe217bf23e..8f1707dbfcfa 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -216,8 +216,8 @@ struct bpf_map {
 	size_t offset;
 	int map_ifindex;
 	struct bpf_map_def def;
-	uint32_t btf_key_id;
-	uint32_t btf_value_id;
+	uint32_t btf_key_type_id;
+	uint32_t btf_value_type_id;
 	void *priv;
 	bpf_map_clear_priv_t clear_priv;
 };
@@ -1074,8 +1074,8 @@ static int bpf_map_find_btf_info(struct bpf_map *map, const struct btf *btf)
 		return -EINVAL;
 	}
 
-	map->btf_key_id = key_id;
-	map->btf_value_id = value_id;
+	map->btf_key_type_id = key_id;
+	map->btf_value_type_id = value_id;
 
 	return 0;
 }
@@ -1100,24 +1100,24 @@ bpf_object__create_maps(struct bpf_object *obj)
 		create_attr.value_size = def->value_size;
 		create_attr.max_entries = def->max_entries;
 		create_attr.btf_fd = 0;
-		create_attr.btf_key_id = 0;
-		create_attr.btf_value_id = 0;
+		create_attr.btf_key_type_id = 0;
+		create_attr.btf_value_type_id = 0;
 
 		if (obj->btf && !bpf_map_find_btf_info(map, obj->btf)) {
 			create_attr.btf_fd = btf__fd(obj->btf);
-			create_attr.btf_key_id = map->btf_key_id;
-			create_attr.btf_value_id = map->btf_value_id;
+			create_attr.btf_key_type_id = map->btf_key_type_id;
+			create_attr.btf_value_type_id = map->btf_value_type_id;
 		}
 
 		*pfd = bpf_create_map_xattr(&create_attr);
-		if (*pfd < 0 && create_attr.btf_key_id) {
+		if (*pfd < 0 && create_attr.btf_key_type_id) {
 			pr_warning("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n",
 				   map->name, strerror(errno), errno);
 			create_attr.btf_fd = 0;
-			create_attr.btf_key_id = 0;
-			create_attr.btf_value_id = 0;
-			map->btf_key_id = 0;
-			map->btf_value_id = 0;
+			create_attr.btf_key_type_id = 0;
+			create_attr.btf_value_type_id = 0;
+			map->btf_key_type_id = 0;
+			map->btf_value_type_id = 0;
 			*pfd = bpf_create_map_xattr(&create_attr);
 		}
 
@@ -2085,14 +2085,14 @@ const char *bpf_map__name(struct bpf_map *map)
 	return map ? map->name : NULL;
 }
 
-uint32_t bpf_map__btf_key_id(const struct bpf_map *map)
+uint32_t bpf_map__btf_key_type_id(const struct bpf_map *map)
 {
-	return map ? map->btf_key_id : 0;
+	return map ? map->btf_key_type_id : 0;
 }
 
-uint32_t bpf_map__btf_value_id(const struct bpf_map *map)
+uint32_t bpf_map__btf_value_type_id(const struct bpf_map *map)
 {
-	return map ? map->btf_value_id : 0;
+	return map ? map->btf_value_type_id : 0;
 }
 
 int bpf_map__set_priv(struct bpf_map *map, void *priv,
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index cd3fd8d782c7..09976531aa74 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -244,8 +244,8 @@ bpf_map__next(struct bpf_map *map, struct bpf_object *obj);
 int bpf_map__fd(struct bpf_map *map);
 const struct bpf_map_def *bpf_map__def(struct bpf_map *map);
 const char *bpf_map__name(struct bpf_map *map);
-uint32_t bpf_map__btf_key_id(const struct bpf_map *map);
-uint32_t bpf_map__btf_value_id(const struct bpf_map *map);
+uint32_t bpf_map__btf_key_type_id(const struct bpf_map *map);
+uint32_t bpf_map__btf_value_type_id(const struct bpf_map *map);
 
 typedef void (*bpf_map_clear_priv_t)(struct bpf_map *, void *);
 int bpf_map__set_priv(struct bpf_map *map, void *priv,
diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c
index c8bceae7ec02..4635d5557639 100644
--- a/tools/testing/selftests/bpf/test_btf.c
+++ b/tools/testing/selftests/bpf/test_btf.c
@@ -113,22 +113,26 @@ static char btf_log_buf[BTF_LOG_BUF_SIZE];
 static struct btf_header hdr_tmpl = {
 	.magic = BTF_MAGIC,
 	.version = BTF_VERSION,
+	.hdr_len = sizeof(struct btf_header),
 };
 
 struct btf_raw_test {
 	const char *descr;
 	const char *str_sec;
 	const char *map_name;
+	const char *err_str;
+	int (*special_test)(unsigned int test_num);
 	__u32 raw_types[MAX_NR_RAW_TYPES];
 	__u32 str_sec_size;
 	enum bpf_map_type map_type;
 	__u32 key_size;
 	__u32 value_size;
-	__u32 key_id;
-	__u32 value_id;
+	__u32 key_type_id;
+	__u32 value_type_id;
 	__u32 max_entries;
 	bool btf_load_err;
 	bool map_create_err;
+	int hdr_len_delta;
 	int type_off_delta;
 	int str_off_delta;
 	int str_len_delta;
@@ -141,8 +145,8 @@ static struct btf_raw_test raw_tests[] = {
  * };
  *
  * struct A {
- *	int m;
- *	unsigned long long n;
+ *	unsigned long long m;
+ *	int n;
  *	char o;
  *	[3 bytes hole]
  *	int p[8];
@@ -160,21 +164,24 @@ static struct btf_raw_test raw_tests[] = {
 		/* char */
 		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 8, 1),	/* [3] */
 		/* int[8] */
-		BTF_TYPE_ARRAY_ENC(1, 1, 8),			/* [4] */
+		BTF_TYPE_ARRAY_ENC(1, 8, 8),			/* [4] */
 		/* struct A { */				/* [5] */
 		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 6), 180),
-		BTF_MEMBER_ENC(NAME_TBD, 1, 0),	/* int m;		*/
-		BTF_MEMBER_ENC(NAME_TBD, 2, 32),/* unsigned long long n;*/
+		BTF_MEMBER_ENC(NAME_TBD, 2, 0),	/* unsigned long long m;*/
+		BTF_MEMBER_ENC(NAME_TBD, 1, 64),/* int n;		*/
 		BTF_MEMBER_ENC(NAME_TBD, 3, 96),/* char o;		*/
 		BTF_MEMBER_ENC(NAME_TBD, 4, 128),/* int p[8]		*/
 		BTF_MEMBER_ENC(NAME_TBD, 6, 384),/* int q[4][8]		*/
 		BTF_MEMBER_ENC(NAME_TBD, 7, 1408), /* enum E r		*/
 		/* } */
 		/* int[4][8] */
-		BTF_TYPE_ARRAY_ENC(4, 1, 4),			/* [6] */
+		BTF_TYPE_ARRAY_ENC(4, 8, 4),			/* [6]  */
+		/* enum E */					/* [7] */
 		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 2), sizeof(int)),
 		BTF_ENUM_ENC(NAME_TBD, 0),
 		BTF_ENUM_ENC(NAME_TBD, 1),
+		/* unsigned int */				/* [8] */
+		BTF_TYPE_INT_ENC(0, 0, 0, 32, 4),
 		BTF_END_RAW,
 	},
 	.str_sec = "\0A\0m\0n\0o\0p\0q\0r\0E\0E0\0E1",
@@ -183,8 +190,8 @@ static struct btf_raw_test raw_tests[] = {
 	.map_name = "struct_test1_map",
 	.key_size = sizeof(int),
 	.value_size = 180,
-	.key_id = 1,
-	.value_id = 5,
+	.key_type_id = 1,
+	.value_type_id = 5,
 	.max_entries = 4,
 },
 
@@ -207,7 +214,7 @@ static struct btf_raw_test raw_tests[] = {
 		/* int */					/* [1] */
 		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
 		/* struct b [4] */				/* [2] */
-		BTF_TYPE_ARRAY_ENC(4, 1, 4),
+		BTF_TYPE_ARRAY_ENC(4, 9, 4),
 
 		/* struct A { */				/* [3] */
 		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 3), 68),
@@ -229,7 +236,9 @@ static struct btf_raw_test raw_tests[] = {
 		/* const Struct_B */				/* [7] */
 		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 6),
 		/* const Struct_B [4] */			/* [8] */
-		BTF_TYPE_ARRAY_ENC(7, 1, 4),
+		BTF_TYPE_ARRAY_ENC(7, 9, 4),
+		/* unsigned int */				/* [9] */
+		BTF_TYPE_INT_ENC(0, 0, 0, 32, 4),
 		BTF_END_RAW,
 	},
 	.str_sec = "\0A\0m\0n\0o\0B\0m\0n\0Struct_B",
@@ -238,8 +247,8 @@ static struct btf_raw_test raw_tests[] = {
 	.map_name = "struct_test2_map",
 	.key_size = sizeof(int),
 	.value_size = 68,
-	.key_id = 1,
-	.value_id = 3,
+	.key_type_id = 1,
+	.value_type_id = 3,
 	.max_entries = 4,
 },
 
@@ -258,7 +267,7 @@ static struct btf_raw_test raw_tests[] = {
 		/* struct A { */				/* [2] */
 		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), sizeof(int) * 2 -  1),
 		BTF_MEMBER_ENC(NAME_TBD, 1, 0),	/* int m; */
-		BTF_MEMBER_ENC(NAME_TBD, 2, 32),/* int n; */
+		BTF_MEMBER_ENC(NAME_TBD, 1, 32),/* int n; */
 		/* } */
 		BTF_END_RAW,
 	},
@@ -268,10 +277,11 @@ static struct btf_raw_test raw_tests[] = {
 	.map_name = "size_check1_map",
 	.key_size = sizeof(int),
 	.value_size = 1,
-	.key_id = 1,
-	.value_id = 2,
+	.key_type_id = 1,
+	.value_type_id = 2,
 	.max_entries = 4,
 	.btf_load_err = true,
+	.err_str = "Member exceeds struct_size",
 },
 
 /* Test member exeeds the size of struct
@@ -287,12 +297,14 @@ static struct btf_raw_test raw_tests[] = {
 		/* int */					/* [1] */
 		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, sizeof(int)),
 		/* int[2] */					/* [2] */
-		BTF_TYPE_ARRAY_ENC(1, 1, 2),
+		BTF_TYPE_ARRAY_ENC(1, 4, 2),
 		/* struct A { */				/* [3] */
 		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), sizeof(int) * 3 - 1),
 		BTF_MEMBER_ENC(NAME_TBD, 1, 0),	/* int m; */
 		BTF_MEMBER_ENC(NAME_TBD, 2, 32),/* int n[2]; */
 		/* } */
+		/* unsigned int */				/* [4] */
+		BTF_TYPE_INT_ENC(0, 0, 0, 32, 4),
 		BTF_END_RAW,
 	},
 	.str_sec = "\0A\0m\0n",
@@ -301,11 +313,11 @@ static struct btf_raw_test raw_tests[] = {
 	.map_name = "size_check2_map",
 	.key_size = sizeof(int),
 	.value_size = 1,
-	.key_id = 1,
-	.value_id = 3,
+	.key_type_id = 1,
+	.value_type_id = 3,
 	.max_entries = 4,
 	.btf_load_err = true,
-
+	.err_str = "Member exceeds struct_size",
 },
 
 /* Test member exeeds the size of struct
@@ -335,10 +347,11 @@ static struct btf_raw_test raw_tests[] = {
 	.map_name = "size_check3_map",
 	.key_size = sizeof(int),
 	.value_size = 1,
-	.key_id = 1,
-	.value_id = 3,
+	.key_type_id = 1,
+	.value_type_id = 3,
 	.max_entries = 4,
 	.btf_load_err = true,
+	.err_str = "Member exceeds struct_size",
 },
 
 /* Test member exceeds the size of struct
@@ -376,10 +389,11 @@ static struct btf_raw_test raw_tests[] = {
 	.map_name = "size_check4_map",
 	.key_size = sizeof(int),
 	.value_size = 1,
-	.key_id = 1,
-	.value_id = 3,
+	.key_type_id = 1,
+	.value_type_id = 3,
 	.max_entries = 4,
 	.btf_load_err = true,
+	.err_str = "Member exceeds struct_size",
 },
 
 /* typedef const void * const_void_ptr;
@@ -411,8 +425,8 @@ static struct btf_raw_test raw_tests[] = {
 	.map_name = "void_test1_map",
 	.key_size = sizeof(int),
 	.value_size = sizeof(void *),
-	.key_id = 1,
-	.value_id = 4,
+	.key_type_id = 1,
+	.value_type_id = 4,
 	.max_entries = 4,
 },
 
@@ -440,10 +454,11 @@ static struct btf_raw_test raw_tests[] = {
 	.map_name = "void_test2_map",
 	.key_size = sizeof(int),
 	.value_size = sizeof(void *),
-	.key_id = 1,
-	.value_id = 3,
+	.key_type_id = 1,
+	.value_type_id = 3,
 	.max_entries = 4,
 	.btf_load_err = true,
+	.err_str = "Invalid member",
 },
 
 /* typedef const void * const_void_ptr;
@@ -458,10 +473,12 @@ static struct btf_raw_test raw_tests[] = {
 		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 0),
 		/* const void* */	/* [3] */
 		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 2),
-		/* typedef const void * const_void_ptr */
+		/* typedef const void * const_void_ptr */	/* [4] */
 		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 3),
-		/* const_void_ptr[4] */	/* [4] */
-		BTF_TYPE_ARRAY_ENC(3, 1, 4),
+		/* const_void_ptr[4] */	/* [5] */
+		BTF_TYPE_ARRAY_ENC(3, 6, 4),
+		/* unsigned int */	/* [6] */
+		BTF_TYPE_INT_ENC(0, 0, 0, 32, 4),
 		BTF_END_RAW,
 	},
 	.str_sec = "\0const_void_ptr",
@@ -470,8 +487,8 @@ static struct btf_raw_test raw_tests[] = {
 	.map_name = "void_test3_map",
 	.key_size = sizeof(int),
 	.value_size = sizeof(void *) * 4,
-	.key_id = 1,
-	.value_id = 4,
+	.key_type_id = 1,
+	.value_type_id = 4,
 	.max_entries = 4,
 },
 
@@ -484,7 +501,9 @@ static struct btf_raw_test raw_tests[] = {
 		/* const void */	/* [2] */
 		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 0),
 		/* const void[4] */	/* [3] */
-		BTF_TYPE_ARRAY_ENC(2, 1, 4),
+		BTF_TYPE_ARRAY_ENC(2, 4, 4),
+		/* unsigned int */	/* [4] */
+		BTF_TYPE_INT_ENC(0, 0, 0, 32, 4),
 		BTF_END_RAW,
 	},
 	.str_sec = "\0A\0m",
@@ -493,10 +512,11 @@ static struct btf_raw_test raw_tests[] = {
 	.map_name = "void_test4_map",
 	.key_size = sizeof(int),
 	.value_size = sizeof(void *) * 4,
-	.key_id = 1,
-	.value_id = 3,
+	.key_type_id = 1,
+	.value_type_id = 3,
 	.max_entries = 4,
 	.btf_load_err = true,
+	.err_str = "Invalid elem",
 },
 
 /* Array_A  <------------------+
@@ -512,9 +532,11 @@ static struct btf_raw_test raw_tests[] = {
 		/* int */			/* [1] */
 		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
 		/* Array_A */			/* [2] */
-		BTF_TYPE_ARRAY_ENC(3, 1, 8),
+		BTF_TYPE_ARRAY_ENC(3, 4, 8),
 		/* Array_B */			/* [3] */
-		BTF_TYPE_ARRAY_ENC(2, 1, 8),
+		BTF_TYPE_ARRAY_ENC(2, 4, 8),
+		/* unsigned int */		/* [4] */
+		BTF_TYPE_INT_ENC(0, 0, 0, 32, 4),
 		BTF_END_RAW,
 	},
 	.str_sec = "",
@@ -523,10 +545,11 @@ static struct btf_raw_test raw_tests[] = {
 	.map_name = "loop_test1_map",
 	.key_size = sizeof(int),
 	.value_size = sizeof(sizeof(int) * 8),
-	.key_id = 1,
-	.value_id = 2,
+	.key_type_id = 1,
+	.value_type_id = 2,
 	.max_entries = 4,
 	.btf_load_err = true,
+	.err_str = "Loop detected",
 },
 
 /* typedef is _before_ the BTF type of Array_A and Array_B
@@ -548,10 +571,11 @@ static struct btf_raw_test raw_tests[] = {
 		/* typedef Array_B int_array */
 		BTF_TYPEDEF_ENC(1, 4),				/* [2] */
 		/* Array_A */
-		BTF_TYPE_ARRAY_ENC(2, 1, 8),			/* [3] */
+		BTF_TYPE_ARRAY_ENC(2, 5, 8),			/* [3] */
 		/* Array_B */
-		BTF_TYPE_ARRAY_ENC(3, 1, 8),			/* [4] */
-
+		BTF_TYPE_ARRAY_ENC(3, 5, 8),			/* [4] */
+		/* unsigned int */				/* [5] */
+		BTF_TYPE_INT_ENC(0, 0, 0, 32, 4),
 		BTF_END_RAW,
 	},
 	.str_sec = "\0int_array\0",
@@ -560,10 +584,11 @@ static struct btf_raw_test raw_tests[] = {
 	.map_name = "loop_test2_map",
 	.key_size = sizeof(int),
 	.value_size = sizeof(sizeof(int) * 8),
-	.key_id = 1,
-	.value_id = 2,
+	.key_type_id = 1,
+	.value_type_id = 2,
 	.max_entries = 4,
 	.btf_load_err = true,
+	.err_str = "Loop detected",
 },
 
 /* Array_A  <------------------+
@@ -579,10 +604,11 @@ static struct btf_raw_test raw_tests[] = {
 		/* int */				/* [1] */
 		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
 		/* Array_A */				/* [2] */
-		BTF_TYPE_ARRAY_ENC(3, 1, 8),
+		BTF_TYPE_ARRAY_ENC(3, 4, 8),
 		/* Array_B */				/* [3] */
-		BTF_TYPE_ARRAY_ENC(2, 1, 8),
-
+		BTF_TYPE_ARRAY_ENC(2, 4, 8),
+		/* unsigned int */			/* [4] */
+		BTF_TYPE_INT_ENC(0, 0, 0, 32, 4),
 		BTF_END_RAW,
 	},
 	.str_sec = "",
@@ -591,10 +617,11 @@ static struct btf_raw_test raw_tests[] = {
 	.map_name = "loop_test3_map",
 	.key_size = sizeof(int),
 	.value_size = sizeof(sizeof(int) * 8),
-	.key_id = 1,
-	.value_id = 2,
+	.key_type_id = 1,
+	.value_type_id = 2,
 	.max_entries = 4,
 	.btf_load_err = true,
+	.err_str = "Loop detected",
 },
 
 /* typedef is _between_ the BTF type of Array_A and Array_B
@@ -614,11 +641,13 @@ static struct btf_raw_test raw_tests[] = {
 		/* int */				/* [1] */
 		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
 		/* Array_A */				/* [2] */
-		BTF_TYPE_ARRAY_ENC(3, 1, 8),
+		BTF_TYPE_ARRAY_ENC(3, 5, 8),
 		/* typedef Array_B int_array */		/* [3] */
 		BTF_TYPEDEF_ENC(NAME_TBD, 4),
 		/* Array_B */				/* [4] */
-		BTF_TYPE_ARRAY_ENC(2, 1, 8),
+		BTF_TYPE_ARRAY_ENC(2, 5, 8),
+		/* unsigned int */			/* [5] */
+		BTF_TYPE_INT_ENC(0, 0, 0, 32, 4),
 		BTF_END_RAW,
 	},
 	.str_sec = "\0int_array\0",
@@ -627,10 +656,11 @@ static struct btf_raw_test raw_tests[] = {
 	.map_name = "loop_test4_map",
 	.key_size = sizeof(int),
 	.value_size = sizeof(sizeof(int) * 8),
-	.key_id = 1,
-	.value_id = 2,
+	.key_type_id = 1,
+	.value_type_id = 2,
 	.max_entries = 4,
 	.btf_load_err = true,
+	.err_str = "Loop detected",
 },
 
 /* typedef struct B Struct_B
@@ -668,10 +698,11 @@ static struct btf_raw_test raw_tests[] = {
 	.map_name = "loop_test5_map",
 	.key_size = sizeof(int),
 	.value_size = 8,
-	.key_id = 1,
-	.value_id = 2,
+	.key_type_id = 1,
+	.value_type_id = 2,
 	.max_entries = 4,
 	.btf_load_err = true,
+	.err_str = "Loop detected",
 },
 
 /* struct A {
@@ -684,11 +715,13 @@ static struct btf_raw_test raw_tests[] = {
 	.raw_types = {
 		/* int */
 		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
-		BTF_TYPE_ARRAY_ENC(3, 1, 4),			/* [2] */
+		BTF_TYPE_ARRAY_ENC(3, 4, 4),			/* [2] */
 		/* struct A */					/* [3] */
 		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), 8),
 		BTF_MEMBER_ENC(NAME_TBD, 1, 0),	/* int x;		*/
 		BTF_MEMBER_ENC(NAME_TBD, 2, 32),/* struct A array_a[4];	*/
+		/* unsigned int */				/* [4] */
+		BTF_TYPE_INT_ENC(0, 0, 0, 32, 4),
 		BTF_END_RAW,
 	},
 	.str_sec = "\0A\0x\0y",
@@ -697,10 +730,11 @@ static struct btf_raw_test raw_tests[] = {
 	.map_name = "loop_test6_map",
 	.key_size = sizeof(int),
 	.value_size = 8,
-	.key_id = 1,
-	.value_id = 2,
+	.key_type_id = 1,
+	.value_type_id = 2,
 	.max_entries = 4,
 	.btf_load_err = true,
+	.err_str = "Loop detected",
 },
 
 {
@@ -724,10 +758,11 @@ static struct btf_raw_test raw_tests[] = {
 	.map_name = "loop_test7_map",
 	.key_size = sizeof(int),
 	.value_size = sizeof(void *),
-	.key_id = 1,
-	.value_id = 2,
+	.key_type_id = 1,
+	.value_type_id = 2,
 	.max_entries = 4,
 	.btf_load_err = true,
+	.err_str = "Loop detected",
 },
 
 {
@@ -759,14 +794,73 @@ static struct btf_raw_test raw_tests[] = {
 	.map_name = "loop_test8_map",
 	.key_size = sizeof(int),
 	.value_size = sizeof(void *),
-	.key_id = 1,
-	.value_id = 2,
+	.key_type_id = 1,
+	.value_type_id = 2,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Loop detected",
+},
+
+{
+	.descr = "string section does not end with null",
+	.raw_types = {
+		/* int */				/* [1] */
+		BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
+		BTF_END_RAW,
+	},
+	.str_sec = "\0int",
+	.str_sec_size = sizeof("\0int") - 1,
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "hdr_test_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Invalid string section",
+},
+
+{
+	.descr = "empty string section",
+	.raw_types = {
+		/* int */				/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		BTF_END_RAW,
+	},
+	.str_sec = "",
+	.str_sec_size = 0,
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "hdr_test_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Invalid string section",
+},
+
+{
+	.descr = "empty type section",
+	.raw_types = {
+		BTF_END_RAW,
+	},
+	.str_sec = "\0int",
+	.str_sec_size = sizeof("\0int"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "hdr_test_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
 	.max_entries = 4,
 	.btf_load_err = true,
+	.err_str = "No type found",
 },
 
 {
-	.descr = "type_off == str_off",
+	.descr = "btf_header test #1. Longer hdr_len",
 	.raw_types = {
 		/* int */				/* [1] */
 		BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
@@ -778,15 +872,16 @@ static struct btf_raw_test raw_tests[] = {
 	.map_name = "hdr_test_map",
 	.key_size = sizeof(int),
 	.value_size = sizeof(int),
-	.key_id = 1,
-	.value_id = 1,
+	.key_type_id = 1,
+	.value_type_id = 1,
 	.max_entries = 4,
 	.btf_load_err = true,
-	.type_off_delta = sizeof(struct btf_type) + sizeof(int) + sizeof("\0int"),
+	.hdr_len_delta = 4,
+	.err_str = "Unsupported btf_header",
 },
 
 {
-	.descr = "Unaligned type_off",
+	.descr = "btf_header test #2. Gap between hdr and type",
 	.raw_types = {
 		/* int */				/* [1] */
 		BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
@@ -798,15 +893,16 @@ static struct btf_raw_test raw_tests[] = {
 	.map_name = "hdr_test_map",
 	.key_size = sizeof(int),
 	.value_size = sizeof(int),
-	.key_id = 1,
-	.value_id = 1,
+	.key_type_id = 1,
+	.value_type_id = 1,
 	.max_entries = 4,
 	.btf_load_err = true,
-	.type_off_delta = 1,
+	.type_off_delta = 4,
+	.err_str = "Unsupported section found",
 },
 
 {
-	.descr = "str_off beyonds btf size",
+	.descr = "btf_header test #3. Gap between type and str",
 	.raw_types = {
 		/* int */				/* [1] */
 		BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
@@ -818,15 +914,16 @@ static struct btf_raw_test raw_tests[] = {
 	.map_name = "hdr_test_map",
 	.key_size = sizeof(int),
 	.value_size = sizeof(int),
-	.key_id = 1,
-	.value_id = 1,
+	.key_type_id = 1,
+	.value_type_id = 1,
 	.max_entries = 4,
 	.btf_load_err = true,
-	.str_off_delta = sizeof("\0int") + 1,
+	.str_off_delta = 4,
+	.err_str = "Unsupported section found",
 },
 
 {
-	.descr = "str_len beyonds btf size",
+	.descr = "btf_header test #4. Overlap between type and str",
 	.raw_types = {
 		/* int */				/* [1] */
 		BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
@@ -838,15 +935,16 @@ static struct btf_raw_test raw_tests[] = {
 	.map_name = "hdr_test_map",
 	.key_size = sizeof(int),
 	.value_size = sizeof(int),
-	.key_id = 1,
-	.value_id = 1,
+	.key_type_id = 1,
+	.value_type_id = 1,
 	.max_entries = 4,
 	.btf_load_err = true,
-	.str_len_delta = 1,
+	.str_off_delta = -4,
+	.err_str = "Section overlap found",
 },
 
 {
-	.descr = "String section does not end with null",
+	.descr = "btf_header test #5. Larger BTF size",
 	.raw_types = {
 		/* int */				/* [1] */
 		BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
@@ -858,15 +956,16 @@ static struct btf_raw_test raw_tests[] = {
 	.map_name = "hdr_test_map",
 	.key_size = sizeof(int),
 	.value_size = sizeof(int),
-	.key_id = 1,
-	.value_id = 1,
+	.key_type_id = 1,
+	.value_type_id = 1,
 	.max_entries = 4,
 	.btf_load_err = true,
-	.str_len_delta = -1,
+	.str_len_delta = -4,
+	.err_str = "Unsupported section found",
 },
 
 {
-	.descr = "Empty string section",
+	.descr = "btf_header test #6. Smaller BTF size",
 	.raw_types = {
 		/* int */				/* [1] */
 		BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
@@ -878,11 +977,223 @@ static struct btf_raw_test raw_tests[] = {
 	.map_name = "hdr_test_map",
 	.key_size = sizeof(int),
 	.value_size = sizeof(int),
-	.key_id = 1,
-	.value_id = 1,
+	.key_type_id = 1,
+	.value_type_id = 1,
 	.max_entries = 4,
 	.btf_load_err = true,
-	.str_len_delta = 0 - (int)sizeof("\0int"),
+	.str_len_delta = 4,
+	.err_str = "Total section length too long",
+},
+
+{
+	.descr = "array test #1. index_type \"unsigned int\"",
+	.raw_types = {
+		/* int */				/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		/* unsigned int */			/* [2] */
+		BTF_TYPE_INT_ENC(0, 0, 0, 32, 4),
+		/* unsigned int[16] */			/* [3] */
+		BTF_TYPE_ARRAY_ENC(1, 2, 16),
+		BTF_END_RAW,
+	},
+	.str_sec = "",
+	.str_sec_size = sizeof(""),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "array_test_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+},
+
+{
+	.descr = "array test #2. index_type \"const unsigned int\"",
+	.raw_types = {
+		/* int */				/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		/* unsigned int */			/* [2] */
+		BTF_TYPE_INT_ENC(0, 0, 0, 32, 4),
+		/* int[16] */				/* [3] */
+		BTF_TYPE_ARRAY_ENC(1, 4, 16),
+		/* CONST type_id=2 */			/* [4] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 2),
+		BTF_END_RAW,
+	},
+	.str_sec = "",
+	.str_sec_size = sizeof(""),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "array_test_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+},
+
+{
+	.descr = "array test #3. index_type \"int\"",
+	.raw_types = {
+		/* int */				/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		/* int[16] */				/* [2] */
+		BTF_TYPE_ARRAY_ENC(1, 1, 16),
+		BTF_END_RAW,
+	},
+	.str_sec = "",
+	.str_sec_size = sizeof(""),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "array_test_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Invalid index",
+},
+
+{
+	.descr = "array test #3. index_type \"const int\"",
+	.raw_types = {
+		/* int */				/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		/* int[16] */				/* [2] */
+		BTF_TYPE_ARRAY_ENC(1, 3, 16),
+		/* CONST type_id=1 */			/* [3] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 1),
+		BTF_END_RAW,
+	},
+	.str_sec = "",
+	.str_sec_size = sizeof(""),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "array_test_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Invalid index",
+},
+
+{
+	.descr = "array test #4. index_type \"void\"",
+	.raw_types = {
+		/* int */				/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		/* int[16] */				/* [2] */
+		BTF_TYPE_ARRAY_ENC(1, 0, 16),
+		BTF_END_RAW,
+	},
+	.str_sec = "",
+	.str_sec_size = sizeof(""),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "array_test_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Invalid index",
+},
+
+{
+	.descr = "array test #5. index_type \"const void\"",
+	.raw_types = {
+		/* int */				/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		/* int[16] */				/* [2] */
+		BTF_TYPE_ARRAY_ENC(1, 3, 16),
+		/* CONST type_id=0 (void) */		/* [3] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 0),
+		BTF_END_RAW,
+	},
+	.str_sec = "",
+	.str_sec_size = sizeof(""),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "array_test_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Invalid index",
+},
+
+{
+	.descr = "array test #6. elem_type \"const void *\"",
+	.raw_types = {
+		/* int */				/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		/* const void *[16] */			/* [2] */
+		BTF_TYPE_ARRAY_ENC(3, 5, 16),
+		/* CONST type_id=4 */			/* [3] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 4),
+		/* void* */				/* [4] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 0),
+		/* unsigned int */			/* [5] */
+		BTF_TYPE_INT_ENC(0, 0, 0, 32, 4),
+		BTF_END_RAW,
+	},
+	.str_sec = "",
+	.str_sec_size = sizeof(""),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "array_test_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+},
+
+{
+	.descr = "array test #7. index_type \"const void *\"",
+	.raw_types = {
+		/* int */				/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		/* const void *[16] */			/* [2] */
+		BTF_TYPE_ARRAY_ENC(3, 3, 16),
+		/* CONST type_id=4 */			/* [3] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 4),
+		/* void* */				/* [4] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 0),
+		BTF_END_RAW,
+	},
+	.str_sec = "",
+	.str_sec_size = sizeof(""),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "array_test_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Invalid index",
+},
+
+{
+	.descr = "invalid BTF_INFO",
+	.raw_types = {
+		/* int */				/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		BTF_TYPE_ENC(0, 0x10000000, 4),
+		BTF_END_RAW,
+	},
+	.str_sec = "",
+	.str_sec_size = sizeof(""),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "array_test_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Invalid btf_info",
 },
 
 }; /* struct btf_raw_test raw_tests[] */
@@ -951,6 +1262,7 @@ static void *btf_raw_create(const struct btf_header *hdr,
 	memcpy(raw_btf + offset, str, str_sec_size);
 
 	ret_hdr = (struct btf_header *)raw_btf;
+	ret_hdr->type_len = type_sec_size;
 	ret_hdr->str_off = type_sec_size;
 	ret_hdr->str_len = str_sec_size;
 
@@ -981,6 +1293,7 @@ static int do_test_raw(unsigned int test_num)
 
 	hdr = raw_btf;
 
+	hdr->hdr_len = (int)hdr->hdr_len + test->hdr_len_delta;
 	hdr->type_off = (int)hdr->type_off + test->type_off_delta;
 	hdr->str_off = (int)hdr->str_off + test->str_off_delta;
 	hdr->str_len = (int)hdr->str_len + test->str_len_delta;
@@ -992,8 +1305,13 @@ static int do_test_raw(unsigned int test_num)
 	free(raw_btf);
 
 	err = ((btf_fd == -1) != test->btf_load_err);
-	CHECK(err, "btf_fd:%d test->btf_load_err:%u",
-	      btf_fd, test->btf_load_err);
+	if (CHECK(err, "btf_fd:%d test->btf_load_err:%u",
+		  btf_fd, test->btf_load_err) ||
+	    CHECK(test->err_str && !strstr(btf_log_buf, test->err_str),
+		  "expected err_str:%s", test->err_str)) {
+		err = -1;
+		goto done;
+	}
 
 	if (err || btf_fd == -1)
 		goto done;
@@ -1004,8 +1322,8 @@ static int do_test_raw(unsigned int test_num)
 	create_attr.value_size = test->value_size;
 	create_attr.max_entries = test->max_entries;
 	create_attr.btf_fd = btf_fd;
-	create_attr.btf_key_id = test->key_id;
-	create_attr.btf_value_id = test->value_id;
+	create_attr.btf_key_type_id = test->key_type_id;
+	create_attr.btf_value_type_id = test->value_type_id;
 
 	map_fd = bpf_create_map_xattr(&create_attr);
 
@@ -1267,8 +1585,8 @@ static int test_btf_id(unsigned int test_num)
 	create_attr.value_size = sizeof(unsigned int);
 	create_attr.max_entries = 4;
 	create_attr.btf_fd = btf_fd[0];
-	create_attr.btf_key_id = 1;
-	create_attr.btf_value_id = 2;
+	create_attr.btf_key_type_id = 1;
+	create_attr.btf_value_type_id = 2;
 
 	map_fd = bpf_create_map_xattr(&create_attr);
 	if (CHECK(map_fd == -1, "errno:%d", errno)) {
@@ -1279,10 +1597,10 @@ static int test_btf_id(unsigned int test_num)
 	info_len = sizeof(map_info);
 	err = bpf_obj_get_info_by_fd(map_fd, &map_info, &info_len);
 	if (CHECK(err || map_info.btf_id != info[0].id ||
-		  map_info.btf_key_id != 1 || map_info.btf_value_id != 2,
-		  "err:%d errno:%d info.id:%u btf_id:%u btf_key_id:%u btf_value_id:%u",
-		  err, errno, info[0].id, map_info.btf_id, map_info.btf_key_id,
-		  map_info.btf_value_id)) {
+		  map_info.btf_key_type_id != 1 || map_info.btf_value_type_id != 2,
+		  "err:%d errno:%d info.id:%u btf_id:%u btf_key_type_id:%u btf_value_type_id:%u",
+		  err, errno, info[0].id, map_info.btf_id, map_info.btf_key_type_id,
+		  map_info.btf_value_type_id)) {
 		err = -1;
 		goto done;
 	}
@@ -1542,10 +1860,10 @@ static int do_test_file(unsigned int test_num)
 		goto done;
 	}
 
-	err = (bpf_map__btf_key_id(map) == 0 || bpf_map__btf_value_id(map) == 0)
+	err = (bpf_map__btf_key_type_id(map) == 0 || bpf_map__btf_value_type_id(map) == 0)
 		!= test->btf_kv_notfound;
-	if (CHECK(err, "btf_key_id:%u btf_value_id:%u test->btf_kv_notfound:%u",
-		  bpf_map__btf_key_id(map), bpf_map__btf_value_id(map),
+	if (CHECK(err, "btf_key_type_id:%u btf_value_type_id:%u test->btf_kv_notfound:%u",
+		  bpf_map__btf_key_type_id(map), bpf_map__btf_value_type_id(map),
 		  test->btf_kv_notfound))
 		goto done;
 
@@ -1654,8 +1972,8 @@ static struct btf_raw_test pprint_test = {
 	.map_name = "pprint_test",
 	.key_size = sizeof(unsigned int),
 	.value_size = sizeof(struct pprint_mapv),
-	.key_id = 3,	/* unsigned int */
-	.value_id = 16,	/* struct pprint_mapv */
+	.key_type_id = 3,	/* unsigned int */
+	.value_type_id = 16,	/* struct pprint_mapv */
 	.max_entries = 128 * 1024,
 };
 
@@ -1712,8 +2030,8 @@ static int test_pprint(void)
 	create_attr.value_size = test->value_size;
 	create_attr.max_entries = test->max_entries;
 	create_attr.btf_fd = btf_fd;
-	create_attr.btf_key_id = test->key_id;
-	create_attr.btf_value_id = test->value_id;
+	create_attr.btf_key_type_id = test->key_type_id;
+	create_attr.btf_value_type_id = test->value_type_id;
 
 	map_fd = bpf_create_map_xattr(&create_attr);
 	if (CHECK(map_fd == -1, "errno:%d", errno)) {
-- 
2.9.5

^ permalink raw reply related

* Re: [PATCH 1/2] bpf: sockmap, double free in __sock_map_ctx_update_elem()
From: Gustavo A. R. Silva @ 2018-05-19  0:17 UTC (permalink / raw)
  To: Dan Carpenter, Daniel Borkmann
  Cc: Alexei Starovoitov, John Fastabend, netdev, kernel-janitors
In-Reply-To: <20180518143930.hopqsx3sbrbsxlfp@mwanda>


Hi Dan,

On 05/18/2018 09:39 AM, Dan Carpenter wrote:
> On Fri, May 18, 2018 at 10:27:18AM +0200, Daniel Borkmann wrote:
>>
>> Thanks for the two fixes, appreciate it! There were two similar ones that
>> fix the same issues which were already applied yesterday to bpf-next:
>>
>> https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/commit/?id=0e4364560361d57e8cd873a8990327f3471d7d8a
>> https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/commit/?id=a78622932c27e8ec33e5ba180f3d2e87fb806b28
> 
> Hey Gustavo,
> 
> We're sort of duplicating each other's work.  Could you CC
> kernel-janitors@vger.kernel.org for static checker fixes so that I can
> see what you're working on?
> 

Sure thing.

I've been doing this work for more than a year now and just recently we 
are having these issues. I'm a bit curious about it.

> We'll probably still send the occasional duplicate which is fine...
> 

Yep. Not a big deal for me.

Have a good one.

^ permalink raw reply

* Re: [PATCH net] tuntap: raise EPOLLOUT on device up
From: Jason Wang @ 2018-05-19  1:09 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: netdev, linux-kernel, Hannes Frederic Sowa, Eric Dumazet
In-Reply-To: <20180518172932-mutt-send-email-mst@kernel.org>



On 2018年05月18日 22:46, Michael S. Tsirkin wrote:
> On Fri, May 18, 2018 at 10:11:54PM +0800, Jason Wang wrote:
>>
>> On 2018年05月18日 22:06, Michael S. Tsirkin wrote:
>>> On Fri, May 18, 2018 at 10:00:31PM +0800, Jason Wang wrote:
>>>> On 2018年05月18日 21:26, Jason Wang wrote:
>>>>> On 2018年05月18日 21:13, Michael S. Tsirkin wrote:
>>>>>> On Fri, May 18, 2018 at 09:00:43PM +0800, Jason Wang wrote:
>>>>>>> We return -EIO when the device is down but cannot raise EPOLLOUT after it
>>>>>>> comes back up. This may confuse users such as vhost, which expects tuntap to
>>>>>>> raise EPOLLOUT to re-enable its TX routine after tuntap has been down. This can
>>>>>>> be easily reproduced by transmitting packets from a VM while bringing the tap
>>>>>>> device down and up. Fix this by setting SOCKWQ_ASYNC_NOSPACE on -EIO.
>>>>>>>
>>>>>>> Cc: Hannes Frederic Sowa <hannes@stressinduktion.org>
>>>>>>> Cc: Eric Dumazet <edumazet@google.com>
>>>>>>> Fixes: 1bd4978a88ac2 ("tun: honor IFF_UP in tun_get_user()")
>>>>>>> Signed-off-by: Jason Wang <jasowang@redhat.com>
>>>>>>> ---
>>>>>>>     drivers/net/tun.c | 4 +++-
>>>>>>>     1 file changed, 3 insertions(+), 1 deletion(-)
>>>>>>>
>>>>>>> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
>>>>>>> index d45ac37..1b29761 100644
>>>>>>> --- a/drivers/net/tun.c
>>>>>>> +++ b/drivers/net/tun.c
>>>>>>> @@ -1734,8 +1734,10 @@ static ssize_t tun_get_user(struct
>>>>>>> tun_struct *tun, struct tun_file *tfile,
>>>>>>>         int skb_xdp = 1;
>>>>>>>         bool frags = tun_napi_frags_enabled(tun);
>>>>>>>     -    if (!(tun->dev->flags & IFF_UP))
>>>>>>> +    if (!(tun->dev->flags & IFF_UP)) {
>>>>>> Isn't this racy?  What if flag is cleared at this point?
>>>>> I think you mean "set at this point"? Then yes, so we probably need to
>>>>> set the bit during tun_net_close().
>>>>>
>>>>> Thanks
>>>> Looks like there is no need: vhost will poll the socket after it sees EIO. So we are OK here?
>>>>
>>>> Thanks
>>> In fact I don't even understand why this helps any longer.
>>>
>> We disable tx polling and only enable it on demand for a better rx
>> performance. You may want to have a look at :
>>
>> commit feb8892cb441c742d4220cf7ced001e7fa070731
>> Author: Jason Wang <jasowang@redhat.com>
>> Date:   Mon Nov 13 11:45:34 2017 +0800
>>
>>      vhost_net: conditionally enable tx polling
>>
>> Thanks
>
> Question is, what looks at SOCKWQ_ASYNC_NOSPACE.
> I think it's tested when packet is transmitted,
> but there is no guarantee here any packet will
> ever be transmitted.
>

Well, actually, I do plan to disable vq polling from the beginning. But
it looks like you do not want this:

See https://patchwork.kernel.org/patch/10034025/

Thanks

^ permalink raw reply

* Re: [RFC v4 3/5] virtio_ring: add packed ring support
From: Jason Wang @ 2018-05-19  1:12 UTC (permalink / raw)
  To: Tiwei Bie; +Cc: mst, virtualization, linux-kernel, netdev, wexu, jfreimann
In-Reply-To: <20180518143334.GA4537@debian>



On 2018年05月18日 22:33, Tiwei Bie wrote:
> On Fri, May 18, 2018 at 09:17:05PM +0800, Jason Wang wrote:
>> On 2018年05月18日 19:29, Tiwei Bie wrote:
>>> On Thu, May 17, 2018 at 08:01:52PM +0800, Jason Wang wrote:
>>>> On 2018年05月16日 22:33, Tiwei Bie wrote:
>>>>> On Wed, May 16, 2018 at 10:05:44PM +0800, Jason Wang wrote:
>>>>>> On 2018年05月16日 21:45, Tiwei Bie wrote:
>>>>>>> On Wed, May 16, 2018 at 08:51:43PM +0800, Jason Wang wrote:
>>>>>>>> On 2018年05月16日 20:39, Tiwei Bie wrote:
>>>>>>>>> On Wed, May 16, 2018 at 07:50:16PM +0800, Jason Wang wrote:
>>>>>>>>>> On 2018年05月16日 16:37, Tiwei Bie wrote:
>>>>> [...]
>>>>>>>>>>> +static void detach_buf_packed(struct vring_virtqueue *vq, unsigned int head,
>>>>>>>>>>> +			      unsigned int id, void **ctx)
>>>>>>>>>>> +{
>>>>>>>>>>> +	struct vring_packed_desc *desc;
>>>>>>>>>>> +	unsigned int i, j;
>>>>>>>>>>> +
>>>>>>>>>>> +	/* Clear data ptr. */
>>>>>>>>>>> +	vq->desc_state[id].data = NULL;
>>>>>>>>>>> +
>>>>>>>>>>> +	i = head;
>>>>>>>>>>> +
>>>>>>>>>>> +	for (j = 0; j < vq->desc_state[id].num; j++) {
>>>>>>>>>>> +		desc = &vq->vring_packed.desc[i];
>>>>>>>>>>> +		vring_unmap_one_packed(vq, desc);
>>>>>>>>>> As mentioned in previous discussion, this probably won't work for the case
>>>>>>>>>> of out of order completion since it depends on the information in the
>>>>>>>>>> descriptor ring. We probably need to extend ctx to record such information.
>>>>>>>>> Above code doesn't depend on the information in the descriptor
>>>>>>>>> ring. The vq->desc_state[] is the extended ctx.
>>>>>>>>>
>>>>>>>>> Best regards,
>>>>>>>>> Tiwei Bie
>>>>>>>> Yes, but desc is a pointer to descriptor ring I think so
>>>>>>>> vring_unmap_one_packed() still depends on the content of descriptor ring?
>>>>>>>>
>>>>>>> I got your point now. I think it makes sense to reserve
>>>>>>> the bits of the addr field. Driver shouldn't try to get
>>>>>>> addrs from the descriptors when cleanup the descriptors
>>>>>>> no matter whether we support out-of-order or not.
>>>>>> Maybe I was wrong, but I remember spec mentioned something like this.
>>>>> You're right. Spec mentioned this. I was just repeating
>>>>> the spec to emphasize that it does make sense. :)
>>>>>
>>>>>>> But combining it with the out-of-order support, it will
>>>>>>> mean that the driver still needs to maintain a desc/ctx
>>>>>>> list that is very similar to the desc ring in the split
>>>>>>> ring. I'm not quite sure whether it's something we want.
>>>>>>> If it is true, I'll do it. So do you think we also want
>>>>>>> to maintain such a desc/ctx list for packed ring?
>>>>>> To make it work for OOO backends I think we need something like this
>>>>>> (hardware NIC drivers are usually have something like this).
>>>>> Which hardware NIC drivers have this?
>>>> It's quite common I think, e.g. drivers track things such as the DMA addr and
>>>> page frag somewhere, e.g. the ring->rx_info in the mlx4 driver.
>>> It seems that I had a misunderstanding on your
>>> previous comments. I know it's quite common for
>>> drivers to track e.g. DMA addrs somewhere (and
>>> I think one reason behind this is that they want
>>> to reuse the bits of addr field).
>> Yes, we may want this for virtio-net as well in the future.
>>
>>>    But tracking
>>> addrs somewhere doesn't mean supporting OOO.
>>> I thought you were saying it's quite common for
>>> hardware NIC drivers to support OOO (i.e. NICs
>>> will return the descriptors OOO):
>>>
>>> I'm not familiar with mlx4, maybe I'm wrong.
>>> I just had a quick glance. And I found below
>>> comments in mlx4_en_process_rx_cq():
>>>
>>> ```
>>> /* We assume a 1:1 mapping between CQEs and Rx descriptors, so Rx
>>>    * descriptor offset can be deduced from the CQE index instead of
>>>    * reading 'cqe->index' */
>>> index = cq->mcq.cons_index & ring->size_mask;
>>> cqe = mlx4_en_get_cqe(cq->buf, index, priv->cqe_size) + factor;
>>> ```
>>>
>>> It seems that although they have a completion
>>> queue, they are still using the ring in order.
>> I guess so (at least from the above bits). Git grep -i "out of order" in
>> drivers/net gives some hints. Looks like there are a few devices that do this.
>>
>>> I guess maybe storage device may want OOO.
>> Right, some iSCSI did.
>>
>> But tracking them elsewhere is not only for OOO.
>>
>> Spec said:
>>
>> for element address
>>
>> "
>> In a used descriptor, Element Address is unused.
>> "
>>
>> for Next flag:
>>
>> "
>> For example, if descriptors are used in the same order in which they are
>> made available, this will result in
>> the used descriptor overwriting the first available descriptor in the list,
>> the used descriptor for the next list
>> overwriting the first available descriptor in the next list, etc.
>> "
>>
>> for in order completion:
>>
>> "
>> This will result in the used descriptor overwriting the first available
>> descriptor in the batch, the used descriptor
>> for the next batch overwriting the first available descriptor in the next
>> batch, etc.
>> "
>>
>> So:
>>
>> - It's an alignment to the spec
>> - the device may (or should) overwrite the descriptor, which also makes the
>> address field useless.
> You didn't get my point...

I hope not.

> I agreed driver should track the DMA addrs or some
> other necessary things from the very beginning. And
> I also repeated the spec to emphasize that it does
> make sense. And I'd like to do that.
>
> What I was saying is that, to support OOO, we may
> need to manage this context (which saves DMA addrs
> etc) via a list which is similar to the desc list
> maintained via `next` in split ring instead of an
> array whose elements always can be indexed directly.

My point is that this context is a must (not only for OOO).

>
> The desc ring in split ring is an array, but its
> free entries are managed as list via next. I was
> just wondering, do we want to manage such a list
> because of OOO. It's just a very simple question
> that I want to hear your opinion on... (It doesn't
> mean anything, e.g. it doesn't mean I don't want
> to support OOO. It's just a simple question...)

So the answer is yes. But I admit I don't have a better idea than
what you propose here (something like the split ring, which is a little bit
sad). Maybe Michael has one.

Thanks

>
> Best regards,
> Tiwei Bie
>
>> Thanks
>>
>>> Best regards,
>>> Tiwei Bie
>>>
>>>> Thanks
>>>>
>>>>>> Not for the patch, but it looks like having an OUT_OF_ORDER feature bit is
>>>>>> much simpler to start with.
>>>>> +1
>>>>>
>>>>> Best regards,
>>>>> Tiwei Bie

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox