Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCH net-next 1/2] net: mscc: ocelot: add bonding support
From: Alexandre Belloni @ 2018-05-18  6:41 UTC (permalink / raw)
  To: David S . Miller
  Cc: Allan Nielsen, razvan.stefanescu, po.liu, Thomas Petazzoni,
	Andrew Lunn, Florian Fainelli, netdev, linux-kernel,
	Alexandre Belloni
In-Reply-To: <20180518064106.10122-1-alexandre.belloni@bootlin.com>

Add link aggregation hardware offload support for Ocelot.

ocelot_get_link_ksettings() is not great but it does work until the driver
is reworked to switch to phylink.

Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 drivers/net/ethernet/mscc/ocelot.c | 177 +++++++++++++++++++++++++++++
 drivers/net/ethernet/mscc/ocelot.h |   2 +-
 2 files changed, 178 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c
index c8c74aa548d9..f22f5467001e 100644
--- a/drivers/net/ethernet/mscc/ocelot.c
+++ b/drivers/net/ethernet/mscc/ocelot.c
@@ -775,10 +775,43 @@ static int ocelot_get_sset_count(struct net_device *dev, int sset)
 	return ocelot->num_stats;
 }
 
+static int ocelot_get_link_ksettings(struct net_device *dev,
+				     struct ethtool_link_ksettings *ks)
+{
+	ethtool_link_ksettings_zero_link_mode(ks, supported);
+	ethtool_link_ksettings_zero_link_mode(ks, advertising);
+
+	ethtool_link_ksettings_add_link_mode(ks, supported, 10baseT_Half);
+	ethtool_link_ksettings_add_link_mode(ks, supported, 10baseT_Full);
+	ethtool_link_ksettings_add_link_mode(ks, supported, 100baseT_Half);
+	ethtool_link_ksettings_add_link_mode(ks, supported, 100baseT_Full);
+	ethtool_link_ksettings_add_link_mode(ks, supported, 1000baseT_Half);
+	ethtool_link_ksettings_add_link_mode(ks, supported, 1000baseT_Full);
+	ethtool_link_ksettings_add_link_mode(ks, supported, 2500baseT_Full);
+	ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
+
+	if (!dev->phydev)
+		return 0;
+
+	ethtool_link_ksettings_add_link_mode(ks, advertising, 10baseT_Full);
+	ethtool_link_ksettings_add_link_mode(ks, advertising, 100baseT_Full);
+	ethtool_link_ksettings_add_link_mode(ks, advertising, 1000baseT_Full);
+	ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
+
+	if (!dev->phydev->link)
+		return 0;
+
+	ks->base.speed = dev->phydev->speed;
+	ks->base.duplex = DUPLEX_FULL;
+
+	return 0;
+}
+
 static const struct ethtool_ops ocelot_ethtool_ops = {
 	.get_strings		= ocelot_get_strings,
 	.get_ethtool_stats	= ocelot_get_ethtool_stats,
 	.get_sset_count		= ocelot_get_sset_count,
+	.get_link_ksettings	= ocelot_get_link_ksettings,
 };
 
 static int ocelot_port_attr_get(struct net_device *dev,
@@ -1087,6 +1120,137 @@ static void ocelot_port_bridge_leave(struct ocelot_port *ocelot_port,
 		ocelot->hw_bridge_dev = NULL;
 }
 
+static void ocelot_set_aggr_pgids(struct ocelot *ocelot)
+{
+	int i, port, lag;
+
+	/* Reset destination and aggregation PGIDS */
+	for (port = 0; port < ocelot->num_phys_ports; port++)
+		ocelot_write_rix(ocelot, BIT(port), ANA_PGID_PGID, port);
+
+	for (i = PGID_AGGR; i < PGID_SRC; i++)
+		ocelot_write_rix(ocelot, GENMASK(ocelot->num_phys_ports - 1, 0),
+				 ANA_PGID_PGID, i);
+
+	/* Now, set PGIDs for each LAG */
+	for (lag = 0; lag < ocelot->num_phys_ports; lag++) {
+		unsigned long bond_mask;
+		int aggr_count = 0;
+		u8 aggr_idx[16];
+
+		bond_mask = ocelot->lags[lag];
+		if (!bond_mask)
+			continue;
+
+		for_each_set_bit(port, &bond_mask, ocelot->num_phys_ports) {
+			// Destination mask
+			ocelot_write_rix(ocelot, bond_mask,
+					 ANA_PGID_PGID, port);
+			aggr_idx[aggr_count] = port;
+			aggr_count++;
+		}
+
+		for (i = PGID_AGGR; i < PGID_SRC; i++) {
+			u32 ac;
+
+			ac = ocelot_read_rix(ocelot, ANA_PGID_PGID, i);
+			ac &= ~bond_mask;
+			ac |= BIT(aggr_idx[i % aggr_count]);
+			ocelot_write_rix(ocelot, ac, ANA_PGID_PGID, i);
+		}
+	}
+}
+
+static void ocelot_setup_lag(struct ocelot *ocelot, int lag)
+{
+	unsigned long bond_mask = ocelot->lags[lag];
+	unsigned int p;
+
+	for_each_set_bit(p, &bond_mask, ocelot->num_phys_ports) {
+		u32 port_cfg = ocelot_read_gix(ocelot, ANA_PORT_PORT_CFG, p);
+
+		port_cfg &= ~ANA_PORT_PORT_CFG_PORTID_VAL_M;
+
+		/* Use lag port as logical port for port i */
+		ocelot_write_gix(ocelot, port_cfg |
+				 ANA_PORT_PORT_CFG_PORTID_VAL(lag),
+				 ANA_PORT_PORT_CFG, p);
+	}
+}
+
+static int ocelot_port_lag_join(struct ocelot_port *ocelot_port,
+				struct net_device *bond)
+{
+	struct ocelot *ocelot = ocelot_port->ocelot;
+	int p = ocelot_port->chip_port;
+	int lag, lp;
+	struct net_device *ndev;
+	u32 bond_mask = 0;
+
+	rcu_read_lock();
+	for_each_netdev_in_bond_rcu(bond, ndev) {
+		struct ocelot_port *port = netdev_priv(ndev);
+
+		bond_mask |= BIT(port->chip_port);
+	}
+	rcu_read_unlock();
+
+	lp = __ffs(bond_mask);
+
+	/* If the new port is the lowest one, use it as the logical port from
+	 * now on
+	 */
+	if (p == lp) {
+		lag = p;
+		ocelot->lags[p] = bond_mask;
+		bond_mask &= ~BIT(p);
+		if (bond_mask) {
+			lp = __ffs(bond_mask);
+			ocelot->lags[lp] = 0;
+		}
+	} else {
+		lag = lp;
+		ocelot->lags[lp] |= BIT(p);
+	}
+
+	ocelot_setup_lag(ocelot, lag);
+	ocelot_set_aggr_pgids(ocelot);
+
+	return 0;
+}
+
+static void ocelot_port_lag_leave(struct ocelot_port *ocelot_port,
+				  struct net_device *bond)
+{
+	struct ocelot *ocelot = ocelot_port->ocelot;
+	int p = ocelot_port->chip_port;
+	u32 port_cfg;
+	int i;
+
+	/* Remove port from any lag */
+	for (i = 0; i < ocelot->num_phys_ports; i++)
+		ocelot->lags[i] &= ~BIT(ocelot_port->chip_port);
+
+	/* if it was the logical port of the lag, move the lag config to the
+	 * next port
+	 */
+	if (ocelot->lags[p]) {
+		int n = __ffs(ocelot->lags[p]);
+
+		ocelot->lags[n] = ocelot->lags[p];
+		ocelot->lags[p] = 0;
+
+		ocelot_setup_lag(ocelot, n);
+	}
+
+	port_cfg = ocelot_read_gix(ocelot, ANA_PORT_PORT_CFG, p);
+	port_cfg &= ~ANA_PORT_PORT_CFG_PORTID_VAL_M;
+	ocelot_write_gix(ocelot, port_cfg | ANA_PORT_PORT_CFG_PORTID_VAL(p),
+			 ANA_PORT_PORT_CFG, p);
+
+	ocelot_set_aggr_pgids(ocelot);
+}
+
 /* Checks if the net_device instance given to us originate from our driver. */
 static bool ocelot_netdevice_dev_check(const struct net_device *dev)
 {
@@ -1113,6 +1277,14 @@ static int ocelot_netdevice_port_event(struct net_device *dev,
 				ocelot_port_bridge_leave(ocelot_port,
 							 info->upper_dev);
 		}
+		if (netif_is_lag_master(info->upper_dev)) {
+			if (info->linking)
+				err = ocelot_port_lag_join(ocelot_port,
+							   info->upper_dev);
+			else
+				ocelot_port_lag_leave(ocelot_port,
+						      info->upper_dev);
+		}
 		break;
 	default:
 		break;
@@ -1200,6 +1372,11 @@ int ocelot_init(struct ocelot *ocelot)
 	int i, cpu = ocelot->num_phys_ports;
 	char queue_name[32];
 
+	ocelot->lags = devm_kcalloc(ocelot->dev, ocelot->num_phys_ports,
+				    sizeof(u32), GFP_KERNEL);
+	if (!ocelot->lags)
+		return -ENOMEM;
+
 	ocelot->stats = devm_kcalloc(ocelot->dev,
 				     ocelot->num_phys_ports * ocelot->num_stats,
 				     sizeof(u64), GFP_KERNEL);
diff --git a/drivers/net/ethernet/mscc/ocelot.h b/drivers/net/ethernet/mscc/ocelot.h
index 097bd12a10d4..616bec30dfa3 100644
--- a/drivers/net/ethernet/mscc/ocelot.h
+++ b/drivers/net/ethernet/mscc/ocelot.h
@@ -493,7 +493,7 @@ struct ocelot {
 	u8 num_cpu_ports;
 	struct ocelot_port **ports;
 
-	u16 lags[16];
+	u32 *lags;
 
 	/* Keep track of the vlan port masks */
 	u32 vlan_mask[VLAN_N_VID];
-- 
2.17.0

^ permalink raw reply related

* [PATCH net-next 0/2] net: mscc; ocelot: add more features
From: Alexandre Belloni @ 2018-05-18  6:41 UTC (permalink / raw)
  To: David S . Miller
  Cc: Allan Nielsen, razvan.stefanescu, po.liu, Thomas Petazzoni,
	Andrew Lunn, Florian Fainelli, netdev, linux-kernel,
	Alexandre Belloni

Hi,

This series adds link aggregation and VLAN filtering hardware offload
support to the ocelot driver.

PTP is also on the list of features but it will probably not be
submitted this cycle.

Alexandre Belloni (1):
  net: mscc: ocelot: add bonding support

Antoine Tenart (1):
  net: mscc: ocelot: add VLAN filtering

 drivers/net/ethernet/mscc/ocelot.c | 462 ++++++++++++++++++++++++++++-
 drivers/net/ethernet/mscc/ocelot.h |   2 +-
 2 files changed, 461 insertions(+), 3 deletions(-)

-- 
2.17.0

^ permalink raw reply

* Re: [patch net-next RFC 04/12] dsa: set devlink port attrs for dsa ports
From: Jiri Pirko @ 2018-05-18  6:37 UTC (permalink / raw)
  To: Andrew Lunn
  Cc: Florian Fainelli, netdev, davem, idosch, jakub.kicinski, mlxsw,
	vivien.didelot, michael.chan, ganeshgr, saeedm, simon.horman,
	pieter.jansenvanvuuren, john.hurley, dirk.vandermerwe,
	alexander.h.duyck, ogerlitz, dsahern, vijaya.guvva,
	satananda.burla, raghu.vatsavayi, felix.manlunas, gospo,
	sathya.perla, vasundhara-v.volam, tariqt, eranbe,
	jeffrey.t.kirsher
In-Reply-To: <20180518014137.GB6069@lunn.ch>

Fri, May 18, 2018 at 03:41:37AM CEST, andrew@lunn.ch wrote:
>> >>>         ds = dsa_switch_alloc(&mdiodev->dev, DSA_MAX_PORTS);
>> >>>
>> >>> It is allocating a switch with 12 ports. However only 4 of them have
>> >>> names. So the core only creates slave devices for those 4.
>> >>>
>> >>> This is a useful test. Real hardware often has unused ports. A WiFi AP
>> >>> with a 7 port switch which only uses 6 ports is often seen.
>> >>
>> >> The following patch should fix this:
>> >>
>> >>
>> >> diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
>> >> index adf50fbc4c13..a06c29ec91f0 100644
>> >> --- a/net/dsa/dsa2.c
>> >> +++ b/net/dsa/dsa2.c
>> >> @@ -262,13 +262,14 @@ static int dsa_port_setup(struct dsa_port *dp)
>> >>
>> >>         memset(&dp->devlink_port, 0, sizeof(dp->devlink_port));
>> >>
>> >> +       if (dp->type == DSA_PORT_TYPE_UNUSED)
>> >> +               return 0;
>> >> +
>> >>         err = devlink_port_register(ds->devlink, &dp->devlink_port,
>> >> dp->index);
>> > 
>> > Hi Florian, Jiri
>> > 
>> > Maybe it is better to add a devlink port type unused?
>> 
>> The port does not exist on the switch, so it should not even be
>> registered IMHO.
>
>Hi Florian
>
>The ports do exist, when you called dsa_switch_alloc() you said the
>switch has 12 ports.

What benefit does it have to register unused ports? What is a usecase
for them. Like Florian, I also think they should not be registered.


>
>       Andrew

^ permalink raw reply

* Re: Request for -stable inclusion: time stamping fix for nfp
From: Greg KH @ 2018-05-18  6:36 UTC (permalink / raw)
  To: Willy Tarreau; +Cc: David Miller, g.nault, jakub.kicinski, netdev
In-Reply-To: <20180517183202.GB25475@1wt.eu>

On Thu, May 17, 2018 at 08:32:02PM +0200, Willy Tarreau wrote:
> Adding Greg here.
> 
> Greg, apparently a backport of 46f1c52e66db is needed in 4.9 according
> to the thread below. It was merged in 4.13 so 4.14 already has it.

Thanks for the notice, now queued up.

greg k-h

^ permalink raw reply

* [PATCH bpf-next v2 1/7] perf/core: add perf_get_event() to return perf_event given a struct file
From: Yonghong Song @ 2018-05-18  5:32 UTC (permalink / raw)
  To: peterz, ast, daniel, netdev; +Cc: kernel-team

A new extern function, perf_get_event(), is added to return a perf event
given a struct file. This function will be used in later patches.

Signed-off-by: Yonghong Song <yhs@fb.com>
---
 include/linux/perf_event.h | 5 +++++
 kernel/events/core.c       | 8 ++++++++
 2 files changed, 13 insertions(+)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index e71e99e..b5c1ad3 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -868,6 +868,7 @@ extern void perf_event_exit_task(struct task_struct *child);
 extern void perf_event_free_task(struct task_struct *task);
 extern void perf_event_delayed_put(struct task_struct *task);
 extern struct file *perf_event_get(unsigned int fd);
+extern struct perf_event *perf_get_event(struct file *file);
 extern const struct perf_event_attr *perf_event_attrs(struct perf_event *event);
 extern void perf_event_print_debug(void);
 extern void perf_pmu_disable(struct pmu *pmu);
@@ -1289,6 +1290,10 @@ static inline void perf_event_exit_task(struct task_struct *child)	{ }
 static inline void perf_event_free_task(struct task_struct *task)	{ }
 static inline void perf_event_delayed_put(struct task_struct *task)	{ }
 static inline struct file *perf_event_get(unsigned int fd)	{ return ERR_PTR(-EINVAL); }
+static inline struct perf_event *perf_get_event(struct file *file)
+{
+	return ERR_PTR(-EINVAL);
+}
 static inline const struct perf_event_attr *perf_event_attrs(struct perf_event *event)
 {
 	return ERR_PTR(-EINVAL);
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 67612ce..1e3cddb 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -11212,6 +11212,14 @@ struct file *perf_event_get(unsigned int fd)
 	return file;
 }
 
+struct perf_event *perf_get_event(struct file *file)
+{
+	if (file->f_op != &perf_fops)
+		return ERR_PTR(-EINVAL);
+
+	return file->private_data;
+}
+
 const struct perf_event_attr *perf_event_attrs(struct perf_event *event)
 {
 	if (!event)
-- 
2.9.5

^ permalink raw reply related

* Re: [PATCH v3 1/2] media: rc: introduce BPF_PROG_RAWIR_EVENT
From: Y Song @ 2018-05-18  5:31 UTC (permalink / raw)
  To: Sean Young
  Cc: linux-media, linux-kernel, Alexei Starovoitov,
	Mauro Carvalho Chehab, Daniel Borkmann, netdev, Matthias Reichl,
	Devin Heitmueller
In-Reply-To: <20180517214534.2ipc4q3ru7ykdklf@gofer.mess.org>

On Thu, May 17, 2018 at 2:45 PM, Sean Young <sean@mess.org> wrote:
> Hi,
>
> Again thanks for a thoughtful review. This will definitely will improve
> the code.
>
> On Thu, May 17, 2018 at 10:02:52AM -0700, Y Song wrote:
>> On Wed, May 16, 2018 at 2:04 PM, Sean Young <sean@mess.org> wrote:
>> > Add support for BPF_PROG_RAWIR_EVENT. This type of BPF program can call
>> > rc_keydown() to reported decoded IR scancodes, or rc_repeat() to report
>> > that the last key should be repeated.
>> >
>> > The bpf program can be attached to using the bpf(BPF_PROG_ATTACH) syscall;
>> > the target_fd must be the /dev/lircN device.
>> >
>> > Signed-off-by: Sean Young <sean@mess.org>
>> > ---
>> >  drivers/media/rc/Kconfig           |  13 ++
>> >  drivers/media/rc/Makefile          |   1 +
>> >  drivers/media/rc/bpf-rawir-event.c | 363 +++++++++++++++++++++++++++++
>> >  drivers/media/rc/lirc_dev.c        |  24 ++
>> >  drivers/media/rc/rc-core-priv.h    |  24 ++
>> >  drivers/media/rc/rc-ir-raw.c       |  14 +-
>> >  include/linux/bpf_rcdev.h          |  30 +++
>> >  include/linux/bpf_types.h          |   3 +
>> >  include/uapi/linux/bpf.h           |  55 ++++-
>> >  kernel/bpf/syscall.c               |   7 +
>> >  10 files changed, 531 insertions(+), 3 deletions(-)
>> >  create mode 100644 drivers/media/rc/bpf-rawir-event.c
>> >  create mode 100644 include/linux/bpf_rcdev.h
>> >
>> > diff --git a/drivers/media/rc/Kconfig b/drivers/media/rc/Kconfig
>> > index eb2c3b6eca7f..2172d65b0213 100644
>> > --- a/drivers/media/rc/Kconfig
>> > +++ b/drivers/media/rc/Kconfig
>> > @@ -25,6 +25,19 @@ config LIRC
>> >            passes raw IR to and from userspace, which is needed for
>> >            IR transmitting (aka "blasting") and for the lirc daemon.
>> >
>> > +config BPF_RAWIR_EVENT
>> > +       bool "Support for eBPF programs attached to lirc devices"
>> > +       depends on BPF_SYSCALL
>> > +       depends on RC_CORE=y
>> > +       depends on LIRC
>> > +       help
>> > +          Allow attaching eBPF programs to a lirc device using the bpf(2)
>> > +          syscall command BPF_PROG_ATTACH. This is supported for raw IR
>> > +          receivers.
>> > +
>> > +          These eBPF programs can be used to decode IR into scancodes, for
>> > +          IR protocols not supported by the kernel decoders.
>> > +
>> >  menuconfig RC_DECODERS
>> >         bool "Remote controller decoders"
>> >         depends on RC_CORE
>> > diff --git a/drivers/media/rc/Makefile b/drivers/media/rc/Makefile
>> > index 2e1c87066f6c..74907823bef8 100644
>> > --- a/drivers/media/rc/Makefile
>> > +++ b/drivers/media/rc/Makefile
>> > @@ -5,6 +5,7 @@ obj-y += keymaps/
>> >  obj-$(CONFIG_RC_CORE) += rc-core.o
>> >  rc-core-y := rc-main.o rc-ir-raw.o
>> >  rc-core-$(CONFIG_LIRC) += lirc_dev.o
>> > +rc-core-$(CONFIG_BPF_RAWIR_EVENT) += bpf-rawir-event.o
>> >  obj-$(CONFIG_IR_NEC_DECODER) += ir-nec-decoder.o
>> >  obj-$(CONFIG_IR_RC5_DECODER) += ir-rc5-decoder.o
>> >  obj-$(CONFIG_IR_RC6_DECODER) += ir-rc6-decoder.o
>> > diff --git a/drivers/media/rc/bpf-rawir-event.c b/drivers/media/rc/bpf-rawir-event.c
>> > new file mode 100644
>> > index 000000000000..7cb48b8d87b5
>> > --- /dev/null
>> > +++ b/drivers/media/rc/bpf-rawir-event.c
>> > @@ -0,0 +1,363 @@
>> > +// SPDX-License-Identifier: GPL-2.0
>> > +// bpf-rawir-event.c - handles bpf
>> > +//
>> > +// Copyright (C) 2018 Sean Young <sean@mess.org>
>> > +
>> > +#include <linux/bpf.h>
>> > +#include <linux/filter.h>
>> > +#include <linux/bpf_rcdev.h>
>> > +#include "rc-core-priv.h"
>> > +
>> > +/*
>> > + * BPF interface for raw IR
>> > + */
>> > +const struct bpf_prog_ops rawir_event_prog_ops = {
>> > +};
>> > +
>> > +BPF_CALL_1(bpf_rc_repeat, struct bpf_rawir_event*, event)
>> > +{
>> > +       struct ir_raw_event_ctrl *ctrl;
>> > +
>> > +       ctrl = container_of(event, struct ir_raw_event_ctrl, bpf_rawir_event);
>> > +
>> > +       rc_repeat(ctrl->dev);
>> > +
>> > +       return 0;
>> > +}
>> > +
>> > +static const struct bpf_func_proto rc_repeat_proto = {
>> > +       .func      = bpf_rc_repeat,
>> > +       .gpl_only  = true, /* rc_repeat is EXPORT_SYMBOL_GPL */
>> > +       .ret_type  = RET_INTEGER,
>> > +       .arg1_type = ARG_PTR_TO_CTX,
>> > +};
>> > +
>> > +BPF_CALL_4(bpf_rc_keydown, struct bpf_rawir_event*, event, u32, protocol,
>> > +          u32, scancode, u32, toggle)
>> > +{
>> > +       struct ir_raw_event_ctrl *ctrl;
>> > +
>> > +       ctrl = container_of(event, struct ir_raw_event_ctrl, bpf_rawir_event);
>> > +
>> > +       rc_keydown(ctrl->dev, protocol, scancode, toggle != 0);
>> > +
>> > +       return 0;
>> > +}
>> > +
>> > +static const struct bpf_func_proto rc_keydown_proto = {
>> > +       .func      = bpf_rc_keydown,
>> > +       .gpl_only  = true, /* rc_keydown is EXPORT_SYMBOL_GPL */
>> > +       .ret_type  = RET_INTEGER,
>> > +       .arg1_type = ARG_PTR_TO_CTX,
>> > +       .arg2_type = ARG_ANYTHING,
>> > +       .arg3_type = ARG_ANYTHING,
>> > +       .arg4_type = ARG_ANYTHING,
>> > +};
>> > +
>> > +static const struct bpf_func_proto *
>> > +rawir_event_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
>> > +{
>> > +       switch (func_id) {
>> > +       case BPF_FUNC_rc_repeat:
>> > +               return &rc_repeat_proto;
>> > +       case BPF_FUNC_rc_keydown:
>> > +               return &rc_keydown_proto;
>> > +       case BPF_FUNC_map_lookup_elem:
>> > +               return &bpf_map_lookup_elem_proto;
>> > +       case BPF_FUNC_map_update_elem:
>> > +               return &bpf_map_update_elem_proto;
>> > +       case BPF_FUNC_map_delete_elem:
>> > +               return &bpf_map_delete_elem_proto;
>> > +       case BPF_FUNC_ktime_get_ns:
>> > +               return &bpf_ktime_get_ns_proto;
>> > +       case BPF_FUNC_tail_call:
>> > +               return &bpf_tail_call_proto;
>> > +       case BPF_FUNC_get_prandom_u32:
>> > +               return &bpf_get_prandom_u32_proto;
>> > +       case BPF_FUNC_trace_printk:
>> > +               if (capable(CAP_SYS_ADMIN))
>> > +                       return bpf_get_trace_printk_proto();
>> > +               /* fall through */
>> > +       default:
>> > +               return NULL;
>> > +       }
>> > +}
>> > +
>> > +static bool rawir_event_is_valid_access(int off, int size,
>> > +                                       enum bpf_access_type type,
>> > +                                       const struct bpf_prog *prog,
>> > +                                       struct bpf_insn_access_aux *info)
>> > +{
>> > +       /* struct bpf_rawir_event has two u32 fields */
>> > +       if (type == BPF_WRITE)
>> > +               return false;
>> > +
>> > +       if (size != sizeof(__u32))
>> > +               return false;
>> > +
>> > +       if (!(off == offsetof(struct bpf_rawir_event, duration) ||
>> > +             off == offsetof(struct bpf_rawir_event, type)))
>> > +               return false;
>> > +
>> > +       return true;
>> > +}
>> > +
>> > +const struct bpf_verifier_ops rawir_event_verifier_ops = {
>> > +       .get_func_proto  = rawir_event_func_proto,
>> > +       .is_valid_access = rawir_event_is_valid_access
>> > +};
>> > +
>> > +#define BPF_MAX_PROGS 64
>> > +
>> > +static int rc_dev_bpf_attach(struct rc_dev *rcdev, struct bpf_prog *prog)
>> > +{
>> > +       struct ir_raw_event_ctrl *raw;
>> > +       struct bpf_prog_list *pl;
>> > +       int ret, size;
>> > +
>> > +       if (rcdev->driver_type != RC_DRIVER_IR_RAW)
>> > +               return -EINVAL;
>> > +
>> > +       ret = mutex_lock_interruptible(&ir_raw_handler_lock);
>> > +       if (ret)
>> > +               return ret;
>> > +
>> > +       raw = rcdev->raw;
>> > +       if (!raw) {
>> > +               ret = -ENODEV;
>> > +               goto out;
>> > +       }
>> > +
>> > +       size = 0;
>> > +       list_for_each_entry(pl, &raw->progs, node) {
>> > +               if (pl->prog == prog) {
>> > +                       ret = -EEXIST;
>> > +                       goto out;
>> > +               }
>> > +
>> > +               size++;
>> > +       }
>> > +
>> > +       if (size >= BPF_MAX_PROGS) {
>> > +               ret = -E2BIG;
>> > +               goto out;
>> > +       }
>> > +
>> > +       pl = kmalloc(sizeof(*pl), GFP_KERNEL);
>> > +       if (!pl) {
>> > +               ret = -ENOMEM;
>> > +               goto out;
>> > +       }
>> > +
>> > +       pl->prog = prog;
>> > +       list_add(&pl->node, &raw->progs);
>> > +out:
>> > +       mutex_unlock(&ir_raw_handler_lock);
>> > +       return ret;
>> > +}
>> > +
>> > +static int rc_dev_bpf_detach(struct rc_dev *rcdev, struct bpf_prog *prog)
>> > +{
>> > +       struct ir_raw_event_ctrl *raw;
>> > +       struct bpf_prog_list *pl, *tmp;
>> > +       int ret;
>> > +
>> > +       if (rcdev->driver_type != RC_DRIVER_IR_RAW)
>> > +               return -EINVAL;
>> > +
>> > +       ret = mutex_lock_interruptible(&ir_raw_handler_lock);
>> > +       if (ret)
>> > +               return ret;
>> > +
>> > +       raw = rcdev->raw;
>> > +       if (!raw) {
>> > +               ret = -ENODEV;
>> > +               goto out;
>> > +       }
>> > +
>> > +       ret = -ENOENT;
>> > +
>> > +       list_for_each_entry_safe(pl, tmp, &raw->progs, node) {
>> > +               if (pl->prog == prog) {
>> > +                       list_del(&pl->node);
>> > +                       kfree(pl);
>> > +                       bpf_prog_put(prog);
>> > +                       ret = 0;
>> > +                       goto out;
>> > +               }
>> > +       }
>> > +out:
>> > +       mutex_unlock(&ir_raw_handler_lock);
>> > +       return ret;
>> > +}
>> > +
>> > +void rc_dev_bpf_init(struct rc_dev *rcdev)
>> > +{
>> > +       INIT_LIST_HEAD(&rcdev->raw->progs);
>> > +}
>> > +
>> > +void rc_dev_bpf_run(struct rc_dev *rcdev, struct ir_raw_event ev)
>> > +{
>> > +       struct ir_raw_event_ctrl *raw = rcdev->raw;
>> > +       struct bpf_prog_list *pl;
>> > +
>> > +       if (list_empty(&raw->progs))
>> > +               return;
>> > +
>> > +       if (unlikely(ev.carrier_report)) {
>> > +               raw->bpf_rawir_event.carrier = ev.carrier;
>> > +               raw->bpf_rawir_event.type = BPF_RAWIR_EVENT_CARRIER;
>> > +       } else {
>> > +               raw->bpf_rawir_event.duration = ev.duration;
>> > +
>> > +               if (ev.pulse)
>> > +                       raw->bpf_rawir_event.type = BPF_RAWIR_EVENT_PULSE;
>> > +               else if (ev.timeout)
>> > +                       raw->bpf_rawir_event.type = BPF_RAWIR_EVENT_TIMEOUT;
>> > +               else if (ev.reset)
>> > +                       raw->bpf_rawir_event.type = BPF_RAWIR_EVENT_RESET;
>> > +               else
>> > +                       raw->bpf_rawir_event.type = BPF_RAWIR_EVENT_SPACE;
>> > +       }
>> > +
>> > +       list_for_each_entry(pl, &raw->progs, node)
>> > +               BPF_PROG_RUN(pl->prog, &raw->bpf_rawir_event);
>>
>> Is the raw->progs protected by locks? It is possible that attaching/detaching
>> could manipulate raw->progs at the same time? In perf/cgroup prog array
>> case, the prog array run is protected by rcu and the dummy prog idea is
>> to avoid the prog is skipped by reshuffling.
>
> Yes, it is. The caller takes the appropriate lock. These functions could do
> with some comments.
>
>> Also, the original idea about using prog array is the least overhead since you
>> want to BPF programs to execute as soon as possible.
>
> Now, for our purposes the we're not bothered by a few milliseconds delay,
> so I would pick whatever uses less cpu/memory overall. There is some overhead
> in using rcu but the array is nice since it uses less memory than the
> double-linked list, and less cache misses.
>
> So I wanted bpf_prog_detach() to return -ENOENT if the prog was not in the list,
> however bpf_prog_array_copy() and bpf_prog_array_delete_safe() do not return
> anything useful for that.
>
> Maybe I should look at adding a count-delta return to bpf_prog_array_copy(),
> and a bool return to bpf_prog_array_delete_safe(). Or I could scan the array
> an extra time to see if it is present. Any other ideas?

The detaching is not performance critical. Either of these methods is fine.
A bool return to bpf_prog_array_delete_safe() seems easy to make a change.

>
> Also, BPF_F_OVERRIDE is not relevant for this but possibly BPF_F_ALLOW_MULTI
> could invoke the same behaviour as for cgroups. What do you think?

Looks like you already allow multiple programs attaching to the same device?
So by default you already implied supporting BPF_F_ALLOW_MULTI, but you
just did not specify it. I think this is fine. perf event also
supports program array
by default and users do not need to specify flags.
Or I misunderstood your question?

^ permalink raw reply

* [RFC PATCH net-next] tcp: tcp_rack_reo_wnd() can be static
From: kbuild test robot @ 2018-05-18  5:14 UTC (permalink / raw)
  To: Yuchung Cheng
  Cc: kbuild-all, netdev, Neal Cardwell, Eric Dumazet,
	Soheil Hassas Yeganeh, Priyaranjan Jha, Alexey Kuznetsov,
	Hideaki YOSHIFUJI, linux-kernel
In-Reply-To: <201805181300.JN7KwXNm%fengguang.wu@intel.com>


Fixes: 20b654dfe1be ("tcp: support DUPACK threshold in RACK")
Signed-off-by: kbuild test robot <fengguang.wu@intel.com>
---
 tcp_recovery.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c
index 30cbfb6..71593e4 100644
--- a/net/ipv4/tcp_recovery.c
+++ b/net/ipv4/tcp_recovery.c
@@ -21,7 +21,7 @@ static bool tcp_rack_sent_after(u64 t1, u64 t2, u32 seq1, u32 seq2)
 	return t1 > t2 || (t1 == t2 && after(seq1, seq2));
 }
 
-u32 tcp_rack_reo_wnd(const struct sock *sk)
+static u32 tcp_rack_reo_wnd(const struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 

^ permalink raw reply related

* [net-next:master 1200/1233] net/ipv4/tcp_recovery.c:24:5: sparse: symbol 'tcp_rack_reo_wnd' was not declared. Should it be static?
From: kbuild test robot @ 2018-05-18  5:14 UTC (permalink / raw)
  To: Yuchung Cheng
  Cc: kbuild-all, netdev, Neal Cardwell, Eric Dumazet,
	Soheil Hassas Yeganeh, Priyaranjan Jha, Alexey Kuznetsov,
	Hideaki YOSHIFUJI, linux-kernel

tree:   https://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next.git master
head:   538e2de104cfb4ef1acb35af42427bff42adbe4d
commit: 20b654dfe1beaca60ab51894ff405a049248433d [1200/1233] tcp: support DUPACK threshold in RACK
reproduce:
        # apt-get install sparse
        git checkout 20b654dfe1beaca60ab51894ff405a049248433d
        make ARCH=x86_64 allmodconfig
        make C=1 CF=-D__CHECK_ENDIAN__


sparse warnings: (new ones prefixed by >>)

   net/ipv4/tcp_recovery.c:46:16: sparse: expression using sizeof(void)
   net/ipv4/tcp_recovery.c:46:16: sparse: expression using sizeof(void)
>> net/ipv4/tcp_recovery.c:24:5: sparse: symbol 'tcp_rack_reo_wnd' was not declared. Should it be static?
   include/net/tcp.h:738:16: sparse: expression using sizeof(void)
   net/ipv4/tcp_recovery.c:102:40: sparse: expression using sizeof(void)
   net/ipv4/tcp_recovery.c:102:40: sparse: expression using sizeof(void)
   include/net/tcp.h:738:16: sparse: expression using sizeof(void)
   net/ipv4/tcp_recovery.c:210:42: sparse: expression using sizeof(void)

Please review and possibly fold the followup patch.

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

^ permalink raw reply

* [PATCH bpf-next v2 0/7] bpf: implement BPF_TASK_FD_QUERY
From: Yonghong Song @ 2018-05-18  5:03 UTC (permalink / raw)
  To: peterz, ast, daniel, netdev; +Cc: kernel-team

Currently, suppose a userspace application has loaded a bpf program
and attached it to a tracepoint/kprobe/uprobe, and a bpf
introspection tool, e.g., bpftool, wants to show which bpf program
is attached to which tracepoint/kprobe/uprobe. Such attachment
information will be really useful to understand the overall bpf
deployment in the system.

There is a name field (16 bytes) for each program, which could
be used to encode the attachment point. There are some drawbacks
for this approaches. First, bpftool user (e.g., an admin) may not
really understand the association between the name and the
attachment point. Second, if one program is attached to multiple
places, encoding a proper name which can imply all these
attachments becomes difficult.

This patch introduces a new bpf subcommand BPF_TASK_FD_QUERY.
Given a pid and fd, this command will return bpf related information
to user space. Right now it only supports tracepoint/kprobe/uprobe
perf event fd's. For such a fd, BPF_TASK_FD_QUERY will return
   . prog_id
   . tracepoint name, or
   . k[ret]probe funcname + offset or kernel addr, or
   . u[ret]probe filename + offset
to the userspace.
The user can use "bpftool prog" to find more information about
bpf program itself with prog_id.

Patch #1 adds function perf_get_event() in kernel/events/core.c.
Patch #2 implements the bpf subcommand BPF_TASK_FD_QUERY.
Patch #3 syncs tools bpf.h header and also add bpf_task_fd_query()
in the libbpf library for samples/selftests/bpftool to use.
Patch #4 adds ksym_get_addr() utility function.
Patch #5 add a test in samples/bpf for querying k[ret]probes and
u[ret]probes.
Patch #6 add a test in tools/testing/selftests/bpf for querying
raw_tracepoint and tracepoint.
Patch #7 add a new subcommand "perf" to bpftool.

Changelogs:
  v1 -> v2:
     . changed bpf subcommand name from BPF_PERF_EVENT_QUERY
       to BPF_TASK_FD_QUERY.
     . fixed various "bpftool perf" issues and added documentation
       and auto-completion.

Yonghong Song (7):
  perf/core: add perf_get_event() to return perf_event given a struct
    file
  bpf: introduce bpf subcommand BPF_TASK_FD_QUERY
  tools/bpf: sync kernel header bpf.h and add bpf_trace_event_query in
    libbpf
  tools/bpf: add ksym_get_addr() in trace_helpers
  samples/bpf: add a samples/bpf test for BPF_TASK_FD_QUERY
  tools/bpf: add two BPF_TASK_FD_QUERY tests in test_progs
  tools/bpftool: add perf subcommand

 include/linux/perf_event.h                       |   5 +
 include/linux/trace_events.h                     |  15 +
 include/uapi/linux/bpf.h                         |  27 ++
 kernel/bpf/syscall.c                             | 124 ++++++++
 kernel/events/core.c                             |   8 +
 kernel/trace/bpf_trace.c                         |  48 +++
 kernel/trace/trace_kprobe.c                      |  29 ++
 kernel/trace/trace_uprobe.c                      |  22 ++
 samples/bpf/Makefile                             |   4 +
 samples/bpf/task_fd_query_kern.c                 |  19 ++
 samples/bpf/task_fd_query_user.c                 | 379 +++++++++++++++++++++++
 tools/bpf/bpftool/Documentation/bpftool-perf.rst |  81 +++++
 tools/bpf/bpftool/Documentation/bpftool.rst      |   5 +-
 tools/bpf/bpftool/bash-completion/bpftool        |   9 +
 tools/bpf/bpftool/main.c                         |   3 +-
 tools/bpf/bpftool/main.h                         |   1 +
 tools/bpf/bpftool/perf.c                         | 200 ++++++++++++
 tools/include/uapi/linux/bpf.h                   |  27 ++
 tools/lib/bpf/bpf.c                              |  24 ++
 tools/lib/bpf/bpf.h                              |   3 +
 tools/testing/selftests/bpf/test_progs.c         | 133 ++++++++
 tools/testing/selftests/bpf/trace_helpers.c      |  12 +
 tools/testing/selftests/bpf/trace_helpers.h      |   1 +
 23 files changed, 1177 insertions(+), 2 deletions(-)
 create mode 100644 samples/bpf/task_fd_query_kern.c
 create mode 100644 samples/bpf/task_fd_query_user.c
 create mode 100644 tools/bpf/bpftool/Documentation/bpftool-perf.rst
 create mode 100644 tools/bpf/bpftool/perf.c

-- 
2.9.5

^ permalink raw reply

* [PATCH bpf-next v2 7/7] tools/bpftool: add perf subcommand
From: Yonghong Song @ 2018-05-18  5:03 UTC (permalink / raw)
  To: peterz, ast, daniel, netdev; +Cc: kernel-team
In-Reply-To: <20180518050310.2814608-1-yhs@fb.com>

The new command "bpftool perf [show | list]" will traverse
all processes under /proc, and if any fd is associated
with a perf event, it will print out related perf event
information. Documentation is also added.

Below is an example to show the results using bcc commands.
Running the following 4 bcc commands:
  kprobe:     trace.py '__x64_sys_nanosleep'
  kretprobe:  trace.py 'r::__x64_sys_nanosleep'
  tracepoint: trace.py 't:syscalls:sys_enter_nanosleep'
  uprobe:     trace.py 'p:/home/yhs/a.out:main'

The bpftool command line and result:

  $ bpftool perf
  pid 21711  fd 5: prog_id 5  kprobe  func __x64_sys_write  offset 0
  pid 21765  fd 5: prog_id 7  kretprobe  func __x64_sys_nanosleep  offset 0
  pid 21767  fd 5: prog_id 8  tracepoint  sys_enter_nanosleep
  pid 21800  fd 5: prog_id 9  uprobe  filename /home/yhs/a.out  offset 1159

  $ bpftool -j perf
  {"pid":21711,"fd":5,"prog_id":5,"attach_info":"kprobe","func":"__x64_sys_write","offset":0}, \
  {"pid":21765,"fd":5,"prog_id":7,"attach_info":"kretprobe","func":"__x64_sys_nanosleep","offset":0}, \
  {"pid":21767,"fd":5,"prog_id":8,"attach_info":"tracepoint","tracepoint":"sys_enter_nanosleep"}, \
  {"pid":21800,"fd":5,"prog_id":9,"attach_info":"uprobe","filename":"/home/yhs/a.out","offset":1159}

  $ bpftool prog
  5: kprobe  name probe___x64_sys  tag e495a0c82f2c7a8d  gpl
	  loaded_at 2018-05-15T04:46:37-0700  uid 0
	  xlated 200B  not jited  memlock 4096B  map_ids 4
  7: kprobe  name probe___x64_sys  tag f2fdee479a503abf  gpl
	  loaded_at 2018-05-15T04:48:32-0700  uid 0
	  xlated 200B  not jited  memlock 4096B  map_ids 7
  8: tracepoint  name tracepoint__sys  tag 5390badef2395fcf  gpl
	  loaded_at 2018-05-15T04:48:48-0700  uid 0
	  xlated 200B  not jited  memlock 4096B  map_ids 8
  9: kprobe  name probe_main_1  tag 0a87bdc2e2953b6d  gpl
	  loaded_at 2018-05-15T04:49:52-0700  uid 0
	  xlated 200B  not jited  memlock 4096B  map_ids 9

  $ ps ax | grep "python ./trace.py"
  21711 pts/0    T      0:03 python ./trace.py __x64_sys_write
  21765 pts/0    S+     0:00 python ./trace.py r::__x64_sys_nanosleep
  21767 pts/2    S+     0:00 python ./trace.py t:syscalls:sys_enter_nanosleep
  21800 pts/3    S+     0:00 python ./trace.py p:/home/yhs/a.out:main
  22374 pts/1    S+     0:00 grep --color=auto python ./trace.py

Signed-off-by: Yonghong Song <yhs@fb.com>
---
 tools/bpf/bpftool/Documentation/bpftool-perf.rst |  81 +++++++++
 tools/bpf/bpftool/Documentation/bpftool.rst      |   5 +-
 tools/bpf/bpftool/bash-completion/bpftool        |   9 +
 tools/bpf/bpftool/main.c                         |   3 +-
 tools/bpf/bpftool/main.h                         |   1 +
 tools/bpf/bpftool/perf.c                         | 200 +++++++++++++++++++++++
 6 files changed, 297 insertions(+), 2 deletions(-)
 create mode 100644 tools/bpf/bpftool/Documentation/bpftool-perf.rst
 create mode 100644 tools/bpf/bpftool/perf.c

diff --git a/tools/bpf/bpftool/Documentation/bpftool-perf.rst b/tools/bpf/bpftool/Documentation/bpftool-perf.rst
new file mode 100644
index 0000000..3e65375
--- /dev/null
+++ b/tools/bpf/bpftool/Documentation/bpftool-perf.rst
@@ -0,0 +1,81 @@
+================
+bpftool-perf
+================
+-------------------------------------------------------------------------------
+tool for inspection of perf related bpf prog attachments
+-------------------------------------------------------------------------------
+
+:Manual section: 8
+
+SYNOPSIS
+========
+
+	**bpftool** [*OPTIONS*] **perf** *COMMAND*
+
+	*OPTIONS* := { [{ **-j** | **--json** }] [{ **-p** | **--pretty** }] }
+
+	*COMMANDS* :=
+	{ **show** | **list** | **help** }
+
+PERF COMMANDS
+=============
+
+|	**bpftool** **perf { show | list }**
+|	**bpftool** **perf help**
+
+DESCRIPTION
+===========
+	**bpftool perf { show | list }**
+		  List all raw_tracepoint, tracepoint, kprobe attachment in the system.
+
+		  Output will start with process id and file descriptor in that process,
+		  followed by bpf program id, attachment information, and attachment point.
+		  The attachment point for raw_tracepoint/tracepoint is the trace probe name.
+		  The attachment point for k[ret]probe is either symbol name and offset,
+		  or a kernel virtual address.
+		  The attachment point for u[ret]probe is the file name and the file offset.
+
+	**bpftool perf help**
+		  Print short help message.
+
+OPTIONS
+=======
+	-h, --help
+		  Print short generic help message (similar to **bpftool help**).
+
+	-v, --version
+		  Print version number (similar to **bpftool version**).
+
+	-j, --json
+		  Generate JSON output. For commands that cannot produce JSON, this
+		  option has no effect.
+
+	-p, --pretty
+		  Generate human-readable JSON output. Implies **-j**.
+
+EXAMPLES
+========
+
+| **# bpftool perf**
+
+::
+
+      pid 21711  fd 5: prog_id 5  kprobe  func __x64_sys_write  offset 0
+      pid 21765  fd 5: prog_id 7  kretprobe  func __x64_sys_nanosleep  offset 0
+      pid 21767  fd 5: prog_id 8  tracepoint  sys_enter_nanosleep
+      pid 21800  fd 5: prog_id 9  uprobe  filename /home/yhs/a.out  offset 1159
+
+|
+| **# bpftool -j perf**
+
+::
+
+    [{"pid":21711,"fd":5,"prog_id":5,"attach_info":"kprobe","func":"__x64_sys_write","offset":0}, \
+     {"pid":21765,"fd":5,"prog_id":7,"attach_info":"kretprobe","func":"__x64_sys_nanosleep","offset":0}, \
+     {"pid":21767,"fd":5,"prog_id":8,"attach_info":"tracepoint","tracepoint":"sys_enter_nanosleep"}, \
+     {"pid":21800,"fd":5,"prog_id":9,"attach_info":"uprobe","filename":"/home/yhs/a.out","offset":1159}]
+
+
+SEE ALSO
+========
+	**bpftool**\ (8), **bpftool-prog**\ (8), **bpftool-map**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool.rst b/tools/bpf/bpftool/Documentation/bpftool.rst
index 564cb0d..b6f5d56 100644
--- a/tools/bpf/bpftool/Documentation/bpftool.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool.rst
@@ -16,7 +16,7 @@ SYNOPSIS
 
 	**bpftool** **version**
 
-	*OBJECT* := { **map** | **program** | **cgroup** }
+	*OBJECT* := { **map** | **program** | **cgroup** | **perf** }
 
 	*OPTIONS* := { { **-V** | **--version** } | { **-h** | **--help** }
 	| { **-j** | **--json** } [{ **-p** | **--pretty** }] }
@@ -30,6 +30,8 @@ SYNOPSIS
 
 	*CGROUP-COMMANDS* := { **show** | **list** | **attach** | **detach** | **help** }
 
+	*PERF-COMMANDS* := { **show** | **list** | **help** }
+
 DESCRIPTION
 ===========
 	*bpftool* allows for inspection and simple modification of BPF objects
@@ -56,3 +58,4 @@ OPTIONS
 SEE ALSO
 ========
 	**bpftool-map**\ (8), **bpftool-prog**\ (8), **bpftool-cgroup**\ (8)
+        **bpftool-perf**\ (8)
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index b301c9b..3680ad4 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -448,6 +448,15 @@ _bpftool()
                     ;;
             esac
             ;;
+        cgroup)
+            case $command in
+                *)
+                    [[ $prev == $object ]] && \
+                        COMPREPLY=( $( compgen -W 'help \
+                            show list' -- "$cur" ) )
+                    ;;
+            esac
+            ;;
     esac
 } &&
 complete -F _bpftool bpftool
diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c
index 1ec852d..eea7f14 100644
--- a/tools/bpf/bpftool/main.c
+++ b/tools/bpf/bpftool/main.c
@@ -87,7 +87,7 @@ static int do_help(int argc, char **argv)
 		"       %s batch file FILE\n"
 		"       %s version\n"
 		"\n"
-		"       OBJECT := { prog | map | cgroup }\n"
+		"       OBJECT := { prog | map | cgroup | perf }\n"
 		"       " HELP_SPEC_OPTIONS "\n"
 		"",
 		bin_name, bin_name, bin_name);
@@ -216,6 +216,7 @@ static const struct cmd cmds[] = {
 	{ "prog",	do_prog },
 	{ "map",	do_map },
 	{ "cgroup",	do_cgroup },
+	{ "perf",	do_perf },
 	{ "version",	do_version },
 	{ 0 }
 };
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index 6173cd9..63fdb31 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -119,6 +119,7 @@ int do_prog(int argc, char **arg);
 int do_map(int argc, char **arg);
 int do_event_pipe(int argc, char **argv);
 int do_cgroup(int argc, char **arg);
+int do_perf(int argc, char **arg);
 
 int prog_parse_fd(int *argc, char ***argv);
 int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len);
diff --git a/tools/bpf/bpftool/perf.c b/tools/bpf/bpftool/perf.c
new file mode 100644
index 0000000..baa4bbe
--- /dev/null
+++ b/tools/bpf/bpftool/perf.c
@@ -0,0 +1,200 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright (C) 2018 Facebook
+// Author: Yonghong Song <yhs@fb.com>
+
+#define _GNU_SOURCE
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <ftw.h>
+
+#include <bpf.h>
+
+#include "main.h"
+
+static void print_perf_json(int pid, int fd, __u32 prog_id, __u32 attach_info,
+			    char *buf, __u64 probe_offset, __u64 probe_addr)
+{
+	jsonw_start_object(json_wtr);
+	jsonw_int_field(json_wtr, "pid", pid);
+	jsonw_int_field(json_wtr, "fd", fd);
+	jsonw_uint_field(json_wtr, "prog_id", prog_id);
+	switch (attach_info) {
+	case BPF_ATTACH_RAW_TRACEPOINT:
+		jsonw_string_field(json_wtr, "attach_info", "raw_tracepoint");
+		jsonw_string_field(json_wtr, "tracepoint", buf);
+		break;
+	case BPF_ATTACH_TRACEPOINT:
+		jsonw_string_field(json_wtr, "attach_info", "tracepoint");
+		jsonw_string_field(json_wtr, "tracepoint", buf);
+		break;
+	case BPF_ATTACH_KPROBE:
+		jsonw_string_field(json_wtr, "attach_info", "kprobe");
+		if (buf[0] != '\0') {
+			jsonw_string_field(json_wtr, "func", buf);
+			jsonw_lluint_field(json_wtr, "offset", probe_offset);
+		} else {
+			jsonw_lluint_field(json_wtr, "addr", probe_addr);
+		}
+		break;
+	case BPF_ATTACH_KRETPROBE:
+		jsonw_string_field(json_wtr, "attach_info", "kretprobe");
+		if (buf[0] != '\0') {
+			jsonw_string_field(json_wtr, "func", buf);
+			jsonw_lluint_field(json_wtr, "offset", probe_offset);
+		} else {
+			jsonw_lluint_field(json_wtr, "addr", probe_addr);
+		}
+		break;
+	case BPF_ATTACH_UPROBE:
+		jsonw_string_field(json_wtr, "attach_info", "uprobe");
+		jsonw_string_field(json_wtr, "filename", buf);
+		jsonw_lluint_field(json_wtr, "offset", probe_offset);
+		break;
+	case BPF_ATTACH_URETPROBE:
+		jsonw_string_field(json_wtr, "attach_info", "uretprobe");
+		jsonw_string_field(json_wtr, "filename", buf);
+		jsonw_lluint_field(json_wtr, "offset", probe_offset);
+		break;
+	}
+	jsonw_end_object(json_wtr);
+}
+
+static void print_perf_plain(int pid, int fd, __u32 prog_id, __u32 attach_info,
+			    char *buf, __u64 probe_offset, __u64 probe_addr)
+{
+	printf("pid %d  fd %d: prog_id %u  ", pid, fd, prog_id);
+	switch (attach_info) {
+	case BPF_ATTACH_RAW_TRACEPOINT:
+		printf("raw_tracepoint  %s\n", buf);
+		break;
+	case BPF_ATTACH_TRACEPOINT:
+		printf("tracepoint  %s\n", buf);
+		break;
+	case BPF_ATTACH_KPROBE:
+		if (buf[0] != '\0')
+			printf("kprobe  func %s  offset %llu\n", buf,
+			       probe_offset);
+		else
+			printf("kprobe  addr %llu\n", probe_addr);
+		break;
+	case BPF_ATTACH_KRETPROBE:
+		if (buf[0] != '\0')
+			printf("kretprobe  func %s  offset %llu\n", buf,
+			       probe_offset);
+		else
+			printf("kretprobe  addr %llu\n", probe_addr);
+		break;
+	case BPF_ATTACH_UPROBE:
+		printf("uprobe  filename %s  offset %llu\n", buf, probe_offset);
+		break;
+	case BPF_ATTACH_URETPROBE:
+		printf("uretprobe  filename %s  offset %llu\n", buf,
+		       probe_offset);
+		break;
+	}
+}
+
+static int show_proc(const char *fpath, const struct stat *sb,
+		     int tflag, struct FTW *ftwbuf)
+{
+	__u64 probe_offset, probe_addr;
+	__u32 prog_id, attach_info;
+	int err, pid = 0, fd = 0;
+	const char *pch;
+	char buf[4096];
+
+	/* prefix always /proc */
+	pch = fpath + 5;
+	if (*pch == '\0')
+		return 0;
+
+	/* pid should be all numbers */
+	pch++;
+	while (isdigit(*pch)) {
+		pid = pid * 10 + *pch - '0';
+		pch++;
+	}
+	if (*pch == '\0')
+		return 0;
+	if (*pch != '/')
+		return FTW_SKIP_SUBTREE;
+
+	/* check /proc/<pid>/fd directory */
+	pch++;
+	if (strncmp(pch, "fd", 2))
+		return FTW_SKIP_SUBTREE;
+	pch += 2;
+	if (*pch == '\0')
+		return 0;
+	if (*pch != '/')
+		return FTW_SKIP_SUBTREE;
+
+	/* check /proc/<pid>/fd/<fd_num> */
+	pch++;
+	while (isdigit(*pch)) {
+		fd = fd * 10 + *pch - '0';
+		pch++;
+	}
+	if (*pch != '\0')
+		return FTW_SKIP_SUBTREE;
+
+	/* query (pid, fd) for potential perf events */
+	err = bpf_task_fd_query(pid, fd, 0, buf, sizeof(buf), &prog_id,
+				&attach_info, &probe_offset, &probe_addr);
+	if (err < 0)
+		return 0;
+
+	if (json_output)
+		print_perf_json(pid, fd, prog_id, attach_info, buf, probe_offset,
+				probe_addr);
+	else
+		print_perf_plain(pid, fd, prog_id, attach_info, buf, probe_offset,
+				 probe_addr);
+
+	return 0;
+}
+
+static int do_show(int argc, char **argv)
+{
+	int err = 0, nopenfd = 16;
+	int flags = FTW_ACTIONRETVAL | FTW_PHYS;
+
+	if (json_output)
+		jsonw_start_array(json_wtr);
+	if (nftw("/proc", show_proc, nopenfd, flags) == -1) {
+		p_err("%s", strerror(errno));
+		err = -1;
+	}
+	if (json_output)
+		jsonw_end_array(json_wtr);
+
+	return err;
+}
+
+static int do_help(int argc, char **argv)
+{
+	fprintf(stderr,
+		"Usage: %s %s { show | list | help }\n"
+		"",
+		bin_name, argv[-2]);
+
+	return 0;
+}
+
+static const struct cmd cmds[] = {
+	{ "show",	do_show },
+	{ "list",	do_show },
+	{ "help",	do_help },
+	{ 0 }
+};
+
+int do_perf(int argc, char **argv)
+{
+	return cmd_select(cmds, argc, argv, do_help);
+}
-- 
2.9.5

^ permalink raw reply related

* [PATCH bpf-next v2 3/7] tools/bpf: sync kernel header bpf.h and add bpf_trace_event_query in libbpf
From: Yonghong Song @ 2018-05-18  5:03 UTC (permalink / raw)
  To: peterz, ast, daniel, netdev; +Cc: kernel-team
In-Reply-To: <20180518050310.2814608-1-yhs@fb.com>

Sync kernel header bpf.h to tools/include/uapi/linux/bpf.h and
implement bpf_trace_event_query() in libbpf. The test programs
in samples/bpf and tools/testing/selftests/bpf, and later bpftool
will use this libbpf function to query kernel.

Signed-off-by: Yonghong Song <yhs@fb.com>
---
 tools/include/uapi/linux/bpf.h | 27 +++++++++++++++++++++++++++
 tools/lib/bpf/bpf.c            | 24 ++++++++++++++++++++++++
 tools/lib/bpf/bpf.h            |  3 +++
 3 files changed, 54 insertions(+)

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index d94d333..6a22ad4 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -97,6 +97,7 @@ enum bpf_cmd {
 	BPF_RAW_TRACEPOINT_OPEN,
 	BPF_BTF_LOAD,
 	BPF_BTF_GET_FD_BY_ID,
+	BPF_TASK_FD_QUERY,
 };
 
 enum bpf_map_type {
@@ -379,6 +380,22 @@ union bpf_attr {
 		__u32		btf_log_size;
 		__u32		btf_log_level;
 	};
+
+	struct {
+		int		pid;		/* input: pid */
+		int		fd;		/* input: fd */
+		__u32		flags;		/* input: flags */
+		__u32		buf_len;	/* input: buf len */
+		__aligned_u64	buf;		/* input/output:
+						 *   tp_name for tracepoint
+						 *   symbol for kprobe
+						 *   filename for uprobe
+						 */
+		__u32		prog_id;	/* output: prod_id */
+		__u32		attach_info;	/* output: BPF_ATTACH_* */
+		__u64		probe_offset;	/* output: probe_offset */
+		__u64		probe_addr;	/* output: probe_addr */
+	} task_fd_query;
 } __attribute__((aligned(8)));
 
 /* The description below is an attempt at providing documentation to eBPF
@@ -2450,4 +2467,14 @@ struct bpf_fib_lookup {
 	__u8	dmac[6];     /* ETH_ALEN */
 };
 
+/* used by <task, fd> based query */
+enum {
+	BPF_ATTACH_RAW_TRACEPOINT,	/* tp name */
+	BPF_ATTACH_TRACEPOINT,		/* tp name */
+	BPF_ATTACH_KPROBE,		/* (symbol + offset) or addr */
+	BPF_ATTACH_KRETPROBE,		/* (symbol + offset) or addr */
+	BPF_ATTACH_UPROBE,		/* filename + offset */
+	BPF_ATTACH_URETPROBE,		/* filename + offset */
+};
+
 #endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index 6a8a000..da3f336 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -643,3 +643,27 @@ int bpf_load_btf(void *btf, __u32 btf_size, char *log_buf, __u32 log_buf_size,
 
 	return fd;
 }
+
+int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 buf_len,
+		      __u32 *prog_id, __u32 *attach_info,
+		      __u64 *probe_offset, __u64 *probe_addr)
+{
+	union bpf_attr attr = {};
+	int err;
+
+	attr.task_fd_query.pid = pid;
+	attr.task_fd_query.fd = fd;
+	attr.task_fd_query.flags = flags;
+	attr.task_fd_query.buf = ptr_to_u64(buf);
+	attr.task_fd_query.buf_len = buf_len;
+
+	err = sys_bpf(BPF_TASK_FD_QUERY, &attr, sizeof(attr));
+	if (!err) {
+		*prog_id = attr.task_fd_query.prog_id;
+		*attach_info = attr.task_fd_query.attach_info;
+		*probe_offset = attr.task_fd_query.probe_offset;
+		*probe_addr = attr.task_fd_query.probe_addr;
+	}
+
+	return err;
+}
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 15bff77..9adfde6 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -107,4 +107,7 @@ int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags,
 int bpf_raw_tracepoint_open(const char *name, int prog_fd);
 int bpf_load_btf(void *btf, __u32 btf_size, char *log_buf, __u32 log_buf_size,
 		 bool do_log);
+int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 buf_len,
+		      __u32 *prog_id, __u32 *prog_info,
+		      __u64 *probe_offset, __u64 *probe_addr);
 #endif
-- 
2.9.5

^ permalink raw reply related

* [PATCH bpf-next v2 4/7] tools/bpf: add ksym_get_addr() in trace_helpers
From: Yonghong Song @ 2018-05-18  5:03 UTC (permalink / raw)
  To: peterz, ast, daniel, netdev; +Cc: kernel-team
In-Reply-To: <20180518050310.2814608-1-yhs@fb.com>

Given a kernel function name, ksym_get_addr() will return the kernel
address for this function, or 0 if it cannot find this function name
in /proc/kallsyms. This function will be used later when a kernel
address is used to initiate a kprobe perf event.

Signed-off-by: Yonghong Song <yhs@fb.com>
---
 tools/testing/selftests/bpf/trace_helpers.c | 12 ++++++++++++
 tools/testing/selftests/bpf/trace_helpers.h |  1 +
 2 files changed, 13 insertions(+)

diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c
index 8fb4fe8..3868dcb 100644
--- a/tools/testing/selftests/bpf/trace_helpers.c
+++ b/tools/testing/selftests/bpf/trace_helpers.c
@@ -72,6 +72,18 @@ struct ksym *ksym_search(long key)
 	return &syms[0];
 }
 
+long ksym_get_addr(const char *name)
+{
+	int i;
+
+	for (i = 0; i < sym_cnt; i++) {
+		if (strcmp(syms[i].name, name) == 0)
+			return syms[i].addr;
+	}
+
+	return 0;
+}
+
 static int page_size;
 static int page_cnt = 8;
 static struct perf_event_mmap_page *header;
diff --git a/tools/testing/selftests/bpf/trace_helpers.h b/tools/testing/selftests/bpf/trace_helpers.h
index 36d90e3..3b4bcf7 100644
--- a/tools/testing/selftests/bpf/trace_helpers.h
+++ b/tools/testing/selftests/bpf/trace_helpers.h
@@ -11,6 +11,7 @@ struct ksym {
 
 int load_kallsyms(void);
 struct ksym *ksym_search(long key);
+long ksym_get_addr(const char *name);
 
 typedef enum bpf_perf_event_ret (*perf_event_print_fn)(void *data, int size);
 
-- 
2.9.5

^ permalink raw reply related

* [PATCH bpf-next v2 2/7] bpf: introduce bpf subcommand BPF_TASK_FD_QUERY
From: Yonghong Song @ 2018-05-18  5:03 UTC (permalink / raw)
  To: peterz, ast, daniel, netdev; +Cc: kernel-team
In-Reply-To: <20180518050310.2814608-1-yhs@fb.com>

Currently, suppose a userspace application has loaded a bpf program
and attached it to a tracepoint/kprobe/uprobe, and a bpf
introspection tool, e.g., bpftool, wants to show which bpf program
is attached to which tracepoint/kprobe/uprobe. Such attachment
information will be really useful to understand the overall bpf
deployment in the system.

There is a name field (16 bytes) for each program, which could
be used to encode the attachment point. There are some drawbacks
for this approaches. First, bpftool user (e.g., an admin) may not
really understand the association between the name and the
attachment point. Second, if one program is attached to multiple
places, encoding a proper name which can imply all these
attachments becomes difficult.

This patch introduces a new bpf subcommand BPF_TASK_FD_QUERY.
Given a pid and fd, if the <pid, fd> is associated with a
tracepoint/kprobe/uprobe perf event, BPF_TASK_FD_QUERY will return
   . prog_id
   . tracepoint name, or
   . k[ret]probe funcname + offset or kernel addr, or
   . u[ret]probe filename + offset
to the userspace.
The user can use "bpftool prog" to find more information about
bpf program itself with prog_id.

Signed-off-by: Yonghong Song <yhs@fb.com>
---
 include/linux/trace_events.h |  15 ++++++
 include/uapi/linux/bpf.h     |  27 ++++++++++
 kernel/bpf/syscall.c         | 124 +++++++++++++++++++++++++++++++++++++++++++
 kernel/trace/bpf_trace.c     |  48 +++++++++++++++++
 kernel/trace/trace_kprobe.c  |  29 ++++++++++
 kernel/trace/trace_uprobe.c  |  22 ++++++++
 6 files changed, 265 insertions(+)

diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 2bde3ef..bd08e11 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -473,6 +473,9 @@ int perf_event_query_prog_array(struct perf_event *event, void __user *info);
 int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog);
 int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog);
 struct bpf_raw_event_map *bpf_find_raw_tracepoint(const char *name);
+int bpf_get_perf_event_info(struct perf_event *event, u32 *prog_id,
+			    u32 *attach_info, const char **buf,
+			    u64 *probe_offset, u64 *probe_addr);
 #else
 static inline unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
 {
@@ -504,6 +507,12 @@ static inline struct bpf_raw_event_map *bpf_find_raw_tracepoint(const char *name
 {
 	return NULL;
 }
+static inline int bpf_get_perf_event_info(struct file *file, u32 *prog_id,
+					  u32 *attach_info, const char **buf,
+					  u64 *probe_offset, u64 *probe_addr)
+{
+	return -EOPNOTSUPP;
+}
 #endif
 
 enum {
@@ -560,10 +569,16 @@ extern void perf_trace_del(struct perf_event *event, int flags);
 #ifdef CONFIG_KPROBE_EVENTS
 extern int  perf_kprobe_init(struct perf_event *event, bool is_retprobe);
 extern void perf_kprobe_destroy(struct perf_event *event);
+extern int bpf_get_kprobe_info(struct perf_event *event, u32 *attach_info,
+			       const char **symbol, u64 *probe_offset,
+			       u64 *probe_addr, bool perf_type_tracepoint);
 #endif
 #ifdef CONFIG_UPROBE_EVENTS
 extern int  perf_uprobe_init(struct perf_event *event, bool is_retprobe);
 extern void perf_uprobe_destroy(struct perf_event *event);
+extern int bpf_get_uprobe_info(struct perf_event *event, u32 *attach_info,
+			       const char **filename, u64 *probe_offset,
+			       bool perf_type_tracepoint);
 #endif
 extern int  ftrace_profile_set_filter(struct perf_event *event, int event_id,
 				     char *filter_str);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index d94d333..6a22ad4 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -97,6 +97,7 @@ enum bpf_cmd {
 	BPF_RAW_TRACEPOINT_OPEN,
 	BPF_BTF_LOAD,
 	BPF_BTF_GET_FD_BY_ID,
+	BPF_TASK_FD_QUERY,
 };
 
 enum bpf_map_type {
@@ -379,6 +380,22 @@ union bpf_attr {
 		__u32		btf_log_size;
 		__u32		btf_log_level;
 	};
+
+	struct {
+		int		pid;		/* input: pid */
+		int		fd;		/* input: fd */
+		__u32		flags;		/* input: flags */
+		__u32		buf_len;	/* input: buf len */
+		__aligned_u64	buf;		/* input/output:
+						 *   tp_name for tracepoint
+						 *   symbol for kprobe
+						 *   filename for uprobe
+						 */
+		__u32		prog_id;	/* output: prod_id */
+		__u32		attach_info;	/* output: BPF_ATTACH_* */
+		__u64		probe_offset;	/* output: probe_offset */
+		__u64		probe_addr;	/* output: probe_addr */
+	} task_fd_query;
 } __attribute__((aligned(8)));
 
 /* The description below is an attempt at providing documentation to eBPF
@@ -2450,4 +2467,14 @@ struct bpf_fib_lookup {
 	__u8	dmac[6];     /* ETH_ALEN */
 };
 
+/* used by <task, fd> based query */
+enum {
+	BPF_ATTACH_RAW_TRACEPOINT,	/* tp name */
+	BPF_ATTACH_TRACEPOINT,		/* tp name */
+	BPF_ATTACH_KPROBE,		/* (symbol + offset) or addr */
+	BPF_ATTACH_KRETPROBE,		/* (symbol + offset) or addr */
+	BPF_ATTACH_UPROBE,		/* filename + offset */
+	BPF_ATTACH_URETPROBE,		/* filename + offset */
+};
+
 #endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index bfcde94..2f2bb22 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -18,7 +18,9 @@
 #include <linux/vmalloc.h>
 #include <linux/mmzone.h>
 #include <linux/anon_inodes.h>
+#include <linux/fdtable.h>
 #include <linux/file.h>
+#include <linux/fs.h>
 #include <linux/license.h>
 #include <linux/filter.h>
 #include <linux/version.h>
@@ -2102,6 +2104,125 @@ static int bpf_btf_get_fd_by_id(const union bpf_attr *attr)
 	return btf_get_fd_by_id(attr->btf_id);
 }
 
+static int bpf_task_fd_query_copy(const union bpf_attr *attr,
+				    union bpf_attr __user *uattr,
+				    u32 prog_id, u32 attach_info,
+				    const char *buf, u64 probe_offset,
+				    u64 probe_addr)
+{
+	__u64 __user *ubuf;
+	int len;
+
+	ubuf = u64_to_user_ptr(attr->task_fd_query.buf);
+	if (buf) {
+		len = strlen(buf);
+		if (attr->task_fd_query.buf_len < len + 1)
+			return -ENOSPC;
+		if (copy_to_user(ubuf, buf, len + 1))
+			return -EFAULT;
+	} else if (attr->task_fd_query.buf_len) {
+		/* copy '\0' to ubuf */
+		__u8 zero = 0;
+
+		if (copy_to_user(ubuf, &zero, 1))
+			return -EFAULT;
+	}
+
+	if (copy_to_user(&uattr->task_fd_query.prog_id, &prog_id,
+			 sizeof(prog_id)) ||
+	    copy_to_user(&uattr->task_fd_query.attach_info, &attach_info,
+			 sizeof(attach_info)) ||
+	    copy_to_user(&uattr->task_fd_query.probe_offset, &probe_offset,
+			 sizeof(probe_offset)) ||
+	    copy_to_user(&uattr->task_fd_query.probe_addr, &probe_addr,
+			 sizeof(probe_addr)))
+		return -EFAULT;
+
+	return 0;
+}
+
+#define BPF_TASK_FD_QUERY_LAST_FIELD task_fd_query.probe_addr
+
+static int bpf_task_fd_query(const union bpf_attr *attr,
+			     union bpf_attr __user *uattr)
+{
+	pid_t pid = attr->task_fd_query.pid;
+	int fd = attr->task_fd_query.fd;
+	struct files_struct *files;
+	struct task_struct *task;
+	struct perf_event *event;
+	struct file *file;
+	int err;
+
+	if (CHECK_ATTR(BPF_TASK_FD_QUERY))
+		return -EINVAL;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	if (attr->task_fd_query.flags != 0)
+		return -EINVAL;
+
+	task = get_pid_task(find_vpid(pid), PIDTYPE_PID);
+	if (!task)
+		return -ENOENT;
+
+	files = get_files_struct(task);
+	put_task_struct(task);
+	if (!files)
+		return -ENOENT;
+
+	err = 0;
+	spin_lock(&files->file_lock);
+	file = fcheck_files(files, fd);
+	if (!file)
+		err = -EBADF;
+	else
+		get_file(file);
+	spin_unlock(&files->file_lock);
+	put_files_struct(files);
+
+	if (err)
+		goto out;
+
+	if (file->f_op == &bpf_raw_tp_fops) {
+		struct bpf_raw_tracepoint *raw_tp = file->private_data;
+		struct bpf_raw_event_map *btp = raw_tp->btp;
+
+		if (!raw_tp->prog)
+			err = -ENOENT;
+		else
+			err = bpf_task_fd_query_copy(attr, uattr,
+						     raw_tp->prog->aux->id,
+						     BPF_ATTACH_RAW_TRACEPOINT,
+						     btp->tp->name, 0, 0);
+		goto put_file;
+	}
+
+	event = perf_get_event(file);
+	if (!IS_ERR(event)) {
+		u64 probe_offset, probe_addr;
+		u32 prog_id, attach_info;
+		const char *buf;
+
+		err = bpf_get_perf_event_info(event, &prog_id, &attach_info,
+					      &buf, &probe_offset,
+					      &probe_addr);
+		if (!err)
+			err = bpf_task_fd_query_copy(attr, uattr, prog_id,
+						     attach_info, buf,
+						     probe_offset,
+						     probe_addr);
+		goto put_file;
+	}
+
+	err = -ENOTSUPP;
+put_file:
+	fput(file);
+out:
+	return err;
+}
+
 SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
 {
 	union bpf_attr attr = {};
@@ -2188,6 +2309,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
 	case BPF_BTF_GET_FD_BY_ID:
 		err = bpf_btf_get_fd_by_id(&attr);
 		break;
+	case BPF_TASK_FD_QUERY:
+		err = bpf_task_fd_query(&attr, uattr);
+		break;
 	default:
 		err = -EINVAL;
 		break;
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index ce2cbbf..b55758a 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -14,6 +14,7 @@
 #include <linux/uaccess.h>
 #include <linux/ctype.h>
 #include <linux/kprobes.h>
+#include <linux/syscalls.h>
 #include <linux/error-injection.h>
 
 #include "trace_probe.h"
@@ -1163,3 +1164,50 @@ int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog)
 	mutex_unlock(&bpf_event_mutex);
 	return err;
 }
+
+int bpf_get_perf_event_info(struct perf_event *event, u32 *prog_id,
+			    u32 *attach_info, const char **buf,
+			    u64 *probe_offset, u64 *probe_addr)
+{
+	bool is_tracepoint, is_syscall_tp;
+	struct bpf_prog *prog;
+	int flags, err = 0;
+
+	prog = event->prog;
+	if (!prog)
+		return -ENOENT;
+
+	/* not supporting BPF_PROG_TYPE_PERF_EVENT yet */
+	if (prog->type == BPF_PROG_TYPE_PERF_EVENT)
+		return -EOPNOTSUPP;
+
+	*prog_id = prog->aux->id;
+	flags = event->tp_event->flags;
+	is_tracepoint = flags & TRACE_EVENT_FL_TRACEPOINT;
+	is_syscall_tp = is_syscall_trace_event(event->tp_event);
+
+	if (is_tracepoint || is_syscall_tp) {
+		*buf = is_tracepoint ? event->tp_event->tp->name
+				     : event->tp_event->name;
+		*attach_info = BPF_ATTACH_TRACEPOINT;
+		*probe_offset = 0x0;
+		*probe_addr = 0x0;
+	} else {
+		/* kprobe/uprobe */
+		err = -EOPNOTSUPP;
+#ifdef CONFIG_KPROBE_EVENTS
+		if (flags & TRACE_EVENT_FL_KPROBE)
+			err = bpf_get_kprobe_info(event, attach_info, buf,
+						  probe_offset, probe_addr,
+						  event->attr.type == PERF_TYPE_TRACEPOINT);
+#endif
+#ifdef CONFIG_UPROBE_EVENTS
+		if (flags & TRACE_EVENT_FL_UPROBE)
+			err = bpf_get_uprobe_info(event, attach_info, buf,
+						  probe_offset,
+						  event->attr.type == PERF_TYPE_TRACEPOINT);
+#endif
+	}
+
+	return err;
+}
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 02aed76..e6bcaf6 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1287,6 +1287,35 @@ kretprobe_perf_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
 			      head, NULL);
 }
 NOKPROBE_SYMBOL(kretprobe_perf_func);
+
+int bpf_get_kprobe_info(struct perf_event *event, u32 *attach_info,
+			const char **symbol, u64 *probe_offset,
+			u64 *probe_addr, bool perf_type_tracepoint)
+{
+	const char *pevent = trace_event_name(event->tp_event);
+	const char *group = event->tp_event->class->system;
+	struct trace_kprobe *tk;
+
+	if (perf_type_tracepoint)
+		tk = find_trace_kprobe(pevent, group);
+	else
+		tk = event->tp_event->data;
+	if (!tk)
+		return -EINVAL;
+
+	*attach_info = trace_kprobe_is_return(tk) ? BPF_ATTACH_KRETPROBE
+						  : BPF_ATTACH_KPROBE;
+	if (tk->symbol) {
+		*symbol = tk->symbol;
+		*probe_offset = tk->rp.kp.offset;
+		*probe_addr = 0;
+	} else {
+		*symbol = NULL;
+		*probe_offset = 0;
+		*probe_addr = (u64)tk->rp.kp.addr;
+	}
+	return 0;
+}
 #endif	/* CONFIG_PERF_EVENTS */
 
 /*
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index ac89287..534cf1a 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -1161,6 +1161,28 @@ static void uretprobe_perf_func(struct trace_uprobe *tu, unsigned long func,
 {
 	__uprobe_perf_func(tu, func, regs, ucb, dsize);
 }
+
+int bpf_get_uprobe_info(struct perf_event *event, u32 *attach_info,
+			const char **filename, u64 *probe_offset,
+			bool perf_type_tracepoint)
+{
+	const char *pevent = trace_event_name(event->tp_event);
+	const char *group = event->tp_event->class->system;
+	struct trace_uprobe *tu;
+
+	if (perf_type_tracepoint)
+		tu = find_probe_event(pevent, group);
+	else
+		tu = event->tp_event->data;
+	if (!tu)
+		return -EINVAL;
+
+	*attach_info = is_ret_probe(tu) ? BPF_ATTACH_URETPROBE
+					: BPF_ATTACH_UPROBE;
+	*filename = tu->filename;
+	*probe_offset = tu->offset;
+	return 0;
+}
 #endif	/* CONFIG_PERF_EVENTS */
 
 static int
-- 
2.9.5

^ permalink raw reply related

* [PATCH bpf-next v2 6/7] tools/bpf: add two BPF_TASK_FD_QUERY tests in test_progs
From: Yonghong Song @ 2018-05-18  5:03 UTC (permalink / raw)
  To: peterz, ast, daniel, netdev; +Cc: kernel-team
In-Reply-To: <20180518050310.2814608-1-yhs@fb.com>

The new tests are added to query perf_event information
for raw_tracepoint and tracepoint attachment. For tracepoint,
both syscalls and non-syscalls tracepoints are queries as
they are treated slightly differently inside the kernel.

Signed-off-by: Yonghong Song <yhs@fb.com>
---
 tools/testing/selftests/bpf/test_progs.c | 133 +++++++++++++++++++++++++++++++
 1 file changed, 133 insertions(+)

diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 3ecf733..f7ede03 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -1542,6 +1542,137 @@ static void test_get_stack_raw_tp(void)
 	bpf_object__close(obj);
 }
 
+static void test_task_fd_query_rawtp(void)
+{
+	const char *file = "./test_get_stack_rawtp.o";
+	struct perf_event_attr attr = {};
+	__u64 probe_offset, probe_addr;
+	int efd, err, prog_fd, pmu_fd;
+	__u32 prog_id, attach_info;
+	struct bpf_object *obj;
+	__u32 duration = 0;
+	char buf[256];
+
+	err = bpf_prog_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd);
+	if (CHECK(err, "prog_load raw tp", "err %d errno %d\n", err, errno))
+		return;
+
+	efd = bpf_raw_tracepoint_open("sys_enter", prog_fd);
+	if (CHECK(efd < 0, "raw_tp_open", "err %d errno %d\n", efd, errno))
+		goto close_prog;
+
+	attr.sample_type = PERF_SAMPLE_RAW;
+	attr.type = PERF_TYPE_SOFTWARE;
+	attr.config = PERF_COUNT_SW_BPF_OUTPUT;
+	pmu_fd = syscall(__NR_perf_event_open, &attr, getpid(), -1, -1, 0);
+	if (CHECK(pmu_fd < 0, "perf_event_open", "err %d errno %d\n", pmu_fd,
+		  errno))
+		goto close_prog;
+
+	err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0);
+	if (CHECK(err < 0, "ioctl PERF_EVENT_IOC_ENABLE", "err %d errno %d\n",
+		  err, errno))
+		goto close_prog;
+
+	/* query (getpid(), efd */
+	err = bpf_task_fd_query(getpid(), efd, 0, buf, 256, &prog_id,
+				&attach_info, &probe_offset, &probe_addr);
+	if (CHECK(err < 0, "bpf_trace_event_query", "err %d errno %d\n", err,
+		  errno))
+		goto close_prog;
+
+	err = (attach_info == BPF_ATTACH_RAW_TRACEPOINT) &&
+	      (strcmp(buf, "sys_enter") == 0);
+	if (CHECK(!err, "check_results", "attach_info %d tp_name %s\n",
+		  attach_info, buf))
+		goto close_prog;
+
+	goto close_prog_noerr;
+close_prog:
+	error_cnt++;
+close_prog_noerr:
+	bpf_object__close(obj);
+}
+
+static void test_task_fd_query_tp_core(const char *probe_name,
+				       const char *tp_name)
+{
+	const char *file = "./test_tracepoint.o";
+	int err, bytes, efd, prog_fd, pmu_fd;
+	struct perf_event_attr attr = {};
+	__u64 probe_offset, probe_addr;
+	__u32 prog_id, attach_info;
+	struct bpf_object *obj;
+	__u32 duration = 0;
+	char buf[256];
+
+	err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
+	if (CHECK(err, "bpf_prog_load", "err %d errno %d\n", err, errno))
+		goto close_prog;
+
+	snprintf(buf, sizeof(buf),
+		 "/sys/kernel/debug/tracing/events/%s/id", probe_name);
+	efd = open(buf, O_RDONLY, 0);
+	if (CHECK(efd < 0, "open", "err %d errno %d\n", efd, errno))
+		goto close_prog;
+	bytes = read(efd, buf, sizeof(buf));
+	close(efd);
+	if (CHECK(bytes <= 0 || bytes >= sizeof(buf), "read",
+		  "bytes %d errno %d\n", bytes, errno))
+		goto close_prog;
+
+	attr.config = strtol(buf, NULL, 0);
+	attr.type = PERF_TYPE_TRACEPOINT;
+	attr.sample_type = PERF_SAMPLE_RAW;
+	attr.sample_period = 1;
+	attr.wakeup_events = 1;
+	pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */,
+			 0 /* cpu 0 */, -1 /* group id */,
+			 0 /* flags */);
+	if (CHECK(err, "perf_event_open", "err %d errno %d\n", err, errno))
+		goto close_pmu;
+
+	err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0);
+	if (CHECK(err, "perf_event_ioc_enable", "err %d errno %d\n", err,
+		  errno))
+		goto close_pmu;
+
+	err = ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd);
+	if (CHECK(err, "perf_event_ioc_set_bpf", "err %d errno %d\n", err,
+		  errno))
+		goto close_pmu;
+
+	/* query (getpid(), pmu_fd */
+	err = bpf_task_fd_query(getpid(), pmu_fd, 0, buf, 256, &prog_id,
+				&attach_info, &probe_offset, &probe_addr);
+	if (CHECK(err < 0, "bpf_trace_event_query", "err %d errno %d\n", err,
+		  errno))
+		goto close_pmu;
+
+	err = (attach_info == BPF_ATTACH_TRACEPOINT) && !strcmp(buf, tp_name);
+	if (CHECK(!err, "check_results", "attach_info %d tp_name %s\n",
+		  attach_info, buf))
+		goto close_pmu;
+
+	close(pmu_fd);
+	goto close_prog_noerr;
+
+close_pmu:
+	close(pmu_fd);
+close_prog:
+	error_cnt++;
+close_prog_noerr:
+	bpf_object__close(obj);
+}
+
+static void test_task_fd_query_tp(void)
+{
+	test_task_fd_query_tp_core("sched/sched_switch",
+				   "sched_switch");
+	test_task_fd_query_tp_core("syscalls/sys_enter_read",
+				   "sys_enter_read");
+}
+
 int main(void)
 {
 	jit_enabled = is_jit_enabled();
@@ -1561,6 +1692,8 @@ int main(void)
 	test_stacktrace_build_id_nmi();
 	test_stacktrace_map_raw_tp();
 	test_get_stack_raw_tp();
+	test_task_fd_query_rawtp();
+	test_task_fd_query_tp();
 
 	printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt);
 	return error_cnt ? EXIT_FAILURE : EXIT_SUCCESS;
-- 
2.9.5

^ permalink raw reply related

* [PATCH bpf-next v2 5/7] samples/bpf: add a samples/bpf test for BPF_TASK_FD_QUERY
From: Yonghong Song @ 2018-05-18  5:03 UTC (permalink / raw)
  To: peterz, ast, daniel, netdev; +Cc: kernel-team
In-Reply-To: <20180518050310.2814608-1-yhs@fb.com>

This is mostly to test kprobe/uprobe which needs kernel headers.

Signed-off-by: Yonghong Song <yhs@fb.com>
---
 samples/bpf/Makefile             |   4 +
 samples/bpf/task_fd_query_kern.c |  19 ++
 samples/bpf/task_fd_query_user.c | 379 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 402 insertions(+)
 create mode 100644 samples/bpf/task_fd_query_kern.c
 create mode 100644 samples/bpf/task_fd_query_user.c

diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 62d1aa1..7dc85ed 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -51,6 +51,7 @@ hostprogs-y += cpustat
 hostprogs-y += xdp_adjust_tail
 hostprogs-y += xdpsock
 hostprogs-y += xdp_fwd
+hostprogs-y += task_fd_query
 
 # Libbpf dependencies
 LIBBPF = $(TOOLS_PATH)/lib/bpf/libbpf.a
@@ -105,6 +106,7 @@ cpustat-objs := bpf_load.o cpustat_user.o
 xdp_adjust_tail-objs := xdp_adjust_tail_user.o
 xdpsock-objs := bpf_load.o xdpsock_user.o
 xdp_fwd-objs := bpf_load.o xdp_fwd_user.o
+task_fd_query-objs := bpf_load.o task_fd_query_user.o $(TRACE_HELPERS)
 
 # Tell kbuild to always build the programs
 always := $(hostprogs-y)
@@ -160,6 +162,7 @@ always += cpustat_kern.o
 always += xdp_adjust_tail_kern.o
 always += xdpsock_kern.o
 always += xdp_fwd_kern.o
+always += task_fd_query_kern.o
 
 HOSTCFLAGS += -I$(objtree)/usr/include
 HOSTCFLAGS += -I$(srctree)/tools/lib/
@@ -175,6 +178,7 @@ HOSTCFLAGS_offwaketime_user.o += -I$(srctree)/tools/lib/bpf/
 HOSTCFLAGS_spintest_user.o += -I$(srctree)/tools/lib/bpf/
 HOSTCFLAGS_trace_event_user.o += -I$(srctree)/tools/lib/bpf/
 HOSTCFLAGS_sampleip_user.o += -I$(srctree)/tools/lib/bpf/
+HOSTCFLAGS_task_fd_query_user.o += -I$(srctree)/tools/lib/bpf/
 
 HOST_LOADLIBES		+= $(LIBBPF) -lelf
 HOSTLOADLIBES_tracex4		+= -lrt
diff --git a/samples/bpf/task_fd_query_kern.c b/samples/bpf/task_fd_query_kern.c
new file mode 100644
index 0000000..f4b0a9e
--- /dev/null
+++ b/samples/bpf/task_fd_query_kern.c
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/version.h>
+#include <linux/ptrace.h>
+#include <uapi/linux/bpf.h>
+#include "bpf_helpers.h"
+
+SEC("kprobe/blk_start_request")
+int bpf_prog1(struct pt_regs *ctx)
+{
+	return 0;
+}
+
+SEC("kretprobe/blk_account_io_completion")
+int bpf_prog2(struct pt_regs *ctx)
+{
+	return 0;
+}
+char _license[] SEC("license") = "GPL";
+u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/samples/bpf/task_fd_query_user.c b/samples/bpf/task_fd_query_user.c
new file mode 100644
index 0000000..792ef24
--- /dev/null
+++ b/samples/bpf/task_fd_query_user.c
@@ -0,0 +1,379 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <unistd.h>
+#include <stdbool.h>
+#include <string.h>
+#include <stdint.h>
+#include <fcntl.h>
+#include <linux/bpf.h>
+#include <sys/ioctl.h>
+#include <sys/resource.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include "libbpf.h"
+#include "bpf_load.h"
+#include "bpf_util.h"
+#include "perf-sys.h"
+#include "trace_helpers.h"
+
+#define CHECK_PERROR_RET(condition) ({			\
+	int __ret = !!(condition);			\
+	if (__ret) {					\
+		printf("FAIL: %s:\n", __func__);	\
+		perror("    ");			\
+		return -1;				\
+	}						\
+})
+
+#define CHECK_AND_RET(condition) ({			\
+	int __ret = !!(condition);			\
+	if (__ret)					\
+		return -1;				\
+})
+
+static __u64 ptr_to_u64(void *ptr)
+{
+	return (__u64) (unsigned long) ptr;
+}
+
+#define PMU_TYPE_FILE "/sys/bus/event_source/devices/%s/type"
+static int bpf_find_probe_type(const char *event_type)
+{
+	char buf[256];
+	int fd, ret;
+
+	ret = snprintf(buf, sizeof(buf), PMU_TYPE_FILE, event_type);
+	CHECK_PERROR_RET(ret < 0 || ret >= sizeof(buf));
+
+	fd = open(buf, O_RDONLY);
+	CHECK_PERROR_RET(fd < 0);
+
+	ret = read(fd, buf, sizeof(buf));
+	close(fd);
+	CHECK_PERROR_RET(ret < 0 || ret >= sizeof(buf));
+
+	errno = 0;
+	ret = (int)strtol(buf, NULL, 10);
+	CHECK_PERROR_RET(errno);
+	return ret;
+}
+
+#define PMU_RETPROBE_FILE "/sys/bus/event_source/devices/%s/format/retprobe"
+static int bpf_get_retprobe_bit(const char *event_type)
+{
+	char buf[256];
+	int fd, ret;
+
+	ret = snprintf(buf, sizeof(buf), PMU_RETPROBE_FILE, event_type);
+	CHECK_PERROR_RET(ret < 0 || ret >= sizeof(buf));
+
+	fd = open(buf, O_RDONLY);
+	CHECK_PERROR_RET(fd < 0);
+
+	ret = read(fd, buf, sizeof(buf));
+	close(fd);
+	CHECK_PERROR_RET(ret < 0 || ret >= sizeof(buf));
+	CHECK_PERROR_RET(strlen(buf) < strlen("config:"));
+
+	errno = 0;
+	ret = (int)strtol(buf + strlen("config:"), NULL, 10);
+	CHECK_PERROR_RET(errno);
+	return ret;
+}
+
+static int test_debug_fs_kprobe(int fd_idx, const char *fn_name,
+				__u32 expected_prog_info)
+{
+	__u64 probe_offset, probe_addr;
+	__u32 prog_id, prog_info;
+	char buf[256];
+	int err;
+
+	err = bpf_task_fd_query(getpid(), event_fd[fd_idx], 0, buf, 256,
+				&prog_id, &prog_info, &probe_offset,
+				&probe_addr);
+	if (err < 0) {
+		printf("FAIL: %s, for event_fd idx %d, fn_name %s\n",
+		       __func__, fd_idx, fn_name);
+		perror("    :");
+		return -1;
+	}
+	if (strcmp(buf, fn_name) != 0 ||
+	    prog_info != expected_prog_info ||
+	    probe_offset != 0x0 || probe_addr != 0x0) {
+		printf("FAIL: bpf_trace_event_query(event_fd[1]):\n");
+		printf("buf: %s, prog_info: %u, probe_offset: 0x%llx,"
+		       " probe_addr: 0x%llx\n",
+		       buf, prog_info, probe_offset, probe_addr);
+		return -1;
+	}
+	return 0;
+}
+
+static int test_nondebug_fs_kuprobe_common(const char *event_type,
+	const char *name, __u64 offset, __u64 addr, bool is_return,
+	char *buf, int buf_len, __u32 *prog_id, __u32 *prog_info,
+	__u64 *probe_offset, __u64 *probe_addr)
+{
+	int is_return_bit = bpf_get_retprobe_bit(event_type);
+	int type = bpf_find_probe_type(event_type);
+	struct perf_event_attr attr = {};
+	int fd;
+
+	if (type < 0 || is_return_bit < 0) {
+		printf("FAIL: %s incorrect type (%d) or is_return_bit (%d)\n",
+			__func__, type, is_return_bit);
+		return -1;
+	}
+
+	attr.sample_period = 1;
+	attr.wakeup_events = 1;
+	if (is_return)
+		attr.config |= 1 << is_return_bit;
+
+	if (name) {
+		attr.config1 = ptr_to_u64((void *)name);
+		attr.config2 = offset;
+	} else {
+		attr.config1 = 0;
+		attr.config2 = addr;
+	}
+	attr.size = sizeof(attr);
+	attr.type = type;
+
+	fd = sys_perf_event_open(&attr, -1, 0, -1, 0);
+	CHECK_PERROR_RET(fd < 0);
+
+	CHECK_PERROR_RET(ioctl(fd, PERF_EVENT_IOC_ENABLE, 0) < 0);
+	CHECK_PERROR_RET(ioctl(fd, PERF_EVENT_IOC_SET_BPF, prog_fd[0]) < 0);
+	CHECK_PERROR_RET(bpf_task_fd_query(getpid(), fd, 0, buf, buf_len,
+			 prog_id, prog_info, probe_offset, probe_addr) < 0);
+
+	return 0;
+}
+
+static int test_nondebug_fs_probe(const char *event_type, const char *name,
+				  __u64 offset, __u64 addr, bool is_return,
+				  __u32 expected_prog_info,
+				  __u32 expected_ret_prog_info,
+				  char *buf, int buf_len)
+{
+	__u64 probe_offset, probe_addr;
+	__u32 prog_id, prog_info;
+	int err;
+
+	err = test_nondebug_fs_kuprobe_common(event_type, name,
+					      offset, addr, is_return,
+					      buf, buf_len, &prog_id,
+					      &prog_info, &probe_offset,
+					      &probe_addr);
+	if (err < 0) {
+		printf("FAIL: %s, "
+		       "for name %s, offset 0x%llx, addr 0x%llx, is_return %d\n",
+		       __func__, name ? name : "", offset, addr, is_return);
+		perror("    :");
+		return -1;
+	}
+	if ((is_return && prog_info != expected_ret_prog_info) ||
+	    (!is_return && prog_info != expected_prog_info)) {
+		printf("FAIL: %s, incorrect prog_info %u\n",
+		       __func__, prog_info);
+		return -1;
+	}
+	if (name) {
+		if (strcmp(name, buf) != 0) {
+			printf("FAIL: %s, incorrect buf %s\n", __func__, buf);
+			return -1;
+		}
+		if (probe_offset != offset) {
+			printf("FAIL: %s, incorrect probe_offset 0x%llx\n",
+			       __func__, probe_offset);
+			return -1;
+		}
+	} else {
+		if (buf && buf[0] != '\0') {
+			printf("FAIL: %s, incorrect buf %p\n",
+			       __func__, buf);
+			return -1;
+		}
+
+		if (probe_addr != addr) {
+			printf("FAIL: %s, incorrect probe_addr 0x%llx\n",
+			       __func__, probe_addr);
+			return -1;
+		}
+	}
+	return 0;
+}
+
+static int test_debug_fs_uprobe(char *binary_path, long offset, bool is_return)
+{
+	const char *event_type = "uprobe";
+	struct perf_event_attr attr = {};
+	char buf[256], event_alias[256];
+	__u64 probe_offset, probe_addr;
+	__u32 prog_id, prog_info;
+	int err, res, kfd, efd;
+	ssize_t bytes;
+
+	snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/%s_events",
+		 event_type);
+	kfd = open(buf, O_WRONLY | O_APPEND, 0);
+	CHECK_PERROR_RET(kfd < 0);
+
+	res = snprintf(event_alias, sizeof(event_alias), "test_%d", getpid());
+	CHECK_PERROR_RET(res < 0 || res >= sizeof(event_alias));
+
+	res = snprintf(buf, sizeof(buf), "%c:%ss/%s %s:0x%lx",
+		       is_return ? 'r' : 'p', event_type, event_alias,
+		       binary_path, offset);
+	CHECK_PERROR_RET(res < 0 || res >= sizeof(buf));
+	CHECK_PERROR_RET(write(kfd, buf, strlen(buf)) < 0);
+
+	close(kfd);
+	kfd = -1;
+
+	snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/events/%ss/%s/id",
+		 event_type, event_alias);
+	efd = open(buf, O_RDONLY, 0);
+	CHECK_PERROR_RET(efd < 0);
+
+	bytes = read(efd, buf, sizeof(buf));
+	CHECK_PERROR_RET(bytes <= 0 || bytes >= sizeof(buf));
+	close(efd);
+	buf[bytes] = '\0';
+
+	attr.config = strtol(buf, NULL, 0);
+	attr.type = PERF_TYPE_TRACEPOINT;
+	attr.sample_period = 1;
+	attr.wakeup_events = 1;
+	kfd = sys_perf_event_open(&attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC);
+	CHECK_PERROR_RET(kfd < 0);
+	CHECK_PERROR_RET(ioctl(kfd, PERF_EVENT_IOC_SET_BPF, prog_fd[0]) < 0);
+	CHECK_PERROR_RET(ioctl(kfd, PERF_EVENT_IOC_ENABLE, 0) < 0);
+
+	err = bpf_task_fd_query(getpid(), kfd, 0, buf, 256,
+				&prog_id, &prog_info, &probe_offset,
+				&probe_addr);
+	if (err < 0) {
+		printf("FAIL: %s, binary_path %s\n", __func__, binary_path);
+		perror("    :");
+		return -1;
+	}
+	if ((is_return && prog_info != BPF_ATTACH_URETPROBE) ||
+	    (!is_return && prog_info != BPF_ATTACH_UPROBE)) {
+		printf("FAIL: %s, incorrect prog_info %u\n", __func__,
+		       prog_info);
+		return -1;
+	}
+	if (strcmp(binary_path, buf) != 0) {
+		printf("FAIL: %s, incorrect buf %s\n", __func__, buf);
+		return -1;
+	}
+	if (probe_offset != offset) {
+		printf("FAIL: %s, incorrect probe_offset 0x%llx\n", __func__,
+		       probe_offset);
+		return -1;
+	}
+
+	close(kfd);
+	return 0;
+}
+
+int main(int argc, char **argv)
+{
+	struct rlimit r = {1024*1024, RLIM_INFINITY};
+	extern char __executable_start;
+	char filename[256], buf[256];
+	__u64 uprobe_file_offset;
+
+	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+	if (setrlimit(RLIMIT_MEMLOCK, &r)) {
+		perror("setrlimit(RLIMIT_MEMLOCK)");
+		return 1;
+	}
+
+	if (load_kallsyms()) {
+		printf("failed to process /proc/kallsyms\n");
+		return 1;
+	}
+
+	if (load_bpf_file(filename)) {
+		printf("%s", bpf_log_buf);
+		return 1;
+	}
+
+	/* test two functions in the corresponding *_kern.c file */
+	CHECK_AND_RET(test_debug_fs_kprobe(0, "blk_start_request",
+					   BPF_ATTACH_KPROBE));
+	CHECK_AND_RET(test_debug_fs_kprobe(1, "blk_account_io_completion",
+					   BPF_ATTACH_KRETPROBE));
+
+	/* test nondebug fs kprobe */
+	CHECK_AND_RET(test_nondebug_fs_probe("kprobe", "bpf_check", 0x0, 0x0,
+					     false, BPF_ATTACH_KPROBE,
+					     BPF_ATTACH_KRETPROBE,
+					     buf, sizeof(buf)));
+#ifdef __x86_64__
+	/* set a kprobe on "bpf_check + 0x5", which is x64 specific */
+	CHECK_AND_RET(test_nondebug_fs_probe("kprobe", "bpf_check", 0x5, 0x0,
+					     false, BPF_ATTACH_KPROBE,
+					     BPF_ATTACH_KRETPROBE,
+					     buf, sizeof(buf)));
+#endif
+	CHECK_AND_RET(test_nondebug_fs_probe("kprobe", "bpf_check", 0x0, 0x0,
+					     true, BPF_ATTACH_KPROBE,
+					     BPF_ATTACH_KRETPROBE,
+					     buf, sizeof(buf)));
+	CHECK_AND_RET(test_nondebug_fs_probe("kprobe", NULL, 0x0,
+					     ksym_get_addr("bpf_check"), false,
+					     BPF_ATTACH_KPROBE,
+					     BPF_ATTACH_KRETPROBE,
+					     buf, sizeof(buf)));
+	CHECK_AND_RET(test_nondebug_fs_probe("kprobe", NULL, 0x0,
+					     ksym_get_addr("bpf_check"), false,
+					     BPF_ATTACH_KPROBE,
+					     BPF_ATTACH_KRETPROBE,
+					     NULL, 0));
+	CHECK_AND_RET(test_nondebug_fs_probe("kprobe", NULL, 0x0,
+					     ksym_get_addr("bpf_check"), true,
+					     BPF_ATTACH_KPROBE,
+					     BPF_ATTACH_KRETPROBE,
+					     buf, sizeof(buf)));
+	CHECK_AND_RET(test_nondebug_fs_probe("kprobe", NULL, 0x0,
+					     ksym_get_addr("bpf_check"), true,
+					     BPF_ATTACH_KPROBE,
+					     BPF_ATTACH_KRETPROBE,
+					     0, 0));
+
+	/* test nondebug fs uprobe */
+	/* the calculation of uprobe file offset is based on gcc 7.3.1 on x64
+	 * and the default linker script, which defines __executable_start as
+	 * the start of the .text section. The calculation could be different
+	 * on different systems with different compilers. The right way is
+	 * to parse the ELF file. We took a shortcut here.
+	 */
+	uprobe_file_offset = (__u64)main - (__u64)&__executable_start;
+	CHECK_AND_RET(test_nondebug_fs_probe("uprobe", (char *)argv[0],
+					     uprobe_file_offset, 0x0, false,
+					     BPF_ATTACH_UPROBE,
+					     BPF_ATTACH_URETPROBE,
+					     buf, sizeof(buf)));
+	CHECK_AND_RET(test_nondebug_fs_probe("uprobe", (char *)argv[0],
+					     uprobe_file_offset, 0x0, true,
+					     BPF_ATTACH_UPROBE,
+					     BPF_ATTACH_URETPROBE,
+					     buf, sizeof(buf)));
+
+	/* test debug fs uprobe */
+	CHECK_AND_RET(test_debug_fs_uprobe((char *)argv[0], uprobe_file_offset,
+					   false));
+	CHECK_AND_RET(test_debug_fs_uprobe((char *)argv[0], uprobe_file_offset,
+					   true));
+
+	return 0;
+}
-- 
2.9.5

^ permalink raw reply related

* Re: [PATCH bpf 5/6] tools: bpftool: resolve calls without using imm field
From: Sandipan Das @ 2018-05-18  4:28 UTC (permalink / raw)
  To: Jakub Kicinski; +Cc: netdev, naveen.n.rao, linuxppc-dev, ast, daniel
In-Reply-To: <20180517115104.7666ff60@cakuba>

Hi Jakub,

On 05/18/2018 12:21 AM, Jakub Kicinski wrote:
> On Thu, 17 May 2018 12:05:47 +0530, Sandipan Das wrote:
>> Currently, we resolve the callee's address for a JITed function
>> call by using the imm field of the call instruction as an offset
>> from __bpf_call_base. If bpf_jit_kallsyms is enabled, we further
>> use this address to get the callee's kernel symbol's name.
>>
>> For some architectures, such as powerpc64, the imm field is not
>> large enough to hold this offset. So, instead of assigning this
>> offset to the imm field, the verifier now assigns the subprog
>> id. Also, a list of kernel symbol addresses for all the JITed
>> functions is provided in the program info. We now use the imm
>> field as an index for this list to lookup a callee's symbol's
>> address and resolve its name.
>>
>> Suggested-by: Daniel Borkmann <daniel@iogearbox.net>
>> Signed-off-by: Sandipan Das <sandipan@linux.vnet.ibm.com>
> 
> A few nit-picks below, thank you for the patch!
> 
>>  tools/bpf/bpftool/prog.c          | 31 +++++++++++++++++++++++++++++++
>>  tools/bpf/bpftool/xlated_dumper.c | 24 +++++++++++++++++-------
>>  tools/bpf/bpftool/xlated_dumper.h |  2 ++
>>  3 files changed, 50 insertions(+), 7 deletions(-)
>>
>> diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
>> index 9bdfdf2d3fbe..ac2f62a97e84 100644
>> --- a/tools/bpf/bpftool/prog.c
>> +++ b/tools/bpf/bpftool/prog.c
>> @@ -430,6 +430,10 @@ static int do_dump(int argc, char **argv)
>>  	unsigned char *buf;
>>  	__u32 *member_len;
>>  	__u64 *member_ptr;
>> +	unsigned int nr_addrs;
>> +	unsigned long *addrs = NULL;
>> +	__u32 *ksyms_len;
>> +	__u64 *ksyms_ptr;
> 
> nit: please try to keep the variables ordered longest to shortest like
> we do in networking code (please do it in all functions).
> 
>>  	ssize_t n;
>>  	int err;
>>  	int fd;
>> @@ -437,6 +441,8 @@ static int do_dump(int argc, char **argv)
>>  	if (is_prefix(*argv, "jited")) {
>>  		member_len = &info.jited_prog_len;
>>  		member_ptr = &info.jited_prog_insns;
>> +		ksyms_len = &info.nr_jited_ksyms;
>> +		ksyms_ptr = &info.jited_ksyms;
>>  	} else if (is_prefix(*argv, "xlated")) {
>>  		member_len = &info.xlated_prog_len;
>>  		member_ptr = &info.xlated_prog_insns;
>> @@ -496,10 +502,23 @@ static int do_dump(int argc, char **argv)
>>  		return -1;
>>  	}
>>  
>> +	nr_addrs = *ksyms_len;
> 
> Here and ...
> 
>> +	if (nr_addrs) {
>> +		addrs = malloc(nr_addrs * sizeof(__u64));
>> +		if (!addrs) {
>> +			p_err("mem alloc failed");
>> +			free(buf);
>> +			close(fd);
>> +			return -1;
> 
> You can just jump to err_free here.
> 
>> +		}
>> +	}
>> +
>>  	memset(&info, 0, sizeof(info));
>>  
>>  	*member_ptr = ptr_to_u64(buf);
>>  	*member_len = buf_size;
>> +	*ksyms_ptr = ptr_to_u64(addrs);
>> +	*ksyms_len = nr_addrs;
> 
> ... here - this function is getting long, so maybe I'm not seeing
> something, but are ksyms_ptr and ksyms_len guaranteed to be initialized?
> 
>>  	err = bpf_obj_get_info_by_fd(fd, &info, &len);
>>  	close(fd);
>> @@ -513,6 +532,11 @@ static int do_dump(int argc, char **argv)
>>  		goto err_free;
>>  	}
>>  
>> +	if (*ksyms_len > nr_addrs) {
>> +		p_err("too many addresses returned");
>> +		goto err_free;
>> +	}
>> +
>>  	if ((member_len == &info.jited_prog_len &&
>>  	     info.jited_prog_insns == 0) ||
>>  	    (member_len == &info.xlated_prog_len &&
>> @@ -558,6 +582,9 @@ static int do_dump(int argc, char **argv)
>>  			dump_xlated_cfg(buf, *member_len);
>>  	} else {
>>  		kernel_syms_load(&dd);
>> +		dd.jited_ksyms = ksyms_ptr;
>> +		dd.nr_jited_ksyms = *ksyms_len;
>> +
>>  		if (json_output)
>>  			dump_xlated_json(&dd, buf, *member_len, opcodes);
>>  		else
>> @@ -566,10 +593,14 @@ static int do_dump(int argc, char **argv)
>>  	}
>>  
>>  	free(buf);
>> +	if (addrs)
>> +		free(addrs);
> 
> Free can deal with NULL pointers, no need for an if.
> 
>>  	return 0;
>>  
>>  err_free:
>>  	free(buf);
>> +	if (addrs)
>> +		free(addrs);
>>  	return -1;
>>  }
>>  
>> diff --git a/tools/bpf/bpftool/xlated_dumper.c b/tools/bpf/bpftool/xlated_dumper.c
>> index 7a3173b76c16..dc8e4eca0387 100644
>> --- a/tools/bpf/bpftool/xlated_dumper.c
>> +++ b/tools/bpf/bpftool/xlated_dumper.c
>> @@ -178,8 +178,12 @@ static const char *print_call_pcrel(struct dump_data *dd,
>>  		snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
>>  			 "%+d#%s", insn->off, sym->name);
>>  	else
> 
> else if (address)
> 
> saves us the indentation.
> 
>> -		snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
>> -			 "%+d#0x%lx", insn->off, address);
>> +		if (address)
>> +			snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
>> +				 "%+d#0x%lx", insn->off, address);
>> +		else
>> +			snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
>> +				 "%+d", insn->off);
>>  	return dd->scratch_buff;
>>  }
>>  
>> @@ -200,14 +204,20 @@ static const char *print_call(void *private_data,
>>  			      const struct bpf_insn *insn)
>>  {
>>  	struct dump_data *dd = private_data;
>> -	unsigned long address = dd->address_call_base + insn->imm;
>> -	struct kernel_sym *sym;
>> +	unsigned long address = 0;
>> +	struct kernel_sym *sym = NULL;
>>  
> 
> Hm.  Quite a bit of churn.  Why not just add these three lines here:
> 
> if (insn->src_reg == BPF_PSEUDO_CALL && 
>     insn->imm < dd->nr_jited_ksyms)
> 	address = dd->jited_ksyms[insn->imm];
> 
>> -	sym = kernel_syms_search(dd, address);
>> -	if (insn->src_reg == BPF_PSEUDO_CALL)
>> +	if (insn->src_reg == BPF_PSEUDO_CALL) {
>> +		if (dd->nr_jited_ksyms) {
>> +			address = dd->jited_ksyms[insn->imm];
> 
> Perhaps it's paranoid, but it'd please do to bound check insn->imm
> against dd->nr_jited_ksyms.
> 
>> +			sym = kernel_syms_search(dd, address);
>> +		}
>>  		return print_call_pcrel(dd, sym, address, insn);
>> -	else
>> +	} else {
>> +		address = dd->address_call_base + insn->imm;
>> +		sym = kernel_syms_search(dd, address);
>>  		return print_call_helper(dd, sym, address);
>> +	}
>>  }
>>  
>>  static const char *print_imm(void *private_data,
> 
> 

Thank you for the suggestions. Will post out v2 with these changes.

- Sandipan

^ permalink raw reply

* Re: [PATCH net-next v12 3/7] sch_cake: Add optional ACK filter
From: Cong Wang @ 2018-05-18  4:27 UTC (permalink / raw)
  To: Toke Høiland-Jørgensen
  Cc: Eric Dumazet, Linux Kernel Network Developers, Cake List
In-Reply-To: <87in7my196.fsf@toke.dk>

On Thu, May 17, 2018 at 4:23 AM, Toke Høiland-Jørgensen <toke@toke.dk> wrote:
> Eric Dumazet <eric.dumazet@gmail.com> writes:
>
>> On 05/16/2018 01:29 PM, Toke Høiland-Jørgensen wrote:
>>> The ACK filter is an optional feature of CAKE which is designed to improve
>>> performance on links with very asymmetrical rate limits. On such links
>>> (which are unfortunately quite prevalent, especially for DSL and cable
>>> subscribers), the downstream throughput can be limited by the number of
>>> ACKs capable of being transmitted in the *upstream* direction.
>>>
>>
>> ...
>>
>>>
>>> Signed-off-by: Toke Høiland-Jørgensen <toke@toke.dk>
>>> ---
>>>  net/sched/sch_cake.c |  260 ++++++++++++++++++++++++++++++++++++++++++++++++++
>>>  1 file changed, 258 insertions(+), 2 deletions(-)
>>>
>>>
>>
>> I have decided to implement ACK compression in TCP stack itself.
>
> Awesome! Will look forward to seeing that!

+1

It is really odd to put into a TC qdisc, TCP stack is a much better
place.

^ permalink raw reply

* Re: [RFC PATCH bpf-next 12/12] i40e: implement Tx zero-copy
From: Björn Töpel @ 2018-05-18  4:23 UTC (permalink / raw)
  To: Jesper Dangaard Brouer
  Cc: Magnus Karlsson, Karlsson, Magnus, Duyck, Alexander H,
	Alexander Duyck, John Fastabend, Alexei Starovoitov,
	Willem de Bruijn, Daniel Borkmann, Michael S. Tsirkin, Netdev,
	michael.lundkvist, Brandeburg, Jesse, Singhai, Anjali,
	Zhang, Qi Z, intel-wired-lan
In-Reply-To: <20180517233115.44c00dee@redhat.com>

2018-05-17 23:31 GMT+02:00 Jesper Dangaard Brouer <brouer@redhat.com>:
>
> On Tue, 15 May 2018 21:06:15 +0200 Björn Töpel <bjorn.topel@gmail.com> wrote:
>
>> From: Magnus Karlsson <magnus.karlsson@intel.com>
>>
>> Here, the zero-copy ndo is implemented. As a shortcut, the existing
>> XDP Tx rings are used for zero-copy. This means that and XDP program
>> cannot redirect to an AF_XDP enabled XDP Tx ring.
>
> This "shortcut" is not acceptable, and completely broken.  The
> XDP_REDIRECT queue_index is based on smp_processor_id(), and can easily
> clash with the configured XSK queue_index.  Provided a bit more code
> context below...
>

Yes, and this is the reason we need to go for a solution with
dedicated Tx rings. Again, we chose not to, and simply drops
XDP_REDIRECT where the AF_XDP queue id clashes with the processor id.
The queue id hijacked by AF_XDP's egress side.

> On Tue, 15 May 2018 21:06:15 +0200
> Björn Töpel <bjorn.topel@gmail.com> wrote:
>
> int i40e_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf)
> {
>         struct i40e_netdev_priv *np = netdev_priv(dev);
>         unsigned int queue_index = smp_processor_id();
>         struct i40e_vsi *vsi = np->vsi;
>         int err;
>
>         if (test_bit(__I40E_VSI_DOWN, vsi->state))
>                 return -ENETDOWN;
>
>> @@ -4025,6 +4158,9 @@ int i40e_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf)
>>       if (!i40e_enabled_xdp_vsi(vsi) || queue_index >= vsi->num_queue_pairs)
>>               return -ENXIO;
>>
>> +     if (vsi->xdp_rings[queue_index]->xsk_umem)
>> +             return -ENXIO;
>> +
>
> Using the sane errno makes this impossible to debug (via the tracepoints).
>

The rationale was that the situation was similar to an incorrectly
configured receiving (from an XDP_REDIRECT perspective) interface.

We'll rework this! Thanks for looking into this, Jesper!


Björn

>>       err = i40e_xmit_xdp_ring(xdpf, vsi->xdp_rings[queue_index]);
>>       if (err != I40E_XDP_TX)
>>               return -ENOSPC;
>> @@ -4048,5 +4184,34 @@ void i40e_xdp_flush(struct net_device *dev)
>>       if (!i40e_enabled_xdp_vsi(vsi) || queue_index >= vsi->num_queue_pairs)
>>               return;
>>
>> +     if (vsi->xdp_rings[queue_index]->xsk_umem)
>> +             return;
>> +
>>       i40e_xdp_ring_update_tail(vsi->xdp_rings[queue_index]);
>>  }
>
>
>
> --
> Best regards,
>   Jesper Dangaard Brouer
>   MSc.CS, Principal Kernel Engineer at Red Hat
>   LinkedIn: http://www.linkedin.com/in/brouer

^ permalink raw reply

* Re: [net-next PATCH v2 0/4] Symmetric queue selection using XPS for Rx queues
From: Tom Herbert @ 2018-05-18  4:11 UTC (permalink / raw)
  To: Amritha Nambiar
  Cc: Linux Kernel Network Developers, David S. Miller, Alexander Duyck,
	Sridhar Samudrala, Eric Dumazet, Hannes Frederic Sowa
In-Reply-To: <152643356116.4991.7215767041139726872.stgit@anamdev.jf.intel.com>

On Tue, May 15, 2018 at 6:26 PM, Amritha Nambiar
<amritha.nambiar@intel.com> wrote:
> This patch series implements support for Tx queue selection based on
> Rx queue(s) map. This is done by configuring Rx queue(s) map per Tx-queue
> using sysfs attribute. If the user configuration for Rx queues does
> not apply, then the Tx queue selection falls back to XPS using CPUs and
> finally to hashing.
>
> XPS is refactored to support Tx queue selection based on either the
> CPUs map or the Rx-queues map. The config option CONFIG_XPS needs to be
> enabled. By default no receive queues are configured for the Tx queue.
>
> - /sys/class/net/<dev>/queues/tx-*/xps_rxqs
>
> This is to enable sending packets on the same Tx-Rx queue pair as this

If I'm reading the patch correctly, isn't this mapping rxq to a set of
txqs (in other words not strictly queue pair which has other
connotations in NIC HW). It is important to make it clear that this
feature is no HW dependent.

> is useful for busy polling multi-threaded workloads where it is not
> possible to pin the threads to a CPU. This is a rework of Sridhar's
> patch for symmetric queueing via socket option:
> https://www.spinics.net/lists/netdev/msg453106.html
>
Please add something about how this was tested and what the
performance gain is to justify the feature.

> v2:
> - Added documentation in networking/scaling.txt
> - Added a simple routine to replace multiple ifdef blocks.
>
> ---
>
> Amritha Nambiar (4):
>       net: Refactor XPS for CPUs and Rx queues
>       net: Enable Tx queue selection based on Rx queues
>       net-sysfs: Add interface for Rx queue map per Tx queue
>       Documentation: Add explanation for XPS using Rx-queue map
>
>
>  Documentation/networking/scaling.txt |   58 +++++++-
>  include/linux/cpumask.h              |   11 +-
>  include/linux/netdevice.h            |   72 ++++++++++
>  include/net/sock.h                   |   18 +++
>  net/core/dev.c                       |  242 +++++++++++++++++++++++-----------
>  net/core/net-sysfs.c                 |   85 ++++++++++++
>  net/core/sock.c                      |    5 +
>  net/ipv4/tcp_input.c                 |    7 +
>  net/ipv4/tcp_ipv4.c                  |    1
>  net/ipv4/tcp_minisocks.c             |    1
>  10 files changed, 404 insertions(+), 96 deletions(-)
>
> --

^ permalink raw reply

* Re: [Cake] [PATCH net-next v12 3/7] sch_cake: Add optional ACK filter
From: Eric Dumazet @ 2018-05-18  4:08 UTC (permalink / raw)
  To: Ryan Mounce, Toke Høiland-Jørgensen
  Cc: Eric Dumazet, netdev, Cake List
In-Reply-To: <CAN+fvRYP4oHQDyz=NGPnfR8+PZ04ammP-sgeB_9Gryu4xxyf0w@mail.gmail.com>



On 05/17/2018 07:36 PM, Ryan Mounce wrote:
> On 17 May 2018 at 22:41, Toke Høiland-Jørgensen <toke@toke.dk> wrote:
>> Eric Dumazet <eric.dumazet@gmail.com> writes:
>>
>>> On 05/17/2018 04:23 AM, Toke Høiland-Jørgensen wrote:
>>>
>>>>
>>>> We don't do full parsing of SACKs, no; we were trying to keep things
>>>> simple... We do detect the presence of SACK options, though, and the
>>>> presence of SACK options on an ACK will make previous ACKs be considered
>>>> redundant.
>>>>
>>>
>>> But they are not redundant in some cases, particularly when reorders
>>> happen in the network.
>>
>> Huh. I was under the impression that SACKs were basically cumulative
>> until cleared.
>>
>> I.e., in packet sequence ABCDE where B and D are lost, C would have
>> SACK(B) and E would have SACK(B,D). Are you saying that E would only
>> have SACK(D)?
> 
> SACK works by acknowledging additional ranges above those that have
> been ACKed, rather than ACKing up to the largest seen sequence number
> and reporting missing ranges before that.
> 
> A - ACK(A)
> B - lost
> C - ACK(A) + SACK(C)
> D - lost
> E - ACK(A) + SACK(C, E)
> 
> Cake does check that the ACK sequence number is greater, or if it is
> equal and the 'newer' ACK has the SACK option present. It doesn't
> compare the sequence numbers inside two SACKs. If the two SACKs in the
> above example had been reordered before reaching cake's ACK filter in
> aggressive mode, the wrong one will be filtered.
> 
> This is a limitation of my naive SACK handling in cake. The default
> 'conservative' mode happens to mitigate the problem in the above
> scenario, but the issue could still present itself in more
> pathological cases. It's fixable, however I'm not sure this corner
> case is sufficiently common or severe to warrant the extra complexity.

The extra complexity is absolutely requested for inclusion in upstream linux.

I recommend reading rfc 2018, whole section 4 (Generating Sack Options: Data Receiver Behavior
)

Proposed ACK filter in Cake is messing the protocol, since the first rule is not respected 

* The first SACK block (i.e., the one immediately following the
      kind and length fields in the option) MUST specify the contiguous
      block of data containing the segment which triggered this ACK,
      unless that segment advanced the Acknowledgment Number field in
      the header.  This assures that the ACK with the SACK option
      reflects the most recent change in the data receiver's buffer
      queue.


An ACK filter must either :

Not merge ACK if they contain different SACK blocks.

Or make a precise analysis of the SACK blocks to determine if the merge is allowed,
ie no useful information is lost.

The sender should get all the information as which segments were received correctly,
assuming no ACK are dropped because of congestion on return path.

^ permalink raw reply

* Re: [net-next PATCH v2 1/4] net: Refactor XPS for CPUs and Rx queues
From: Tom Herbert @ 2018-05-18  4:08 UTC (permalink / raw)
  To: Amritha Nambiar
  Cc: Linux Kernel Network Developers, David S. Miller, Alexander Duyck,
	Sridhar Samudrala, Eric Dumazet, Hannes Frederic Sowa
In-Reply-To: <152643400370.4991.2044471541271189575.stgit@anamdev.jf.intel.com>

On Tue, May 15, 2018 at 6:26 PM, Amritha Nambiar
<amritha.nambiar@intel.com> wrote:
> Refactor XPS code to support Tx queue selection based on
> CPU map or Rx queue map.
>
> Signed-off-by: Amritha Nambiar <amritha.nambiar@intel.com>
> ---
>  include/linux/cpumask.h   |   11 ++
>  include/linux/netdevice.h |   72 +++++++++++++++-
>  net/core/dev.c            |  208 +++++++++++++++++++++++++++++----------------
>  net/core/net-sysfs.c      |    4 -
>  4 files changed, 215 insertions(+), 80 deletions(-)
>
> diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
> index bf53d89..57f20a0 100644
> --- a/include/linux/cpumask.h
> +++ b/include/linux/cpumask.h
> @@ -115,12 +115,17 @@ extern struct cpumask __cpu_active_mask;
>  #define cpu_active(cpu)                ((cpu) == 0)
>  #endif
>
> -/* verify cpu argument to cpumask_* operators */
> -static inline unsigned int cpumask_check(unsigned int cpu)
> +static inline void cpu_max_bits_warn(unsigned int cpu, unsigned int bits)
>  {
>  #ifdef CONFIG_DEBUG_PER_CPU_MAPS
> -       WARN_ON_ONCE(cpu >= nr_cpumask_bits);
> +       WARN_ON_ONCE(cpu >= bits);
>  #endif /* CONFIG_DEBUG_PER_CPU_MAPS */
> +}
> +
> +/* verify cpu argument to cpumask_* operators */
> +static inline unsigned int cpumask_check(unsigned int cpu)
> +{
> +       cpu_max_bits_warn(cpu, nr_cpumask_bits);
>         return cpu;
>  }
>
> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> index 03ed492..c2eeb36 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -730,10 +730,21 @@ struct xps_map {
>   */
>  struct xps_dev_maps {
>         struct rcu_head rcu;
> -       struct xps_map __rcu *cpu_map[0];
> +       struct xps_map __rcu *attr_map[0];
>  };
> -#define XPS_DEV_MAPS_SIZE(_tcs) (sizeof(struct xps_dev_maps) +         \
> +
> +#define XPS_CPU_DEV_MAPS_SIZE(_tcs) (sizeof(struct xps_dev_maps) +     \
>         (nr_cpu_ids * (_tcs) * sizeof(struct xps_map *)))
> +
> +#define XPS_RXQ_DEV_MAPS_SIZE(_tcs, _rxqs) (sizeof(struct xps_dev_maps) +\
> +       (_rxqs * (_tcs) * sizeof(struct xps_map *)))
> +
> +enum xps_map_type {
> +       XPS_MAP_RXQS,
> +       XPS_MAP_CPUS,
> +       __XPS_MAP_MAX
> +};
> +
>  #endif /* CONFIG_XPS */
>
>  #define TC_MAX_QUEUE   16
> @@ -1891,7 +1902,7 @@ struct net_device {
>         int                     watchdog_timeo;
>
>  #ifdef CONFIG_XPS
> -       struct xps_dev_maps __rcu *xps_maps;
> +       struct xps_dev_maps __rcu *xps_maps[__XPS_MAP_MAX];
>  #endif
>  #ifdef CONFIG_NET_CLS_ACT
>         struct mini_Qdisc __rcu *miniq_egress;
> @@ -3229,6 +3240,61 @@ static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index)
>  #ifdef CONFIG_XPS
>  int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
>                         u16 index);
> +int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
> +                         u16 index, enum xps_map_type type);
> +
> +static inline bool attr_test_mask(unsigned long j, const unsigned long *mask,
> +                                 unsigned int nr_bits)
> +{
> +       cpu_max_bits_warn(j, nr_bits);
> +       return test_bit(j, mask);
> +}
> +
> +static inline bool attr_test_online(unsigned long j,
> +                                   const unsigned long *online_mask,
> +                                   unsigned int nr_bits)
> +{
> +       cpu_max_bits_warn(j, nr_bits);
> +
> +       if (online_mask)
> +               return test_bit(j, online_mask);
> +
> +       if (j >= 0 && j < nr_bits)
> +               return true;
> +
> +       return false;
> +}
> +
> +static inline unsigned int attrmask_next(int n, const unsigned long *srcp,
> +                                        unsigned int nr_bits)
> +{
> +       /* -1 is a legal arg here. */
> +       if (n != -1)
> +               cpu_max_bits_warn(n, nr_bits);
> +
> +       if (srcp)
> +               return find_next_bit(srcp, nr_bits, n + 1);
> +
> +       return n + 1;
> +}
> +
> +static inline int attrmask_next_and(int n, const unsigned long *src1p,
> +                                   const unsigned long *src2p,
> +                                   unsigned int nr_bits)
> +{
> +       /* -1 is a legal arg here. */
> +       if (n != -1)
> +               cpu_max_bits_warn(n, nr_bits);
> +
> +       if (src1p && src2p)
> +               return find_next_and_bit(src1p, src2p, nr_bits, n + 1);
> +       else if (src1p)
> +               return find_next_bit(src1p, nr_bits, n + 1);
> +       else if (src2p)
> +               return find_next_bit(src2p, nr_bits, n + 1);
> +
> +       return n + 1;
> +}
>  #else
>  static inline int netif_set_xps_queue(struct net_device *dev,
>                                       const struct cpumask *mask,
> diff --git a/net/core/dev.c b/net/core/dev.c
> index 9f43901..7e5dfdb 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -2092,7 +2092,7 @@ static bool remove_xps_queue(struct xps_dev_maps *dev_maps,
>         int pos;
>
>         if (dev_maps)
> -               map = xmap_dereference(dev_maps->cpu_map[tci]);
> +               map = xmap_dereference(dev_maps->attr_map[tci]);
>         if (!map)
>                 return false;
>
> @@ -2105,7 +2105,7 @@ static bool remove_xps_queue(struct xps_dev_maps *dev_maps,
>                         break;
>                 }
>
> -               RCU_INIT_POINTER(dev_maps->cpu_map[tci], NULL);
> +               RCU_INIT_POINTER(dev_maps->attr_map[tci], NULL);
>                 kfree_rcu(map, rcu);
>                 return false;
>         }
> @@ -2125,7 +2125,7 @@ static bool remove_xps_queue_cpu(struct net_device *dev,
>                 int i, j;
>
>                 for (i = count, j = offset; i--; j++) {
> -                       if (!remove_xps_queue(dev_maps, cpu, j))
> +                       if (!remove_xps_queue(dev_maps, tci, j))
>                                 break;
>                 }
>
> @@ -2138,30 +2138,47 @@ static bool remove_xps_queue_cpu(struct net_device *dev,
>  static void netif_reset_xps_queues(struct net_device *dev, u16 offset,
>                                    u16 count)
>  {
> +       const unsigned long *possible_mask = NULL;
> +       enum xps_map_type type = XPS_MAP_RXQS;
>         struct xps_dev_maps *dev_maps;
> -       int cpu, i;
>         bool active = false;
> +       unsigned int nr_ids;
> +       int i, j;
>
>         mutex_lock(&xps_map_mutex);
> -       dev_maps = xmap_dereference(dev->xps_maps);
>
> -       if (!dev_maps)
> -               goto out_no_maps;
> +       while (type < __XPS_MAP_MAX) {
> +               dev_maps = xmap_dereference(dev->xps_maps[type]);
> +               if (!dev_maps)
> +                       goto out_no_maps;
> +
> +               if (type == XPS_MAP_CPUS) {
> +                       if (num_possible_cpus() > 1)
> +                               possible_mask = cpumask_bits(cpu_possible_mask);
> +                       nr_ids = nr_cpu_ids;
> +               } else if (type == XPS_MAP_RXQS) {
> +                       nr_ids = dev->num_rx_queues;
> +               }
type is an enum so this should be a switch

>
> -       for_each_possible_cpu(cpu)
> -               active |= remove_xps_queue_cpu(dev, dev_maps, cpu,
> -                                              offset, count);
> +               for (j = -1; j = attrmask_next(j, possible_mask, nr_ids),
> +                    j < nr_ids;)
> +                       active |= remove_xps_queue_cpu(dev, dev_maps, j, offset,
> +                                                      count);
> +               if (!active) {
> +                       RCU_INIT_POINTER(dev->xps_maps[type], NULL);
> +                       kfree_rcu(dev_maps, rcu);
> +               }
>
> -       if (!active) {
> -               RCU_INIT_POINTER(dev->xps_maps, NULL);
> -               kfree_rcu(dev_maps, rcu);
> +               if (type == XPS_MAP_CPUS) {
> +                       for (i = offset + (count - 1); count--; i--)
> +                               netdev_queue_numa_node_write(
> +                                       netdev_get_tx_queue(dev, i),
> +                                                           NUMA_NO_NODE);
> +               }
> +out_no_maps:
> +               type++;
>         }
>
> -       for (i = offset + (count - 1); count--; i--)
> -               netdev_queue_numa_node_write(netdev_get_tx_queue(dev, i),
> -                                            NUMA_NO_NODE);
> -
> -out_no_maps:
>         mutex_unlock(&xps_map_mutex);
>  }
>
> @@ -2170,11 +2187,11 @@ static void netif_reset_xps_queues_gt(struct net_device *dev, u16 index)
>         netif_reset_xps_queues(dev, index, dev->num_tx_queues - index);
>  }
>
> -static struct xps_map *expand_xps_map(struct xps_map *map,
> -                                     int cpu, u16 index)
> +static struct xps_map *expand_xps_map(struct xps_map *map, int attr_index,
> +                                     u16 index, enum xps_map_type type)
>  {
> -       struct xps_map *new_map;
>         int alloc_len = XPS_MIN_MAP_ALLOC;
> +       struct xps_map *new_map = NULL;
>         int i, pos;
>
>         for (pos = 0; map && pos < map->len; pos++) {
> @@ -2183,7 +2200,7 @@ static struct xps_map *expand_xps_map(struct xps_map *map,
>                 return map;
>         }
>
> -       /* Need to add queue to this CPU's existing map */
> +       /* Need to add tx-queue to this CPU's/rx-queue's existing map */
>         if (map) {
>                 if (pos < map->alloc_len)
>                         return map;
> @@ -2191,9 +2208,14 @@ static struct xps_map *expand_xps_map(struct xps_map *map,
>                 alloc_len = map->alloc_len * 2;
>         }
>
> -       /* Need to allocate new map to store queue on this CPU's map */
> -       new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len), GFP_KERNEL,
> -                              cpu_to_node(cpu));
> +       /* Need to allocate new map to store tx-queue on this CPU's/rx-queue's
> +        *  map
> +        */
> +       if (type == XPS_MAP_RXQS)
> +               new_map = kzalloc(XPS_MAP_SIZE(alloc_len), GFP_KERNEL);
> +       else if (type == XPS_MAP_CPUS)
> +               new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len), GFP_KERNEL,
> +                                      cpu_to_node(attr_index));
switch here also

>         if (!new_map)
>                 return NULL;
>
> @@ -2205,14 +2227,16 @@ static struct xps_map *expand_xps_map(struct xps_map *map,
>         return new_map;
>  }
>
> -int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
> -                       u16 index)
> +int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
> +                         u16 index, enum xps_map_type type)
>  {
> +       const unsigned long *online_mask = NULL, *possible_mask = NULL;
>         struct xps_dev_maps *dev_maps, *new_dev_maps = NULL;
> -       int i, cpu, tci, numa_node_id = -2;
> +       int i, j, tci, numa_node_id = -2;
>         int maps_sz, num_tc = 1, tc = 0;
>         struct xps_map *map, *new_map;
>         bool active = false;
> +       unsigned int nr_ids;
>
>         if (dev->num_tc) {
>                 num_tc = dev->num_tc;
> @@ -2221,16 +2245,33 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
>                         return -EINVAL;
>         }
>
> -       maps_sz = XPS_DEV_MAPS_SIZE(num_tc);
> +       switch (type) {
> +       case XPS_MAP_RXQS:
> +               maps_sz = XPS_RXQ_DEV_MAPS_SIZE(num_tc, dev->num_rx_queues);
> +               dev_maps = xmap_dereference(dev->xps_maps[XPS_MAP_RXQS]);
> +               nr_ids = dev->num_rx_queues;
> +               break;
> +       case XPS_MAP_CPUS:
> +               maps_sz = XPS_CPU_DEV_MAPS_SIZE(num_tc);
> +               if (num_possible_cpus() > 1) {
> +                       online_mask = cpumask_bits(cpu_online_mask);
> +                       possible_mask = cpumask_bits(cpu_possible_mask);
> +               }
> +               dev_maps = xmap_dereference(dev->xps_maps[XPS_MAP_CPUS]);
> +               nr_ids = nr_cpu_ids;
> +               break;
> +       default:
> +               return -EINVAL;
> +       }
> +
>         if (maps_sz < L1_CACHE_BYTES)
>                 maps_sz = L1_CACHE_BYTES;
>
>         mutex_lock(&xps_map_mutex);
>
> -       dev_maps = xmap_dereference(dev->xps_maps);
> -
>         /* allocate memory for queue storage */
> -       for_each_cpu_and(cpu, cpu_online_mask, mask) {
> +       for (j = -1; j = attrmask_next_and(j, online_mask, mask, nr_ids),
> +            j < nr_ids;) {
>                 if (!new_dev_maps)
>                         new_dev_maps = kzalloc(maps_sz, GFP_KERNEL);
>                 if (!new_dev_maps) {
> @@ -2238,73 +2279,81 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
>                         return -ENOMEM;
>                 }
>
> -               tci = cpu * num_tc + tc;
> -               map = dev_maps ? xmap_dereference(dev_maps->cpu_map[tci]) :
> +               tci = j * num_tc + tc;
> +               map = dev_maps ? xmap_dereference(dev_maps->attr_map[tci]) :
>                                  NULL;
>
> -               map = expand_xps_map(map, cpu, index);
> +               map = expand_xps_map(map, j, index, type);
>                 if (!map)
>                         goto error;
>
> -               RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map);
> +               RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
>         }
>
>         if (!new_dev_maps)
>                 goto out_no_new_maps;
>
> -       for_each_possible_cpu(cpu) {
> +       for (j = -1; j = attrmask_next(j, possible_mask, nr_ids),
> +            j < nr_ids;) {
>                 /* copy maps belonging to foreign traffic classes */
> -               for (i = tc, tci = cpu * num_tc; dev_maps && i--; tci++) {
> +               for (i = tc, tci = j * num_tc; dev_maps && i--; tci++) {
>                         /* fill in the new device map from the old device map */
> -                       map = xmap_dereference(dev_maps->cpu_map[tci]);
> -                       RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map);
> +                       map = xmap_dereference(dev_maps->attr_map[tci]);
> +                       RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
>                 }
>
>                 /* We need to explicitly update tci as prevous loop
>                  * could break out early if dev_maps is NULL.
>                  */
> -               tci = cpu * num_tc + tc;
> +               tci = j * num_tc + tc;
>
> -               if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu)) {
> -                       /* add queue to CPU maps */
> +               if (attr_test_mask(j, mask, nr_ids) &&
> +                   attr_test_online(j, online_mask, nr_ids)) {
> +                       /* add tx-queue to CPU/rx-queue maps */
>                         int pos = 0;
>
> -                       map = xmap_dereference(new_dev_maps->cpu_map[tci]);
> +                       map = xmap_dereference(new_dev_maps->attr_map[tci]);
>                         while ((pos < map->len) && (map->queues[pos] != index))
>                                 pos++;
>
>                         if (pos == map->len)
>                                 map->queues[map->len++] = index;
>  #ifdef CONFIG_NUMA
> -                       if (numa_node_id == -2)
> -                               numa_node_id = cpu_to_node(cpu);
> -                       else if (numa_node_id != cpu_to_node(cpu))
> -                               numa_node_id = -1;
> +                       if (type == XPS_MAP_CPUS) {
> +                               if (numa_node_id == -2)
> +                                       numa_node_id = cpu_to_node(j);
> +                               else if (numa_node_id != cpu_to_node(j))
> +                                       numa_node_id = -1;
> +                       }
>  #endif
>                 } else if (dev_maps) {
>                         /* fill in the new device map from the old device map */
> -                       map = xmap_dereference(dev_maps->cpu_map[tci]);
> -                       RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map);
> +                       map = xmap_dereference(dev_maps->attr_map[tci]);
> +                       RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
>                 }
>
>                 /* copy maps belonging to foreign traffic classes */
>                 for (i = num_tc - tc, tci++; dev_maps && --i; tci++) {
>                         /* fill in the new device map from the old device map */
> -                       map = xmap_dereference(dev_maps->cpu_map[tci]);
> -                       RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map);
> +                       map = xmap_dereference(dev_maps->attr_map[tci]);
> +                       RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
>                 }
>         }
>
> -       rcu_assign_pointer(dev->xps_maps, new_dev_maps);
> +       if (type == XPS_MAP_RXQS)
> +               rcu_assign_pointer(dev->xps_maps[XPS_MAP_RXQS], new_dev_maps);
> +       else if (type == XPS_MAP_CPUS)
> +               rcu_assign_pointer(dev->xps_maps[XPS_MAP_CPUS], new_dev_maps);
>
>         /* Cleanup old maps */
>         if (!dev_maps)
>                 goto out_no_old_maps;
>
> -       for_each_possible_cpu(cpu) {
> -               for (i = num_tc, tci = cpu * num_tc; i--; tci++) {
> -                       new_map = xmap_dereference(new_dev_maps->cpu_map[tci]);
> -                       map = xmap_dereference(dev_maps->cpu_map[tci]);
> +       for (j = -1; j = attrmask_next(j, possible_mask, nr_ids),
> +            j < nr_ids;) {
> +               for (i = num_tc, tci = j * num_tc; i--; tci++) {
> +                       new_map = xmap_dereference(new_dev_maps->attr_map[tci]);
> +                       map = xmap_dereference(dev_maps->attr_map[tci]);
>                         if (map && map != new_map)
>                                 kfree_rcu(map, rcu);
>                 }
> @@ -2317,19 +2366,23 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
>         active = true;
>
>  out_no_new_maps:
> -       /* update Tx queue numa node */
> -       netdev_queue_numa_node_write(netdev_get_tx_queue(dev, index),
> -                                    (numa_node_id >= 0) ? numa_node_id :
> -                                    NUMA_NO_NODE);
> +       if (type == XPS_MAP_CPUS) {
> +               /* update Tx queue numa node */
> +               netdev_queue_numa_node_write(netdev_get_tx_queue(dev, index),
> +                                            (numa_node_id >= 0) ?
> +                                            numa_node_id : NUMA_NO_NODE);
> +       }
>
>         if (!dev_maps)
>                 goto out_no_maps;
>
> -       /* removes queue from unused CPUs */
> -       for_each_possible_cpu(cpu) {
> -               for (i = tc, tci = cpu * num_tc; i--; tci++)
> +       /* removes tx-queue from unused CPUs/rx-queues */
> +       for (j = -1; j = attrmask_next(j, possible_mask, nr_ids),
> +            j < nr_ids;) {
> +               for (i = tc, tci = j * num_tc; i--; tci++)
>                         active |= remove_xps_queue(dev_maps, tci, index);
> -               if (!cpumask_test_cpu(cpu, mask) || !cpu_online(cpu))
> +               if (!attr_test_mask(j, mask, nr_ids) ||
> +                   !attr_test_online(j, online_mask, nr_ids))
>                         active |= remove_xps_queue(dev_maps, tci, index);
>                 for (i = num_tc - tc, tci++; --i; tci++)
>                         active |= remove_xps_queue(dev_maps, tci, index);
> @@ -2337,7 +2390,10 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
>
>         /* free map if not active */
>         if (!active) {
> -               RCU_INIT_POINTER(dev->xps_maps, NULL);
> +               if (type == XPS_MAP_RXQS)
> +                       RCU_INIT_POINTER(dev->xps_maps[XPS_MAP_RXQS], NULL);
> +               else if (type == XPS_MAP_CPUS)
> +                       RCU_INIT_POINTER(dev->xps_maps[XPS_MAP_CPUS], NULL);
>                 kfree_rcu(dev_maps, rcu);
>         }
>
> @@ -2347,11 +2403,12 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
>         return 0;
>  error:
>         /* remove any maps that we added */
> -       for_each_possible_cpu(cpu) {
> -               for (i = num_tc, tci = cpu * num_tc; i--; tci++) {
> -                       new_map = xmap_dereference(new_dev_maps->cpu_map[tci]);
> +       for (j = -1; j = attrmask_next(j, possible_mask, nr_ids),
> +            j < nr_ids;) {
> +               for (i = num_tc, tci = j * num_tc; i--; tci++) {
> +                       new_map = xmap_dereference(new_dev_maps->attr_map[tci]);
>                         map = dev_maps ?
> -                             xmap_dereference(dev_maps->cpu_map[tci]) :
> +                             xmap_dereference(dev_maps->attr_map[tci]) :
>                               NULL;
>                         if (new_map && new_map != map)
>                                 kfree(new_map);
> @@ -2363,6 +2420,13 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
>         kfree(new_dev_maps);
>         return -ENOMEM;
>  }
> +
> +int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
> +                       u16 index)
> +{
> +       return __netif_set_xps_queue(dev, cpumask_bits(mask), index,
> +                                    XPS_MAP_CPUS);
> +}
>  EXPORT_SYMBOL(netif_set_xps_queue);
>
>  #endif
> @@ -3402,7 +3466,7 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
>         int queue_index = -1;
>
>         rcu_read_lock();
> -       dev_maps = rcu_dereference(dev->xps_maps);
> +       dev_maps = rcu_dereference(dev->xps_maps[XPS_MAP_CPUS]);
>         if (dev_maps) {
>                 unsigned int tci = skb->sender_cpu - 1;
>
> @@ -3411,7 +3475,7 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
>                         tci += netdev_get_prio_tc_map(dev, skb->priority);
>                 }
>
> -               map = rcu_dereference(dev_maps->cpu_map[tci]);
> +               map = rcu_dereference(dev_maps->attr_map[tci]);
>                 if (map) {
>                         if (map->len == 1)
>                                 queue_index = map->queues[0];
> diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
> index c476f07..d7abd33 100644
> --- a/net/core/net-sysfs.c
> +++ b/net/core/net-sysfs.c
> @@ -1227,13 +1227,13 @@ static ssize_t xps_cpus_show(struct netdev_queue *queue,
>         }
>
>         rcu_read_lock();
> -       dev_maps = rcu_dereference(dev->xps_maps);
> +       dev_maps = rcu_dereference(dev->xps_maps[XPS_MAP_CPUS]);
>         if (dev_maps) {
>                 for_each_possible_cpu(cpu) {
>                         int i, tci = cpu * num_tc + tc;
>                         struct xps_map *map;
>
> -                       map = rcu_dereference(dev_maps->cpu_map[tci]);
> +                       map = rcu_dereference(dev_maps->attr_map[tci]);
>                         if (!map)
>                                 continue;
>
>

^ permalink raw reply

* Re: [net-next PATCH v2 2/4] net: Enable Tx queue selection based on Rx queues
From: Tom Herbert @ 2018-05-18  4:03 UTC (permalink / raw)
  To: Amritha Nambiar
  Cc: Linux Kernel Network Developers, David S. Miller, Alexander Duyck,
	Sridhar Samudrala, Eric Dumazet, Hannes Frederic Sowa
In-Reply-To: <152643400925.4991.5029989601625953592.stgit@anamdev.jf.intel.com>

On Tue, May 15, 2018 at 6:26 PM, Amritha Nambiar
<amritha.nambiar@intel.com> wrote:
> This patch adds support to pick Tx queue based on the Rx queue map
> configuration set by the admin through the sysfs attribute
> for each Tx queue. If the user configuration for receive
> queue map does not apply, then the Tx queue selection falls back
> to CPU map based selection and finally to hashing.
>
> Signed-off-by: Amritha Nambiar <amritha.nambiar@intel.com>
> Signed-off-by: Sridhar Samudrala <sridhar.samudrala@intel.com>
> ---
>  include/net/sock.h       |   18 ++++++++++++++++++
>  net/core/dev.c           |   36 +++++++++++++++++++++++++++++-------
>  net/core/sock.c          |    5 +++++
>  net/ipv4/tcp_input.c     |    7 +++++++
>  net/ipv4/tcp_ipv4.c      |    1 +
>  net/ipv4/tcp_minisocks.c |    1 +
>  6 files changed, 61 insertions(+), 7 deletions(-)
>
> diff --git a/include/net/sock.h b/include/net/sock.h
> index 4f7c584..0613f63 100644
> --- a/include/net/sock.h
> +++ b/include/net/sock.h
> @@ -139,6 +139,8 @@ typedef __u64 __bitwise __addrpair;
>   *     @skc_node: main hash linkage for various protocol lookup tables
>   *     @skc_nulls_node: main hash linkage for TCP/UDP/UDP-Lite protocol
>   *     @skc_tx_queue_mapping: tx queue number for this connection
> + *     @skc_rx_queue_mapping: rx queue number for this connection
> + *     @skc_rx_ifindex: rx ifindex for this connection
>   *     @skc_flags: place holder for sk_flags
>   *             %SO_LINGER (l_onoff), %SO_BROADCAST, %SO_KEEPALIVE,
>   *             %SO_OOBINLINE settings, %SO_TIMESTAMPING settings
> @@ -215,6 +217,10 @@ struct sock_common {
>                 struct hlist_nulls_node skc_nulls_node;
>         };
>         int                     skc_tx_queue_mapping;
> +#ifdef CONFIG_XPS
> +       int                     skc_rx_queue_mapping;
> +       int                     skc_rx_ifindex;

Isn't this increasing size of sock_common for a narrow use case functionality?

> +#endif
>         union {
>                 int             skc_incoming_cpu;
>                 u32             skc_rcv_wnd;
> @@ -326,6 +332,10 @@ struct sock {
>  #define sk_nulls_node          __sk_common.skc_nulls_node
>  #define sk_refcnt              __sk_common.skc_refcnt
>  #define sk_tx_queue_mapping    __sk_common.skc_tx_queue_mapping
> +#ifdef CONFIG_XPS
> +#define sk_rx_queue_mapping    __sk_common.skc_rx_queue_mapping
> +#define sk_rx_ifindex          __sk_common.skc_rx_ifindex
> +#endif
>
>  #define sk_dontcopy_begin      __sk_common.skc_dontcopy_begin
>  #define sk_dontcopy_end                __sk_common.skc_dontcopy_end
> @@ -1696,6 +1706,14 @@ static inline int sk_tx_queue_get(const struct sock *sk)
>         return sk ? sk->sk_tx_queue_mapping : -1;
>  }
>
> +static inline void sk_mark_rx_queue(struct sock *sk, struct sk_buff *skb)
> +{
> +#ifdef CONFIG_XPS
> +       sk->sk_rx_ifindex = skb->skb_iif;
> +       sk->sk_rx_queue_mapping = skb_get_rx_queue(skb);
> +#endif
> +}
> +
>  static inline void sk_set_socket(struct sock *sk, struct socket *sock)
>  {
>         sk_tx_queue_clear(sk);
> diff --git a/net/core/dev.c b/net/core/dev.c
> index 7e5dfdb..4030368 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -3458,18 +3458,14 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
>  }
>  #endif /* CONFIG_NET_EGRESS */
>
> -static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
> -{
>  #ifdef CONFIG_XPS
> -       struct xps_dev_maps *dev_maps;
> +static int __get_xps_queue_idx(struct net_device *dev, struct sk_buff *skb,
> +                              struct xps_dev_maps *dev_maps, unsigned int tci)
> +{
>         struct xps_map *map;
>         int queue_index = -1;
>
> -       rcu_read_lock();
> -       dev_maps = rcu_dereference(dev->xps_maps[XPS_MAP_CPUS]);
>         if (dev_maps) {
> -               unsigned int tci = skb->sender_cpu - 1;
> -
>                 if (dev->num_tc) {
>                         tci *= dev->num_tc;
>                         tci += netdev_get_prio_tc_map(dev, skb->priority);
> @@ -3486,6 +3482,32 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
>                                 queue_index = -1;
>                 }
>         }
> +       return queue_index;
> +}
> +#endif
> +
> +static int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
> +{
> +#ifdef CONFIG_XPS
> +       enum xps_map_type i = XPS_MAP_RXQS;
> +       struct xps_dev_maps *dev_maps;
> +       struct sock *sk = skb->sk;
> +       int queue_index = -1;
> +       unsigned int tci = 0;
> +
> +       if (sk && sk->sk_rx_queue_mapping <= dev->real_num_rx_queues &&
> +           dev->ifindex == sk->sk_rx_ifindex)
> +               tci = sk->sk_rx_queue_mapping;
> +
> +       rcu_read_lock();
> +       while (queue_index < 0 && i < __XPS_MAP_MAX) {
> +               if (i == XPS_MAP_CPUS)

This while loop typifies exactly why I don't think the XPS maps should
be an array. There's only two and we really don't want this to be an
open ended invitation for people to create new mapping methods. The
code is much simpler and potentially more efficient if the two maps
are just separate pointers. It should look something like this:

dev_maps = rcu_dereference(dev->xps_rxqs_map);
if (dev_maps) {
        queue_index = __get_xps_queue_idx(dev, skb, dev_maps, tci);
        if (queue_index < 0) {
              dev_maps = rcu_dereference(dev->xps_rxqs_map);
              if (dev_maps) {
                     queue_index = __get_xps_queue_idx(dev, skb,
dev_rxqs_maps, tci);
               ...

Also, the rxqs is a pretty narrow use case and it's likely to be
rarely configured (relative to a CPU map). A static_key could be used
to eliminate the cost of the extra map check (the static_key could
also be used on for CPU maps, this is analogous to how there are
static keys for RPS and RFS).

> +                       tci = skb->sender_cpu - 1;
> +               dev_maps = rcu_dereference(dev->xps_maps[i]);
> +               queue_index = __get_xps_queue_idx(dev, skb, dev_maps, tci);
> +               i++;
> +       }
> +
>         rcu_read_unlock();
>
>         return queue_index;
> diff --git a/net/core/sock.c b/net/core/sock.c
> index 042cfc6..73d7fa8 100644
> --- a/net/core/sock.c
> +++ b/net/core/sock.c
> @@ -2824,6 +2824,11 @@ void sock_init_data(struct socket *sock, struct sock *sk)
>         sk->sk_pacing_rate = ~0U;
>         sk->sk_pacing_shift = 10;
>         sk->sk_incoming_cpu = -1;
> +
> +#ifdef CONFIG_XPS
> +       sk->sk_rx_ifindex = -1;
> +       sk->sk_rx_queue_mapping = -1;
> +#endif
>         /*
>          * Before updating sk_refcnt, we must commit prior changes to memory
>          * (Documentation/RCU/rculist_nulls.txt for details)
> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> index b188e0d..d33911c 100644
> --- a/net/ipv4/tcp_input.c
> +++ b/net/ipv4/tcp_input.c
> @@ -78,6 +78,7 @@
>  #include <linux/errqueue.h>
>  #include <trace/events/tcp.h>
>  #include <linux/static_key.h>
> +#include <net/busy_poll.h>
>
>  int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
>
> @@ -5559,6 +5560,11 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
>                 __tcp_fast_path_on(tp, tp->snd_wnd);
>         else
>                 tp->pred_flags = 0;
> +
> +       if (skb) {
> +               sk_mark_napi_id(sk, skb);
> +               sk_mark_rx_queue(sk, skb);
> +       }
>  }
>
>  static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
> @@ -6371,6 +6377,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
>         tcp_rsk(req)->snt_isn = isn;
>         tcp_rsk(req)->txhash = net_tx_rndhash();
>         tcp_openreq_init_rwin(req, sk, dst);
> +       sk_mark_rx_queue(req_to_sk(req), skb);
>         if (!want_cookie) {
>                 tcp_reqsk_record_syn(sk, req, skb);
>                 fastopen_sk = tcp_try_fastopen(sk, skb, req, &foc, dst);
> diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
> index caf23de..abdf02e 100644
> --- a/net/ipv4/tcp_ipv4.c
> +++ b/net/ipv4/tcp_ipv4.c
> @@ -1479,6 +1479,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
>
>                 sock_rps_save_rxhash(sk, skb);
>                 sk_mark_napi_id(sk, skb);
> +               sk_mark_rx_queue(sk, skb);

Can this be done in sock_rps_save_rxhash?

>                 if (dst) {
>                         if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
>                             !dst->ops->check(dst, 0)) {
> diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
> index f867658..4939c28 100644
> --- a/net/ipv4/tcp_minisocks.c
> +++ b/net/ipv4/tcp_minisocks.c
> @@ -836,6 +836,7 @@ int tcp_child_process(struct sock *parent, struct sock *child,
>
>         /* record NAPI ID of child */
>         sk_mark_napi_id(child, skb);
> +       sk_mark_rx_queue(child, skb);
>
>         tcp_segs_in(tcp_sk(child), skb);
>         if (!sock_owned_by_user(child)) {
>

^ permalink raw reply

* Re: [PATCH bpf-next v3 00/15] Introducing AF_XDP support
From: Alexei Starovoitov @ 2018-05-18  3:38 UTC (permalink / raw)
  To: Björn Töpel, Alexei Starovoitov
  Cc: Daniel Borkmann, Karlsson, Magnus, Duyck, Alexander H,
	Alexander Duyck, John Fastabend, Jesper Dangaard Brouer,
	Willem de Bruijn, Michael S. Tsirkin, Netdev,
	Björn Töpel, michael.lundkvist, Brandeburg, Jesse,
	Singhai, Anjali, Zhang, Qi Z
In-Reply-To: <CAJ+HfNin-ZFMqbVCVJLSvdiHEEUr8Q4gDLxY9EzGjr__rMW2Lg@mail.gmail.com>

On 5/16/18 11:46 PM, Björn Töpel wrote:
> 2018-05-04 1:38 GMT+02:00 Alexei Starovoitov <alexei.starovoitov@gmail.com>:
>> On Fri, May 04, 2018 at 12:49:09AM +0200, Daniel Borkmann wrote:
>>> On 05/02/2018 01:01 PM, Björn Töpel wrote:
>>>> From: Björn Töpel <bjorn.topel@intel.com>
>>>>
>>>> This patch set introduces a new address family called AF_XDP that is
>>>> optimized for high performance packet processing and, in upcoming
>>>> patch sets, zero-copy semantics. In this patch set, we have removed
>>>> all zero-copy related code in order to make it smaller, simpler and
>>>> hopefully more review friendly. This patch set only supports copy-mode
>>>> for the generic XDP path (XDP_SKB) for both RX and TX and copy-mode
>>>> for RX using the XDP_DRV path. Zero-copy support requires XDP and
>>>> driver changes that Jesper Dangaard Brouer is working on. Some of his
>>>> work has already been accepted. We will publish our zero-copy support
>>>> for RX and TX on top of his patch sets at a later point in time.
>>>
>>> +1, would be great to see it land this cycle. Saw few minor nits here
>>> and there but nothing to hold it up, for the series:
>>>
>>> Acked-by: Daniel Borkmann <daniel@iogearbox.net>
>>>
>>> Thanks everyone!
>>
>> Great stuff!
>>
>> Applied to bpf-next, with one condition.
>> Upcoming zero-copy patches for both RX and TX need to be posted
>> and reviewed within this release window.
>> If netdev community as a whole won't be able to agree on the zero-copy
>> bits we'd need to revert this feature before the next merge window.
>>
>> Few other minor nits:
>> patch 3:
>> +struct xdp_ring {
>> +       __u32 producer __attribute__((aligned(64)));
>> +       __u32 consumer __attribute__((aligned(64)));
>> +};
>> It kinda begs for ____cacheline_aligned_in_smp to be introduced for uapi headers.
>>
>
> Hmm, I need some guidance on what a sane uapi variant would be. We
> can't have the uapi depend on the kernel build. ARM64, e.g., can have
> both 64B and 128B according to the specs. Contemporary IA processors
> have 64B.
>
> The simplest, and maybe most future-proof, would be 128B aligned for
> all. Another is having 128B for ARM and 64B for all IA. A third option
> is having a hand-shaking API (I think virtio has that) for determine
> the cache line size, but I'd rather not go down that route.
>
> Thoughts/ideas on how a uapi ____cacheline_aligned_in_smp version
> would look like?

I suspect i40e+arm combination wasn't tested anyway.
The api may have endianness issues too on something like sparc.
I think the way to be backwards compatible in this area
is to make the api usable on x86 only by adding
to include/uapi/linux/if_xdp.h
#if defined(__x86_64__)
#define AF_XDP_CACHE_BYTES 64
#else
#error "AF_XDP support is not yet available for this architecture"
#endif
and doing:
     __u32 producer __attribute__((aligned(AF_XDP_CACHE_BYTES)));
     __u32 consumer __attribute__((aligned(AF_XDP_CACHE_BYTES)));

And progressively add to this for arm64 and few other archs.
Eventually removing #error and adding some generic define
that's good enough for long tail of architectures that
we really cannot test.

^ permalink raw reply

* Re: pull-request: bpf 2018-05-18
From: David Miller @ 2018-05-18  3:34 UTC (permalink / raw)
  To: daniel; +Cc: ast, netdev
In-Reply-To: <20180518002617.15231-1-daniel@iogearbox.net>

From: Daniel Borkmann <daniel@iogearbox.net>
Date: Fri, 18 May 2018 02:26:17 +0200

> The following pull-request contains BPF updates for your *net* tree.
> 
> The main changes are:
 ...
> Please consider pulling these changes from:
> 
>   git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf.git

Pulled.

> When this gets later merged into net-next there are a two trivial
> BPF conflicts to resolve:
 ...

Thanks a lot for the conflict guidance.

^ permalink raw reply

* [PATCH v2] net: qcom/emac: Allocate buffers from local node
From: Hemanth Puranik @ 2018-05-18  3:29 UTC (permalink / raw)
  To: netdev, linux-kernel; +Cc: Timur Tabi, Hemanth Puranik

Currently we use non-NUMA aware allocation for TPD and RRD buffers,
this patch modifies to use NUMA friendly allocation.

Signed-off-by: Hemanth Puranik <hpuranik@codeaurora.org>
---
Change since v1:
- Addressed comments related to ordering

 drivers/net/ethernet/qualcomm/emac/emac-mac.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/qualcomm/emac/emac-mac.c b/drivers/net/ethernet/qualcomm/emac/emac-mac.c
index 092718a..031f6e6 100644
--- a/drivers/net/ethernet/qualcomm/emac/emac-mac.c
+++ b/drivers/net/ethernet/qualcomm/emac/emac-mac.c
@@ -683,10 +683,11 @@ static int emac_tx_q_desc_alloc(struct emac_adapter *adpt,
 				struct emac_tx_queue *tx_q)
 {
 	struct emac_ring_header *ring_header = &adpt->ring_header;
+	int node = dev_to_node(adpt->netdev->dev.parent);
 	size_t size;
 
 	size = sizeof(struct emac_buffer) * tx_q->tpd.count;
-	tx_q->tpd.tpbuff = kzalloc(size, GFP_KERNEL);
+	tx_q->tpd.tpbuff = kzalloc_node(size, GFP_KERNEL, node);
 	if (!tx_q->tpd.tpbuff)
 		return -ENOMEM;
 
@@ -723,11 +724,12 @@ static void emac_rx_q_bufs_free(struct emac_adapter *adpt)
 static int emac_rx_descs_alloc(struct emac_adapter *adpt)
 {
 	struct emac_ring_header *ring_header = &adpt->ring_header;
+	int node = dev_to_node(adpt->netdev->dev.parent);
 	struct emac_rx_queue *rx_q = &adpt->rx_q;
 	size_t size;
 
 	size = sizeof(struct emac_buffer) * rx_q->rfd.count;
-	rx_q->rfd.rfbuff = kzalloc(size, GFP_KERNEL);
+	rx_q->rfd.rfbuff = kzalloc_node(size, GFP_KERNEL, node);
 	if (!rx_q->rfd.rfbuff)
 		return -ENOMEM;
 
-- 
Qualcomm Datacenter Technologies as an affiliate of Qualcomm Technologies, Inc.
Qualcomm Technologies, Inc. is a member of the
Code Aurora Forum, a Linux Foundation Collaborative Project.

^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox