Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCH net 2/5] net: dsa: b53: Properly account for VLAN filtering
From: Florian Fainelli @ 2019-02-15 20:16 UTC (permalink / raw)
  To: netdev; +Cc: davem, andrew, vivien.didelot, Florian Fainelli
In-Reply-To: <20190215201653.20988-1-f.fainelli@gmail.com>

VLAN filtering can be built into the kernel, and also dynamically turned
on/off through the bridge master device. Allow re-configuring the switch
appropriately to account for that by deciding whether VLAN table
(v_table) misses should lead to a drop or forward.

Fixes: a2482d2ce349 ("net: dsa: b53: Plug in VLAN support")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
---
 drivers/net/dsa/b53/b53_common.c | 59 +++++++++++++++++++++++++++++---
 drivers/net/dsa/b53/b53_priv.h   |  3 ++
 2 files changed, 57 insertions(+), 5 deletions(-)

diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c
index 964a9ec4652a..2fef4c564420 100644
--- a/drivers/net/dsa/b53/b53_common.c
+++ b/drivers/net/dsa/b53/b53_common.c
@@ -344,7 +344,8 @@ static void b53_set_forwarding(struct b53_device *dev, int enable)
 	b53_write8(dev, B53_CTRL_PAGE, B53_SWITCH_CTRL, mgmt);
 }
 
-static void b53_enable_vlan(struct b53_device *dev, bool enable)
+static void b53_enable_vlan(struct b53_device *dev, bool enable,
+			    bool enable_filtering)
 {
 	u8 mgmt, vc0, vc1, vc4 = 0, vc5;
 
@@ -369,8 +370,13 @@ static void b53_enable_vlan(struct b53_device *dev, bool enable)
 		vc0 |= VC0_VLAN_EN | VC0_VID_CHK_EN | VC0_VID_HASH_VID;
 		vc1 |= VC1_RX_MCST_UNTAG_EN | VC1_RX_MCST_FWD_EN;
 		vc4 &= ~VC4_ING_VID_CHECK_MASK;
-		vc4 |= VC4_ING_VID_VIO_DROP << VC4_ING_VID_CHECK_S;
-		vc5 |= VC5_DROP_VTABLE_MISS;
+		if (enable_filtering) {
+			vc4 |= VC4_ING_VID_VIO_DROP << VC4_ING_VID_CHECK_S;
+			vc5 |= VC5_DROP_VTABLE_MISS;
+		} else {
+			vc4 |= VC4_ING_VID_VIO_FWD << VC4_ING_VID_CHECK_S;
+			vc5 &= ~VC5_DROP_VTABLE_MISS;
+		}
 
 		if (is5325(dev))
 			vc0 &= ~VC0_RESERVED_1;
@@ -420,6 +426,9 @@ static void b53_enable_vlan(struct b53_device *dev, bool enable)
 	}
 
 	b53_write8(dev, B53_CTRL_PAGE, B53_SWITCH_MODE, mgmt);
+
+	dev->vlan_enabled = enable;
+	dev->vlan_filtering_enabled = enable_filtering;
 }
 
 static int b53_set_jumbo(struct b53_device *dev, bool enable, bool allow_10_100)
@@ -656,7 +665,7 @@ int b53_configure_vlan(struct dsa_switch *ds)
 		b53_do_vlan_op(dev, VTA_CMD_CLEAR);
 	}
 
-	b53_enable_vlan(dev, false);
+	b53_enable_vlan(dev, false, dev->vlan_filtering_enabled);
 
 	b53_for_each_port(dev, i)
 		b53_write16(dev, B53_VLAN_PAGE,
@@ -1265,6 +1274,46 @@ EXPORT_SYMBOL(b53_phylink_mac_link_up);
 
 int b53_vlan_filtering(struct dsa_switch *ds, int port, bool vlan_filtering)
 {
+	struct b53_device *dev = ds->priv;
+	struct net_device *bridge_dev;
+	unsigned int i;
+	u16 pvid, new_pvid;
+
+	/* Handle the case where multiple bridges span the same switch device
+	 * and one of them has a different setting than what is being requested
+	 * which would be breaking filtering semantics for any of the other
+	 * bridge devices.
+	 */
+	b53_for_each_port(dev, i) {
+		bridge_dev = dsa_to_port(ds, i)->bridge_dev;
+		if (bridge_dev &&
+		    bridge_dev != dsa_to_port(ds, port)->bridge_dev &&
+		    br_vlan_enabled(bridge_dev) != vlan_filtering) {
+			netdev_err(bridge_dev,
+				   "VLAN filtering is global to the switch!\n");
+			return -EINVAL;
+		}
+	}
+
+	b53_read16(dev, B53_VLAN_PAGE, B53_VLAN_PORT_DEF_TAG(port), &pvid);
+	new_pvid = pvid;
+	if (dev->vlan_filtering_enabled && !vlan_filtering) {
+		/* Filtering is currently enabled, use the default PVID since
+		 * the bridge does not expect tagging anymore
+		 */
+		dev->ports[port].pvid = pvid;
+		new_pvid = b53_default_pvid(dev);
+	} else if (!dev->vlan_filtering_enabled && vlan_filtering) {
+		/* Filtering is currently disabled, restore the previous PVID */
+		new_pvid = dev->ports[port].pvid;
+	}
+
+	if (pvid != new_pvid)
+		b53_write16(dev, B53_VLAN_PAGE, B53_VLAN_PORT_DEF_TAG(port),
+			    new_pvid);
+
+	b53_enable_vlan(dev, dev->vlan_enabled, vlan_filtering);
+
 	return 0;
 }
 EXPORT_SYMBOL(b53_vlan_filtering);
@@ -1280,7 +1329,7 @@ int b53_vlan_prepare(struct dsa_switch *ds, int port,
 	if (vlan->vid_end > dev->num_vlans)
 		return -ERANGE;
 
-	b53_enable_vlan(dev, true);
+	b53_enable_vlan(dev, true, dev->vlan_filtering_enabled);
 
 	return 0;
 }
diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h
index ec796482792d..4dc7ee38b258 100644
--- a/drivers/net/dsa/b53/b53_priv.h
+++ b/drivers/net/dsa/b53/b53_priv.h
@@ -91,6 +91,7 @@ enum {
 struct b53_port {
 	u16		vlan_ctl_mask;
 	struct ethtool_eee eee;
+	u16		pvid;
 };
 
 struct b53_vlan {
@@ -137,6 +138,8 @@ struct b53_device {
 
 	unsigned int num_vlans;
 	struct b53_vlan *vlans;
+	bool vlan_enabled;
+	bool vlan_filtering_enabled;
 	unsigned int num_ports;
 	struct b53_port *ports;
 };
-- 
2.17.1


^ permalink raw reply related

* [PATCH net 3/5] net: systemport: Fix reception of BPDUs
From: Florian Fainelli @ 2019-02-15 20:16 UTC (permalink / raw)
  To: netdev; +Cc: davem, andrew, vivien.didelot, Florian Fainelli
In-Reply-To: <20190215201653.20988-1-f.fainelli@gmail.com>

SYSTEMPORT has its RXCHK parser block that attempts to validate the
packet structures, unfortunately setting the L2 header check bit will
cause Bridge PDUs (BPDUs) to be incorrectly rejected because they look
like LLC/SNAP packets with a non-IPv4 or non-IPv6 Ethernet Type.

Fixes: 4e8aedfe78c7 ("net: systemport: Turn on offloads by default")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
---
 drivers/net/ethernet/broadcom/bcmsysport.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c
index 28c9b0bdf2f6..bc3ac369cbe3 100644
--- a/drivers/net/ethernet/broadcom/bcmsysport.c
+++ b/drivers/net/ethernet/broadcom/bcmsysport.c
@@ -134,6 +134,10 @@ static void bcm_sysport_set_rx_csum(struct net_device *dev,
 
 	priv->rx_chk_en = !!(wanted & NETIF_F_RXCSUM);
 	reg = rxchk_readl(priv, RXCHK_CONTROL);
+	/* Clear L2 header checks, which would prevent BPDUs
+	 * from being received.
+	 */
+	reg &= ~RXCHK_L2_HDR_DIS;
 	if (priv->rx_chk_en)
 		reg |= RXCHK_EN;
 	else
-- 
2.17.1


^ permalink raw reply related

* [PATCH net 4/5] net: dsa: bcm_sf2: Do not assume DSA master supports WoL
From: Florian Fainelli @ 2019-02-15 20:16 UTC (permalink / raw)
  To: netdev; +Cc: davem, andrew, vivien.didelot, Florian Fainelli
In-Reply-To: <20190215201653.20988-1-f.fainelli@gmail.com>

We assume in the bcm_sf2 driver that the DSA master network device
supports ethtool_ops::{get,set}_wol operations, which is not a given.
Avoid de-referencing potentially non-existent function pointers and
check them as we should.

Fixes: 96e65d7f3f88 ("net: dsa: bcm_sf2: add support for Wake-on-LAN")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
---
 drivers/net/dsa/bcm_sf2.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c
index 17ec32b0a1cc..14138d423cf1 100644
--- a/drivers/net/dsa/bcm_sf2.c
+++ b/drivers/net/dsa/bcm_sf2.c
@@ -726,10 +726,11 @@ static void bcm_sf2_sw_get_wol(struct dsa_switch *ds, int port,
 {
 	struct net_device *p = ds->ports[port].cpu_dp->master;
 	struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
-	struct ethtool_wolinfo pwol;
+	struct ethtool_wolinfo pwol = { };
 
 	/* Get the parent device WoL settings */
-	p->ethtool_ops->get_wol(p, &pwol);
+	if (p->ethtool_ops->get_wol)
+		p->ethtool_ops->get_wol(p, &pwol);
 
 	/* Advertise the parent device supported settings */
 	wol->supported = pwol.supported;
@@ -750,9 +751,10 @@ static int bcm_sf2_sw_set_wol(struct dsa_switch *ds, int port,
 	struct net_device *p = ds->ports[port].cpu_dp->master;
 	struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
 	s8 cpu_port = ds->ports[port].cpu_dp->index;
-	struct ethtool_wolinfo pwol;
+	struct ethtool_wolinfo pwol =  { };
 
-	p->ethtool_ops->get_wol(p, &pwol);
+	if (p->ethtool_ops->get_wol)
+		p->ethtool_ops->get_wol(p, &pwol);
 	if (wol->wolopts & ~pwol.supported)
 		return -EINVAL;
 
-- 
2.17.1


^ permalink raw reply related

* [PATCH net 5/5] net: dsa: b53: Do not program CPU port's PVID
From: Florian Fainelli @ 2019-02-15 20:16 UTC (permalink / raw)
  To: netdev; +Cc: davem, andrew, vivien.didelot, Florian Fainelli
In-Reply-To: <20190215201653.20988-1-f.fainelli@gmail.com>

The CPU port is special and does not need to obey VLAN restrictions as
far as untagged traffic goes, also, having the CPU port be part of a
particular PVID is against the idea of keeping it tagged in all VLANs.

Fixes: ca8931948344 ("net: dsa: b53: Keep CPU port as tagged in all VLANs")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
---
 drivers/net/dsa/b53/b53_common.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c
index 2fef4c564420..c76892ac4e69 100644
--- a/drivers/net/dsa/b53/b53_common.c
+++ b/drivers/net/dsa/b53/b53_common.c
@@ -1359,7 +1359,7 @@ void b53_vlan_add(struct dsa_switch *ds, int port,
 		b53_fast_age_vlan(dev, vid);
 	}
 
-	if (pvid) {
+	if (pvid && !dsa_is_cpu_port(ds, port)) {
 		b53_write16(dev, B53_VLAN_PAGE, B53_VLAN_PORT_DEF_TAG(port),
 			    vlan->vid_end);
 		b53_fast_age_vlan(dev, vid);
-- 
2.17.1


^ permalink raw reply related

* Re: [PATCH bpf-next 2/4] libbpf: Support 32-bit static data loads
From: Y Song @ 2019-02-15 20:18 UTC (permalink / raw)
  To: Joe Stringer; +Cc: bpf, netdev, Daniel Borkmann, Alexei Starovoitov
In-Reply-To: <CAOftzPjFbBHabG2SR=QrC053Y8cC+Gi1LvK19oLEvDvR6LSLkw@mail.gmail.com>

On Thu, Feb 14, 2019 at 11:16 PM Joe Stringer <joe@wand.net.nz> wrote:
>
> On Thu, 14 Feb 2019 at 21:39, Y Song <ys114321@gmail.com> wrote:
> >
> > On Mon, Feb 11, 2019 at 4:48 PM Joe Stringer <joe@wand.net.nz> wrote:
> > >
> > > Support loads of static 32-bit data when BPF writers make use of
> > > convenience macros for accessing static global data variables. A later
> > > patch in this series will demonstrate its usage in a selftest.
> > >
> > > As of LLVM-7, this technique only works with 32-bit data, as LLVM will
> > > complain if this technique is attempted with data of other sizes:
> > >
> > >     LLVM ERROR: Unsupported relocation: try to compile with -O2 or above,
> > >     or check your static variable usage
> >
> > A little bit clarification from compiler side.
> > The above compiler error is to prevent people use static variables since current
> > kernel/libbpf does not handle this. The compiler only warns if .bss or
> > .data section
> > has more than one definitions. The first definition always has section offset 0
> > and the compiler did not warn.
>
> Ah, interesting. I observed that warning when I attempted to define
> global variables of multiple sizes, and I thought also with sizes
> other than 32-bit. This clarifies things a bit, thanks.
>
> For the .bss my observation was that if you had a definition like:
>
> static int a = 0;
>
> Then this will be placed into .bss, hence why I looked into the
> approach from this patch for patch 3 as well.
>
> > The restriction is a little strange. To only work with 32-bit data is
> > not a right
> > statement. The following are some examples.
> >
> > The following static variable definitions will succeed:
> > static int a; /* one in .bss */
> > static long b = 2;  /* one in .data */
> >
> > The following definitions will fail as both in .bss.
> > static int a;
> > static int b;
> >
> > The following definitions will fail as both in .data:
> > static char a = 2;
> > static int b = 3;
>
> Are there type restrictions or something? I've been defining multiple

There are no type restrictions.
-bash-4.4$ cat g.c
struct t {
  int a;
  char b;
  long c;
};
static volatile struct t v;
int test() { return v.a + v.b; }
-bash-4.4$ clang -O2 -g -c -target bpf g.c
-bash-4.4$

> static uint32_t and using them per the approach in this patch series
> without hitting this compiler assertion.

-bash-4.4$ cat g1.c
static volatile int a;
static volatile int b;
int test() { return a + b; }
-bash-4.4$ clang -O2 -g -c -target bpf g1.c
fatal error: error in backend: Unsupported relocation: try to compile
with -O2 or above, or check your static variable
      usage
-bash-4.4$

>
> > Using global variables can prevent compiler errors.
> > maps are defined as globals and the compiler does not
> > check whether a particular global variable is defining a map or not.
> >
> > If you just use static variable like below
> > static int a = 2;
> > without potential assignment to a, the compiler will replace variable
> > a with 2 at compile time.
> > An alternative is to define like below
> > static volatile int a = 2;
> > You can get a "load" for variable "a" in the bpf load even if there is
> > no assignment to a.
>
> I'll take a closer look at this too.
>
> > Maybe now is the time to remove the compiler assertions as
> > libbpf/kernel starts to
> > handle static variables?
>
> I don't understand why those assertions exists in this form. It
> already allows code which will not load with libbpf (ie generate any
> .data/.bss), does it help prevent unexpected situations for
> developers?

The error is introduced by the following llvm commit:
commit 39184e407cd937f2f20d3f61eec205925bae7b13
Author: Yonghong Song <yhs@fb.com>
Date:   Wed Aug 22 21:21:03 2018 +0000

    bpf: fix an assertion in BPFAsmBackend applyFixup()

    Fix bug https://bugs.llvm.org/show_bug.cgi?id=38643

    In BPFAsmBackend applyFixup(), there is an assertion for FixedValue to be 0.
    This may not be true, esp. for optimization level 0.
    For example, in the above bug, for the following two
    static variables:
      @bpf_map_lookup_elem = internal global i8* (i8*, i8*)*
          inttoptr (i64 1 to i8* (i8*, i8*)*), align 8
      @bpf_map_update_elem = internal global i32 (i8*, i8*, i8*, i64)*
          inttoptr (i64 2 to i32 (i8*, i8*, i8*, i64)*), align 8

    The static variable @bpf_map_update_elem will have a symbol
    offset of 8 and a FK_SecRel_8 with FixupValue 8 will cause
    the assertion if llvm is built with -DLLVM_ENABLE_ASSERTIONS=ON.

    The above relocations will not exist if the program is compiled
    with optimization level -O1 and above as the compiler optimizes
    those static variables away. In the below error message, -O2
    is suggested as this is the common practice.

    Note that FixedValue = 0 in applyFixup() does exist and is valid,
    e.g., for the global variable my_map in the above bug. The bpf
    loader will process them properly for map_id's before loading
    the program into the kernel.

    The static variables, which are not optimized away by compiler,
    may have FK_SecRel_8 relocation with non-zero FixedValue.

    The patch removed the offending assertion and will issue
    a hard error as below if the FixedValue in applyFixup()
    is not 0.
      $ llc -march=bpf -filetype=obj fixup.ll
      LLVM ERROR: Unsupported relocation: try to compile with -O2 or above,
          or check your static variable usage

Its main purpose is to fix a behavior difference with and without
-DLLVM_ENABLE_ASSERTIONS=ON. The patch generated an error
regardless of whether the compile-time assertion is turned on or not.

It does not catch all the cases, e.g., when only one static variable is defined;
this needs fine tuning as there are legitimate cases (e.g., in some DWARF
sections) where the Fixup is valid with FixedValue = 0.

If you try to use more than one static variable, the compiler will
print an error and let you know your potential issues.

The question is since we are on the path to allow static variables
in the bpf programs for later patching, we probably should remove
this compiler fatal error?

^ permalink raw reply

* RE: [Intel-wired-lan] [PATCH] e1000e: Disable runtime PM on CNP+
From: Brown, Aaron F @ 2019-02-15 20:37 UTC (permalink / raw)
  To: Kai-Heng Feng, Kirsher, Jeffrey T
  Cc: netdev@vger.kernel.org, intel-wired-lan@lists.osuosl.org,
	linux-kernel@vger.kernel.org
In-Reply-To: <20190202174016.28539-1-kai.heng.feng@canonical.com>

> From: Intel-wired-lan [mailto:intel-wired-lan-bounces@osuosl.org] On
> Behalf Of Kai-Heng Feng
> Sent: Saturday, February 2, 2019 9:40 AM
> To: Kirsher, Jeffrey T <jeffrey.t.kirsher@intel.com>
> Cc: netdev@vger.kernel.org; Kai-Heng Feng
> <kai.heng.feng@canonical.com>; intel-wired-lan@lists.osuosl.org; linux-
> kernel@vger.kernel.org
> Subject: [Intel-wired-lan] [PATCH] e1000e: Disable runtime PM on CNP+
> 
> There are some new e1000e devices can only be woken up from D3 one
> time,
> by plugging ethernet cable. Subsequent cable plugging does set PME bit
> correctly, but it still doesn't get woken up.
> 
> Since e1000e connects to the root complex directly, we rely on ACPI to
> wake it up. In this case, the GPE from _PRW only works once and stops
> working after that. Though it appears to be a platform bug, e1000e
> maintainers confirmed that I219 does not support D3.
> 
> So disable runtime PM on CNP+ chips. We may need to disable earlier
> generations if this bug also hit older platforms.
> 
> Bugzilla: https://bugzilla.kernel.org/attachment.cgi?id=280819
> Signed-off-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
> ---
>  drivers/net/ethernet/intel/e1000e/netdev.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 

Tested-by: Aaron Brown <aaron.f.brown@intel.com>

^ permalink raw reply

* [MERGE HELP] cls_tcindex.c
From: David Miller @ 2019-02-15 20:41 UTC (permalink / raw)
  To: netdev; +Cc: xiyou.wangcong, vladbu

I've merged net into net-next.

The worst conflict was cls_tcindex.c as Cong's fixes collided heavily
with Vlad's work.

The interim solution I used for this merge was to revert back to RCU.

Please take a look at what I did and send me followups because I am
absolutely certain that some are necessary :-)))

Thanks!

^ permalink raw reply

* [PATCH net-next 0/2] net: phy: add helpers for handling C45 10GBT AN register values
From: Heiner Kallweit @ 2019-02-15 20:56 UTC (permalink / raw)
  To: Andrew Lunn, Florian Fainelli, David Miller; +Cc: netdev@vger.kernel.org

Similar to the existing helpers for the Clause 22 registers add helpers
to deal with converting Clause 45 advertisement registers to / from
link mode bitmaps.

Note that these helpers are defined in linux/mdio.h, not like the
Clause 22 helpers in linux/mii.h. Reason is that the Clause 45 register
constants are defined in uapi/linux/mdio.h. And uapi/linux/mdio.h
includes linux/mii.h before defining the C45 register constants.

Heiner Kallweit (2):
  net: phy: add helpers for handling C45 10GBT AN register values
  net: phy: use mii_10gbt_stat_mod_linkmode_lpa_t in genphy_c45_read_lpa

 drivers/net/phy/phy-c45.c | 10 +------
 include/linux/mdio.h      | 63 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 64 insertions(+), 9 deletions(-)

-- 
2.20.1

^ permalink raw reply

* [PATCH net-next 1/2] net: phy: add helpers for handling C45 10GBT AN register values
From: Heiner Kallweit @ 2019-02-15 20:57 UTC (permalink / raw)
  To: Andrew Lunn, Florian Fainelli, David Miller; +Cc: netdev@vger.kernel.org
In-Reply-To: <60b2e19d-e138-dfa0-fa79-19f3cbece99e@gmail.com>

Similar to the existing helpers for the Clause 22 registers add helpers
to deal with converting Clause 45 advertisement registers to / from
link mode bitmaps.

Note that these helpers are defined in linux/mdio.h, not like the
Clause 22 helpers in linux/mii.h. Reason is that the Clause 45 register
constants are defined in uapi/linux/mdio.h. And uapi/linux/mdio.h
includes linux/mii.h before defining the C45 register constants.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
---
 include/linux/mdio.h | 63 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 63 insertions(+)

diff --git a/include/linux/mdio.h b/include/linux/mdio.h
index bfa711416..ce0d5ddbf 100644
--- a/include/linux/mdio.h
+++ b/include/linux/mdio.h
@@ -261,6 +261,69 @@ static inline u16 ethtool_adv_to_mmd_eee_adv_t(u32 adv)
 	return reg;
 }
 
+/**
+ * linkmode_adv_to_mii_10gbt_adv_t
+ * @advertising: the linkmode advertisement settings
+ *
+ * A small helper function that translates linkmode advertisement
+ * settings to phy autonegotiation advertisements for the C45
+ * 10GBASE-T AN CONTROL (7.32) register.
+ */
+static inline u32 linkmode_adv_to_mii_10gbt_adv_t(unsigned long *advertising)
+{
+	u32 result = 0;
+
+	if (linkmode_test_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT,
+			      advertising))
+		result |= MDIO_AN_10GBT_CTRL_ADV2_5G;
+	if (linkmode_test_bit(ETHTOOL_LINK_MODE_5000baseT_Full_BIT,
+			      advertising))
+		result |= MDIO_AN_10GBT_CTRL_ADV5G;
+	if (linkmode_test_bit(ETHTOOL_LINK_MODE_10000baseT_Full_BIT,
+			      advertising))
+		result |= MDIO_AN_10GBT_CTRL_ADV10G;
+
+	return result;
+}
+
+/**
+ * mii_10gbt_stat_mod_linkmode_lpa_t
+ * @advertising: target the linkmode advertisement settings
+ * @lpa: value of the C45 10GBASE-T AN STATUS register
+ *
+ * A small helper function that translates C45 10GBASE-T AN STATUS register bits
+ * to linkmode advertisement settings. Other bits in advertising aren't changed.
+ */
+static inline void mii_10gbt_stat_mod_linkmode_lpa_t(unsigned long *advertising,
+						     u32 lpa)
+{
+	linkmode_mod_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT,
+			 advertising, lpa & MDIO_AN_10GBT_STAT_LP2_5G);
+	linkmode_mod_bit(ETHTOOL_LINK_MODE_5000baseT_Full_BIT,
+			 advertising, lpa & MDIO_AN_10GBT_STAT_LP5G);
+	linkmode_mod_bit(ETHTOOL_LINK_MODE_10000baseT_Full_BIT,
+			 advertising, lpa & MDIO_AN_10GBT_STAT_LP10G);
+}
+
+/**
+ * mii_10gbt_adv_mod_linkmode_adv_t
+ * @advertising: pointer to destination link mode.
+ * @adv: value of the C45 10GBASE-T AN CONTROL register
+ *
+ * A small helper function that translates the C45 10GBASE-T AN CONTROL
+ * register to linkmode advertisement settings. Leaves other bits unchanged.
+ */
+static inline void mii_10gbt_adv_mod_linkmode_adv_t(unsigned long *advertising,
+						    u32 adv)
+{
+	linkmode_mod_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT,
+			 advertising, adv & MDIO_AN_10GBT_CTRL_ADV2_5G);
+	linkmode_mod_bit(ETHTOOL_LINK_MODE_5000baseT_Full_BIT,
+			 advertising, adv & MDIO_AN_10GBT_CTRL_ADV5G);
+	linkmode_mod_bit(ETHTOOL_LINK_MODE_10000baseT_Full_BIT,
+			 advertising, adv & MDIO_AN_10GBT_CTRL_ADV10G);
+}
+
 int __mdiobus_read(struct mii_bus *bus, int addr, u32 regnum);
 int __mdiobus_write(struct mii_bus *bus, int addr, u32 regnum, u16 val);
 
-- 
2.20.1



^ permalink raw reply related

* [PATCH net-next 2/2] net: phy: use mii_10gbt_stat_mod_linkmode_lpa_t in genphy_c45_read_lpa
From: Heiner Kallweit @ 2019-02-15 20:58 UTC (permalink / raw)
  To: Andrew Lunn, Florian Fainelli, David Miller; +Cc: netdev@vger.kernel.org
In-Reply-To: <60b2e19d-e138-dfa0-fa79-19f3cbece99e@gmail.com>

Use mii_10gbt_stat_mod_linkmode_lpa_t() in genphy_c45_read_lpa() to
simplify the code.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
---
 drivers/net/phy/phy-c45.c | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/drivers/net/phy/phy-c45.c b/drivers/net/phy/phy-c45.c
index 7af5fa81d..bef126344 100644
--- a/drivers/net/phy/phy-c45.c
+++ b/drivers/net/phy/phy-c45.c
@@ -204,15 +204,7 @@ int genphy_c45_read_lpa(struct phy_device *phydev)
 	if (val < 0)
 		return val;
 
-	if (val & MDIO_AN_10GBT_STAT_LP2_5G)
-		linkmode_set_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT,
-				 phydev->lp_advertising);
-	if (val & MDIO_AN_10GBT_STAT_LP5G)
-		linkmode_set_bit(ETHTOOL_LINK_MODE_5000baseT_Full_BIT,
-				 phydev->lp_advertising);
-	if (val & MDIO_AN_10GBT_STAT_LP10G)
-		linkmode_set_bit(ETHTOOL_LINK_MODE_10000baseT_Full_BIT,
-				 phydev->lp_advertising);
+	mii_10gbt_stat_mod_linkmode_lpa_t(phydev->lp_advertising, val);
 
 	return 0;
 }
-- 
2.20.1



^ permalink raw reply related

* [PATCH] net: mv643xx_eth: disable clk on error path in mv643xx_eth_shared_probe()
From: Alexey Khoroshilov @ 2019-02-15 21:20 UTC (permalink / raw)
  To: Sebastian Hesselbarth, David S. Miller
  Cc: Alexey Khoroshilov, netdev, linux-kernel, ldv-project

If mv643xx_eth_shared_of_probe() fails, mv643xx_eth_shared_probe()
leaves clk undisabled.

Found by Linux Driver Verification project (linuxtesting.org).

Signed-off-by: Alexey Khoroshilov <khoroshilov@ispras.ru>
---
 drivers/net/ethernet/marvell/mv643xx_eth.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c
index 2f427271a793..292a668ce88e 100644
--- a/drivers/net/ethernet/marvell/mv643xx_eth.c
+++ b/drivers/net/ethernet/marvell/mv643xx_eth.c
@@ -2879,7 +2879,7 @@ static int mv643xx_eth_shared_probe(struct platform_device *pdev)
 
 	ret = mv643xx_eth_shared_of_probe(pdev);
 	if (ret)
-		return ret;
+		goto err_put_clk;
 	pd = dev_get_platdata(&pdev->dev);
 
 	msp->tx_csum_limit = (pd != NULL && pd->tx_csum_limit) ?
@@ -2887,6 +2887,11 @@ static int mv643xx_eth_shared_probe(struct platform_device *pdev)
 	infer_hw_params(msp);
 
 	return 0;
+
+err_put_clk:
+	if (!IS_ERR(msp->clk))
+		clk_disable_unprepare(msp->clk);
+	return ret;
 }
 
 static int mv643xx_eth_shared_remove(struct platform_device *pdev)
-- 
2.7.4


^ permalink raw reply related

* [PATCH net 0/2] tcp: fix possible crash in tcp_v4_err()
From: Eric Dumazet @ 2019-02-15 21:36 UTC (permalink / raw)
  To: David S . Miller
  Cc: netdev, Eric Dumazet, Eric Dumazet, Neal Cardwell, Yuchung Cheng,
	soukjin bae

soukjin bae reported a crash in tcp_v4_err() that we
root caused to a missing initialization.

Second patch adds a sanity check in tcp_v4_err() to avoid
future potential problems. Ignoring an ICMP message
is probably better than crashing a machine.

Eric Dumazet (2):
  tcp: clear icsk_backoff in tcp_write_queue_purge()
  tcp: tcp_v4_err() should be more careful

 net/ipv4/tcp.c      | 2 +-
 net/ipv4/tcp_ipv4.c | 5 ++++-
 2 files changed, 5 insertions(+), 2 deletions(-)

-- 
2.21.0.rc0.258.g878e2cd30e-goog

^ permalink raw reply

* [PATCH net 1/2] tcp: clear icsk_backoff in tcp_write_queue_purge()
From: Eric Dumazet @ 2019-02-15 21:36 UTC (permalink / raw)
  To: David S . Miller
  Cc: netdev, Eric Dumazet, Eric Dumazet, Neal Cardwell, Yuchung Cheng,
	soukjin bae
In-Reply-To: <20190215213621.183537-1-edumazet@google.com>

soukjin bae reported a crash in tcp_v4_err() handling
ICMP_DEST_UNREACH after tcp_write_queue_head(sk)
returned a NULL pointer.

Current logic should have prevented this :

  if (seq != tp->snd_una  || !icsk->icsk_retransmits ||
      !icsk->icsk_backoff || fastopen)
      break;

Problem is the write queue might have been purged
and icsk_backoff has not been cleared.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: soukjin bae <soukjin.bae@samsung.com>
---
 net/ipv4/tcp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 2079145a3b7c5f498af429c9a8289342e4421fca..cf3c5095c10e8e7e56621beae2f93c93de184489 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2528,6 +2528,7 @@ void tcp_write_queue_purge(struct sock *sk)
 	sk_mem_reclaim(sk);
 	tcp_clear_all_retrans_hints(tcp_sk(sk));
 	tcp_sk(sk)->packets_out = 0;
+	inet_csk(sk)->icsk_backoff = 0;
 }
 
 int tcp_disconnect(struct sock *sk, int flags)
@@ -2576,7 +2577,6 @@ int tcp_disconnect(struct sock *sk, int flags)
 	tp->write_seq += tp->max_window + 2;
 	if (tp->write_seq == 0)
 		tp->write_seq = 1;
-	icsk->icsk_backoff = 0;
 	tp->snd_cwnd = 2;
 	icsk->icsk_probes_out = 0;
 	tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
-- 
2.21.0.rc0.258.g878e2cd30e-goog


^ permalink raw reply related

* [PATCH net 2/2] tcp: tcp_v4_err() should be more careful
From: Eric Dumazet @ 2019-02-15 21:36 UTC (permalink / raw)
  To: David S . Miller
  Cc: netdev, Eric Dumazet, Eric Dumazet, Neal Cardwell, Yuchung Cheng,
	soukjin bae
In-Reply-To: <20190215213621.183537-1-edumazet@google.com>

ICMP handlers are not very often stressed, we should
make them more resilient to bugs that might surface in
the future.

If there is no packet in retransmit queue, we should
avoid a NULL deref.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: soukjin bae <soukjin.bae@samsung.com>
---
 net/ipv4/tcp_ipv4.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index efc6fef692ffdca4dcdd3f4b87a837656dd66c8c..ec3cea9d68288244d8e03b655d06f91640c36ee7 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -536,12 +536,15 @@ int tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 		if (sock_owned_by_user(sk))
 			break;
 
+		skb = tcp_rtx_queue_head(sk);
+		if (WARN_ON_ONCE(!skb))
+			break;
+
 		icsk->icsk_backoff--;
 		icsk->icsk_rto = tp->srtt_us ? __tcp_set_rto(tp) :
 					       TCP_TIMEOUT_INIT;
 		icsk->icsk_rto = inet_csk_rto_backoff(icsk, TCP_RTO_MAX);
 
-		skb = tcp_rtx_queue_head(sk);
 
 		tcp_mstamp_refresh(tp);
 		delta_us = (u32)(tp->tcp_mstamp - tcp_skb_timestamp_us(skb));
-- 
2.21.0.rc0.258.g878e2cd30e-goog


^ permalink raw reply related

* Re: [PATCH net] sock: return uapi errno in sock_setsockopt() for SO_ZEROCOPY
From: Petr Vorel @ 2019-02-15 21:44 UTC (permalink / raw)
  To: Willem de Bruijn; +Cc: Alexey Kodanev, Network Development, David Miller
In-Reply-To: <CA+FuTSeRV3Qy_S3c4qayTe3K1FkaPF7cZnV7_tNVKnS3cEuxmA@mail.gmail.com>

Hi,

> On Fri, Feb 15, 2019 at 11:51 AM Alexey Kodanev
> <alexey.kodanev@oracle.com> wrote:

> > For unsupported protocols, setsockopt() with SO_ZEROCOPY
> > option sets errno to ENOTSUPP(524). But this number is
> > not defined anywhere in the include/uapi/ headers.

> > To make sure userspace sees the known number, replace
> > ENOTSUPP(524) with EOPNOTSUPP(95).

> > Fixes: 76851d1212c1 ("sock: add SOCK_ZEROCOPY sockopt")
> > Signed-off-by: Alexey Kodanev <alexey.kodanev@oracle.com>
> > Reported-by: Petr Vorel <pvorel@suse.cz>

> This code has been there since 4.14. I think it's too late to change
> system call behavior.
BTW, it reminds me of commit 0fb44559ffd6 ("af_unix: move unix_mknod() out of
bindlock"), which, while fixing a problem, also changed the errno for certain usage
from -EINVAL to -EADDRINUSE. The proposed fix to restore the old behavior [1] was not
accepted, and 0fb44559ffd6 was merged to some stable kernels, so the behavior also
differs.
As there is no way to move these NFSv3 related definitions to uapi to make them
visible to userspace, I'd be for changing errno.

Kind regards,
Petr

[1] https://marc.info/?l=linux-kernel&m=149880810113888&w=2

^ permalink raw reply

* [PATCH v3 perf,bpf 06/11] perf, bpf: save bpf_prog_info information as headers to perf.data
From: Song Liu @ 2019-02-15 21:53 UTC (permalink / raw)
  To: netdev, linux-kernel
  Cc: ast, daniel, kernel-team, peterz, acme, jolsa, namhyung, Song Liu
In-Reply-To: <20190215215354.3114006-1-songliubraving@fb.com>

This patch enables perf-record to save bpf_prog_info information as
headers to perf.data. A new header type HEADER_BPF_PROG_INFO is
introduced for this data.

Signed-off-by: Song Liu <songliubraving@fb.com>
---
 tools/perf/util/header.c | 136 ++++++++++++++++++++++++++++++++++++++-
 tools/perf/util/header.h |   1 +
 2 files changed, 136 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index dec6d218c31c..0889ad797940 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -18,6 +18,7 @@
 #include <sys/utsname.h>
 #include <linux/time64.h>
 #include <dirent.h>
+#include <bpf/libbpf.h>
 
 #include "evlist.h"
 #include "evsel.h"
@@ -39,6 +40,7 @@
 #include "tool.h"
 #include "time-utils.h"
 #include "units.h"
+#include "bpf-event.h"
 
 #include "sane_ctype.h"
 
@@ -1080,6 +1082,51 @@ static int write_clockid(struct feat_fd *ff,
 			sizeof(ff->ph->env.clockid_res_ns));
 }
 
+static int write_bpf_prog_info(struct feat_fd *ff,
+			       struct perf_evlist *evlist __maybe_unused)
+{
+	struct perf_env *env = &ff->ph->env;
+	struct rb_root *root;
+	struct rb_node *next;
+	u32 count = 0;
+	int ret;
+
+	down_read(&env->bpf_progs.bpf_info_lock);
+
+	root = &env->bpf_progs.bpf_prog_infos;
+	next = rb_first(root);
+	while (next) {
+		++count;
+		next = rb_next(next);
+	}
+
+	ret = do_write(ff, &count, sizeof(count));
+
+	if (ret < 0)
+		goto out;
+
+	next = rb_first(root);
+	while (next) {
+		struct bpf_prog_info_node *node;
+		size_t len;
+
+		node = rb_entry(next, struct bpf_prog_info_node, rb_node);
+		next = rb_next(&node->rb_node);
+		len = sizeof(struct bpf_prog_info_linear) +
+			node->info_linear->data_len;
+
+		/* before writing to file, translate address to offset */
+		bpf_program__bpil_addr_to_offs(node->info_linear);
+		ret = do_write(ff, node->info_linear, len);
+		bpf_program__bpil_offs_to_addr(node->info_linear);
+		if (ret < 0)
+			goto out;
+	}
+out:
+	up_read(&env->bpf_progs.bpf_info_lock);
+	return ret;
+}
+
 static int cpu_cache_level__sort(const void *a, const void *b)
 {
 	struct cpu_cache_level *cache_a = (struct cpu_cache_level *)a;
@@ -1560,6 +1607,29 @@ static void print_clockid(struct feat_fd *ff, FILE *fp)
 		ff->ph->env.clockid_res_ns * 1000);
 }
 
+static void print_bpf_prog_info(struct feat_fd *ff, FILE *fp)
+{
+	struct perf_env *env = &ff->ph->env;
+	struct rb_root *root;
+	struct rb_node *next;
+
+	down_read(&env->bpf_progs.bpf_info_lock);
+
+	root = &env->bpf_progs.bpf_prog_infos;
+	next = rb_first(root);
+
+	while (next) {
+		struct bpf_prog_info_node *node;
+
+		node = rb_entry(next, struct bpf_prog_info_node, rb_node);
+		next = rb_next(&node->rb_node);
+		fprintf(fp, "# bpf_prog_info of id %u\n",
+			node->info_linear->info.id);
+	}
+
+	up_read(&env->bpf_progs.bpf_info_lock);
+}
+
 static void free_event_desc(struct perf_evsel *events)
 {
 	struct perf_evsel *evsel;
@@ -2592,6 +2662,69 @@ static int process_clockid(struct feat_fd *ff,
 	return 0;
 }
 
+static int process_bpf_prog_info(struct feat_fd *ff,
+				 void *data __maybe_unused)
+{
+	struct bpf_prog_info_linear *info_linear;
+	struct bpf_prog_info_node *info_node;
+	struct perf_env *env = &ff->ph->env;
+	u32 count, i;
+	int err = -1;
+
+	if (do_read_u32(ff, &count))
+		return -1;
+
+	down_write(&env->bpf_progs.bpf_info_lock);
+
+	for (i = 0; i < count; ++i) {
+		u32 info_len, data_len;
+
+		info_linear = NULL;
+		info_node = NULL;
+		if (do_read_u32(ff, &info_len))
+			goto out;
+		if (do_read_u32(ff, &data_len))
+			goto out;
+
+		if (info_len > sizeof(struct bpf_prog_info)) {
+			pr_warning("detected invalid bpf_prog_info\n");
+			goto out;
+		}
+
+		info_linear = malloc(sizeof(struct bpf_prog_info_linear) +
+				     data_len);
+		if (!info_linear)
+			goto out;
+		info_linear->info_len = sizeof(struct bpf_prog_info);
+		info_linear->data_len = data_len;
+		if (do_read_u64(ff, (u64 *)(&info_linear->arrays)))
+			goto out;
+		if (__do_read(ff, &info_linear->info, info_len))
+			goto out;
+		if (info_len < sizeof(struct bpf_prog_info))
+			memset(((void *)(&info_linear->info)) + info_len, 0,
+			       sizeof(struct bpf_prog_info) - info_len);
+
+		if (__do_read(ff, info_linear->data, data_len))
+			goto out;
+		info_node = malloc(sizeof(struct bpf_prog_info_node));
+		if (!info_node)
+			goto out;
+
+		/* after reading from file, translate offset to address */
+		bpf_program__bpil_offs_to_addr(info_linear);
+		info_node->info_linear = info_linear;
+		perf_env__insert_bpf_prog_info(env, info_node);
+	}
+
+	return 0;
+out:
+	free(info_linear);
+	free(info_node);
+	up_write(&env->bpf_progs.bpf_info_lock);
+	return err;
+}
+
 struct feature_ops {
 	int (*write)(struct feat_fd *ff, struct perf_evlist *evlist);
 	void (*print)(struct feat_fd *ff, FILE *fp);
@@ -2651,7 +2784,8 @@ static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = {
 	FEAT_OPN(CACHE,		cache,		true),
 	FEAT_OPR(SAMPLE_TIME,	sample_time,	false),
 	FEAT_OPR(MEM_TOPOLOGY,	mem_topology,	true),
-	FEAT_OPR(CLOCKID,       clockid,        false)
+	FEAT_OPR(CLOCKID,       clockid,        false),
+	FEAT_OPR(BPF_PROG_INFO, bpf_prog_info,  false)
 };
 
 struct header_print_data {
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index 0d553ddca0a3..0785c91b4c3a 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -39,6 +39,7 @@ enum {
 	HEADER_SAMPLE_TIME,
 	HEADER_MEM_TOPOLOGY,
 	HEADER_CLOCKID,
+	HEADER_BPF_PROG_INFO,
 	HEADER_LAST_FEATURE,
 	HEADER_FEAT_BITS	= 256,
 };
-- 
2.17.1


^ permalink raw reply related

* [PATCH v3 perf,bpf 00/11] perf annotation of BPF programs
From: Song Liu @ 2019-02-15 21:53 UTC (permalink / raw)
  To: netdev, linux-kernel
  Cc: ast, daniel, kernel-team, peterz, acme, jolsa, namhyung, Song Liu

Changes v2 to v3:
1. Remove unnecessary include in header files;
2. Improved error handling;
3. Better naming of functions, variables, etc.;
4. Enable bpf events by default for perf-top.

Changes v1 to v2:
1. Fix compilation error with different feature-disassembler-four-args;
2. Fix a segfault in perf-record;
3. Split patches 5/9 and 6/9 so that perf_env changes and perf.data changes
   are in separate patches.

This series enables annotation of BPF programs in perf.

perf tool gathers information via sys_bpf and (optionally) stores them in
perf.data as headers.

Patch 1/11 fixes a minor issue in kernel;
Patch 2/11 to 4/11 introduce new helper functions and use them in perf and
     bpftool;
Patches 5/11 to 8/11 save information about bpf programs in perf_env;
Patch 9/11 adds --bpf-event options to perf-top;
Patch 10/11 enables annotation of bpf programs based on information
     gathered in 5/11 to 8/11;
Patch 11/11 handles information about short-lived BPF programs that are loaded
     during perf-record or perf-top.

Commands tested during development are perf-top, perf-record, perf-report,
and perf-annotate.

===================== Note on patch dependency  ========================
This set depends on both the bpf-next tree and tip/perf/core. The current
version is developed on the bpf-next tree with the following commits
cherry-picked from tip/perf/core:

(from 1/10 to 10/10)
commit 76193a94522f ("perf, bpf: Introduce PERF_RECORD_KSYMBOL")
commit d764ac646491 ("tools headers uapi: Sync tools/include/uapi/linux/perf_event.h")
commit 6ee52e2a3fe4 ("perf, bpf: Introduce PERF_RECORD_BPF_EVENT")
commit df063c83aa2c ("tools headers uapi: Sync tools/include/uapi/linux/perf_event.h")
commit 9aa0bfa370b2 ("perf tools: Handle PERF_RECORD_KSYMBOL")
commit 45178a928a4b ("perf tools: Handle PERF_RECORD_BPF_EVENT")
commit 7b612e291a5a ("perf tools: Synthesize PERF_RECORD_* for loaded BPF programs")
commit a40b95bcd30c ("perf top: Synthesize BPF events for pre-existing loaded BPF programs")
commit 6934058d9fb6 ("bpf: Add module name [bpf] to ksymbols for bpf programs")
commit 811184fb6977 ("perf bpf: Fix synthesized PERF_RECORD_KSYMBOL/BPF_EVENT")
========================================================================

This set is also available at:

https://github.com/liu-song-6/linux/tree/bpf-annotation

Thanks!!

Song Liu (11):
  perf, bpf: consider events with attr.bpf_event as side-band events
  bpf: libbpf: introduce bpf_program__get_prog_info_linear()
  bpf: bpftool: use bpf_program__get_prog_info_linear() in
    prog.c:do_dump()
  perf, bpf: synthesize bpf events with
    bpf_program__get_prog_info_linear()
  perf, bpf: save bpf_prog_info in a rbtree in perf_env
  perf, bpf: save bpf_prog_info information as headers to perf.data
  perf, bpf: save btf in a rbtree in perf_env
  perf, bpf: save btf information as headers to perf.data
  perf-top: add option --no-bpf-event
  perf, bpf: enable annotation of bpf program
  perf, bpf: save information about short living bpf programs

 kernel/events/core.c        |   3 +-
 tools/bpf/bpftool/prog.c    | 266 ++++++--------------------
 tools/lib/bpf/libbpf.c      | 251 +++++++++++++++++++++++++
 tools/lib/bpf/libbpf.h      |  63 +++++++
 tools/lib/bpf/libbpf.map    |   3 +
 tools/perf/Makefile.config  |   6 +-
 tools/perf/builtin-record.c |  15 +-
 tools/perf/builtin-top.c    |  18 +-
 tools/perf/util/annotate.c  | 149 ++++++++++++++-
 tools/perf/util/bpf-event.c | 364 ++++++++++++++++++++++++++++--------
 tools/perf/util/bpf-event.h |  44 ++++-
 tools/perf/util/dso.c       |   1 +
 tools/perf/util/dso.h       |  33 ++--
 tools/perf/util/env.c       | 150 +++++++++++++++
 tools/perf/util/env.h       |  21 +++
 tools/perf/util/evlist.c    |  20 ++
 tools/perf/util/evlist.h    |   2 +
 tools/perf/util/header.c    | 233 ++++++++++++++++++++++-
 tools/perf/util/header.h    |   2 +
 tools/perf/util/symbol.c    |   1 +
 tools/perf/util/top.h       |   1 +
 21 files changed, 1337 insertions(+), 309 deletions(-)

--
2.17.1

^ permalink raw reply

* [PATCH v3 perf,bpf 01/11] perf, bpf: consider events with attr.bpf_event as side-band events
From: Song Liu @ 2019-02-15 21:53 UTC (permalink / raw)
  To: netdev, linux-kernel
  Cc: ast, daniel, kernel-team, peterz, acme, jolsa, namhyung, Song Liu
In-Reply-To: <20190215215354.3114006-1-songliubraving@fb.com>

Events with bpf_event should be considered as side-band events, as they
carry information about BPF programs.

Fixes: 6ee52e2a3fe4 ("perf, bpf: Introduce PERF_RECORD_BPF_EVENT")
Signed-off-by: Song Liu <songliubraving@fb.com>
---
 kernel/events/core.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 0a8dab322111..9403bdda5f8c 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4238,7 +4238,8 @@ static bool is_sb_event(struct perf_event *event)
 	if (attr->mmap || attr->mmap_data || attr->mmap2 ||
 	    attr->comm || attr->comm_exec ||
 	    attr->task || attr->ksymbol ||
-	    attr->context_switch)
+	    attr->context_switch ||
+	    attr->bpf_event)
 		return true;
 	return false;
 }
-- 
2.17.1


^ permalink raw reply related

* [PATCH v3 perf,bpf 04/11] perf, bpf: synthesize bpf events with bpf_program__get_prog_info_linear()
From: Song Liu @ 2019-02-15 21:53 UTC (permalink / raw)
  To: netdev, linux-kernel
  Cc: ast, daniel, kernel-team, peterz, acme, jolsa, namhyung, Song Liu
In-Reply-To: <20190215215354.3114006-1-songliubraving@fb.com>

With bpf_program__get_prog_info_linear, we can simplify the logic that
synthesizes bpf events.

This patch doesn't change the behavior of the code.

Signed-off-by: Song Liu <songliubraving@fb.com>
---
 tools/perf/util/bpf-event.c | 118 ++++++++++++------------------------
 1 file changed, 40 insertions(+), 78 deletions(-)

diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c
index 796ef793f4ce..e6dfb95029e5 100644
--- a/tools/perf/util/bpf-event.c
+++ b/tools/perf/util/bpf-event.c
@@ -3,7 +3,9 @@
 #include <stdlib.h>
 #include <bpf/bpf.h>
 #include <bpf/btf.h>
+#include <bpf/libbpf.h>
 #include <linux/btf.h>
+#include <linux/err.h>
 #include "bpf-event.h"
 #include "debug.h"
 #include "symbol.h"
@@ -49,99 +51,62 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_tool *tool,
 {
 	struct ksymbol_event *ksymbol_event = &event->ksymbol_event;
 	struct bpf_event *bpf_event = &event->bpf_event;
-	u32 sub_prog_cnt, i, func_info_rec_size = 0;
-	u8 (*prog_tags)[BPF_TAG_SIZE] = NULL;
-	struct bpf_prog_info info = { .type = 0, };
-	u32 info_len = sizeof(info);
-	void *func_infos = NULL;
-	u64 *prog_addrs = NULL;
+	struct bpf_prog_info_linear *info_linear;
+	struct bpf_prog_info *info;
 	struct btf *btf = NULL;
-	u32 *prog_lens = NULL;
 	bool has_btf = false;
-	char errbuf[512];
+	u32 sub_prog_cnt, i;
 	int err = 0;
+	u64 arrays;
 
-	/* Call bpf_obj_get_info_by_fd() to get sizes of arrays */
-	err = bpf_obj_get_info_by_fd(fd, &info, &info_len);
+	arrays = 1UL << BPF_PROG_INFO_JITED_KSYMS;
+	arrays |= 1UL << BPF_PROG_INFO_JITED_FUNC_LENS;
+	arrays |= 1UL << BPF_PROG_INFO_FUNC_INFO;
+	arrays |= 1UL << BPF_PROG_INFO_PROG_TAGS;
 
-	if (err) {
-		pr_debug("%s: failed to get BPF program info: %s, aborting\n",
-			 __func__, str_error_r(errno, errbuf, sizeof(errbuf)));
+	info_linear = bpf_program__get_prog_info_linear(fd, arrays);
+	if (IS_ERR_OR_NULL(info_linear)) {
+		info_linear = NULL;
+		pr_debug("%s: failed to get BPF program info. aborting\n", __func__);
 		return -1;
 	}
-	if (info_len < offsetof(struct bpf_prog_info, prog_tags)) {
+
+	if (info_linear->info_len < offsetof(struct bpf_prog_info, prog_tags)) {
 		pr_debug("%s: the kernel is too old, aborting\n", __func__);
 		return -2;
 	}
 
+	info = &info_linear->info;
+
 	/* number of ksyms, func_lengths, and tags should match */
-	sub_prog_cnt = info.nr_jited_ksyms;
-	if (sub_prog_cnt != info.nr_prog_tags ||
-	    sub_prog_cnt != info.nr_jited_func_lens)
+	sub_prog_cnt = info->nr_jited_ksyms;
+	if (sub_prog_cnt != info->nr_prog_tags ||
+	    sub_prog_cnt != info->nr_jited_func_lens)
 		return -1;
 
 	/* check BTF func info support */
-	if (info.btf_id && info.nr_func_info && info.func_info_rec_size) {
+	if (info->btf_id && info->nr_func_info && info->func_info_rec_size) {
 		/* btf func info number should be same as sub_prog_cnt */
-		if (sub_prog_cnt != info.nr_func_info) {
+		if (sub_prog_cnt != info->nr_func_info) {
 			pr_debug("%s: mismatch in BPF sub program count and BTF function info count, aborting\n", __func__);
-			return -1;
-		}
-		if (btf__get_from_id(info.btf_id, &btf)) {
-			pr_debug("%s: failed to get BTF of id %u, aborting\n", __func__, info.btf_id);
-			return -1;
+			err = -1;
+			goto out;
 		}
-		func_info_rec_size = info.func_info_rec_size;
-		func_infos = calloc(sub_prog_cnt, func_info_rec_size);
-		if (!func_infos) {
-			pr_debug("%s: failed to allocate memory for func_infos, aborting\n", __func__);
-			return -1;
+		if (btf__get_from_id(info->btf_id, &btf)) {
+			pr_debug("%s: failed to get BTF of id %u, aborting\n", __func__, info->btf_id);
+			err = -1;
+			btf = NULL;
+			goto out;
 		}
 		has_btf = true;
 	}
 
-	/*
-	 * We need address, length, and tag for each sub program.
-	 * Allocate memory and call bpf_obj_get_info_by_fd() again
-	 */
-	prog_addrs = calloc(sub_prog_cnt, sizeof(u64));
-	if (!prog_addrs) {
-		pr_debug("%s: failed to allocate memory for prog_addrs, aborting\n", __func__);
-		goto out;
-	}
-	prog_lens = calloc(sub_prog_cnt, sizeof(u32));
-	if (!prog_lens) {
-		pr_debug("%s: failed to allocate memory for prog_lens, aborting\n", __func__);
-		goto out;
-	}
-	prog_tags = calloc(sub_prog_cnt, BPF_TAG_SIZE);
-	if (!prog_tags) {
-		pr_debug("%s: failed to allocate memory for prog_tags, aborting\n", __func__);
-		goto out;
-	}
-
-	memset(&info, 0, sizeof(info));
-	info.nr_jited_ksyms = sub_prog_cnt;
-	info.nr_jited_func_lens = sub_prog_cnt;
-	info.nr_prog_tags = sub_prog_cnt;
-	info.jited_ksyms = ptr_to_u64(prog_addrs);
-	info.jited_func_lens = ptr_to_u64(prog_lens);
-	info.prog_tags = ptr_to_u64(prog_tags);
-	info_len = sizeof(info);
-	if (has_btf) {
-		info.nr_func_info = sub_prog_cnt;
-		info.func_info_rec_size = func_info_rec_size;
-		info.func_info = ptr_to_u64(func_infos);
-	}
-
-	err = bpf_obj_get_info_by_fd(fd, &info, &info_len);
-	if (err) {
-		pr_debug("%s: failed to get BPF program info, aborting\n", __func__);
-		goto out;
-	}
-
 	/* Synthesize PERF_RECORD_KSYMBOL */
 	for (i = 0; i < sub_prog_cnt; i++) {
+		u8 (*prog_tags)[BPF_TAG_SIZE] = (void *)(info->prog_tags);
+		__u32 *prog_lens = (__u32 *)(info->jited_func_lens);
+		__u64 *prog_addrs = (__u64 *)(info->jited_ksyms);
+		void *func_infos = (void *)(info->func_info);
 		const struct bpf_func_info *finfo;
 		const char *short_name = NULL;
 		const struct btf_type *t;
@@ -163,13 +128,13 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_tool *tool,
 					 KSYM_NAME_LEN - name_len,
 					 prog_tags[i], BPF_TAG_SIZE);
 		if (has_btf) {
-			finfo = func_infos + i * info.func_info_rec_size;
+			finfo = func_infos + i * info->func_info_rec_size;
 			t = btf__type_by_id(btf, finfo->type_id);
 			short_name = btf__name_by_offset(btf, t->name_off);
 		} else if (i == 0 && sub_prog_cnt == 1) {
 			/* no subprog */
-			if (info.name[0])
-				short_name = info.name;
+			if (info->name[0])
+				short_name = info->name;
 		} else
 			short_name = "F";
 		if (short_name)
@@ -195,9 +160,9 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_tool *tool,
 			},
 			.type = PERF_BPF_EVENT_PROG_LOAD,
 			.flags = 0,
-			.id = info.id,
+			.id = info->id,
 		};
-		memcpy(bpf_event->tag, prog_tags[i], BPF_TAG_SIZE);
+		memcpy(bpf_event->tag, info->tag, BPF_TAG_SIZE);
 		memset((void *)event + event->header.size, 0, machine->id_hdr_size);
 		event->header.size += machine->id_hdr_size;
 		err = perf_tool__process_synth_event(tool, event,
@@ -205,10 +170,7 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_tool *tool,
 	}
 
 out:
-	free(prog_tags);
-	free(prog_lens);
-	free(prog_addrs);
-	free(func_infos);
+	free(info_linear);
 	free(btf);
 	return err ? -1 : 0;
 }
-- 
2.17.1


^ permalink raw reply related

* [PATCH v3 perf,bpf 05/11] perf, bpf: save bpf_prog_info in a rbtree in perf_env
From: Song Liu @ 2019-02-15 21:53 UTC (permalink / raw)
  To: netdev, linux-kernel
  Cc: ast, daniel, kernel-team, peterz, acme, jolsa, namhyung, Song Liu
In-Reply-To: <20190215215354.3114006-1-songliubraving@fb.com>

bpf_prog_info contains information necessary to annotate bpf programs.
This patch saves bpf_prog_info for bpf programs loaded in the system.

Signed-off-by: Song Liu <songliubraving@fb.com>
---
 tools/perf/builtin-record.c |  2 +-
 tools/perf/builtin-top.c    |  2 +-
 tools/perf/util/bpf-event.c | 35 +++++++++++++--
 tools/perf/util/bpf-event.h | 11 +++--
 tools/perf/util/env.c       | 85 +++++++++++++++++++++++++++++++++++++
 tools/perf/util/env.h       | 17 ++++++++
 6 files changed, 143 insertions(+), 9 deletions(-)

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 88ea11d57c6f..2355e0a9eda0 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -1083,7 +1083,7 @@ static int record__synthesize(struct record *rec, bool tail)
 		return err;
 	}
 
-	err = perf_event__synthesize_bpf_events(tool, process_synthesized_event,
+	err = perf_event__synthesize_bpf_events(session, process_synthesized_event,
 						machine, opts);
 	if (err < 0)
 		pr_warning("Couldn't synthesize bpf events.\n");
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 5a486d4de56e..27d8d42e0a4d 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -1216,7 +1216,7 @@ static int __cmd_top(struct perf_top *top)
 
 	init_process_thread(top);
 
-	ret = perf_event__synthesize_bpf_events(&top->tool, perf_event__process,
+	ret = perf_event__synthesize_bpf_events(top->session, perf_event__process,
 						&top->session->machines.host,
 						&top->record_opts);
 	if (ret < 0)
diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c
index e6dfb95029e5..13313a3e6c4d 100644
--- a/tools/perf/util/bpf-event.c
+++ b/tools/perf/util/bpf-event.c
@@ -10,6 +10,8 @@
 #include "debug.h"
 #include "symbol.h"
 #include "machine.h"
+#include "env.h"
+#include "session.h"
 
 #define ptr_to_u64(ptr)    ((__u64)(unsigned long)(ptr))
 
@@ -42,7 +44,7 @@ int machine__process_bpf_event(struct machine *machine __maybe_unused,
  *   -1 for failures;
  *   -2 for lack of kernel support.
  */
-static int perf_event__synthesize_one_bpf_prog(struct perf_tool *tool,
+static int perf_event__synthesize_one_bpf_prog(struct perf_session *session,
 					       perf_event__handler_t process,
 					       struct machine *machine,
 					       int fd,
@@ -52,17 +54,29 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_tool *tool,
 	struct ksymbol_event *ksymbol_event = &event->ksymbol_event;
 	struct bpf_event *bpf_event = &event->bpf_event;
 	struct bpf_prog_info_linear *info_linear;
+	struct perf_tool *tool = session->tool;
+	struct bpf_prog_info_node *info_node;
 	struct bpf_prog_info *info;
 	struct btf *btf = NULL;
 	bool has_btf = false;
+	struct perf_env *env;
 	u32 sub_prog_cnt, i;
 	int err = 0;
 	u64 arrays;
 
+	/*
+	 * for perf-record and perf-report use header.env;
+	 * otherwise, use global perf_env.
+	 */
+	env = session->data ? &session->header.env : &perf_env;
+
 	arrays = 1UL << BPF_PROG_INFO_JITED_KSYMS;
 	arrays |= 1UL << BPF_PROG_INFO_JITED_FUNC_LENS;
 	arrays |= 1UL << BPF_PROG_INFO_FUNC_INFO;
 	arrays |= 1UL << BPF_PROG_INFO_PROG_TAGS;
+	arrays |= 1UL << BPF_PROG_INFO_JITED_INSNS;
+	arrays |= 1UL << BPF_PROG_INFO_LINE_INFO;
+	arrays |= 1UL << BPF_PROG_INFO_JITED_LINE_INFO;
 
 	info_linear = bpf_program__get_prog_info_linear(fd, arrays);
 	if (IS_ERR_OR_NULL(info_linear)) {
@@ -151,8 +165,8 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_tool *tool,
 						     machine, process);
 	}
 
-	/* Synthesize PERF_RECORD_BPF_EVENT */
 	if (opts->bpf_event) {
+		/* Synthesize PERF_RECORD_BPF_EVENT */
 		*bpf_event = (struct bpf_event){
 			.header = {
 				.type = PERF_RECORD_BPF_EVENT,
@@ -165,6 +179,19 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_tool *tool,
 		memcpy(bpf_event->tag, info->tag, BPF_TAG_SIZE);
 		memset((void *)event + event->header.size, 0, machine->id_hdr_size);
 		event->header.size += machine->id_hdr_size;
+
+		/* save bpf_prog_info to env */
+		info_node = malloc(sizeof(struct bpf_prog_info_node));
+		if (info_node) {
+			info_node->info_linear = info_linear;
+			perf_env__insert_bpf_prog_info(env, info_node);
+			info_linear = NULL;
+		}
+
+		/*
+		 * process after saving bpf_prog_info to env, so that
+		 * required information is ready for look up
+		 */
 		err = perf_tool__process_synth_event(tool, event,
 						     machine, process);
 	}
@@ -175,7 +202,7 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_tool *tool,
 	return err ? -1 : 0;
 }
 
-int perf_event__synthesize_bpf_events(struct perf_tool *tool,
+int perf_event__synthesize_bpf_events(struct perf_session *session,
 				      perf_event__handler_t process,
 				      struct machine *machine,
 				      struct record_opts *opts)
@@ -209,7 +236,7 @@ int perf_event__synthesize_bpf_events(struct perf_tool *tool,
 			continue;
 		}
 
-		err = perf_event__synthesize_one_bpf_prog(tool, process,
+		err = perf_event__synthesize_one_bpf_prog(session, process,
 							  machine, fd,
 							  event, opts);
 		close(fd);
diff --git a/tools/perf/util/bpf-event.h b/tools/perf/util/bpf-event.h
index 7890067e1a37..fad932f7404f 100644
--- a/tools/perf/util/bpf-event.h
+++ b/tools/perf/util/bpf-event.h
@@ -3,19 +3,24 @@
 #define __PERF_BPF_EVENT_H
 
 #include <linux/compiler.h>
+#include <linux/rbtree.h>
 #include "event.h"
 
 struct machine;
 union perf_event;
 struct perf_sample;
-struct perf_tool;
 struct record_opts;
 
+struct bpf_prog_info_node {
+	struct bpf_prog_info_linear	*info_linear;
+	struct rb_node			rb_node;
+};
+
 #ifdef HAVE_LIBBPF_SUPPORT
 int machine__process_bpf_event(struct machine *machine, union perf_event *event,
 			       struct perf_sample *sample);
 
-int perf_event__synthesize_bpf_events(struct perf_tool *tool,
+int perf_event__synthesize_bpf_events(struct perf_session *session,
 				      perf_event__handler_t process,
 				      struct machine *machine,
 				      struct record_opts *opts);
@@ -27,7 +32,7 @@ static inline int machine__process_bpf_event(struct machine *machine __maybe_unu
 	return 0;
 }
 
-static inline int perf_event__synthesize_bpf_events(struct perf_tool *tool __maybe_unused,
+static inline int perf_event__synthesize_bpf_events(struct perf_session *session __maybe_unused,
 						    perf_event__handler_t process __maybe_unused,
 						    struct machine *machine __maybe_unused,
 						    struct record_opts *opts __maybe_unused)
diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c
index 4c23779e271a..026ca2182d50 100644
--- a/tools/perf/util/env.c
+++ b/tools/perf/util/env.c
@@ -3,15 +3,93 @@
 #include "env.h"
 #include "sane_ctype.h"
 #include "util.h"
+#include "bpf-event.h"
 #include <errno.h>
 #include <sys/utsname.h>
+#include <bpf/libbpf.h>
 
 struct perf_env perf_env;
 
+void perf_env__insert_bpf_prog_info(struct perf_env *env,
+				    struct bpf_prog_info_node *info_node)
+{
+	__u32 prog_id = info_node->info_linear->info.id;
+	struct bpf_prog_info_node *node;
+	struct rb_node *parent = NULL;
+	struct rb_node **p;
+
+	down_write(&env->bpf_progs.bpf_info_lock);
+	p = &env->bpf_progs.bpf_prog_infos.rb_node;
+
+	while (*p != NULL) {
+		parent = *p;
+		node = rb_entry(parent, struct bpf_prog_info_node, rb_node);
+		if (prog_id < node->info_linear->info.id) {
+			p = &(*p)->rb_left;
+		} else if (prog_id > node->info_linear->info.id) {
+			p = &(*p)->rb_right;
+		} else {
+			pr_debug("duplicated bpf prog info %u\n", prog_id);
+			goto out;
+		}
+	}
+
+	rb_link_node(&info_node->rb_node, parent, p);
+	rb_insert_color(&info_node->rb_node, &env->bpf_progs.bpf_prog_infos);
+out:
+	up_write(&env->bpf_progs.bpf_info_lock);
+}
+
+struct bpf_prog_info_node *perf_env__find_bpf_prog_info(struct perf_env *env,
+							__u32 prog_id)
+{
+	struct bpf_prog_info_node *node = NULL;
+	struct rb_node *n;
+
+	down_read(&env->bpf_progs.bpf_info_lock);
+	n = env->bpf_progs.bpf_prog_infos.rb_node;
+
+	while (n) {
+		node = rb_entry(n, struct bpf_prog_info_node, rb_node);
+		if (prog_id < node->info_linear->info.id)
+			n = n->rb_left;
+		else if (prog_id > node->info_linear->info.id)
+			n = n->rb_right;
+		else
+			break;
+	}
+
+	up_read(&env->bpf_progs.bpf_info_lock);
+	return node;
+}
+
+/* purge data in bpf_prog_infos tree */
+static void perf_env__purge_bpf(struct perf_env *env)
+{
+	struct rb_root *root;
+	struct rb_node *next;
+
+	down_write(&env->bpf_progs.bpf_info_lock);
+
+	root = &env->bpf_progs.bpf_prog_infos;
+	next = rb_first(root);
+
+	while (next) {
+		struct bpf_prog_info_node *node;
+
+		node = rb_entry(next, struct bpf_prog_info_node, rb_node);
+		next = rb_next(&node->rb_node);
+		rb_erase_init(&node->rb_node, root);
+		free(node);
+	}
+	up_write(&env->bpf_progs.bpf_info_lock);
+}
+
 void perf_env__exit(struct perf_env *env)
 {
 	int i;
 
+	perf_env__purge_bpf(env);
 	zfree(&env->hostname);
 	zfree(&env->os_release);
 	zfree(&env->version);
@@ -38,6 +116,12 @@ void perf_env__exit(struct perf_env *env)
 	zfree(&env->memory_nodes);
 }
 
+static void init_bpf_rb_trees(struct perf_env *env)
+{
+	env->bpf_progs.bpf_prog_infos = RB_ROOT;
+	init_rwsem(&env->bpf_progs.bpf_info_lock);
+}
+
 int perf_env__set_cmdline(struct perf_env *env, int argc, const char *argv[])
 {
 	int i;
@@ -59,6 +143,7 @@ int perf_env__set_cmdline(struct perf_env *env, int argc, const char *argv[])
 
 	env->nr_cmdline = argc;
 
+	init_bpf_rb_trees(env);
 	return 0;
 out_free:
 	zfree(&env->cmdline_argv);
diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h
index d01b8355f4ca..d0c53fe6d431 100644
--- a/tools/perf/util/env.h
+++ b/tools/perf/util/env.h
@@ -3,7 +3,9 @@
 #define __PERF_ENV_H
 
 #include <linux/types.h>
+#include <linux/rbtree.h>
 #include "cpumap.h"
+#include "rwsem.h"
 
 struct cpu_topology_map {
 	int	socket_id;
@@ -64,8 +66,19 @@ struct perf_env {
 	struct memory_node	*memory_nodes;
 	unsigned long long	 memory_bsize;
 	u64                     clockid_res_ns;
+
+	/*
+	 * bpf_info_lock protects bpf rbtrees. This is needed because the
+	 * trees are accessed by different threads in perf-top
+	 */
+	struct {
+		struct rw_semaphore	bpf_info_lock;
+		struct rb_root		bpf_prog_infos;
+	} bpf_progs;
 };
 
+struct bpf_prog_info_node;
+
 extern struct perf_env perf_env;
 
 void perf_env__exit(struct perf_env *env);
@@ -80,4 +93,8 @@ const char *perf_env__arch(struct perf_env *env);
 const char *perf_env__raw_arch(struct perf_env *env);
 int perf_env__nr_cpus_avail(struct perf_env *env);
 
+void perf_env__insert_bpf_prog_info(struct perf_env *env,
+				    struct bpf_prog_info_node *info_node);
+struct bpf_prog_info_node *perf_env__find_bpf_prog_info(struct perf_env *env,
+							__u32 prog_id);
 #endif /* __PERF_ENV_H */
-- 
2.17.1


^ permalink raw reply related

* [PATCH v3 perf,bpf 10/11] perf, bpf: enable annotation of bpf program
From: Song Liu @ 2019-02-15 21:53 UTC (permalink / raw)
  To: netdev, linux-kernel
  Cc: ast, daniel, kernel-team, peterz, acme, jolsa, namhyung, Song Liu
In-Reply-To: <20190215215354.3114006-1-songliubraving@fb.com>

This patch enables the annotation of bpf program.

A new dso type DSO_BINARY_TYPE__BPF_PROG_INFO is introduced for BPF
programs. In symbol__disassemble(), DSO_BINARY_TYPE__BPF_PROG_INFO dso
calls into a new function symbol__disassemble_bpf(), where annotation
line information is filled based on bpf_prog_info and btf saved in the given
perf_env.

symbol__disassemble_bpf() uses libbfd to disassemble bpf programs.

Signed-off-by: Song Liu <songliubraving@fb.com>
---
 tools/perf/Makefile.config  |   6 +-
 tools/perf/util/annotate.c  | 149 +++++++++++++++++++++++++++++++++++-
 tools/perf/util/bpf-event.c |  48 ++++++++++++
 tools/perf/util/bpf-event.h |   4 +
 tools/perf/util/dso.c       |   1 +
 tools/perf/util/dso.h       |  33 +++++---
 tools/perf/util/symbol.c    |   1 +
 7 files changed, 229 insertions(+), 13 deletions(-)

diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index b441c88cafa1..ab223239f1fb 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -701,7 +701,7 @@ else
 endif
 
 ifeq ($(feature-libbfd), 1)
-  EXTLIBS += -lbfd
+  EXTLIBS += -lbfd -lopcodes
 else
   # we are on a system that requires -liberty and (maybe) -lz
   # to link against -lbfd; test each case individually here
@@ -796,6 +796,10 @@ ifdef HAVE_KVM_STAT_SUPPORT
     CFLAGS += -DHAVE_KVM_STAT_SUPPORT
 endif
 
+ifeq ($(feature-disassembler-four-args), 1)
+    CFLAGS += -DDISASM_FOUR_ARGS_SIGNATURE
+endif
+
 ifeq (${IS_64_BIT}, 1)
   ifndef NO_PERF_READ_VDSO32
     $(call feature_check,compile-32)
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 70de8f6b3aee..078017d31ca9 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -22,6 +22,7 @@
 #include "annotate.h"
 #include "evsel.h"
 #include "evlist.h"
+#include "bpf-event.h"
 #include "block-range.h"
 #include "string2.h"
 #include "arch/common.h"
@@ -29,6 +30,9 @@
 #include <pthread.h>
 #include <linux/bitops.h>
 #include <linux/kernel.h>
+#include <bfd.h>
+#include <dis-asm.h>
+#include <bpf/libbpf.h>
 
 /* FIXME: For the HE_COLORSET */
 #include "ui/browser.h"
@@ -1672,6 +1676,147 @@ static int dso__disassemble_filename(struct dso *dso, char *filename, size_t fil
 	return 0;
 }
 
+static void get_exec_path(char *tpath, size_t size)
+{
+	const char *path = "/proc/self/exe";
+	ssize_t len;
+
+	len = readlink(path, tpath, size - 1);
+	assert(len > 0);
+	tpath[len] = 0;
+}
+
+static int symbol__disassemble_bpf(struct symbol *sym,
+				   struct annotate_args *args)
+{
+	struct annotation *notes = symbol__annotation(sym);
+	struct annotation_options *opts = args->options;
+	struct bpf_prog_info_linear *info_linear;
+	struct bpf_prog_linfo *prog_linfo = NULL;
+	struct bpf_prog_info_node *info_node;
+	int len = sym->end - sym->start;
+	disassembler_ftype disassemble;
+	struct map *map = args->ms.map;
+	struct disassemble_info info;
+	struct dso *dso = map->dso;
+	int pc = 0, count, sub_id;
+	struct btf *btf = NULL;
+	char tpath[PATH_MAX];
+	size_t buf_size;
+	int nr_skip = 0;
+	__u64 arrays;
+	char *buf;
+	bfd *bfdf;
+	FILE *s;
+
+	if (dso->binary_type != DSO_BINARY_TYPE__BPF_PROG_INFO)
+		return -1;
+
+	pr_debug("%s: handling sym %s addr %lx len %lx\n", __func__,
+		 sym->name, sym->start, sym->end - sym->start);
+
+	memset(tpath, 0, sizeof(tpath));
+	get_exec_path(tpath, sizeof(tpath));
+
+	bfdf = bfd_openr(tpath, NULL);
+	assert(bfdf);
+	assert(bfd_check_format(bfdf, bfd_object));
+
+	s = open_memstream(&buf, &buf_size);
+	init_disassemble_info(&info, s,
+			      (fprintf_ftype) fprintf);
+
+	info.arch = bfd_get_arch(bfdf);
+	info.mach = bfd_get_mach(bfdf);
+
+	arrays = 1UL << BPF_PROG_INFO_JITED_INSNS;
+	arrays |= 1UL << BPF_PROG_INFO_JITED_KSYMS;
+	arrays |= 1UL << BPF_PROG_INFO_LINE_INFO;
+	arrays |= 1UL << BPF_PROG_INFO_FUNC_INFO;
+
+	info_node = perf_env__find_bpf_prog_info(dso->bpf_prog.env,
+						 dso->bpf_prog.id);
+	if (!info_node)
+		return -1;
+	info_linear = info_node->info_linear;
+	sub_id = dso->bpf_prog.sub_id;
+
+	info.buffer = (void *)(info_linear->info.jited_prog_insns);
+	info.buffer_length = info_linear->info.jited_prog_len;
+
+	if (info_linear->info.nr_line_info)
+		prog_linfo = bpf_prog_linfo__new(&info_linear->info);
+	prog_linfo = prog_linfo;
+
+	if (info_linear->info.btf_id) {
+		struct btf_node *node;
+
+		node = perf_env__find_btf(dso->bpf_prog.env,
+					  info_linear->info.btf_id);
+		if (node)
+			btf = btf__new((__u8 *)(node->data),
+				       node->data_size);
+	}
+
+	disassemble_init_for_target(&info);
+
+#ifdef DISASM_FOUR_ARGS_SIGNATURE
+	disassemble = disassembler(info.arch,
+				   bfd_big_endian(bfdf),
+				   info.mach,
+				   bfdf);
+#else
+	disassemble = disassembler(bfdf);
+#endif
+	assert(disassemble);
+
+	fflush(s);
+	do {
+		const struct bpf_line_info *linfo = NULL;
+		struct disasm_line *dl;
+		size_t prev_buf_size;
+		const char *srcline;
+		u64 addr;
+
+		addr = pc + ((u64 *)(info_linear->info.jited_ksyms))[sub_id];
+		count = disassemble(pc, &info);
+
+		linfo = bpf_prog_linfo__lfind_addr_func(prog_linfo, addr, sub_id,
+							nr_skip);
+
+		if (linfo) {
+			srcline = btf__name_by_offset(btf, linfo->line_off);
+			nr_skip++;
+		} else
+			srcline = NULL;
+
+		fprintf(s, "\n");
+		prev_buf_size = buf_size;
+		fflush(s);
+
+		if (!opts->hide_src_code && srcline) {
+			args->offset = -1;
+			args->line = strdup(srcline);
+			args->line_nr = 0;
+			args->ms.sym  = sym;
+			dl = disasm_line__new(args);
+			annotation_line__add(&dl->al, &notes->src->source);
+		}
+
+		args->offset = pc;
+		args->line = buf + prev_buf_size;
+		args->line_nr = 0;
+		args->ms.sym  = sym;
+		dl = disasm_line__new(args);
+		annotation_line__add(&dl->al, &notes->src->source);
+
+		pc += count;
+	} while (count > 0 && pc < len);
+
+	bfd_close(bfdf);
+	return 0;
+}
+
 static int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
 {
 	struct annotation_options *opts = args->options;
@@ -1699,7 +1844,9 @@ static int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
 	pr_debug("annotating [%p] %30s : [%p] %30s\n",
 		 dso, dso->long_name, sym, sym->name);
 
-	if (dso__is_kcore(dso)) {
+	if (dso->binary_type == DSO_BINARY_TYPE__BPF_PROG_INFO) {
+		return symbol__disassemble_bpf(sym, args);
+	} else if (dso__is_kcore(dso)) {
 		kce.kcore_filename = symfs_filename;
 		kce.addr = map__rip_2objdump(map, sym->start);
 		kce.offs = sym->start;
diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c
index 52507435c464..6c363adc781a 100644
--- a/tools/perf/util/bpf-event.c
+++ b/tools/perf/util/bpf-event.c
@@ -25,12 +25,60 @@ static int snprintf_hex(char *buf, size_t size, unsigned char *data, size_t len)
 	return ret;
 }
 
+static int machine__process_bpf_event_load(struct machine *machine __maybe_unused,
+					   union perf_event *event,
+					   struct perf_sample *sample __maybe_unused)
+{
+	struct bpf_prog_info_linear *info_linear;
+	struct bpf_prog_info_node *info_node;
+	struct perf_env *env = machine->env;
+	int id = event->bpf_event.id;
+	unsigned int i;
+
+	/* perf-record, no need to handle bpf-event */
+	if (env == NULL)
+		return 0;
+
+	info_node = perf_env__find_bpf_prog_info(env, id);
+	if (!info_node)
+		return 0;
+	info_linear = info_node->info_linear;
+
+	for (i = 0; i < info_linear->info.nr_jited_ksyms; i++) {
+		u64 *addrs = (u64 *)(info_linear->info.jited_ksyms);
+		u64 addr = addrs[i];
+		struct map *map;
+
+		map = map_groups__find(&machine->kmaps, addr);
+
+		if (map) {
+			map->dso->binary_type = DSO_BINARY_TYPE__BPF_PROG_INFO;
+			map->dso->bpf_prog.id = id;
+			map->dso->bpf_prog.sub_id = i;
+			map->dso->bpf_prog.env = env;
+		}
+	}
+	return 0;
+}
+
 int machine__process_bpf_event(struct machine *machine __maybe_unused,
 			       union perf_event *event,
 			       struct perf_sample *sample __maybe_unused)
 {
 	if (dump_trace)
 		perf_event__fprintf_bpf_event(event, stdout);
+
+	switch (event->bpf_event.type) {
+	case PERF_BPF_EVENT_PROG_LOAD:
+		return machine__process_bpf_event_load(machine, event, sample);
+
+	case PERF_BPF_EVENT_PROG_UNLOAD:
+		break;
+	default:
+		pr_debug("unexpected bpf_event type of %d\n",
+			 event->bpf_event.type);
+		break;
+	}
 	return 0;
 }
 
diff --git a/tools/perf/util/bpf-event.h b/tools/perf/util/bpf-event.h
index b9ec394dc7c7..ab4a975b7e05 100644
--- a/tools/perf/util/bpf-event.h
+++ b/tools/perf/util/bpf-event.h
@@ -2,6 +2,10 @@
 #ifndef __PERF_BPF_EVENT_H
 #define __PERF_BPF_EVENT_H
 
+#include <bpf/bpf.h>
+#include <bpf/btf.h>
+#include <bpf/libbpf.h>
+#include <linux/btf.h>
 #include <linux/compiler.h>
 #include <linux/rbtree.h>
 #include "event.h"
diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index 62c8cf622607..1798192bf0f9 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -181,6 +181,7 @@ int dso__read_binary_type_filename(const struct dso *dso,
 	case DSO_BINARY_TYPE__KALLSYMS:
 	case DSO_BINARY_TYPE__GUEST_KALLSYMS:
 	case DSO_BINARY_TYPE__JAVA_JIT:
+	case DSO_BINARY_TYPE__BPF_PROG_INFO:
 	case DSO_BINARY_TYPE__NOT_FOUND:
 		ret = -1;
 		break;
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
index 8c8a7abe809d..f20d319463f1 100644
--- a/tools/perf/util/dso.h
+++ b/tools/perf/util/dso.h
@@ -14,6 +14,8 @@
 #include "namespaces.h"
 #include "build-id.h"
 
+struct perf_env;
+
 enum dso_binary_type {
 	DSO_BINARY_TYPE__KALLSYMS = 0,
 	DSO_BINARY_TYPE__GUEST_KALLSYMS,
@@ -34,6 +36,7 @@ enum dso_binary_type {
 	DSO_BINARY_TYPE__KCORE,
 	DSO_BINARY_TYPE__GUEST_KCORE,
 	DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO,
+	DSO_BINARY_TYPE__BPF_PROG_INFO,
 	DSO_BINARY_TYPE__NOT_FOUND,
 };
 
@@ -177,17 +180,25 @@ struct dso {
 	struct auxtrace_cache *auxtrace_cache;
 	int		 comp;
 
-	/* dso data file */
-	struct {
-		struct rb_root	 cache;
-		int		 fd;
-		int		 status;
-		u32		 status_seen;
-		size_t		 file_size;
-		struct list_head open_entry;
-		u64		 debug_frame_offset;
-		u64		 eh_frame_hdr_offset;
-	} data;
+	union {
+		/* dso data file */
+		struct {
+			struct rb_root	 cache;
+			int		 fd;
+			int		 status;
+			u32		 status_seen;
+			size_t		 file_size;
+			struct list_head open_entry;
+			u64		 debug_frame_offset;
+			u64		 eh_frame_hdr_offset;
+		} data;
+		/* bpf prog information */
+		struct {
+			u32		id;
+			u32		sub_id;
+			struct perf_env	*env;
+		} bpf_prog;
+	};
 
 	union { /* Tool specific area */
 		void	 *priv;
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 48efad6d0f90..33ae59e89da2 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -1441,6 +1441,7 @@ static bool dso__is_compatible_symtab_type(struct dso *dso, bool kmod,
 	case DSO_BINARY_TYPE__BUILD_ID_CACHE_DEBUGINFO:
 		return true;
 
+	case DSO_BINARY_TYPE__BPF_PROG_INFO:
 	case DSO_BINARY_TYPE__NOT_FOUND:
 	default:
 		return false;
-- 
2.17.1


^ permalink raw reply related

* [PATCH v3 perf,bpf 08/11] perf, bpf: save btf information as headers to perf.data
From: Song Liu @ 2019-02-15 21:53 UTC (permalink / raw)
  To: netdev, linux-kernel
  Cc: ast, daniel, kernel-team, peterz, acme, jolsa, namhyung, Song Liu
In-Reply-To: <20190215215354.3114006-1-songliubraving@fb.com>

This patch enables perf-record to save btf information as headers to
perf.data. A new header type HEADER_BTF is introduced for this data.

Signed-off-by: Song Liu <songliubraving@fb.com>
---
 tools/perf/util/header.c | 99 +++++++++++++++++++++++++++++++++++++++-
 tools/perf/util/header.h |  1 +
 2 files changed, 99 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 0889ad797940..2de4f4e9b590 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -1127,6 +1127,45 @@ static int write_bpf_prog_info(struct feat_fd *ff,
 	return ret;
 }
 
+static int write_btf(struct feat_fd *ff,
+		     struct perf_evlist *evlist __maybe_unused)
+{
+	struct perf_env *env = &ff->ph->env;
+	struct rb_root *root;
+	struct rb_node *next;
+	u32 count = 0;
+	int ret;
+
+	down_read(&env->bpf_progs.bpf_info_lock);
+
+	root = &env->bpf_progs.btfs;
+	next = rb_first(root);
+	while (next) {
+		++count;
+		next = rb_next(next);
+	}
+
+	ret = do_write(ff, &count, sizeof(count));
+
+	if (ret < 0)
+		goto out;
+
+	next = rb_first(root);
+	while (next) {
+		struct btf_node *node;
+
+		node = rb_entry(next, struct btf_node, rb_node);
+		next = rb_next(&node->rb_node);
+		ret = do_write(ff, node,
+			       sizeof(struct btf_node) + node->data_size);
+		if (ret < 0)
+			goto out;
+	}
+out:
+	up_read(&env->bpf_progs.bpf_info_lock);
+	return ret;
+}
+
 static int cpu_cache_level__sort(const void *a, const void *b)
 {
 	struct cpu_cache_level *cache_a = (struct cpu_cache_level *)a;
@@ -1630,6 +1669,28 @@ static void print_bpf_prog_info(struct feat_fd *ff, FILE *fp)
 	up_read(&env->bpf_progs.bpf_info_lock);
 }
 
+static void print_btf(struct feat_fd *ff, FILE *fp)
+{
+	struct perf_env *env = &ff->ph->env;
+	struct rb_root *root;
+	struct rb_node *next;
+
+	down_read(&env->bpf_progs.bpf_info_lock);
+
+	root = &env->bpf_progs.btfs;
+	next = rb_first(root);
+
+	while (next) {
+		struct btf_node *node;
+
+		node = rb_entry(next, struct btf_node, rb_node);
+		next = rb_next(&node->rb_node);
+		fprintf(fp, "# bpf_prog_info of id %u\n", node->id);
+	}
+
+	up_read(&env->bpf_progs.bpf_info_lock);
+}
+
 static void free_event_desc(struct perf_evsel *events)
 {
 	struct perf_evsel *evsel;
@@ -2725,6 +2786,41 @@ static int process_bpf_prog_info(struct feat_fd *ff,
 	return err;
 }
 
+static int process_btf(struct feat_fd *ff, void *data __maybe_unused)
+{
+	struct perf_env *env = &ff->ph->env;
+	u32 count, i;
+
+	if (do_read_u32(ff, &count))
+		return -1;
+
+	down_write(&env->bpf_progs.bpf_info_lock);
+
+	for (i = 0; i < count; ++i) {
+		struct btf_node btf_node;
+		struct btf_node *node;
+
+		if (__do_read(ff, &btf_node, sizeof(struct btf_node)))
+			return -1;
+
+		node = malloc(sizeof(struct btf_node) + btf_node.data_size);
+		if (!node)
+			return -1;
+
+		node->id = btf_node.id;
+		node->data_size = btf_node.data_size;
+
+		if (__do_read(ff, node->data, btf_node.data_size)) {
+			free(node);
+			return -1;
+		}
+		perf_env__insert_btf(env, node);
+	}
+
+	up_write(&env->bpf_progs.bpf_info_lock);
+	return 0;
+}
+
 struct feature_ops {
 	int (*write)(struct feat_fd *ff, struct perf_evlist *evlist);
 	void (*print)(struct feat_fd *ff, FILE *fp);
@@ -2785,7 +2881,8 @@ static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = {
 	FEAT_OPR(SAMPLE_TIME,	sample_time,	false),
 	FEAT_OPR(MEM_TOPOLOGY,	mem_topology,	true),
 	FEAT_OPR(CLOCKID,       clockid,        false),
-	FEAT_OPR(BPF_PROG_INFO, bpf_prog_info,  false)
+	FEAT_OPR(BPF_PROG_INFO, bpf_prog_info,  false),
+	FEAT_OPR(BTF,           btf,            false)
 };
 
 struct header_print_data {
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index 0785c91b4c3a..ba51d8e43c53 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -40,6 +40,7 @@ enum {
 	HEADER_MEM_TOPOLOGY,
 	HEADER_CLOCKID,
 	HEADER_BPF_PROG_INFO,
+	HEADER_BTF,
 	HEADER_LAST_FEATURE,
 	HEADER_FEAT_BITS	= 256,
 };
-- 
2.17.1


^ permalink raw reply related

* [PATCH v3 perf,bpf 11/11] perf, bpf: save information about short living bpf programs
From: Song Liu @ 2019-02-15 21:53 UTC (permalink / raw)
  To: netdev, linux-kernel
  Cc: ast, daniel, kernel-team, peterz, acme, jolsa, namhyung, Song Liu
In-Reply-To: <20190215215354.3114006-1-songliubraving@fb.com>

To annotate bpf programs in perf, it is necessary to save information in
bpf_prog_info and btf. For short-lived bpf programs, it is necessary to
save this information before they are unloaded.

This patch saves this information in a separate thread. This thread
creates its own evlist, which only tracks bpf events. This evlist uses a
ring buffer with a very low watermark for lower latency. When bpf load
events are received, this thread tries to gather information via sys_bpf
and save it in perf_env.

Signed-off-by: Song Liu <songliubraving@fb.com>
---
 tools/perf/builtin-record.c |  13 ++++
 tools/perf/builtin-top.c    |  13 ++++
 tools/perf/util/bpf-event.c | 139 ++++++++++++++++++++++++++++++++++++
 tools/perf/util/bpf-event.h |  22 ++++++
 tools/perf/util/evlist.c    |  20 ++++++
 tools/perf/util/evlist.h    |   2 +
 6 files changed, 209 insertions(+)

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 2355e0a9eda0..46abb44aaaab 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -1106,6 +1106,8 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
 	struct perf_data *data = &rec->data;
 	struct perf_session *session;
 	bool disabled = false, draining = false;
+	struct bpf_event_poll_args poll_args;
+	bool bpf_thread_running = false;
 	int fd;
 
 	atexit(record__sig_exit);
@@ -1206,6 +1208,14 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
 		goto out_child;
 	}
 
+	if (rec->opts.bpf_event) {
+		poll_args.env = &session->header.env;
+		poll_args.target = &rec->opts.target;
+		poll_args.done = &done;
+		if (bpf_event__start_polling_thread(&poll_args) == 0)
+			bpf_thread_running = true;
+	}
+
 	err = record__synthesize(rec, false);
 	if (err < 0)
 		goto out_child;
@@ -1456,6 +1466,9 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
 
 out_delete_session:
 	perf_session__delete(session);
+
+	if (bpf_thread_running)
+		bpf_event__stop_polling_thread(&poll_args);
 	return status;
 }
 
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index ccdf5689452f..6c60952a61c7 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -1524,10 +1524,12 @@ int cmd_top(int argc, const char **argv)
 			"number of thread to run event synthesize"),
 	OPT_END()
 	};
+	struct bpf_event_poll_args poll_args;
 	const char * const top_usage[] = {
 		"perf top [<options>]",
 		NULL
 	};
+	bool bpf_thread_running = false;
 	int status = hists__init();
 
 	if (status < 0)
@@ -1654,8 +1656,19 @@ int cmd_top(int argc, const char **argv)
 
 	top.record_opts.bpf_event = !top.no_bpf_event;
 
+	if (top.record_opts.bpf_event) {
+		poll_args.env = &perf_env;
+		poll_args.target = target;
+		poll_args.done = &done;
+		if (bpf_event__start_polling_thread(&poll_args) == 0)
+			bpf_thread_running = true;
+	}
+
 	status = __cmd_top(&top);
 
+	if (bpf_thread_running)
+		bpf_event__stop_polling_thread(&poll_args);
+
 out_delete_evlist:
 	perf_evlist__delete(top.evlist);
 
diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c
index 6c363adc781a..2fdf39519fc4 100644
--- a/tools/perf/util/bpf-event.c
+++ b/tools/perf/util/bpf-event.c
@@ -12,6 +12,7 @@
 #include "machine.h"
 #include "env.h"
 #include "session.h"
+#include "evlist.h"
 
 #define ptr_to_u64(ptr)    ((__u64)(unsigned long)(ptr))
 
@@ -322,3 +323,141 @@ int perf_event__synthesize_bpf_events(struct perf_session *session,
 	free(event);
 	return err;
 }
+
+static void perf_env__add_bpf_info(struct perf_env *env, u32 id)
+{
+	struct bpf_prog_info_linear *info_linear;
+	struct bpf_prog_info_node *info_node;
+	struct btf *btf = NULL;
+	u64 arrays;
+	u32 btf_id;
+	int fd;
+
+	fd = bpf_prog_get_fd_by_id(id);
+	if (fd < 0)
+		return;
+
+	arrays = 1UL << BPF_PROG_INFO_JITED_KSYMS;
+	arrays |= 1UL << BPF_PROG_INFO_JITED_FUNC_LENS;
+	arrays |= 1UL << BPF_PROG_INFO_FUNC_INFO;
+	arrays |= 1UL << BPF_PROG_INFO_PROG_TAGS;
+	arrays |= 1UL << BPF_PROG_INFO_JITED_INSNS;
+	arrays |= 1UL << BPF_PROG_INFO_LINE_INFO;
+	arrays |= 1UL << BPF_PROG_INFO_JITED_LINE_INFO;
+
+	info_linear = bpf_program__get_prog_info_linear(fd, arrays);
+	if (IS_ERR_OR_NULL(info_linear)) {
+		pr_debug("%s: failed to get BPF program info. aborting\n", __func__);
+		goto out;
+	}
+
+	btf_id = info_linear->info.btf_id;
+
+	info_node = malloc(sizeof(struct bpf_prog_info_node));
+	if (info_node) {
+		info_node->info_linear = info_linear;
+		perf_env__insert_bpf_prog_info(env, info_node);
+	} else
+		free(info_linear);
+
+	if (btf_id == 0)
+		goto out;
+
+	if (btf__get_from_id(btf_id, &btf)) {
+		pr_debug("%s: failed to get BTF of id %u, aborting\n",
+			 __func__, btf_id);
+		goto out;
+	}
+	perf_env__fetch_btf(env, btf_id, btf);
+
+out:
+	free(btf);
+	close(fd);
+}
+
+static void *bpf_poll_thread(void *arg)
+{
+	struct bpf_event_poll_args *args = arg;
+	int i;
+
+	while (!*(args->done)) {
+		perf_evlist__poll(args->evlist, 1000);
+
+		for (i = 0; i < args->evlist->nr_mmaps; i++) {
+			struct perf_mmap *map = &args->evlist->mmap[i];
+			union perf_event *event;
+
+			if (perf_mmap__read_init(map))
+				continue;
+			while ((event = perf_mmap__read_event(map)) != NULL) {
+				pr_debug("processing vip event of type %d\n",
+					 event->header.type);
+				switch (event->header.type) {
+				case PERF_RECORD_BPF_EVENT:
+					if (event->bpf_event.type != PERF_BPF_EVENT_PROG_LOAD)
+						break;
+					perf_env__add_bpf_info(args->env, event->bpf_event.id);
+					break;
+				default:
+					break;
+				}
+				perf_mmap__consume(map);
+			}
+			perf_mmap__read_done(map);
+		}
+	}
+	return NULL;
+}
+
+pthread_t poll_thread;
+
+int bpf_event__start_polling_thread(struct bpf_event_poll_args *args)
+{
+	struct perf_evsel *counter;
+	struct perf_event_attr attr = {
+		.type	          = PERF_TYPE_SOFTWARE,
+		.config           = PERF_COUNT_SW_DUMMY,
+		.watermark        = 1,
+		.bpf_event        = 1,
+		.wakeup_watermark = 1,
+		.size	   = sizeof(attr), /* to capture ABI version */
+	};
+
+	args->evlist = perf_evlist__new_side_band(&attr);
+
+	if (args->evlist == NULL)
+		return -1;
+
+	if (perf_evlist__create_maps(args->evlist, args->target))
+		goto out_delete_evlist;
+
+	evlist__for_each_entry(args->evlist, counter) {
+		if (perf_evsel__open(counter, args->evlist->cpus,
+				     args->evlist->threads) < 0)
+			goto out_delete_evlist;
+	}
+
+	if (perf_evlist__mmap(args->evlist, UINT_MAX))
+		goto out_delete_evlist;
+
+	evlist__for_each_entry(args->evlist, counter) {
+		if (perf_evsel__enable(counter))
+			goto out_delete_evlist;
+	}
+
+	if (pthread_create(&poll_thread, NULL, bpf_poll_thread, args))
+		goto out_delete_evlist;
+
+	return 0;
+
+out_delete_evlist:
+	perf_evlist__delete(args->evlist);
+	args->evlist = NULL;
+	return -1;
+}
+
+void bpf_event__stop_polling_thread(struct bpf_event_poll_args *args)
+{
+	pthread_join(poll_thread, NULL);
+	perf_evlist__exit(args->evlist);
+}
diff --git a/tools/perf/util/bpf-event.h b/tools/perf/util/bpf-event.h
index ab4a975b7e05..546133f16c89 100644
--- a/tools/perf/util/bpf-event.h
+++ b/tools/perf/util/bpf-event.h
@@ -8,12 +8,17 @@
 #include <linux/btf.h>
 #include <linux/compiler.h>
 #include <linux/rbtree.h>
+#include <pthread.h>
+#include <api/fd/array.h>
 #include "event.h"
 
 struct machine;
 union perf_event;
+struct perf_env;
 struct perf_sample;
 struct record_opts;
+struct evlist;
+struct target;
 
 struct bpf_prog_info_node {
 	struct bpf_prog_info_linear	*info_linear;
@@ -27,6 +32,13 @@ struct btf_node {
 	char		data[];
 };
 
+struct bpf_event_poll_args {
+	struct perf_env		*env;
+	struct perf_evlist	*evlist;
+	struct target		*target;
+	volatile int		*done;
+};
+
 #ifdef HAVE_LIBBPF_SUPPORT
 int machine__process_bpf_event(struct machine *machine, union perf_event *event,
 			       struct perf_sample *sample);
@@ -35,6 +47,8 @@ int perf_event__synthesize_bpf_events(struct perf_session *session,
 				      perf_event__handler_t process,
 				      struct machine *machine,
 				      struct record_opts *opts);
+int bpf_event__start_polling_thread(struct bpf_event_poll_args *args);
+void bpf_event__stop_polling_thread(struct bpf_event_poll_args *args);
 #else
 static inline int machine__process_bpf_event(struct machine *machine __maybe_unused,
 					     union perf_event *event __maybe_unused,
@@ -50,5 +64,13 @@ static inline int perf_event__synthesize_bpf_events(struct perf_session *session
 {
 	return 0;
 }
+
+static inline int bpf_event__start_polling_thread(struct bpf_event_poll_args *args __maybe_unused)
+{
+	return 0;
+}
+void bpf_event__stop_polling_thread(struct bpf_event_poll_args *args __maybe_unused)
+{
+}
 #endif // HAVE_LIBBPF_SUPPORT
 #endif
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 8c902276d4b4..32680bb92b34 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -77,6 +77,26 @@ struct perf_evlist *perf_evlist__new_default(void)
 	return evlist;
 }
 
+struct perf_evlist *perf_evlist__new_side_band(struct perf_event_attr *attr)
+{
+	struct perf_evlist *evlist = perf_evlist__new();
+	struct perf_evsel *evsel;
+
+	if (!evlist)
+		return NULL;
+
+	evsel = perf_evsel__new_idx(attr, evlist->nr_entries);
+	if (!evsel)
+		goto out_err;
+
+	perf_evlist__add(evlist, evsel);
+	return evlist;
+
+out_err:
+	perf_evlist__delete(evlist);
+	return NULL;
+}
+
 struct perf_evlist *perf_evlist__new_dummy(void)
 {
 	struct perf_evlist *evlist = perf_evlist__new();
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 868294491194..74f1031fd557 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -84,6 +84,8 @@ int __perf_evlist__add_default_attrs(struct perf_evlist *evlist,
 
 int perf_evlist__add_dummy(struct perf_evlist *evlist);
 
+struct perf_evlist *perf_evlist__new_side_band(struct perf_event_attr *attr);
+
 int perf_evlist__add_newtp(struct perf_evlist *evlist,
 			   const char *sys, const char *name, void *handler);
 
-- 
2.17.1


^ permalink raw reply related

* [PATCH v3 perf,bpf 07/11] perf, bpf: save btf in a rbtree in perf_env
From: Song Liu @ 2019-02-15 21:53 UTC (permalink / raw)
  To: netdev, linux-kernel
  Cc: ast, daniel, kernel-team, peterz, acme, jolsa, namhyung, Song Liu
In-Reply-To: <20190215215354.3114006-1-songliubraving@fb.com>

btf contains information necessary to annotate bpf programs. This patch
saves btf for bpf programs loaded in the system.

Signed-off-by: Song Liu <songliubraving@fb.com>
---
 tools/perf/util/bpf-event.c | 24 ++++++++++++++
 tools/perf/util/bpf-event.h |  7 ++++
 tools/perf/util/env.c       | 65 +++++++++++++++++++++++++++++++++++++
 tools/perf/util/env.h       |  4 +++
 4 files changed, 100 insertions(+)

diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c
index 13313a3e6c4d..52507435c464 100644
--- a/tools/perf/util/bpf-event.c
+++ b/tools/perf/util/bpf-event.c
@@ -34,6 +34,29 @@ int machine__process_bpf_event(struct machine *machine __maybe_unused,
 	return 0;
 }
 
+static int perf_env__fetch_btf(struct perf_env *env,
+			       u32 btf_id,
+			       struct btf *btf)
+{
+	struct btf_node *node;
+	u32 data_size;
+	const void *data;
+
+	data = btf__get_raw_data(btf, &data_size);
+
+	node = malloc(data_size + sizeof(struct btf_node));
+
+	if (!node)
+		return -1;
+
+	node->id = btf_id;
+	node->data_size = data_size;
+	memcpy(node->data, data, data_size);
+
+	perf_env__insert_btf(env, node);
+	return 0;
+}
+
 /*
  * Synthesize PERF_RECORD_KSYMBOL and PERF_RECORD_BPF_EVENT for one bpf
  * program. One PERF_RECORD_BPF_EVENT is generated for the program. And
@@ -113,6 +136,7 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_session *session,
 			goto out;
 		}
 		has_btf = true;
+		perf_env__fetch_btf(env, info->btf_id, btf);
 	}
 
 	/* Synthesize PERF_RECORD_KSYMBOL */
diff --git a/tools/perf/util/bpf-event.h b/tools/perf/util/bpf-event.h
index fad932f7404f..b9ec394dc7c7 100644
--- a/tools/perf/util/bpf-event.h
+++ b/tools/perf/util/bpf-event.h
@@ -16,6 +16,13 @@ struct bpf_prog_info_node {
 	struct rb_node			rb_node;
 };
 
+struct btf_node {
+	struct rb_node	rb_node;
+	u32		id;
+	u32		data_size;
+	char		data[];
+};
+
 #ifdef HAVE_LIBBPF_SUPPORT
 int machine__process_bpf_event(struct machine *machine, union perf_event *event,
 			       struct perf_sample *sample);
diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c
index 026ca2182d50..f5556d2f4063 100644
--- a/tools/perf/util/env.c
+++ b/tools/perf/util/env.c
@@ -63,6 +63,57 @@ struct bpf_prog_info_node *perf_env__find_bpf_prog_info(struct perf_env *env,
 	return node;
 }
 
+void perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node)
+{
+	struct rb_node *parent = NULL;
+	__u32 btf_id = btf_node->id;
+	struct btf_node *node;
+	struct rb_node **p;
+
+	down_write(&env->bpf_progs.bpf_info_lock);
+	p = &env->bpf_progs.btfs.rb_node;
+
+	while (*p != NULL) {
+		parent = *p;
+		node = rb_entry(parent, struct btf_node, rb_node);
+		if (btf_id < node->id) {
+			p = &(*p)->rb_left;
+		} else if (btf_id > node->id) {
+			p = &(*p)->rb_right;
+		} else {
+			pr_debug("duplicated btf %u\n", btf_id);
+			goto out;
+		}
+	}
+
+	rb_link_node(&btf_node->rb_node, parent, p);
+	rb_insert_color(&btf_node->rb_node, &env->bpf_progs.btfs);
+out:
+	up_write(&env->bpf_progs.bpf_info_lock);
+}
+
+struct btf_node *perf_env__find_btf(struct perf_env *env, __u32 btf_id)
+{
+	struct btf_node *node = NULL;
+	struct rb_node *n;
+
+	down_read(&env->bpf_progs.bpf_info_lock);
+	n = env->bpf_progs.btfs.rb_node;
+
+	while (n) {
+		node = rb_entry(n, struct btf_node, rb_node);
+		if (btf_id < node->id)
+			n = n->rb_left;
+		else if (btf_id > node->id)
+			n = n->rb_right;
+		else
+			break;
+	}
+
+	up_read(&env->bpf_progs.bpf_info_lock);
+	return node;
+}
+
 /* purge data in bpf_prog_infos tree */
 static void perf_env__purge_bpf(struct perf_env *env)
 {
@@ -82,6 +133,19 @@ static void perf_env__purge_bpf(struct perf_env *env)
 		rb_erase_init(&node->rb_node, root);
 		free(node);
 	}
+
+	root = &env->bpf_progs.btfs;
+	next = rb_first(root);
+
+	while (next) {
+		struct btf_node *node;
+
+		node = rb_entry(next, struct btf_node, rb_node);
+		next = rb_next(&node->rb_node);
+		rb_erase_init(&node->rb_node, root);
+		free(node);
+	}
+
 	up_write(&env->bpf_progs.bpf_info_lock);
 }
 
@@ -119,6 +183,7 @@ void perf_env__exit(struct perf_env *env)
 static void init_bpf_rb_trees(struct perf_env *env)
 {
 	env->bpf_progs.bpf_prog_infos = RB_ROOT;
+	env->bpf_progs.btfs = RB_ROOT;
 	init_rwsem(&env->bpf_progs.bpf_info_lock);
 }
 
diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h
index d0c53fe6d431..b3ce7813b1a0 100644
--- a/tools/perf/util/env.h
+++ b/tools/perf/util/env.h
@@ -74,10 +74,12 @@ struct perf_env {
 	struct {
 		struct rw_semaphore	bpf_info_lock;
 		struct rb_root		bpf_prog_infos;
+		struct rb_root		btfs;
 	} bpf_progs;
 };
 
 struct bpf_prog_info_node;
+struct btf_node;
 
 extern struct perf_env perf_env;
 
@@ -97,4 +99,6 @@ void perf_env__insert_bpf_prog_info(struct perf_env *env,
 				    struct bpf_prog_info_node *info_node);
 struct bpf_prog_info_node *perf_env__find_bpf_prog_info(struct perf_env *env,
 							__u32 prog_id);
+void perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node);
+struct btf_node *perf_env__find_btf(struct perf_env *env, __u32 btf_id);
 #endif /* __PERF_ENV_H */
-- 
2.17.1


^ permalink raw reply related

* [PATCH v3 perf,bpf 02/11] bpf: libbpf: introduce bpf_program__get_prog_info_linear()
From: Song Liu @ 2019-02-15 21:53 UTC (permalink / raw)
  To: netdev, linux-kernel
  Cc: ast, daniel, kernel-team, peterz, acme, jolsa, namhyung, Song Liu
In-Reply-To: <20190215215354.3114006-1-songliubraving@fb.com>

Currently, bpf_prog_info includes 9 arrays. The user has the option to
fetch any combination of these arrays. However, this requires a lot of
handling of these arrays. This work becomes more tricky when we need to
store bpf_prog_info to a file, because these arrays are allocated
independently.

This patch introduces struct bpf_prog_info_linear, which stores arrays
of bpf_prog_info in contiguous memory. Helper functions are introduced
to unify the work to get different information of bpf_prog_info.
Specifically, bpf_program__get_prog_info_linear() allows the user to
select which arrays to fetch, and handles details for the user.

Please see the comments before enum bpf_prog_info_array for more details
and examples.

Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Song Liu <songliubraving@fb.com>
---
 tools/lib/bpf/libbpf.c   | 251 +++++++++++++++++++++++++++++++++++++++
 tools/lib/bpf/libbpf.h   |  63 ++++++++++
 tools/lib/bpf/libbpf.map |   3 +
 3 files changed, 317 insertions(+)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index b38dcbe7460a..fa12729de283 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -112,6 +112,11 @@ void libbpf_print(enum libbpf_print_level level, const char *format, ...)
 # define LIBBPF_ELF_C_READ_MMAP ELF_C_READ
 #endif
 
+static inline __u64 ptr_to_u64(const void *ptr)
+{
+	return (__u64) (unsigned long) ptr;
+}
+
 struct bpf_capabilities {
 	/* v4.14: kernel support for program & map names. */
 	__u32 name:1;
@@ -2997,3 +3002,249 @@ bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
 	ring_buffer_write_tail(header, data_tail);
 	return ret;
 }
+
+struct bpf_prog_info_array_desc {
+	int	array_offset;	/* e.g. offset of jited_prog_insns */
+	int	count_offset;	/* e.g. offset of jited_prog_len */
+	int	size_offset;	/* > 0: offset of rec size,
+				 * < 0: fix size of -size_offset
+				 */
+};
+
+static struct bpf_prog_info_array_desc bpf_prog_info_array_desc[] = {
+	[BPF_PROG_INFO_JITED_INSNS] = {
+		offsetof(struct bpf_prog_info, jited_prog_insns),
+		offsetof(struct bpf_prog_info, jited_prog_len),
+		-1,
+	},
+	[BPF_PROG_INFO_XLATED_INSNS] = {
+		offsetof(struct bpf_prog_info, xlated_prog_insns),
+		offsetof(struct bpf_prog_info, xlated_prog_len),
+		-1,
+	},
+	[BPF_PROG_INFO_MAP_IDS] = {
+		offsetof(struct bpf_prog_info, map_ids),
+		offsetof(struct bpf_prog_info, nr_map_ids),
+		-(int)sizeof(__u32),
+	},
+	[BPF_PROG_INFO_JITED_KSYMS] = {
+		offsetof(struct bpf_prog_info, jited_ksyms),
+		offsetof(struct bpf_prog_info, nr_jited_ksyms),
+		-(int)sizeof(__u64),
+	},
+	[BPF_PROG_INFO_JITED_FUNC_LENS] = {
+		offsetof(struct bpf_prog_info, jited_func_lens),
+		offsetof(struct bpf_prog_info, nr_jited_func_lens),
+		-(int)sizeof(__u32),
+	},
+	[BPF_PROG_INFO_FUNC_INFO] = {
+		offsetof(struct bpf_prog_info, func_info),
+		offsetof(struct bpf_prog_info, nr_func_info),
+		offsetof(struct bpf_prog_info, func_info_rec_size),
+	},
+	[BPF_PROG_INFO_LINE_INFO] = {
+		offsetof(struct bpf_prog_info, line_info),
+		offsetof(struct bpf_prog_info, nr_line_info),
+		offsetof(struct bpf_prog_info, line_info_rec_size),
+	},
+	[BPF_PROG_INFO_JITED_LINE_INFO] = {
+		offsetof(struct bpf_prog_info, jited_line_info),
+		offsetof(struct bpf_prog_info, nr_jited_line_info),
+		offsetof(struct bpf_prog_info, jited_line_info_rec_size),
+	},
+	[BPF_PROG_INFO_PROG_TAGS] = {
+		offsetof(struct bpf_prog_info, prog_tags),
+		offsetof(struct bpf_prog_info, nr_prog_tags),
+		-(int)sizeof(__u8) * BPF_TAG_SIZE,
+	},
+
+};
+
+static __u32 bpf_prog_info_read_offset_u32(struct bpf_prog_info *info, int offset)
+{
+	__u32 *array = (__u32 *)info;
+
+	if (offset >= 0)
+		return array[offset / sizeof(__u32)];
+	return -(int)offset;
+}
+
+static __u64 bpf_prog_info_read_offset_u64(struct bpf_prog_info *info, int offset)
+{
+	__u64 *array = (__u64 *)info;
+
+	if (offset >= 0)
+		return array[offset / sizeof(__u64)];
+	return -(int)offset;
+}
+
+static void bpf_prog_info_set_offset_u32(struct bpf_prog_info *info, int offset,
+					 __u32 val)
+{
+	__u32 *array = (__u32 *)info;
+
+	if (offset >= 0)
+		array[offset / sizeof(__u32)] = val;
+}
+
+static void bpf_prog_info_set_offset_u64(struct bpf_prog_info *info, int offset,
+					 __u64 val)
+{
+	__u64 *array = (__u64 *)info;
+
+	if (offset >= 0)
+		array[offset / sizeof(__u64)] = val;
+}
+
+struct bpf_prog_info_linear *
+bpf_program__get_prog_info_linear(int fd, __u64 arrays)
+{
+	struct bpf_prog_info_linear *info_linear;
+	struct bpf_prog_info info = {};
+	__u32 info_len = sizeof(info);
+	__u32 data_len = 0;
+	int i, err;
+	void *ptr;
+
+	if (arrays >> BPF_PROG_INFO_LAST_ARRAY)
+		return ERR_PTR(-EINVAL);
+
+	/* step 1: get array dimensions */
+	err = bpf_obj_get_info_by_fd(fd, &info, &info_len);
+	if (err) {
+		pr_debug("can't get prog info: %s", strerror(errno));
+		return ERR_PTR(-EFAULT);
+	}
+
+	/* step 2: calculate total size of all arrays */
+	for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
+		bool include_array = (arrays & (1UL << i)) > 0;
+		struct bpf_prog_info_array_desc *desc;
+		__u32 count, size;
+
+		desc = bpf_prog_info_array_desc + i;
+
+		/* kernel is too old to support this field */
+		if (info_len < desc->array_offset + sizeof(__u32) ||
+		    info_len < desc->count_offset + sizeof(__u32) ||
+		    (desc->size_offset > 0 && info_len < desc->size_offset))
+			include_array = false;
+
+		if (!include_array) {
+			arrays &= ~(1UL << i);	/* clear the bit */
+			continue;
+		}
+
+		count = bpf_prog_info_read_offset_u32(&info, desc->count_offset);
+		size = bpf_prog_info_read_offset_u32(&info, desc->size_offset);
+
+		data_len += count * size;
+	}
+
+	/* step 3: allocate continuous memory */
+	data_len = roundup(data_len, sizeof(__u64));
+	info_linear = malloc(sizeof(struct bpf_prog_info_linear) + data_len);
+	if (!info_linear)
+		return ERR_PTR(-ENOMEM);
+
+	/* step 4: fill data to info_linear->info */
+	info_linear->arrays = arrays;
+	memset(&info_linear->info, 0, sizeof(info));
+	ptr = info_linear->data;
+
+	for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
+		struct bpf_prog_info_array_desc *desc;
+		__u32 count, size;
+
+		if ((arrays & (1UL << i)) == 0)
+			continue;
+
+		desc = bpf_prog_info_array_desc + i;
+		count = bpf_prog_info_read_offset_u32(&info, desc->count_offset);
+		size = bpf_prog_info_read_offset_u32(&info, desc->size_offset);
+		bpf_prog_info_set_offset_u32(&info_linear->info,
+					     desc->count_offset, count);
+		bpf_prog_info_set_offset_u32(&info_linear->info,
+					     desc->size_offset, size);
+		bpf_prog_info_set_offset_u64(&info_linear->info,
+					     desc->array_offset,
+					     ptr_to_u64(ptr));
+		ptr += count * size;
+	}
+
+	/* step 5: call syscall again to get required arrays */
+	err = bpf_obj_get_info_by_fd(fd, &info_linear->info, &info_len);
+	if (err) {
+		pr_debug("can't get prog info: %s", strerror(errno));
+		free(info_linear);
+		return ERR_PTR(-EFAULT);
+	}
+
+	/* step 6: verify the data */
+	for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
+		struct bpf_prog_info_array_desc *desc;
+		__u32 v1, v2;
+
+		if ((arrays & (1UL << i)) == 0)
+			continue;
+
+		desc = bpf_prog_info_array_desc + i;
+		v1 = bpf_prog_info_read_offset_u32(&info, desc->count_offset);
+		v2 = bpf_prog_info_read_offset_u32(&info_linear->info,
+						   desc->count_offset);
+		if (v1 != v2)
+			pr_warning("%s: mismatch in element count\n", __func__);
+
+		v1 = bpf_prog_info_read_offset_u32(&info, desc->size_offset);
+		v2 = bpf_prog_info_read_offset_u32(&info_linear->info,
+						   desc->size_offset);
+		if (v1 != v2)
+			pr_warning("%s: mismatch in rec size\n", __func__);
+	}
+
+	/* step 7: update info_len and data_len */
+	info_linear->info_len = sizeof(struct bpf_prog_info);
+	info_linear->data_len = data_len;
+
+	return info_linear;
+}
+
+void bpf_program__bpil_addr_to_offs(struct bpf_prog_info_linear *info_linear)
+{
+	int i;
+
+	for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
+		struct bpf_prog_info_array_desc *desc;
+		__u64 addr, offs;
+
+		if ((info_linear->arrays & (1UL << i)) == 0)
+			continue;
+
+		desc = bpf_prog_info_array_desc + i;
+		addr = bpf_prog_info_read_offset_u64(&info_linear->info,
+						     desc->array_offset);
+		offs = addr - ptr_to_u64(info_linear->data);
+		bpf_prog_info_set_offset_u64(&info_linear->info,
+					     desc->array_offset, offs);
+	}
+}
+
+void bpf_program__bpil_offs_to_addr(struct bpf_prog_info_linear *info_linear)
+{
+	int i;
+
+	for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
+		struct bpf_prog_info_array_desc *desc;
+		__u64 addr, offs;
+
+		if ((info_linear->arrays & (1UL << i)) == 0)
+			continue;
+
+		desc = bpf_prog_info_array_desc + i;
+		offs = bpf_prog_info_read_offset_u64(&info_linear->info,
+						     desc->array_offset);
+		addr = offs + ptr_to_u64(info_linear->data);
+		bpf_prog_info_set_offset_u64(&info_linear->info,
+					     desc->array_offset, addr);
+	}
+}
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 6c0168f8bba5..809275c47bb5 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -376,6 +376,69 @@ LIBBPF_API bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex);
 LIBBPF_API bool bpf_probe_helper(enum bpf_func_id id,
 				 enum bpf_prog_type prog_type, __u32 ifindex);
 
+/*
+ * Get bpf_prog_info in continuous memory
+ *
+ * struct bpf_prog_info has multiple arrays. The user has the option to
+ * choose which arrays to fetch from the kernel. The following APIs provide
+ * a uniform way to fetch this data. All arrays in bpf_prog_info are stored
+ * in a single continuous memory region. This makes it easy to store the
+ * info in a file.
+ *
+ * Before writing bpf_prog_info_linear to files, it is necessary to
+ * translate pointers in bpf_prog_info to offsets. Helper functions
+ * bpf_program__bpil_addr_to_offs() and bpf_program__bpil_offs_to_addr()
+ * are introduced to switch between pointers and offsets.
+ *
+ * Examples:
+ *   # To fetch map_ids and prog_tags:
+ *   __u64 arrays = (1UL << BPF_PROG_INFO_MAP_IDS) |
+ *           (1UL << BPF_PROG_INFO_PROG_TAGS);
+ *   struct bpf_prog_info_linear *info_linear =
+ *           bpf_program__get_prog_info_linear(fd, arrays);
+ *
+ *   # To save data in file
+ *   bpf_program__bpil_addr_to_offs(info_linear);
+ *   write(f, info_linear, sizeof(*info_linear) + info_linear->data_len);
+ *
+ *   # To read data from file
+ *   read(f, info_linear, <proper_size>);
+ *   bpf_program__bpil_offs_to_addr(info_linear);
+ */
+enum bpf_prog_info_array {
+	BPF_PROG_INFO_FIRST_ARRAY = 0,
+	BPF_PROG_INFO_JITED_INSNS = 0,
+	BPF_PROG_INFO_XLATED_INSNS,
+	BPF_PROG_INFO_MAP_IDS,
+	BPF_PROG_INFO_JITED_KSYMS,
+	BPF_PROG_INFO_JITED_FUNC_LENS,
+	BPF_PROG_INFO_FUNC_INFO,
+	BPF_PROG_INFO_LINE_INFO,
+	BPF_PROG_INFO_JITED_LINE_INFO,
+	BPF_PROG_INFO_PROG_TAGS,
+	BPF_PROG_INFO_LAST_ARRAY,
+};
+
+struct bpf_prog_info_linear {
+	/* size of struct bpf_prog_info, when the tool is compiled */
+	__u32			info_len;
+	/* total bytes allocated for data, round up to 8 bytes */
+	__u32			data_len;
+	/* which arrays are included in data */
+	__u64			arrays;
+	struct bpf_prog_info	info;
+	__u8			data[];
+};
+
+LIBBPF_API struct bpf_prog_info_linear *
+bpf_program__get_prog_info_linear(int fd, __u64 arrays);
+
+LIBBPF_API void
+bpf_program__bpil_addr_to_offs(struct bpf_prog_info_linear *info_linear);
+
+LIBBPF_API void
+bpf_program__bpil_offs_to_addr(struct bpf_prog_info_linear *info_linear);
+
 #ifdef __cplusplus
 } /* extern "C" */
 #endif
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 99dfa710c818..24616162447e 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -147,4 +147,7 @@ LIBBPF_0.0.2 {
 		btf_ext__new;
 		btf_ext__reloc_func_info;
 		btf_ext__reloc_line_info;
+		bpf_program__get_prog_info_linear;
+		bpf_program__bpil_addr_to_offs;
+		bpf_program__bpil_offs_to_addr;
 } LIBBPF_0.0.1;
-- 
2.17.1


^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox