Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCH v2 7/7] sh_eth: offload RX checksum on SH7763
From: Sergei Shtylyov @ 2019-02-04 18:12 UTC (permalink / raw)
  To: netdev, David S. Miller; +Cc: linux-renesas-soc, linux-sh
In-Reply-To: <a21deed1-35dc-f1be-6c7e-7061ebe4b56c@cogentembedded.com>

The SH7763 SoC manual describes the Ether MAC's RX checksum offload
the same way as it's implemented in the EtherAVB MACs...

Signed-off-by: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>

---
 drivers/net/ethernet/renesas/sh_eth.c |    1 +
 1 file changed, 1 insertion(+)

Index: renesas/drivers/net/ethernet/renesas/sh_eth.c
===================================================================
--- renesas.orig/drivers/net/ethernet/renesas/sh_eth.c
+++ renesas/drivers/net/ethernet/renesas/sh_eth.c
@@ -1092,6 +1092,7 @@ static struct sh_eth_cpu_data sh7763_dat
 	.irq_flags	= IRQF_SHARED,
 	.magic		= 1,
 	.cexcr		= 1,
+	.rx_csum	= 1,
 	.dual_port	= 1,
 };
 

^ permalink raw reply

* [PATCH] net: phylink: dsa: mv88e6xxx: Revise irq setup ordering
From: John David Anglin @ 2019-02-04 18:37 UTC (permalink / raw)
  To: Andrew Lunn; +Cc: Russell King, Vivien Didelot, Florian Fainelli, netdev
In-Reply-To: <a89d5296-88f3-4dde-74b3-25ad1791d5b6@bell.net>

This change fixes a race condition in the setup of hardware irqs and the
code enabling PHY link
detection.

This was observed on the espressobin board where the GPIO interrupt
controller only supports edge
interrupts.  If the INTn output pin goes low before the GPIO interrupt
is enabled, PHY link interrupts
are not detected.

With this change, we
1) force INTn high by clearing all interrupt enables in global 1 control1,
2) setup the hardware irq, and
3) perform the remaining common setup.

This in fact simplifies the setup and allows some unnecessary code to be
removed.

In order to prevent races in mv88e6xxx_g1_irq_thread_work(), we clear
and restore the interrupt
enables in the normal path just prior to exit.

diff --git a/drivers/net/dsa/mv88e6xxx/chip.c
b/drivers/net/dsa/mv88e6xxx/chip.c
index b2a0e59b6252..0dcbc25c1b4b 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -260,7 +260,7 @@ static irqreturn_t
mv88e6xxx_g1_irq_thread_work(struct mv88e6xxx_chip *chip)
     unsigned int nhandled = 0;
     unsigned int sub_irq;
     unsigned int n;
-    u16 reg;
+    u16 mask, reg;
     int err;
 
     mutex_lock(&chip->reg_lock);
@@ -277,6 +277,14 @@ static irqreturn_t
mv88e6xxx_g1_irq_thread_work(struct mv88e6xxx_chip *chip)
             ++nhandled;
         }
     }
+
+    mutex_lock(&chip->reg_lock);
+    mv88e6xxx_g1_read(chip, MV88E6XXX_G1_CTL1, &reg);
+    mask = ~GENMASK(chip->g1_irq.nirqs, 0);
+    mv88e6xxx_g1_write(chip, MV88E6XXX_G1_CTL1, (reg & mask));
+    mv88e6xxx_g1_write(chip, MV88E6XXX_G1_CTL1, reg);
+    mutex_unlock(&chip->reg_lock);
+
 out:
     return (nhandled > 0 ? IRQ_HANDLED : IRQ_NONE);
 }
@@ -374,10 +382,30 @@ static void mv88e6xxx_g1_irq_free(struct
mv88e6xxx_chip *chip)
     mutex_unlock(&chip->reg_lock);
 }
 
+static int mv88e6xxx_g1_irq_setup_masks(struct mv88e6xxx_chip *chip)
+{
+    int err;
+    u16 reg;
+
+    /* The INTn output must be high when hardware interrupts are setup.
+       The EEPROM done interrupt enable is set on reset, so clear all
+       interrupt enable bits to ensure INTn is not driven low */
+    err = mv88e6xxx_g1_read(chip, MV88E6XXX_G1_CTL1, &reg);
+    if (err)
+        return err;
+    reg &= ~GENMASK(chip->info->g1_irqs, 0);
+    err = mv88e6xxx_g1_write(chip, MV88E6XXX_G1_CTL1, reg);
+    if (err)
+        return err;
+
+    /* Reading the interrupt status clears (most of) them */
+    err = mv88e6xxx_g1_read(chip, MV88E6XXX_G1_STS, &reg);
+    return err;
+}
+
 static int mv88e6xxx_g1_irq_setup_common(struct mv88e6xxx_chip *chip)
 {
-    int err, irq, virq;
-    u16 reg, mask;
+    int irq;
 
     chip->g1_irq.nirqs = chip->info->g1_irqs;
     chip->g1_irq.domain = irq_domain_add_simple(
@@ -392,43 +420,14 @@ static int mv88e6xxx_g1_irq_setup_common(struct
mv88e6xxx_chip *chip)
     chip->g1_irq.chip = mv88e6xxx_g1_irq_chip;
     chip->g1_irq.masked = ~0;
 
-    err = mv88e6xxx_g1_read(chip, MV88E6XXX_G1_CTL1, &mask);
-    if (err)
-        goto out_mapping;
-
-    mask &= ~GENMASK(chip->g1_irq.nirqs, 0);
-
-    err = mv88e6xxx_g1_write(chip, MV88E6XXX_G1_CTL1, mask);
-    if (err)
-        goto out_disable;
-
-    /* Reading the interrupt status clears (most of) them */
-    err = mv88e6xxx_g1_read(chip, MV88E6XXX_G1_STS, &reg);
-    if (err)
-        goto out_disable;
-
     return 0;
-
-out_disable:
-    mask &= ~GENMASK(chip->g1_irq.nirqs, 0);
-    mv88e6xxx_g1_write(chip, MV88E6XXX_G1_CTL1, mask);
-
-out_mapping:
-    for (irq = 0; irq < 16; irq++) {
-        virq = irq_find_mapping(chip->g1_irq.domain, irq);
-        irq_dispose_mapping(virq);
-    }
-
-    irq_domain_remove(chip->g1_irq.domain);
-
-    return err;
 }
 
 static int mv88e6xxx_g1_irq_setup(struct mv88e6xxx_chip *chip)
 {
     int err;
 
-    err = mv88e6xxx_g1_irq_setup_common(chip);
+    err = mv88e6xxx_g1_irq_setup_masks(chip);
     if (err)
         return err;
 
@@ -437,9 +436,9 @@ static int mv88e6xxx_g1_irq_setup(struct
mv88e6xxx_chip *chip)
                    IRQF_ONESHOT | IRQF_SHARED,
                    dev_name(chip->dev), chip);
     if (err)
-        mv88e6xxx_g1_irq_free_common(chip);
+        return err;
 
-    return err;
+    return mv88e6xxx_g1_irq_setup_common(chip);
 }
 
 static void mv88e6xxx_irq_poll(struct kthread_work *work)
@@ -457,6 +456,10 @@ static int mv88e6xxx_irq_poll_setup(struct
mv88e6xxx_chip *chip)
 {
     int err;
 
+    err = mv88e6xxx_g1_irq_setup_masks(chip);
+    if (err)
+        return err;
+
     err = mv88e6xxx_g1_irq_setup_common(chip);
     if (err)
         return err;

Signed-off-by: John David Anglin <dave.anglin@bell.net>
-- 

John David Anglin  dave.anglin@bell.net


^ permalink raw reply related

* [PATCH bpf-next] selftests/bpf: use localhost in tcp_{server,client}.py
From: Stanislav Fomichev @ 2019-02-04 18:43 UTC (permalink / raw)
  To: netdev; +Cc: davem, ast, daniel, Stanislav Fomichev

Bind and connect to localhost. There is no reason for this test to
use non-localhost interface. This lets us run this test in a network
namespace.

Signed-off-by: Stanislav Fomichev <sdf@google.com>
---
 tools/testing/selftests/bpf/tcp_client.py | 3 +--
 tools/testing/selftests/bpf/tcp_server.py | 5 +----
 2 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/tools/testing/selftests/bpf/tcp_client.py b/tools/testing/selftests/bpf/tcp_client.py
index 7f8200a8702b..a53ed58528d6 100755
--- a/tools/testing/selftests/bpf/tcp_client.py
+++ b/tools/testing/selftests/bpf/tcp_client.py
@@ -30,12 +30,11 @@ import select
 
 
 serverPort = int(sys.argv[1])
-HostName = socket.gethostname()
 
 # create active socket
 sock = socket.socket(socket.AF_INET6, socket.SOCK_STREAM)
 try:
-    sock.connect((HostName, serverPort))
+    sock.connect(('localhost', serverPort))
 except socket.error as e:
     sys.exit(1)
 
diff --git a/tools/testing/selftests/bpf/tcp_server.py b/tools/testing/selftests/bpf/tcp_server.py
index b39903fca4c8..0ca60d193bed 100755
--- a/tools/testing/selftests/bpf/tcp_server.py
+++ b/tools/testing/selftests/bpf/tcp_server.py
@@ -35,13 +35,10 @@ MAX_PORTS = 2
 serverPort = SERVER_PORT
 serverSocket = None
 
-HostName = socket.gethostname()
-
 # create passive socket
 serverSocket = socket.socket(socket.AF_INET6, socket.SOCK_STREAM)
-host = socket.gethostname()
 
-try: serverSocket.bind((host, 0))
+try: serverSocket.bind(('localhost', 0))
 except socket.error as msg:
     print('bind fails: ' + str(msg))
 
-- 
2.20.1.611.gfbb209baf1-goog


^ permalink raw reply related

* Re: [PATCH v2 2/2] r8169: Avoid pointer aliasing
From: Heiner Kallweit @ 2019-02-04 18:44 UTC (permalink / raw)
  To: Thierry Reding, David S. Miller, Andrew Lunn
  Cc: Realtek linux nic maintainers, netdev, linux-kernel
In-Reply-To: <20190204164213.30727-2-thierry.reding@gmail.com>

On 04.02.2019 17:42, Thierry Reding wrote:
> From: Thierry Reding <treding@nvidia.com>
> 
> Read MAC address 32-bit at a time and manually extract the individual
> bytes. This avoids pointer aliasing and gives the compiler a better
> chance of optimizing the operation.
> 
> Suggested-by: Andrew Lunn <andrew@lunn.ch>
> Signed-off-by: Thierry Reding <treding@nvidia.com>
> ---
> Applies to net-next.
> 
> I tested this on a Jetson TX2 with an add-in Realtek ethernet card that
> has a properly programmed OTP to verify that I got the endianess right.
> Seems like everything works and the device behaves the same with or
> without this patch.
> 
>  drivers/net/ethernet/realtek/r8169.c | 15 ++++++++++++---
>  1 file changed, 12 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
> index 501891be7c56..192fbb36bc9f 100644
> --- a/drivers/net/ethernet/realtek/r8169.c
> +++ b/drivers/net/ethernet/realtek/r8169.c
> @@ -7113,12 +7113,21 @@ static int rtl_alloc_irq(struct rtl8169_private *tp)
>  static void rtl_read_mac_address(struct rtl8169_private *tp,
>  				 u8 mac_addr[ETH_ALEN])
>  {
> +	u32 value;
> +
>  	/* Get MAC address */
>  	switch (tp->mac_version) {
>  	case RTL_GIGA_MAC_VER_35 ... RTL_GIGA_MAC_VER_38:
>  	case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_51:
> -		*(u32 *)&mac_addr[0] = rtl_eri_read(tp, 0xe0, ERIAR_EXGMAC);
> -		*(u16 *)&mac_addr[4] = rtl_eri_read(tp, 0xe4, ERIAR_EXGMAC);
> +		value = rtl_eri_read(tp, 0xe0, ERIAR_EXGMAC);
> +		mac_addr[0] = (value >>  0) & 0xff;
> +		mac_addr[1] = (value >>  8) & 0xff;
> +		mac_addr[2] = (value >> 16) & 0xff;
> +		mac_addr[3] = (value >> 24) & 0xff;
> +
> +		value = rtl_eri_read(tp, 0xe4, ERIAR_EXGMAC);
> +		mac_addr[4] = (value >>  0) & 0xff;
> +		mac_addr[5] = (value >>  8) & 0xff;
>  		break;
>  	default:
>  		break;
> @@ -7316,7 +7325,7 @@ static int rtl_get_ether_clk(struct rtl8169_private *tp)
>  static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
>  {
>  	const struct rtl_cfg_info *cfg = rtl_cfg_infos + ent->driver_data;
> -	u8 mac_addr[ETH_ALEN] __aligned(4) = {};
> +	u8 mac_addr[ETH_ALEN] = {};
>  	struct rtl8169_private *tp;
>  	struct net_device *dev;
>  	int chipset, region, i;
> 
I just have one concern / question:

After this there's a call to is_valid_ether_addr(mac_addr) and kernel-doc of
is_valid_ether_addr() states that argument must be u16-aligned.
AFAIK that's not guaranteed for a byte array.

Heiner

^ permalink raw reply

* Need to retouch your photos?
From: Stacy @ 2019-02-04 11:55 UTC (permalink / raw)
  To: netdev

Need to retouch your photos?  Deep etching or masking for your photos?

We are the studio who can do those service for your photos.

Please send photos to start

Thanks,
Stacy

Herfodrd

Rottenburg

^ permalink raw reply

* Re: [ovs-dev] [PATCH net-next V2 1/1] openvswitch: Declare ovs key structures using macros
From: Yi-Hung Wei @ 2019-02-04 18:47 UTC (permalink / raw)
  To: Eli Britstein
  Cc: Pravin B Shelar, ovs dev, Linux Kernel Network Developers,
	Simon Horman, David S. Miller
In-Reply-To: <20190203091217.8459-1-elibr@mellanox.com>

On Sun, Feb 3, 2019 at 1:13 AM Eli Britstein <elibr@mellanox.com> wrote:
>
> Declare ovs key structures using macros as a pre-step towards to
> enable retrieving fields information, as a work done in proposed
> commit in the OVS tree https://patchwork.ozlabs.org/patch/1023406/
> ("odp-util: Do not rewrite fields with the same values as matched"),
> with no functional change.
>
> Signed-off-by: Eli Britstein <elibr@mellanox.com>
> Reviewed-by: Roi Dayan <roid@mellanox.com>
> ---
>  include/uapi/linux/openvswitch.h | 102 ++++++++++++++++++++++++++-------------
>  1 file changed, 69 insertions(+), 33 deletions(-)
>
> diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
> index dbe0cbe4f1b7..dc8246f871fd 100644
> --- a/include/uapi/linux/openvswitch.h
> +++ b/include/uapi/linux/openvswitch.h
> @@ -387,73 +387,109 @@ enum ovs_frag_type {
>
>  #define OVS_FRAG_TYPE_MAX (__OVS_FRAG_TYPE_MAX - 1)
>
> +#define OVS_KEY_FIELD_ARR(type, name, elements) type name[elements];
> +#define OVS_KEY_FIELD(type, name) type name;
......
> +#define OVS_KEY_IPV6_FIELDS \
> +    OVS_KEY_FIELD_ARR(__be32, ipv6_src, 4) \
> +    OVS_KEY_FIELD_ARR(__be32, ipv6_dst, 4) \
> +    OVS_KEY_FIELD(__be32, ipv6_label /* 20-bits in least-significant bits. */) \
> +    OVS_KEY_FIELD(__u8, ipv6_proto) \
> +    OVS_KEY_FIELD(__u8, ipv6_tclass) \
> +    OVS_KEY_FIELD(__u8, ipv6_hlimit) \
> +    OVS_KEY_FIELD(__u8, ipv6_frag /* One of OVS_FRAG_TYPE_*. */)
> +
>  struct ovs_key_ipv6 {
> -       __be32 ipv6_src[4];
> -       __be32 ipv6_dst[4];
> -       __be32 ipv6_label;      /* 20-bits in least-significant bits. */
> -       __u8   ipv6_proto;
> -       __u8   ipv6_tclass;
> -       __u8   ipv6_hlimit;
> -       __u8   ipv6_frag;       /* One of OVS_FRAG_TYPE_*. */
> +    OVS_KEY_IPV6_FIELDS
>  };

Hi Eli,

Thanks for the patch.  In my personal opinion, I feel this patch makes
the header file harder to read.

For example, to see how 'struct ovs_key_ipv6' is defined, now we need
to trace how OVS_KEY_IPV6_FIELDS is defined, and how OVS_KEY_FIELD_ARR
and OVS_KEY_FIELD defined.  I think it makes the header file to be
more complicated.

There are also some discussion on ovs-dev mailing list about this
patch: https://patchwork.ozlabs.org/cover/1023404/

Thanks,

-Yi-Hung

^ permalink raw reply

* [PATCH net-next 0/2] mlxsw: core: Trace EMAD errors
From: Ido Schimmel @ 2019-02-04 18:47 UTC (permalink / raw)
  To: netdev@vger.kernel.org
  Cc: davem@davemloft.net, Jiri Pirko, Nir Dotan, mlxsw, Ido Schimmel

Nir says:

This patchset adds a trace for EMAD errors to the existing EMAD payload
traces. This tracepoint is useful to track user or firmware errors during
tests execution.

Patch #1 defines the devlink tracepoint.
Patch #2 uses it for reporting mlxsw EMAD errors.

Nir Dotan (2):
  devlink: add hardware errors tracing facility
  mlxsw: core: Trace EMAD errors

 drivers/net/ethernet/mellanox/mlxsw/core.c |  6 +++-
 include/trace/events/devlink.h             | 33 ++++++++++++++++++++++
 net/core/devlink.c                         |  1 +
 3 files changed, 39 insertions(+), 1 deletion(-)

-- 
2.20.1

^ permalink raw reply

* [PATCH net-next 1/2] devlink: add hardware errors tracing facility
From: Ido Schimmel @ 2019-02-04 18:47 UTC (permalink / raw)
  To: netdev@vger.kernel.org
  Cc: davem@davemloft.net, Jiri Pirko, Nir Dotan, mlxsw, Ido Schimmel
In-Reply-To: <20190204184649.5292-1-idosch@mellanox.com>

From: Nir Dotan <nird@mellanox.com>

Define a tracepoint and allow user to trace messages in case of an hardware
error code for hardware associated with devlink instance.

Signed-off-by: Nir Dotan <nird@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
---
 include/trace/events/devlink.h | 33 +++++++++++++++++++++++++++++++++
 net/core/devlink.c             |  1 +
 2 files changed, 34 insertions(+)

diff --git a/include/trace/events/devlink.h b/include/trace/events/devlink.h
index 44acfbca1266..40705364a50f 100644
--- a/include/trace/events/devlink.h
+++ b/include/trace/events/devlink.h
@@ -46,6 +46,35 @@ TRACE_EVENT(devlink_hwmsg,
 		  (int) __entry->len, __get_dynamic_array(buf), __entry->len)
 );
 
+/*
+ * Tracepoint for devlink hardware error:
+ */
+TRACE_EVENT(devlink_hwerr,
+	TP_PROTO(const struct devlink *devlink, int err, const char *msg),
+
+	TP_ARGS(devlink, err, msg),
+
+	TP_STRUCT__entry(
+		__string(bus_name, devlink->dev->bus->name)
+		__string(dev_name, dev_name(devlink->dev))
+		__string(driver_name, devlink->dev->driver->name)
+		__field(int, err)
+		__string(msg, msg)
+		),
+
+	TP_fast_assign(
+		__assign_str(bus_name, devlink->dev->bus->name);
+		__assign_str(dev_name, dev_name(devlink->dev));
+		__assign_str(driver_name, devlink->dev->driver->name);
+		__entry->err = err;
+		__assign_str(msg, msg);
+		),
+
+	TP_printk("bus_name=%s dev_name=%s driver_name=%s err=%d %s",
+			__get_str(bus_name), __get_str(dev_name),
+			__get_str(driver_name), __entry->err, __get_str(msg))
+);
+
 #endif /* _TRACE_DEVLINK_H */
 
 /* This part must be outside protection */
@@ -64,6 +93,10 @@ static inline void trace_devlink_hwmsg(const struct devlink *devlink,
 {
 }
 
+static inline void trace_devlink_hwerr(const struct devlink *devlink,
+				       int err, const char *msg)
+{
+}
 #endif /* _TRACE_DEVLINK_H */
 
 #endif
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 52bf27491fb8..cd0d393bc62d 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -81,6 +81,7 @@ struct devlink_dpipe_header devlink_dpipe_header_ipv6 = {
 EXPORT_SYMBOL(devlink_dpipe_header_ipv6);
 
 EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_hwmsg);
+EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_hwerr);
 
 static LIST_HEAD(devlink_list);
 
-- 
2.20.1


^ permalink raw reply related

* [PATCH net-next 2/2] mlxsw: core: Trace EMAD errors
From: Ido Schimmel @ 2019-02-04 18:47 UTC (permalink / raw)
  To: netdev@vger.kernel.org
  Cc: davem@davemloft.net, Jiri Pirko, Nir Dotan, mlxsw, Ido Schimmel
In-Reply-To: <20190204184649.5292-1-idosch@mellanox.com>

From: Nir Dotan <nird@mellanox.com>

Trace EMAD errors returned from HW.

Signed-off-by: Nir Dotan <nird@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlxsw/core.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c
index ddedf8ab5b64..4f6fa515394e 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.c
@@ -1460,13 +1460,17 @@ static int mlxsw_reg_trans_wait(struct mlxsw_reg_trans *trans)
 	if (trans->retries)
 		dev_warn(mlxsw_core->bus_info->dev, "EMAD retries (%d/%d) (tid=%llx)\n",
 			 trans->retries, MLXSW_EMAD_MAX_RETRY, trans->tid);
-	if (err)
+	if (err) {
 		dev_err(mlxsw_core->bus_info->dev, "EMAD reg access failed (tid=%llx,reg_id=%x(%s),type=%s,status=%x(%s))\n",
 			trans->tid, trans->reg->id,
 			mlxsw_reg_id_str(trans->reg->id),
 			mlxsw_core_reg_access_type_str(trans->type),
 			trans->emad_status,
 			mlxsw_emad_op_tlv_status_str(trans->emad_status));
+		trace_devlink_hwerr(priv_to_devlink(mlxsw_core),
+				    trans->emad_status,
+				    mlxsw_emad_op_tlv_status_str(trans->emad_status));
+	}
 
 	list_del(&trans->bulk_list);
 	kfree_rcu(trans, rcu);
-- 
2.20.1


^ permalink raw reply related

* Re: bpf: BPF_PROG_TEST_RUN leads to unkillable process
From: Y Song @ 2019-02-04 18:53 UTC (permalink / raw)
  To: Stanislav Fomichev
  Cc: Dmitry Vyukov, Alexei Starovoitov, Daniel Borkmann,
	Martin KaFai Lau, songliubraving, Yonghong Song, netdev, LKML,
	syzkaller
In-Reply-To: <20190204174856.GA10769@mini-arch>

On Mon, Feb 4, 2019 at 9:49 AM Stanislav Fomichev <sdf@fomichev.me> wrote:
>
> On 02/01, Dmitry Vyukov wrote:
> > Hello,
> >
> > The following program leads to an unkillable process that eats CPU in
> > an infinite loop in BPF_PROG_TEST_RUN syscall. But kernel does not
> > self-detect cpu/rcu/task stalls either. The program contains max
> > number of repetitions, but as far as I see the intention is that it
> > should be killable. I see that bpf_test_run() checks for
> > signal_pending(current), but it does so only if need_resched() is also
> > set. Can need_resched() be not set for prolonged periods of time?
> > /proc/pid/stack is empty, not sure what other info I can provide.
> There is a bunch of places in the kernel where we do the same nested check:
>
> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/net/ethernet/broadcom/tg3.c#n12059
> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/char/hw_random/s390-trng.c#n80
> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/char/random.c#n1049
> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/arch/s390/crypto/prng.c#n470
>
> So it's not something unusual we do. OTOH, in the kernel/bpf/verifier.c
> do_check() we do signal_pending() and need_resched() sequentially. In
> theory, it should not hurt to do them in sequence. Any thoughts about
> the patch below? I think we also need to properly return -ERESTARTSYS
> when returning from signal_pending().

I think return value -ERESTARTSYS should be okay.
For the test_run attributes,

        struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */
                __u32           prog_fd;
                __u32           retval;
                __u32           data_size_in;   /* input: len of data_in */
                __u32           data_size_out;  /* input/output: len of data_out
                                                 *   returns ENOSPC if data_out
                                                 *   is too small.
                                                 */
                __aligned_u64   data_in;
                __aligned_u64   data_out;
                __u32           repeat;
                __u32           duration;
        } test;

The field data_size_out could be changed during the system call.
But that only happens at bpf_test_finish(). At the time when
-ERESTARTSYS is returned, no attributes have been changed.

>
> --
>
> From ce360c909ce4f3caf8eb69f2ad5ce0d3eee1515d Mon Sep 17 00:00:00 2001
> Message-Id: <ce360c909ce4f3caf8eb69f2ad5ce0d3eee1515d.1549302207.git.sdf@google.com>
> From: Stanislav Fomichev <sdf@google.com>
> Date: Mon, 4 Feb 2019 09:17:37 -0800
> Subject: [PATCH bpf] bpf/test_run: properly handle signal_pending
>
> Syzbot found out that running BPF_PROG_TEST_RUN with repeat=0xffffffff
> makes process unkillable. Let's move signal_pending out of need_resched
> and properly return -ERESTARTSYS to the userspace.
>
> In the kernel/bpf/verifier.c do_check() we do:
>         if (signal_pending())
>                 ...
>         if (need_resched())
>                 ...
>
> Reported-by: syzbot <syzkaller@googlegroups.com>
> Signed-off-by: Stanislav Fomichev <sdf@google.com>
> ---
>  net/bpf/test_run.c | 15 +++++++++------
>  1 file changed, 9 insertions(+), 6 deletions(-)
>
> diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
> index fa2644d276ef..a891c60cf248 100644
> --- a/net/bpf/test_run.c
> +++ b/net/bpf/test_run.c
> @@ -28,12 +28,13 @@ static __always_inline u32 bpf_test_run_one(struct bpf_prog *prog, void *ctx,
>         return ret;
>  }
>
> -static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, u32 *ret,
> -                       u32 *time)
> +static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
> +                       u32 *retval, u32 *time)
>  {
>         struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = { 0 };
>         enum bpf_cgroup_storage_type stype;
>         u64 time_start, time_spent = 0;
> +       int ret = 0;
>         u32 i;
>
>         for_each_cgroup_storage_type(stype) {
> @@ -50,10 +51,12 @@ static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, u32 *ret,
>                 repeat = 1;
>         time_start = ktime_get_ns();
>         for (i = 0; i < repeat; i++) {
> -               *ret = bpf_test_run_one(prog, ctx, storage);
> +               *retval = bpf_test_run_one(prog, ctx, storage);
> +               if (signal_pending(current)) {
> +                       ret = -ERESTARTSYS;
> +                       break;
> +               }
>                 if (need_resched()) {
> -                       if (signal_pending(current))
> -                               break;
>                         time_spent += ktime_get_ns() - time_start;
>                         cond_resched();
>                         time_start = ktime_get_ns();
> @@ -66,7 +69,7 @@ static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, u32 *ret,
>         for_each_cgroup_storage_type(stype)
>                 bpf_cgroup_storage_free(storage[stype]);
>
> -       return 0;
> +       return ret;
>  }
>
>  static int bpf_test_finish(const union bpf_attr *kattr,
>
> >
> > Tested is on upstream commit 4aa9fc2a435abe95a1e8d7f8c7b3d6356514b37a.
> > Config is attached.
> >
> > FTR, generated from the following syzkaller program:
> >
> > r1 = bpf$PROG_LOAD(0x5, &(0x7f0000000080)={0x3, 0x3,
> > &(0x7f0000001fd8)=@framed={{0xffffff85, 0x0, 0x0, 0x0, 0x13, 0x5}},
> > &(0x7f0000000000)='\x00', 0x5, 0x487, &(0x7f000000cf3d)=""/195}, 0x48)
> > bpf$BPF_PROG_TEST_RUN(0xa, &(0x7f0000000200)={r1, 0x0, 0xe, 0x0,
> > &(0x7f0000000100)="8557147d6187677523fea28c88a8", 0x0,
> > 0xfffffffffffffffe}, 0x28)
> >
> >
> > // autogenerated by syzkaller (https://github.com/google/syzkaller)
> > #define _GNU_SOURCE
> > #include <endian.h>
> > #include <stdint.h>
> > #include <stdio.h>
> > #include <stdlib.h>
> > #include <string.h>
> > #include <sys/syscall.h>
> > #include <sys/types.h>
> > #include <unistd.h>
> >
> > int main(void)
> > {
> >   syscall(__NR_mmap, 0x20000000, 0x1000000, 3, 0x32, -1, 0);
> >   long res = 0;
> >   *(uint32_t*)0x20000080 = 3;
> >   *(uint32_t*)0x20000084 = 3;
> >   *(uint64_t*)0x20000088 = 0x20001fd8;
> >   *(uint8_t*)0x20001fd8 = 0x85;
> >   *(uint8_t*)0x20001fd9 = 0x44;
> >   *(uint16_t*)0x20001fda = 0;
> >   *(uint32_t*)0x20001fdc = 0x13;
> >   *(uint8_t*)0x20001fe0 = 5;
> >   *(uint8_t*)0x20001fe1 = 0;
> >   *(uint16_t*)0x20001fe2 = 0;
> >   *(uint32_t*)0x20001fe4 = 0;
> >   *(uint8_t*)0x20001fe8 = 0x95;
> >   *(uint8_t*)0x20001fe9 = 0;
> >   *(uint16_t*)0x20001fea = 0;
> >   *(uint32_t*)0x20001fec = 0;
> >   *(uint64_t*)0x20000090 = 0x20000000;
> >   memcpy((void*)0x20000000, "\000", 1);
> >   *(uint32_t*)0x20000098 = 5;
> >   *(uint32_t*)0x2000009c = 0x487;
> >   *(uint64_t*)0x200000a0 = 0x2000cf3d;
> >   *(uint32_t*)0x200000a8 = 0;
> >   *(uint32_t*)0x200000ac = 0;
> >   *(uint8_t*)0x200000b0 = 0;
> >   *(uint8_t*)0x200000b1 = 0;
> >   *(uint8_t*)0x200000b2 = 0;
> >   *(uint8_t*)0x200000b3 = 0;
> >   *(uint8_t*)0x200000b4 = 0;
> >   *(uint8_t*)0x200000b5 = 0;
> >   *(uint8_t*)0x200000b6 = 0;
> >   *(uint8_t*)0x200000b7 = 0;
> >   *(uint8_t*)0x200000b8 = 0;
> >   *(uint8_t*)0x200000b9 = 0;
> >   *(uint8_t*)0x200000ba = 0;
> >   *(uint8_t*)0x200000bb = 0;
> >   *(uint8_t*)0x200000bc = 0;
> >   *(uint8_t*)0x200000bd = 0;
> >   *(uint8_t*)0x200000be = 0;
> >   *(uint8_t*)0x200000bf = 0;
> >   *(uint32_t*)0x200000c0 = 0;
> >   *(uint32_t*)0x200000c4 = 0;
> >   int fd = syscall(__NR_bpf, 5, 0x20000080, 0x48);
> >   *(uint32_t*)0x20000200 = fd;
> >   *(uint32_t*)0x20000204 = 0;
> >   *(uint32_t*)0x20000208 = 0xe;
> >   *(uint32_t*)0x2000020c = 0;
> >   *(uint64_t*)0x20000210 = 0x20000100;
> >   memcpy((void*)0x20000100,
> >          "\x85\x57\x14\x7d\x61\x87\x67\x75\x23\xfe\xa2\x8c\x88\xa8", 14);
> >   *(uint64_t*)0x20000218 = 0;
> >   *(uint32_t*)0x20000220 = 0xfffffffe;
> >   *(uint32_t*)0x20000224 = 0;
> >   syscall(__NR_bpf, 0xa, 0x20000200, 0x28);
> >   return 0;
> > }
>
>

^ permalink raw reply

* [PATCH bpf-next 0/2] tools/bpf: expose several libbpf API functions
From: Yonghong Song @ 2019-02-04 19:00 UTC (permalink / raw)
  To: netdev; +Cc: Alexei Starovoitov, Daniel Borkmann, kernel-team, Yonghong Song

This patch set exposed a few functions in libbpf.
All these newly added API functions are helpful for
JIT based bpf compilation where .BTF and .BTF.ext
are available as in-memory data blobs.

Patch #1 exposed several btf_ext__* API functions which
are used to handle .BTF.ext ELF sections.
Patch #2 refactored the function bpf_map_find_btf_info()
and exposed API function btf__get_map_kv_tids() to
retrieve the map key/value type id's generated by
bpf program through BPF_ANNOTATE_KV_PAIR macro.

Yonghong Song (2):
  tools/bpf: expose functions btf_ext__* as API functions
  tools/bpf: implement libbpf btf__get_map_kv_tids() API function

 tools/lib/bpf/btf.c      | 73 ++++++++++++++++++++++++++++++++++++++++
 tools/lib/bpf/btf.h      | 28 ++++++++-------
 tools/lib/bpf/libbpf.c   | 72 +++++----------------------------------
 tools/lib/bpf/libbpf.map |  7 ++++
 4 files changed, 105 insertions(+), 75 deletions(-)

-- 
2.17.1

^ permalink raw reply

* [PATCH bpf-next 1/2] tools/bpf: expose functions btf_ext__* as API functions
From: Yonghong Song @ 2019-02-04 19:00 UTC (permalink / raw)
  To: netdev; +Cc: Alexei Starovoitov, Daniel Borkmann, kernel-team, Yonghong Song
In-Reply-To: <20190204190057.3965903-1-yhs@fb.com>

The following set of functions, which manipulates .BTF.ext
section, are exposed as API functions:
  . btf_ext__new
  . btf_ext__free
  . btf_ext__reloc_func_info
  . btf_ext__reloc_line_info
  . btf_ext__func_info_rec_size
  . btf_ext__line_info_rec_size

These functions are useful for JIT based bpf codegen, e.g.,
bcc, to manipulate in-memory .BTF.ext sections.

The signature of function btf_ext__reloc_func_info()
is also changed to be the same as its definition in btf.c.

Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
---
 tools/lib/bpf/btf.h      | 24 ++++++++++++------------
 tools/lib/bpf/libbpf.map |  6 ++++++
 2 files changed, 18 insertions(+), 12 deletions(-)

diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
index b1e8e54cc21d..418389e2a662 100644
--- a/tools/lib/bpf/btf.h
+++ b/tools/lib/bpf/btf.h
@@ -67,18 +67,18 @@ LIBBPF_API int btf__fd(const struct btf *btf);
 LIBBPF_API const char *btf__name_by_offset(const struct btf *btf, __u32 offset);
 LIBBPF_API int btf__get_from_id(__u32 id, struct btf **btf);
 
-struct btf_ext *btf_ext__new(__u8 *data, __u32 size);
-void btf_ext__free(struct btf_ext *btf_ext);
-int btf_ext__reloc_func_info(const struct btf *btf,
-			     const struct btf_ext *btf_ext,
-			     const char *sec_name, __u32 insns_cnt,
-			     void **func_info, __u32 *func_info_len);
-int btf_ext__reloc_line_info(const struct btf *btf,
-			     const struct btf_ext *btf_ext,
-			     const char *sec_name, __u32 insns_cnt,
-			     void **line_info, __u32 *cnt);
-__u32 btf_ext__func_info_rec_size(const struct btf_ext *btf_ext);
-__u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext);
+LIBBPF_API struct btf_ext *btf_ext__new(__u8 *data, __u32 size);
+LIBBPF_API void btf_ext__free(struct btf_ext *btf_ext);
+LIBBPF_API int btf_ext__reloc_func_info(const struct btf *btf,
+					const struct btf_ext *btf_ext,
+					const char *sec_name, __u32 insns_cnt,
+					void **func_info, __u32 *cnt);
+LIBBPF_API int btf_ext__reloc_line_info(const struct btf *btf,
+					const struct btf_ext *btf_ext,
+					const char *sec_name, __u32 insns_cnt,
+					void **line_info, __u32 *cnt);
+LIBBPF_API __u32 btf_ext__func_info_rec_size(const struct btf_ext *btf_ext);
+LIBBPF_API __u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext);
 
 #ifdef __cplusplus
 } /* extern "C" */
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 62c680fb13d1..46441c5f030b 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -133,4 +133,10 @@ LIBBPF_0.0.2 {
 		bpf_map_lookup_elem_flags;
 		bpf_object__find_map_fd_by_name;
 		bpf_get_link_xdp_id;
+		btf_ext__free;
+		btf_ext__func_info_rec_size;
+		btf_ext__line_info_rec_size;
+		btf_ext__new;
+		btf_ext__reloc_func_info;
+		btf_ext__reloc_line_info;
 } LIBBPF_0.0.1;
-- 
2.17.1


^ permalink raw reply related

* [PATCH bpf-next 2/2] tools/bpf: implement libbpf btf__get_map_kv_tids() API function
From: Yonghong Song @ 2019-02-04 19:00 UTC (permalink / raw)
  To: netdev; +Cc: Alexei Starovoitov, Daniel Borkmann, kernel-team, Yonghong Song
In-Reply-To: <20190204190057.3965903-1-yhs@fb.com>

Currently, to get map key/value type id's, the macro
  BPF_ANNOTATE_KV_PAIR(<map_name>, <key_type>, <value_type>)
needs to be defined in the bpf program for the
corresponding map.

During program/map loading time,
the local static function bpf_map_find_btf_info()
in libbpf.c is implemented to retrieve the key/value
type ids given the map name.

The patch refactored function bpf_map_find_btf_info()
to create an API btf__get_map_kv_tids() which includes
the bulk of implementation for the original function.
The API btf__get_map_kv_tids() can be used by bcc,
a JIT based bpf compilation system, which uses the
same BPF_ANNOTATE_KV_PAIR to record map key/value types.

Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
---
 tools/lib/bpf/btf.c      | 73 ++++++++++++++++++++++++++++++++++++++++
 tools/lib/bpf/btf.h      |  4 +++
 tools/lib/bpf/libbpf.c   | 72 +++++----------------------------------
 tools/lib/bpf/libbpf.map |  1 +
 4 files changed, 87 insertions(+), 63 deletions(-)

diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 51a0db05bf80..7ec0463354db 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
 /* Copyright (c) 2018 Facebook */
 
+#include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
@@ -504,6 +505,78 @@ int btf__get_from_id(__u32 id, struct btf **btf)
 	return err;
 }
 
+int btf__get_map_kv_tids(const struct btf *btf, char *map_name,
+			 __u32 expected_key_size, __u32 expected_value_size,
+			 __u32 *key_type_id, __u32 *value_type_id)
+{
+	const struct btf_type *container_type;
+	const struct btf_member *key, *value;
+	const size_t max_name = 256;
+	char container_name[max_name];
+	__s64 key_size, value_size;
+	__s32 container_id;
+
+	if (snprintf(container_name, max_name, "____btf_map_%s", map_name) ==
+	    max_name) {
+		pr_warning("map:%s length of '____btf_map_%s' is too long\n",
+			   map_name, map_name);
+		return -EINVAL;
+	}
+
+	container_id = btf__find_by_name(btf, container_name);
+	if (container_id < 0) {
+		pr_warning("map:%s container_name:%s cannot be found in BTF. Missing BPF_ANNOTATE_KV_PAIR?\n",
+			   map_name, container_name);
+		return container_id;
+	}
+
+	container_type = btf__type_by_id(btf, container_id);
+	if (!container_type) {
+		pr_warning("map:%s cannot find BTF type for container_id:%u\n",
+			   map_name, container_id);
+		return -EINVAL;
+	}
+
+	if (BTF_INFO_KIND(container_type->info) != BTF_KIND_STRUCT ||
+	    BTF_INFO_VLEN(container_type->info) < 2) {
+		pr_warning("map:%s container_name:%s is an invalid container struct\n",
+			   map_name, container_name);
+		return -EINVAL;
+	}
+
+	key = (struct btf_member *)(container_type + 1);
+	value = key + 1;
+
+	key_size = btf__resolve_size(btf, key->type);
+	if (key_size < 0) {
+		pr_warning("map:%s invalid BTF key_type_size\n", map_name);
+		return key_size;
+	}
+
+	if (expected_key_size != key_size) {
+		pr_warning("map:%s btf_key_type_size:%u != map_def_key_size:%u\n",
+			   map_name, (__u32)key_size, expected_key_size);
+		return -EINVAL;
+	}
+
+	value_size = btf__resolve_size(btf, value->type);
+	if (value_size < 0) {
+		pr_warning("map:%s invalid BTF value_type_size\n", map_name);
+		return value_size;
+	}
+
+	if (expected_value_size != value_size) {
+		pr_warning("map:%s btf_value_type_size:%u != map_def_value_size:%u\n",
+			   map_name, (__u32)value_size, expected_value_size);
+		return -EINVAL;
+	}
+
+	*key_type_id = key->type;
+	*value_type_id = value->type;
+
+	return 0;
+}
+
 struct btf_ext_sec_copy_param {
 	__u32 off;
 	__u32 len;
diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
index 418389e2a662..258c87e9f55d 100644
--- a/tools/lib/bpf/btf.h
+++ b/tools/lib/bpf/btf.h
@@ -66,6 +66,10 @@ LIBBPF_API int btf__resolve_type(const struct btf *btf, __u32 type_id);
 LIBBPF_API int btf__fd(const struct btf *btf);
 LIBBPF_API const char *btf__name_by_offset(const struct btf *btf, __u32 offset);
 LIBBPF_API int btf__get_from_id(__u32 id, struct btf **btf);
+LIBBPF_API int btf__get_map_kv_tids(const struct btf *btf, char *map_name,
+				    __u32 expected_key_size,
+				    __u32 expected_value_size,
+				    __u32 *key_type_id, __u32 *value_type_id);
 
 LIBBPF_API struct btf_ext *btf_ext__new(__u8 *data, __u32 size);
 LIBBPF_API void btf_ext__free(struct btf_ext *btf_ext);
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index ce209ab9a1a2..84ca6c2bea91 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -1056,72 +1056,18 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr,
 
 static int bpf_map_find_btf_info(struct bpf_map *map, const struct btf *btf)
 {
-	const struct btf_type *container_type;
-	const struct btf_member *key, *value;
 	struct bpf_map_def *def = &map->def;
-	const size_t max_name = 256;
-	char container_name[max_name];
-	__s64 key_size, value_size;
-	__s32 container_id;
-
-	if (snprintf(container_name, max_name, "____btf_map_%s", map->name) ==
-	    max_name) {
-		pr_warning("map:%s length of '____btf_map_%s' is too long\n",
-			   map->name, map->name);
-		return -EINVAL;
-	}
-
-	container_id = btf__find_by_name(btf, container_name);
-	if (container_id < 0) {
-		pr_debug("map:%s container_name:%s cannot be found in BTF. Missing BPF_ANNOTATE_KV_PAIR?\n",
-			 map->name, container_name);
-		return container_id;
-	}
-
-	container_type = btf__type_by_id(btf, container_id);
-	if (!container_type) {
-		pr_warning("map:%s cannot find BTF type for container_id:%u\n",
-			   map->name, container_id);
-		return -EINVAL;
-	}
-
-	if (BTF_INFO_KIND(container_type->info) != BTF_KIND_STRUCT ||
-	    BTF_INFO_VLEN(container_type->info) < 2) {
-		pr_warning("map:%s container_name:%s is an invalid container struct\n",
-			   map->name, container_name);
-		return -EINVAL;
-	}
-
-	key = (struct btf_member *)(container_type + 1);
-	value = key + 1;
-
-	key_size = btf__resolve_size(btf, key->type);
-	if (key_size < 0) {
-		pr_warning("map:%s invalid BTF key_type_size\n",
-			   map->name);
-		return key_size;
-	}
-
-	if (def->key_size != key_size) {
-		pr_warning("map:%s btf_key_type_size:%u != map_def_key_size:%u\n",
-			   map->name, (__u32)key_size, def->key_size);
-		return -EINVAL;
-	}
-
-	value_size = btf__resolve_size(btf, value->type);
-	if (value_size < 0) {
-		pr_warning("map:%s invalid BTF value_type_size\n", map->name);
-		return value_size;
-	}
+	__u32 key_type_id, value_type_id;
+	int ret;
 
-	if (def->value_size != value_size) {
-		pr_warning("map:%s btf_value_type_size:%u != map_def_value_size:%u\n",
-			   map->name, (__u32)value_size, def->value_size);
-		return -EINVAL;
-	}
+	ret = btf__get_map_kv_tids(btf, map->name, def->key_size,
+				   def->value_size, &key_type_id,
+				   &value_type_id);
+	if (ret)
+		return ret;
 
-	map->btf_key_type_id = key->type;
-	map->btf_value_type_id = value->type;
+	map->btf_key_type_id = key_type_id;
+	map->btf_value_type_id = value_type_id;
 
 	return 0;
 }
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 46441c5f030b..7990e857e003 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -133,6 +133,7 @@ LIBBPF_0.0.2 {
 		bpf_map_lookup_elem_flags;
 		bpf_object__find_map_fd_by_name;
 		bpf_get_link_xdp_id;
+		btf__get_map_kv_tids;
 		btf_ext__free;
 		btf_ext__func_info_rec_size;
 		btf_ext__line_info_rec_size;
-- 
2.17.1


^ permalink raw reply related

* Re: [PATCH v2 1/2] r8169: Load MAC address from device tree if present
From: Heiner Kallweit @ 2019-02-04 19:30 UTC (permalink / raw)
  To: Thierry Reding, David S. Miller
  Cc: Andrew Lunn, Realtek linux nic maintainers, netdev, linux-kernel,
	devicetree
In-Reply-To: <20190204164213.30727-1-thierry.reding@gmail.com>

On 04.02.2019 17:42, Thierry Reding wrote:
> From: Thierry Reding <treding@nvidia.com>
> 
> If the system was booted using a device tree and if the device tree
> contains a MAC address, use it instead of reading one from the EEPROM.
> This is useful in situations where the EEPROM isn't properly programmed
> or where the firmware wants to override the existing MAC address.
> 
> Signed-off-by: Thierry Reding <treding@nvidia.com>

Reviewed-by: Heiner Kallweit <hkallweit1@gmail.com>


^ permalink raw reply

* Re: [PATCH] net: phylink: dsa: mv88e6xxx: Revise irq setup ordering
From: Andrew Lunn @ 2019-02-04 19:35 UTC (permalink / raw)
  To: John David Anglin; +Cc: Russell King, Vivien Didelot, Florian Fainelli, netdev
In-Reply-To: <53b49df8-53ed-704f-9197-230b18d83090@bell.net>

On Mon, Feb 04, 2019 at 01:37:13PM -0500, John David Anglin wrote:
> This change fixes a race condition in the setup of hardware irqs and the
> code enabling PHY link
> detection.
> 
> This was observed on the espressobin board where the GPIO interrupt
> controller only supports edge
> interrupts.  If the INTn output pin goes low before the GPIO interrupt
> is enabled, PHY link interrupts
> are not detected.

Hi David

Please break this up into two patches.

Masking interrupts in the setup code before enabling the interrupt i'm
happy with.

The change to the interrupt handler i'm pretty sure is wrong. You have
to accept with edge interrupts you are going to loose interrupts.

    Andrew

^ permalink raw reply

* Re: [ovs-dev] [PATCH net-next V2 1/1] openvswitch: Declare ovs key structures using macros
From: Gregory Rose @ 2019-02-04 19:41 UTC (permalink / raw)
  To: Eli Britstein, Pravin B Shelar; +Cc: dev, netdev, Simon Horman, David S. Miller
In-Reply-To: <20190203091217.8459-1-elibr@mellanox.com>


On 2/3/2019 1:12 AM, Eli Britstein wrote:
> Declare ovs key structures using macros as a pre-step towards to
> enable retrieving fields information, as a work done in proposed
> commit in the OVS tree https://patchwork.ozlabs.org/patch/1023406/
> ("odp-util: Do not rewrite fields with the same values as matched"),
> with no functional change.
>
> Signed-off-by: Eli Britstein <elibr@mellanox.com>
> Reviewed-by: Roi Dayan <roid@mellanox.com>

Obscuring the structures with these macros is awful.  I'm opposed but I 
see it has already been
accepted upstream so I guess that's that.

Ugh...

- Greg

> ---
>   include/uapi/linux/openvswitch.h | 102 ++++++++++++++++++++++++++-------------
>   1 file changed, 69 insertions(+), 33 deletions(-)
>
> diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
> index dbe0cbe4f1b7..dc8246f871fd 100644
> --- a/include/uapi/linux/openvswitch.h
> +++ b/include/uapi/linux/openvswitch.h
> @@ -387,73 +387,109 @@ enum ovs_frag_type {
>   
>   #define OVS_FRAG_TYPE_MAX (__OVS_FRAG_TYPE_MAX - 1)
>   
> +#define OVS_KEY_FIELD_ARR(type, name, elements) type name[elements];
> +#define OVS_KEY_FIELD(type, name) type name;
> +
> +#define OVS_KEY_ETHERNET_FIELDS \
> +    OVS_KEY_FIELD_ARR(__u8, eth_src, ETH_ALEN) \
> +    OVS_KEY_FIELD_ARR(__u8, eth_dst, ETH_ALEN)
> +
>   struct ovs_key_ethernet {
> -	__u8	 eth_src[ETH_ALEN];
> -	__u8	 eth_dst[ETH_ALEN];
> +    OVS_KEY_ETHERNET_FIELDS
>   };
>   
>   struct ovs_key_mpls {
>   	__be32 mpls_lse;
>   };
>   
> +#define OVS_KEY_IPV4_FIELDS \
> +    OVS_KEY_FIELD(__be32, ipv4_src) \
> +    OVS_KEY_FIELD(__be32, ipv4_dst) \
> +    OVS_KEY_FIELD(__u8, ipv4_proto) \
> +    OVS_KEY_FIELD(__u8, ipv4_tos) \
> +    OVS_KEY_FIELD(__u8, ipv4_ttl) \
> +    OVS_KEY_FIELD(__u8, ipv4_frag /* One of OVS_FRAG_TYPE_*. */)
> +
>   struct ovs_key_ipv4 {
> -	__be32 ipv4_src;
> -	__be32 ipv4_dst;
> -	__u8   ipv4_proto;
> -	__u8   ipv4_tos;
> -	__u8   ipv4_ttl;
> -	__u8   ipv4_frag;	/* One of OVS_FRAG_TYPE_*. */
> +    OVS_KEY_IPV4_FIELDS
>   };
>   
> +#define OVS_KEY_IPV6_FIELDS \
> +    OVS_KEY_FIELD_ARR(__be32, ipv6_src, 4) \
> +    OVS_KEY_FIELD_ARR(__be32, ipv6_dst, 4) \
> +    OVS_KEY_FIELD(__be32, ipv6_label /* 20-bits in least-significant bits. */) \
> +    OVS_KEY_FIELD(__u8, ipv6_proto) \
> +    OVS_KEY_FIELD(__u8, ipv6_tclass) \
> +    OVS_KEY_FIELD(__u8, ipv6_hlimit) \
> +    OVS_KEY_FIELD(__u8, ipv6_frag /* One of OVS_FRAG_TYPE_*. */)
> +
>   struct ovs_key_ipv6 {
> -	__be32 ipv6_src[4];
> -	__be32 ipv6_dst[4];
> -	__be32 ipv6_label;	/* 20-bits in least-significant bits. */
> -	__u8   ipv6_proto;
> -	__u8   ipv6_tclass;
> -	__u8   ipv6_hlimit;
> -	__u8   ipv6_frag;	/* One of OVS_FRAG_TYPE_*. */
> +    OVS_KEY_IPV6_FIELDS
>   };
>   
> +#define OVS_KEY_TCP_FIELDS \
> +    OVS_KEY_FIELD(__be16, tcp_src) \
> +    OVS_KEY_FIELD(__be16, tcp_dst)
> +
>   struct ovs_key_tcp {
> -	__be16 tcp_src;
> -	__be16 tcp_dst;
> +    OVS_KEY_TCP_FIELDS
>   };
>   
> +#define OVS_KEY_UDP_FIELDS \
> +    OVS_KEY_FIELD(__be16, udp_src) \
> +    OVS_KEY_FIELD(__be16, udp_dst)
> +
>   struct ovs_key_udp {
> -	__be16 udp_src;
> -	__be16 udp_dst;
> +    OVS_KEY_UDP_FIELDS
>   };
>   
> +#define OVS_KEY_SCTP_FIELDS \
> +    OVS_KEY_FIELD(__be16, sctp_src) \
> +    OVS_KEY_FIELD(__be16, sctp_dst)
> +
>   struct ovs_key_sctp {
> -	__be16 sctp_src;
> -	__be16 sctp_dst;
> +    OVS_KEY_SCTP_FIELDS
>   };
>   
> +#define OVS_KEY_ICMP_FIELDS \
> +    OVS_KEY_FIELD(__u8, icmp_type) \
> +    OVS_KEY_FIELD(__u8, icmp_code)
> +
>   struct ovs_key_icmp {
> -	__u8 icmp_type;
> -	__u8 icmp_code;
> +    OVS_KEY_ICMP_FIELDS
>   };
>   
> +#define OVS_KEY_ICMPV6_FIELDS \
> +    OVS_KEY_FIELD(__u8, icmpv6_type) \
> +    OVS_KEY_FIELD(__u8, icmpv6_code)
> +
>   struct ovs_key_icmpv6 {
> -	__u8 icmpv6_type;
> -	__u8 icmpv6_code;
> +    OVS_KEY_ICMPV6_FIELDS
>   };
>   
> +#define OVS_KEY_ARP_FIELDS \
> +    OVS_KEY_FIELD(__be32, arp_sip) \
> +    OVS_KEY_FIELD(__be32, arp_tip) \
> +    OVS_KEY_FIELD(__be16, arp_op) \
> +    OVS_KEY_FIELD_ARR(__u8, arp_sha, ETH_ALEN) \
> +    OVS_KEY_FIELD_ARR(__u8, arp_tha, ETH_ALEN)
> +
>   struct ovs_key_arp {
> -	__be32 arp_sip;
> -	__be32 arp_tip;
> -	__be16 arp_op;
> -	__u8   arp_sha[ETH_ALEN];
> -	__u8   arp_tha[ETH_ALEN];
> +    OVS_KEY_ARP_FIELDS
>   };
>   
> +#define OVS_KEY_ND_FIELDS \
> +    OVS_KEY_FIELD_ARR(__be32, nd_target, 4) \
> +    OVS_KEY_FIELD_ARR(__u8, nd_sll, ETH_ALEN) \
> +    OVS_KEY_FIELD_ARR(__u8, nd_tll, ETH_ALEN)
> +
>   struct ovs_key_nd {
> -	__be32	nd_target[4];
> -	__u8	nd_sll[ETH_ALEN];
> -	__u8	nd_tll[ETH_ALEN];
> +    OVS_KEY_ND_FIELDS
>   };
>   
> +#undef OVS_KEY_FIELD_ARR
> +#undef OVS_KEY_FIELD
> +
>   #define OVS_CT_LABELS_LEN_32	4
>   #define OVS_CT_LABELS_LEN	(OVS_CT_LABELS_LEN_32 * sizeof(__u32))
>   struct ovs_key_ct_labels {


^ permalink raw reply

* Re: [PATCH] net: phylink: dsa: mv88e6xxx: Revise irq setup ordering
From: John David Anglin @ 2019-02-04 19:52 UTC (permalink / raw)
  To: Andrew Lunn; +Cc: Russell King, Vivien Didelot, Florian Fainelli, netdev
In-Reply-To: <20190204193518.GE24989@lunn.ch>

Hi Andrew,

On 2019-02-04 2:35 p.m., Andrew Lunn wrote:
> The change to the interrupt handler i'm pretty sure is wrong. You have
> to accept with edge interrupts you are going to loose interrupts.
Can you be more specific regarding what you think is wrong with this hunk?

I can see that an interrupt might be handled twice if the source wasn't
cleared before interrupts
are re-enabled but I think that would also occur with level interrupts.

Dave

-- 
John David Anglin  dave.anglin@bell.net



^ permalink raw reply

* Re: [PATCH bpf-next 1/3] bpf, riscv: add BPF JIT for RV64G
From: Daniel Borkmann @ 2019-02-04 20:06 UTC (permalink / raw)
  To: bjorn.topel, linux-riscv, ast, netdev; +Cc: palmer, hch
In-Reply-To: <20190203115132.8766-2-bjorn.topel@gmail.com>

On 02/03/2019 12:51 PM, bjorn.topel@gmail.com wrote:
> From: Björn Töpel <bjorn.topel@gmail.com>
> 
> This commit adds BPF JIT for RV64G.
> 
> The JIT is a two-pass JIT, and has a dynamic prolog/epilogue (similar
> to the MIPS64 BPF JIT) instead of static ones (e.g. x86_64).
> 
> At the moment the RISC-V Linux port does not support HAVE_KPROBES,
> which means that CONFIG_BPF_EVENTS is not supported. Thus, no tests
> involving BPF_PROG_TYPE_TRACEPOINT passes.
> 
> Further, the implementation does not support "far branching" (>4KiB).
> 
> The implementation passes all the test_bpf.ko tests:
>   test_bpf: Summary: 378 PASSED, 0 FAILED, [366/366 JIT'ed]
> 
> All the tail_call tests in the selftest/bpf/test_verifier program
> passes.
> 
> All tests where done on QEMU (QEMU emulator version 3.1.50
> (v3.1.0-688-g8ae951fbc106)).
> 
> Signed-off-by: Björn Töpel <bjorn.topel@gmail.com>

Some minor comments:

Looks like all the BPF_JMP32 instructions are missing. Would probably
make sense to include these into the initial merge as well unless there
is some good reason not to; presumably the test_verifier parts with
BPF_JMP32 haven't been tried out?

[...]
> +
> +enum {
> +	RV_CTX_F_SEEN_TAIL_CALL =	0,
> +	RV_CTX_F_SEEN_CALL =		RV_REG_RA,
> +	RV_CTX_F_SEEN_S1 =		RV_REG_S1,
> +	RV_CTX_F_SEEN_S2 =		RV_REG_S2,
> +	RV_CTX_F_SEEN_S3 =		RV_REG_S3,
> +	RV_CTX_F_SEEN_S4 =		RV_REG_S4,
> +	RV_CTX_F_SEEN_S5 =		RV_REG_S5,
> +	RV_CTX_F_SEEN_S6 =		RV_REG_S6,
> +};
> +
> +struct rv_jit_context {
> +	struct bpf_prog *prog;
> +	u32 *insns; /* RV insns */
> +	int ninsns;
> +	int epilogue_offset;
> +	int *offset; /* BPF to RV */
> +	unsigned long flags;
> +	int stack_size;
> +};
> +
> +struct rv_jit_data {
> +	struct bpf_binary_header *header;
> +	u8 *image;
> +	struct rv_jit_context ctx;
> +};
> +
> +static u8 bpf_to_rv_reg(int bpf_reg, struct rv_jit_context *ctx)
> +{
> +	u8 reg = regmap[bpf_reg];
> +
> +	switch (reg) {
> +	case RV_CTX_F_SEEN_S1:
> +	case RV_CTX_F_SEEN_S2:
> +	case RV_CTX_F_SEEN_S3:
> +	case RV_CTX_F_SEEN_S4:
> +	case RV_CTX_F_SEEN_S5:
> +	case RV_CTX_F_SEEN_S6:
> +		__set_bit(reg, &ctx->flags);
> +	}
> +	return reg;
> +};
> +
> +static bool seen_reg(int reg, struct rv_jit_context *ctx)
> +{
> +	switch (reg) {
> +	case RV_CTX_F_SEEN_CALL:
> +	case RV_CTX_F_SEEN_S1:
> +	case RV_CTX_F_SEEN_S2:
> +	case RV_CTX_F_SEEN_S3:
> +	case RV_CTX_F_SEEN_S4:
> +	case RV_CTX_F_SEEN_S5:
> +	case RV_CTX_F_SEEN_S6:
> +		return test_bit(reg, &ctx->flags);
> +	}
> +	return false;
> +}
> +
> +static void mark_call(struct rv_jit_context *ctx)
> +{
> +	__set_bit(RV_CTX_F_SEEN_CALL, &ctx->flags);
> +}
> +
> +static bool seen_call(struct rv_jit_context *ctx)
> +{
> +	return seen_reg(RV_REG_RA, ctx);
> +}

Just nit: probably might be more obvious to remove this asymmetry in
seen_reg() and do __set_bit()/test_bit() for RV_CTX_F_SEEN_CALL similar
like below.

> +static void mark_tail_call(struct rv_jit_context *ctx)
> +{
> +	__set_bit(RV_CTX_F_SEEN_TAIL_CALL, &ctx->flags);
> +}
> +
> +static bool seen_tail_call(struct rv_jit_context *ctx)
> +{
> +	return test_bit(RV_CTX_F_SEEN_TAIL_CALL, &ctx->flags);
> +}
> +
> +static u8 rv_tail_call_reg(struct rv_jit_context *ctx)
> +{
> +	mark_tail_call(ctx);
> +
> +	if (seen_call(ctx)) {
> +		__set_bit(RV_CTX_F_SEEN_S6, &ctx->flags);
> +		return RV_REG_S6;
> +	}
> +	return RV_REG_A6;
> +}
> +
> +static void emit(const u32 insn, struct rv_jit_context *ctx)
> +{
> +	if (ctx->insns)
> +		ctx->insns[ctx->ninsns] = insn;
> +
> +	ctx->ninsns++;
> +}
> +
> +static u32 rv_r_insn(u8 funct7, u8 rs2, u8 rs1, u8 funct3, u8 rd, u8 opcode)
> +{
[...]
> +	/* Allocate image, now that we know the size. */
> +	image_size = sizeof(u32) * ctx->ninsns;
> +	jit_data->header = bpf_jit_binary_alloc(image_size, &jit_data->image,
> +						sizeof(u32),
> +						bpf_fill_ill_insns);
> +	if (!jit_data->header) {
> +		prog = orig_prog;
> +		goto out_offset;
> +	}
> +
> +	/* Second, real pass, that acutally emits the image. */
> +	ctx->insns = (u32 *)jit_data->image;
> +skip_init_ctx:
> +	ctx->ninsns = 0;
> +
> +	build_prologue(ctx);
> +	if (build_body(ctx, extra_pass)) {
> +		bpf_jit_binary_free(jit_data->header);
> +		prog = orig_prog;
> +		goto out_offset;
> +	}
> +	build_epilogue(ctx);
> +
> +	if (bpf_jit_enable > 1)
> +		bpf_jit_dump(prog->len, image_size, 2, ctx->insns);
> +
> +	prog->bpf_func = (void *)ctx->insns;
> +	prog->jited = 1;
> +	prog->jited_len = image_size;
> +
> +	bpf_flush_icache(jit_data->header, (u8 *)ctx->insns + ctx->ninsns);

Shouldn't this be '(u32 *)ctx->insns + ctx->ninsns' to cover the range?

> +
> +	if (!prog->is_func || extra_pass) {
> +out_offset:
> +		kfree(ctx->offset);
> +		kfree(jit_data);
> +		prog->aux->jit_data = NULL;
> +	}
> +out:
> +	if (tmp_blinded)
> +		bpf_jit_prog_release_other(prog, prog == orig_prog ?
> +					   tmp : orig_prog);
> +	return prog;
> +}
> 


^ permalink raw reply

* Re: [ovs-dev] [PATCH net-next V2 1/1] openvswitch: Declare ovs key structures using macros
From: David Miller @ 2019-02-04 20:07 UTC (permalink / raw)
  To: yihung.wei; +Cc: elibr, pshelar, dev, netdev, simon.horman
In-Reply-To: <CAG1aQhJVzbYnYNb-mWH4s6+23jOnDz9UVYTPtBmNkhKAfowrQg@mail.gmail.com>

From: Yi-Hung Wei <yihung.wei@gmail.com>
Date: Mon, 4 Feb 2019 10:47:18 -0800

> For example, to see how 'struct ovs_key_ipv6' is defined, now we need
> to trace how OVS_KEY_IPV6_FIELDS is defined, and how OVS_KEY_FIELD_ARR
> and OVS_KEY_FIELD defined.  I think it makes the header file to be
> more complicated.

I completely agree.

Unless this is totally unavoidable, I do not want to apply a patch
which makes reading and auditing the networking code more difficult.

^ permalink raw reply

* Re: [ovs-dev] [PATCH net-next V2 1/1] openvswitch: Declare ovs key structures using macros
From: David Miller @ 2019-02-04 20:09 UTC (permalink / raw)
  To: gvrose8192; +Cc: elibr, pshelar, dev, netdev, simon.horman
In-Reply-To: <0bbab6ba-5892-e0c3-8290-6d844414ca2e@gmail.com>

From: Gregory Rose <gvrose8192@gmail.com>
Date: Mon, 4 Feb 2019 11:41:29 -0800

> 
> On 2/3/2019 1:12 AM, Eli Britstein wrote:
>> Declare ovs key structures using macros as a pre-step towards to
>> enable retrieving fields information, as a work done in proposed
>> commit in the OVS tree https://patchwork.ozlabs.org/patch/1023406/
>> ("odp-util: Do not rewrite fields with the same values as matched"),
>> with no functional change.
>>
>> Signed-off-by: Eli Britstein <elibr@mellanox.com>
>> Reviewed-by: Roi Dayan <roid@mellanox.com>
> 
> Obscuring the structures with these macros is awful.  I'm opposed but
> I see it has already been
> accepted upstream so I guess that's that.

I am personally in no way obligated to apply this patch to my tree
just because "upstream" did, and I absolutely have no plans to do so
at this point.

This patch is absolutely awful.

^ permalink raw reply

* Re: [PATCH bpf-next 3/3] bpf, doc: add RISC-V to filter.txt
From: Daniel Borkmann @ 2019-02-04 20:09 UTC (permalink / raw)
  To: bjorn.topel, linux-riscv, ast, netdev; +Cc: palmer, hch
In-Reply-To: <20190203115132.8766-4-bjorn.topel@gmail.com>

On 02/03/2019 12:51 PM, bjorn.topel@gmail.com wrote:
> From: Björn Töpel <bjorn.topel@gmail.com>
> 
> Update Documentation/networking/filter.txt to mention RISC-V.
> 
> Signed-off-by: Björn Töpel <bjorn.topel@gmail.com>

Nit: in Documentation/sysctl/net.txt under bpf_jit_enable there is also
a concrete list of eBPF/cBPF JITs, would be good to add riscv64 there as
well.

>  Documentation/networking/filter.txt | 16 +++++++++-------
>  1 file changed, 9 insertions(+), 7 deletions(-)
> 
> diff --git a/Documentation/networking/filter.txt b/Documentation/networking/filter.txt
> index 01603bc2eff1..b5e060edfc38 100644
> --- a/Documentation/networking/filter.txt
> +++ b/Documentation/networking/filter.txt
> @@ -464,10 +464,11 @@ breakpoints: 0 1
>  JIT compiler
>  ------------
>  
> -The Linux kernel has a built-in BPF JIT compiler for x86_64, SPARC, PowerPC,
> -ARM, ARM64, MIPS and s390 and can be enabled through CONFIG_BPF_JIT. The JIT
> -compiler is transparently invoked for each attached filter from user space
> -or for internal kernel users if it has been previously enabled by root:
> +The Linux kernel has a built-in BPF JIT compiler for x86_64, SPARC,
> +PowerPC, ARM, ARM64, MIPS, RISC-V and s390 and can be enabled through
> +CONFIG_BPF_JIT. The JIT compiler is transparently invoked for each
> +attached filter from user space or for internal kernel users if it has
> +been previously enabled by root:
>  
>    echo 1 > /proc/sys/net/core/bpf_jit_enable
>  
> @@ -603,9 +604,10 @@ got from bpf_prog_create(), and 'ctx' the given context (e.g.
>  skb pointer). All constraints and restrictions from bpf_check_classic() apply
>  before a conversion to the new layout is being done behind the scenes!
>  
> -Currently, the classic BPF format is being used for JITing on most 32-bit
> -architectures, whereas x86-64, aarch64, s390x, powerpc64, sparc64, arm32 perform
> -JIT compilation from eBPF instruction set.
> +Currently, the classic BPF format is being used for JITing on most
> +32-bit architectures, whereas x86-64, aarch64, s390x, powerpc64,
> +sparc64, arm32, riscv (RV64G) perform JIT compilation from eBPF
> +instruction set.
>  
>  Some core changes of the new internal format:
>  
> 


^ permalink raw reply

* Re: [PATCH bpf-next 3/3] bpf, doc: add RISC-V to filter.txt
From: Björn Töpel @ 2019-02-04 20:16 UTC (permalink / raw)
  To: Daniel Borkmann
  Cc: linux-riscv, ast, Netdev, Palmer Dabbelt, Christoph Hellwig
In-Reply-To: <d033e68a-b520-c25d-e951-761693c16fe7@iogearbox.net>

Den mån 4 feb. 2019 kl 21:09 skrev Daniel Borkmann <daniel@iogearbox.net>:
>
> On 02/03/2019 12:51 PM, bjorn.topel@gmail.com wrote:
> > From: Björn Töpel <bjorn.topel@gmail.com>
> >
> > Update Documentation/networking/filter.txt to mention RISC-V.
> >
> > Signed-off-by: Björn Töpel <bjorn.topel@gmail.com>
>
> Nit: in Documentation/sysctl/net.txt under bpf_jit_enable there is also
> a concrete list of eBPF/cBPF JITs, would be good to add riscv64 there as
> well.
>

Thanks for pointing this out! I'll do a v2.


Björn

> >  Documentation/networking/filter.txt | 16 +++++++++-------
> >  1 file changed, 9 insertions(+), 7 deletions(-)
> >
> > diff --git a/Documentation/networking/filter.txt b/Documentation/networking/filter.txt
> > index 01603bc2eff1..b5e060edfc38 100644
> > --- a/Documentation/networking/filter.txt
> > +++ b/Documentation/networking/filter.txt
> > @@ -464,10 +464,11 @@ breakpoints: 0 1
> >  JIT compiler
> >  ------------
> >
> > -The Linux kernel has a built-in BPF JIT compiler for x86_64, SPARC, PowerPC,
> > -ARM, ARM64, MIPS and s390 and can be enabled through CONFIG_BPF_JIT. The JIT
> > -compiler is transparently invoked for each attached filter from user space
> > -or for internal kernel users if it has been previously enabled by root:
> > +The Linux kernel has a built-in BPF JIT compiler for x86_64, SPARC,
> > +PowerPC, ARM, ARM64, MIPS, RISC-V and s390 and can be enabled through
> > +CONFIG_BPF_JIT. The JIT compiler is transparently invoked for each
> > +attached filter from user space or for internal kernel users if it has
> > +been previously enabled by root:
> >
> >    echo 1 > /proc/sys/net/core/bpf_jit_enable
> >
> > @@ -603,9 +604,10 @@ got from bpf_prog_create(), and 'ctx' the given context (e.g.
> >  skb pointer). All constraints and restrictions from bpf_check_classic() apply
> >  before a conversion to the new layout is being done behind the scenes!
> >
> > -Currently, the classic BPF format is being used for JITing on most 32-bit
> > -architectures, whereas x86-64, aarch64, s390x, powerpc64, sparc64, arm32 perform
> > -JIT compilation from eBPF instruction set.
> > +Currently, the classic BPF format is being used for JITing on most
> > +32-bit architectures, whereas x86-64, aarch64, s390x, powerpc64,
> > +sparc64, arm32, riscv (RV64G) perform JIT compilation from eBPF
> > +instruction set.
> >
> >  Some core changes of the new internal format:
> >
> >
>

^ permalink raw reply

* Re: [PATCH] net: phylink: dsa: mv88e6xxx: Revise irq setup ordering
From: Andrew Lunn @ 2019-02-04 20:19 UTC (permalink / raw)
  To: John David Anglin; +Cc: Russell King, Vivien Didelot, Florian Fainelli, netdev
In-Reply-To: <2d8c0eff-00cd-31c7-9906-89ff9d3c7dd4@bell.net>

> Can you be more specific regarding what you think is wrong with this hunk?

Hi David

The IRQ core would do this if it was needed.

How many other irq thread work functions can you point to which do
something similar?

	  Andrew

^ permalink raw reply

* Re: [PATCH bpf-next] bpf: support SO_DEBUG in bpf_setsockopt()
From: Daniel Borkmann @ 2019-02-04 20:23 UTC (permalink / raw)
  To: Alexei Starovoitov, Yafang Shao; +Cc: kafai, brakmo, ast, netdev, shaoyafang
In-Reply-To: <20190204173517.6o5v7yd5yn7pxjzi@ast-mbp.dhcp.thefacebook.com>

On 02/04/2019 06:35 PM, Alexei Starovoitov wrote:
> On Sun, Feb 03, 2019 at 04:15:07PM +0800, Yafang Shao wrote:
>> Then we can enable/disable socket debugging without modifying user code.
>> That is more convenient for debugging.
>>
>> Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
>> ---
>>  include/net/sock.h | 8 ++++++++
>>  net/core/filter.c  | 3 +++
>>  net/core/sock.c    | 8 --------
>>  3 files changed, 11 insertions(+), 8 deletions(-)
>>
>> diff --git a/include/net/sock.h b/include/net/sock.h
>> index 2b229f7..8decee9 100644
>> --- a/include/net/sock.h
>> +++ b/include/net/sock.h
>> @@ -1935,6 +1935,14 @@ static inline void sock_confirm_neigh(struct sk_buff *skb, struct neighbour *n)
>>  	}
>>  }
>>  
>> +static inline void sock_valbool_flag(struct sock *sk, int bit, int valbool)
>> +{
>> +	if (valbool)
>> +		sock_set_flag(sk, bit);
>> +	else
>> +		sock_reset_flag(sk, bit);
>> +}
>> +
>>  bool sk_mc_loop(struct sock *sk);
>>  
>>  static inline bool sk_can_gso(const struct sock *sk)
>> diff --git a/net/core/filter.c b/net/core/filter.c
>> index 3a49f68..ce5da57 100644
>> --- a/net/core/filter.c
>> +++ b/net/core/filter.c
>> @@ -4111,6 +4111,9 @@ static unsigned long bpf_xdp_copy(void *dst_buff, const void *src_buff,
>>  
>>  		/* Only some socketops are supported */
>>  		switch (optname) {
>> +		case SO_DEBUG:
>> +			sock_valbool_flag(sk, SOCK_DBG, val);
>> +			break;
> 
> I'm missing the point here.
> This flag has any effect only when SOCK_DEBUGGING is set.
> But it is off in distros.
> Since it's for custom debug kernel only why bother with
> setting the flag via bpf prog?

+1, this seems like some ancient debugging interface. Back at last netconf
there was a proposal [0] to have a tcp_stats(sk, TCP_MIB_...) API for MIBs
counter such that this can be traced via BPF on a per socket basis, for
example. Might be worthwhile to work into that direction instead and potentially
get rid of the SOCK_DEBUG() statements and convert (where appropriate) to
such an interface. Thoughts?

  [0] page 14, http://vger.kernel.org/netconf2018_files/BrendanGregg_netconf2018.pdf

^ permalink raw reply

* Re: [PATCH bpf-next 1/3] bpf, riscv: add BPF JIT for RV64G
From: Björn Töpel @ 2019-02-04 20:27 UTC (permalink / raw)
  To: Daniel Borkmann
  Cc: linux-riscv, ast, Netdev, Palmer Dabbelt, Christoph Hellwig
In-Reply-To: <88cdae60-494e-6294-b2c1-10b9cbeb95ac@iogearbox.net>

Den mån 4 feb. 2019 kl 21:06 skrev Daniel Borkmann <daniel@iogearbox.net>:
>
> On 02/03/2019 12:51 PM, bjorn.topel@gmail.com wrote:
> > From: Björn Töpel <bjorn.topel@gmail.com>
> >
> > This commit adds BPF JIT for RV64G.
> >
> > The JIT is a two-pass JIT, and has a dynamic prolog/epilogue (similar
> > to the MIPS64 BPF JIT) instead of static ones (e.g. x86_64).
> >
> > At the moment the RISC-V Linux port does not support HAVE_KPROBES,
> > which means that CONFIG_BPF_EVENTS is not supported. Thus, no tests
> > involving BPF_PROG_TYPE_TRACEPOINT passes.
> >
> > Further, the implementation does not support "far branching" (>4KiB).
> >
> > The implementation passes all the test_bpf.ko tests:
> >   test_bpf: Summary: 378 PASSED, 0 FAILED, [366/366 JIT'ed]
> >
> > All the tail_call tests in the selftest/bpf/test_verifier program
> > passes.
> >
> > All tests where done on QEMU (QEMU emulator version 3.1.50
> > (v3.1.0-688-g8ae951fbc106)).
> >
> > Signed-off-by: Björn Töpel <bjorn.topel@gmail.com>
>
> Some minor comments:
>
> Looks like all the BPF_JMP32 instructions are missing. Would probably
> make sense to include these into the initial merge as well unless there
> is some good reason not to; presumably the test_verifier parts with
> BPF_JMP32 haven't been tried out?
>

Yes indeed. My bad, I didn't realize that Jiong's patches were in the
tree! BPF_JMP32 should definitely be in the initial merge.

> [...]
> > +
> > +enum {
> > +     RV_CTX_F_SEEN_TAIL_CALL =       0,
> > +     RV_CTX_F_SEEN_CALL =            RV_REG_RA,
> > +     RV_CTX_F_SEEN_S1 =              RV_REG_S1,
> > +     RV_CTX_F_SEEN_S2 =              RV_REG_S2,
> > +     RV_CTX_F_SEEN_S3 =              RV_REG_S3,
> > +     RV_CTX_F_SEEN_S4 =              RV_REG_S4,
> > +     RV_CTX_F_SEEN_S5 =              RV_REG_S5,
> > +     RV_CTX_F_SEEN_S6 =              RV_REG_S6,
> > +};
> > +
> > +struct rv_jit_context {
> > +     struct bpf_prog *prog;
> > +     u32 *insns; /* RV insns */
> > +     int ninsns;
> > +     int epilogue_offset;
> > +     int *offset; /* BPF to RV */
> > +     unsigned long flags;
> > +     int stack_size;
> > +};
> > +
> > +struct rv_jit_data {
> > +     struct bpf_binary_header *header;
> > +     u8 *image;
> > +     struct rv_jit_context ctx;
> > +};
> > +
> > +static u8 bpf_to_rv_reg(int bpf_reg, struct rv_jit_context *ctx)
> > +{
> > +     u8 reg = regmap[bpf_reg];
> > +
> > +     switch (reg) {
> > +     case RV_CTX_F_SEEN_S1:
> > +     case RV_CTX_F_SEEN_S2:
> > +     case RV_CTX_F_SEEN_S3:
> > +     case RV_CTX_F_SEEN_S4:
> > +     case RV_CTX_F_SEEN_S5:
> > +     case RV_CTX_F_SEEN_S6:
> > +             __set_bit(reg, &ctx->flags);
> > +     }
> > +     return reg;
> > +};
> > +
> > +static bool seen_reg(int reg, struct rv_jit_context *ctx)
> > +{
> > +     switch (reg) {
> > +     case RV_CTX_F_SEEN_CALL:
> > +     case RV_CTX_F_SEEN_S1:
> > +     case RV_CTX_F_SEEN_S2:
> > +     case RV_CTX_F_SEEN_S3:
> > +     case RV_CTX_F_SEEN_S4:
> > +     case RV_CTX_F_SEEN_S5:
> > +     case RV_CTX_F_SEEN_S6:
> > +             return test_bit(reg, &ctx->flags);
> > +     }
> > +     return false;
> > +}
> > +
> > +static void mark_call(struct rv_jit_context *ctx)
> > +{
> > +     __set_bit(RV_CTX_F_SEEN_CALL, &ctx->flags);
> > +}
> > +
> > +static bool seen_call(struct rv_jit_context *ctx)
> > +{
> > +     return seen_reg(RV_REG_RA, ctx);
> > +}
>
> Just nit: probably might be more obvious to remove this asymmetry in
> seen_reg() and do __set_bit()/test_bit() for RV_CTX_F_SEEN_CALL similar
> like below.
>

Yeah, let's do that.

> > +static void mark_tail_call(struct rv_jit_context *ctx)
> > +{
> > +     __set_bit(RV_CTX_F_SEEN_TAIL_CALL, &ctx->flags);
> > +}
> > +
> > +static bool seen_tail_call(struct rv_jit_context *ctx)
> > +{
> > +     return test_bit(RV_CTX_F_SEEN_TAIL_CALL, &ctx->flags);
> > +}
> > +
> > +static u8 rv_tail_call_reg(struct rv_jit_context *ctx)
> > +{
> > +     mark_tail_call(ctx);
> > +
> > +     if (seen_call(ctx)) {
> > +             __set_bit(RV_CTX_F_SEEN_S6, &ctx->flags);
> > +             return RV_REG_S6;
> > +     }
> > +     return RV_REG_A6;
> > +}
> > +
> > +static void emit(const u32 insn, struct rv_jit_context *ctx)
> > +{
> > +     if (ctx->insns)
> > +             ctx->insns[ctx->ninsns] = insn;
> > +
> > +     ctx->ninsns++;
> > +}
> > +
> > +static u32 rv_r_insn(u8 funct7, u8 rs2, u8 rs1, u8 funct3, u8 rd, u8 opcode)
> > +{
> [...]
> > +     /* Allocate image, now that we know the size. */
> > +     image_size = sizeof(u32) * ctx->ninsns;
> > +     jit_data->header = bpf_jit_binary_alloc(image_size, &jit_data->image,
> > +                                             sizeof(u32),
> > +                                             bpf_fill_ill_insns);
> > +     if (!jit_data->header) {
> > +             prog = orig_prog;
> > +             goto out_offset;
> > +     }
> > +
> > +     /* Second, real pass, that acutally emits the image. */
> > +     ctx->insns = (u32 *)jit_data->image;
> > +skip_init_ctx:
> > +     ctx->ninsns = 0;
> > +
> > +     build_prologue(ctx);
> > +     if (build_body(ctx, extra_pass)) {
> > +             bpf_jit_binary_free(jit_data->header);
> > +             prog = orig_prog;
> > +             goto out_offset;
> > +     }
> > +     build_epilogue(ctx);
> > +
> > +     if (bpf_jit_enable > 1)
> > +             bpf_jit_dump(prog->len, image_size, 2, ctx->insns);
> > +
> > +     prog->bpf_func = (void *)ctx->insns;
> > +     prog->jited = 1;
> > +     prog->jited_len = image_size;
> > +
> > +     bpf_flush_icache(jit_data->header, (u8 *)ctx->insns + ctx->ninsns);
>
> Shouldn't this be '(u32 *)ctx->insns + ctx->ninsns' to cover the range?
>

Yikes! Indeed so, I'll make sure this is corrected!

Thanks for the comments!


Björn

> > +
> > +     if (!prog->is_func || extra_pass) {
> > +out_offset:
> > +             kfree(ctx->offset);
> > +             kfree(jit_data);
> > +             prog->aux->jit_data = NULL;
> > +     }
> > +out:
> > +     if (tmp_blinded)
> > +             bpf_jit_prog_release_other(prog, prog == orig_prog ?
> > +                                        tmp : orig_prog);
> > +     return prog;
> > +}
> >
>

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox