* Re: [PATCH net-next v10 3/4] netdev: octeon-ethernet: Add Cavium Octeon III support.
From: David Miller @ 2018-05-09 2:28 UTC (permalink / raw)
To: steven.hill; +Cc: netdev, cmunoz, david.daney
In-Reply-To: <1525713731-27092-4-git-send-email-steven.hill@cavium.com>
From: "Steven J. Hill" <steven.hill@cavium.com>
Date: Mon, 7 May 2018 12:22:10 -0500
> +static atomic_t request_mgmt_once;
> +static atomic_t load_driver_once;
> +static atomic_t pki_id;
...
> + /* One time request driver module */
> + if (is_mix) {
> + if (atomic_cmpxchg(&request_mgmt_once, 0, 1) == 0)
> + request_module_nowait("octeon_mgmt");
> + }
> + if (is_pki) {
> + if (atomic_cmpxchg(&load_driver_once, 0, 1) == 0)
> + request_module_nowait("octeon3-ethernet");
> + }
You're going to have to explain this, it makes no sense to me.
> +static int bgx_pki_ports_init(void)
> +{
> + int i;
> + int j;
> + int k;
"int i, j, k;" please.
> +static int bgx_port_xgmii_set_link_up(struct bgx_port_priv *priv)
> +{
> + u64 data;
> + int timeout;
Please order from longest to shortest line for variable declarations.
> +static int bgx_port_sgmii_set_link_speed(struct bgx_port_priv *priv,
> + struct port_status status)
> +{
> + int timeout;
> + u64 miscx;
> + u64 data;
> + u64 prtx;
Please use "u64 miscx, data, prtx;" and put it on the first line.
> +static struct port_status bgx_port_get_xaui_link(struct bgx_port_priv *priv)
> +{
> + struct port_status status;
> + int lanes;
> + int speed;
> + u64 data;
"int lanes, speed;"
> +static int bgx_port_gser_27882(struct bgx_port_priv *priv)
> +{
> + int timeout;
> + u64 addr;
> + u64 data;
"u64 addr, data;" and move to first line.
> +static int bgx_port_qlm_rx_equalization(struct bgx_port_priv *priv,
> + int qlm, int lane)
> +{
> + int max_lanes = bgx_port_get_max_qlm_lanes(qlm);
> + int lane_mask;
> + int timeout;
> + int rc = 0;
> + u64 lmode;
> + u64 addr;
> + u64 data;
> + int i;
Please group these local variables. Have some pity for people who
have not so much vertical space on their screen when they are reading
your code. :)
> +static int bgx_port_init_xaui_link(struct bgx_port_priv *priv)
> +{
> + int use_training = 0;
> + int use_ber = 0;
> + int timeout;
> + int rc = 0;
> + u64 data;
Please group the int variables into a smaller number of lines.
> + /* Wait for mac rx to be ready */
> + timeout = 10000;
> + do {
> + data = oct_csr_read(BGX_SMU_RX_CTL(priv->node, priv->bgx, priv->index));
> + data &= GENMASK_ULL(1, 0);
> + if (!data)
> + break;
> + timeout--;
> + udelay(1);
> + } while (timeout);
This construct is repeated so many times, over and over. Make a helper
function that performs this operation.
> +static void bgx_port_adjust_link(struct net_device *netdev)
> +{
> + struct bgx_port_priv *priv = bgx_port_netdev2priv(netdev);
> + bool link_changed = false;
> + unsigned int duplex;
> + unsigned int speed;
> + unsigned int link;
Please group the unsigned ints.
> +static int bgx_port_probe(struct platform_device *pdev)
> +{
> + struct bgx_port_priv *priv;
> + const __be32 *reg;
> + const u8 *mac;
> + int numa_node;
> + u32 index;
> + u64 addr;
> + int rc;
Please group variables of the same type into one line.
> +static int __init bgx_port_driver_init(void)
> +{
> + int r;
> + int i;
> + int j;
> + int k;
"int r, i, j, k;"
> +static inline u64 scratch_read64(u64 offset)
Do not use "inline" for functions in foo.c files, let the compiler
decide.
> +static inline void scratch_write64(u64 offset, u64 value)
Likewise.
> +static inline struct wr_ret octeon3_core_get_work_sync(int grp)
> +{
> + u64 node = cvmx_get_node_num();
> + struct wr_ret r;
> + u64 response;
> + u64 addr;
"u64 response, addr;" Don't use inline.
> +static inline void octeon3_core_get_work_async(unsigned int grp)
> +{
Kill the inline.
> +static inline struct wr_ret octeon3_core_get_response_async(void)
Likewise.
> +static int octeon3_eth_tx_complete_hwtstamp(struct octeon3_ethernet *priv,
> + struct sk_buff *skb)
> +{
> + struct skb_shared_hwtstamps shts;
> + u64 hwts;
> + u64 ns;
"u64 hwts, ns;"
> +static int octeon3_eth_tx_complete_worker(void *data)
> +{
> + struct octeon3_ethernet_worker *worker = data;
> + struct octeon3_ethernet_node *oen;
> + int tx_complete_stop_thresh;
> + int backlog_stop_thresh;
> + int backlog;
> + u64 aq_cnt;
> + int order;
> + int i;
Group the variable declarations, please.
> +static int octeon3_eth_common_ndo_init(struct net_device *netdev,
> + int extra_skip)
> +{
> + struct octeon3_ethernet_node *oen;
> + int base_rx_grp[MAX_RX_QUEUES];
> + struct octeon3_ethernet *priv;
> + int pki_chan;
> + int aura;
> + int dq;
> + int i;
> + int r;
"int pki_chan, aura, dq, i, r;"
> +static void octeon3_eth_ndo_get_stats64(struct net_device *netdev,
> + struct rtnl_link_stats64 *s)
> +{
> + struct octeon3_ethernet *priv = netdev_priv(netdev);
> + u64 delta_packets;
> + u64 delta_dropped;
> + u64 delta_octets;
> + u64 dropped;
> + u64 packets;
> + u64 octets;
Group the u64s please.
> +static int octeon3_eth_common_ndo_open(struct net_device *netdev)
> +{
> + struct octeon3_ethernet *priv = netdev_priv(netdev);
> + struct octeon3_rx *rx;
> + int i;
> + int r;
"int i, r;"
> +static inline u64 build_pko_send_ext_desc(struct sk_buff *skb)
Kill the inline.
> +static inline u64 build_pko_send_tso(struct sk_buff *skb, uint mtu)
Likewise.
> +static inline u64 build_pko_send_mem_sub(u64 addr)
Likewise.
> +static inline u64 build_pko_send_mem_ts(u64 addr)
Likewise.
> +static inline u64 build_pko_send_free(u64 addr)
Likewise.
> +static inline u64 build_pko_send_work(int grp, u64 addr)
Likewise.
> +static int octeon3_eth_ndo_start_xmit(struct sk_buff *skb,
> + struct net_device *netdev)
> +{
> + struct octeon3_ethernet *priv = netdev_priv(netdev);
> + struct octeon3_ethernet_node *oen;
> + u64 scr_off = LMTDMA_SCR_OFFSET;
> + struct sk_buff *skb_tmp;
> + u64 pko_send_desc;
> + u64 *lmtdma_addr;
> + unsigned int mss;
> + u64 lmtdma_data;
> + u64 aq_cnt = 0;
> + int frag_count;
> + long backlog;
> + u64 head_len;
> + void **work;
> + int grp;
> + int i;
Please group these variables, this is crazy...
> +static int octeon3_adjfreq(struct ptp_clock_info *ptp, s32 ppb)
> +{
> + struct octeon3_ethernet *priv;
> + int neg_ppb = 0;
> + u64 comp;
> + u64 diff;
"u64 comp, diff;"
> +int octeon_fpa3_init(int node)
> +{
> + static bool init_done[2];
> + int aura_cnt;
> + u64 data;
> + int i;
"int aura_cnt, i; "
> +int octeon_fpa3_aura_init(int node, int pool, int aura_num,
> + int *aura, int num_bufs, unsigned int limit)
> +{
> + struct global_resource_tag tag;
> + unsigned int drop;
> + unsigned int pass;
> + char buf[16];
> + int rc = 0;
> + u64 shift;
> + u64 data;
"unsigned int drop, pass;"
"u64 shift, data;"
> +void *octeon_fpa3_alloc(int node, int aura)
> +{
> + void *buf = NULL;
> + u64 buf_phys;
> + u64 addr;
"u64 buf_phys, addr;"
> +void octeon_fpa3_free(int node, int aura, const void *buf)
> +{
> + u64 buf_phys;
> + u64 addr;
"u64 buf_phys, addr;"
> +int octeon_fpa3_mem_fill(int node, struct kmem_cache *cache,
> + int aura, int num_bufs)
> +{
> + void *mem;
> + int rc = 0;
> + int i;
"int i, rc = 0;"
> +static int octeon3_pki_pcam_alloc_entry(int node, int entry, int bank)
> +{
> + struct global_resource_tag tag;
> + char buf[16];
> + int num_clusters;
> + int rc;
> + int i;
"int num_clusters, rc, i;"
> +static int octeon3_pki_pcam_write_entry(int node,
> + struct pcam_term_info *term_info)
> +{
> + int num_clusters;
> + u64 action;
> + int entry;
> + u64 match;
> + int bank;
> + u64 term;
> + int i;
"u64 action, match, term;"
"int num_clusters, entry, bank, i;"
> +int octeon3_pki_set_ptp_skip(int node, int pknd, int skip)
> +{
> + int num_clusters;
> + u64 data;
> + u64 i;
"u64 data, i;"
That's all I have the stomach for at the moment.
This thing is really large, making it nearly impossible to review
as one huge patch #3. Perhaps you can find a way to split it up
logically somehow?
^ permalink raw reply
* [PATCH bpf-next] bpf: sync tools bpf.h uapi header
From: Prashant Bhole @ 2018-05-09 2:04 UTC (permalink / raw)
To: Daniel Borkmann, Alexei Starovoitov
Cc: Prashant Bhole, David S . Miller, netdev
Sync the header from include/uapi/linux/bpf.h, which was updated to add
the fib lookup helper function. This fixes a selftests/bpf build failure.
Signed-off-by: Prashant Bhole <bhole_prashant_q7@lab.ntt.co.jp>
---
tools/include/uapi/linux/bpf.h | 84 +++++++++++++++++++++++++++++++++++++++++-
1 file changed, 83 insertions(+), 1 deletion(-)
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 83a95ae388dd..ddc566cb7492 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -10,6 +10,8 @@
#include <linux/types.h>
#include <linux/bpf_common.h>
+#include <linux/if_ether.h>
+#include <linux/in6.h>
/* Extended instruction set based on top of classic BPF */
@@ -116,6 +118,7 @@ enum bpf_map_type {
BPF_MAP_TYPE_DEVMAP,
BPF_MAP_TYPE_SOCKMAP,
BPF_MAP_TYPE_CPUMAP,
+ BPF_MAP_TYPE_XSKMAP,
};
enum bpf_prog_type {
@@ -1825,6 +1828,33 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
+ *
+ * int bpf_fib_lookup(void *ctx, struct bpf_fib_lookup *params, int plen, u32 flags)
+ * Description
+ * Do FIB lookup in kernel tables using parameters in *params*.
+ * If lookup is successful and result shows packet is to be
+ * forwarded, the neighbor tables are searched for the nexthop.
+ * If successful (ie., FIB lookup shows forwarding and nexthop
+ * is resolved), the nexthop address is returned in ipv4_dst,
+ * ipv6_dst or mpls_out based on family, smac is set to mac
+ * address of egress device, dmac is set to nexthop mac address,
+ * rt_metric is set to metric from route.
+ *
+ * *plen* argument is the size of the passed in struct.
+ * *flags* argument can be one or more BPF_FIB_LOOKUP_ flags:
+ *
+ * **BPF_FIB_LOOKUP_DIRECT** means do a direct table lookup vs
+ * full lookup using FIB rules
+ * **BPF_FIB_LOOKUP_OUTPUT** means do lookup from an egress
+ * perspective (default is ingress)
+ *
+ * *ctx* is either **struct xdp_md** for XDP programs or
+ * **struct sk_buff** tc cls_act programs.
+ *
+ * Return
+ * Egress device index on success, 0 if packet needs to continue
+ * up the stack for further processing or a negative error in case
+ * of failure.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -1895,7 +1925,8 @@ union bpf_attr {
FN(xdp_adjust_tail), \
FN(skb_get_xfrm_state), \
FN(get_stack), \
- FN(skb_load_bytes_relative),
+ FN(skb_load_bytes_relative), \
+ FN(fib_lookup),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
@@ -2309,4 +2340,55 @@ struct bpf_raw_tracepoint_args {
__u64 args[0];
};
+/* DIRECT: Skip the FIB rules and go to FIB table associated with device
+ * OUTPUT: Do lookup from egress perspective; default is ingress
+ */
+#define BPF_FIB_LOOKUP_DIRECT BIT(0)
+#define BPF_FIB_LOOKUP_OUTPUT BIT(1)
+
+struct bpf_fib_lookup {
+ /* input */
+ __u8 family; /* network family, AF_INET, AF_INET6, AF_MPLS */
+
+ /* set if lookup is to consider L4 data - e.g., FIB rules */
+ __u8 l4_protocol;
+ __be16 sport;
+ __be16 dport;
+
+ /* total length of packet from network header - used for MTU check */
+ __u16 tot_len;
+ __u32 ifindex; /* L3 device index for lookup */
+
+ union {
+ /* inputs to lookup */
+ __u8 tos; /* AF_INET */
+ __be32 flowlabel; /* AF_INET6 */
+
+ /* output: metric of fib result */
+ __u32 rt_metric;
+ };
+
+ union {
+ __be32 mpls_in;
+ __be32 ipv4_src;
+ struct in6_addr ipv6_src;
+ };
+
+ /* input to bpf_fib_lookup, *dst is destination address.
+ * output: bpf_fib_lookup sets to gateway address
+ */
+ union {
+ /* return for MPLS lookups */
+ __be32 mpls_out[4]; /* support up to 4 labels */
+ __be32 ipv4_dst;
+ struct in6_addr ipv6_dst;
+ };
+
+ /* output */
+ __be16 h_vlan_proto;
+ __be16 h_vlan_TCI;
+ __u8 smac[ETH_ALEN];
+ __u8 dmac[ETH_ALEN];
+};
+
#endif /* _UAPI__LINUX_BPF_H__ */
--
2.14.3
^ permalink raw reply related
* Re: [PATCH v2 net-next 2/4] net: add skeleton of bpfilter kernel module
From: Alexei Starovoitov @ 2018-05-09 2:29 UTC (permalink / raw)
To: Luis R. Rodriguez
Cc: Alexei Starovoitov, davem, daniel, torvalds, gregkh, luto, netdev,
linux-kernel, kernel-team, Juergen Gross, Eric Paris,
Matthew Auld, Josh Triplett, Kirill A. Shutemov, Joonas Lahtinen,
Chris Wilson, Stephen Smalley, Eric W. Biederman, Mimi Zohar,
David Howells, Kees Cook, Andrew Morton
In-Reply-To: <20180507185124.GA18195@wotan.suse.de>
On Mon, May 07, 2018 at 06:51:24PM +0000, Luis R. Rodriguez wrote:
> > Notice that _binary_net_bpfilter_bpfilter_umh_start - end
> > is placed into .init.rodata section, so it's freed as soon as __init
> > function of bpfilter.ko is finished.
> > As part of __init the bpfilter.ko does first request/reply action
> > via two unix pipes provided by fork_usermode_blob() helper to
> > make sure that umh is healthy. If not it will kill it via pid.
>
> It does this very fast, right away. On a really slow system how are you sure
> that this won't race and the execution of the check happens early on prior to
> letting the actual setup trigger? After all, we're calling the userspace
> process in async mode. We could preempt it now.
I don't see an issue.
the kernel synchronously writes into a pipe. User space process reads.
Exactly the same as coredump logic with pipes.
> > +# a bit of elf magic to convert bpfilter_umh binary into a binary blob
> > +# inside bpfilter_umh.o elf file referenced by
> > +# _binary_net_bpfilter_bpfilter_umh_start symbol
> > +# which bpfilter_kern.c passes further into umh blob loader at run-time
> > +quiet_cmd_copy_umh = GEN $@
> > + cmd_copy_umh = echo ':' > $(obj)/.bpfilter_umh.o.cmd; \
> > + $(OBJCOPY) -I binary -O $(CONFIG_OUTPUT_FORMAT) \
> > + -B `$(OBJDUMP) -f $<|grep architecture|cut -d, -f1|cut -d' ' -f2` \
> > + --rename-section .data=.init.rodata $< $@
>
> Cool, but so our expectation is that the compiler sets this symbol, how
> are we sure it will always be set?
Compiler doesn't set it. objcopy does.
> > +
> > + if (__bpfilter_process_sockopt(NULL, 0, 0, 0, 0) != 0) {
>
> See, here, what if the userspace process gets preempted and we run this
> check afterwards? Is that possible?
User space is a normal task. It can sleep and can be single stepped with GDB.
^ permalink raw reply
* Re: [net-next PATCH v3 0/6] UDP GSO Segmentation clean-ups
From: David Miller @ 2018-05-09 2:30 UTC (permalink / raw)
To: alexander.duyck; +Cc: netdev, willemb
In-Reply-To: <CAKgT0UfRXs5SQjxN7gzZ=qo2yORteLXfdDOj_pOrGyYX_LouRA@mail.gmail.com>
From: Alexander Duyck <alexander.duyck@gmail.com>
Date: Mon, 7 May 2018 13:03:47 -0700
> On Mon, May 7, 2018 at 11:08 AM, Alexander Duyck
> <alexander.duyck@gmail.com> wrote:
>> This patch set addresses a number of issues I found while sorting out
>> enabling UDP GSO Segmentation support for ixgbe/ixgbevf. Specifically there
>> were a number of issues related to the checksum and such that seemed to
>> cause either minor irregularities or kernel panics in the case of the
>> offload request being allowed to traverse between name spaces.
>>
>> With this set applied I was able to get UDP GSO traffic to pass over
>> vxlan tunnels in both offloaded modes and non-offloaded modes for ixgbe and
>> ixgbevf.
>>
>> I submitted the driver specific patches earlier as an RFC:
>> https://patchwork.ozlabs.org/project/netdev/list/?series=42477&archive=both&state=*
>>
>> v2: Updated patches based on feedback from Eric Dumazet
>> Split first patch into several patches based on feedback from Eric
>> v3: Drop patch that was calling pskb_may_pull as it was redundant.
>> Added code to use MANGLED_0 in case of UDP checksum
>> Drop patch adding NETIF_F_GSO_UDP_L4 to list of GSO software offloads
>> Added Acked-by for patches reviewed by Willem and not changed
>
> Just noticed I forgot to update the subject before sending out the
> cover page. I updated it for this reply. If needed I will submit a v4,
> but for now I will leave this out here to finish up review.
I thought it was kinda amusing, because it shows up as the series name
in patchwork too :-)
Series applied with header posting Subj fixed, thanks Alexander.
^ permalink raw reply
* [PATCH bpf-next 0/2] nfp: bpf: add programmable RSS support
From: Jakub Kicinski @ 2018-05-09 2:37 UTC (permalink / raw)
To: alexei.starovoitov, daniel; +Cc: davem, netdev, oss-drivers, Jakub Kicinski
Hi!
This small series adds a feature which extends BPF offload beyond
a pure host processing offload and firmly into the realm of
heterogeneous processing. Allowing offloaded XDP programs to set
the RX queue index opens the door for defining fully programmable
RSS/n-tuple filter replacement. In fact the device datapath will
skip the RSS processing completely if BPF decided on the queue
already, making the XDP program replace part of the standard NIC
datapath.
We hope some day the entire NIC datapath will be defined by BPF :)
Jakub Kicinski (2):
bpf: xdp: allow offloads to store into rx_queue_index
nfp: bpf: support setting the RX queue index
drivers/net/ethernet/netronome/nfp/bpf/fw.h | 1 +
drivers/net/ethernet/netronome/nfp/bpf/jit.c | 47 +++++++++++++++++++
drivers/net/ethernet/netronome/nfp/bpf/main.c | 11 +++++
drivers/net/ethernet/netronome/nfp/bpf/main.h | 8 ++++
.../net/ethernet/netronome/nfp/bpf/verifier.c | 28 ++++++++++-
drivers/net/ethernet/netronome/nfp/nfp_asm.h | 22 +++++----
include/linux/bpf.h | 2 +-
kernel/bpf/verifier.c | 2 +-
net/core/filter.c | 9 +++-
9 files changed, 115 insertions(+), 15 deletions(-)
--
2.17.0
^ permalink raw reply
* [PATCH bpf-next 1/2] bpf: xdp: allow offloads to store into rx_queue_index
From: Jakub Kicinski @ 2018-05-09 2:37 UTC (permalink / raw)
To: alexei.starovoitov, daniel; +Cc: davem, netdev, oss-drivers, Jakub Kicinski
In-Reply-To: <20180509023707.23601-1-jakub.kicinski@netronome.com>
It's fairly easy for offloaded XDP programs to select the RX queue
packets go to. We need a way of expressing this in the software.
Allow write to the rx_queue_index field of struct xdp_md for
device-bound programs.
Skip convert_ctx_access callback entirely for offloads.
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
---
include/linux/bpf.h | 2 +-
kernel/bpf/verifier.c | 2 +-
net/core/filter.c | 9 ++++++++-
3 files changed, 10 insertions(+), 3 deletions(-)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 321969da67b7..a38e474bf7ee 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -627,7 +627,7 @@ bool bpf_offload_dev_match(struct bpf_prog *prog, struct bpf_map *map);
#if defined(CONFIG_NET) && defined(CONFIG_BPF_SYSCALL)
int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr);
-static inline bool bpf_prog_is_dev_bound(struct bpf_prog_aux *aux)
+static inline bool bpf_prog_is_dev_bound(const struct bpf_prog_aux *aux)
{
return aux->offload_requested;
}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index d5e1a6c4165d..d92d9c37affd 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -5215,7 +5215,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
}
}
- if (!ops->convert_ctx_access)
+ if (!ops->convert_ctx_access || bpf_prog_is_dev_bound(env->prog->aux))
return 0;
insn = env->prog->insnsi + delta;
diff --git a/net/core/filter.c b/net/core/filter.c
index cf0d27acf1d1..2336b90e8b26 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4911,8 +4911,15 @@ static bool xdp_is_valid_access(int off, int size,
const struct bpf_prog *prog,
struct bpf_insn_access_aux *info)
{
- if (type == BPF_WRITE)
+ if (type == BPF_WRITE) {
+ if (bpf_prog_is_dev_bound(prog->aux)) {
+ switch (off) {
+ case offsetof(struct xdp_md, rx_queue_index):
+ return __is_valid_xdp_access(off, size);
+ }
+ }
return false;
+ }
switch (off) {
case offsetof(struct xdp_md, data):
--
2.17.0
^ permalink raw reply related
* [PATCH bpf-next 2/2] nfp: bpf: support setting the RX queue index
From: Jakub Kicinski @ 2018-05-09 2:37 UTC (permalink / raw)
To: alexei.starovoitov, daniel; +Cc: davem, netdev, oss-drivers, Jakub Kicinski
In-Reply-To: <20180509023707.23601-1-jakub.kicinski@netronome.com>
BPF has access to all internal FW datapath structures. Including
the structure containing RX queue selection. With little coordination
with the datapath we can let the offloaded BPF select the RX queue.
We just need a way to tell the datapath that queue selection has already
been done and it shouldn't overwrite it. Define a bit to tell datapath
BPF already selected a queue (QSEL_SET), if the selected queue is not
enabled (>= number of enabled queues) datapath will perform normal RSS.
BPF queue selection on the NIC can be used to replace standard
datapath RSS with fully programmable BPF/XDP RSS.
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
---
drivers/net/ethernet/netronome/nfp/bpf/fw.h | 1 +
drivers/net/ethernet/netronome/nfp/bpf/jit.c | 47 +++++++++++++++++++
drivers/net/ethernet/netronome/nfp/bpf/main.c | 11 +++++
drivers/net/ethernet/netronome/nfp/bpf/main.h | 8 ++++
.../net/ethernet/netronome/nfp/bpf/verifier.c | 28 ++++++++++-
drivers/net/ethernet/netronome/nfp/nfp_asm.h | 22 +++++----
6 files changed, 105 insertions(+), 12 deletions(-)
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/fw.h b/drivers/net/ethernet/netronome/nfp/bpf/fw.h
index 3dbc21653ce5..4c7972e3db63 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/fw.h
+++ b/drivers/net/ethernet/netronome/nfp/bpf/fw.h
@@ -50,6 +50,7 @@ enum bpf_cap_tlv_type {
NFP_BPF_CAP_TYPE_ADJUST_HEAD = 2,
NFP_BPF_CAP_TYPE_MAPS = 3,
NFP_BPF_CAP_TYPE_RANDOM = 4,
+ NFP_BPF_CAP_TYPE_QUEUE_SELECT = 5,
};
struct nfp_bpf_cap_tlv_func {
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
index 326a2085d650..a4d3da215863 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
@@ -42,6 +42,7 @@
#include "main.h"
#include "../nfp_asm.h"
+#include "../nfp_net_ctrl.h"
/* --- NFP prog --- */
/* Foreach "multiple" entries macros provide pos and next<n> pointers.
@@ -1470,6 +1471,38 @@ nfp_perf_event_output(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
return 0;
}
+static int
+nfp_queue_select(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+ u32 jmp_tgt;
+
+ jmp_tgt = nfp_prog_current_offset(nfp_prog) + 5;
+
+ /* Make sure the queue id fits into FW field */
+ emit_alu(nfp_prog, reg_none(), reg_a(meta->insn.src_reg * 2),
+ ALU_OP_AND_NOT_B, reg_imm(0xff));
+ emit_br(nfp_prog, BR_BEQ, jmp_tgt, 2);
+
+ /* Set the 'queue selected' bit and the queue value */
+ emit_shf(nfp_prog, pv_qsel_set(nfp_prog),
+ pv_qsel_set(nfp_prog), SHF_OP_OR, reg_imm(1),
+ SHF_SC_L_SHF, PKT_VEL_QSEL_SET_BIT);
+ emit_ld_field(nfp_prog,
+ pv_qsel_val(nfp_prog), 0x1, reg_b(meta->insn.src_reg * 2),
+ SHF_SC_NONE, 0);
+ /* Delay slots end here, we will jump over next instruction if queue
+ * value fits into the field.
+ */
+ emit_ld_field(nfp_prog,
+ pv_qsel_val(nfp_prog), 0x1, reg_imm(NFP_NET_RXR_MAX),
+ SHF_SC_NONE, 0);
+
+ if (!nfp_prog_confirm_current_offset(nfp_prog, jmp_tgt))
+ return -EINVAL;
+
+ return 0;
+}
+
/* --- Callbacks --- */
static int mov_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
@@ -2160,6 +2193,17 @@ mem_stx_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
false, wrp_lmem_store);
}
+static int mem_stx_xdp(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+ switch (meta->insn.off) {
+ case offsetof(struct xdp_md, rx_queue_index):
+ return nfp_queue_select(nfp_prog, meta);
+ }
+
+ WARN_ON_ONCE(1); /* verifier should have rejected bad accesses */
+ return -EOPNOTSUPP;
+}
+
static int
mem_stx(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
unsigned int size)
@@ -2186,6 +2230,9 @@ static int mem_stx2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
static int mem_stx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
+ if (meta->ptr.type == PTR_TO_CTX)
+ if (nfp_prog->type == BPF_PROG_TYPE_XDP)
+ return mem_stx_xdp(nfp_prog, meta);
return mem_stx(nfp_prog, meta, 4);
}
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c
index d72f9e7f42da..f1846d8f59cc 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c
@@ -334,6 +334,13 @@ nfp_bpf_parse_cap_random(struct nfp_app_bpf *bpf, void __iomem *value,
return 0;
}
+static int
+nfp_bpf_parse_cap_qsel(struct nfp_app_bpf *bpf, void __iomem *value, u32 length)
+{
+ bpf->queue_select = true;
+ return 0;
+}
+
static int nfp_bpf_parse_capabilities(struct nfp_app *app)
{
struct nfp_cpp *cpp = app->pf->cpp;
@@ -376,6 +383,10 @@ static int nfp_bpf_parse_capabilities(struct nfp_app *app)
if (nfp_bpf_parse_cap_random(app->priv, value, length))
goto err_release_free;
break;
+ case NFP_BPF_CAP_TYPE_QUEUE_SELECT:
+ if (nfp_bpf_parse_cap_qsel(app->priv, value, length))
+ goto err_release_free;
+ break;
default:
nfp_dbg(cpp, "unknown BPF capability: %d\n", type);
break;
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h
index 82682378d57f..8b143546ae85 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.h
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h
@@ -82,10 +82,16 @@ enum static_regs {
enum pkt_vec {
PKT_VEC_PKT_LEN = 0,
PKT_VEC_PKT_PTR = 2,
+ PKT_VEC_QSEL_SET = 4,
+ PKT_VEC_QSEL_VAL = 6,
};
+#define PKT_VEL_QSEL_SET_BIT 4
+
#define pv_len(np) reg_lm(1, PKT_VEC_PKT_LEN)
#define pv_ctm_ptr(np) reg_lm(1, PKT_VEC_PKT_PTR)
+#define pv_qsel_set(np) reg_lm(1, PKT_VEC_QSEL_SET)
+#define pv_qsel_val(np) reg_lm(1, PKT_VEC_QSEL_VAL)
#define stack_reg(np) reg_a(STATIC_REG_STACK)
#define stack_imm(np) imm_b(np)
@@ -139,6 +145,7 @@ enum pkt_vec {
* @helpers.perf_event_output: output perf event to a ring buffer
*
* @pseudo_random: FW initialized the pseudo-random machinery (CSRs)
+ * @queue_select: BPF can set the RX queue ID in packet vector
*/
struct nfp_app_bpf {
struct nfp_app *app;
@@ -181,6 +188,7 @@ struct nfp_app_bpf {
} helpers;
bool pseudo_random;
+ bool queue_select;
};
enum nfp_bpf_map_use {
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
index e163f3cfa47d..844a9be6e55a 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
@@ -467,6 +467,30 @@ nfp_bpf_check_ptr(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
return 0;
}
+static int
+nfp_bpf_check_store(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
+ struct bpf_verifier_env *env)
+{
+ const struct bpf_reg_state *reg = cur_regs(env) + meta->insn.dst_reg;
+
+ if (reg->type == PTR_TO_CTX) {
+ if (nfp_prog->type == BPF_PROG_TYPE_XDP) {
+ /* XDP ctx accesses must be 4B in size */
+ switch (meta->insn.off) {
+ case offsetof(struct xdp_md, rx_queue_index):
+ if (nfp_prog->bpf->queue_select)
+ goto exit_check_ptr;
+ pr_vlog(env, "queue selection not supported by FW\n");
+ return -EOPNOTSUPP;
+ }
+ }
+ pr_vlog(env, "unsupported store to context field\n");
+ return -EOPNOTSUPP;
+ }
+exit_check_ptr:
+ return nfp_bpf_check_ptr(nfp_prog, meta, env, meta->insn.dst_reg);
+}
+
static int
nfp_bpf_check_xadd(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
struct bpf_verifier_env *env)
@@ -522,8 +546,8 @@ nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx)
return nfp_bpf_check_ptr(nfp_prog, meta, env,
meta->insn.src_reg);
if (is_mbpf_store(meta))
- return nfp_bpf_check_ptr(nfp_prog, meta, env,
- meta->insn.dst_reg);
+ return nfp_bpf_check_store(nfp_prog, meta, env);
+
if (is_mbpf_xadd(meta))
return nfp_bpf_check_xadd(nfp_prog, meta, env);
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_asm.h b/drivers/net/ethernet/netronome/nfp/nfp_asm.h
index 5f2b2f24f4fa..faa4e131c136 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_asm.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.h
@@ -183,16 +183,18 @@ enum shf_sc {
#define OP_ALU_DST_LMEXTN 0x80000000000ULL
enum alu_op {
- ALU_OP_NONE = 0x00,
- ALU_OP_ADD = 0x01,
- ALU_OP_NOT = 0x04,
- ALU_OP_ADD_2B = 0x05,
- ALU_OP_AND = 0x08,
- ALU_OP_SUB_C = 0x0d,
- ALU_OP_ADD_C = 0x11,
- ALU_OP_OR = 0x14,
- ALU_OP_SUB = 0x15,
- ALU_OP_XOR = 0x18,
+ ALU_OP_NONE = 0x00,
+ ALU_OP_ADD = 0x01,
+ ALU_OP_NOT = 0x04,
+ ALU_OP_ADD_2B = 0x05,
+ ALU_OP_AND = 0x08,
+ ALU_OP_AND_NOT_A = 0x0c,
+ ALU_OP_SUB_C = 0x0d,
+ ALU_OP_AND_NOT_B = 0x10,
+ ALU_OP_ADD_C = 0x11,
+ ALU_OP_OR = 0x14,
+ ALU_OP_SUB = 0x15,
+ ALU_OP_XOR = 0x18,
};
enum alu_dst_ab {
--
2.17.0
^ permalink raw reply related
* Re: [PATCH 2/2] alx: add disable_wol paramenter
From: AceLan Kao @ 2018-05-09 2:40 UTC (permalink / raw)
To: David Miller
Cc: Andrew Lunn, James Cliburn, Chris Snook, rakesh, netdev,
Linux-Kernel@Vger. Kernel. Org, Emily Chien
In-Reply-To: <CAFv23QmkLiuFA6C4YFsO3hoTEE--2bT4H2uxaMqoY30G4_QJGw@mail.gmail.com>
Hi,
I haven't received any response for about a month.
I'm still here hoping you can consider accepting the WoL patch.
Without that patch, people have no chance to bump into the issue and
have no chance to fix it.
Moreover, it leads to the dkms package being spread around, and it'll
become a more annoying issue when UEFI secure boot is enabled[1].
Please re-consider it to enable WoL again and set it to disable by default,
so that we/user have a chance to examine the feature and have a chance
to find a real fix for it.
Thanks.
1. https://bugzilla.kernel.org/show_bug.cgi?id=61651
Best regards,
AceLan Kao.
2018-04-24 11:45 GMT+08:00 AceLan Kao <acelan.kao@canonical.com>:
> Hi,
>
> May I know the final decision of this patch?
> Thanks.
>
> Best regards,
> AceLan Kao.
>
> 2018-04-10 10:40 GMT+08:00 AceLan Kao <acelan.kao@canonical.com>:
>> The problem is I don't have a machine with that wakeup issue, and I
>> need WoL feature.
>> Instead of spreading "alx with WoL" dkms package everywhere, I would
>> like to see it's supported in the driver and is disabled by default.
>>
>> Moreover, the wakeup issue may come from old Atheros chips, or result
>> from buggy BIOS.
>> With the WoL has been removed from the driver, no one will report
>> issue about that, and we don't have any chance to find a fix for it.
>>
>> Adding this feature back is not papering over the issue; it
>> makes people have a chance to examine this feature.
>>
>> 2018-04-09 22:50 GMT+08:00 David Miller <davem@davemloft.net>:
>>> From: Andrew Lunn <andrew@lunn.ch>
>>> Date: Mon, 9 Apr 2018 14:39:10 +0200
>>>
>>>> On Mon, Apr 09, 2018 at 07:35:14PM +0800, AceLan Kao wrote:
>>>>> The WoL feature was reported broken and will lead to
>>>>> the system resume immediately after suspending.
>>>>> This symptom is not happening on every system, so adding
>>>>> disable_wol option and disable WoL by default to prevent the issue from
>>>>> happening again.
>>>>
>>>>> const char alx_drv_name[] = "alx";
>>>>>
>>>>> +/* disable WoL by default */
>>>>> +bool disable_wol = 1;
>>>>> +module_param(disable_wol, bool, 0);
>>>>> +MODULE_PARM_DESC(disable_wol, "Disable Wake on Lan feature");
>>>>> +
>>>>
>>>> Hi AceLan
>>>>
>>>> This seems like you are papering over the cracks. And module
>>>> parameters are not liked.
>>>>
>>>> Please try to find the real problem.
>>>
>>> Agreed.
^ permalink raw reply
* Re: [PATCH bpf-next 0/2] nfp: bpf: add programmable RSS support
From: Alexei Starovoitov @ 2018-05-09 2:42 UTC (permalink / raw)
To: Jakub Kicinski; +Cc: daniel, davem, netdev, oss-drivers
In-Reply-To: <20180509023707.23601-1-jakub.kicinski@netronome.com>
On Tue, May 08, 2018 at 07:37:05PM -0700, Jakub Kicinski wrote:
> Hi!
>
> This small series adds a feature which extends BPF offload beyond
> a pure host processing offload and firmly into the realm of
> heterogeneous processing. Allowing offloaded XDP programs to set
> the RX queue index opens the door for defining fully programmable
> RSS/n-tuple filter replacement. In fact the device datapath will
> skip the RSS processing completely if BPF decided on the queue
> already, making the XDP program replace part of the standard NIC
> datapath.
Absolutely love it!
Huge feature enabled by such tiny diff.
For the set:
Acked-by: Alexei Starovoitov <ast@kernel.org>
^ permalink raw reply
* [PATCH bpf] nfp: bpf: allow zero-length capabilities
From: Jakub Kicinski @ 2018-05-09 2:42 UTC (permalink / raw)
To: alexei.starovoitov, daniel; +Cc: oss-drivers, netdev, Jakub Kicinski
Some BPF capabilities carry no value, they simply indicate feature
is present. Our capability parsing loop will exit early if last
capability is zero-length because it's looking for more than 8 bytes
of data (8B is our TLV header length). Allow the last capability to
be zero-length.
This bug would lead to driver failing to probe with the following error
if the last capability FW advertises is zero-length:
nfp: BPF capabilities left after parsing, parsed:92 total length:100
nfp: invalid BPF capabilities at offset:92
Note the "parsed" and "length" values are 8 apart.
No shipping FW runs into this issue, but we can't guarantee that will
remain the case.
Fixes: 77a844ee650c ("nfp: bpf: prepare for parsing BPF FW capabilities")
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
---
drivers/net/ethernet/netronome/nfp/bpf/main.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c
index 1dc424685f4e..35fb31f682af 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c
@@ -335,7 +335,7 @@ static int nfp_bpf_parse_capabilities(struct nfp_app *app)
return PTR_ERR(mem) == -ENOENT ? 0 : PTR_ERR(mem);
start = mem;
- while (mem - start + 8 < nfp_cpp_area_size(area)) {
+ while (mem - start + 8 <= nfp_cpp_area_size(area)) {
u8 __iomem *value;
u32 type, length;
--
2.17.0
^ permalink raw reply related
* Re: ICMP redirect and VRF
From: David Ahern @ 2018-05-09 2:47 UTC (permalink / raw)
To: Ben Greear, netdev
In-Reply-To: <eef63cf1-64db-d9ed-7aaf-b6b4e60bb07c@candelatech.com>
On 5/8/18 3:27 PM, Ben Greear wrote:
> While debugging some other problem today on a system using ip rules
> instead of
> VRF, I ran into a case where the remote router was sending back ICMP
> redirects.
>
> That got me thinking...where would these routes get stored in a VRF
> scenario?
>
> Would it magically go to the correct VRF routing table based on the
> incoming interface
> for the ICMP redirect response?
>
Yes. And I expect you to let me know if your mileage varies.
^ permalink raw reply
* Re: KASAN: use-after-free Read in work_is_static_object
From: Eric Biggers @ 2018-05-09 2:49 UTC (permalink / raw)
To: Dmitry Vyukov
Cc: syzbot, David Miller, Cong Wang, Tom Herbert, Eric Dumazet,
Eric Biggers, netdev, LKML, syzkaller-bugs
In-Reply-To: <CACT4Y+b2SHbAixYaGN-Q3zTBDskafTR6aXSMu=qXtEb4iwco+A@mail.gmail.com>
On Mon, Jan 08, 2018 at 12:58:11PM +0100, 'Dmitry Vyukov' via syzkaller-bugs wrote:
> On Mon, Jan 8, 2018 at 12:55 PM, Dmitry Vyukov <dvyukov@google.com> wrote:
> > On Mon, Jan 8, 2018 at 12:43 PM, syzbot
> > <syzbot+40396d275b34b0dd5dad@syzkaller.appspotmail.com> wrote:
> >> Hello,
> >>
> >> syzkaller hit the following crash on
> >> f66faae2f80a45feafc04ce63ef744ac4b6e8c05
> >> git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next.git/master
> >> compiler: gcc (GCC) 7.1.1 20170620
> >> .config is attached
> >> Raw console output is attached.
> >> Unfortunately, I don't have any reproducer for this bug yet.
> >>
> >>
> >> IMPORTANT: if you fix the bug, please add the following tag to the commit:
> >> Reported-by: syzbot+40396d275b34b0dd5dad@syzkaller.appspotmail.com
> >> It will help syzbot understand when the bug is fixed. See footer for
> >> details.
> >> If you forward the report, please keep this part and the footer.
> >
> >
> > This looks like an issue in kcm sockets, so +kcm maintainers.
>
> FTR, guilty file extraction was fixed to ignore kernel/workqueue.c:
> https://github.com/google/syzkaller/commit/1014e5506e35965f3bad13fabb08666134d0b273
> Presumably for bugs in workqueue usually the caller is guilty.
>
>
> >> device ip6_vti0 entered promiscuous mode
> >> ==================================================================
> >> BUG: KASAN: use-after-free in constant_test_bit
> >> arch/x86/include/asm/bitops.h:325 [inline]
> >> BUG: KASAN: use-after-free in work_is_static_object+0x39/0x40
> >> kernel/workqueue.c:443
> >> Read of size 8 at addr ffff8801beca5788 by task syz-executor2/12922
> >>
> >> CPU: 0 PID: 12922 Comm: syz-executor2 Not tainted 4.15.0-rc5+ #178
> >> Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS
> >> Google 01/01/2011
> >> Call Trace:
> >> __dump_stack lib/dump_stack.c:17 [inline]
> >> dump_stack+0x194/0x257 lib/dump_stack.c:53
> >> print_address_description+0x73/0x250 mm/kasan/report.c:252
> >> kasan_report_error mm/kasan/report.c:351 [inline]
> >> kasan_report+0x25b/0x340 mm/kasan/report.c:409
> >> __asan_report_load8_noabort+0x14/0x20 mm/kasan/report.c:430
> >> constant_test_bit arch/x86/include/asm/bitops.h:325 [inline]
> >> work_is_static_object+0x39/0x40 kernel/workqueue.c:443
> >> debug_object_activate+0x36f/0x730 lib/debugobjects.c:470
> >> debug_work_activate kernel/workqueue.c:492 [inline]
> >> __queue_work+0x163/0x1230 kernel/workqueue.c:1381
> >> queue_work_on+0x16a/0x1c0 kernel/workqueue.c:1487
> >> queue_work include/linux/workqueue.h:488 [inline]
> >> strp_check_rcv+0x25/0x30 net/strparser/strparser.c:552
> >> kcm_attach net/kcm/kcmsock.c:1439 [inline]
> >> kcm_attach_ioctl net/kcm/kcmsock.c:1460 [inline]
> >> kcm_ioctl+0x82f/0x1690 net/kcm/kcmsock.c:1665
> >> sock_do_ioctl+0x65/0xb0 net/socket.c:956
> >> sock_ioctl+0x2c2/0x440 net/socket.c:1053
> >> vfs_ioctl fs/ioctl.c:46 [inline]
> >> do_vfs_ioctl+0x1b1/0x1520 fs/ioctl.c:686
> >> SYSC_ioctl fs/ioctl.c:701 [inline]
> >> SyS_ioctl+0x8f/0xc0 fs/ioctl.c:692
> >> entry_SYSCALL_64_fastpath+0x23/0x9a
> >> RIP: 0033:0x452ac9
> >> RSP: 002b:00007f1bbd860c58 EFLAGS: 00000212 ORIG_RAX: 0000000000000010
> >> RAX: ffffffffffffffda RBX: 000000000071bea0 RCX: 0000000000452ac9
> >> RDX: 0000000020954ff8 RSI: 00000000000089e0 RDI: 0000000000000017
> >> RBP: 000000000000057b R08: 0000000000000000 R09: 0000000000000000
> >> R10: 0000000000000000 R11: 0000000000000212 R12: 00000000006f6428
> >> R13: 00000000ffffffff R14: 00007f1bbd8616d4 R15: 0000000000000000
> >>
> >> Allocated by task 12922:
> >> save_stack+0x43/0xd0 mm/kasan/kasan.c:447
> >> set_track mm/kasan/kasan.c:459 [inline]
> >> kasan_kmalloc+0xad/0xe0 mm/kasan/kasan.c:551
> >> kasan_slab_alloc+0x12/0x20 mm/kasan/kasan.c:489
> >> kmem_cache_alloc+0x12e/0x760 mm/slab.c:3544
> >> kmem_cache_zalloc include/linux/slab.h:678 [inline]
> >> kcm_attach net/kcm/kcmsock.c:1394 [inline]
> >> kcm_attach_ioctl net/kcm/kcmsock.c:1460 [inline]
> >> kcm_ioctl+0x2d2/0x1690 net/kcm/kcmsock.c:1665
> >> sock_do_ioctl+0x65/0xb0 net/socket.c:956
> >> sock_ioctl+0x2c2/0x440 net/socket.c:1053
> >> vfs_ioctl fs/ioctl.c:46 [inline]
> >> do_vfs_ioctl+0x1b1/0x1520 fs/ioctl.c:686
> >> SYSC_ioctl fs/ioctl.c:701 [inline]
> >> SyS_ioctl+0x8f/0xc0 fs/ioctl.c:692
> >> entry_SYSCALL_64_fastpath+0x23/0x9a
> >>
> >> Freed by task 12929:
> >> save_stack+0x43/0xd0 mm/kasan/kasan.c:447
> >> set_track mm/kasan/kasan.c:459 [inline]
> >> kasan_slab_free+0x71/0xc0 mm/kasan/kasan.c:524
> >> __cache_free mm/slab.c:3488 [inline]
> >> kmem_cache_free+0x83/0x2a0 mm/slab.c:3746
> >> kcm_unattach+0xe53/0x1510 net/kcm/kcmsock.c:1563
> >> kcm_unattach_ioctl net/kcm/kcmsock.c:1608 [inline]
> >> kcm_ioctl+0xe54/0x1690 net/kcm/kcmsock.c:1675
> >> sock_do_ioctl+0x65/0xb0 net/socket.c:956
> >> sock_ioctl+0x2c2/0x440 net/socket.c:1053
> >> vfs_ioctl fs/ioctl.c:46 [inline]
> >> do_vfs_ioctl+0x1b1/0x1520 fs/ioctl.c:686
> >> SYSC_ioctl fs/ioctl.c:701 [inline]
> >> SyS_ioctl+0x8f/0xc0 fs/ioctl.c:692
> >> entry_SYSCALL_64_fastpath+0x23/0x9a
> >>
> >> The buggy address belongs to the object at ffff8801beca56c0
> >> which belongs to the cache kcm_psock_cache of size 544
> >> The buggy address is located 200 bytes inside of
> >> 544-byte region [ffff8801beca56c0, ffff8801beca58e0)
> >> The buggy address belongs to the page:
> >> page:000000005180a80a count:1 mapcount:0 mapping:0000000058aa9a5c index:0x0
> >> compound_mapcount: 0
> >> flags: 0x2fffc0000008100(slab|head)
> >> raw: 02fffc0000008100 ffff8801beca40c0 0000000000000000 000000010000000b
> >> raw: ffff8801d31e8a48 ffff8801d31e8a48 ffff8801d3f6a380 0000000000000000
> >> page dumped because: kasan: bad access detected
> >>
> >> Memory state around the buggy address:
> >> ffff8801beca5680: fc fc fc fc fc fc fc fc fb fb fb fb fb fb fb fb
> >> ffff8801beca5700: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
> >>>
> >>> ffff8801beca5780: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
> >>
> >> ^
> >> ffff8801beca5800: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
> >> ffff8801beca5880: fb fb fb fb fb fb fb fb fb fb fb fb fc fc fc fc
> >> ==================================================================
> >>
> >>
> >> ---
> >> This bug is generated by a dumb bot. It may contain errors.
> >> See https://goo.gl/tpsmEJ for details.
> >> Direct all questions to syzkaller@googlegroups.com.
> >>
> >> syzbot will keep track of this bug report.
> >> If you forgot to add the Reported-by tag, once the fix for this bug is
> >> merged
> >> into any tree, please reply to this email with:
> >> #syz fix: exact-commit-title
This only happened 3 times, with no reproducer, and last occurred 105 days ago
(Jan 23, on commit 2310035fa03f6). It looks *very* similar to
"KASAN: use-after-free Read in get_work_pool"
(https://syzkaller.appspot.com/bug?extid=ea75c0ffcd353d32515f064aaebefc5279e6161e)
which was fixed by commit 2cc683e88c0c99
("kcm: lock lower socket in kcm_attach"),
so I'll make an educated guess and say this one was fixed by that too...
#syz fix: kcm: lock lower socket in kcm_attach
- Eric
^ permalink raw reply
* Re: BUG: spinlock bad magic in tun_do_read
From: Cong Wang @ 2018-05-09 2:50 UTC (permalink / raw)
To: Eric Dumazet
Cc: syzbot, David Miller, Eric Dumazet, Jason Wang, LKML,
Michael S. Tsirkin, Linux Kernel Network Developers, Petar Penkov,
Sabrina Dubroca, syzkaller-bugs
In-Reply-To: <997afbee-1e89-aa5b-5b15-cad5a073cc38@gmail.com>
On Mon, May 7, 2018 at 11:04 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
>
>
> On 05/07/2018 10:54 PM, Cong Wang wrote:
>>
>> Yeah, we should return early before hitting this uninitialized ptr ring...
>> Something like:
>>
>> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
>> index ef33950a45d9..638c87a95247 100644
>> --- a/drivers/net/tun.c
>> +++ b/drivers/net/tun.c
>> @@ -2128,6 +2128,9 @@ static void *tun_ring_recv(struct tun_file
>> *tfile, int noblock, int *err)
>> void *ptr = NULL;
>> int error = 0;
>>
>> + if (!tfile->tx_ring.queue)
>> + goto out;
>> +
>>
>> Or, checking if tun is detached...
>>
>>
>
> tx_ring was properly initialized when first ptr_ring_consume() at line 2131 was attempted.
>
> The bug happens later at line 2143 , after a schedule() call, line 2155
>
> So a single check at function prologue wont solve the case the thread had to sleep,
> then some uninit happened.
Very good point. RTNL lock is supposed to protect cleanup path, but I don't
think we can acquire RTNL for tun_chr_read_iter() path...
^ permalink raw reply
* Re: [PATCH net] r8169: fix powering up RTL8168h
From: David Miller @ 2018-05-09 2:55 UTC (permalink / raw)
To: hkallweit1; +Cc: nic_swsd, ojab, netdev
In-Reply-To: <ff344a31-4cb8-1e49-7b5e-3a729125444b@gmail.com>
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Mon, 7 May 2018 21:11:21 +0200
> Since commit a92a08499b1f "r8169: improve runtime pm in general and
> suspend unused ports" interfaces w/o link are runtime-suspended after
> 10s. On systems where drivers take longer to load this can lead to the
> situation that the interface is runtime-suspended already when it's
> initially brought up.
> This shouldn't be a problem because rtl_open() resumes MAC/PHY.
> However with at least one chip version the interface doesn't properly
> come up, as reported here:
> https://bugzilla.kernel.org/show_bug.cgi?id=199549
>
> The vendor driver uses a delay to give certain chip versions some
> time to resume before starting the PHY configuration. So let's do
> the same. I don't know which chip versions may be affected,
> therefore apply this delay always.
>
> This patch was reported to fix the issue for RTL8168h.
> I was able to reproduce the issue on an Asus H310I-Plus which also
> uses a RTL8168h. Also in my case the patch fixed the issue.
>
> Reported-by: Slava Kardakov <ojab@ojab.ru>
> Tested-by: Slava Kardakov <ojab@ojab.ru>
> Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Applied and queued up for -stable.
> This patch will not apply to net-next as it conflicts with other
> changes which have been done in the meantime. So I'll send a
> separate patch for net-next.
That's fine, I'll deal with it when I next merge net into net-next.
Sending another copy of this patch for net-next is not the way to deal
with this. Just make me aware of the impending conflict and I will
resolve it when I see it.
Thanks.
^ permalink raw reply
* [PATCH net-next] net: dsa: fix added_by_user switchdev notification
From: Vivien Didelot @ 2018-05-09 3:03 UTC (permalink / raw)
To: netdev
Cc: linux-kernel, kernel, Vivien Didelot, Petr Machata, jiri, idosch,
ivecera, davem, stephen, andrew, f.fainelli, nikolay, bridge
Commit 161d82de1ff8 ("net: bridge: Notify about !added_by_user FDB
entries") causes the below oops when bringing up a slave interface,
because dsa_port_fdb_add is still scheduled, but with a NULL address.
To fix this, keep the dsa_slave_switchdev_event function agnostic of the
notified info structure and handle the added_by_user flag in the
specific dsa_slave_switchdev_event_work function.
[ 75.512263] Unable to handle kernel NULL pointer dereference at virtual address 00000000
[ 75.519063] pgd = (ptrval)
[ 75.520545] [00000000] *pgd=00000000
[ 75.522839] Internal error: Oops: 17 [#1] ARM
[ 75.525898] Modules linked in:
[ 75.527673] CPU: 0 PID: 9 Comm: kworker/u2:1 Not tainted 4.17.0-rc2 #78
[ 75.532988] Hardware name: Freescale Vybrid VF5xx/VF6xx (Device Tree)
[ 75.538153] Workqueue: dsa_ordered dsa_slave_switchdev_event_work
[ 75.542970] PC is at mv88e6xxx_port_db_load_purge+0x60/0x1b0
[ 75.547341] LR is at mdiobus_read_nested+0x6c/0x78
[ 75.550833] pc : [<804cd5c0>] lr : [<804bba84>] psr: 60070013
[ 75.555796] sp : 9f54bd78 ip : 9f54bd87 fp : 9f54bddc
[ 75.559719] r10: 00000000 r9 : 0000000e r8 : 9f6a6010
[ 75.563643] r7 : 00000000 r6 : 81203048 r5 : 9f6a6010 r4 : 9f6a601c
[ 75.568867] r3 : 00000000 r2 : 00000000 r1 : 0000000d r0 : 00000000
[ 75.574094] Flags: nZCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment none
[ 75.579933] Control: 10c53c7d Table: 9de20059 DAC: 00000051
[ 75.584384] Process kworker/u2:1 (pid: 9, stack limit = 0x(ptrval))
[ 75.589349] Stack: (0x9f54bd78 to 0x9f54c000)
[ 75.592406] bd60: 00000000 00000000
[ 75.599295] bd80: 00000391 9f299d10 9f299d68 8014317c 9f7f0000 8120af00 00006dc2 00000000
[ 75.606186] bda0: 8120af00 00000000 9f54bdec 1c9f5d92 8014317c 9f6a601c 9f6a6010 00000000
[ 75.613076] bdc0: 00000000 00000000 9dd1141c 8125a0b4 9f54be0c 9f54bde0 804cd8a8 804cd56c
[ 75.619966] bde0: 0000000e 80143680 00000001 9dce9c1c 81203048 9dce9c10 00000003 00000000
[ 75.626858] be00: 9f54be5c 9f54be10 806abcac 804cd864 9f54be54 80143664 8014317c 80143054
[ 75.633748] be20: ffcaa81d 00000000 812030b0 1c9f5d92 00000000 81203048 9f54beb4 00000003
[ 75.640639] be40: ffffffff 00000000 9dd1141c 8125a0b4 9f54be84 9f54be60 80138e98 806abb18
[ 75.647529] be60: 81203048 9ddc4000 9dce9c54 9f72a300 00000000 00000000 9f54be9c 9f54be88
[ 75.654420] be80: 801390bc 80138e50 00000000 9dce9c54 9f54beac 9f54bea0 806a9524 801390a0
[ 75.661310] bea0: 9f54bedc 9f54beb0 806a9c7c 806a950c 9f54becc 00000000 00000000 00000000
[ 75.668201] bec0: 9f540000 1c9f5d92 805fe604 9ddffc00 9f54befc 9f54bee0 806ab228 806a9c38
[ 75.675092] bee0: 806ab178 9ddffc00 9f4c1900 9f40d200 9f54bf34 9f54bf00 80131e30 806ab184
[ 75.681983] bf00: 9f40d214 9f54a038 9f40d200 9f40d200 9f4c1918 812119a0 9f40d214 9f54a038
[ 75.688873] bf20: 9f40d200 9f4c1900 9f54bf7c 9f54bf38 80132124 80131d1c 9f5f2dd8 00000000
[ 75.695764] bf40: 812119a0 9f54a038 812119a0 81259c5b 9f5f2dd8 9f5f2dc0 9f53dbc0 00000000
[ 75.702655] bf60: 9f4c1900 801320b4 9f5f2dd8 9f4f7e88 9f54bfac 9f54bf80 80137ad0 801320c0
[ 75.709544] bf80: 9f54a000 9f53dbc0 801379a0 00000000 00000000 00000000 00000000 00000000
[ 75.716434] bfa0: 00000000 9f54bfb0 801010e8 801379ac 00000000 00000000 00000000 00000000
[ 75.723324] bfc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
[ 75.730206] bfe0: 00000000 00000000 00000000 00000000 00000013 00000000 00000000 00000000
[ 75.737083] Backtrace:
[ 75.738252] [<804cd560>] (mv88e6xxx_port_db_load_purge) from [<804cd8a8>] (mv88e6xxx_port_fdb_add+0x50/0x68)
[ 75.746795] r10:8125a0b4 r9:9dd1141c r8:00000000 r7:00000000 r6:00000000 r5:9f6a6010
[ 75.753323] r4:9f6a601c
[ 75.754570] [<804cd858>] (mv88e6xxx_port_fdb_add) from [<806abcac>] (dsa_switch_event+0x1a0/0x660)
[ 75.762238] r8:00000000 r7:00000003 r6:9dce9c10 r5:81203048 r4:9dce9c1c
[ 75.767655] [<806abb0c>] (dsa_switch_event) from [<80138e98>] (notifier_call_chain+0x54/0x94)
[ 75.774893] r10:8125a0b4 r9:9dd1141c r8:00000000 r7:ffffffff r6:00000003 r5:9f54beb4
[ 75.781423] r4:81203048
[ 75.782672] [<80138e44>] (notifier_call_chain) from [<801390bc>] (raw_notifier_call_chain+0x28/0x30)
[ 75.790514] r9:00000000 r8:00000000 r7:9f72a300 r6:9dce9c54 r5:9ddc4000 r4:81203048
[ 75.796982] [<80139094>] (raw_notifier_call_chain) from [<806a9524>] (dsa_port_notify+0x24/0x38)
[ 75.804483] [<806a9500>] (dsa_port_notify) from [<806a9c7c>] (dsa_port_fdb_add+0x50/0x6c)
[ 75.811371] [<806a9c2c>] (dsa_port_fdb_add) from [<806ab228>] (dsa_slave_switchdev_event_work+0xb0/0x10c)
[ 75.819635] r4:9ddffc00
[ 75.820885] [<806ab178>] (dsa_slave_switchdev_event_work) from [<80131e30>] (process_one_work+0x120/0x3a4)
[ 75.829241] r6:9f40d200 r5:9f4c1900 r4:9ddffc00 r3:806ab178
[ 75.833612] [<80131d10>] (process_one_work) from [<80132124>] (worker_thread+0x70/0x574)
[ 75.840415] r10:9f4c1900 r9:9f40d200 r8:9f54a038 r7:9f40d214 r6:812119a0 r5:9f4c1918
[ 75.846945] r4:9f40d200
[ 75.848191] [<801320b4>] (worker_thread) from [<80137ad0>] (kthread+0x130/0x160)
[ 75.854300] r10:9f4f7e88 r9:9f5f2dd8 r8:801320b4 r7:9f4c1900 r6:00000000 r5:9f53dbc0
[ 75.860830] r4:9f5f2dc0
[ 75.862076] [<801379a0>] (kthread) from [<801010e8>] (ret_from_fork+0x14/0x2c)
[ 75.867999] Exception stack(0x9f54bfb0 to 0x9f54bff8)
[ 75.871753] bfa0: 00000000 00000000 00000000 00000000
[ 75.878640] bfc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
[ 75.885519] bfe0: 00000000 00000000 00000000 00000000 00000013 00000000
[ 75.890844] r10:00000000 r9:00000000 r8:00000000 r7:00000000 r6:00000000 r5:801379a0
[ 75.897377] r4:9f53dbc0 r3:9f54a000
[ 75.899663] Code: e3a02000 e3a03000 e14b26f4 e24bc055 (e5973000)
[ 75.904575] ---[ end trace fbca818a124dbf0d ]---
Fixes: 816a3bed9549 ("switchdev: Add fdb.added_by_user to switchdev notifications")
Signed-off-by: Vivien Didelot <vivien.didelot@savoirfairelinux.com>
---
@petr I expect the same issue with Rocker, but I haven't tested it.
net/dsa/slave.c | 12 +++++++-----
1 file changed, 7 insertions(+), 5 deletions(-)
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index c287f1ef964c..746ab428a17a 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -1395,6 +1395,9 @@ static void dsa_slave_switchdev_event_work(struct work_struct *work)
switch (switchdev_work->event) {
case SWITCHDEV_FDB_ADD_TO_DEVICE:
fdb_info = &switchdev_work->fdb_info;
+ if (!fdb_info->added_by_user)
+ break;
+
err = dsa_port_fdb_add(dp, fdb_info->addr, fdb_info->vid);
if (err) {
netdev_dbg(dev, "fdb add failed err=%d\n", err);
@@ -1406,6 +1409,9 @@ static void dsa_slave_switchdev_event_work(struct work_struct *work)
case SWITCHDEV_FDB_DEL_TO_DEVICE:
fdb_info = &switchdev_work->fdb_info;
+ if (!fdb_info->added_by_user)
+ break;
+
err = dsa_port_fdb_del(dp, fdb_info->addr, fdb_info->vid);
if (err) {
netdev_dbg(dev, "fdb del failed err=%d\n", err);
@@ -1441,7 +1447,6 @@ static int dsa_slave_switchdev_event(struct notifier_block *unused,
unsigned long event, void *ptr)
{
struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
- struct switchdev_notifier_fdb_info *fdb_info = ptr;
struct dsa_switchdev_event_work *switchdev_work;
if (!dsa_slave_dev_check(dev))
@@ -1459,10 +1464,7 @@ static int dsa_slave_switchdev_event(struct notifier_block *unused,
switch (event) {
case SWITCHDEV_FDB_ADD_TO_DEVICE: /* fall through */
case SWITCHDEV_FDB_DEL_TO_DEVICE:
- if (!fdb_info->added_by_user)
- break;
- if (dsa_slave_switchdev_fdb_work_init(switchdev_work,
- fdb_info))
+ if (dsa_slave_switchdev_fdb_work_init(switchdev_work, ptr))
goto err_fdb_work_init;
dev_hold(dev);
break;
--
2.17.0
^ permalink raw reply related
* Re: BUG: spinlock bad magic in tun_do_read
From: Jason Wang @ 2018-05-09 3:38 UTC (permalink / raw)
To: Cong Wang, Eric Dumazet
Cc: syzbot, David Miller, Eric Dumazet, LKML, Michael S. Tsirkin,
Linux Kernel Network Developers, Petar Penkov, Sabrina Dubroca,
syzkaller-bugs
In-Reply-To: <CAM_iQpW+vrecKuWTJ+zHt11ge7xPRWt=4GmZJxJ5Ffp_awV-ag@mail.gmail.com>
On 2018年05月09日 10:50, Cong Wang wrote:
> On Mon, May 7, 2018 at 11:04 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
>>
>> On 05/07/2018 10:54 PM, Cong Wang wrote:
>>> Yeah, we should return early before hitting this uninitialized ptr ring...
>>> Something like:
>>>
>>> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
>>> index ef33950a45d9..638c87a95247 100644
>>> --- a/drivers/net/tun.c
>>> +++ b/drivers/net/tun.c
>>> @@ -2128,6 +2128,9 @@ static void *tun_ring_recv(struct tun_file
>>> *tfile, int noblock, int *err)
>>> void *ptr = NULL;
>>> int error = 0;
>>>
>>> + if (!tfile->tx_ring.queue)
>>> + goto out;
>>> +
>>>
>>> Or, checking if tun is detached...
>>>
>>>
>> tx_ring was properly initialized when first ptr_ring_consume() at line 2131 was attempted.
>>
>> The bug happens later at line 2143 , after a schedule() call, line 2155
>>
>> So a single check at function prologue wont solve the case the thread had to sleep,
>> then some uninit happened.
>
> Very good point. RTNL lock is supposed to protect cleanup path, but I don't
> think we can acquire RTNL for tun_chr_read_iter() path...
I think the root cause is we try to initialize ptr ring during TUNSETIFF
since the length depends on the dev->tx_queue_len and try to destroy it
when device is gone. We can solve this by initializing a zero size
ptr_ring during open() and resize if necessary. Then there is no need for
any workaround like memset and checking against NULL.
Let me try to cook a patch for this.
Thanks
^ permalink raw reply
* Re: [RFC v3 4/5] virtio_ring: add event idx support in packed ring
From: Jason Wang @ 2018-05-09 3:43 UTC (permalink / raw)
To: Tiwei Bie
Cc: Michael S. Tsirkin, virtualization, linux-kernel, netdev, wexu,
jfreimann
In-Reply-To: <20180508094406.qjlaism3hqy4hvjd@debian>
On 2018年05月08日 17:44, Tiwei Bie wrote:
> On Tue, May 08, 2018 at 05:34:40PM +0800, Jason Wang wrote:
>> On 2018年05月08日 17:16, Tiwei Bie wrote:
>>> On Tue, May 08, 2018 at 03:16:53PM +0800, Jason Wang wrote:
>>>> On 2018年05月08日 14:44, Tiwei Bie wrote:
>>>>> On Tue, May 08, 2018 at 01:40:40PM +0800, Jason Wang wrote:
>>>>>> On 2018年05月08日 11:05, Jason Wang wrote:
>>>>>>>> Because in virtqueue_enable_cb_delayed(), we may set an
>>>>>>>> event_off which is bigger than new and both of them have
>>>>>>>> wrapped. And in this case, although new is smaller than
>>>>>>>> event_off (i.e. the third param -- old), new shouldn't
>>>>>>>> add vq->num, and actually we are expecting a very big
>>>>>>>> idx diff.
>>>>>>> Yes, so to calculate distance correctly between event and new, we just
>>>>>>> need to compare the wrap counter and return false if it doesn't match
>>>>>>> without the need to try to add vq.num here.
>>>>>>>
>>>>>>> Thanks
>>>>>> Sorry, looks like the following should work, we need add vq.num if
>>>>>> used_wrap_counter does not match:
>>>>>>
>>>>>> static bool vhost_vring_packed_need_event(struct vhost_virtqueue *vq,
>>>>>> __u16 off_wrap, __u16 new,
>>>>>> __u16 old)
>>>>>> {
>>>>>> bool wrap = off_wrap >> 15;
>>>>>> int off = off_wrap & ~(1 << 15);
>>>>>> __u16 d1, d2;
>>>>>>
>>>>>> if (wrap != vq->used_wrap_counter)
>>>>>> d1 = new + vq->num - off - 1;
>>>>> Just to draw your attention (maybe you have already
>>>>> noticed this).
>>>> I miss this, thanks!
>>>>
>>>>> In this case (i.e. wrap != vq->used_wrap_counter),
>>>>> it's also possible that (off < new) is true. Because,
>>>>>
>>>>> when virtqueue_enable_cb_delayed_packed() is used,
>>>>> `off` is calculated in driver in a way like this:
>>>>>
>>>>> off = vq->last_used_idx + bufs;
>>>>> if (off >= vq->vring_packed.num) {
>>>>> off -= vq->vring_packed.num;
>>>>> wrap_counter ^= 1;
>>>>> }
>>>>>
>>>>> And when `new` (in vhost) is close to vq->num. The
>>>>> vq->last_used_idx + bufs (in driver) can be bigger
>>>>> than vq->vring_packed.num, and:
>>>>>
>>>>> 1. `off` will wrap;
>>>>> 2. wrap counters won't match;
>>>>> 3. off < new;
>>>>>
>>>>> And d1 (i.e. new + vq->num - off - 1) will be a value
>>>>> bigger than vq->num. I'm okay with this, although it's
>>>>> a bit weird.
>>>> So I'm considering something more compact by reusing vring_need_event() by
>>>> pretending a larger queue size and adding vq->num back when necessary:
>>>>
>>>> static bool vhost_vring_packed_need_event(struct vhost_virtqueue *vq,
>>>> __u16 off_wrap, __u16 new,
>>>> __u16 old)
>>>> {
>>>> bool wrap = vq->used_wrap_counter;
>>> If the wrap counter is obtained from the vq,
>>> I think `new` should also be obtained from
>>> the vq. Or the wrap counter should be carried
>>> in `new`.
>>>
>>>> int off = off_wrap & ~(1 << 15);
>>>> __u16 d1, d2;
>>>>
>>>> if (new < old) {
>>>> new += vq->num;
>>>> wrap ^= 1;
>>>> }
>>>>
>>>> if (wrap != off_wrap >> 15)
>>>> off += vq->num;
>>> When `new` and `old` wraps, and `off` doesn't wrap,
>>> wrap != (off_wrap >> 15) will be true. In this case,
>>> `off` is bigger than `new`, and what we should do
>>> is `off -= vq->num` instead of `off += vq->num`.
>> If I understand this correctly, if we track old correctly, it won't happen
>> if guest driver behave correctly. That means it should only happen for a
>> buggy driver (e.g trying to move off_wrap back).
> If vhost is faster than virtio driver, I guess above
> case may happen. The `old` and `new` will be updated
> each time we want to notify the driver. If the driver
> is slower, `old` and `new` in vhost may wrap before
> the `off` which is set by driver wraps.
>
> Best regards,
> Tiwei Bie
>
Oh, right.
But the code still works (in this case new - event_idx - 1 will
underflow). (And I admit it still looks ugly).
Thanks
^ permalink raw reply
* linux-next: manual merge of the net-next tree with the net tree
From: Stephen Rothwell @ 2018-05-09 4:12 UTC (permalink / raw)
To: David Miller, Networking
Cc: Linux-Next Mailing List, Linux Kernel Mailing List, Eric Dumazet,
Boris Pismenny
[-- Attachment #1: Type: text/plain, Size: 1454 bytes --]
Hi all,
Today's linux-next merge of the net-next tree got a conflict in:
net/tls/tls_main.c
between commit:
98f0a39529e5 ("tls: fix use after free in tls_sk_proto_close")
from the net tree and commit:
f66de3ee2c16 ("net/tls: Split conf to rx + tx")
from the net-next tree.
I fixed it up (I think - see below) and can carry the fix as
necessary. This is now fixed as far as linux-next is concerned, but any
non trivial conflicts should be mentioned to your upstream maintainer
when your tree is submitted for merging. You may also want to consider
cooperating with the maintainer of the conflicting tree to minimise any
particularly complex conflicts.
--
Cheers,
Stephen Rothwell
diff --cc net/tls/tls_main.c
index 20cd93be6236,4b57ddd72f34..000000000000
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@@ -254,8 -252,12 +254,9 @@@ static void tls_sk_proto_close(struct s
lock_sock(sk);
sk_proto_close = ctx->sk_proto_close;
- if (ctx->conf == TLS_BASE || ctx->conf == TLS_HW_RECORD) {
- if (ctx->tx_conf == TLS_HW_RECORD && ctx->rx_conf == TLS_HW_RECORD)
- goto skip_tx_cleanup;
-
- if (ctx->tx_conf == TLS_BASE && ctx->rx_conf == TLS_BASE) {
- kfree(ctx);
- ctx = NULL;
++ if ((ctx->tx_conf == TLS_BASE && ctx->rx_conf == TLS_BASE) ||
++ (ctx->tx_conf == TLS_HW_RECORD && ctx->rx_conf == TLS_HW_RECORD)) {
+ free_ctx = true;
goto skip_tx_cleanup;
}
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 488 bytes --]
^ permalink raw reply
* linux-next: manual merge of the net-next tree with the net tree
From: Stephen Rothwell @ 2018-05-09 4:19 UTC (permalink / raw)
To: David Miller, Networking
Cc: Linux-Next Mailing List, Linux Kernel Mailing List, Anders Roxell
[-- Attachment #1: Type: text/plain, Size: 1715 bytes --]
Hi all,
Today's linux-next merge of the net-next tree got a conflict in:
tools/testing/selftests/net/Makefile
between commit:
1751eb42ddb5 ("selftests: net: use TEST_PROGS_EXTENDED")
from the net tree and commits:
a7b15ab887e5 ("Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net")
from the net-next tree.
I fixed it up (see below) and can carry the fix as necessary. This
is now fixed as far as linux-next is concerned, but any non trivial
conflicts should be mentioned to your upstream maintainer when your tree
is submitted for merging. You may also want to consider cooperating
with the maintainer of the conflicting tree to minimise any particularly
complex conflicts.
--
Cheers,
Stephen Rothwell
diff --cc tools/testing/selftests/net/Makefile
index 3ff81a478dbe,73af45773938..000000000000
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@@ -5,10 -5,13 +5,13 @@@ CFLAGS = -Wall -Wl,--no-as-needed -O2
CFLAGS += -I../../../../usr/include/
TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh rtnetlink.sh
- TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh
+ TEST_PROGS += fib_tests.sh fib-onlink-tests.sh in_netns.sh pmtu.sh udpgso.sh
+ TEST_PROGS += udpgso_bench.sh
-TEST_GEN_PROGS_EXTENDED := in_netns.sh
+TEST_PROGS_EXTENDED := in_netns.sh
TEST_GEN_FILES = socket
TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy
+ TEST_GEN_FILES += tcp_mmap tcp_inq
+ TEST_GEN_FILES += udpgso udpgso_bench_tx udpgso_bench_rx
TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 488 bytes --]
^ permalink raw reply
* Re: linux-next: manual merge of the bpf-next tree with the s390 tree
From: Stephen Rothwell @ 2018-05-09 4:21 UTC (permalink / raw)
To: Networking, Martin Schwidefsky, Heiko Carstens, David Miller
Cc: Daniel Borkmann, Alexei Starovoitov, Linux-Next Mailing List,
Linux Kernel Mailing List
In-Reply-To: <20180508102638.1e19b7f2@canb.auug.org.au>
[-- Attachment #1: Type: text/plain, Size: 936 bytes --]
Hi all,
On Tue, 8 May 2018 10:26:38 +1000 Stephen Rothwell <sfr@canb.auug.org.au> wrote:
>
> Today's linux-next merge of the bpf-next tree got a conflict in:
>
> arch/s390/net/bpf_jit.S
>
> between commit:
>
> de5cb6eb514e ("s390: use expoline thunks in the BPF JIT")
>
> from the s390 tree and commit:
>
> e1cf4befa297 ("bpf, s390x: remove ld_abs/ld_ind")
>
> from the bpf-next tree.
>
> I fixed it up (I just removed the file as the latter does) and can
> carry the fix as necessary. This is now fixed as far as linux-next is
> concerned, but any non trivial conflicts should be mentioned to your
> upstream maintainer when your tree is submitted for merging. You may
> also want to consider cooperating with the maintainer of the conflicting
> tree to minimise any particularly complex conflicts.
This is now a conflict between the net-next and s390 trees.
--
Cheers,
Stephen Rothwell
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 488 bytes --]
^ permalink raw reply
* Re: KASAN: use-after-free Read in ip6_xmit
From: Eric Biggers @ 2018-05-09 4:38 UTC (permalink / raw)
To: syzbot
Cc: davem, kuznet, linux-kernel, netdev, syzkaller-bugs, yoshfuji,
Boris Pismenny
In-Reply-To: <001a113ee06a9a5d3b0561f1342d@google.com>
On Thu, Jan 04, 2018 at 02:58:01AM -0800, syzbot wrote:
> Hello,
>
> syzkaller hit the following crash on
> 0e08c463db387a2adcb0243b15ab868a73f87807
> git://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/master
> compiler: gcc (GCC) 7.1.1 20170620
> .config is attached
> Raw console output is attached.
> C reproducer is attached
> syzkaller reproducer is attached. See https://goo.gl/kgGztJ
> for information about syzkaller reproducers
>
>
> IMPORTANT: if you fix the bug, please add the following tag to the commit:
> Reported-by: syzbot+56029fd3642567f395f0@syzkaller.appspotmail.com
> It will help syzbot understand when the bug is fixed. See footer for
> details.
> If you forward the report, please keep this part and the footer.
>
> audit: type=1400 audit(1514737122.010:7): avc: denied { map } for
> pid=3153 comm="syzkaller920384" path="/root/syzkaller920384627" dev="sda1"
> ino=16481 scontext=unconfined_u:system_r:insmod_t:s0-s0:c0.c1023
> tcontext=unconfined_u:object_r:user_home_t:s0 tclass=file permissive=1
> ==================================================================
> BUG: KASAN: use-after-free in ip6_dst_idev include/net/ip6_fib.h:189
> [inline]
> BUG: KASAN: use-after-free in ip6_xmit+0x1f92/0x1fc0
> net/ipv6/ip6_output.c:248
> Read of size 8 at addr ffff8801ca6f9f18 by task syzkaller920384/3153
>
> CPU: 1 PID: 3153 Comm: syzkaller920384 Not tainted 4.15.0-rc4-next-20171221+
> #78
> Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS
> Google 01/01/2011
> Call Trace:
> __dump_stack lib/dump_stack.c:17 [inline]
> dump_stack+0x194/0x257 lib/dump_stack.c:53
> print_address_description+0x73/0x250 mm/kasan/report.c:252
> kasan_report_error mm/kasan/report.c:351 [inline]
> kasan_report+0x25b/0x340 mm/kasan/report.c:409
> __asan_report_load8_noabort+0x14/0x20 mm/kasan/report.c:430
> ip6_dst_idev include/net/ip6_fib.h:189 [inline]
> ip6_xmit+0x1f92/0x1fc0 net/ipv6/ip6_output.c:248
> inet6_csk_xmit+0x2fc/0x580 net/ipv6/inet6_connection_sock.c:139
> tcp_transmit_skb+0x1b12/0x38b0 net/ipv4/tcp_output.c:1176
> tcp_send_syn_data net/ipv4/tcp_output.c:3456 [inline]
> tcp_connect+0x1ed5/0x4090 net/ipv4/tcp_output.c:3495
> tcp_v4_connect+0x15ef/0x1e70 net/ipv4/tcp_ipv4.c:257
> __inet_stream_connect+0x2d4/0xf00 net/ipv4/af_inet.c:620
> tcp_sendmsg_fastopen net/ipv4/tcp.c:1167 [inline]
> tcp_sendmsg_locked+0x27e4/0x3b30 net/ipv4/tcp.c:1212
> tcp_sendmsg+0x2f/0x50 net/ipv4/tcp.c:1459
> inet_sendmsg+0x11f/0x5e0 net/ipv4/af_inet.c:764
> sock_sendmsg_nosec net/socket.c:628 [inline]
> sock_sendmsg+0xca/0x110 net/socket.c:638
> SYSC_sendto+0x361/0x5c0 net/socket.c:1719
> SyS_sendto+0x40/0x50 net/socket.c:1687
> entry_SYSCALL_64_fastpath+0x1f/0x96
> RIP: 0033:0x43fda9
> RSP: 002b:00007ffc9b8bd818 EFLAGS: 00000217 ORIG_RAX: 000000000000002c
> RAX: ffffffffffffffda RBX: ffffffffffffffff RCX: 000000000043fda9
> RDX: 0000000000000000 RSI: 0000000020aa1000 RDI: 0000000000000003
> RBP: 00000000006ca018 R08: 0000000020aa1000 R09: 0000000000000010
> R10: 0000000023ffffff R11: 0000000000000217 R12: 0000000000401710
> R13: 00000000004017a0 R14: 0000000000000000 R15: 0000000000000000
>
> Allocated by task 3140:
> save_stack+0x43/0xd0 mm/kasan/kasan.c:447
> set_track mm/kasan/kasan.c:459 [inline]
> kasan_kmalloc+0xad/0xe0 mm/kasan/kasan.c:551
> kasan_slab_alloc+0x12/0x20 mm/kasan/kasan.c:489
> kmem_cache_alloc+0x12e/0x760 mm/slab.c:3545
> dst_alloc+0x11f/0x1a0 net/core/dst.c:104
> rt_dst_alloc+0xe9/0x520 net/ipv4/route.c:1500
> __mkroute_output net/ipv4/route.c:2242 [inline]
> ip_route_output_key_hash_rcu+0xa40/0x2c10 net/ipv4/route.c:2470
> ip_route_output_key_hash+0x20b/0x370 net/ipv4/route.c:2299
> __ip_route_output_key include/net/route.h:125 [inline]
> ip_route_connect include/net/route.h:300 [inline]
> __ip4_datagram_connect+0xa67/0x1240 net/ipv4/datagram.c:51
> __ip6_datagram_connect+0x709/0xf90 net/ipv6/datagram.c:157
> ip6_datagram_connect+0x2f/0x50 net/ipv6/datagram.c:274
> inet_dgram_connect+0x16b/0x1f0 net/ipv4/af_inet.c:542
> SYSC_connect+0x213/0x4a0 net/socket.c:1611
> SyS_connect+0x24/0x30 net/socket.c:1592
> entry_SYSCALL_64_fastpath+0x1f/0x96
>
> Freed by task 0:
> save_stack+0x43/0xd0 mm/kasan/kasan.c:447
> set_track mm/kasan/kasan.c:459 [inline]
> kasan_slab_free+0x71/0xc0 mm/kasan/kasan.c:524
> __cache_free mm/slab.c:3489 [inline]
> kmem_cache_free+0x83/0x2a0 mm/slab.c:3747
> dst_destroy+0x219/0x310 net/core/dst.c:140
> dst_destroy_rcu+0x16/0x20 net/core/dst.c:153
> __rcu_reclaim kernel/rcu/rcu.h:172 [inline]
> rcu_do_batch kernel/rcu/tree.c:2675 [inline]
> invoke_rcu_callbacks kernel/rcu/tree.c:2934 [inline]
> __rcu_process_callbacks kernel/rcu/tree.c:2901 [inline]
> rcu_process_callbacks+0xd6c/0x17f0 kernel/rcu/tree.c:2918
> __do_softirq+0x2d7/0xb85 kernel/softirq.c:285
>
> The buggy address belongs to the object at ffff8801ca6f9f00
> which belongs to the cache ip_dst_cache of size 168
> The buggy address is located 24 bytes inside of
> 168-byte region [ffff8801ca6f9f00, ffff8801ca6f9fa8)
> The buggy address belongs to the page:
> page:00000000637e5443 count:1 mapcount:0 mapping:0000000000ddf2d5
> index:0xffff8801ca6f9000
> flags: 0x2fffc0000000100(slab)
> raw: 02fffc0000000100 ffff8801ca6f9000 ffff8801ca6f9000 000000010000000a
> raw: ffff8801d794f138 ffffea0007515320 ffff8801d6d724c0 0000000000000000
> page dumped because: kasan: bad access detected
>
> Memory state around the buggy address:
> ffff8801ca6f9e00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
> ffff8801ca6f9e80: 00 00 00 00 00 fc fc fc fc fc fc fc fc fc fc fc
> > ffff8801ca6f9f00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
> ^
> ffff8801ca6f9f80: fb fb fb fb fb fc fc fc fc fc fc fc fc fc fc fc
> ffff8801ca6fa000: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
> ==================================================================
>
>
> ---
> This bug is generated by a dumb bot. It may contain errors.
> See https://goo.gl/tpsmEJ for details.
> Direct all questions to syzkaller@googlegroups.com.
>
> syzbot will keep track of this bug report.
> If you forgot to add the Reported-by tag, once the fix for this bug is
> merged
> into any tree, please reply to this email with:
> #syz fix: exact-commit-title
No longer reproducible; seems to have first been fixed by commit d91c3e17f75f21
("net/tls: Only attach to sockets in ESTABLISHED state"), but commit
c113187d38ff85d ("tls: Use correct sk->sk_prot for IPV6") also appears related.
That fixes it too if I revert the first commit, and the KASAN report indicates
that a 'struct rtable' (IPv4-related) was accessed as if it were an
'struct rt6_info' (IPv6-related). So telling syzbot:
#syz fix: tls: Use correct sk->sk_prot for IPV6
I also had simplified this reproducer a bit, so I'm pasting it below in case
anyone still wants it:
#include <netinet/in.h>
#include <unistd.h>
#define SOL_TCP 6
#define TCP_ULP 31
/*
 * Reproducer: attach the "tls" ULP to an AF_INET6 TCP socket, then issue a
 * MSG_FASTOPEN sendto() whose destination is an AF_INET (IPv4) sockaddr.
 * No return value is checked.
 */
int main()
{
int tcp_fd;
/* IPv4 destination: address 0x7f000001 (127.0.0.1), port 0x4e22 (20002). */
struct sockaddr_in addr = {
.sin_family = AF_INET,
.sin_port = htobe16(0x4e22),
.sin_addr = { htobe32(0x7f000001) }
};
/* The socket itself is created in the IPv6 family. */
tcp_fd = socket(AF_INET6, SOCK_STREAM, 0);
/* Option length 4 covers "tls" plus its terminating NUL. */
setsockopt(tcp_fd, SOL_TCP, TCP_ULP, "tls", 4);
/* Zero-length fast-open send to the IPv4 sockaddr declared above. */
sendto(tcp_fd, NULL, 0, MSG_FASTOPEN, (void *)&addr, sizeof(addr));
}
- Eric
^ permalink raw reply
* Re: KASAN: out-of-bounds Read in ip6_xmit
From: Eric Biggers @ 2018-05-09 4:45 UTC (permalink / raw)
To: syzbot
Cc: davem, kuznet, linux-kernel, netdev, syzkaller-bugs, yoshfuji,
Paolo Abeni
In-Reply-To: <001a11433bee67ed4c0563db12f9@google.com>
On Sun, Jan 28, 2018 at 11:24:01AM -0800, syzbot wrote:
> Hello,
>
> syzbot hit the following crash on net-next commit
> 6bb46bc57c8e9ce947cc605e555b7204b44d2b10 (Fri Jan 26 16:00:23 2018 +0000)
> Merge branch 'cxgb4-fix-dump-collection-when-firmware-crashed'
>
> Unfortunately, I don't have any reproducer for this crash yet.
> Raw console output is attached.
> compiler: gcc (GCC) 7.1.1 20170620
> .config is attached.
>
> IMPORTANT: if you fix the bug, please add the following tag to the commit:
> Reported-by: syzbot+c8e66da874feb11aaca6@syzkaller.appspotmail.com
> It will help syzbot understand when the bug is fixed. See footer for
> details.
> If you forward the report, please keep this part and the footer.
>
> ==================================================================
> BUG: KASAN: out-of-bounds in ip6_dst_idev include/net/ip6_fib.h:192 [inline]
> BUG: KASAN: out-of-bounds in ip6_xmit+0x1f76/0x2260
> net/ipv6/ip6_output.c:264
> Read of size 8 at addr ffff8801bcc8cc18 by task syz-executor2/11459
>
> CPU: 0 PID: 11459 Comm: syz-executor2 Not tainted 4.15.0-rc9+ #212
> Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS
> Google 01/01/2011
> Call Trace:
> __dump_stack lib/dump_stack.c:17 [inline]
> dump_stack+0x194/0x257 lib/dump_stack.c:53
> print_address_description+0x73/0x250 mm/kasan/report.c:252
> kasan_report_error mm/kasan/report.c:351 [inline]
> kasan_report+0x25b/0x340 mm/kasan/report.c:409
> __asan_report_load8_noabort+0x14/0x20 mm/kasan/report.c:430
> ip6_dst_idev include/net/ip6_fib.h:192 [inline]
> ip6_xmit+0x1f76/0x2260 net/ipv6/ip6_output.c:264
> inet6_csk_xmit+0x2fc/0x580 net/ipv6/inet6_connection_sock.c:139
> l2tp_xmit_core net/l2tp/l2tp_core.c:1096 [inline]
> l2tp_xmit_skb+0x105f/0x1410 net/l2tp/l2tp_core.c:1191
> pppol2tp_sendmsg+0x470/0x670 net/l2tp/l2tp_ppp.c:341
> sock_sendmsg_nosec net/socket.c:630 [inline]
> sock_sendmsg+0xca/0x110 net/socket.c:640
> ___sys_sendmsg+0x767/0x8b0 net/socket.c:2046
> __sys_sendmsg+0xe5/0x210 net/socket.c:2080
> SYSC_sendmsg net/socket.c:2091 [inline]
> SyS_sendmsg+0x2d/0x50 net/socket.c:2087
> entry_SYSCALL_64_fastpath+0x29/0xa0
> RIP: 0033:0x453299
> RSP: 002b:00007fcfef194c58 EFLAGS: 00000212 ORIG_RAX: 000000000000002e
> RAX: ffffffffffffffda RBX: 000000000071bf58 RCX: 0000000000453299
> RDX: 0000000000000081 RSI: 000000002037ffc8 RDI: 0000000000000014
> RBP: 000000000000036f R08: 0000000000000000 R09: 0000000000000000
> R10: 0000000000000000 R11: 0000000000000212 R12: 00000000006f4308
> R13: 00000000ffffffff R14: 00007fcfef1956d4 R15: 000000000000000b
>
> Allocated by task 11466:
> save_stack+0x43/0xd0 mm/kasan/kasan.c:447
> set_track mm/kasan/kasan.c:459 [inline]
> kasan_kmalloc+0xad/0xe0 mm/kasan/kasan.c:551
> kasan_slab_alloc+0x12/0x20 mm/kasan/kasan.c:489
> kmem_cache_alloc+0x12e/0x760 mm/slab.c:3544
> dst_alloc+0x11f/0x1a0 net/core/dst.c:104
> rt_dst_alloc+0xe9/0x520 net/ipv4/route.c:1497
> ip_route_input_slow net/ipv4/route.c:2006 [inline]
> ip_route_input_rcu+0x1076/0x3200 net/ipv4/route.c:2137
> ip_route_input_noref+0xf5/0x1e0 net/ipv4/route.c:2083
> ip_rcv_finish+0x3a6/0x2040 net/ipv4/ip_input.c:348
> NF_HOOK include/linux/netfilter.h:288 [inline]
> ip_rcv+0xc5a/0x1840 net/ipv4/ip_input.c:493
> __netif_receive_skb_core+0x1a41/0x3460 net/core/dev.c:4547
> __netif_receive_skb+0x2c/0x1b0 net/core/dev.c:4612
> netif_receive_skb_internal+0x10b/0x670 net/core/dev.c:4686
> netif_receive_skb+0xae/0x390 net/core/dev.c:4710
> tun_rx_batched.isra.53+0x5ee/0x870 drivers/net/tun.c:1558
> tun_get_user+0x25de/0x3940 drivers/net/tun.c:1958
> tun_chr_write_iter+0xb9/0x160 drivers/net/tun.c:1986
> call_write_iter include/linux/fs.h:1772 [inline]
> do_iter_readv_writev+0x525/0x7f0 fs/read_write.c:653
> do_iter_write+0x154/0x540 fs/read_write.c:932
> vfs_writev+0x18a/0x340 fs/read_write.c:977
> do_writev+0xfc/0x2a0 fs/read_write.c:1012
> SYSC_writev fs/read_write.c:1085 [inline]
> SyS_writev+0x27/0x30 fs/read_write.c:1082
> entry_SYSCALL_64_fastpath+0x29/0xa0
>
> Freed by task 7176:
> save_stack+0x43/0xd0 mm/kasan/kasan.c:447
> set_track mm/kasan/kasan.c:459 [inline]
> kasan_slab_free+0x71/0xc0 mm/kasan/kasan.c:524
> __cache_free mm/slab.c:3488 [inline]
> kmem_cache_free+0x83/0x2a0 mm/slab.c:3746
> dst_destroy+0x257/0x370 net/core/dst.c:140
> dst_destroy_rcu+0x16/0x20 net/core/dst.c:153
> __rcu_reclaim kernel/rcu/rcu.h:195 [inline]
> rcu_do_batch kernel/rcu/tree.c:2758 [inline]
> invoke_rcu_callbacks kernel/rcu/tree.c:3012 [inline]
> __rcu_process_callbacks kernel/rcu/tree.c:2979 [inline]
> rcu_process_callbacks+0xd6c/0x17f0 kernel/rcu/tree.c:2996
> __do_softirq+0x2d7/0xb85 kernel/softirq.c:285
>
> The buggy address belongs to the object at ffff8801bcc8cc00
> which belongs to the cache ip_dst_cache of size 168
> The buggy address is located 24 bytes inside of
> 168-byte region [ffff8801bcc8cc00, ffff8801bcc8cca8)
> The buggy address belongs to the page:
> page:ffffea0006f32300 count:1 mapcount:0 mapping:ffff8801bcc8c000 index:0x0
> flags: 0x2fffc0000000100(slab)
> raw: 02fffc0000000100 ffff8801bcc8c000 0000000000000000 0000000100000010
> raw: ffffea00074da720 ffffea000743cb20 ffff8801d6fe34c0 0000000000000000
> page dumped because: kasan: bad access detected
>
> Memory state around the buggy address:
> ffff8801bcc8cb00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
> ffff8801bcc8cb80: 00 00 00 00 00 fc fc fc fc fc fc fc fc fc fc fc
> > ffff8801bcc8cc00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
> ^
> ffff8801bcc8cc80: 00 00 00 00 00 fc fc fc fc fc fc fc fc fc fc fc
> ffff8801bcc8cd00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
> ==================================================================
>
>
> ---
> This bug is generated by a dumb bot. It may contain errors.
> See https://goo.gl/tpsmEJ for details.
> Direct all questions to syzkaller@googlegroups.com.
>
> syzbot will keep track of this bug report.
> If you forgot to add the Reported-by tag, once the fix for this bug is
> merged
> into any tree, please reply to this email with:
> #syz fix: exact-commit-title
No reproducer and last occurred 58 days ago (on commit f44b1886a5f876c8).
Probably was fixed by commit b954f94023dcc61:
#syz fix: l2tp: fix races with ipv4-mapped ipv6 addresses
- Eric
^ permalink raw reply
* linux-next: build warning after merge of the bpf-next tree
From: Stephen Rothwell @ 2018-05-09 4:49 UTC (permalink / raw)
To: Daniel Borkmann, Alexei Starovoitov, Networking
Cc: Linux-Next Mailing List, Linux Kernel Mailing List, David Ahern
[-- Attachment #1: Type: text/plain, Size: 2854 bytes --]
Hi all,
After merging the bpf-next tree, today's linux-next build (x86_64
allmodconfig) produced this warning:
In file included from include/linux/dma-mapping.h:5:0,
from include/linux/skbuff.h:34,
from include/linux/if_ether.h:23,
from include/uapi/linux/bpf.h:13,
from include/linux/bpf-cgroup.h:6,
from include/linux/cgroup-defs.h:22,
from include/linux/cgroup.h:28,
from include/linux/perf_event.h:57,
from include/linux/trace_events.h:10,
from include/trace/trace_events.h:20,
from include/trace/define_trace.h:96,
from drivers/android/binder_trace.h:387,
from drivers/android/binder.c:5702:
include/linux/sizes.h:24:0: warning: "SZ_1K" redefined
#define SZ_1K 0x00000400
drivers/android/binder.c:116:0: note: this is the location of the previous definition
#define SZ_1K 0x400
In file included from include/linux/dma-mapping.h:5:0,
from include/linux/skbuff.h:34,
from include/linux/if_ether.h:23,
from include/uapi/linux/bpf.h:13,
from include/linux/bpf-cgroup.h:6,
from include/linux/cgroup-defs.h:22,
from include/linux/cgroup.h:28,
from include/linux/perf_event.h:57,
from include/linux/trace_events.h:10,
from include/trace/trace_events.h:20,
from include/trace/define_trace.h:96,
from drivers/android/binder_trace.h:387,
from drivers/android/binder.c:5702:
include/linux/sizes.h:37:0: warning: "SZ_4M" redefined
#define SZ_4M 0x00400000
drivers/android/binder.c:120:0: note: this is the location of the previous definition
#define SZ_4M 0x400000
fs/ecryptfs/miscdev.c:206:0: warning: "PKT_TYPE_OFFSET" redefined
#define PKT_TYPE_OFFSET 0
In file included from include/linux/if_ether.h:23:0,
from include/uapi/linux/bpf.h:13,
from include/linux/bpf-cgroup.h:6,
from include/linux/cgroup-defs.h:22,
from include/linux/cgroup.h:28,
from include/linux/writeback.h:183,
from include/linux/backing-dev.h:16,
from fs/ecryptfs/ecryptfs_kernel.h:41,
from fs/ecryptfs/miscdev.c:30:
include/linux/skbuff.h:753:0: note: this is the location of the previous definition
#define PKT_TYPE_OFFSET() offsetof(struct sk_buff, __pkt_type_offset)
Introduced by commit
9c38f3c8b153 ("bpf: Provide helper to do forwarding lookups in kernel FIB table")
--
Cheers,
Stephen Rothwell
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 488 bytes --]
^ permalink raw reply
* Re: BUG: please report to dccp@vger.kernel.org => prev = 0, last = 0 at net/dccp/ccids/lib/packet_history.c:LINE/tfrc_rx_his
From: Eric Biggers @ 2018-05-09 5:05 UTC (permalink / raw)
To: dccp, Gerrit Renker
Cc: syzbot, davem, garsilva, linux-kernel, netdev, syzkaller-bugs
In-Reply-To: <000000000000fedad9056b7f07ce@google.com>
On Sat, May 05, 2018 at 05:57:02PM -0700, syzbot wrote:
> Hello,
>
> syzbot found the following crash on:
>
> HEAD commit: c1c07416cdd4 Merge tag 'kbuild-fixes-v4.17' of git://git.k..
> git tree: upstream
> console output: https://syzkaller.appspot.com/x/log.txt?x=13d5de47800000
> kernel config: https://syzkaller.appspot.com/x/.config?x=5a1dc06635c10d27
> dashboard link: https://syzkaller.appspot.com/bug?extid=99858724c0ba555a12ea
> compiler: gcc (GCC) 8.0.1 20180413 (experimental)
> syzkaller repro:https://syzkaller.appspot.com/x/repro.syz?x=170afde7800000
> C reproducer: https://syzkaller.appspot.com/x/repro.c?x=141b4be7800000
>
> IMPORTANT: if you fix the bug, please add the following tag to the commit:
> Reported-by: syzbot+99858724c0ba555a12ea@syzkaller.appspotmail.com
>
> random: sshd: uninitialized urandom read (32 bytes read)
> random: sshd: uninitialized urandom read (32 bytes read)
> random: sshd: uninitialized urandom read (32 bytes read)
> random: sshd: uninitialized urandom read (32 bytes read)
> BUG: please report to dccp@vger.kernel.org => prev = 0, last = 0 at
> net/dccp/ccids/lib/packet_history.c:425/tfrc_rx_hist_sample_rtt()
> CPU: 0 PID: 4495 Comm: syz-executor551 Not tainted 4.17.0-rc3+ #34
> Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS
> Google 01/01/2011
> Call Trace:
> <IRQ>
> __dump_stack lib/dump_stack.c:77 [inline]
> dump_stack+0x1b9/0x294 lib/dump_stack.c:113
> tfrc_rx_hist_sample_rtt.cold.3+0x54/0x5c
> net/dccp/ccids/lib/packet_history.c:422
> ccid3_hc_rx_packet_recv+0x5c8/0xed0 net/dccp/ccids/ccid3.c:765
> ccid_hc_rx_packet_recv net/dccp/ccid.h:185 [inline]
> dccp_deliver_input_to_ccids+0xf0/0x280 net/dccp/input.c:180
> dccp_rcv_established+0x87/0xb0 net/dccp/input.c:378
> dccp_v4_do_rcv+0x153/0x180 net/dccp/ipv4.c:654
> sk_backlog_rcv include/net/sock.h:909 [inline]
> __sk_receive_skb+0x3a2/0xd60 net/core/sock.c:513
> dccp_v4_rcv+0x10e5/0x1f3f net/dccp/ipv4.c:875
> ip_local_deliver_finish+0x2e3/0xd80 net/ipv4/ip_input.c:215
> NF_HOOK include/linux/netfilter.h:288 [inline]
> ip_local_deliver+0x1e1/0x720 net/ipv4/ip_input.c:256
> dst_input include/net/dst.h:450 [inline]
> ip_rcv_finish+0x81b/0x2200 net/ipv4/ip_input.c:396
> NF_HOOK include/linux/netfilter.h:288 [inline]
> ip_rcv+0xb70/0x143d net/ipv4/ip_input.c:492
> __netif_receive_skb_core+0x26f5/0x3630 net/core/dev.c:4592
> __netif_receive_skb+0x2c/0x1e0 net/core/dev.c:4657
> process_backlog+0x219/0x760 net/core/dev.c:5337
> napi_poll net/core/dev.c:5735 [inline]
> net_rx_action+0x7b7/0x1930 net/core/dev.c:5801
> __do_softirq+0x2e0/0xaf5 kernel/softirq.c:285
> do_softirq_own_stack+0x2a/0x40 arch/x86/entry/entry_64.S:1046
> </IRQ>
> do_softirq.part.17+0x14d/0x190 kernel/softirq.c:329
> do_softirq arch/x86/include/asm/preempt.h:23 [inline]
> __local_bh_enable_ip+0x1ec/0x230 kernel/softirq.c:182
> local_bh_enable include/linux/bottom_half.h:32 [inline]
> rcu_read_unlock_bh include/linux/rcupdate.h:728 [inline]
> ip_finish_output2+0xab2/0x1840 net/ipv4/ip_output.c:231
> ip_finish_output+0x828/0xf80 net/ipv4/ip_output.c:317
> NF_HOOK_COND include/linux/netfilter.h:277 [inline]
> ip_output+0x21b/0x850 net/ipv4/ip_output.c:405
> dst_output include/net/dst.h:444 [inline]
> ip_local_out+0xc5/0x1b0 net/ipv4/ip_output.c:124
> ip_queue_xmit+0x9d7/0x1f70 net/ipv4/ip_output.c:504
> dccp_transmit_skb+0x999/0x12e0 net/dccp/output.c:142
> dccp_xmit_packet+0x250/0x790 net/dccp/output.c:281
> dccp_write_xmit+0x190/0x1f0 net/dccp/output.c:363
> dccp_sendmsg+0x8c7/0x1020 net/dccp/proto.c:818
> inet_sendmsg+0x19f/0x690 net/ipv4/af_inet.c:798
> sock_sendmsg_nosec net/socket.c:629 [inline]
> sock_sendmsg+0xd5/0x120 net/socket.c:639
> ___sys_sendmsg+0x525/0x940 net/socket.c:2117
> __sys_sendmmsg+0x240/0x6f0 net/socket.c:2212
> __do_sys_sendmmsg net/socket.c:2241 [inline]
> __se_sys_sendmmsg net/socket.c:2238 [inline]
> __x64_sys_sendmmsg+0x9d/0x100 net/socket.c:2238
> do_syscall_64+0x1b1/0x800 arch/x86/entry/common.c:287
> entry_SYSCALL_64_after_hwframe+0x49/0xbe
> RIP: 0033:0x445d09
> RSP: 002b:00007f3c7eff5d88 EFLAGS: 00000293 ORIG_RAX: 0000000000000133
> RAX: ffffffffffffffda RBX: 00000000006dac40 RCX: 0000000000445d09
> RDX: 0000000000000001 RSI: 000000
>
>
> ---
> This bug is generated by a bot. It may contain errors.
> See https://goo.gl/tpsmEJ for more information about syzbot.
> syzbot engineers can be reached at syzkaller@googlegroups.com.
>
> syzbot will keep track of this bug report.
> If you forgot to add the Reported-by tag, once the fix for this bug is
> merged
> into any tree, please reply to this email with:
> #syz fix: exact-commit-title
> If you want to test a patch for this bug, please reply with:
> #syz test: git://repo/address.git branch
> and provide the patch inline or as an attachment.
> To mark this as a duplicate of another syzbot report, please reply with:
> #syz dup: exact-subject-of-another-report
There's already a bug report with this title, this one just had a few characters
truncated from the end. Dmitry, is that intentional? The other one is
https://groups.google.com/forum/#!msg/syzkaller-bugs/u5nq3PdPkIc/bBFjKHXPAgAJ:
#syz dup: BUG: please report to dc...@vger.kernel.org => prev = 0, last = 0 at net/dccp/ccids/lib/packet_history.c:LINE/tfrc_rx_hist_sample_rtt()
Anyway, this is apparently a DCCP bug, and as I posted on the other thread it's
easily reproducible with the following program. Gerrit, are you still the DCCP
maintainer, or is the MAINTAINERS file outdated?
#include <linux/dccp.h>
#include <linux/in.h>
#include <sys/socket.h>
#include <sys/wait.h>
#include <unistd.h>
/*
 * Reproducer: set up a DCCP listener and a DCCP client that selects CCID 3,
 * connect them over IPv4, and have both ends issue 1000 one-byte writes.
 * No return value is checked.
 */
int main()
{
/* Only sin_family is set; port and address are left zero-initialized. */
struct sockaddr_in addr = { .sin_family = AF_INET };
socklen_t addrlen = sizeof(addr);
int fd;
/*
 * The original process loops forever: fork a child, wait for it, repeat.
 * Each child (fork() returned 0) leaves the loop and runs one attempt below.
 */
while (fork())
wait(NULL);
/* Listening socket: bind, then read back the bound address into addr. */
fd = socket(AF_INET, SOCK_DCCP, 0);
bind(fd, (void *)&addr, addrlen);
getsockname(fd, (void *)&addr, &addrlen);
listen(fd, 100);
if (fork()) {
/* This side: fresh DCCP socket, CCID option set to 3, connect to listener. */
/* fd is overwritten here, so the listening descriptor is no longer referenced. */
fd = socket(AF_INET, SOCK_DCCP, 0);
setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_CCID, "\x03", 1);
connect(fd, (void *)&addr, sizeof(addr));
} else {
/* Other side: accept the incoming connection on the listening socket. */
fd = accept(fd, NULL, 0);
}
/* Both processes then perform 1000 single-byte writes on their socket. */
for (int i = 0; i < 1000; i++)
write(fd, "X", 1);
}
- Eric
^ permalink raw reply
* Re: BUG: please report to dccp@vger.kernel.org => prev = 0, last = 0 at net/dccp/ccids/lib/packet_history.c:LINE/tfrc_rx_his
From: Dmitry Vyukov @ 2018-05-09 5:23 UTC (permalink / raw)
To: Eric Biggers
Cc: dccp, Gerrit Renker, syzbot, David Miller, Gustavo A . R . Silva,
LKML, netdev, syzkaller-bugs
In-Reply-To: <20180509050509.GE711@sol.localdomain>
On Wed, May 9, 2018 at 7:05 AM, Eric Biggers <ebiggers3@gmail.com> wrote:
> On Sat, May 05, 2018 at 05:57:02PM -0700, syzbot wrote:
>> Hello,
>>
>> syzbot found the following crash on:
>>
>> HEAD commit: c1c07416cdd4 Merge tag 'kbuild-fixes-v4.17' of git://git.k..
>> git tree: upstream
>> console output: https://syzkaller.appspot.com/x/log.txt?x=13d5de47800000
>> kernel config: https://syzkaller.appspot.com/x/.config?x=5a1dc06635c10d27
>> dashboard link: https://syzkaller.appspot.com/bug?extid=99858724c0ba555a12ea
>> compiler: gcc (GCC) 8.0.1 20180413 (experimental)
>> syzkaller repro:https://syzkaller.appspot.com/x/repro.syz?x=170afde7800000
>> C reproducer: https://syzkaller.appspot.com/x/repro.c?x=141b4be7800000
>>
>> IMPORTANT: if you fix the bug, please add the following tag to the commit:
>> Reported-by: syzbot+99858724c0ba555a12ea@syzkaller.appspotmail.com
>>
>> random: sshd: uninitialized urandom read (32 bytes read)
>> random: sshd: uninitialized urandom read (32 bytes read)
>> random: sshd: uninitialized urandom read (32 bytes read)
>> random: sshd: uninitialized urandom read (32 bytes read)
>> BUG: please report to dccp@vger.kernel.org => prev = 0, last = 0 at
>> net/dccp/ccids/lib/packet_history.c:425/tfrc_rx_hist_sample_rtt()
>> CPU: 0 PID: 4495 Comm: syz-executor551 Not tainted 4.17.0-rc3+ #34
>> Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS
>> Google 01/01/2011
>> Call Trace:
>> <IRQ>
>> __dump_stack lib/dump_stack.c:77 [inline]
>> dump_stack+0x1b9/0x294 lib/dump_stack.c:113
>> tfrc_rx_hist_sample_rtt.cold.3+0x54/0x5c
>> net/dccp/ccids/lib/packet_history.c:422
>> ccid3_hc_rx_packet_recv+0x5c8/0xed0 net/dccp/ccids/ccid3.c:765
>> ccid_hc_rx_packet_recv net/dccp/ccid.h:185 [inline]
>> dccp_deliver_input_to_ccids+0xf0/0x280 net/dccp/input.c:180
>> dccp_rcv_established+0x87/0xb0 net/dccp/input.c:378
>> dccp_v4_do_rcv+0x153/0x180 net/dccp/ipv4.c:654
>> sk_backlog_rcv include/net/sock.h:909 [inline]
>> __sk_receive_skb+0x3a2/0xd60 net/core/sock.c:513
>> dccp_v4_rcv+0x10e5/0x1f3f net/dccp/ipv4.c:875
>> ip_local_deliver_finish+0x2e3/0xd80 net/ipv4/ip_input.c:215
>> NF_HOOK include/linux/netfilter.h:288 [inline]
>> ip_local_deliver+0x1e1/0x720 net/ipv4/ip_input.c:256
>> dst_input include/net/dst.h:450 [inline]
>> ip_rcv_finish+0x81b/0x2200 net/ipv4/ip_input.c:396
>> NF_HOOK include/linux/netfilter.h:288 [inline]
>> ip_rcv+0xb70/0x143d net/ipv4/ip_input.c:492
>> __netif_receive_skb_core+0x26f5/0x3630 net/core/dev.c:4592
>> __netif_receive_skb+0x2c/0x1e0 net/core/dev.c:4657
>> process_backlog+0x219/0x760 net/core/dev.c:5337
>> napi_poll net/core/dev.c:5735 [inline]
>> net_rx_action+0x7b7/0x1930 net/core/dev.c:5801
>> __do_softirq+0x2e0/0xaf5 kernel/softirq.c:285
>> do_softirq_own_stack+0x2a/0x40 arch/x86/entry/entry_64.S:1046
>> </IRQ>
>> do_softirq.part.17+0x14d/0x190 kernel/softirq.c:329
>> do_softirq arch/x86/include/asm/preempt.h:23 [inline]
>> __local_bh_enable_ip+0x1ec/0x230 kernel/softirq.c:182
>> local_bh_enable include/linux/bottom_half.h:32 [inline]
>> rcu_read_unlock_bh include/linux/rcupdate.h:728 [inline]
>> ip_finish_output2+0xab2/0x1840 net/ipv4/ip_output.c:231
>> ip_finish_output+0x828/0xf80 net/ipv4/ip_output.c:317
>> NF_HOOK_COND include/linux/netfilter.h:277 [inline]
>> ip_output+0x21b/0x850 net/ipv4/ip_output.c:405
>> dst_output include/net/dst.h:444 [inline]
>> ip_local_out+0xc5/0x1b0 net/ipv4/ip_output.c:124
>> ip_queue_xmit+0x9d7/0x1f70 net/ipv4/ip_output.c:504
>> dccp_transmit_skb+0x999/0x12e0 net/dccp/output.c:142
>> dccp_xmit_packet+0x250/0x790 net/dccp/output.c:281
>> dccp_write_xmit+0x190/0x1f0 net/dccp/output.c:363
>> dccp_sendmsg+0x8c7/0x1020 net/dccp/proto.c:818
>> inet_sendmsg+0x19f/0x690 net/ipv4/af_inet.c:798
>> sock_sendmsg_nosec net/socket.c:629 [inline]
>> sock_sendmsg+0xd5/0x120 net/socket.c:639
>> ___sys_sendmsg+0x525/0x940 net/socket.c:2117
>> __sys_sendmmsg+0x240/0x6f0 net/socket.c:2212
>> __do_sys_sendmmsg net/socket.c:2241 [inline]
>> __se_sys_sendmmsg net/socket.c:2238 [inline]
>> __x64_sys_sendmmsg+0x9d/0x100 net/socket.c:2238
>> do_syscall_64+0x1b1/0x800 arch/x86/entry/common.c:287
>> entry_SYSCALL_64_after_hwframe+0x49/0xbe
>> RIP: 0033:0x445d09
>> RSP: 002b:00007f3c7eff5d88 EFLAGS: 00000293 ORIG_RAX: 0000000000000133
>> RAX: ffffffffffffffda RBX: 00000000006dac40 RCX: 0000000000445d09
>> RDX: 0000000000000001 RSI: 000000
>>
>>
>> ---
>> This bug is generated by a bot. It may contain errors.
>> See https://goo.gl/tpsmEJ for more information about syzbot.
>> syzbot engineers can be reached at syzkaller@googlegroups.com.
>>
>> syzbot will keep track of this bug report.
>> If you forgot to add the Reported-by tag, once the fix for this bug is
>> merged
>> into any tree, please reply to this email with:
>> #syz fix: exact-commit-title
>> If you want to test a patch for this bug, please reply with:
>> #syz test: git://repo/address.git branch
>> and provide the patch inline or as an attachment.
>> To mark this as a duplicate of another syzbot report, please reply with:
>> #syz dup: exact-subject-of-another-report
>
> There's already a bug report with this title, this one just had a few characters
> truncated from the end. Dmitry, is that intentional? The other one is
> https://groups.google.com/forum/#!msg/syzkaller-bugs/u5nq3PdPkIc/bBFjKHXPAgAJ:
>
> #syz dup: BUG: please report to dc...@vger.kernel.org => prev = 0, last = 0 at net/dccp/ccids/lib/packet_history.c:LINE/tfrc_rx_hist_sample_rtt()
I think this happened when we started truncating kernel crash titles
to 120 columns, so it's intentional.
However, the dup command was not accepted. It's hard to tell who
received what today, but this suggests that somebody altered the email
address in the command to dc...@vger.kernel.org:
https://groups.google.com/forum/message/raw?msg=syzkaller-bugs/GMndq4-h7BI/VIz4aBEOAwAJ
We can also mark the old one as invalid.
> Anyway, this is apparently a DCCP bug, and as I posted on the other thread it's
> easily reproducible with the following program. Gerrit, are you still the DCCP
> maintainer, or is the MAINTAINERS file outdated?
>
> #include <linux/dccp.h>
> #include <linux/in.h>
> #include <sys/socket.h>
> #include <sys/wait.h>
> #include <unistd.h>
>
> int main()
> {
> struct sockaddr_in addr = { .sin_family = AF_INET };
> socklen_t addrlen = sizeof(addr);
> int fd;
>
> while (fork())
> wait(NULL);
> fd = socket(AF_INET, SOCK_DCCP, 0);
> bind(fd, (void *)&addr, addrlen);
> getsockname(fd, (void *)&addr, &addrlen);
> listen(fd, 100);
> if (fork()) {
> fd = socket(AF_INET, SOCK_DCCP, 0);
> setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_CCID, "\x03", 1);
> connect(fd, (void *)&addr, sizeof(addr));
> } else {
> fd = accept(fd, NULL, 0);
> }
> for (int i = 0; i < 1000; i++)
> write(fd, "X", 1);
> }
>
> - Eric
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox