Netdev List
 help / color / mirror / Atom feed
* Re: [PATCH bpf 2/2] bpf: enforce usage of __aligned_u64 in the UAPI header
From: Song Liu @ 2018-05-29 17:35 UTC (permalink / raw)
  To: Eugene Syromiatnikov
  Cc: netdev, open list, Martin KaFai Lau, Daniel Borkmann,
	Alexei Starovoitov, David S. Miller, Jiri Olsa, Ingo Molnar,
	Lawrence Brakmo, Andrey Ignatov, Jakub Kicinski, John Fastabend,
	Dmitry V. Levin
In-Reply-To: <20180527112847.GA18311@asgard.redhat.com>

On Sun, May 27, 2018 at 4:28 AM, Eugene Syromiatnikov <esyr@redhat.com> wrote:
> Use __aligned_u64 instead of __u64 everywhere in the UAPI header,
> similarly to v4.17-rc1~94^2~58^2 "RDMA: Change all uapi headers to use
> __aligned_u64 instead of __u64".
>
> This commit doesn't change structure layouts, but imposes additional
> alignment requirements on struct bpf_stack_build_id, struct
> bpf_sock_ops, struct bpf_perf_event_value, and struct
> bpf_raw_tracepoint_args; of them only struct bpf_sock_ops and struct
> bpf_perf_event_value have 64-bit fields that were present in a released
> kernel without this additional alignment requirement (bytes_received
> and bytes_acked were added to struct bpf_sock_ops in commit
> v4.16-rc1~123^2~33^2~5^2~4, struct bpf_perf_event_value was added
> in commit v4.15-rc1~84^2~532^2~3).
>
> Signed-off-by: Eugene Syromiatnikov <esyr@redhat.com>
> ---
>  include/uapi/linux/bpf.h       | 22 +++++++++++-----------
>  tools/include/uapi/linux/bpf.h | 22 +++++++++++-----------
>  2 files changed, 22 insertions(+), 22 deletions(-)
>
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 903010a..174e99a 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -259,8 +259,8 @@ struct bpf_stack_build_id {
>         __s32           status;
>         unsigned char   build_id[BPF_BUILD_ID_SIZE];
>         union {
> -               __u64   offset;
> -               __u64   ip;
> +               __aligned_u64   offset;
> +               __aligned_u64   ip;
>         };
>  };
>
> @@ -288,7 +288,7 @@ union bpf_attr {
>                         __aligned_u64 value;
>                         __aligned_u64 next_key;
>                 };
> -               __u64           flags;
> +               __aligned_u64   flags;
>         };
>
>         struct { /* anonymous struct used by BPF_PROG_LOAD command */
> @@ -360,7 +360,7 @@ union bpf_attr {
>         } query;
>
>         struct {
> -               __u64 name;
> +               __aligned_u64 name;
>                 __u32 prog_fd;
>         } raw_tracepoint;
>  } __attribute__((aligned(8)));
> @@ -1011,7 +1011,7 @@ struct bpf_prog_info {
>         __u32 xlated_prog_len;
>         __aligned_u64 jited_prog_insns;
>         __aligned_u64 xlated_prog_insns;
> -       __u64 load_time;        /* ns since boottime */
> +       __aligned_u64 load_time;        /* ns since boottime */
>         __u32 created_by_uid;
>         __u32 nr_map_ids;
>         __aligned_u64 map_ids;
> @@ -1101,8 +1101,8 @@ struct bpf_sock_ops {
>         __u32 lost_out;
>         __u32 sacked_out;
>         __u32 sk_txhash;
> -       __u64 bytes_received;
> -       __u64 bytes_acked;
> +       __aligned_u64 bytes_received;
> +       __aligned_u64 bytes_acked;
>  };
>
>  /* Definitions for bpf_sock_ops_cb_flags */
> @@ -1189,9 +1189,9 @@ enum {
>  #define TCP_BPF_SNDCWND_CLAMP  1002    /* Set sndcwnd_clamp */
>
>  struct bpf_perf_event_value {
> -       __u64 counter;
> -       __u64 enabled;
> -       __u64 running;
> +       __aligned_u64 counter;
> +       __aligned_u64 enabled;
> +       __aligned_u64 running;
>  };
>
>  #define BPF_DEVCG_ACC_MKNOD    (1ULL << 0)
> @@ -1209,7 +1209,7 @@ struct bpf_cgroup_dev_ctx {
>  };
>
>  struct bpf_raw_tracepoint_args {
> -       __u64 args[0];
> +       __aligned_u64 args[0];
>  };
>
>  #endif /* _UAPI__LINUX_BPF_H__ */
> diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
> index 903010a..174e99a 100644
> --- a/tools/include/uapi/linux/bpf.h
> +++ b/tools/include/uapi/linux/bpf.h
> @@ -259,8 +259,8 @@ struct bpf_stack_build_id {
>         __s32           status;
>         unsigned char   build_id[BPF_BUILD_ID_SIZE];
>         union {
> -               __u64   offset;
> -               __u64   ip;
> +               __aligned_u64   offset;
> +               __aligned_u64   ip;
>         };
>  };
>
> @@ -288,7 +288,7 @@ union bpf_attr {
>                         __aligned_u64 value;
>                         __aligned_u64 next_key;
>                 };
> -               __u64           flags;
> +               __aligned_u64   flags;
>         };
>
>         struct { /* anonymous struct used by BPF_PROG_LOAD command */
> @@ -360,7 +360,7 @@ union bpf_attr {
>         } query;
>
>         struct {
> -               __u64 name;
> +               __aligned_u64 name;
>                 __u32 prog_fd;
>         } raw_tracepoint;
>  } __attribute__((aligned(8)));
> @@ -1011,7 +1011,7 @@ struct bpf_prog_info {
>         __u32 xlated_prog_len;
>         __aligned_u64 jited_prog_insns;
>         __aligned_u64 xlated_prog_insns;
> -       __u64 load_time;        /* ns since boottime */
> +       __aligned_u64 load_time;        /* ns since boottime */
>         __u32 created_by_uid;
>         __u32 nr_map_ids;
>         __aligned_u64 map_ids;
> @@ -1101,8 +1101,8 @@ struct bpf_sock_ops {
>         __u32 lost_out;
>         __u32 sacked_out;
>         __u32 sk_txhash;
> -       __u64 bytes_received;
> -       __u64 bytes_acked;
> +       __aligned_u64 bytes_received;
> +       __aligned_u64 bytes_acked;
>  };
>
>  /* Definitions for bpf_sock_ops_cb_flags */
> @@ -1189,9 +1189,9 @@ enum {
>  #define TCP_BPF_SNDCWND_CLAMP  1002    /* Set sndcwnd_clamp */
>
>  struct bpf_perf_event_value {
> -       __u64 counter;
> -       __u64 enabled;
> -       __u64 running;
> +       __aligned_u64 counter;
> +       __aligned_u64 enabled;
> +       __aligned_u64 running;
>  };
>
>  #define BPF_DEVCG_ACC_MKNOD    (1ULL << 0)
> @@ -1209,7 +1209,7 @@ struct bpf_cgroup_dev_ctx {
>  };
>
>  struct bpf_raw_tracepoint_args {
> -       __u64 args[0];
> +       __aligned_u64 args[0];
>  };
>
>  #endif /* _UAPI__LINUX_BPF_H__ */
> --
> 2.1.4
>

I think these changes are not necessary. Is it a general guidance to
only use 64-bit aligned
variables in UAPI headers?

Thanks,
Song

^ permalink raw reply

* Re: [PATCH bpf-next 04/11] bpf: show prog and map id in fdinfo
From: Jesper Dangaard Brouer @ 2018-05-29 17:27 UTC (permalink / raw)
  To: Daniel Borkmann; +Cc: brouer, netdev
In-Reply-To: <20180528004344.3606-5-daniel@iogearbox.net>

On Mon, 28 May 2018 02:43:37 +0200
Daniel Borkmann <daniel@iogearbox.net> wrote:

> Its trivial and straight forward to expose it for scripts that can
> then use it along with bpftool in order to inspect an individual
> application's used maps and progs. Right now we dump some basic
> information in the fdinfo file but with the help of the map/prog
> id full introspection becomes possible now.
> 
> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
> Acked-by: Alexei Starovoitov <ast@kernel.org>
> ---

AFAICR iproute uses this proc fdinfo, for pinned maps.  Have you tested
if this change is handled gracefully by tc ?

>  kernel/bpf/syscall.c | 12 ++++++++----
>  1 file changed, 8 insertions(+), 4 deletions(-)
> 
> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> index 388d4fe..79341e8 100644
> --- a/kernel/bpf/syscall.c
> +++ b/kernel/bpf/syscall.c
> @@ -326,13 +326,15 @@ static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
>  		   "value_size:\t%u\n"
>  		   "max_entries:\t%u\n"
>  		   "map_flags:\t%#x\n"
> -		   "memlock:\t%llu\n",
> +		   "memlock:\t%llu\n"
> +		   "map_id:\t%u\n",
>  		   map->map_type,
>  		   map->key_size,
>  		   map->value_size,
>  		   map->max_entries,
>  		   map->map_flags,
> -		   map->pages * 1ULL << PAGE_SHIFT);
> +		   map->pages * 1ULL << PAGE_SHIFT,
> +		   map->id);
>  
>  	if (owner_prog_type) {
>  		seq_printf(m, "owner_prog_type:\t%u\n",
> @@ -1069,11 +1071,13 @@ static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp)
>  		   "prog_type:\t%u\n"
>  		   "prog_jited:\t%u\n"
>  		   "prog_tag:\t%s\n"
> -		   "memlock:\t%llu\n",
> +		   "memlock:\t%llu\n"
> +		   "prog_id:\t%u\n",
>  		   prog->type,
>  		   prog->jited,
>  		   prog_tag,
> -		   prog->pages * 1ULL << PAGE_SHIFT);
> +		   prog->pages * 1ULL << PAGE_SHIFT,
> +		   prog->aux->id);
>  }
>  #endif
>  



-- 
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Principal Kernel Engineer at Red Hat
  LinkedIn: http://www.linkedin.com/in/brouer

^ permalink raw reply

* Re: [PATCH bpf-next 05/11] bpf: avoid retpoline for lookup/update/delete calls on maps
From: Jesper Dangaard Brouer @ 2018-05-29 17:23 UTC (permalink / raw)
  To: Daniel Borkmann; +Cc: brouer, netdev
In-Reply-To: <20180528004344.3606-6-daniel@iogearbox.net>

On Mon, 28 May 2018 02:43:38 +0200
Daniel Borkmann <daniel@iogearbox.net> wrote:

> While some of the BPF map lookup helpers provide a ->map_gen_lookup()
> callback for inlining the map lookup altogether it is not available
> for every map, so the remaining ones have to call bpf_map_lookup_elem()
> helper which does a dispatch to map->ops->map_lookup_elem(). In
> times of retpolines, this will control and trap speculative execution
> rather than letting it do its work for the indirect call and will
> therefore cause a slowdown. Likewise, bpf_map_update_elem() and
> bpf_map_delete_elem() do not have an inlined version and need to call
> into their map->ops->map_update_elem() resp. map->ops->map_delete_elem()
> handlers.
> 
> Before:
> 
>   # bpftool p d x i 1

I would really appreciate if we can use the long options in these kind
of examples.  It makes the command "self-documenting" and searchable by
google.

Here it would be:

 # bpftool prog dump xlated id 1

>     0: (bf) r2 = r10
>     1: (07) r2 += -8
>     2: (7a) *(u64 *)(r2 +0) = 0
>     3: (18) r1 = map[id:1]
>     5: (85) call __htab_map_lookup_elem#232656
>     6: (15) if r0 == 0x0 goto pc+4
>     7: (71) r1 = *(u8 *)(r0 +35)
>     8: (55) if r1 != 0x0 goto pc+1
>     9: (72) *(u8 *)(r0 +35) = 1
>    10: (07) r0 += 56
>    11: (15) if r0 == 0x0 goto pc+4
>    12: (bf) r2 = r0
>    13: (18) r1 = map[id:1]
>    15: (85) call bpf_map_delete_elem#215008  <-- indirect call via
>    16: (95) exit                                 helper
> 
> After:
> 
>   # bpftool p d x i 1

Same here

>     0: (bf) r2 = r10
>     1: (07) r2 += -8
>     2: (7a) *(u64 *)(r2 +0) = 0
>     3: (18) r1 = map[id:1]
>     5: (85) call __htab_map_lookup_elem#233328
>     6: (15) if r0 == 0x0 goto pc+4
>     7: (71) r1 = *(u8 *)(r0 +35)
>     8: (55) if r1 != 0x0 goto pc+1
>     9: (72) *(u8 *)(r0 +35) = 1
>    10: (07) r0 += 56
>    11: (15) if r0 == 0x0 goto pc+4
>    12: (bf) r2 = r0
>    13: (18) r1 = map[id:1]
>    15: (85) call htab_lru_map_delete_elem#238240  <-- direct call
>    16: (95) exit
> 


-- 
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Principal Kernel Engineer at Red Hat
  LinkedIn: http://www.linkedin.com/in/brouer

^ permalink raw reply

* Re: [PATCH bpf 1/2] bpf: fix alignment of netns_dev/netns_ino fields in bpf_{map,prog}_info
From: Song Liu @ 2018-05-29 17:17 UTC (permalink / raw)
  To: Eugene Syromiatnikov
  Cc: netdev, open list, Martin KaFai Lau, Daniel Borkmann,
	Alexei Starovoitov, David S. Miller, Jiri Olsa, Ingo Molnar,
	Lawrence Brakmo, Andrey Ignatov, Jakub Kicinski, John Fastabend,
	Dmitry V. Levin
In-Reply-To: <20180527112842.GA18204@asgard.redhat.com>

On Sun, May 27, 2018 at 4:28 AM, Eugene Syromiatnikov <esyr@redhat.com> wrote:
> Recent introduction of netns_dev/netns_ino to bpf_map_info/bpf_prog info
> has broken compat, as offsets of these fields are different in 32-bit
> and 64-bit ABIs.  One fix (other than implementing compat support in
> syscall in order to handle this discrepancy) is to use __aligned_u64
> instead of __u64 for these fields.
>
> Reported-by: Dmitry V. Levin <ldv@altlinux.org>
> Fixes: 52775b33bb507 ("bpf: offload: report device information about
> offloaded maps")
> Fixes: 675fc275a3a2d ("bpf: offload: report device information for
> offloaded programs")
>
> Signed-off-by: Eugene Syromiatnikov <esyr@redhat.com>
> ---
>  include/uapi/linux/bpf.h       | 8 ++++----
>  tools/include/uapi/linux/bpf.h | 8 ++++----
>  2 files changed, 8 insertions(+), 8 deletions(-)
>
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index c5ec897..903010a 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -1017,8 +1017,8 @@ struct bpf_prog_info {
>         __aligned_u64 map_ids;
>         char name[BPF_OBJ_NAME_LEN];
>         __u32 ifindex;
> -       __u64 netns_dev;
> -       __u64 netns_ino;
> +       __aligned_u64 netns_dev;
> +       __aligned_u64 netns_ino;
>  } __attribute__((aligned(8)));

Shall we add a __u32 padding variable before netns_dev? We can use it
for in the future.

Thanks,
Song

>
>  struct bpf_map_info {
> @@ -1030,8 +1030,8 @@ struct bpf_map_info {
>         __u32 map_flags;
>         char  name[BPF_OBJ_NAME_LEN];
>         __u32 ifindex;
> -       __u64 netns_dev;
> -       __u64 netns_ino;
> +       __aligned_u64 netns_dev;
> +       __aligned_u64 netns_ino;
>  } __attribute__((aligned(8)));
>
>  /* User bpf_sock_addr struct to access socket fields and sockaddr struct passed
> diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
> index c5ec897..903010a 100644
> --- a/tools/include/uapi/linux/bpf.h
> +++ b/tools/include/uapi/linux/bpf.h
> @@ -1017,8 +1017,8 @@ struct bpf_prog_info {
>         __aligned_u64 map_ids;
>         char name[BPF_OBJ_NAME_LEN];
>         __u32 ifindex;
> -       __u64 netns_dev;
> -       __u64 netns_ino;
> +       __aligned_u64 netns_dev;
> +       __aligned_u64 netns_ino;
>  } __attribute__((aligned(8)));
>
>  struct bpf_map_info {
> @@ -1030,8 +1030,8 @@ struct bpf_map_info {
>         __u32 map_flags;
>         char  name[BPF_OBJ_NAME_LEN];
>         __u32 ifindex;
> -       __u64 netns_dev;
> -       __u64 netns_ino;
> +       __aligned_u64 netns_dev;
> +       __aligned_u64 netns_ino;
>  } __attribute__((aligned(8)));
>
>  /* User bpf_sock_addr struct to access socket fields and sockaddr struct passed
> --
> 2.1.4
>

^ permalink raw reply

* Re: [PATCH bpf-next 03/11] bpf: fixup error message from gpl helpers on license mismatch
From: Jesper Dangaard Brouer @ 2018-05-29 17:16 UTC (permalink / raw)
  To: Daniel Borkmann; +Cc: brouer, netdev
In-Reply-To: <20180528004344.3606-4-daniel@iogearbox.net>

On Mon, 28 May 2018 02:43:36 +0200
Daniel Borkmann <daniel@iogearbox.net> wrote:

> Stating 'proprietary program' in the error is just silly since it
> can also be a different open source license than that which is just
> not compatible.
> 
> Reference: https://twitter.com/majek04/status/998531268039102465
> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
> Acked-by: Alexei Starovoitov <ast@kernel.org>

Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>

Thank you for cleaning up this confusion :-)

> ---
>  kernel/bpf/verifier.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index 1fd9667b..4f4786e 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -2462,7 +2462,7 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
>  
>  	/* eBPF programs must be GPL compatible to use GPL-ed functions */
>  	if (!env->prog->gpl_compatible && fn->gpl_only) {
> -		verbose(env, "cannot call GPL only function from proprietary program\n");
> +		verbose(env, "cannot call GPL-restricted function from non-GPL compatible program\n");
>  		return -EINVAL;
>  	}
>  



-- 
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Principal Kernel Engineer at Red Hat
  LinkedIn: http://www.linkedin.com/in/brouer

^ permalink raw reply

* Re: [PATCH net] sctp: not allow to set rto_min with a value below 200 msecs
From: Marcelo Ricardo Leitner @ 2018-05-29 17:06 UTC (permalink / raw)
  To: Neal Cardwell
  Cc: Michael Tuexen, nhorman, Dmitry Vyukov, lucien.xin, Netdev,
	linux-sctp, David Miller, dsa, Eric Dumazet, syzkaller
In-Reply-To: <CADVnQymCNEtQFThsbQ0u7jirx24i45f5L7P-2dPf8omkshqwmg@mail.gmail.com>

On Tue, May 29, 2018 at 12:03:46PM -0400, Neal Cardwell wrote:
> On Tue, May 29, 2018 at 11:45 AM Marcelo Ricardo Leitner <
> marcelo.leitner@gmail.com> wrote:
> > - patch2 - fix rtx attack vector
> >    - Add the floor value to rto_min to HZ/20 (which fits the values
> >      that Michael shared on the other email)
> 
> I would encourage allowing minimum RTO values down to 5ms, if the ACK
> policy in the receiver makes this feasible. Our experience is that in
> datacenter environments it can be advantageous to allow timer-based loss
> recoveries using timeout values as low as 5ms, e.g.:

Thanks Neal. On Xin's tests, the hearbeat timer becomes an issue at
~25ms already. Xin, can you share more details on the hw, which CPU
was used?

Anyway, what about we add a floor to rto_max too, so that RTO can
actually grow into something bigger that don't hog the CPU? Like:
rto_min floor = 5ms
rto_max floor = 50ms

  Marcelo

^ permalink raw reply

* [PATCH v2 net-next] net: remove bypassed check in sch_direct_xmit()
From: Song Liu @ 2018-05-29 17:03 UTC (permalink / raw)
  To: netdev
  Cc: sergei.shtylyov, Song Liu, kernel-team, John Fastabend,
	Alexei Starovoitov, David S . Miller

Checking netif_xmit_frozen_or_stopped() at the end of sch_direct_xmit()
is being bypassed. This is because "ret" from sch_direct_xmit() will be
either NETDEV_TX_OK or NETDEV_TX_BUSY, and only ret == NETDEV_TX_OK == 0
will reach the condition:

    if (ret && netif_xmit_frozen_or_stopped(txq))
        return false;

This patch cleans up the code by removing the whole condition.

For more discussion about this, please refer to
   https://marc.info/?t=152727195700008

Signed-off-by: Song Liu <songliubraving@fb.com>
Cc: John Fastabend <john.fastabend@gmail.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_generic.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 760ab1b..69078c8 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -346,9 +346,6 @@ bool sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
 		return false;
 	}
 
-	if (ret && netif_xmit_frozen_or_stopped(txq))
-		return false;
-
 	return true;
 }
 
-- 
2.9.5

^ permalink raw reply related

* Re: [PATCH net-next] net: remove bypassed check in sch_direct_xmit()
From: Song Liu @ 2018-05-29 17:02 UTC (permalink / raw)
  To: Sergei Shtylyov
  Cc: netdev@vger.kernel.org, Kernel Team, John Fastabend,
	Alexei Starovoitov, David S . Miller
In-Reply-To: <00399207-8196-1766-330d-624f2ed35c89@cogentembedded.com>



> On May 29, 2018, at 1:58 AM, Sergei Shtylyov <sergei.shtylyov@cogentembedded.com> wrote:
> 
> Hello!
> 
> On 5/29/2018 12:36 AM, Song Liu wrote:
> 
>> Check sch_direct_xmit() at the end of sch_direct_xmit() will be bypassed.
> 
>   "Checking netif_xmit_frozen_or_stopped()", perhaps? Else it doesn't make much sense...

Thanks Sergei!

Sending v2 with fix. 

Song

> 
>> This is because "ret" from sch_direct_xmit() will be either NETDEV_TX_OK
>> or NETDEV_TX_BUSY, and only ret == NETDEV_TX_OK == 0 will reach the
>> condition:
>>     if (ret && netif_xmit_frozen_or_stopped(txq))
>>         return false;
>> This patch cleans up the code by removing  the whole condition.
>> For more discussion about this, please refer to
>>    https://marc.info/?t=152727195700008
>> Signed-off-by: Song Liu <songliubraving@fb.com>
>> Cc: John Fastabend <john.fastabend@gmail.com>
>> Cc: Alexei Starovoitov <ast@kernel.org>
>> Cc: David S. Miller <davem@davemloft.net>
> [...]
> 
> MBR, Sergei

^ permalink raw reply

* Re: [PATCH net] net: sched: check netif_xmit_frozen_or_stopped() in sch_direct_xmit()
From: Song Liu @ 2018-05-29 16:51 UTC (permalink / raw)
  To: David Miller; +Cc: Networking, Kernel Team, john.fastabend@gmail.com
In-Reply-To: <20180529.100255.625454563789387164.davem@davemloft.net>



> On May 29, 2018, at 7:02 AM, David Miller <davem@davemloft.net> wrote:
> 
> From: Song Liu <songliubraving@fb.com>
> Date: Fri, 25 May 2018 11:11:44 -0700
> 
>> Summary:
>> 
>> At the end of sch_direct_xmit(), we are in the else path of
>> !dev_xmit_complete(ret), which means ret == NETDEV_TX_OK. The following
>> condition will always fail and netif_xmit_frozen_or_stopped() is not
>> checked at all.
>> 
>>    if (ret && netif_xmit_frozen_or_stopped(txq))
>>         return false;
>> 
>> In this patch, this condition is fixed as:
>> 
>>    if (netif_xmit_frozen_or_stopped(txq))
>>         return false;
>> 
>> and further simplifies the code as:
>> 
>>    return !netif_xmit_frozen_or_stopped(txq);
>> 
>> Fixes: 29b86cdac00a ("net: sched: remove remaining uses for qdisc_qlen in xmit path")
>> Cc: John Fastabend <john.fastabend@gmail.com>
>> Cc: David S. Miller <davem@davemloft.net>
>> Signed-off-by: Song Liu <songliubraving@fb.com>
> 
> I expect a new version of this patch which removes the test entirely.

The new version of it is here: http://patchwork.ozlabs.org/patch/921708/

Thanks,
Song

^ permalink raw reply

* Re: [PATCH net] sctp: not allow to set rto_min with a value below 200 msecs
From: Neal Cardwell @ 2018-05-29 16:03 UTC (permalink / raw)
  To: marcelo.leitner
  Cc: Michael Tuexen, nhorman, Dmitry Vyukov, lucien.xin, Netdev,
	linux-sctp, David Miller, dsa, Eric Dumazet, syzkaller
In-Reply-To: <20180529154514.GC3788@localhost.localdomain>

On Tue, May 29, 2018 at 11:45 AM Marcelo Ricardo Leitner <
marcelo.leitner@gmail.com> wrote:
> - patch2 - fix rtx attack vector
>    - Add the floor value to rto_min to HZ/20 (which fits the values
>      that Michael shared on the other email)

I would encourage allowing minimum RTO values down to 5ms, if the ACK
policy in the receiver makes this feasible. Our experience is that in
datacenter environments it can be advantageous to allow timer-based loss
recoveries using timeout values as low as 5ms, e.g.:


https://www.ietf.org/proceedings/97/slides/slides-97-tcpm-tcp-options-for-low-latency-00.pdf
   https://tools.ietf.org/html/draft-wang-tcpm-low-latency-opt-00

cheers,
neal

^ permalink raw reply

* Re: [PATCH bpf-next] bpf: hide the unused 'off' variable
From: John Fastabend @ 2018-05-29 15:53 UTC (permalink / raw)
  To: Arnd Bergmann, YueHaibing
  Cc: David Miller, Alexei Starovoitov, Daniel Borkmann, Networking,
	Linux Kernel Mailing List
In-Reply-To: <CAK8P3a0jnU69RQ8jLF82GofE=fgENMVoWkObYvro82f+q=p_XA@mail.gmail.com>

On 05/29/2018 03:35 AM, Arnd Bergmann wrote:
> On Tue, May 29, 2018 at 4:40 AM, YueHaibing <yuehaibing@huawei.com> wrote:
>> The local variable is only used while CONFIG_IPV6 enabled
>>
>> net/core/filter.c: In function ‘sk_msg_convert_ctx_access’:
>> net/core/filter.c:6489:6: warning: unused variable ‘off’ [-Wunused-variable]
>>   int off;
>>       ^
>> This puts it into #ifdef.
>>
>> Fixes: 303def35f64e ("bpf: allow sk_msg programs to read sock fields")
>> Signed-off-by: YueHaibing <yuehaibing@huawei.com>
> 
> I was about to send the same patch and found you had already sent one.
> 
> Acked-by: Arnd Bergmann <arnd@arndb.de>
> 

Thanks! I'm curious why kbuild bot didn't catch this. Will
try to dig into that in a bit.

Acked-by: John Fastabend <john.fastabend@gmail.com>

^ permalink raw reply

* Re: [PATCH bpf v2 0/5] fix test_sockmap
From: John Fastabend @ 2018-05-29 15:48 UTC (permalink / raw)
  To: Prashant Bhole, Alexei Starovoitov, Daniel Borkmann
  Cc: David S . Miller, Shuah Khan, netdev, linux-kselftest
In-Reply-To: <20180528043803.4824-1-bhole_prashant_q7@lab.ntt.co.jp>

On 05/27/2018 09:37 PM, Prashant Bhole wrote:
> This series fixes error handling, timeout and data verification in
> test_sockmap. Previously it was not able to detect failure/timeout in
> RX/TX thread because error was not notified to the main thread.
> 
> Also slightly improved test output by printing parameter values (cork,
> apply, start, end) so that parameters for all tests are displayed.
> 
> Prashant Bhole (5):
>   selftests/bpf: test_sockmap, check test failure
>   selftests/bpf: test_sockmap, join cgroup in selftest mode
>   selftests/bpf: test_sockmap, fix test timeout
>   selftests/bpf: test_sockmap, fix data verification
>   selftests/bpf: test_sockmap, print additional test options
> 
>  tools/testing/selftests/bpf/test_sockmap.c | 76 +++++++++++++++++-----
>  1 file changed, 58 insertions(+), 18 deletions(-)
> 

After first patch "check test failure" how do we handle the case
where test is known to cause timeouts because we are specifically testing
these cases. This is the 'cork' parameter we discussed in the last
series. It looks like with this series the test may still throw an
error?

Thanks,
John

^ permalink raw reply

* Re: [PATCH net] sctp: not allow to set rto_min with a value below 200 msecs
From: Marcelo Ricardo Leitner @ 2018-05-29 15:45 UTC (permalink / raw)
  To: Michael Tuexen
  Cc: Neil Horman, Dmitry Vyukov, Xin Long, network dev, linux-sctp,
	David Miller, David Ahern, Eric Dumazet, syzkaller
In-Reply-To: <559FFFDD-E508-4936-9544-CACE606AF40F@lurchi.franken.de>

On Tue, May 29, 2018 at 03:06:06PM +0200, Michael Tuexen wrote:
> > On 29. May 2018, at 13:41, Neil Horman <nhorman@tuxdriver.com> wrote:
> > 
> > On Mon, May 28, 2018 at 04:43:15PM -0300, Marcelo Ricardo Leitner wrote:
> >> On Sat, May 26, 2018 at 09:01:00PM -0400, Neil Horman wrote:
> >>> On Sat, May 26, 2018 at 05:50:39PM +0200, Dmitry Vyukov wrote:
> >>>> On Sat, May 26, 2018 at 5:42 PM, Michael Tuexen
> >>>> <michael.tuexen@lurchi.franken.de> wrote:
> >>>>>> On 25. May 2018, at 21:13, Neil Horman <nhorman@tuxdriver.com> wrote:
> >>>>>> 
> >>>>>> On Sat, May 26, 2018 at 01:41:02AM +0800, Xin Long wrote:
> >>>>>>> syzbot reported a rcu_sched self-detected stall on CPU which is caused
> >>>>>>> by too small value set on rto_min with SCTP_RTOINFO sockopt. With this
> >>>>>>> value, hb_timer will get stuck there, as in its timer handler it starts
> >>>>>>> this timer again with this value, then goes to the timer handler again.
> >>>>>>> 
> >>>>>>> This problem is there since very beginning, and thanks to Eric for the
> >>>>>>> reproducer shared from a syzbot mail.
> >>>>>>> 
> >>>>>>> This patch fixes it by not allowing to set rto_min with a value below
> >>>>>>> 200 msecs, which is based on TCP's, by either setsockopt or sysctl.
> >>>>>>> 
> >>>>>>> Reported-by: syzbot+3dcd59a1f907245f891f@syzkaller.appspotmail.com
> >>>>>>> Suggested-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
> >>>>>>> Signed-off-by: Xin Long <lucien.xin@gmail.com>
> >>>>>>> ---
> >>>>>>> include/net/sctp/constants.h |  1 +
> >>>>>>> net/sctp/socket.c            | 10 +++++++---
> >>>>>>> net/sctp/sysctl.c            |  3 ++-
> >>>>>>> 3 files changed, 10 insertions(+), 4 deletions(-)
> >>>>>>> 
> >>>>>>> diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h
> >>>>>>> index 20ff237..2ee7a7b 100644
> >>>>>>> --- a/include/net/sctp/constants.h
> >>>>>>> +++ b/include/net/sctp/constants.h
> >>>>>>> @@ -277,6 +277,7 @@ enum { SCTP_MAX_GABS = 16 };
> >>>>>>> #define SCTP_RTO_INITIAL     (3 * 1000)
> >>>>>>> #define SCTP_RTO_MIN         (1 * 1000)
> >>>>>>> #define SCTP_RTO_MAX         (60 * 1000)
> >>>>>>> +#define SCTP_RTO_HARD_MIN   200
> >>>>>>> 
> >>>>>>> #define SCTP_RTO_ALPHA          3   /* 1/8 when converted to right shifts. */
> >>>>>>> #define SCTP_RTO_BETA           2   /* 1/4 when converted to right shifts. */
> >>>>>>> diff --git a/net/sctp/socket.c b/net/sctp/socket.c
> >>>>>>> index ae7e7c6..6ef12c7 100644
> >>>>>>> --- a/net/sctp/socket.c
> >>>>>>> +++ b/net/sctp/socket.c
> >>>>>>> @@ -3029,7 +3029,8 @@ static int sctp_setsockopt_nodelay(struct sock *sk, char __user *optval,
> >>>>>>> * be changed.
> >>>>>>> *
> >>>>>>> */
> >>>>>>> -static int sctp_setsockopt_rtoinfo(struct sock *sk, char __user *optval, unsigned int optlen)
> >>>>>>> +static int sctp_setsockopt_rtoinfo(struct sock *sk, char __user *optval,
> >>>>>>> +                               unsigned int optlen)
> >>>>>>> {
> >>>>>>>     struct sctp_rtoinfo rtoinfo;
> >>>>>>>     struct sctp_association *asoc;
> >>>>>>> @@ -3056,10 +3057,13 @@ static int sctp_setsockopt_rtoinfo(struct sock *sk, char __user *optval, unsigne
> >>>>>>>     else
> >>>>>>>             rto_max = asoc ? asoc->rto_max : sp->rtoinfo.srto_max;
> >>>>>>> 
> >>>>>>> -    if (rto_min)
> >>>>>>> +    if (rto_min) {
> >>>>>>> +            if (rto_min < SCTP_RTO_HARD_MIN)
> >>>>>>> +                    return -EINVAL;
> >>>>>>>             rto_min = asoc ? msecs_to_jiffies(rto_min) : rto_min;
> >>>>>>> -    else
> >>>>>>> +    } else {
> >>>>>>>             rto_min = asoc ? asoc->rto_min : sp->rtoinfo.srto_min;
> >>>>>>> +    }
> >>>>>>> 
> >>>>>>>     if (rto_min > rto_max)
> >>>>>>>             return -EINVAL;
> >>>>>>> diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
> >>>>>>> index 33ca5b7..7ec854a 100644
> >>>>>>> --- a/net/sctp/sysctl.c
> >>>>>>> +++ b/net/sctp/sysctl.c
> >>>>>>> @@ -52,6 +52,7 @@ static int rto_alpha_min = 0;
> >>>>>>> static int rto_beta_min = 0;
> >>>>>>> static int rto_alpha_max = 1000;
> >>>>>>> static int rto_beta_max = 1000;
> >>>>>>> +static int rto_hard_min = SCTP_RTO_HARD_MIN;
> >>>>>>> 
> >>>>>>> static unsigned long max_autoclose_min = 0;
> >>>>>>> static unsigned long max_autoclose_max =
> >>>>>>> @@ -116,7 +117,7 @@ static struct ctl_table sctp_net_table[] = {
> >>>>>>>             .maxlen         = sizeof(unsigned int),
> >>>>>>>             .mode           = 0644,
> >>>>>>>             .proc_handler   = proc_sctp_do_rto_min,
> >>>>>>> -            .extra1         = &one,
> >>>>>>> +            .extra1         = &rto_hard_min,
> >>>>>>>             .extra2         = &init_net.sctp.rto_max
> >>>>>>>     },
> >>>>>>>     {
> >>>>>>> --
> >>>>>>> 2.1.0
> >>>>>>> 
> >>>>>>> --
> >>>>>>> To unsubscribe from this list: send the line "unsubscribe linux-sctp" in
> >>>>>>> the body of a message to majordomo@vger.kernel.org
> >>>>>>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> >>>>>>> 
> >>>>>> Patch looks fine, you probably want to note this hard minimum in man(7) sctp as
> >>>>>> well
> >>>>>> 
> >>>>> I'm aware of some signalling networks which use RTO.min of smaller values than 200ms.
> >>>>> So could this be reduced?
> >>>> 
> >>>> Hi Michael,
> >>>> 
> >>>> What value do they use?
> >>>> 
> >>>> Xin, Neil, is there more principled way of ensuring that a timer won't
> >>>> cause a hard CPU stall? There are slow machines and there are slow
> >>>> kernels (in particular syzbot kernel has tons of debug configs
> >>>> enabled). 200ms _should_ not cause problems because we did not see
> >>>> them with tcp. But it's hard to say what's the low limit as we are
> >>>> trying to put a hard upper bound on execution time of a complex
> >>>> section of code. Is there something like cond_resched for timers?
> >>> Unfortunately, Theres not really a way to do conditional rescheduling of timers,
> >>> additionally, we have a problem because the timer is reset as a side effect of
> >>> the SCTP state machine, and so the execution time between timer updates has a
> >>> signifcant amount of jitter (meaning its a pretty hard value to calibrate,
> >>> unless you just select a 'safe' large value for the floor).
> >>> 
> >>> What we might could do (though this might impact the protocol function is change
> >>> the timer update side effects to simply set a flag, and consistently update the
> >>> timers on exit from sctp_do_sm, so they don't re-arm until all state machine
> >>> processing is complete.  Anyone have any thoughts on that?
> >> 
> >> I was reviewing all this again and I'm thinking that we are missing
> >> the real point. With the parameters that reproducer [1] has, setting
> >> those very low RTO parameters, it causes the timer to actually
> >> busyloop on the heartbeats, as Xin had explained.
> >> 
> >> But thing is, it busy loops not just because RTO is too low, but
> >> because hbinterval was not accounted.
> >> 
> >> /* What is the next timeout value for this transport? */
> >> unsigned long sctp_transport_timeout(struct sctp_transport *trans)
> >> {
> >>        /* RTO + timer slack +/- 50% of RTO */
> >>        unsigned long timeout = trans->rto >> 1;  <-- [a]
> >> 
> >>        if (trans->state != SCTP_UNCONFIRMED &&
> >>            trans->state != SCTP_PF)             <--- [2]
> >>                timeout += trans->hbinterval;
> >> 
> >>        return timeout;
> >> }
> >> 
> >> The if() in [2] is to speed up path verification before using them, as
> >> per the commit changelog. Secondary paths added on processing the
> >> cookie are created with status SCTP_UNCONFIRMED, and HB timers are
> >> started in the sequence:
> >> sctp_sf_do_5_1D_ce
> >>   -> sctp_process_init
> >>     |> sctp_process_param
> >>     | -> sctp_assoc_add_peer(asoc, &addr, gfp, SCTP_UNCONFIRMED)
> >>     '> sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMERS_START, SCTP_NULL());
> >> 
> >> which starts the timer using only the small RTO for secondary paths:
> >> static void sctp_cmd_hb_timers_start(struct sctp_cmd_seq *cmds,
> >>                                     struct sctp_association *asoc)
> >> {
> >>        struct sctp_transport *t;
> >> 
> >>        /* Start a heartbeat timer for each transport on the association.
> >>         * hold a reference on the transport to make sure none of
> >>         * the needed data structures go away.
> >>         */
> >>        list_for_each_entry(t, &asoc->peer.transport_addr_list, transports)
> >>                sctp_transport_reset_hb_timer(t);
> >> }
> >> 
> >> But if the system is too busy generating HBs, it likely won't process
> >> incoming HB ACKs, which would stop the loop as it would mark the
> >> transport as Active.
> >> 
> >> I'm now thinking a better fix would be to have a specific way to
> >> kickstart these initial heartbeets, and then always use hbinterval on
> >> subsequent ones.
> >> 
> > I like the idea, but I don't think we can just use the hbinterval to set the
> > timeout.  That said, it seems like we should always be using the HB interval,
> > not just on unconfirmed or partially failed transports.  From the RFC:
> > 
> > On an idle destination address that is allowed to heartbeat, it is
> >   recommended that a HEARTBEAT chunk is sent once per RTO of that
> >   destination address plus the protocol parameter 'HB.interval', with
> >   jittering of +/- 50% of the RTO value, and exponential backoff of the
> >   RTO if the previous HEARTBEAT is unanswered
> Aren't we talking about the path confirmation procedure?
> This is described in https://tools.ietf.org/html/rfc4960#section-5.4
> where it is stated:
> 
>    In each RTO, a probe may be sent on an active UNCONFIRMED path in an
>    attempt to move it to the CONFIRMED state.  If during this probing
>    the path becomes inactive, this rate is lowered to the normal
>    HEARTBEAT rate.  At the expiration of the RTO timer, the error
>    counter of any path that was probed but not CONFIRMED is incremented
>    by one and subjected to path failure detection, as defined in Section 8.2.
>    When probing UNCONFIRMED addresses, however, the association
>    overall error count is NOT incremented.
> 
> So during path confirmation there is no requirement to add HB.interval.

Right.

> 
> Best regards
> Michael
> > 
> > It seems like we should be adding it to the timer expiration universally.  By my
> > read, we've never done this quite right.  And yes, I agree, if we account this
> > properly, we will avoid this issue.
> > 
> > Its also probably important to note here, that, like RTO.min currently, there is
> > no hard floor to the heartbeat interval, and the RFC is silent on what it should
> > be.  So it would be possible to still find ourselves in this situation if we set
> > the interval to 0 from userspace.  Is it worth considering a floor on the
> > minimum hb interval of the rto is to have no floor?

Seems so, yes. I was discussing this with Xin Long offline and he
suggested that we can add a floor to hb timeouts (not interval) with
this:

diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index 47f82bd..9f66708 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -634,7 +634,7 @@ unsigned long sctp_transport_timeout(struct sctp_transport *trans)
 	    trans->state != SCTP_PF)
 		timeout += trans->hbinterval;

-	return timeout;
+	return max_t(unsigned long, timeout, HZ/5);
 }

 /* Reset transport variables to their initial values */

This avoids the issue at hand and without forcing a rto_min value.

But as you were anticipating, there are other vectors that can be
exploited to trigger something like this. The one I could think of, is
to set rto_min=1 rto_max=2 and pathmaxrxt=<large value>. This is
likely to get us into rtxing the same packet over and over based on
timer/softirq, and it doesn't even need root for that.
 
Seems a more complete fix is:
- patch1 - fix issue at hand
  - Use the max_t above
- patch2 - fix rtx attack vector
  - Add the floor value to rto_min to HZ/20 (which fits the values
    that Michael shared on the other email)
- patch3 - speed up initial HB again
  - change sctp_cmd_hb_timers_start() so hb timers are kickstarted
    when the association is established. AFAICT RFC doesn't specify
    when these initial ones should be sent, and I see no issues with
    speeding them up.

WDYT?

Michael, what is the lowest heartbeat interval you have ever seen?
Hopefully it's bigger than 200ms. :)

Best,
  Marcelo

^ permalink raw reply related

* Re: STMMAC driver with TSO enabled issue
From: Bhadram Varka @ 2018-05-29 15:43 UTC (permalink / raw)
  To: Jose Abreu, netdev@vger.kernel.org, Joao Pinto
In-Reply-To: <2f6d4a0e-43cd-a67b-1f0a-5d358ac05c83@synopsys.com>

Hi Jose,

On 5/28/2018 4:26 PM, Jose Abreu wrote:
> Hi Bhadram,
> 
> On 28-05-2018 10:15, Bhadram Varka wrote:
>> Hi Jose,
>>
>> On 5/25/2018 8:02 PM, Jose Abreu wrote:
>>> On 25-05-2018 15:25, Bhadram Varka wrote:
>>>> Hi Jose,
>>>>
>>>> On 5/25/2018 7:35 PM, Jose Abreu wrote:
>>>>> Hi Bhadram,
>>>>>
>>>>> On 25-05-2018 05:41, Bhadram Varka wrote:
>>>>>> Hi Jose,
>>>>>>
>>>>>> On 5/24/2018 3:01 PM, Jose Abreu wrote:
>>>>>>> Hi Bhadram,
>>>>>>>
>>>>>>> On 24-05-2018 06:58, Bhadram Varka wrote:
>>>>>>>>
>>>>>>>> After some time if check Tx descriptor status - then I see
>>>>>>>> only
>>>>>>>> below
>>>>>>>>
>>>>>>>> [..]
>>>>>>>> [85788.286730] 027 [0x827951b0]: 0xf854f000 0x0 0x16d8
>>>>>>>> 0x90000000
>>>>>>>>
>>>>>>>> index 025 and 026 descriptors processed but not index 027.
>>>>>>>>
>>>>>>>> At this stage Tx DMA is always in below state -
>>>>>>>>
>>>>>>>> ■ 3'b011: Running (Reading Data from system memory
>>>>>>>> buffer and queuing it to the Tx buffer (Tx FIFO))
>>>>>>>
>>>>>>> Thats strange, I think the descriptors look okay though. I
>>>>>>> will
>>>>>>> need the registers values (before the lock) and, if
>>>>>>> possible, the
>>>>>>> git bisect output.
>>>>>>
>>>>>> Attaching the register dump file after the issue observed.
>>>>>> Please check once.
>>>>>>
>>>>>
>>>>> ----->8-----
>>>>> 0x112c = 0x0000003F
>>>>> 0x11ac = 0x0000003F
>>>>> 0x122c = 0x0000003F
>>>>> 0x12ac = 0x0000003F
>>>>>
>>>>> 0x1130 = 0x0000003F
>>>>> 0x11b0 = 0x0000003F
>>>>> 0x1230 = 0x0000003F
>>>>> 0x12b0 = 0x0000003F
>>>>> ----->8-----
>>>>>
>>>>> This can't be right, it should be DMA_{RX/TX}_SIZE - 1 =
>>>>> 511. Did
>>>>> you change these values in the code?
>>>>>
>>>>
>>>> Yes. I have changed the descriptor length to 64 - so that
>>>> searching for the current descriptor status would be easy.
>>>
>>> Ok, it shouldn't impact anything. The only thing I'm remembering
>>> now is that you can have TSO not enabled in all DMA channels (HW
>>> configuration allows this). Please check if TSO in single-queue
>>> works.
>> TSO works fine if only single queue enabled. I don't see any
>> limitation from HW side because TSO works fine with other
>> driver which we received from Synopsys with IP drop.
> 
> You need to check with HW team if TSO is enabled for all channels
> because you can have TSO channels < DMA channels and there is no
> way to confirm this in the registers. Also check if received
> driver is routing packets to queue != 0.
> 

Root caused the issue to TxPBL settings. In current configuration driver 
using the TxPBL = 32 which will be fine for single channel but its not 
recommended settings for multi-queue scenario. Recommended setting for 
TxPBL is half of the queue size.

o Total MTL Tx queue size - 16KB
o For multi-queue - total size divided by number of queues -
(16KB/4) = 4KB for each queue.
o So we need to set the TxPBL value so that we can place memory request 
for 2KB from system memory. For this to achieve we need to set TxPBL=16.

Thanks for the help in debugging.

-- 
Thanks,
Bhadram.

^ permalink raw reply

* Re: [PATCH bpf-next 06/11] bpf: add bpf_skb_cgroup_id helper
From: Daniel Borkmann @ 2018-05-29 15:43 UTC (permalink / raw)
  To: Quentin Monnet, ast; +Cc: netdev
In-Reply-To: <ada6df29-600c-9e86-3843-36951f0f226e@netronome.com>

On 05/29/2018 02:15 PM, Quentin Monnet wrote:
> Hi Daniel,
> 
> 2018-05-28 02:43 UTC+0200 ~ Daniel Borkmann <daniel@iogearbox.net>
>> Add a new bpf_skb_cgroup_id() helper that allows to retrieve the
>> cgroup id from the skb's socket. This is useful in particular to
>> enable bpf_get_cgroup_classid()-like behavior for cgroup v1 in
>> cgroup v2 by allowing ID based matching on egress. This can in
>> particular be used in combination with applying policy e.g. from
>> map lookups, and also complements the older bpf_skb_under_cgroup()
>> interface. In user space the cgroup id for a given path can be
>> retrieved through the f_handle as demonstrated in [0] recently.
>>
>>   [0] https://lkml.org/lkml/2018/5/22/1190
>>
>> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
>> Acked-by: Alexei Starovoitov <ast@kernel.org>
>> ---
>>  include/uapi/linux/bpf.h | 17 ++++++++++++++++-
>>  net/core/filter.c        | 29 +++++++++++++++++++++++++++--
>>  2 files changed, 43 insertions(+), 3 deletions(-)
>>
>> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
>> index 9b8c6e3..e2853aa 100644
>> --- a/include/uapi/linux/bpf.h
>> +++ b/include/uapi/linux/bpf.h
>> @@ -2004,6 +2004,20 @@ union bpf_attr {
>>   * 		direct packet access.
>>   *	Return
>>   * 		0 on success, or a negative error in case of failure.
>> + *
>> + * uint64_t bpf_skb_cgroup_id(struct sk_buff *skb)
>> + * 	Description
>> + * 		Return the cgroup v2 id of the socket associated with the *skb*.
>> + * 		This is roughly similar to the **bpf_get_cgroup_classid**\ ()
>> + * 		helper for cgroup v1 by providing a tag resp. identifier that
>> + * 		can be matched on or used for map lookups e.g. to implement
>> + * 		policy. The cgroup v2 id of a given path in the hierarchy is
>> + * 		exposed in user space through the f_handle API in order to get
>> + * 		to the same 64-bit id.
>> + *
>> + * 		This helper can be used on TC egress path, but not on ingress.
> 
> Nitpick: Maybe mention that the kernel must be built with
> CONFIG_SOCK_CGROUP_DATA option for the helper to be available?

Yeah that's fine. I was planning on a minor respin anyway some time today,
so I'll also update the description along with it.

Cheers,
Daniel

^ permalink raw reply

* Re: [PATCH bpf v2 1/5] selftests/bpf: test_sockmap, check test failure
From: John Fastabend @ 2018-05-29 15:37 UTC (permalink / raw)
  To: Prashant Bhole, Alexei Starovoitov, Daniel Borkmann
  Cc: David S . Miller, Shuah Khan, netdev, linux-kselftest
In-Reply-To: <20180528043803.4824-2-bhole_prashant_q7@lab.ntt.co.jp>

On 05/27/2018 09:37 PM, Prashant Bhole wrote:
> Test failures are not identified because exit code of RX/TX threads
> is not checked. Also threads are not returning correct exit code.
> 
> - Return exit code from threads depending on test execution status
> - In main thread, check the exit code of RX/TX threads
> 
> Fixes: 16962b2404ac ("bpf: sockmap, add selftests")
> Signed-off-by: Prashant Bhole <bhole_prashant_q7@lab.ntt.co.jp>
> ---

Acked-by: John Fastabend <john.fastabend@gmail.com>

>  tools/testing/selftests/bpf/test_sockmap.c | 25 ++++++++++++++++------
>  1 file changed, 19 insertions(+), 6 deletions(-)
> 
> diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c
> index eb17fae458e6..34feb74c95c4 100644
> --- a/tools/testing/selftests/bpf/test_sockmap.c
> +++ b/tools/testing/selftests/bpf/test_sockmap.c
> @@ -429,8 +429,8 @@ static int sendmsg_test(struct sockmap_options *opt)
>  	struct msg_stats s = {0};
>  	int iov_count = opt->iov_count;
>  	int iov_buf = opt->iov_length;
> +	int rx_status, tx_status;
>  	int cnt = opt->rate;
> -	int status;
>  
>  	errno = 0;
>  
> @@ -442,7 +442,7 @@ static int sendmsg_test(struct sockmap_options *opt)
>  	rxpid = fork();
>  	if (rxpid == 0) {
>  		if (opt->drop_expected)
> -			exit(1);
> +			exit(0);
>  
>  		if (opt->sendpage)
>  			iov_count = 1;
> @@ -463,7 +463,7 @@ static int sendmsg_test(struct sockmap_options *opt)
>  				"rx_sendmsg: TX: %zuB %fB/s %fGB/s RX: %zuB %fB/s %fGB/s\n",
>  				s.bytes_sent, sent_Bps, sent_Bps/giga,
>  				s.bytes_recvd, recvd_Bps, recvd_Bps/giga);
> -		exit(1);
> +		exit(err ? 1 : 0);
>  	} else if (rxpid == -1) {
>  		perror("msg_loop_rx: ");
>  		return errno;
> @@ -491,14 +491,27 @@ static int sendmsg_test(struct sockmap_options *opt)
>  				"tx_sendmsg: TX: %zuB %fB/s %f GB/s RX: %zuB %fB/s %fGB/s\n",
>  				s.bytes_sent, sent_Bps, sent_Bps/giga,
>  				s.bytes_recvd, recvd_Bps, recvd_Bps/giga);
> -		exit(1);
> +		exit(err ? 1 : 0);
>  	} else if (txpid == -1) {
>  		perror("msg_loop_tx: ");
>  		return errno;
>  	}
>  
> -	assert(waitpid(rxpid, &status, 0) == rxpid);
> -	assert(waitpid(txpid, &status, 0) == txpid);
> +	assert(waitpid(rxpid, &rx_status, 0) == rxpid);
> +	assert(waitpid(txpid, &tx_status, 0) == txpid);
> +	if (WIFEXITED(rx_status)) {
> +		err = WEXITSTATUS(rx_status);
> +		if (err) {
> +			fprintf(stderr, "rx thread exited with err %d. ", err);
> +			goto out;
> +		}
> +	}
> +	if (WIFEXITED(tx_status)) {
> +		err = WEXITSTATUS(tx_status);
> +		if (err)
> +			fprintf(stderr, "tx thread exited with err %d. ", err);
> +	}
> +out:
>  	return err;
>  }
>  
> 

^ permalink raw reply

* Re: [PATCH net] vhost_net: flush batched heads before trying to busy polling
From: Michael S. Tsirkin @ 2018-05-29 15:27 UTC (permalink / raw)
  To: Jason Wang; +Cc: kvm, virtualization, netdev, linux-kernel
In-Reply-To: <1527574699-13047-1-git-send-email-jasowang@redhat.com>

On Tue, May 29, 2018 at 02:18:19PM +0800, Jason Wang wrote:
> After commit e2b3b35eb989 ("vhost_net: batch used ring update in rx"),
> we tend to batch updating used heads. But it doesn't flush batched
> heads before trying to do busy polling, this will cause vhost to wait
> for guest TX which waits for the used RX. Fixing by flush batched
> heads before busy loop.
> 
> 1 byte TCP_RR performance recovers from 13107.83 to 50402.65.
> 
> Fixes: e2b3b35eb989 ("vhost_net: batch used ring update in rx")
> Signed-off-by: Jason Wang <jasowang@redhat.com>

Acked-by: Michael S. Tsirkin <mst@redhat.com>

> ---
>  drivers/vhost/net.c | 37 ++++++++++++++++++++++++-------------
>  1 file changed, 24 insertions(+), 13 deletions(-)
> 
> diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
> index 986058a..eeaf673 100644
> --- a/drivers/vhost/net.c
> +++ b/drivers/vhost/net.c
> @@ -105,7 +105,9 @@ struct vhost_net_virtqueue {
>  	/* vhost zerocopy support fields below: */
>  	/* last used idx for outstanding DMA zerocopy buffers */
>  	int upend_idx;
> -	/* first used idx for DMA done zerocopy buffers */
> +	/* For TX, first used idx for DMA done zerocopy buffers
> +	 * For RX, number of batched heads
> +	 */
>  	int done_idx;
>  	/* an array of userspace buffers info */
>  	struct ubuf_info *ubuf_info;
> @@ -626,6 +628,18 @@ static int sk_has_rx_data(struct sock *sk)
>  	return skb_queue_empty(&sk->sk_receive_queue);
>  }
>  
> +static void vhost_rx_signal_used(struct vhost_net_virtqueue *nvq)
> +{
> +	struct vhost_virtqueue *vq = &nvq->vq;
> +	struct vhost_dev *dev = vq->dev;
> +
> +	if (!nvq->done_idx)
> +		return;
> +
> +	vhost_add_used_and_signal_n(dev, vq, vq->heads, nvq->done_idx);
> +	nvq->done_idx = 0;
> +}
> +
>  static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk)
>  {
>  	struct vhost_net_virtqueue *rvq = &net->vqs[VHOST_NET_VQ_RX];
> @@ -635,6 +649,8 @@ static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk)
>  	int len = peek_head_len(rvq, sk);
>  
>  	if (!len && vq->busyloop_timeout) {
> +		/* Flush batched heads first */
> +		vhost_rx_signal_used(rvq);
>  		/* Both tx vq and rx socket were polled here */
>  		mutex_lock_nested(&vq->mutex, 1);
>  		vhost_disable_notify(&net->dev, vq);
> @@ -762,7 +778,7 @@ static void handle_rx(struct vhost_net *net)
>  	};
>  	size_t total_len = 0;
>  	int err, mergeable;
> -	s16 headcount, nheads = 0;
> +	s16 headcount;
>  	size_t vhost_hlen, sock_hlen;
>  	size_t vhost_len, sock_len;
>  	struct socket *sock;
> @@ -790,8 +806,8 @@ static void handle_rx(struct vhost_net *net)
>  	while ((sock_len = vhost_net_rx_peek_head_len(net, sock->sk))) {
>  		sock_len += sock_hlen;
>  		vhost_len = sock_len + vhost_hlen;
> -		headcount = get_rx_bufs(vq, vq->heads + nheads, vhost_len,
> -					&in, vq_log, &log,
> +		headcount = get_rx_bufs(vq, vq->heads + nvq->done_idx,
> +					vhost_len, &in, vq_log, &log,
>  					likely(mergeable) ? UIO_MAXIOV : 1);
>  		/* On error, stop handling until the next kick. */
>  		if (unlikely(headcount < 0))
> @@ -862,12 +878,9 @@ static void handle_rx(struct vhost_net *net)
>  			vhost_discard_vq_desc(vq, headcount);
>  			goto out;
>  		}
> -		nheads += headcount;
> -		if (nheads > VHOST_RX_BATCH) {
> -			vhost_add_used_and_signal_n(&net->dev, vq, vq->heads,
> -						    nheads);
> -			nheads = 0;
> -		}
> +		nvq->done_idx += headcount;
> +		if (nvq->done_idx > VHOST_RX_BATCH)
> +			vhost_rx_signal_used(nvq);
>  		if (unlikely(vq_log))
>  			vhost_log_write(vq, vq_log, log, vhost_len);
>  		total_len += vhost_len;
> @@ -878,9 +891,7 @@ static void handle_rx(struct vhost_net *net)
>  	}
>  	vhost_net_enable_vq(net, vq);
>  out:
> -	if (nheads)
> -		vhost_add_used_and_signal_n(&net->dev, vq, vq->heads,
> -					    nheads);
> +	vhost_rx_signal_used(nvq);
>  	mutex_unlock(&vq->mutex);
>  }
>  
> -- 
> 2.7.4

^ permalink raw reply

* [PATCH net-next] tcp: minor optimization around tcp_hdr() usage in receive path
From: Yafang Shao @ 2018-05-29 15:27 UTC (permalink / raw)
  To: edumazet, davem; +Cc: netdev, linux-kernel, Yafang Shao

This is additional to the
commit ea1627c20c34 ("tcp: minor optimizations around tcp_hdr() usage").
At this point, skb->data is same with tcp_hdr() as tcp header has not
been pulled yet. So use the less expensive one to get the tcp header.

Remove the third parameter of tcp_rcv_established() and put it into
the function body.

Furthermore, the local variables are listed as a reverse christmas tree :)

Cc: Eric Dumazet <edumazet@google.com>
Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
---
 include/net/tcp.h          | 3 +--
 include/trace/events/tcp.h | 5 +++--
 net/ipv4/tcp_input.c       | 6 +++---
 net/ipv4/tcp_ipv4.c        | 2 +-
 net/ipv6/tcp_ipv6.c        | 2 +-
 5 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 952d842..029a51b 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -334,8 +334,7 @@ ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset,
 void tcp_delack_timer_handler(struct sock *sk);
 int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg);
 int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb);
-void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
-			 const struct tcphdr *th);
+void tcp_rcv_established(struct sock *sk, struct sk_buff *skb);
 void tcp_rcv_space_adjust(struct sock *sk);
 int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp);
 void tcp_twsk_destructor(struct sock *sk);
diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h
index 703abb6..ac55b32 100644
--- a/include/trace/events/tcp.h
+++ b/include/trace/events/tcp.h
@@ -248,8 +248,9 @@
 	),
 
 	TP_fast_assign(
-		const struct tcp_sock *tp = tcp_sk(sk);
+		const struct tcphdr *th = (const struct tcphdr *)skb->data;
 		const struct inet_sock *inet = inet_sk(sk);
+		const struct tcp_sock *tp = tcp_sk(sk);
 
 		memset(__entry->saddr, 0, sizeof(struct sockaddr_in6));
 		memset(__entry->daddr, 0, sizeof(struct sockaddr_in6));
@@ -261,7 +262,7 @@
 		__entry->dport = ntohs(inet->inet_dport);
 		__entry->mark = skb->mark;
 
-		__entry->data_len = skb->len - tcp_hdrlen(skb);
+		__entry->data_len = skb->len - __tcp_hdrlen(th);
 		__entry->snd_nxt = tp->snd_nxt;
 		__entry->snd_una = tp->snd_una;
 		__entry->snd_cwnd = tp->snd_cwnd;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 1191cac..d5ffb57 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5390,11 +5390,11 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
  *	the rest is checked inline. Fast processing is turned on in
  *	tcp_data_queue when everything is OK.
  */
-void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
-			 const struct tcphdr *th)
+void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
 {
-	unsigned int len = skb->len;
+	const struct tcphdr *th = (const struct tcphdr *)skb->data;
 	struct tcp_sock *tp = tcp_sk(sk);
+	unsigned int len = skb->len;
 
 	/* TCP congestion window tracking */
 	trace_tcp_probe(sk, skb);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index adbdb50..749b0ef 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1486,7 +1486,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
 				sk->sk_rx_dst = NULL;
 			}
 		}
-		tcp_rcv_established(sk, skb, tcp_hdr(skb));
+		tcp_rcv_established(sk, skb);
 		return 0;
 	}
 
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 7d47c2b..8764a63 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1322,7 +1322,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
 			}
 		}
 
-		tcp_rcv_established(sk, skb, tcp_hdr(skb));
+		tcp_rcv_established(sk, skb);
 		if (opt_skb)
 			goto ipv6_pktoptions;
 		return 0;
-- 
1.8.3.1

^ permalink raw reply related

* Re: [PATCH v2 net] tun: Fix NULL pointer dereference in XDP redirect
From: David Miller @ 2018-05-29 15:06 UTC (permalink / raw)
  To: makita.toshiaki; +Cc: netdev, jasowang
In-Reply-To: <1527503869-2412-1-git-send-email-makita.toshiaki@lab.ntt.co.jp>

From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Date: Mon, 28 May 2018 19:37:49 +0900

> Calling XDP redirection requires bh disabled. Softirq can call another
> XDP function and redirection functions, then the percpu static variable
> ri->map can be overwritten to NULL.
 ...
> v2:
>  - Removed preempt_disable/enable since local_bh_disable will prevent
>    preemption as well, feedback from Jason Wang.
> 
> Fixes: 761876c857cb ("tap: XDP support")
> Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>

Applied and queued up for -stable.

^ permalink raw reply

* Re: [PATCH net] be2net: Fix error detection logic for BE3
From: David Miller @ 2018-05-29 14:58 UTC (permalink / raw)
  To: suresh.reddy; +Cc: netdev
In-Reply-To: <20180528052606.21267-1-suresh.reddy@broadcom.com>

From: Suresh Reddy <suresh.reddy@broadcom.com>
Date: Mon, 28 May 2018 01:26:06 -0400

> Check for 0xE00 (RECOVERABLE_ERR) along with ARMFW UE (0x0)
> in be_detect_error() to know whether the error is valid error or not
> 
> Fixes: 673c96e5a ("be2net: Fix UE detection logic for BE3")
> Signed-off-by: Suresh Reddy <suresh.reddy@broadcom.com>

Applied and queued up for -stable.

^ permalink raw reply

* [PATCH iproute2 v2] ipaddress: strengthen check on 'label' input
From: Patrick Talbert @ 2018-05-29 14:57 UTC (permalink / raw)
  To: netdev; +Cc: stephen

As mentioned in the ip-address man page, an address label must
be equal to the device name or prefixed by the device name
followed by a colon. Currently the only check on this input is
to see if the device name appears at the beginning of the label
string.

This commit adds an additional check to ensure label == dev or
continues with a colon.

Signed-off-by: Patrick Talbert <ptalbert@redhat.com>
Suggested-by: Stephen Hemminger <stephen@networkplumber.org>
---
 ip/ipaddress.c | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/ip/ipaddress.c b/ip/ipaddress.c
index 00da14c..fce2008 100644
--- a/ip/ipaddress.c
+++ b/ip/ipaddress.c
@@ -2040,6 +2040,22 @@ static bool ipaddr_is_multicast(inet_prefix *a)
 		return false;
 }
 
+static bool is_valid_label(const char *dev, const char *label)
+{
+	char alias[strlen(dev) + 1];
+
+	if (strlen(label) < strlen(dev))
+		return false;
+
+	strcpy(alias, dev);
+	strcat(alias, ":");
+	if (strncmp(label, dev, strlen(dev)) == 0 ||
+	    strncmp(label, alias, strlen(alias)) == 0)
+		return true;
+	else
+		return false;
+}
+
 static int ipaddr_modify(int cmd, int flags, int argc, char **argv)
 {
 	struct {
@@ -2174,8 +2190,9 @@ static int ipaddr_modify(int cmd, int flags, int argc, char **argv)
 		fprintf(stderr, "Not enough information: \"dev\" argument is required.\n");
 		return -1;
 	}
-	if (l && matches(d, l) != 0) {
-		fprintf(stderr, "\"dev\" (%s) must match \"label\" (%s).\n", d, l);
+	if (l && ! is_valid_label(d, l)) {
+		fprintf(stderr, "\"label\" (%s) must match \"dev\" (%s) or be prefixed by"
+			" \"dev\" with a colon.\n", l, d);
 		return -1;
 	}
 
-- 
1.8.3.1

^ permalink raw reply related

* Re: [PATCH v2] Revert "alx: remove WoL support"
From: David Miller @ 2018-05-29 14:57 UTC (permalink / raw)
  To: acelan.kao
  Cc: jcliburn, chris.snook, rakesh, netdev, emily.chien, andrew,
	linux-kernel, johannes, js
In-Reply-To: <CAFv23QnGJz8YUwzqmzHii=Obg8D9sb25Z4A+FFv2ze7HYXXG8g@mail.gmail.com>

From: AceLan Kao <acelan.kao@canonical.com>
Date: Mon, 28 May 2018 13:06:31 +0800

> Hi all,
> 
> Just inform you a news reported by a user who confirmed the wake up
> issue can't be reproduce by the new kernel.
> https://bugzilla.kernel.org/show_bug.cgi?id=61651#c126
> 
> <quote>
> Guillaume de Jabrun 2018-05-27 15:12:54 UTC
> 
> I am using this patch for a long time. I was experiencing the "wake up
> twice" bug, but with recent kernel version, I don't have this issue
> anymore.
> I can't tell which version actually fix the bug (I don't remember..).
> </quote>

Ok, please resubmit the revert and let's see what happens.

^ permalink raw reply

* Re: [PATCH] net: qmi_wwan: Add Netgear Aircard 779S
From: David Miller @ 2018-05-29 14:56 UTC (permalink / raw)
  To: josh; +Cc: joshuajhill, bjorn, netdev, linux-usb, linux-kernel
In-Reply-To: <1527466241-28446-1-git-send-email-josh@joshuajhill.com>

From: Josh Hill <josh@joshuajhill.com>
Date: Sun, 27 May 2018 20:10:41 -0400

> Add support for Netgear Aircard 779S
> 
> Signed-off-by: Josh Hill <josh@joshuajhill.com>

Applied and queued up for -stable.

^ permalink raw reply

* Re: [PATCH v2 net-next] tcp: use data length instead of skb->len in tcp_probe
From: Yafang Shao @ 2018-05-29 14:56 UTC (permalink / raw)
  To: David Miller; +Cc: Song Liu, netdev, LKML
In-Reply-To: <20180529.105455.676464019941488974.davem@davemloft.net>

On Tue, May 29, 2018 at 10:54 PM, David Miller <davem@davemloft.net> wrote:
> From: Yafang Shao <laoar.shao@gmail.com>
> Date: Tue, 29 May 2018 22:36:50 +0800
>
>> On Tue, May 29, 2018 at 10:15 PM, David Miller <davem@davemloft.net> wrote:
>>> From: Yafang Shao <laoar.shao@gmail.com>
>>> Date: Fri, 25 May 2018 18:14:05 +0800
>>>
>>>> skb->len is meaningless to user.
>>>> data length could be more helpful, with which we can easily filter out
>>>> the packet without payload.
>>>>
>>>> Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
>>>
>>> Applied, thank you.
>>
>> Hi Dave,
>>
>> There's an update on this patch.
>> Pls.  see V4.
>>
>> And I will send a V5 patch per Eric's suggestion.
>
> Sorry, I pushed it out already.  You'll need to send me relative changes.

OK

^ permalink raw reply

* Re: [PATCH v2 net-next] tcp: use data length instead of skb->len in tcp_probe
From: David Miller @ 2018-05-29 14:54 UTC (permalink / raw)
  To: laoar.shao; +Cc: songliubraving, netdev, linux-kernel
In-Reply-To: <CALOAHbAzy_KfT0FcU9fumPKZgSaSOAAfY755TGxsJU7toB_1dA@mail.gmail.com>

From: Yafang Shao <laoar.shao@gmail.com>
Date: Tue, 29 May 2018 22:36:50 +0800

> On Tue, May 29, 2018 at 10:15 PM, David Miller <davem@davemloft.net> wrote:
>> From: Yafang Shao <laoar.shao@gmail.com>
>> Date: Fri, 25 May 2018 18:14:05 +0800
>>
>>> skb->len is meaningless to user.
>>> data length could be more helpful, with which we can easily filter out
>>> the packet without payload.
>>>
>>> Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
>>
>> Applied, thank you.
> 
> Hi Dave,
> 
> There's an update on this patch.
> Pls.  see V4.
> 
> And I will send a V5 patch per Eric's suggestion.

Sorry, I pushed it out already.  You'll need to send me relative changes.

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox