Netdev List
 help / color / mirror / Atom feed
* Re: [PATCH bpf-next 00/10] CO-RE offset relocations
From: Song Liu @ 2019-07-29 20:36 UTC (permalink / raw)
  To: Andrii Nakryiko
  Cc: bpf, Networking, Alexei Starovoitov, Daniel Borkmann,
	Yonghong Song, Andrii Nakryiko, Kernel Team
In-Reply-To: <CAPhsuW4hd2NJU5VZAwXDTMwrJRA4O-O2iNm8OywtJd0EZd5DmA@mail.gmail.com>

On Mon, Jul 29, 2019 at 1:20 PM Song Liu <liu.song.a23@gmail.com> wrote:
>
> On Wed, Jul 24, 2019 at 1:34 PM Andrii Nakryiko <andriin@fb.com> wrote:
> >
> > This patch set implements central part of CO-RE (Compile Once - Run
> > Everywhere, see [0] and [1] for slides and video): relocating field offsets.
> > Most of the details are written down as comments to corresponding parts of the
> > code.
> >
> > Patch #1 adds loading of .BTF.ext offset relocations section and macros to
> > work with its contents.
> > Patch #2 implements CO-RE relocations algorithm in libbpf.
> > Patches #3-#10 adds selftests validating various parts of relocation handling,
> > type compatibility, etc.
> >
> > For all tests to work, you'll need latest Clang/LLVM supporting
> > __builtin_preserve_access_index intrinsic, used for recording offset
> > relocations. Kernel on which selftests run should have BTF information built
> > in (CONFIG_DEBUG_INFO_BTF=y).
> >
> >   [0] http://vger.kernel.org/bpfconf2019.html#session-2
> >   [1] http://vger.kernel.org/lpc-bpf2018.html#session-2
> >
> > CO-RE relocations
> >
> > This patch set implements central part of CO-RE (Compile Once - Run
> > Everywhere, see [0] and [1] for slides and video): relocating field offsets.
> > Most of the details are written down as comments to corresponding parts of the
> > code.
> >
> > Patch #1 adds loading of .BTF.ext offset relocations section and macros to
> > work with its contents.
> > Patch #2 implements CO-RE relocations algorithm in libbpf.
> > Patches #3-#10 adds selftests validating various parts of relocation handling,
> > type compatibility, etc.
> >
> > For all tests to work, you'll need latest Clang/LLVM supporting
> > __builtin_preserve_access_index intrinsic, used for recording offset
> > relocations. Kernel on which selftests run should have BTF information built
> > in (CONFIG_DEBUG_INFO_BTF=y).
> >
> >   [0] http://vger.kernel.org/bpfconf2019.html#session-2
> >   [1] http://vger.kernel.org/lpc-bpf2018.html#session-2
> >
> > Andrii Nakryiko (10):
> >   libbpf: add .BTF.ext offset relocation section loading
> >   libbpf: implement BPF CO-RE offset relocation algorithm
> >   selftests/bpf: add CO-RE relocs testing setup
> >   selftests/bpf: add CO-RE relocs struct flavors tests
> >   selftests/bpf: add CO-RE relocs nesting tests
> >   selftests/bpf: add CO-RE relocs array tests
> >   selftests/bpf: add CO-RE relocs enum/ptr/func_proto tests
> >   selftests/bpf: add CO-RE relocs modifiers/typedef tests
> >   selftest/bpf: add CO-RE relocs ptr-as-array tests
> >   selftests/bpf: add CO-RE relocs ints tests
> >
> >  tools/lib/bpf/btf.c                           |  64 +-
> >  tools/lib/bpf/btf.h                           |   4 +
> >  tools/lib/bpf/libbpf.c                        | 866 +++++++++++++++++-
> >  tools/lib/bpf/libbpf.h                        |   1 +
> >  tools/lib/bpf/libbpf_internal.h               |  91 ++
> >  .../selftests/bpf/prog_tests/core_reloc.c     | 363 ++++++++
> >  .../bpf/progs/btf__core_reloc_arrays.c        |   3 +
> >  .../btf__core_reloc_arrays___diff_arr_dim.c   |   3 +
> >  ...btf__core_reloc_arrays___diff_arr_val_sz.c |   3 +
> >  .../btf__core_reloc_arrays___err_non_array.c  |   3 +
> >  ...btf__core_reloc_arrays___err_too_shallow.c |   3 +
> >  .../btf__core_reloc_arrays___err_too_small.c  |   3 +
> >  ..._core_reloc_arrays___err_wrong_val_type1.c |   3 +
> >  ..._core_reloc_arrays___err_wrong_val_type2.c |   3 +
> >  .../bpf/progs/btf__core_reloc_flavors.c       |   3 +
> >  .../btf__core_reloc_flavors__err_wrong_name.c |   3 +
> >  .../bpf/progs/btf__core_reloc_ints.c          |   3 +
> >  .../bpf/progs/btf__core_reloc_ints___bool.c   |   3 +
> >  .../btf__core_reloc_ints___err_bitfield.c     |   3 +
> >  .../btf__core_reloc_ints___err_wrong_sz_16.c  |   3 +
> >  .../btf__core_reloc_ints___err_wrong_sz_32.c  |   3 +
> >  .../btf__core_reloc_ints___err_wrong_sz_64.c  |   3 +
> >  .../btf__core_reloc_ints___err_wrong_sz_8.c   |   3 +
> >  .../btf__core_reloc_ints___reverse_sign.c     |   3 +
> >  .../bpf/progs/btf__core_reloc_mods.c          |   3 +
> >  .../progs/btf__core_reloc_mods___mod_swap.c   |   3 +
> >  .../progs/btf__core_reloc_mods___typedefs.c   |   3 +
> >  .../bpf/progs/btf__core_reloc_nesting.c       |   3 +
> >  .../btf__core_reloc_nesting___anon_embed.c    |   3 +
> >  ...f__core_reloc_nesting___dup_compat_types.c |   5 +
> >  ...core_reloc_nesting___err_array_container.c |   3 +
> >  ...tf__core_reloc_nesting___err_array_field.c |   3 +
> >  ...e_reloc_nesting___err_dup_incompat_types.c |   4 +
> >  ...re_reloc_nesting___err_missing_container.c |   3 +
> >  ...__core_reloc_nesting___err_missing_field.c |   3 +
> >  ..._reloc_nesting___err_nonstruct_container.c |   3 +
> >  ...e_reloc_nesting___err_partial_match_dups.c |   4 +
> >  .../btf__core_reloc_nesting___err_too_deep.c  |   3 +
> >  .../btf__core_reloc_nesting___extra_nesting.c |   3 +
> >  ..._core_reloc_nesting___struct_union_mixup.c |   3 +
> >  .../bpf/progs/btf__core_reloc_primitives.c    |   3 +
> >  ...f__core_reloc_primitives___diff_enum_def.c |   3 +
> >  ..._core_reloc_primitives___diff_func_proto.c |   3 +
> >  ...f__core_reloc_primitives___diff_ptr_type.c |   3 +
> >  ...tf__core_reloc_primitives___err_non_enum.c |   3 +
> >  ...btf__core_reloc_primitives___err_non_int.c |   3 +
> >  ...btf__core_reloc_primitives___err_non_ptr.c |   3 +
> >  .../bpf/progs/btf__core_reloc_ptr_as_arr.c    |   3 +
> >  .../btf__core_reloc_ptr_as_arr___diff_sz.c    |   3 +
> >  .../selftests/bpf/progs/core_reloc_types.h    | 642 +++++++++++++
> >  .../bpf/progs/test_core_reloc_arrays.c        |  58 ++
> >  .../bpf/progs/test_core_reloc_flavors.c       |  65 ++
> >  .../bpf/progs/test_core_reloc_ints.c          |  48 +
> >  .../bpf/progs/test_core_reloc_kernel.c        |  39 +
> >  .../bpf/progs/test_core_reloc_mods.c          |  68 ++
> >  .../bpf/progs/test_core_reloc_nesting.c       |  48 +
> >  .../bpf/progs/test_core_reloc_primitives.c    |  50 +
> >  .../bpf/progs/test_core_reloc_ptr_as_arr.c    |  34 +
> >  58 files changed, 2527 insertions(+), 47 deletions(-)
> >  create mode 100644 tools/testing/selftests/bpf/prog_tests/core_reloc.c
> >  create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_arrays.c
> >  create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___diff_arr_dim.c
> >  create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___diff_arr_val_sz.c
> >  create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_non_array.c
> >  create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_too_shallow.c
> >  create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_too_small.c
> >  create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_wrong_val_type1.c
> >  create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_wrong_val_type2.c
> >  create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_flavors.c
> >  create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_flavors__err_wrong_name.c
> >  create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_ints.c
> >  create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_ints___bool.c
> >  create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_ints___err_bitfield.c
> >  create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_ints___err_wrong_sz_16.c
> >  create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_ints___err_wrong_sz_32.c
> >  create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_ints___err_wrong_sz_64.c
> >  create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_ints___err_wrong_sz_8.c
> >  create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_ints___reverse_sign.c
> >  create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_mods.c
> >  create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_mods___mod_swap.c
> >  create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_mods___typedefs.c
> >  create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_nesting.c
> >  create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___anon_embed.c
> >  create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___dup_compat_types.c
> >  create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_array_container.c
> >  create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_array_field.c
> >  create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_dup_incompat_types.c
> >  create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_missing_container.c
> >  create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_missing_field.c
> >  create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_nonstruct_container.c
> >  create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_partial_match_dups.c
> >  create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_too_deep.c
> >  create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___extra_nesting.c
> >  create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___struct_union_mixup.c
> >  create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_primitives.c
> >  create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_primitives___diff_enum_def.c
> >  create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_primitives___diff_func_proto.c
> >  create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_primitives___diff_ptr_type.c
> >  create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_primitives___err_non_enum.c
> >  create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_primitives___err_non_int.c
> >  create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_primitives___err_non_ptr.c
> >  create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_ptr_as_arr.c
> >  create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_ptr_as_arr___diff_sz.c
> >  create mode 100644 tools/testing/selftests/bpf/progs/core_reloc_types.h
> >  create mode 100644 tools/testing/selftests/bpf/progs/test_core_reloc_arrays.c
> >  create mode 100644 tools/testing/selftests/bpf/progs/test_core_reloc_flavors.c
> >  create mode 100644 tools/testing/selftests/bpf/progs/test_core_reloc_ints.c
> >  create mode 100644 tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c
> >  create mode 100644 tools/testing/selftests/bpf/progs/test_core_reloc_mods.c
> >  create mode 100644 tools/testing/selftests/bpf/progs/test_core_reloc_nesting.c
> >  create mode 100644 tools/testing/selftests/bpf/progs/test_core_reloc_primitives.c
> >  create mode 100644 tools/testing/selftests/bpf/progs/test_core_reloc_ptr_as_arr.c
>
> We have created a lot of small files. Would it be cleaner if we can
> somehow put these
> data in one file (maybe different sections?).

After reading more, I guess you have tried this and ended up with the current
design: keep most struct defines in core_reloc_types.h.

>
> Alternatively, maybe create a folder for these files:
>   tools/testing/selftests/bpf/progs/core/

I guess this would still make it cleaner.

Thanks,
Song

^ permalink raw reply

* Re: [PATCH bpf-next 04/10] selftests/bpf: add CO-RE relocs struct flavors tests
From: Song Liu @ 2019-07-29 20:37 UTC (permalink / raw)
  To: Andrii Nakryiko
  Cc: bpf, Networking, Alexei Starovoitov, Daniel Borkmann,
	Yonghong Song, Andrii Nakryiko, Kernel Team
In-Reply-To: <20190724192742.1419254-5-andriin@fb.com>

On Wed, Jul 24, 2019 at 1:34 PM Andrii Nakryiko <andriin@fb.com> wrote:
>
> Add tests verifying that BPF program can use various struct/union
> "flavors" to extract data from the same target struct/union.
>
> Signed-off-by: Andrii Nakryiko <andriin@fb.com>

Acked-by: Song Liu <songliubraving@fb.com>

^ permalink raw reply

* [PATCH net-next] can: fix ioctl function removal
From: Oliver Hartkopp @ 2019-07-29 20:40 UTC (permalink / raw)
  To: davem, netdev; +Cc: linux-can, Oliver Hartkopp, kernel test robot

Commit 60649d4e0af ("can: remove obsolete empty ioctl() handler") replaced the
almost empty can_ioctl() function with sock_no_ioctl() which always returns
-EOPNOTSUPP.

Even though we don't have any ioctl() functions on socket/network layer we need
to return -ENOIOCTLCMD to be able to forward ioctl commands like SIOCGIFINDEX
to the network driver layer.

This patch fixes the wrong return codes in the CAN network layer protocols.

Reported-by: kernel test robot <rong.a.chen@intel.com>
Fixes: 60649d4e0af ("can: remove obsolete empty ioctl() handler")
Signed-off-by: Oliver Hartkopp <socketcan@hartkopp.net>
---
 net/can/bcm.c | 9 ++++++++-
 net/can/raw.c | 9 ++++++++-
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/net/can/bcm.c b/net/can/bcm.c
index 8da986b19d88..bf1d0bbecec8 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -1680,6 +1680,13 @@ static int bcm_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
 	return size;
 }
 
+int bcm_sock_no_ioctlcmd(struct socket *sock, unsigned int cmd,
+			 unsigned long arg)
+{
+	/* no ioctls for socket layer -> hand it down to NIC layer */
+	return -ENOIOCTLCMD;
+}
+
 static const struct proto_ops bcm_ops = {
 	.family        = PF_CAN,
 	.release       = bcm_release,
@@ -1689,7 +1696,7 @@ static const struct proto_ops bcm_ops = {
 	.accept        = sock_no_accept,
 	.getname       = sock_no_getname,
 	.poll          = datagram_poll,
-	.ioctl         = sock_no_ioctl,
+	.ioctl         = bcm_sock_no_ioctlcmd,
 	.gettstamp     = sock_gettstamp,
 	.listen        = sock_no_listen,
 	.shutdown      = sock_no_shutdown,
diff --git a/net/can/raw.c b/net/can/raw.c
index ff720272f7b7..da386f1fa815 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -837,6 +837,13 @@ static int raw_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
 	return size;
 }
 
+int raw_sock_no_ioctlcmd(struct socket *sock, unsigned int cmd,
+			 unsigned long arg)
+{
+	/* no ioctls for socket layer -> hand it down to NIC layer */
+	return -ENOIOCTLCMD;
+}
+
 static const struct proto_ops raw_ops = {
 	.family        = PF_CAN,
 	.release       = raw_release,
@@ -846,7 +853,7 @@ static const struct proto_ops raw_ops = {
 	.accept        = sock_no_accept,
 	.getname       = raw_getname,
 	.poll          = datagram_poll,
-	.ioctl         = sock_no_ioctl,
+	.ioctl         = raw_sock_no_ioctlcmd,
 	.gettstamp     = sock_gettstamp,
 	.listen        = sock_no_listen,
 	.shutdown      = sock_no_shutdown,
-- 
2.20.1


^ permalink raw reply related

* Re: [bpf-next,v2 0/6] Introduce a BPF helper to generate SYN cookies
From: Alexei Starovoitov @ 2019-07-29 20:47 UTC (permalink / raw)
  To: Petar Penkov
  Cc: netdev, bpf, davem, ast, daniel, edumazet, lmb, sdf, toke,
	Petar Penkov
In-Reply-To: <20190729165918.92933-1-ppenkov.kernel@gmail.com>

On Mon, Jul 29, 2019 at 09:59:12AM -0700, Petar Penkov wrote:
> From: Petar Penkov <ppenkov@google.com>
> 
> This patch series introduces a BPF helper function that allows generating SYN
> cookies from BPF. Currently, this helper is enabled at both the TC hook and the
> XDP hook.
> 
> The first two patches in the series add/modify several TCP helper functions to
> allow for SKB-less operation, as is the case at the XDP hook.
> 
> The third patch introduces the bpf_tcp_gen_syncookie helper function which
> generates a SYN cookie for either XDP or TC programs. The return value of
> this function contains both the MSS value, encoded in the cookie, and the
> cookie itself.
> 
> The last three patches sync tools/ and add a test. 
> 
> Performance evaluation:
> I sent 10Mpps to a fixed port on a host with 2 10G bonded Mellanox 4 NICs from
> random IPv6 source addresses. Without XDP I observed 7.2Mpps (syn-acks) being
> sent out if the IPv6 packets carry 20 bytes of TCP options or 7.6Mpps if they
> carry no options. If I attached a simple program that checks if a packet is
> IPv6/TCP/SYN, looks up the socket, issues a cookie, and sends it back out after
> swapping src/dest, recomputing the checksum, and setting the ACK flag, I
> observed 10Mpps being sent back out.

Is it 10m because traffic gen is 10m?
What is cpu utilization at this rate?
Is it cpu or nic limited if you crank up the syn flood?
Original 7M with all cores or single core?

The patch set looks good to me.
I'd like Eric to review it one more time before applying.


^ permalink raw reply

* Re: [PATCH 1/3 net-next] linux: Add skb_frag_t page_offset accessors
From: Jakub Kicinski @ 2019-07-29 20:50 UTC (permalink / raw)
  To: Jonathan Lemon; +Cc: willy, davem, kernel-team, netdev
In-Reply-To: <20190729171941.250569-2-jonathan.lemon@gmail.com>

On Mon, 29 Jul 2019 10:19:39 -0700, Jonathan Lemon wrote:
> Add skb_frag_off(), skb_frag_off_add(), skb_frag_off_set(),
> and skb_frag_off_set_from() accessors for page_offset.
> 
> Signed-off-by: Jonathan Lemon <jonathan.lemon@gmail.com>
> ---
>  include/linux/skbuff.h | 61 ++++++++++++++++++++++++++++++++++++++----
>  1 file changed, 56 insertions(+), 5 deletions(-)
> 
> diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
> index 718742b1c505..7d94a78067ee 100644
> --- a/include/linux/skbuff.h
> +++ b/include/linux/skbuff.h
> @@ -331,7 +331,7 @@ static inline void skb_frag_size_set(skb_frag_t *frag, unsigned int size)
>  }
>  
>  /**
> - * skb_frag_size_add - Incrementes the size of a skb fragment by %delta
> + * skb_frag_size_add - Increments the size of a skb fragment by %delta
>   * @frag: skb fragment
>   * @delta: value to add
>   */
> @@ -2857,6 +2857,46 @@ static inline void skb_propagate_pfmemalloc(struct page *page,
>  		skb->pfmemalloc = true;
>  }
>  
> +/**
> + * skb_frag_off - Returns the offset of a skb fragment
> + * @frag: the paged fragment
> + */
> +static inline unsigned int skb_frag_off(const skb_frag_t *frag)
> +{
> +	return frag->page_offset;
> +}
> +
> +/**
> + * skb_frag_off_add - Increments the offset of a skb fragment by %delta

I realize you're following the existing code, but should we perhaps use
the latest kdoc syntax? '()' after function name, and args should have
'@' prefix, '%' would be for constants.

> + * @frag: skb fragment
> + * @delta: value to add
> + */
> +static inline void skb_frag_off_add(skb_frag_t *frag, int delta)
> +{
> +	frag->page_offset += delta;
> +}
> +
> +/**
> + * skb_frag_off_set - Sets the offset of a skb fragment
> + * @frag: skb fragment
> + * @offset: offset of fragment
> + */
> +static inline void skb_frag_off_set(skb_frag_t *frag, unsigned int offset)
> +{
> +	frag->page_offset = offset;
> +}
> +
> +/**
> + * skb_frag_off_set_from - Sets the offset of a skb fragment from another fragment
> + * @fragto: skb fragment where offset is set
> + * @fragfrom: skb fragment offset is copied from
> + */
> +static inline void skb_frag_off_set_from(skb_frag_t *fragto,
> +					 const skb_frag_t *fragfrom)

skb_frag_off_copy() ?

> +{
> +	fragto->page_offset = fragfrom->page_offset;
> +}
> +
>  /**
>   * skb_frag_page - retrieve the page referred to by a paged fragment
>   * @frag: the paged fragment
> @@ -2923,7 +2963,7 @@ static inline void skb_frag_unref(struct sk_buff *skb, int f)
>   */
>  static inline void *skb_frag_address(const skb_frag_t *frag)
>  {
> -	return page_address(skb_frag_page(frag)) + frag->page_offset;
> +	return page_address(skb_frag_page(frag)) + skb_frag_off(frag);
>  }
>  
>  /**
> @@ -2939,7 +2979,18 @@ static inline void *skb_frag_address_safe(const skb_frag_t *frag)
>  	if (unlikely(!ptr))
>  		return NULL;
>  
> -	return ptr + frag->page_offset;
> +	return ptr + skb_frag_off(frag);
> +}
> +
> +/**
> + * skb_frag_page_set_from - sets the page in a fragment from another fragment

skb_frag_page_copy() ?

> + * @fragto: skb fragment where page is set
> + * @fragfrom: skb fragment page is copied from
> + */
> +static inline void skb_frag_page_set_from(skb_frag_t *fragto,
> +					  const skb_frag_t *fragfrom)
> +{
> +	fragto->bv_page = fragfrom->bv_page;
>  }
>  
>  /**

^ permalink raw reply

* Re: [PATCH net] net/mlx5e: Fix unnecessary flow_block_cb_is_busy call
From: David Miller @ 2019-07-29 20:53 UTC (permalink / raw)
  To: saeedm; +Cc: wenxu, netdev
In-Reply-To: <b7a5de0ae2464df31ed39fee71020ba063a7a90f.camel@mellanox.com>

From: Saeed Mahameed <saeedm@mellanox.com>
Date: Mon, 29 Jul 2019 18:25:26 +0000

> Dave let me know if you want me to take it to my branch.

Please do, thanks Saeed.

^ permalink raw reply

* Re: [PATCH net v2] mlxsw: spectrum_ptp: Increase parsing depth when PTP is enabled
From: David Miller @ 2019-07-29 20:55 UTC (permalink / raw)
  To: petrm; +Cc: netdev, jiri, idosch
In-Reply-To: <b1584bdec4a0a36a2567a43dc0973dd8f3a05dec.1564424420.git.petrm@mellanox.com>

From: Petr Machata <petrm@mellanox.com>
Date: Mon, 29 Jul 2019 18:26:14 +0000

> Spectrum systems have a configurable limit on how far into the packet they
> parse. By default, the limit is 96 bytes.
> 
> An IPv6 PTP packet is layered as Ethernet/IPv6/UDP (14+40+8 bytes), and
> sequence ID of a PTP event is only available 32 bytes into payload, for a
> total of 94 bytes. When an additional 802.1q header is present as
> well (such as when ptp4l is running on a VLAN port), the parsing limit is
> exceeded. Such packets are not recognized as PTP, and are not timestamped.
> 
> Therefore generalize the current VXLAN-specific parsing depth setting to
> allow reference-counted requests from other modules as well. Keep it in the
> VXLAN module, because the MPRS register also configures UDP destination
> port number used for VXLAN, and is thus closely tied to the VXLAN code
> anyway.
> 
> Then invoke the new interfaces from both VXLAN (in obvious places), as well
> as from PTP code, when the (global) timestamping configuration changes from
> disabled to enabled or vice versa.
> 
> Fixes: 8748642751ed ("mlxsw: spectrum: PTP: Support SIOCGHWTSTAMP, SIOCSHWTSTAMP ioctls")
> Signed-off-by: Petr Machata <petrm@mellanox.com>
> ---
> 
> Notes:
>     v2:
>     - Preserve RXT in mlxsw_sp1_ptp_mtpppc_update()

Applied, thanks Petr.

^ permalink raw reply

* Re: [PATCH bpf-next v5 0/6] xdp: Add devmap_hash map type
From: Alexei Starovoitov @ 2019-07-29 20:57 UTC (permalink / raw)
  To: Toke Høiland-Jørgensen
  Cc: Daniel Borkmann, Alexei Starovoitov, Network Development,
	David Miller, Jesper Dangaard Brouer, Jakub Kicinski,
	Björn Töpel, Yonghong Song
In-Reply-To: <CAADnVQJpYeQ68V5BE2r3BhbraBh7G8dSd8zknFUJxtW4GwNkuA@mail.gmail.com>

On Fri, Jul 26, 2019 at 7:26 PM Alexei Starovoitov
<alexei.starovoitov@gmail.com> wrote:
>
> On Fri, Jul 26, 2019 at 9:06 AM Toke Høiland-Jørgensen <toke@redhat.com> wrote:
> >
> > This series adds a new map type, devmap_hash, that works like the existing
> > devmap type, but using a hash-based indexing scheme. This is useful for the use
> > case where a devmap is indexed by ifindex (for instance for use with the routing
> > table lookup helper). For this use case, the regular devmap needs to be sized
> > after the maximum ifindex number, not the number of devices in it. A hash-based
> > indexing scheme makes it possible to size the map after the number of devices it
> > should contain instead.
> >
> > This was previously part of my patch series that also turned the regular
> > bpf_redirect() helper into a map-based one; for this series I just pulled out
> > the patches that introduced the new map type.
> >
> > Changelog:
> >
> > v5:
> >
> > - Dynamically set the number of hash buckets by rounding up max_entries to the
> >   nearest power of two (mirroring the regular hashmap), as suggested by Jesper.
>
> fyi I'm waiting for Jesper to review this new version.

Now applied.

Toke,
please consider adding proper selftest for it.
        fd = bpf_create_map(BPF_MAP_TYPE_DEVMAP_HASH, sizeof(key),
sizeof(value),
                            2, 0);
        if (fd < 0) {
                printf("Failed to create devmap_hash '%s'!\n", strerror(errno));
                exit(1);
        }
        close(fd);
is not really a test.

^ permalink raw reply

* Re: ip route JSON format is unparseable for "unreachable" routes
From: David Ahern @ 2019-07-29 20:57 UTC (permalink / raw)
  To: Michael Ziegler, netdev
In-Reply-To: <6e88311b-5edc-4c62-1581-0f5b160a5f4e@michaelziegler.name>

On 7/28/19 5:09 AM, Michael Ziegler wrote:
> Hi,
> 
> I created a couple "unreachable" routes on one of my systems, like such:
> 
>> ip route add unreachable 10.0.0.0/8     metric 255
>> ip route add unreachable 192.168.0.0/16 metric 255
> 
> Unfortunately this results in unparseable JSON output from "ip":
> 
>> # ip -j route show  | jq .
>> parse error: Objects must consist of key:value pairs at line 1, column 84
> 
> The offending JSON objects are these:
> 
>> {"unreachable","dst":"10.0.0.0/8","metric":255,"flags":[]}
>> {"unreachable","dst":"192.168.0.0/16","metric":255,"flags":[]}
> "unreachable" cannot appear on its own here, it needs to be some kind of
> field.
> 
> The manpage says to report here, thus I do :) I've searched the
> archives, but I wasn't able to find any existing bug reports about this.
> I'm running version
> 

it's a problem printing the route type in general - any route not of
type 'unicast'. I will send a patch

^ permalink raw reply

* Re: [PATCH 03/12] block: bio_release_pages: use flags arg instead of bool
From: Jerome Glisse @ 2019-07-29 20:57 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: john.hubbard, Andrew Morton, Alexander Viro, Anna Schumaker,
	David S . Miller, Dominique Martinet, Eric Van Hensbergen,
	Jason Gunthorpe, Jason Wang, Jens Axboe, Latchesar Ionkov,
	Michael S . Tsirkin, Miklos Szeredi, Trond Myklebust,
	Christoph Hellwig, Matthew Wilcox, linux-mm, LKML, ceph-devel,
	kvm, linux-block, linux-cifs, linux-fsdevel, linux-nfs,
	linux-rdma, netdev, samba-technical, v9fs-developer,
	virtualization, John Hubbard, Minwoo Im
In-Reply-To: <20190724053053.GA18330@infradead.org>

On Tue, Jul 23, 2019 at 10:30:53PM -0700, Christoph Hellwig wrote:
> On Tue, Jul 23, 2019 at 09:25:09PM -0700, john.hubbard@gmail.com wrote:
> > From: John Hubbard <jhubbard@nvidia.com>
> > 
> > In commit d241a95f3514 ("block: optionally mark pages dirty in
> > bio_release_pages"), new "bool mark_dirty" argument was added to
> > bio_release_pages.
> > 
> > In upcoming work, another bool argument (to indicate that the pages came
> > from get_user_pages) is going to be added. That's one bool too many,
> > because it's not desirable to have calls of the form:
> 
> All pages released by bio_release_pages should come from
> get_user_pages, so I don't really see the point here.

No, they do not all come from GUP; see the various callers
of bio_check_pages_dirty(), for instance iomap_dio_zero().

I have carefully tracked all of this down, and I did not do
any conversion just for the fun of it :)

Cheers,
Jérôme

^ permalink raw reply

* Re: [RFC] net: phy: read link status twice when phy_check_link_status()
From: Heiner Kallweit @ 2019-07-29 20:57 UTC (permalink / raw)
  To: liuyonglong, andrew, davem
  Cc: netdev, linux-kernel, linuxarm, salil.mehta, yisen.zhuang,
	shiju.jose
In-Reply-To: <4b4ba599-f160-39e7-d611-45ac53268389@huawei.com>

On 29.07.2019 05:59, liuyonglong wrote:
> 
> 
> On 2019/7/27 2:14, Heiner Kallweit wrote:
>> On 26.07.2019 11:53, Yonglong Liu wrote:
>>> According to the datasheets of the Marvell PHY and the Realtek PHY, the
>>> copper link status should be read twice, or it may return a fake link
>>> up status and cause up->down->up the first time the link comes up.
>>> This happens more often with the Realtek PHY.
>>>
>> This is not correct, there is no fake link up status.
>> Read the comment in genphy_update_link, only link-down events
>> are latched. Means if the first read returns link up, then there
>> is no need for a second read. And in polling mode we don't do a
>> second read because we want to detect also short link drops.
>>
>> It would be helpful if you could describe your actual problem
>> and whether you use polling or interrupt mode.
>>
> 
> [   44.498633] hns3 0000:bd:00.1 eth5: net open
> [   44.504273] hns3 0000:bd:00.1: reg=0x1, data=0x79ad -> called from phy_start_aneg
> [   44.532348] hns3 0000:bd:00.1: reg=0x1, data=0x798d -> called from phy_state_machine,update link.

This should not happen. The PHY indicates link up w/o having aneg finished.

> 
> According to the datasheet:
> reg 1.5=0 now, means copper auto-negotiation not complete
> reg 1.2=1 now, means link is up
> 
> We can see that, when we read the link up, the auto-negotiation
> is not complete yet, so the speed is invalid.
> 
> I don't know why this happens; maybe this state is kept from the BIOS?
> Or should we do something else in the PHY initialization to fix it?
> I am also confused about why reading twice fixes it.
> 
I suppose that basically any delay would do.

> [   44.554063] hns3 0000:bd:00.1: invalid speed (-1)
> [   44.560412] hns3 0000:bd:00.1 eth5: failed to adjust link.
> [   45.194870] hns3 0000:bd:00.1 eth5: link up
> [   45.574095] hns3 0000:bd:00.1: phyid=3, reg=0x1, data=0x7989
> [   46.150051] hns3 0000:bd:00.1 eth5: link down
> [   46.598074] hns3 0000:bd:00.1: phyid=3, reg=0x1, data=0x7989
> [   47.622075] hns3 0000:bd:00.1: phyid=3, reg=0x1, data=0x79a9
> [   48.646077] hns3 0000:bd:00.1: phyid=3, reg=0x1, data=0x79ad
> [   48.934050] hns3 0000:bd:00.1 eth5: link up
> [   49.702140] hns3 0000:bd:00.1: phyid=3, reg=0x1, data=0x79ad
> 
>>> I add a fake status read, and can solve this problem.
>>>
>>> I also see that in genphy_update_link(), had delete the fake
>>> read in polling mode, so I don't know whether my solution is
>>> correct.
>>>

Can you test whether the following fixes the issue for you?
Also it would be interesting which exact PHY models you tested
and whether you built the respective PHY drivers or whether you
rely on the genphy driver. Best use the second patch to get the
needed info. It may make sense anyway to add the call to
phy_attached_info() to the hns3 driver.


diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 6b5cb87f3..fbecfe210 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -1807,7 +1807,8 @@ int genphy_read_status(struct phy_device *phydev)
 
 	linkmode_zero(phydev->lp_advertising);
 
-	if (phydev->autoneg == AUTONEG_ENABLE && phydev->autoneg_complete) {
+	if (phydev->autoneg == AUTONEG_ENABLE &&
+	    (phydev->autoneg_complete || phydev->link)) {
 		if (phydev->is_gigabit_capable) {
 			lpagb = phy_read(phydev, MII_STAT1000);
 			if (lpagb < 0)
-- 
2.22.0


diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c
index abb1b4385..dc4dfd460 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c
@@ -231,6 +231,8 @@ int hclge_mac_connect_phy(struct hnae3_handle *handle)
 	linkmode_clear_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT,
 			   phydev->advertising);
 
+	phy_attached_info(phydev);
+
 	return 0;
 }
 
-- 
2.22.0




>>> Or provide a phydev->drv->read_status functions for the phy I
>>> used is more acceptable?
>>>
>>> Signed-off-by: Yonglong Liu <liuyonglong@huawei.com>
>>> ---
>>>  drivers/net/phy/phy.c | 8 ++++++++
>>>  1 file changed, 8 insertions(+)
>>>
>>> diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
>>> index ef7aa73..0c03edc 100644
>>> --- a/drivers/net/phy/phy.c
>>> +++ b/drivers/net/phy/phy.c
>>> @@ -1,4 +1,7 @@
>>>  // SPDX-License-Identifier: GPL-2.0+
>>> +	err = phy_read_status(phydev);
>>> +	if (err)
>>> +		return err;
>>
>> This seems to be completely wrong at that place.
>>
> 
> Sorry, this can be ignore.
> 
>>>  /* Framework for configuring and reading PHY devices
>>>   * Based on code in sungem_phy.c and gianfar_phy.c
>>>   *
>>> @@ -525,6 +528,11 @@ static int phy_check_link_status(struct phy_device *phydev)
>>>  
>>>  	WARN_ON(!mutex_is_locked(&phydev->lock));
>>>  
>>> +	/* Do a fake read */
>>> +	err = phy_read(phydev, MII_BMSR);
>>> +	if (err < 0)
>>> +		return err;
>>> +
>>>  	err = phy_read_status(phydev);
>>>  	if (err)
>>>  		return err;
>>>
>>
>>
>> .
>>
> 
> 


^ permalink raw reply related

* Re: [PATCH 1/3 net-next] linux: Add skb_frag_t page_offset accessors
From: Jonathan Lemon @ 2019-07-29 21:02 UTC (permalink / raw)
  To: Jakub Kicinski; +Cc: davem, kernel-team, netdev, Matthew Wilcox
In-Reply-To: <20190729135043.0d9a9dcb@cakuba.netronome.com>



On 29 Jul 2019, at 13:50, Jakub Kicinski wrote:

> On Mon, 29 Jul 2019 10:19:39 -0700, Jonathan Lemon wrote:
>> Add skb_frag_off(), skb_frag_off_add(), skb_frag_off_set(),
>> and skb_frag_off_set_from() accessors for page_offset.
>>
>> Signed-off-by: Jonathan Lemon <jonathan.lemon@gmail.com>
>> ---
>>  include/linux/skbuff.h | 61 
>> ++++++++++++++++++++++++++++++++++++++----
>>  1 file changed, 56 insertions(+), 5 deletions(-)
>>
>> diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
>> index 718742b1c505..7d94a78067ee 100644
>> --- a/include/linux/skbuff.h
>> +++ b/include/linux/skbuff.h
>> @@ -331,7 +331,7 @@ static inline void skb_frag_size_set(skb_frag_t 
>> *frag, unsigned int size)
>>  }
>>
>>  /**
>> - * skb_frag_size_add - Incrementes the size of a skb fragment by 
>> %delta
>> + * skb_frag_size_add - Increments the size of a skb fragment by 
>> %delta
>>   * @frag: skb fragment
>>   * @delta: value to add
>>   */
>> @@ -2857,6 +2857,46 @@ static inline void 
>> skb_propagate_pfmemalloc(struct page *page,
>>  		skb->pfmemalloc = true;
>>  }
>>
>> +/**
>> + * skb_frag_off - Returns the offset of a skb fragment
>> + * @frag: the paged fragment
>> + */
>> +static inline unsigned int skb_frag_off(const skb_frag_t *frag)
>> +{
>> +	return frag->page_offset;
>> +}
>> +
>> +/**
>> + * skb_frag_off_add - Increments the offset of a skb fragment by 
>> %delta
>
> I realize you're following the existing code, but should we perhaps 
> use
> the latest kdoc syntax? '()' after function name, and args should have
> '@' prefix, '%' would be for constants.

That would be a task for a different cleanup.  Not that I disagree with
you, but there's also nothing worse than mixing styles in the same file.



>> + * @frag: skb fragment
>> + * @delta: value to add
>> + */
>> +static inline void skb_frag_off_add(skb_frag_t *frag, int delta)
>> +{
>> +	frag->page_offset += delta;
>> +}
>> +
>> +/**
>> + * skb_frag_off_set - Sets the offset of a skb fragment
>> + * @frag: skb fragment
>> + * @offset: offset of fragment
>> + */
>> +static inline void skb_frag_off_set(skb_frag_t *frag, unsigned int 
>> offset)
>> +{
>> +	frag->page_offset = offset;
>> +}
>> +
>> +/**
>> + * skb_frag_off_set_from - Sets the offset of a skb fragment from 
>> another fragment
>> + * @fragto: skb fragment where offset is set
>> + * @fragfrom: skb fragment offset is copied from
>> + */
>> +static inline void skb_frag_off_set_from(skb_frag_t *fragto,
>> +					 const skb_frag_t *fragfrom)
>
> skb_frag_off_copy() ?

That was my initial inclination, but due to the often overloaded
connotations of the word "copy", opted to use the same "set" verbiage
that existed in the other functions.


>> +{
>> +	fragto->page_offset = fragfrom->page_offset;
>> +}
>> +
>>  /**
>>   * skb_frag_page - retrieve the page referred to by a paged fragment
>>   * @frag: the paged fragment
>> @@ -2923,7 +2963,7 @@ static inline void skb_frag_unref(struct 
>> sk_buff *skb, int f)
>>   */
>>  static inline void *skb_frag_address(const skb_frag_t *frag)
>>  {
>> -	return page_address(skb_frag_page(frag)) + frag->page_offset;
>> +	return page_address(skb_frag_page(frag)) + skb_frag_off(frag);
>>  }
>>
>>  /**
>> @@ -2939,7 +2979,18 @@ static inline void 
>> *skb_frag_address_safe(const skb_frag_t *frag)
>>  	if (unlikely(!ptr))
>>  		return NULL;
>>
>> -	return ptr + frag->page_offset;
>> +	return ptr + skb_frag_off(frag);
>> +}
>> +
>> +/**
>> + * skb_frag_page_set_from - sets the page in a fragment from another 
>> fragment
>
> skb_frag_page_copy() ?

Same reasoning as above.



>> + * @fragto: skb fragment where page is set
>> + * @fragfrom: skb fragment page is copied from
>> + */
>> +static inline void skb_frag_page_set_from(skb_frag_t *fragto,
>> +					  const skb_frag_t *fragfrom)
>> +{
>> +	fragto->bv_page = fragfrom->bv_page;
>>  }
>>
>>  /**

^ permalink raw reply

* Re: [PATCH] net: sctp: drop unneeded likely() call around IS_ERR()
From: David Miller @ 2019-07-29 21:02 UTC (permalink / raw)
  To: info; +Cc: linux-kernel, vyasevich, nhorman, marcelo.leitner, linux-sctp,
	netdev
In-Reply-To: <1564426521-22525-1-git-send-email-info@metux.net>

From: "Enrico Weigelt, metux IT consult" <info@metux.net>
Date: Mon, 29 Jul 2019 20:55:21 +0200

> From: Enrico Weigelt <info@metux.net>
> 
> IS_ERR() already calls unlikely(), so this extra unlikely() call
> around IS_ERR() is not needed.
> 
> Signed-off-by: Enrico Weigelt <info@metux.net>

Applied.

^ permalink raw reply

* Re: [PATCH bpf-next 05/10] selftests/bpf: add CO-RE relocs nesting tests
From: Song Liu @ 2019-07-29 21:06 UTC (permalink / raw)
  To: Andrii Nakryiko
  Cc: bpf, Networking, Alexei Starovoitov, Daniel Borkmann,
	Yonghong Song, Andrii Nakryiko, Kernel Team
In-Reply-To: <20190724192742.1419254-6-andriin@fb.com>

On Wed, Jul 24, 2019 at 1:33 PM Andrii Nakryiko <andriin@fb.com> wrote:
>
> Add a bunch of test validating correct handling of nested
> structs/unions.
>
> Signed-off-by: Andrii Nakryiko <andriin@fb.com>

Acked-by: Song Liu <songliubraving@fb.com>

^ permalink raw reply

* Re: [PATCH] net: wan: sdla: Mark expected switch fall-through
From: David Miller @ 2019-07-29 21:09 UTC (permalink / raw)
  To: gustavo; +Cc: netdev, linux-kernel, keescook
In-Reply-To: <20190729200139.GA6102@embeddedor>

From: "Gustavo A. R. Silva" <gustavo@embeddedor.com>
Date: Mon, 29 Jul 2019 15:01:39 -0500

> Mark switch cases where we are expecting to fall through.
> 
> This patch fixes the following warning (Building: i386):
> 
> drivers/net/wan/sdla.c: In function ‘sdla_errors’:
> drivers/net/wan/sdla.c:414:7: warning: this statement may fall through [-Wimplicit-fallthrough=]
>     if (cmd == SDLA_INFORMATION_WRITE)
>        ^
> drivers/net/wan/sdla.c:417:3: note: here
>    default:
>    ^~~~~~~
> 
> Signed-off-by: Gustavo A. R. Silva <gustavo@embeddedor.com>

Applied.

^ permalink raw reply

* Re: [PATCH] net: hamradio: baycom_epp: Mark expected switch fall-through
From: David Miller @ 2019-07-29 21:09 UTC (permalink / raw)
  To: gustavo; +Cc: t.sailer, linux-hams, netdev, linux-kernel, keescook
In-Reply-To: <20190729201231.GA7576@embeddedor>

From: "Gustavo A. R. Silva" <gustavo@embeddedor.com>
Date: Mon, 29 Jul 2019 15:12:31 -0500

> Mark switch cases where we are expecting to fall through.
> 
> This patch fixes the following warning (Building: i386):
> 
> drivers/net/hamradio/baycom_epp.c: In function ‘transmit’:
> drivers/net/hamradio/baycom_epp.c:491:7: warning: this statement may fall through [-Wimplicit-fallthrough=]
>     if (i) {
>        ^
> drivers/net/hamradio/baycom_epp.c:504:3: note: here
>    default:  /* fall through */
>    ^~~~~~~
> 
> Notice that, in this particular case, the code comment is
> modified in accordance with what GCC is expecting to find.
> 
> Signed-off-by: Gustavo A. R. Silva <gustavo@embeddedor.com>

Applied.

^ permalink raw reply

* Re: [PATCH bpf-next 07/10] selftests/bpf: add CO-RE relocs enum/ptr/func_proto tests
From: Song Liu @ 2019-07-29 21:09 UTC (permalink / raw)
  To: Andrii Nakryiko
  Cc: bpf, Networking, Alexei Starovoitov, Daniel Borkmann,
	Yonghong Song, Andrii Nakryiko, Kernel Team
In-Reply-To: <20190724192742.1419254-8-andriin@fb.com>

On Wed, Jul 24, 2019 at 12:31 PM Andrii Nakryiko <andriin@fb.com> wrote:
>
> Test CO-RE relocation handling of ints, enums, pointers, func protos, etc.
>
> Signed-off-by: Andrii Nakryiko <andriin@fb.com>

Acked-by: Song Liu <songliubraving@fb.com>

^ permalink raw reply

* Re: [PATCH bpf-next 08/10] selftests/bpf: add CO-RE relocs modifiers/typedef tests
From: Song Liu @ 2019-07-29 21:11 UTC (permalink / raw)
  To: Andrii Nakryiko
  Cc: bpf, Networking, Alexei Starovoitov, Daniel Borkmann,
	Yonghong Song, Andrii Nakryiko, Kernel Team
In-Reply-To: <20190724192742.1419254-9-andriin@fb.com>

On Wed, Jul 24, 2019 at 12:30 PM Andrii Nakryiko <andriin@fb.com> wrote:
>
> Add tests validating correct handling of various combinations of
> typedefs and const/volatile/restrict modifiers.
>
> Signed-off-by: Andrii Nakryiko <andriin@fb.com>


Acked-by: Song Liu <songliubraving@fb.com>

^ permalink raw reply

* [PATCH mlx5-next 00/11] Mellanox, mlx5-next updates 2019-07-29
From: Saeed Mahameed @ 2019-07-29 21:12 UTC (permalink / raw)
  To: Saeed Mahameed, Leon Romanovsky, netdev@vger.kernel.org,
	linux-rdma@vger.kernel.org

Hi All,

This series includes misc updates for the mlx5 core driver.

1) Eli improves the handling of the support for QoS element type
2) Gavi refactors and prepares mlx5 flow counters for bulk allocation
support
3) Parav refactors and improves eswitch load/unload flows
4) Saeed, two misc cleanups

In case of no objection this series will be applied to mlx5-next branch
and sent later as pull request to both rdma-next and net-next branches.

Thanks,
Saeed.

---

Eli Cohen (1):
  net/mlx5: E-Switch, Verify support QoS element type

Gavi Teitz (2):
  net/mlx5: Refactor and optimize flow counter bulk query
  net/mlx5: Add flow counter bulk allocation hardware bits and command

Parav Pandit (6):
  net/mlx5: Make load_one() and unload_one() symmetric
  net/mlx5: E-switch, Combine metadata enable/disable functionality
  net/mlx5: E-switch, Initialize TSAR Qos hardware block before its user
    vports
  net/mlx5: E-switch, Introduce helper function to enable/disable vports
  net/mlx5: E-Switch, Remove redundant mc_promisc NULL check
  net/mlx5: E-switch, Tide up eswitch config sequence

Saeed Mahameed (2):
  net/mlx5: Fix offset of tisc bits reserved field
  net/mlx5: E-Switch, remove redundant error handling

 .../net/ethernet/mellanox/mlx5/core/eswitch.c | 184 +++++++++++-------
 .../net/ethernet/mellanox/mlx5/core/eswitch.h |  27 ++-
 .../mellanox/mlx5/core/eswitch_offloads.c     |  56 ++----
 .../net/ethernet/mellanox/mlx5/core/fs_cmd.c  |  71 ++-----
 .../net/ethernet/mellanox/mlx5/core/fs_cmd.h  |  16 +-
 .../ethernet/mellanox/mlx5/core/fs_counters.c | 125 ++++++------
 .../net/ethernet/mellanox/mlx5/core/main.c    |   5 +-
 include/linux/mlx5/driver.h                   |   1 +
 include/linux/mlx5/mlx5_ifc.h                 |  30 ++-
 9 files changed, 282 insertions(+), 233 deletions(-)

-- 
2.21.0


^ permalink raw reply

* [PATCH mlx5-next 01/11] net/mlx5: Refactor and optimize flow counter bulk query
From: Saeed Mahameed @ 2019-07-29 21:12 UTC (permalink / raw)
  To: Saeed Mahameed, Leon Romanovsky, netdev@vger.kernel.org,
	linux-rdma@vger.kernel.org
  Cc: Gavi Teitz, Vlad Buslov
In-Reply-To: <20190729211209.14772-1-saeedm@mellanox.com>

From: Gavi Teitz <gavi@mellanox.com>

Towards introducing the ability to allocate bulks of flow counters,
refactor the flow counter bulk query process, removing functions and
structs whose names indicated being used for flow counter bulk
allocation FW commands, despite them actually only being used to
support bulk querying, and migrate their functionality to correctly
named functions in their natural location, fs_counters.c.

Additionally, optimize the bulk query process by:
 * Extracting the memory used for the query to mlx5_fc_stats so
   that it is only allocated once, and not for each bulk query.
 * Querying all the counters in one function call.

Signed-off-by: Gavi Teitz <gavi@mellanox.com>
Reviewed-by: Vlad Buslov <vladbu@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../net/ethernet/mellanox/mlx5/core/fs_cmd.c  |  61 ++-------
 .../net/ethernet/mellanox/mlx5/core/fs_cmd.h  |  13 +-
 .../ethernet/mellanox/mlx5/core/fs_counters.c | 125 ++++++++++--------
 include/linux/mlx5/driver.h                   |   1 +
 4 files changed, 81 insertions(+), 119 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index 7ac1249eadc3..51f6972f4c70 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -615,67 +615,24 @@ int mlx5_cmd_fc_query(struct mlx5_core_dev *dev, u32 id,
 	return 0;
 }
 
-struct mlx5_cmd_fc_bulk {
-	u32 id;
-	int num;
-	int outlen;
-	u32 out[0];
-};
-
-struct mlx5_cmd_fc_bulk *
-mlx5_cmd_fc_bulk_alloc(struct mlx5_core_dev *dev, u32 id, int num)
+int mlx5_cmd_fc_get_bulk_query_out_len(int bulk_len)
 {
-	struct mlx5_cmd_fc_bulk *b;
-	int outlen =
-		MLX5_ST_SZ_BYTES(query_flow_counter_out) +
-		MLX5_ST_SZ_BYTES(traffic_counter) * num;
-
-	b = kzalloc(sizeof(*b) + outlen, GFP_KERNEL);
-	if (!b)
-		return NULL;
-
-	b->id = id;
-	b->num = num;
-	b->outlen = outlen;
-
-	return b;
+	return MLX5_ST_SZ_BYTES(query_flow_counter_out) +
+		MLX5_ST_SZ_BYTES(traffic_counter) * bulk_len;
 }
 
-void mlx5_cmd_fc_bulk_free(struct mlx5_cmd_fc_bulk *b)
-{
-	kfree(b);
-}
-
-int
-mlx5_cmd_fc_bulk_query(struct mlx5_core_dev *dev, struct mlx5_cmd_fc_bulk *b)
+int mlx5_cmd_fc_bulk_query(struct mlx5_core_dev *dev, u32 base_id, int bulk_len,
+			   u32 *out)
 {
+	int outlen = mlx5_cmd_fc_get_bulk_query_out_len(bulk_len);
 	u32 in[MLX5_ST_SZ_DW(query_flow_counter_in)] = {0};
 
 	MLX5_SET(query_flow_counter_in, in, opcode,
 		 MLX5_CMD_OP_QUERY_FLOW_COUNTER);
 	MLX5_SET(query_flow_counter_in, in, op_mod, 0);
-	MLX5_SET(query_flow_counter_in, in, flow_counter_id, b->id);
-	MLX5_SET(query_flow_counter_in, in, num_of_counters, b->num);
-	return mlx5_cmd_exec(dev, in, sizeof(in), b->out, b->outlen);
-}
-
-void mlx5_cmd_fc_bulk_get(struct mlx5_core_dev *dev,
-			  struct mlx5_cmd_fc_bulk *b, u32 id,
-			  u64 *packets, u64 *bytes)
-{
-	int index = id - b->id;
-	void *stats;
-
-	if (index < 0 || index >= b->num) {
-		mlx5_core_warn(dev, "Flow counter id (0x%x) out of range (0x%x..0x%x). Counter ignored.\n",
-			       id, b->id, b->id + b->num - 1);
-		return;
-	}
-
-	stats = MLX5_ADDR_OF(query_flow_counter_out, b->out,
-			     flow_statistics[index]);
-	*packets = MLX5_GET64(traffic_counter, stats, packets);
-	*bytes = MLX5_GET64(traffic_counter, stats, octets);
+	MLX5_SET(query_flow_counter_in, in, flow_counter_id, base_id);
+	MLX5_SET(query_flow_counter_in, in, num_of_counters, bulk_len);
+	return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
 }
 
 int mlx5_packet_reformat_alloc(struct mlx5_core_dev *dev,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
index e340f9af2f5a..db49eabba98d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
@@ -82,16 +82,9 @@ int mlx5_cmd_fc_free(struct mlx5_core_dev *dev, u32 id);
 int mlx5_cmd_fc_query(struct mlx5_core_dev *dev, u32 id,
 		      u64 *packets, u64 *bytes);
 
-struct mlx5_cmd_fc_bulk;
-
-struct mlx5_cmd_fc_bulk *
-mlx5_cmd_fc_bulk_alloc(struct mlx5_core_dev *dev, u32 id, int num);
-void mlx5_cmd_fc_bulk_free(struct mlx5_cmd_fc_bulk *b);
-int
-mlx5_cmd_fc_bulk_query(struct mlx5_core_dev *dev, struct mlx5_cmd_fc_bulk *b);
-void mlx5_cmd_fc_bulk_get(struct mlx5_core_dev *dev,
-			  struct mlx5_cmd_fc_bulk *b, u32 id,
-			  u64 *packets, u64 *bytes);
+int mlx5_cmd_fc_get_bulk_query_out_len(int bulk_len);
+int mlx5_cmd_fc_bulk_query(struct mlx5_core_dev *dev, u32 base_id, int bulk_len,
+			   u32 *out);
 
 const struct mlx5_flow_cmds *mlx5_fs_cmd_get_default(enum fs_flow_table_type type);
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
index b3762123a69c..067a4b56498b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
@@ -75,7 +75,7 @@ struct mlx5_fc {
  * access to counter list:
  * - create (user context)
  *   - mlx5_fc_create() only adds to an addlist to be used by
- *     mlx5_fc_stats_query_work(). addlist is a lockless single linked list
+ *     mlx5_fc_stats_work(). addlist is a lockless single linked list
  *     that doesn't require any additional synchronization when adding single
  *     node.
  *   - spawn thread to do the actual destroy
@@ -136,72 +136,69 @@ static void mlx5_fc_stats_remove(struct mlx5_core_dev *dev,
 	spin_unlock(&fc_stats->counters_idr_lock);
 }
 
-/* The function returns the last counter that was queried so the caller
- * function can continue calling it till all counters are queried.
- */
-static struct mlx5_fc *mlx5_fc_stats_query(struct mlx5_core_dev *dev,
-					   struct mlx5_fc *first,
-					   u32 last_id)
+static int get_max_bulk_query_len(struct mlx5_core_dev *dev)
 {
-	struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
-	struct mlx5_fc *counter = NULL;
-	struct mlx5_cmd_fc_bulk *b;
-	bool more = false;
-	u32 afirst_id;
-	int num;
-	int err;
+	return min_t(int, MLX5_SW_MAX_COUNTERS_BULK,
+			  (1 << MLX5_CAP_GEN(dev, log_max_flow_counter_bulk)));
+}
 
-	int max_bulk = min_t(int, MLX5_SW_MAX_COUNTERS_BULK,
-			     (1 << MLX5_CAP_GEN(dev, log_max_flow_counter_bulk)));
+static void update_counter_cache(int index, u32 *bulk_raw_data,
+				 struct mlx5_fc_cache *cache)
+{
+	void *stats = MLX5_ADDR_OF(query_flow_counter_out, bulk_raw_data,
+			     flow_statistics[index]);
+	u64 packets = MLX5_GET64(traffic_counter, stats, packets);
+	u64 bytes = MLX5_GET64(traffic_counter, stats, octets);
 
-	/* first id must be aligned to 4 when using bulk query */
-	afirst_id = first->id & ~0x3;
+	if (cache->packets == packets)
+		return;
 
-	/* number of counters to query inc. the last counter */
-	num = ALIGN(last_id - afirst_id + 1, 4);
-	if (num > max_bulk) {
-		num = max_bulk;
-		last_id = afirst_id + num - 1;
-	}
+	cache->packets = packets;
+	cache->bytes = bytes;
+	cache->lastuse = jiffies;
+}
 
-	b = mlx5_cmd_fc_bulk_alloc(dev, afirst_id, num);
-	if (!b) {
-		mlx5_core_err(dev, "Error allocating resources for bulk query\n");
-		return NULL;
-	}
+static void mlx5_fc_stats_query_counter_range(struct mlx5_core_dev *dev,
+					      struct mlx5_fc *first,
+					      u32 last_id)
+{
+	struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+	bool query_more_counters = (first->id <= last_id);
+	int max_bulk_len = get_max_bulk_query_len(dev);
+	u32 *data = fc_stats->bulk_query_out;
+	struct mlx5_fc *counter = first;
+	u32 bulk_base_id;
+	int bulk_len;
+	int err;
 
-	err = mlx5_cmd_fc_bulk_query(dev, b);
-	if (err) {
-		mlx5_core_err(dev, "Error doing bulk query: %d\n", err);
-		goto out;
-	}
+	while (query_more_counters) {
+		/* first id must be aligned to 4 when using bulk query */
+		bulk_base_id = counter->id & ~0x3;
 
-	counter = first;
-	list_for_each_entry_from(counter, &fc_stats->counters, list) {
-		struct mlx5_fc_cache *c = &counter->cache;
-		u64 packets;
-		u64 bytes;
+		/* number of counters to query inc. the last counter */
+		bulk_len = min_t(int, max_bulk_len,
+				 ALIGN(last_id - bulk_base_id + 1, 4));
 
-		if (counter->id > last_id) {
-			more = true;
-			break;
+		err = mlx5_cmd_fc_bulk_query(dev, bulk_base_id, bulk_len,
+					     data);
+		if (err) {
+			mlx5_core_err(dev, "Error doing bulk query: %d\n", err);
+			return;
 		}
+		query_more_counters = false;
 
-		mlx5_cmd_fc_bulk_get(dev, b,
-				     counter->id, &packets, &bytes);
+		list_for_each_entry_from(counter, &fc_stats->counters, list) {
+			int counter_index = counter->id - bulk_base_id;
+			struct mlx5_fc_cache *cache = &counter->cache;
 
-		if (c->packets == packets)
-			continue;
+			if (counter->id >= bulk_base_id + bulk_len) {
+				query_more_counters = true;
+				break;
+			}
 
-		c->packets = packets;
-		c->bytes = bytes;
-		c->lastuse = jiffies;
+			update_counter_cache(counter_index, data, cache);
+		}
 	}
-
-out:
-	mlx5_cmd_fc_bulk_free(b);
-
-	return more ? counter : NULL;
 }
 
 static void mlx5_free_fc(struct mlx5_core_dev *dev,
@@ -244,8 +241,8 @@ static void mlx5_fc_stats_work(struct work_struct *work)
 
 	counter = list_first_entry(&fc_stats->counters, struct mlx5_fc,
 				   list);
-	while (counter)
-		counter = mlx5_fc_stats_query(dev, counter, last->id);
+	if (counter)
+		mlx5_fc_stats_query_counter_range(dev, counter, last->id);
 
 	fc_stats->next_query = now + fc_stats->sampling_interval;
 }
@@ -324,6 +321,8 @@ EXPORT_SYMBOL(mlx5_fc_destroy);
 int mlx5_init_fc_stats(struct mlx5_core_dev *dev)
 {
 	struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+	int max_bulk_len;
+	int max_out_len;
 
 	spin_lock_init(&fc_stats->counters_idr_lock);
 	idr_init(&fc_stats->counters_idr);
@@ -331,14 +330,24 @@ int mlx5_init_fc_stats(struct mlx5_core_dev *dev)
 	init_llist_head(&fc_stats->addlist);
 	init_llist_head(&fc_stats->dellist);
 
+	max_bulk_len = get_max_bulk_query_len(dev);
+	max_out_len = mlx5_cmd_fc_get_bulk_query_out_len(max_bulk_len);
+	fc_stats->bulk_query_out = kzalloc(max_out_len, GFP_KERNEL);
+	if (!fc_stats->bulk_query_out)
+		return -ENOMEM;
+
 	fc_stats->wq = create_singlethread_workqueue("mlx5_fc");
 	if (!fc_stats->wq)
-		return -ENOMEM;
+		goto err_wq_create;
 
 	fc_stats->sampling_interval = MLX5_FC_STATS_PERIOD;
 	INIT_DELAYED_WORK(&fc_stats->work, mlx5_fc_stats_work);
 
 	return 0;
+
+err_wq_create:
+	kfree(fc_stats->bulk_query_out);
+	return -ENOMEM;
 }
 
 void mlx5_cleanup_fc_stats(struct mlx5_core_dev *dev)
@@ -352,6 +361,8 @@ void mlx5_cleanup_fc_stats(struct mlx5_core_dev *dev)
 	destroy_workqueue(dev->priv.fc_stats.wq);
 	dev->priv.fc_stats.wq = NULL;
 
+	kfree(fc_stats->bulk_query_out);
+
 	idr_destroy(&fc_stats->counters_idr);
 
 	tmplist = llist_del_all(&fc_stats->addlist);
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 0e6da1840c7d..267b2bc0ca4a 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -488,6 +488,7 @@ struct mlx5_fc_stats {
 	struct delayed_work work;
 	unsigned long next_query;
 	unsigned long sampling_interval; /* jiffies */
+	u32 *bulk_query_out;
 };
 
 struct mlx5_events;
-- 
2.21.0


^ permalink raw reply related

* [PATCH mlx5-next 02/11] net/mlx5: Add flow counter bulk allocation hardware bits and command
From: Saeed Mahameed @ 2019-07-29 21:12 UTC (permalink / raw)
  To: Saeed Mahameed, Leon Romanovsky, netdev@vger.kernel.org,
	linux-rdma@vger.kernel.org
  Cc: Gavi Teitz, Vlad Buslov
In-Reply-To: <20190729211209.14772-1-saeedm@mellanox.com>

From: Gavi Teitz <gavi@mellanox.com>

Add a handle to invoke the new FW capability of allocating a bulk of
flow counters.

Signed-off-by: Gavi Teitz <gavi@mellanox.com>
Reviewed-by: Vlad Buslov <vladbu@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../net/ethernet/mellanox/mlx5/core/fs_cmd.c  | 10 ++++++++-
 .../net/ethernet/mellanox/mlx5/core/fs_cmd.h  |  3 +++
 include/linux/mlx5/mlx5_ifc.h                 | 21 +++++++++++++++++--
 3 files changed, 31 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index 51f6972f4c70..b84a225bbe86 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -566,7 +566,9 @@ static int mlx5_cmd_delete_fte(struct mlx5_flow_root_namespace *ns,
 	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 
-int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u32 *id)
+int mlx5_cmd_fc_bulk_alloc(struct mlx5_core_dev *dev,
+			   enum mlx5_fc_bulk_alloc_bitmask alloc_bitmask,
+			   u32 *id)
 {
 	u32 in[MLX5_ST_SZ_DW(alloc_flow_counter_in)]   = {0};
 	u32 out[MLX5_ST_SZ_DW(alloc_flow_counter_out)] = {0};
@@ -574,6 +576,7 @@ int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u32 *id)
 
 	MLX5_SET(alloc_flow_counter_in, in, opcode,
 		 MLX5_CMD_OP_ALLOC_FLOW_COUNTER);
+	MLX5_SET(alloc_flow_counter_in, in, flow_counter_bulk, alloc_bitmask);
 
 	err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 	if (!err)
@@ -581,6 +584,11 @@ int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u32 *id)
 	return err;
 }
 
+int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u32 *id)
+{
+	return mlx5_cmd_fc_bulk_alloc(dev, 0, id);
+}
+
 int mlx5_cmd_fc_free(struct mlx5_core_dev *dev, u32 id)
 {
 	u32 in[MLX5_ST_SZ_DW(dealloc_flow_counter_in)]   = {0};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
index db49eabba98d..bc4606306009 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
@@ -78,6 +78,9 @@ struct mlx5_flow_cmds {
 };
 
 int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u32 *id);
+int mlx5_cmd_fc_bulk_alloc(struct mlx5_core_dev *dev,
+			   enum mlx5_fc_bulk_alloc_bitmask alloc_bitmask,
+			   u32 *id);
 int mlx5_cmd_fc_free(struct mlx5_core_dev *dev, u32 id);
 int mlx5_cmd_fc_query(struct mlx5_core_dev *dev, u32 id,
 		      u64 *packets, u64 *bytes);
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index b3d5752657d9..196987f14a3f 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -1040,6 +1040,21 @@ enum {
 	MLX5_UCTX_CAP_INTERNAL_DEV_RES = 1UL << 1,
 };
 
+#define MLX5_FC_BULK_SIZE_FACTOR 128
+
+enum mlx5_fc_bulk_alloc_bitmask {
+	MLX5_FC_BULK_128   = (1 << 0),
+	MLX5_FC_BULK_256   = (1 << 1),
+	MLX5_FC_BULK_512   = (1 << 2),
+	MLX5_FC_BULK_1024  = (1 << 3),
+	MLX5_FC_BULK_2048  = (1 << 4),
+	MLX5_FC_BULK_4096  = (1 << 5),
+	MLX5_FC_BULK_8192  = (1 << 6),
+	MLX5_FC_BULK_16384 = (1 << 7),
+};
+
+#define MLX5_FC_BULK_NUM_FCS(fc_enum) (MLX5_FC_BULK_SIZE_FACTOR * (fc_enum))
+
 struct mlx5_ifc_cmd_hca_cap_bits {
 	u8         reserved_at_0[0x30];
 	u8         vhca_id[0x10];
@@ -1244,7 +1259,8 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 	u8         reserved_at_2e0[0x7];
 	u8         max_qp_mcg[0x19];
 
-	u8         reserved_at_300[0x18];
+	u8         reserved_at_300[0x10];
+	u8         flow_counter_bulk_alloc[0x8];
 	u8         log_max_mcg[0x8];
 
 	u8         reserved_at_320[0x3];
@@ -7815,7 +7831,8 @@ struct mlx5_ifc_alloc_flow_counter_in_bits {
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_at_40[0x40];
+	u8         reserved_at_40[0x38];
+	u8         flow_counter_bulk[0x8];
 };
 
 struct mlx5_ifc_add_vxlan_udp_dport_out_bits {
-- 
2.21.0


^ permalink raw reply related

* [PATCH mlx5-next 03/11] net/mlx5: Fix offset of tisc bits reserved field
From: Saeed Mahameed @ 2019-07-29 21:12 UTC (permalink / raw)
  To: Saeed Mahameed, Leon Romanovsky, netdev@vger.kernel.org,
	linux-rdma@vger.kernel.org
In-Reply-To: <20190729211209.14772-1-saeedm@mellanox.com>

The first reserved field is misnamed (off by one): instead of
reserved_at_1 it should be reserved_at_2. Fix that.

Fixes: a12ff35e0fb7 ("net/mlx5: Introduce TLS TX offload hardware bits and structures")
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Reviewed-by: Leon Romanovsky <leonro@mellanox.com>
---
 include/linux/mlx5/mlx5_ifc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 196987f14a3f..9265c84ad353 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -2782,7 +2782,7 @@ struct mlx5_ifc_traffic_counter_bits {
 struct mlx5_ifc_tisc_bits {
 	u8         strict_lag_tx_port_affinity[0x1];
 	u8         tls_en[0x1];
-	u8         reserved_at_1[0x2];
+	u8         reserved_at_2[0x2];
 	u8         lag_tx_port_affinity[0x04];
 
 	u8         reserved_at_8[0x4];
-- 
2.21.0


^ permalink raw reply related

* [PATCH mlx5-next 05/11] net/mlx5: E-Switch, Verify support QoS element type
From: Saeed Mahameed @ 2019-07-29 21:13 UTC (permalink / raw)
  To: Saeed Mahameed, Leon Romanovsky, netdev@vger.kernel.org,
	linux-rdma@vger.kernel.org
  Cc: Eli Cohen, Paul Blakey
In-Reply-To: <20190729211209.14772-1-saeedm@mellanox.com>

From: Eli Cohen <eli@mellanox.com>

Check if firmware supports the requested element type before
attempting to create the element type.
In addition, explicitly specify the requested element type and tsar type.

Signed-off-by: Eli Cohen <eli@mellanox.com>
Reviewed-by: Paul Blakey <paulb@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../net/ethernet/mellanox/mlx5/core/eswitch.c | 31 +++++++++++++++++++
 include/linux/mlx5/mlx5_ifc.h                 |  7 +++++
 2 files changed, 38 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 1f3891fde2eb..2927fa1da92f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -1393,19 +1393,50 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw,
 	return err;
 }
 
+static bool element_type_supported(struct mlx5_eswitch *esw, int type)
+{
+	struct mlx5_core_dev *dev = esw->dev = esw->dev;
+
+	switch (type) {
+	case SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR:
+		return MLX5_CAP_QOS(dev, esw_element_type) &
+		       ELEMENT_TYPE_CAP_MASK_TASR;
+	case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT:
+		return MLX5_CAP_QOS(dev, esw_element_type) &
+		       ELEMENT_TYPE_CAP_MASK_VPORT;
+	case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC:
+		return MLX5_CAP_QOS(dev, esw_element_type) &
+		       ELEMENT_TYPE_CAP_MASK_VPORT_TC;
+	case SCHEDULING_CONTEXT_ELEMENT_TYPE_PARA_VPORT_TC:
+		return MLX5_CAP_QOS(dev, esw_element_type) &
+		       ELEMENT_TYPE_CAP_MASK_PARA_VPORT_TC;
+	}
+	return false;
+}
+
 /* Vport QoS management */
 static int esw_create_tsar(struct mlx5_eswitch *esw)
 {
 	u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0};
 	struct mlx5_core_dev *dev = esw->dev;
+	__be32 *attr;
 	int err;
 
 	if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling))
 		return 0;
 
+	if (!element_type_supported(esw, SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR))
+		return 0;
+
 	if (esw->qos.enabled)
 		return -EEXIST;
 
+	MLX5_SET(scheduling_context, tsar_ctx, element_type,
+		 SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR);
+
+	attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes);
+	*attr = cpu_to_be32(TSAR_ELEMENT_TSAR_TYPE_DWRR << 16);
+
 	err = mlx5_create_scheduling_element_cmd(dev,
 						 SCHEDULING_HIERARCHY_E_SWITCH,
 						 tsar_ctx,
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 9265c84ad353..30d15e80bcc7 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -2957,6 +2957,13 @@ enum {
 	SCHEDULING_CONTEXT_ELEMENT_TYPE_PARA_VPORT_TC = 0x3,
 };
 
+enum { /* bit masks tested against the esw_element_type QoS capability */
+	ELEMENT_TYPE_CAP_MASK_TASR		= 1 << 0,
+	ELEMENT_TYPE_CAP_MASK_VPORT		= 1 << 1,
+	ELEMENT_TYPE_CAP_MASK_VPORT_TC		= 1 << 2,
+	ELEMENT_TYPE_CAP_MASK_PARA_VPORT_TC	= 1 << 3,
+};
+
 struct mlx5_ifc_scheduling_context_bits {
 	u8         element_type[0x8];
 	u8         reserved_at_8[0x18];
-- 
2.21.0


^ permalink raw reply related

* [PATCH mlx5-next 04/11] net/mlx5: Make load_one() and unload_one() symmetric
From: Saeed Mahameed @ 2019-07-29 21:12 UTC (permalink / raw)
  To: Saeed Mahameed, Leon Romanovsky, netdev@vger.kernel.org,
	linux-rdma@vger.kernel.org
  Cc: Parav Pandit
In-Reply-To: <20190729211209.14772-1-saeedm@mellanox.com>

From: Parav Pandit <parav@mellanox.com>

Currently mlx5_load_one() performs device registration using
mlx5_register_device(), but mlx5_unload_one() doesn't unregister it.

Make them symmetric by doing device unregistration in
mlx5_unload_one().

Signed-off-by: Parav Pandit <parav@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/main.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index b15b27a497fc..fa0e991f1983 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1217,8 +1217,10 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, bool cleanup)
 {
 	int err = 0;
 
-	if (cleanup)
+	if (cleanup) {
+		mlx5_unregister_device(dev);
 		mlx5_drain_health_wq(dev);
+	}
 
 	mutex_lock(&dev->intf_state_mutex);
 	if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
@@ -1369,7 +1371,6 @@ static void remove_one(struct pci_dev *pdev)
 
 	mlx5_crdump_disable(dev);
 	mlx5_devlink_unregister(devlink);
-	mlx5_unregister_device(dev);
 
 	if (mlx5_unload_one(dev, true)) {
 		mlx5_core_err(dev, "mlx5_unload_one failed\n");
-- 
2.21.0


^ permalink raw reply related

* [PATCH mlx5-next 06/11] net/mlx5: E-switch, Combine metadata enable/disable functionality
From: Saeed Mahameed @ 2019-07-29 21:13 UTC (permalink / raw)
  To: Saeed Mahameed, Leon Romanovsky, netdev@vger.kernel.org,
	linux-rdma@vger.kernel.org
  Cc: Parav Pandit
In-Reply-To: <20190729211209.14772-1-saeedm@mellanox.com>

From: Parav Pandit <parav@mellanox.com>

Except for the bit-toggling code, the code to enable and disable the
metadata passing functionality is the same.
Hence, combine them into a single function controlled by an enable flag.

Also, instead of checking for metadata support in multiple places,
fold the check into the helper function.

Signed-off-by: Parav Pandit <parav@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../mellanox/mlx5/core/eswitch_offloads.c     | 48 +++++--------------
 1 file changed, 12 insertions(+), 36 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 089ae4d48a82..4be19890f725 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -587,38 +587,15 @@ void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule)
 	mlx5_del_flow_rules(rule);
 }
 
-static int mlx5_eswitch_enable_passing_vport_metadata(struct mlx5_eswitch *esw)
+static int esw_set_passing_vport_metadata(struct mlx5_eswitch *esw, bool enable)
 {
 	u32 out[MLX5_ST_SZ_DW(query_esw_vport_context_out)] = {};
 	u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)] = {};
 	u8 fdb_to_vport_reg_c_id;
 	int err;
 
-	err = mlx5_eswitch_query_esw_vport_context(esw, esw->manager_vport,
-						   out, sizeof(out));
-	if (err)
-		return err;
-
-	fdb_to_vport_reg_c_id = MLX5_GET(query_esw_vport_context_out, out,
-					 esw_vport_context.fdb_to_vport_reg_c_id);
-
-	fdb_to_vport_reg_c_id |= MLX5_FDB_TO_VPORT_REG_C_0;
-	MLX5_SET(modify_esw_vport_context_in, in,
-		 esw_vport_context.fdb_to_vport_reg_c_id, fdb_to_vport_reg_c_id);
-
-	MLX5_SET(modify_esw_vport_context_in, in,
-		 field_select.fdb_to_vport_reg_c_id, 1);
-
-	return mlx5_eswitch_modify_esw_vport_context(esw, esw->manager_vport,
-						     in, sizeof(in));
-}
-
-static int mlx5_eswitch_disable_passing_vport_metadata(struct mlx5_eswitch *esw)
-{
-	u32 out[MLX5_ST_SZ_DW(query_esw_vport_context_out)] = {};
-	u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)] = {};
-	u8 fdb_to_vport_reg_c_id;
-	int err;
+	if (!mlx5_eswitch_vport_match_metadata_enabled(esw))
+		return 0;
 
 	err = mlx5_eswitch_query_esw_vport_context(esw, esw->manager_vport,
 						   out, sizeof(out));
@@ -628,7 +605,10 @@ static int mlx5_eswitch_disable_passing_vport_metadata(struct mlx5_eswitch *esw)
 	fdb_to_vport_reg_c_id = MLX5_GET(query_esw_vport_context_out, out,
 					 esw_vport_context.fdb_to_vport_reg_c_id);
 
-	fdb_to_vport_reg_c_id &= ~MLX5_FDB_TO_VPORT_REG_C_0;
+	if (enable)
+		fdb_to_vport_reg_c_id |= MLX5_FDB_TO_VPORT_REG_C_0;
+	else
+		fdb_to_vport_reg_c_id &= ~MLX5_FDB_TO_VPORT_REG_C_0;
 
 	MLX5_SET(modify_esw_vport_context_in, in,
 		 esw_vport_context.fdb_to_vport_reg_c_id, fdb_to_vport_reg_c_id);
@@ -2138,11 +2118,9 @@ int esw_offloads_init(struct mlx5_eswitch *esw)
 	if (err)
 		return err;
 
-	if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
-		err = mlx5_eswitch_enable_passing_vport_metadata(esw);
-		if (err)
-			goto err_vport_metadata;
-	}
+	err = esw_set_passing_vport_metadata(esw, true);
+	if (err)
+		goto err_vport_metadata;
 
 	err = esw_offloads_load_all_reps(esw);
 	if (err)
@@ -2156,8 +2134,7 @@ int esw_offloads_init(struct mlx5_eswitch *esw)
 	return 0;
 
 err_reps:
-	if (mlx5_eswitch_vport_match_metadata_enabled(esw))
-		mlx5_eswitch_disable_passing_vport_metadata(esw);
+	esw_set_passing_vport_metadata(esw, false);
 err_vport_metadata:
 	esw_offloads_steering_cleanup(esw);
 	return err;
@@ -2187,8 +2164,7 @@ void esw_offloads_cleanup(struct mlx5_eswitch *esw)
 	mlx5_rdma_disable_roce(esw->dev);
 	esw_offloads_devcom_cleanup(esw);
 	esw_offloads_unload_all_reps(esw);
-	if (mlx5_eswitch_vport_match_metadata_enabled(esw))
-		mlx5_eswitch_disable_passing_vport_metadata(esw);
+	esw_set_passing_vport_metadata(esw, false);
 	esw_offloads_steering_cleanup(esw);
 	esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_NONE;
 }
-- 
2.21.0


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox