Netdev List

Netdev List
 help / color / mirror / Atom feed

* Re: [net-next 0/2] tipc: link changeover issues
From: David Miller @ 2019-07-12 17:45 UTC (permalink / raw)
  To: tuong.t.lien; +Cc: jon.maloy, maloy, ying.xue, netdev, tipc-discussion
In-Reply-To: <20190712051537.10826-1-tuong.t.lien@dektech.com.au>


net-next is closed right now, resubmit this when the tree opens back up.

^ permalink raw reply

* [PATCH v2 bpf-next] selftests/bpf: remove logic duplication in test_verifier.c
From: Andrii Nakryiko @ 2019-07-12 17:44 UTC (permalink / raw)
  To: bpf, netdev, ast, daniel
  Cc: andrii.nakryiko, kernel-team, Andrii Nakryiko, Krzesimir Nowak

test_verifier tests can specify single- and multi-runs tests. Internally
logic of handling them is duplicated. Get rid of it by making single run
retval/data specification to be a first run spec.

Cc: Krzesimir Nowak <krzesimir@kinvolk.io>
Signed-off-by: Andrii Nakryiko <andriin@fb.com>
---
 tools/testing/selftests/bpf/test_verifier.c | 35 +++++++++------------
 1 file changed, 15 insertions(+), 20 deletions(-)

diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index b0773291012a..84135d5f4b35 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -86,7 +86,7 @@ struct bpf_test {
 	int fixup_sk_storage_map[MAX_FIXUPS];
 	const char *errstr;
 	const char *errstr_unpriv;
-	uint32_t retval, retval_unpriv, insn_processed;
+	uint32_t insn_processed;
 	int prog_len;
 	enum {
 		UNDEF,
@@ -95,16 +95,20 @@ struct bpf_test {
 	} result, result_unpriv;
 	enum bpf_prog_type prog_type;
 	uint8_t flags;
-	__u8 data[TEST_DATA_LEN];
 	void (*fill_helper)(struct bpf_test *self);
 	uint8_t runs;
-	struct {
-		uint32_t retval, retval_unpriv;
-		union {
-			__u8 data[TEST_DATA_LEN];
-			__u64 data64[TEST_DATA_LEN / 8];
-		};
-	} retvals[MAX_TEST_RUNS];
+#define bpf_testdata_struct_t					\
+	struct {						\
+		uint32_t retval, retval_unpriv;			\
+		union {						\
+			__u8 data[TEST_DATA_LEN];		\
+			__u64 data64[TEST_DATA_LEN / 8];	\
+		};						\
+	}
+	union {
+		bpf_testdata_struct_t;
+		bpf_testdata_struct_t retvals[MAX_TEST_RUNS];
+	};
 	enum bpf_attach_type expected_attach_type;
 };
 
@@ -949,17 +953,8 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
 		uint32_t expected_val;
 		int i;
 
-		if (!test->runs) {
-			expected_val = unpriv && test->retval_unpriv ?
-				test->retval_unpriv : test->retval;
-
-			err = do_prog_test_run(fd_prog, unpriv, expected_val,
-					       test->data, sizeof(test->data));
-			if (err)
-				run_errs++;
-			else
-				run_successes++;
-		}
+		if (!test->runs)
+			test->runs = 1;
 
 		for (i = 0; i < test->runs; i++) {
 			if (unpriv && test->retvals[i].retval_unpriv)
-- 
2.17.1


^ permalink raw reply related

* Re: [bpf-next v3 11/12] selftests/bpf: Add tests for bpf_prog_test_run for perf events progs
From: Krzesimir Nowak @ 2019-07-12 17:37 UTC (permalink / raw)
  To: Andrii Nakryiko
  Cc: open list, Alban Crequy, Iago López Galeiras,
	Alexei Starovoitov, Daniel Borkmann, Martin KaFai Lau, Song Liu,
	Yonghong Song, David S. Miller, Jakub Kicinski,
	Jesper Dangaard Brouer, John Fastabend, Stanislav Fomichev,
	Networking, bpf, xdp-newbies
In-Reply-To: <CAEf4BzYaV=AxYZna225qKzyWPteU4YFPiBRE4cO30tYmyN_pJQ@mail.gmail.com>

On Fri, Jul 12, 2019 at 2:38 AM Andrii Nakryiko
<andrii.nakryiko@gmail.com> wrote:
>
> On Mon, Jul 8, 2019 at 3:42 PM Krzesimir Nowak <krzesimir@kinvolk.io> wrote:
> >
> > The tests check if ctx and data are correctly prepared from ctx_in and
> > data_in, so accessing the ctx and using the bpf_perf_prog_read_value
> > work as expected.
> >
>
> These are x86_64-specific tests, aren't they? Should probably guard
> them behind #ifdef's.

Yeah, they are x86_64 specific, because pt_regs are arch specific. I
was wondering what to do here in the cover letter. Ifdef? Ifdef and
cover also other arches (please no)? Do some weird tricks with
overriding the definition of pt_regs? Else?

>
> > Signed-off-by: Krzesimir Nowak <krzesimir@kinvolk.io>
> > ---
> >  tools/testing/selftests/bpf/test_verifier.c   | 48 ++++++++++
> >  .../selftests/bpf/verifier/perf_event_run.c   | 96 +++++++++++++++++++
> >  2 files changed, 144 insertions(+)
> >  create mode 100644 tools/testing/selftests/bpf/verifier/perf_event_run.c
> >
> > diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
> > index 6f124cc4ee34..484ea8842b06 100644
> > --- a/tools/testing/selftests/bpf/test_verifier.c
> > +++ b/tools/testing/selftests/bpf/test_verifier.c
> > @@ -295,6 +295,54 @@ static void bpf_fill_scale(struct bpf_test *self)
> >         }
> >  }
> >
> > +static void bpf_fill_perf_event_test_run_check(struct bpf_test *self)
> > +{
> > +       compiletime_assert(
> > +               sizeof(struct bpf_perf_event_data) <= TEST_CTX_LEN,
> > +               "buffer for ctx is too short to fit struct bpf_perf_event_data");
> > +       compiletime_assert(
> > +               sizeof(struct bpf_perf_event_value) <= TEST_DATA_LEN,
> > +               "buffer for data is too short to fit struct bpf_perf_event_value");
> > +
> > +       struct bpf_perf_event_data ctx = {
> > +               .regs = (bpf_user_pt_regs_t) {
> > +                       .r15 = 1,
> > +                       .r14 = 2,
> > +                       .r13 = 3,
> > +                       .r12 = 4,
> > +                       .rbp = 5,
> > +                       .rbx = 6,
> > +                       .r11 = 7,
> > +                       .r10 = 8,
> > +                       .r9 = 9,
> > +                       .r8 = 10,
> > +                       .rax = 11,
> > +                       .rcx = 12,
> > +                       .rdx = 13,
> > +                       .rsi = 14,
> > +                       .rdi = 15,
> > +                       .orig_rax = 16,
> > +                       .rip = 17,
> > +                       .cs = 18,
> > +                       .eflags = 19,
> > +                       .rsp = 20,
> > +                       .ss = 21,
> > +               },
> > +               .sample_period = 1,
> > +               .addr = 2,
> > +       };
> > +       struct bpf_perf_event_value data = {
> > +               .counter = 1,
> > +               .enabled = 2,
> > +               .running = 3,
> > +       };
> > +
> > +       memcpy(self->ctx, &ctx, sizeof(ctx));
> > +       memcpy(self->data, &data, sizeof(data));
>
> Just curious, just assignment didn't work?
>
> > +       free(self->fill_insns);
> > +       self->fill_insns = NULL;
> > +}
> > +
> >  /* BPF_SK_LOOKUP contains 13 instructions, if you need to fix up maps */
> >  #define BPF_SK_LOOKUP(func)                                            \
> >         /* struct bpf_sock_tuple tuple = {} */                          \
> > diff --git a/tools/testing/selftests/bpf/verifier/perf_event_run.c b/tools/testing/selftests/bpf/verifier/perf_event_run.c
> > new file mode 100644
> > index 000000000000..3f877458a7f8
> > --- /dev/null
> > +++ b/tools/testing/selftests/bpf/verifier/perf_event_run.c
> > @@ -0,0 +1,96 @@
> > +#define PER_LOAD_AND_CHECK_PTREG(PT_REG_FIELD, VALUE)                  \
> > +       PER_LOAD_AND_CHECK_CTX(offsetof(bpf_user_pt_regs_t, PT_REG_FIELD), VALUE)
> > +#define PER_LOAD_AND_CHECK_EVENT(PED_FIELD, VALUE)                     \
> > +       PER_LOAD_AND_CHECK_CTX(offsetof(struct bpf_perf_event_data, PED_FIELD), VALUE)
> > +#define PER_LOAD_AND_CHECK_CTX(OFFSET, VALUE)                          \
> > +       PER_LOAD_AND_CHECK_64(BPF_REG_4, BPF_REG_1, OFFSET, VALUE)
> > +#define PER_LOAD_AND_CHECK_VALUE(PEV_FIELD, VALUE)                     \
> > +       PER_LOAD_AND_CHECK_64(BPF_REG_7, BPF_REG_6, offsetof(struct bpf_perf_event_value, PEV_FIELD), VALUE)
>
> Wrap long lines? Try also running scripts/checkpatch.pl again these
> files you are modifying.

Will wrap. Checkpatch was also complaining about complex macro not
being inside parens, but I can't see how to wrap it in parens and keep
it working at the same time.

>
> > +#define PER_LOAD_AND_CHECK_64(DST, SRC, OFFSET, VALUE)                 \
> > +       BPF_LDX_MEM(BPF_DW, DST, SRC, OFFSET),                          \
> > +       BPF_JMP_IMM(BPF_JEQ, DST, VALUE, 2),                            \
> > +       BPF_MOV64_IMM(BPF_REG_0, VALUE),                                \
> > +       BPF_EXIT_INSN()
> > +
> > +{
> > +       "check if regs contain expected values",
> > +       .insns = {
> > +       PER_LOAD_AND_CHECK_PTREG(r15, 1),
> > +       PER_LOAD_AND_CHECK_PTREG(r14, 2),
> > +       PER_LOAD_AND_CHECK_PTREG(r13, 3),
> > +       PER_LOAD_AND_CHECK_PTREG(r12, 4),
> > +       PER_LOAD_AND_CHECK_PTREG(rbp, 5),
> > +       PER_LOAD_AND_CHECK_PTREG(rbx, 6),
> > +       PER_LOAD_AND_CHECK_PTREG(r11, 7),
> > +       PER_LOAD_AND_CHECK_PTREG(r10, 8),
> > +       PER_LOAD_AND_CHECK_PTREG(r9, 9),
> > +       PER_LOAD_AND_CHECK_PTREG(r8, 10),
> > +       PER_LOAD_AND_CHECK_PTREG(rax, 11),
> > +       PER_LOAD_AND_CHECK_PTREG(rcx, 12),
> > +       PER_LOAD_AND_CHECK_PTREG(rdx, 13),
> > +       PER_LOAD_AND_CHECK_PTREG(rsi, 14),
> > +       PER_LOAD_AND_CHECK_PTREG(rdi, 15),
> > +       PER_LOAD_AND_CHECK_PTREG(orig_rax, 16),
> > +       PER_LOAD_AND_CHECK_PTREG(rip, 17),
> > +       PER_LOAD_AND_CHECK_PTREG(cs, 18),
> > +       PER_LOAD_AND_CHECK_PTREG(eflags, 19),
> > +       PER_LOAD_AND_CHECK_PTREG(rsp, 20),
> > +       PER_LOAD_AND_CHECK_PTREG(ss, 21),
> > +       BPF_MOV64_IMM(BPF_REG_0, 0),
> > +       BPF_EXIT_INSN(),
> > +       },
> > +       .result = ACCEPT,
> > +       .prog_type = BPF_PROG_TYPE_PERF_EVENT,
> > +       .ctx_len = sizeof(struct bpf_perf_event_data),
> > +       .data_len = sizeof(struct bpf_perf_event_value),
> > +       .fill_helper = bpf_fill_perf_event_test_run_check,
> > +       .override_data_out_len = true,
> > +},
> > +{
> > +       "check if sample period and addr contain expected values",
> > +       .insns = {
> > +       PER_LOAD_AND_CHECK_EVENT(sample_period, 1),
> > +       PER_LOAD_AND_CHECK_EVENT(addr, 2),
> > +       BPF_MOV64_IMM(BPF_REG_0, 0),
> > +       BPF_EXIT_INSN(),
> > +       },
> > +       .result = ACCEPT,
> > +       .prog_type = BPF_PROG_TYPE_PERF_EVENT,
> > +       .ctx_len = sizeof(struct bpf_perf_event_data),
> > +       .data_len = sizeof(struct bpf_perf_event_value),
> > +       .fill_helper = bpf_fill_perf_event_test_run_check,
> > +       .override_data_out_len = true,
> > +},
> > +{
> > +       "check if bpf_perf_prog_read_value returns expected data",
> > +       .insns = {
> > +       // allocate space for a struct bpf_perf_event_value
> > +       BPF_MOV64_REG(BPF_REG_6, BPF_REG_10),
> > +       BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -(int)sizeof(struct bpf_perf_event_value)),
> > +       // prepare parameters for bpf_perf_prog_read_value(ctx, struct bpf_perf_event_value*, u32)
> > +       // BPF_REG_1 already contains the context
> > +       BPF_MOV64_REG(BPF_REG_2, BPF_REG_6),
> > +       BPF_MOV64_IMM(BPF_REG_3, sizeof(struct bpf_perf_event_value)),
> > +       BPF_EMIT_CALL(BPF_FUNC_perf_prog_read_value),
> > +       // check the return value
> > +       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
> > +       BPF_EXIT_INSN(),
> > +       // check if the fields match the expected values
>
> Use /* */ comments.

Oops. Will fix.

>
> > +       PER_LOAD_AND_CHECK_VALUE(counter, 1),
> > +       PER_LOAD_AND_CHECK_VALUE(enabled, 2),
> > +       PER_LOAD_AND_CHECK_VALUE(running, 3),
> > +       BPF_MOV64_IMM(BPF_REG_0, 0),
> > +       BPF_EXIT_INSN(),
> > +       },
> > +       .result = ACCEPT,
> > +       .prog_type = BPF_PROG_TYPE_PERF_EVENT,
> > +       .ctx_len = sizeof(struct bpf_perf_event_data),
> > +       .data_len = sizeof(struct bpf_perf_event_value),
> > +       .fill_helper = bpf_fill_perf_event_test_run_check,
> > +       .override_data_out_len = true,
> > +},
> > +#undef PER_LOAD_AND_CHECK_64
> > +#undef PER_LOAD_AND_CHECK_VALUE
> > +#undef PER_LOAD_AND_CHECK_CTX
> > +#undef PER_LOAD_AND_CHECK_EVENT
> > +#undef PER_LOAD_AND_CHECK_PTREG
> > --
> > 2.20.1
> >



--
Kinvolk GmbH | Adalbertstr.6a, 10999 Berlin | tel: +491755589364
Geschäftsführer/Directors: Alban Crequy, Chris Kühl, Iago López Galeiras
Registergericht/Court of registration: Amtsgericht Charlottenburg
Registernummer/Registration number: HRB 171414 B
Ust-ID-Nummer/VAT ID number: DE302207000

^ permalink raw reply

* Re: [bpf-next v3 02/12] selftests/bpf: Avoid a clobbering of errno
From: Krzesimir Nowak @ 2019-07-12 17:31 UTC (permalink / raw)
  To: Andrii Nakryiko
  Cc: open list, Alban Crequy, Iago López Galeiras,
	Alexei Starovoitov, Daniel Borkmann, Martin KaFai Lau, Song Liu,
	Yonghong Song, David S. Miller, Jakub Kicinski,
	Jesper Dangaard Brouer, John Fastabend, Stanislav Fomichev,
	Networking, bpf, xdp-newbies
In-Reply-To: <CAEf4Bzb-KW+p1zFcz39OSUuH0=DLFRNLa3NYT4V_-zz0Q_TJ5g@mail.gmail.com>

On Fri, Jul 12, 2019 at 2:59 AM Andrii Nakryiko
<andrii.nakryiko@gmail.com> wrote:
>
> On Thu, Jul 11, 2019 at 5:04 AM Krzesimir Nowak <krzesimir@kinvolk.io> wrote:
> >
> > On Thu, Jul 11, 2019 at 1:52 AM Andrii Nakryiko
> > <andrii.nakryiko@gmail.com> wrote:
> > >
> > > On Mon, Jul 8, 2019 at 3:42 PM Krzesimir Nowak <krzesimir@kinvolk.io> wrote:
> > > >
> > > > Save errno right after bpf_prog_test_run returns, so we later check
> > > > the error code actually set by bpf_prog_test_run, not by some libcap
> > > > function.
> > > >
> > > > Changes since v1:
> > > > - Fix the "Fixes:" tag to mention actual commit that introduced the
> > > >   bug
> > > >
> > > > Changes since v2:
> > > > - Move the declaration so it fits the reverse christmas tree style.
> > > >
> > > > Cc: Daniel Borkmann <daniel@iogearbox.net>
> > > > Fixes: 832c6f2c29ec ("bpf: test make sure to run unpriv test cases in test_verifier")
> > > > Signed-off-by: Krzesimir Nowak <krzesimir@kinvolk.io>
> > > > ---
> > > >  tools/testing/selftests/bpf/test_verifier.c | 4 +++-
> > > >  1 file changed, 3 insertions(+), 1 deletion(-)
> > > >
> > > > diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
> > > > index b8d065623ead..3fe126e0083b 100644
> > > > --- a/tools/testing/selftests/bpf/test_verifier.c
> > > > +++ b/tools/testing/selftests/bpf/test_verifier.c
> > > > @@ -823,16 +823,18 @@ static int do_prog_test_run(int fd_prog, bool unpriv, uint32_t expected_val,
> > > >         __u8 tmp[TEST_DATA_LEN << 2];
> > > >         __u32 size_tmp = sizeof(tmp);
> > > >         uint32_t retval;
> > > > +       int saved_errno;
> > > >         int err;
> > > >
> > > >         if (unpriv)
> > > >                 set_admin(true);
> > > >         err = bpf_prog_test_run(fd_prog, 1, data, size_data,
> > > >                                 tmp, &size_tmp, &retval, NULL);
> > >
> > > Given err is either 0 or -1, how about instead making err useful right
> > > here without extra variable?
> > >
> > > if (bpf_prog_test_run(...))
> > >         err = errno;
> >
> > I change it later to bpf_prog_test_run_xattr, which can also return
> > -EINVAL and then errno is not set. But this one probably should not be
>
> This is wrong. bpf_prog_test_run/bpf_prog_test_run_xattr should either
> always return -1 and set errno to actual error (like syscalls do), or
> always use return code with proper error. Give they are pretending to
> be just pure syscall, it's probably better to set errno to EINVAL and
> return -1 on invalid input args?

Yeah, this is inconsistent at best. But seems to be kind of expected?
See tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c.

>
> > triggered by the test code. So not sure, probably would be better to
> > keep it as is for consistency?
> >
> > >
> > > > +       saved_errno = errno;
> > > >         if (unpriv)
> > > >                 set_admin(false);
> > > >         if (err) {
> > > > -               switch (errno) {
> > > > +               switch (saved_errno) {
> > > >                 case 524/*ENOTSUPP*/:
> > >
> > > ENOTSUPP is defined in include/linux/errno.h, is there any problem
> > > with using this in selftests?
> >
> > I just used whatever there was earlier. Seems like <linux/errno.h> is
> > not copied to tools include directory.
>
> Ok, let's leave it as is, thanks!
>
> >
> > >
> > > >                         printf("Did not run the program (not supported) ");
> > > >                         return 0;
> > > > --
> > > > 2.20.1
> > > >
> >
> >
> >
> > --
> > Kinvolk GmbH | Adalbertstr.6a, 10999 Berlin | tel: +491755589364
> > Geschäftsführer/Directors: Alban Crequy, Chris Kühl, Iago López Galeiras
> > Registergericht/Court of registration: Amtsgericht Charlottenburg
> > Registernummer/Registration number: HRB 171414 B
> > Ust-ID-Nummer/VAT ID number: DE302207000



-- 
Kinvolk GmbH | Adalbertstr.6a, 10999 Berlin | tel: +491755589364
Geschäftsführer/Directors: Alban Crequy, Chris Kühl, Iago López Galeiras
Registergericht/Court of registration: Amtsgericht Charlottenburg
Registernummer/Registration number: HRB 171414 B
Ust-ID-Nummer/VAT ID number: DE302207000

^ permalink raw reply

* Re: [PATCH v4 bpf-next 1/4] selftests/bpf: compile progs with -D__TARGET_ARCH_$(SRCARCH)
From: Andrii Nakryiko @ 2019-07-12 17:28 UTC (permalink / raw)
  To: Ilya Leoshkevich
  Cc: bpf, Networking, Y Song, Daniel Borkmann, Stanislav Fomichev,
	David S. Miller, Alexei Starovoitov
In-Reply-To: <6E5C9DDE-FF1D-4BFA-813E-7A0C3232B5F0@linux.ibm.com>

On Fri, Jul 12, 2019 at 2:00 AM Ilya Leoshkevich <iii@linux.ibm.com> wrote:
>
> > Am 12.07.2019 um 02:53 schrieb Andrii Nakryiko <andrii.nakryiko@gmail.com>:
> >
> > On Thu, Jul 11, 2019 at 7:32 AM Ilya Leoshkevich <iii@linux.ibm.com> wrote:
> >>
> >> This opens up the possibility of accessing registers in an
> >> arch-independent way.
> >>
> >> Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
> >> ---
> >> tools/testing/selftests/bpf/Makefile | 4 +++-
> >> 1 file changed, 3 insertions(+), 1 deletion(-)
> >>
> >> diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
> >> index 2620406a53ec..ad84450e4ab8 100644
> >> --- a/tools/testing/selftests/bpf/Makefile
> >> +++ b/tools/testing/selftests/bpf/Makefile
> >> @@ -1,4 +1,5 @@
> >> # SPDX-License-Identifier: GPL-2.0
> >> +include ../../../scripts/Makefile.arch
> >>
> >> LIBDIR := ../../../lib
> >> BPFDIR := $(LIBDIR)/bpf
> >> @@ -138,7 +139,8 @@ CLANG_SYS_INCLUDES := $(shell $(CLANG) -v -E - </dev/null 2>&1 \
> >>
> >> CLANG_FLAGS = -I. -I./include/uapi -I../../../include/uapi \
> >>              $(CLANG_SYS_INCLUDES) \
> >> -             -Wno-compare-distinct-pointer-types
> >> +             -Wno-compare-distinct-pointer-types \
> >> +             -D__TARGET_ARCH_$(SRCARCH)
> >
> > samples/bpf/Makefile uses $(ARCH), why does it work for samples?
> > Should we update samples/bpf/Makefile as well?
>
> I believe that in common cases both are okay, but judging by
> linux:Makefile and linux:tools/scripts/Makefile.arch, one could use e.g.
> ARCH=i686, and that would be converted to SRCARCH=x86. So IMHO SRCARCH
> is safer, and we should change bpf/samples/Makefile. I could send a
> patch separately.

Yeah, let's do that then, thanks!

^ permalink raw reply

* Re: [PATCH v2 6/7] dt-bindings: net: realtek: Add property to configure LED mode
From: Matthias Kaehlcke @ 2019-07-12 17:28 UTC (permalink / raw)
  To: Florian Fainelli
  Cc: Rob Herring, Andrew Lunn, Heiner Kallweit, David S . Miller,
	Mark Rutland, netdev, devicetree, linux-kernel@vger.kernel.org,
	Douglas Anderson
In-Reply-To: <7d102d81-750d-32d9-a554-95f018e69f2f@gmail.com>

Hi Florian,

On Wed, Jul 10, 2019 at 09:28:39AM -0700, Florian Fainelli wrote:
> On 7/10/19 8:55 AM, Rob Herring wrote:
> > On Wed, Jul 3, 2019 at 5:23 PM Matthias Kaehlcke <mka@chromium.org> wrote:
> >>
> >> Hi Florian,
> >>
> >> On Wed, Jul 03, 2019 at 02:37:47PM -0700, Florian Fainelli wrote:
> >>> On 7/3/19 12:37 PM, Matthias Kaehlcke wrote:
> >>>> The LED behavior of some Realtek PHYs is configurable. Add the
> >>>> property 'realtek,led-modes' to specify the configuration of the
> >>>> LEDs.
> >>>>
> >>>> Signed-off-by: Matthias Kaehlcke <mka@chromium.org>
> >>>> ---
> >>>> Changes in v2:
> >>>> - patch added to the series
> >>>> ---
> >>>>  .../devicetree/bindings/net/realtek.txt         |  9 +++++++++
> >>>>  include/dt-bindings/net/realtek.h               | 17 +++++++++++++++++
> >>>>  2 files changed, 26 insertions(+)
> >>>>  create mode 100644 include/dt-bindings/net/realtek.h
> >>>>
> >>>> diff --git a/Documentation/devicetree/bindings/net/realtek.txt b/Documentation/devicetree/bindings/net/realtek.txt
> >>>> index 71d386c78269..40b0d6f9ee21 100644
> >>>> --- a/Documentation/devicetree/bindings/net/realtek.txt
> >>>> +++ b/Documentation/devicetree/bindings/net/realtek.txt
> >>>> @@ -9,6 +9,12 @@ Optional properties:
> >>>>
> >>>>     SSC is only available on some Realtek PHYs (e.g. RTL8211E).
> >>>>
> >>>> +- realtek,led-modes: LED mode configuration.
> >>>> +
> >>>> +   A 0..3 element vector, with each element configuring the operating
> >>>> +   mode of an LED. Omitted LEDs are turned off. Allowed values are
> >>>> +   defined in "include/dt-bindings/net/realtek.h".
> >>>
> >>> This should probably be made more general and we should define LED modes
> >>> that makes sense regardless of the PHY device, introduce a set of
> >>> generic functions for validating and then add new function pointer for
> >>> setting the LED configuration to the PHY driver. This would allow to be
> >>> more future proof where each PHY driver could expose standard LEDs class
> >>> devices to user-space, and it would also allow facilities like: ethtool
> >>> -p to plug into that.
> >>>
> >>> Right now, each driver invents its own way of configuring LEDs, that
> >>> does not scale, and there is not really a good reason for that other
> >>> than reviewing drivers in isolation and therefore making it harder to
> >>> extract the commonality. Yes, I realize that since you are the latest
> >>> person submitting something in that area, you are being selected :)
> > 
> > I agree.
> > 
> >> I see the merit of your proposal to come up with a generic mechanism
> >> to configure Ethernet LEDs, however I can't justify spending much of
> >> my work time on this. If it is deemed useful I'm happy to send another
> >> version of the current patchset that addresses the reviewer's comments,
> >> but if the implementation of a generic LED configuration interface is
> >> a requirement I will have to abandon at least the LED configuration
> >> part of this series.
> > 
> > Can you at least define a common binding for this. Maybe that's just
> > removing 'realtek'. While the kernel side can evolve to a common
> > infrastructure, the DT bindings can't.
> 
> That would be a great start, and that is actually what I had in mind
> (should have been more specific), I was not going to have you Matthias
> do the grand slam and convert all this LED configuration into the LEDs
> class etc. that would not be fair.
> 
> It seems to be that we can fairly easily agree on a common binding for
> LED configuration, I would define something along those lines to be
> flexible:
> 
> phy-led-configuration = <LED_NUM_MASK LED_CFG_MASK>;
> 
> where LED_NUM_MASK is one of:
> 
> 0 -> link
> 1 -> activity
> 2 -> speed

I don't understand this proposal completely. Is LED_NUM_MASK actually
a mask/set (potentially containing multiple LEDs) or is it "one of"
the LEDs?

Are you suggesting to assign each LED a specific role (link, activity,
speed)?

Could you maybe post a specific example involving multiple LEDs?

Thanks

Matthias

> that way you can define single/dual/triple LED configurations by
> updating the bitmask.
> 
> LED_CFG_MASK is one of:
> 
> 0 -> LED_CFG_10
> 1 -> LED_CFG_100
> 2 -> LED_CFG_1000
> 
> (let's assume 1Gbps or less for now)
> 
> or this can be combined in a single cell with a left shift.
> 
> Andrew, Heiner, do you see that approach working correctly and scaling
> appropriately?

^ permalink raw reply

* [PATCH v3 bpf 3/3] selftests/bpf: use typedef'ed arrays as map values
From: Andrii Nakryiko @ 2019-07-12 17:25 UTC (permalink / raw)
  To: bpf, netdev, ast, daniel, yhs
  Cc: andrii.nakryiko, kernel-team, Andrii Nakryiko
In-Reply-To: <20190712172557.4039121-1-andriin@fb.com>

Convert few tests that couldn't use typedef'ed arrays due to kernel bug.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
---
 tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c     | 3 ++-
 tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c | 3 +--
 tools/testing/selftests/bpf/progs/test_stacktrace_map.c      | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c b/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c
index d06b47a09097..33254b771384 100644
--- a/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c
+++ b/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c
@@ -47,11 +47,12 @@ struct {
  * issue and avoid complicated C programming massaging.
  * This is an acceptable workaround since there is one entry here.
  */
+typedef __u64 raw_stack_trace_t[2 * MAX_STACK_RAWTP];
 struct {
 	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
 	__uint(max_entries, 1);
 	__type(key, __u32);
-	__u64 (*value)[2 * MAX_STACK_RAWTP];
+	__type(value, raw_stack_trace_t);
 } rawdata_map SEC(".maps");
 
 SEC("tracepoint/raw_syscalls/sys_enter")
diff --git a/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c b/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c
index bbfc8337b6f0..f5638e26865d 100644
--- a/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c
+++ b/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c
@@ -36,8 +36,7 @@ struct {
 	__uint(type, BPF_MAP_TYPE_ARRAY);
 	__uint(max_entries, 128);
 	__type(key, __u32);
-	/* there seems to be a bug in kernel not handling typedef properly */
-	struct bpf_stack_build_id (*value)[PERF_MAX_STACK_DEPTH];
+	__type(value, stack_trace_t);
 } stack_amap SEC(".maps");
 
 /* taken from /sys/kernel/debug/tracing/events/random/urandom_read/format */
diff --git a/tools/testing/selftests/bpf/progs/test_stacktrace_map.c b/tools/testing/selftests/bpf/progs/test_stacktrace_map.c
index 803c15dc109d..fa0be3e10a10 100644
--- a/tools/testing/selftests/bpf/progs/test_stacktrace_map.c
+++ b/tools/testing/selftests/bpf/progs/test_stacktrace_map.c
@@ -35,7 +35,7 @@ struct {
 	__uint(type, BPF_MAP_TYPE_ARRAY);
 	__uint(max_entries, 16384);
 	__type(key, __u32);
-	__u64 (*value)[PERF_MAX_STACK_DEPTH];
+	__type(value, stack_trace_t);
 } stack_amap SEC(".maps");
 
 /* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */
-- 
2.17.1


^ permalink raw reply related

* [PATCH v3 bpf 1/3] bpf: fix BTF verifier size resolution logic
From: Andrii Nakryiko @ 2019-07-12 17:25 UTC (permalink / raw)
  To: bpf, netdev, ast, daniel, yhs
  Cc: andrii.nakryiko, kernel-team, Andrii Nakryiko, Martin KaFai Lau
In-Reply-To: <20190712172557.4039121-1-andriin@fb.com>

BTF verifier has a size resolution bug which in some circumstances leads to
invalid size resolution for, e.g., TYPEDEF modifier.  This happens if we have
[1] PTR -> [2] TYPEDEF -> [3] ARRAY, in which case due to being in pointer
context ARRAY size won't be resolved (because for pointer it doesn't matter, so
it's a sink in pointer context), but it will be permanently remembered as zero
for TYPEDEF and TYPEDEF will be marked as RESOLVED. Eventually ARRAY size will
be resolved correctly, but TYPEDEF resolved_size won't be updated anymore.
This, subsequently, will lead to erroneous map creation failure, if that
TYPEDEF is specified as either key or value, as key_size/value_size won't
correspond to resolved size of TYPEDEF (kernel will believe it's zero).

Note, that if BTF was ordered as [1] ARRAY <- [2] TYPEDEF <- [3] PTR, this
won't be a problem, as by the time we get to TYPEDEF, ARRAY's size is already
calculated and stored.

This bug manifests itself in rejecting BTF-defined maps that use array
typedef as a value type:

typedef int array_t[16];

struct {
    __uint(type, BPF_MAP_TYPE_ARRAY);
    __type(value, array_t); /* i.e., array_t *value; */
} test_map SEC(".maps");

The fix consists on not relying on modifier's resolved_size and instead using
modifier's resolved_id (type ID for "concrete" type to which modifier
eventually resolves) and doing size determination for that resolved type. This
allow to preserve existing "early DFS termination" logic for PTR or
STRUCT_OR_ARRAY contexts, but still do correct size determination for modifier
types.

Fixes: eb3f595dab40 ("bpf: btf: Validate type reference")
Cc: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Andrii Nakryiko <andriin@fb.com>
---
 kernel/bpf/btf.c | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 546ebee39e2a..5fcc7a17eb5a 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -1073,11 +1073,18 @@ const struct btf_type *btf_type_id_size(const struct btf *btf,
 				 !btf_type_is_var(size_type)))
 			return NULL;

-		size = btf->resolved_sizes[size_type_id];
 		size_type_id = btf->resolved_ids[size_type_id];
 		size_type = btf_type_by_id(btf, size_type_id);
 		if (btf_type_nosize_or_null(size_type))
 			return NULL;
+		else if (btf_type_has_size(size_type))
+			size = size_type->size;
+		else if (btf_type_is_array(size_type))
+			size = btf->resolved_sizes[size_type_id];
+		else if (btf_type_is_ptr(size_type))
+			size = sizeof(void *);
+		else
+			return NULL;
 	}

 	*type_id = size_type_id;
@@ -1602,7 +1609,6 @@ static int btf_modifier_resolve(struct btf_verifier_env *env,
 	const struct btf_type *next_type;
 	u32 next_type_id = t->type;
 	struct btf *btf = env->btf;
-	u32 next_type_size = 0;

 	next_type = btf_type_by_id(btf, next_type_id);
 	if (!next_type || btf_type_is_resolve_source_only(next_type)) {
@@ -1620,7 +1626,7 @@ static int btf_modifier_resolve(struct btf_verifier_env *env,
 	 * save us a few type-following when we use it later (e.g. in
 	 * pretty print).
 	 */
-	if (!btf_type_id_size(btf, &next_type_id, &next_type_size)) {
+	if (!btf_type_id_size(btf, &next_type_id, NULL)) {
 		if (env_type_is_resolved(env, next_type_id))
 			next_type = btf_type_id_resolve(btf, &next_type_id);

@@ -1633,7 +1639,7 @@ static int btf_modifier_resolve(struct btf_verifier_env *env,
 		}
 	}

-	env_stack_pop_resolved(env, next_type_id, next_type_size);
+	env_stack_pop_resolved(env, next_type_id, 0);

 	return 0;
 }
@@ -1645,7 +1651,6 @@ static int btf_var_resolve(struct btf_verifier_env *env,
 	const struct btf_type *t = v->t;
 	u32 next_type_id = t->type;
 	struct btf *btf = env->btf;
-	u32 next_type_size;

 	next_type = btf_type_by_id(btf, next_type_id);
 	if (!next_type || btf_type_is_resolve_source_only(next_type)) {
@@ -1675,12 +1680,12 @@ static int btf_var_resolve(struct btf_verifier_env *env,
 	 * forward types or similar that would resolve to size of
 	 * zero is allowed.
 	 */
-	if (!btf_type_id_size(btf, &next_type_id, &next_type_size)) {
+	if (!btf_type_id_size(btf, &next_type_id, NULL)) {
 		btf_verifier_log_type(env, v->t, "Invalid type_id");
 		return -EINVAL;
 	}

-	env_stack_pop_resolved(env, next_type_id, next_type_size);
+	env_stack_pop_resolved(env, next_type_id, 0);

 	return 0;
 }
-- 
2.17.1

^ permalink raw reply related

* [PATCH v3 bpf 2/3] selftests/bpf: add trickier size resolution tests
From: Andrii Nakryiko @ 2019-07-12 17:25 UTC (permalink / raw)
  To: bpf, netdev, ast, daniel, yhs
  Cc: andrii.nakryiko, kernel-team, Andrii Nakryiko
In-Reply-To: <20190712172557.4039121-1-andriin@fb.com>

Add more BTF tests, validating that size resolution logic is correct in
few trickier cases.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
---
 tools/testing/selftests/bpf/test_btf.c | 88 ++++++++++++++++++++++++++
 1 file changed, 88 insertions(+)

diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c
index 8351cb5f4a20..3d617e806054 100644
--- a/tools/testing/selftests/bpf/test_btf.c
+++ b/tools/testing/selftests/bpf/test_btf.c
@@ -3417,6 +3417,94 @@ static struct btf_raw_test raw_tests[] = {
 	.value_type_id = 1,
 	.max_entries = 4,
 },
+/*
+ * typedef int arr_t[16];
+ * struct s {
+ *	arr_t *a;
+ * };
+ */
+{
+	.descr = "struct->ptr->typedef->array->int size resolution",
+	.raw_types = {
+		BTF_STRUCT_ENC(NAME_TBD, 1, 8),			/* [1] */
+		BTF_MEMBER_ENC(NAME_TBD, 2, 0),
+		BTF_PTR_ENC(3),					/* [2] */
+		BTF_TYPEDEF_ENC(NAME_TBD, 4),			/* [3] */
+		BTF_TYPE_ARRAY_ENC(5, 5, 16),			/* [4] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [5] */
+		BTF_END_RAW,
+	},
+	BTF_STR_SEC("\0s\0a\0arr_t"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "ptr_mod_chain_size_resolve_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int) * 16,
+	.key_type_id = 5 /* int */,
+	.value_type_id = 3 /* arr_t */,
+	.max_entries = 4,
+},
+/*
+ * typedef int arr_t[16][8][4];
+ * struct s {
+ *	arr_t *a;
+ * };
+ */
+{
+	.descr = "struct->ptr->typedef->multi-array->int size resolution",
+	.raw_types = {
+		BTF_STRUCT_ENC(NAME_TBD, 1, 8),			/* [1] */
+		BTF_MEMBER_ENC(NAME_TBD, 2, 0),
+		BTF_PTR_ENC(3),					/* [2] */
+		BTF_TYPEDEF_ENC(NAME_TBD, 4),			/* [3] */
+		BTF_TYPE_ARRAY_ENC(5, 7, 16),			/* [4] */
+		BTF_TYPE_ARRAY_ENC(6, 7, 8),			/* [5] */
+		BTF_TYPE_ARRAY_ENC(7, 7, 4),			/* [6] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [7] */
+		BTF_END_RAW,
+	},
+	BTF_STR_SEC("\0s\0a\0arr_t"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "multi_arr_size_resolve_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int) * 16 * 8 * 4,
+	.key_type_id = 7 /* int */,
+	.value_type_id = 3 /* arr_t */,
+	.max_entries = 4,
+},
+/*
+ * typedef int int_t;
+ * typedef int_t arr3_t[4];
+ * typedef arr3_t arr2_t[8];
+ * typedef arr2_t arr1_t[16];
+ * struct s {
+ *	arr1_t *a;
+ * };
+ */
+{
+	.descr = "typedef/multi-arr mix size resolution",
+	.raw_types = {
+		BTF_STRUCT_ENC(NAME_TBD, 1, 8),			/* [1] */
+		BTF_MEMBER_ENC(NAME_TBD, 2, 0),
+		BTF_PTR_ENC(3),					/* [2] */
+		BTF_TYPEDEF_ENC(NAME_TBD, 4),			/* [3] */
+		BTF_TYPE_ARRAY_ENC(5, 10, 16),			/* [4] */
+		BTF_TYPEDEF_ENC(NAME_TBD, 6),			/* [5] */
+		BTF_TYPE_ARRAY_ENC(7, 10, 8),			/* [6] */
+		BTF_TYPEDEF_ENC(NAME_TBD, 8),			/* [7] */
+		BTF_TYPE_ARRAY_ENC(9, 10, 4),			/* [8] */
+		BTF_TYPEDEF_ENC(NAME_TBD, 10),			/* [9] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [10] */
+		BTF_END_RAW,
+	},
+	BTF_STR_SEC("\0s\0a\0arr1_t\0arr2_t\0arr3_t\0int_t"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "typedef_arra_mix_size_resolve_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int) * 16 * 8 * 4,
+	.key_type_id = 10 /* int */,
+	.value_type_id = 3 /* arr_t */,
+	.max_entries = 4,
+},
 
 }; /* struct btf_raw_test raw_tests[] */
 
-- 
2.17.1


^ permalink raw reply related

* [PATCH v3 bpf 0/3] fix BTF verification size resolution
From: Andrii Nakryiko @ 2019-07-12 17:25 UTC (permalink / raw)
  To: bpf, netdev, ast, daniel, yhs
  Cc: andrii.nakryiko, kernel-team, Andrii Nakryiko

BTF size resolution logic isn't always resolving type size correctly, leading
to erroneous map creation failures due to value size mismatch.

This patch set:
1. fixes the issue (patch #1);
2. adds tests for trickier cases (patch #2);
3. and converts few test cases utilizing BTF-defined maps, that previously
   couldn't use typedef'ed arrays due to kernel bug (patch #3).

Andrii Nakryiko (3):
  bpf: fix BTF verifier size resolution logic
  selftests/bpf: add trickier size resolution tests
  selftests/bpf: use typedef'ed arrays as map values

 kernel/bpf/btf.c                              | 19 ++--
 .../bpf/progs/test_get_stack_rawtp.c          |  3 +-
 .../bpf/progs/test_stacktrace_build_id.c      |  3 +-
 .../selftests/bpf/progs/test_stacktrace_map.c |  2 +-
 tools/testing/selftests/bpf/test_btf.c        | 88 +++++++++++++++++++
 5 files changed, 104 insertions(+), 11 deletions(-)

-- 
2.17.1

^ permalink raw reply

* [PATCH net] net: neigh: fix multiple neigh timer scheduling
From: Lorenzo Bianconi @ 2019-07-12 17:22 UTC (permalink / raw)
  To: davem; +Cc: netdev, dsahern, marek

Neigh timer can be scheduled multiple times from userspace adding
multiple neigh entries and forcing the neigh timer scheduling passing
NTF_USE in the netlink requests.
This will result in a refcount leak and in the following dump stack:

[   32.465295] NEIGH: BUG, double timer add, state is 8
[   32.465308] CPU: 0 PID: 416 Comm: double_timer_ad Not tainted 5.2.0+ #65
[   32.465311] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.12.0-2.fc30 04/01/2014
[   32.465313] Call Trace:
[   32.465318]  dump_stack+0x7c/0xc0
[   32.465323]  __neigh_event_send+0x20c/0x880
[   32.465326]  ? ___neigh_create+0x846/0xfb0
[   32.465329]  ? neigh_lookup+0x2a9/0x410
[   32.465332]  ? neightbl_fill_info.constprop.0+0x800/0x800
[   32.465334]  neigh_add+0x4f8/0x5e0
[   32.465337]  ? neigh_xmit+0x620/0x620
[   32.465341]  ? find_held_lock+0x85/0xa0
[   32.465345]  rtnetlink_rcv_msg+0x204/0x570
[   32.465348]  ? rtnl_dellink+0x450/0x450
[   32.465351]  ? mark_held_locks+0x90/0x90
[   32.465354]  ? match_held_lock+0x1b/0x230
[   32.465357]  netlink_rcv_skb+0xc4/0x1d0
[   32.465360]  ? rtnl_dellink+0x450/0x450
[   32.465363]  ? netlink_ack+0x420/0x420
[   32.465366]  ? netlink_deliver_tap+0x115/0x560
[   32.465369]  ? __alloc_skb+0xc9/0x2f0
[   32.465372]  netlink_unicast+0x270/0x330
[   32.465375]  ? netlink_attachskb+0x2f0/0x2f0
[   32.465378]  netlink_sendmsg+0x34f/0x5a0
[   32.465381]  ? netlink_unicast+0x330/0x330
[   32.465385]  ? move_addr_to_kernel.part.0+0x20/0x20
[   32.465388]  ? netlink_unicast+0x330/0x330
[   32.465391]  sock_sendmsg+0x91/0xa0
[   32.465394]  ___sys_sendmsg+0x407/0x480
[   32.465397]  ? copy_msghdr_from_user+0x200/0x200
[   32.465401]  ? _raw_spin_unlock_irqrestore+0x37/0x40
[   32.465404]  ? lockdep_hardirqs_on+0x17d/0x250
[   32.465407]  ? __wake_up_common_lock+0xcb/0x110
[   32.465410]  ? __wake_up_common+0x230/0x230
[   32.465413]  ? netlink_bind+0x3e1/0x490
[   32.465416]  ? netlink_setsockopt+0x540/0x540
[   32.465420]  ? __fget_light+0x9c/0xf0
[   32.465423]  ? sockfd_lookup_light+0x8c/0xb0
[   32.465426]  __sys_sendmsg+0xa5/0x110
[   32.465429]  ? __ia32_sys_shutdown+0x30/0x30
[   32.465432]  ? __fd_install+0xe1/0x2c0
[   32.465435]  ? lockdep_hardirqs_off+0xb5/0x100
[   32.465438]  ? mark_held_locks+0x24/0x90
[   32.465441]  ? do_syscall_64+0xf/0x270
[   32.465444]  do_syscall_64+0x63/0x270
[   32.465448]  entry_SYSCALL_64_after_hwframe+0x49/0xbe

Fix the issue unscheduling neigh_timer if selected entry is in 'IN_TIMER'
receiving a netlink request with NTF_USE flag set

Reported-by: Marek Majkowski <marek@cloudflare.com>
Fixes: 0c5c2d308906 ("neigh: Allow for user space users of the neighbour table")
Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
---
 include/net/neighbour.h |  9 ++++++---
 net/core/neighbour.c    | 13 +++++++++----
 net/ipv4/route.c        |  2 +-
 net/sched/sch_teql.c    |  2 +-
 4 files changed, 17 insertions(+), 9 deletions(-)

diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index 50a67bd6a434..5bc68bf03c3b 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -324,7 +324,8 @@ static inline struct neighbour *neigh_create(struct neigh_table *tbl,
 	return __neigh_create(tbl, pkey, dev, true);
 }
 void neigh_destroy(struct neighbour *neigh);
-int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb);
+int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb,
+		       bool unsched_timer);
 int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, u32 flags,
 		 u32 nlmsg_pid);
 void __neigh_set_probe_once(struct neighbour *neigh);
@@ -435,14 +436,16 @@ static inline struct neighbour * neigh_clone(struct neighbour *neigh)
 
 #define neigh_hold(n)	refcount_inc(&(n)->refcnt)
 
-static inline int neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
+static inline int neigh_event_send(struct neighbour *neigh,
+				   struct sk_buff *skb,
+				   bool unsched_timer)
 {
 	unsigned long now = jiffies;
 	
 	if (neigh->used != now)
 		neigh->used = now;
 	if (!(neigh->nud_state&(NUD_CONNECTED|NUD_DELAY|NUD_PROBE)))
-		return __neigh_event_send(neigh, skb);
+		return __neigh_event_send(neigh, skb, unsched_timer);
 	return 0;
 }
 
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 742cea4ce72e..687f67458187 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1104,7 +1104,8 @@ static void neigh_timer_handler(struct timer_list *t)
 	neigh_release(neigh);
 }
 
-int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
+int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb,
+		       bool unsched_timer)
 {
 	int rc;
 	bool immediate_probe = false;
@@ -1124,7 +1125,9 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
 
 			atomic_set(&neigh->probes,
 				   NEIGH_VAR(neigh->parms, UCAST_PROBES));
-			neigh->nud_state     = NUD_INCOMPLETE;
+			if (unsched_timer)
+				neigh_del_timer(neigh);
+			neigh->nud_state = NUD_INCOMPLETE;
 			neigh->updated = now;
 			next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
 					 HZ/2);
@@ -1140,6 +1143,8 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
 		}
 	} else if (neigh->nud_state & NUD_STALE) {
 		neigh_dbg(2, "neigh %p is delayed\n", neigh);
+		if (unsched_timer)
+			neigh_del_timer(neigh);
 		neigh->nud_state = NUD_DELAY;
 		neigh->updated = jiffies;
 		neigh_add_timer(neigh, jiffies +
@@ -1469,7 +1474,7 @@ int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
 {
 	int rc = 0;
 
-	if (!neigh_event_send(neigh, skb)) {
+	if (!neigh_event_send(neigh, skb, false)) {
 		int err;
 		struct net_device *dev = neigh->dev;
 		unsigned int seq;
@@ -1962,7 +1967,7 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
 		flags |= NEIGH_UPDATE_F_ISROUTER;
 
 	if (ndm->ndm_flags & NTF_USE) {
-		neigh_event_send(neigh, NULL);
+		neigh_event_send(neigh, NULL, true);
 		err = 0;
 	} else
 		err = __neigh_update(neigh, lladdr, ndm->ndm_state, flags,
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 517300d587a7..062fc7ad82f5 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -782,7 +782,7 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow
 		n = neigh_create(&arp_tbl, &new_gw, rt->dst.dev);
 	if (!IS_ERR(n)) {
 		if (!(n->nud_state & NUD_VALID)) {
-			neigh_event_send(n, NULL);
+			neigh_event_send(n, NULL, false);
 		} else {
 			if (fib_lookup(net, fl4, &res, 0) == 0) {
 				struct fib_nh_common *nhc = FIB_RES_NHC(res);
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index 689ef6f3ded8..80646c07e7b7 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -234,7 +234,7 @@ __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res,
 		n = mn;
 	}
 
-	if (neigh_event_send(n, skb_res) == 0) {
+	if (neigh_event_send(n, skb_res, false) == 0) {
 		int err;
 		char haddr[MAX_ADDR_LEN];
 
-- 
2.21.0


^ permalink raw reply related

* Re: [PATCH v2 6/7] dt-bindings: net: realtek: Add property to configure LED mode
From: Matthias Kaehlcke @ 2019-07-12 17:20 UTC (permalink / raw)
  To: Rob Herring
  Cc: Florian Fainelli, David S . Miller, Mark Rutland, Andrew Lunn,
	Heiner Kallweit, netdev, devicetree, linux-kernel@vger.kernel.org,
	Douglas Anderson
In-Reply-To: <CAL_JsqL_AU+JV0c2mNbXiPh2pvfYbPbLV-2PHHX0hC3vUH4QWg@mail.gmail.com>

On Wed, Jul 10, 2019 at 09:55:12AM -0600, Rob Herring wrote:
> On Wed, Jul 3, 2019 at 5:23 PM Matthias Kaehlcke <mka@chromium.org> wrote:
> >
> > Hi Florian,
> >
> > On Wed, Jul 03, 2019 at 02:37:47PM -0700, Florian Fainelli wrote:
> > > On 7/3/19 12:37 PM, Matthias Kaehlcke wrote:
> > > > The LED behavior of some Realtek PHYs is configurable. Add the
> > > > property 'realtek,led-modes' to specify the configuration of the
> > > > LEDs.
> > > >
> > > > Signed-off-by: Matthias Kaehlcke <mka@chromium.org>
> > > > ---
> > > > Changes in v2:
> > > > - patch added to the series
> > > > ---
> > > >  .../devicetree/bindings/net/realtek.txt         |  9 +++++++++
> > > >  include/dt-bindings/net/realtek.h               | 17 +++++++++++++++++
> > > >  2 files changed, 26 insertions(+)
> > > >  create mode 100644 include/dt-bindings/net/realtek.h
> > > >
> > > > diff --git a/Documentation/devicetree/bindings/net/realtek.txt b/Documentation/devicetree/bindings/net/realtek.txt
> > > > index 71d386c78269..40b0d6f9ee21 100644
> > > > --- a/Documentation/devicetree/bindings/net/realtek.txt
> > > > +++ b/Documentation/devicetree/bindings/net/realtek.txt
> > > > @@ -9,6 +9,12 @@ Optional properties:
> > > >
> > > >     SSC is only available on some Realtek PHYs (e.g. RTL8211E).
> > > >
> > > > +- realtek,led-modes: LED mode configuration.
> > > > +
> > > > +   A 0..3 element vector, with each element configuring the operating
> > > > +   mode of an LED. Omitted LEDs are turned off. Allowed values are
> > > > +   defined in "include/dt-bindings/net/realtek.h".
> > >
> > > This should probably be made more general and we should define LED modes
> > > that makes sense regardless of the PHY device, introduce a set of
> > > generic functions for validating and then add new function pointer for
> > > setting the LED configuration to the PHY driver. This would allow to be
> > > more future proof where each PHY driver could expose standard LEDs class
> > > devices to user-space, and it would also allow facilities like: ethtool
> > > -p to plug into that.
> > >
> > > Right now, each driver invents its own way of configuring LEDs, that
> > > does not scale, and there is not really a good reason for that other
> > > than reviewing drivers in isolation and therefore making it harder to
> > > extract the commonality. Yes, I realize that since you are the latest
> > > person submitting something in that area, you are being selected :)
> 
> I agree.
> 
> > I see the merit of your proposal to come up with a generic mechanism
> > to configure Ethernet LEDs, however I can't justify spending much of
> > my work time on this. If it is deemed useful I'm happy to send another
> > version of the current patchset that addresses the reviewer's comments,
> > but if the implementation of a generic LED configuration interface is
> > a requirement I will have to abandon at least the LED configuration
> > part of this series.
> 
> Can you at least define a common binding for this. Maybe that's just
> removing 'realtek'. While the kernel side can evolve to a common
> infrastructure, the DT bindings can't.

Defining a common binding sounds good to me, I will follow up on
Florian's reply to this.

^ permalink raw reply

* Re: [PATCH v2 bpf-next 1/3] bpf: fix BTF verifier size resolution logic
From: Andrii Nakryiko @ 2019-07-12 17:10 UTC (permalink / raw)
  To: Yonghong Song
  Cc: Andrii Nakryiko, bpf@vger.kernel.org, netdev@vger.kernel.org,
	Alexei Starovoitov, daniel@iogearbox.net, Kernel Team, Martin Lau
In-Reply-To: <1563de9c-b5f6-cb15-18f6-cc01d3ddd110@fb.com>

On Fri, Jul 12, 2019 at 8:47 AM Yonghong Song <yhs@fb.com> wrote:
>
>
>
> On 7/12/19 8:36 AM, Andrii Nakryiko wrote:
> > On Thu, Jul 11, 2019 at 10:59 PM Yonghong Song <yhs@fb.com> wrote:
> >>
> >>
> >>
> >> On 7/10/19 11:53 PM, Andrii Nakryiko wrote:
> >>> BTF verifier has a size resolution bug which in some circumstances leads to
> >>> invalid size resolution for, e.g., TYPEDEF modifier.  This happens if we have
> >>> [1] PTR -> [2] TYPEDEF -> [3] ARRAY, in which case due to being in pointer
> >>> context ARRAY size won't be resolved (because for pointer it doesn't matter, so
> >>> it's a sink in pointer context), but it will be permanently remembered as zero
> >>> for TYPEDEF and TYPEDEF will be marked as RESOLVED. Eventually ARRAY size will
> >>> be resolved correctly, but TYPEDEF resolved_size won't be updated anymore.
> >>> This, subsequently, will lead to erroneous map creation failure, if that
> >>> TYPEDEF is specified as either key or value, as key_size/value_size won't
> >>> correspond to resolved size of TYPEDEF (kernel will believe it's zero).
> >>>
> >>> Note, that if BTF was ordered as [1] ARRAY <- [2] TYPEDEF <- [3] PTR, this
> >>> won't be a problem, as by the time we get to TYPEDEF, ARRAY's size is already
> >>> calculated and stored.
> >>>
> >>> This bug manifests itself in rejecting BTF-defined maps that use array
> >>> typedef as a value type:
> >>>
> >>> typedef int array_t[16];
> >>>
> >>> struct {
> >>>       __uint(type, BPF_MAP_TYPE_ARRAY);
> >>>       __type(value, array_t); /* i.e., array_t *value; */
> >>> } test_map SEC(".maps");
> >>>
> >>> The fix consists on not relying on modifier's resolved_size and instead using
> >>> modifier's resolved_id (type ID for "concrete" type to which modifier
> >>> eventually resolves) and doing size determination for that resolved type. This
> >>> allow to preserve existing "early DFS termination" logic for PTR or
> >>> STRUCT_OR_ARRAY contexts, but still do correct size determination for modifier
> >>> types.
> >>>
> >>> Fixes: eb3f595dab40 ("bpf: btf: Validate type reference")
> >>> Cc: Martin KaFai Lau <kafai@fb.com>
> >>> Signed-off-by: Andrii Nakryiko <andriin@fb.com>
> >>> ---
> >>>    kernel/bpf/btf.c | 14 ++++++++++----
> >>>    1 file changed, 10 insertions(+), 4 deletions(-)
> >>>
> >>> diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
> >>> index cad09858a5f2..22fe8b155e51 100644
> >>> --- a/kernel/bpf/btf.c
> >>> +++ b/kernel/bpf/btf.c
> >>> @@ -1073,11 +1073,18 @@ const struct btf_type *btf_type_id_size(const struct btf *btf,
> >>>                                 !btf_type_is_var(size_type)))
> >>>                        return NULL;
> >>>
> >>> -             size = btf->resolved_sizes[size_type_id];
> >>>                size_type_id = btf->resolved_ids[size_type_id];
> >>>                size_type = btf_type_by_id(btf, size_type_id);
> >>>                if (btf_type_nosize_or_null(size_type))
> >>>                        return NULL;
> >>> +             else if (btf_type_has_size(size_type))
> >>> +                     size = size_type->size;
> >>> +             else if (btf_type_is_array(size_type))
> >>> +                     size = btf->resolved_sizes[size_type_id];
> >>> +             else if (btf_type_is_ptr(size_type))
> >>> +                     size = sizeof(void *);
> >>> +             else
> >>> +                     return NULL;
> >>
> >> Looks good to me. Not sure whether we need to do any adjustment for
> >> var kind or not. Maybe we can do similar change in btf_var_resolve()
> >
> > I don't think btf_var_resolve() needs any change. btf_var_resolve
> > can't be referenced by modifier, so it doesn't have any problem. It's
> > similar to array in that it's size will be determined correctly.
>
> Correct. With your previous patch, the resolved_sizes[..] for var type
> is not used, so that is why I suggest to just set it to 0.

Ah, sorry, I misunderstood your initial suggestion. Yes,
resolved_sizes for VAR is not used anymore, so yeah, I'll set it to
zero.

>
> >
> > But I think btf_type_id_size() doesn't handle var case correctly, I'll do
> >
> > +             else if (btf_type_is_array(size_type) ||
> > btf_type_is_var(size_type))
> > +                     size = btf->resolved_sizes[size_type_id];
>
> This change should work too (to use btf->resolved_sizes[...]).

Reading code again, VAR's size is handled similar to modifier's size,
so I won't change btf_type_id_size().

Posting v3 soon.

>
> >
> > to fix that.
> >
> >> to btf_modifier_resolve()? But I do not think it impacts correctness
> >> similar to btf_modifier_resolve() below as you changed
> >> btf_type_id_size() implementation in the above.
> >>
> >>>        }
> >>>
> >>>        *type_id = size_type_id;
> >>> @@ -1602,7 +1609,6 @@ static int btf_modifier_resolve(struct btf_verifier_env *env,
> >>>        const struct btf_type *next_type;
> >>>        u32 next_type_id = t->type;
> >>>        struct btf *btf = env->btf;
> >>> -     u32 next_type_size = 0;
> >>>
> >>>        next_type = btf_type_by_id(btf, next_type_id);
> >>>        if (!next_type || btf_type_is_resolve_source_only(next_type)) {
> >>> @@ -1620,7 +1626,7 @@ static int btf_modifier_resolve(struct btf_verifier_env *env,
> >>>         * save us a few type-following when we use it later (e.g. in
> >>>         * pretty print).
> >>>         */
> >>> -     if (!btf_type_id_size(btf, &next_type_id, &next_type_size)) {
> >>> +     if (!btf_type_id_size(btf, &next_type_id, NULL)) {
> >>>                if (env_type_is_resolved(env, next_type_id))
> >>>                        next_type = btf_type_id_resolve(btf, &next_type_id);
> >>>
> >>> @@ -1633,7 +1639,7 @@ static int btf_modifier_resolve(struct btf_verifier_env *env,
> >>>                }
> >>>        }
> >>>
> >>> -     env_stack_pop_resolved(env, next_type_id, next_type_size);
> >>> +     env_stack_pop_resolved(env, next_type_id, 0);
> >>>
> >>>        return 0;
> >>>    }
> >>>

^ permalink raw reply

* Re: [PATCH v1 1/6] rcu: Add support for consolidated-RCU reader checking
From: Joel Fernandes @ 2019-07-12 17:06 UTC (permalink / raw)
  To: Paul E. McKenney
  Cc: Peter Zijlstra, linux-kernel, Alexey Kuznetsov, Bjorn Helgaas,
	Borislav Petkov, c0d1n61at3, David S. Miller, edumazet,
	Greg Kroah-Hartman, Hideaki YOSHIFUJI, H. Peter Anvin,
	Ingo Molnar, Josh Triplett, keescook, kernel-hardening,
	Lai Jiangshan, Len Brown, linux-acpi, linux-pci, linux-pm,
	Mathieu Desnoyers, neilb, netdev, oleg, Pavel Machek,
	Rafael J. Wysocki, Rasmus Villemoes, rcu, Steven Rostedt,
	Tejun Heo, Thomas Gleixner, will,
	maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT)
In-Reply-To: <20190712164531.GW26519@linux.ibm.com>

On Fri, Jul 12, 2019 at 09:45:31AM -0700, Paul E. McKenney wrote:
> On Fri, Jul 12, 2019 at 11:10:51AM -0400, Joel Fernandes wrote:
> > On Fri, Jul 12, 2019 at 01:11:25PM +0200, Peter Zijlstra wrote:
> > > On Thu, Jul 11, 2019 at 07:43:56PM -0400, Joel Fernandes (Google) wrote:
> > > > +int rcu_read_lock_any_held(void)
> > > > +{
> > > > +	int lockdep_opinion = 0;
> > > > +
> > > > +	if (!debug_lockdep_rcu_enabled())
> > > > +		return 1;
> > > > +	if (!rcu_is_watching())
> > > > +		return 0;
> > > > +	if (!rcu_lockdep_current_cpu_online())
> > > > +		return 0;
> > > > +
> > > > +	/* Preemptible RCU flavor */
> > > > +	if (lock_is_held(&rcu_lock_map))
> > > 
> > > you forgot debug_locks here.
> > 
> > Actually, it turns out debug_locks checking is not even needed. If
> > debug_locks == 0, then debug_lockdep_rcu_enabled() returns 0 and we would not
> > get to this point.
> > 
> > > > +		return 1;
> > > > +
> > > > +	/* BH flavor */
> > > > +	if (in_softirq() || irqs_disabled())
> > > 
> > > I'm not sure I'd put irqs_disabled() under BH, also this entire
> > > condition is superfluous, see below.
> > > 
> > > > +		return 1;
> > > > +
> > > > +	/* Sched flavor */
> > > > +	if (debug_locks)
> > > > +		lockdep_opinion = lock_is_held(&rcu_sched_lock_map);
> > > > +	return lockdep_opinion || !preemptible();
> > > 
> > > that !preemptible() turns into:
> > > 
> > >   !(preempt_count()==0 && !irqs_disabled())
> > > 
> > > which is:
> > > 
> > >   preempt_count() != 0 || irqs_disabled()
> > > 
> > > and already includes irqs_disabled() and in_softirq().
> > > 
> > > > +}
> > > 
> > > So maybe something lke:
> > > 
> > > 	if (debug_locks && (lock_is_held(&rcu_lock_map) ||
> > > 			    lock_is_held(&rcu_sched_lock_map)))
> > > 		return true;
> > 
> > Agreed, I will do it this way (without the debug_locks) like:
> > 
> > ---8<-----------------------
> > 
> > diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
> > index ba861d1716d3..339aebc330db 100644
> > --- a/kernel/rcu/update.c
> > +++ b/kernel/rcu/update.c
> > @@ -296,27 +296,15 @@ EXPORT_SYMBOL_GPL(rcu_read_lock_bh_held);
> >  
> >  int rcu_read_lock_any_held(void)
> >  {
> > -	int lockdep_opinion = 0;
> > -
> >  	if (!debug_lockdep_rcu_enabled())
> >  		return 1;
> >  	if (!rcu_is_watching())
> >  		return 0;
> >  	if (!rcu_lockdep_current_cpu_online())
> >  		return 0;
> > -
> > -	/* Preemptible RCU flavor */
> > -	if (lock_is_held(&rcu_lock_map))
> > -		return 1;
> > -
> > -	/* BH flavor */
> > -	if (in_softirq() || irqs_disabled())
> > -		return 1;
> > -
> > -	/* Sched flavor */
> > -	if (debug_locks)
> > -		lockdep_opinion = lock_is_held(&rcu_sched_lock_map);
> > -	return lockdep_opinion || !preemptible();
> > +	if (lock_is_held(&rcu_lock_map) || lock_is_held(&rcu_sched_lock_map))
> 
> OK, I will bite...  Why not also lock_is_held(&rcu_bh_lock_map)?

Hmm, I was borrowing the strategy from rcu_read_lock_bh_held() which does not
check for a lock held in this map.

Honestly, even  lock_is_held(&rcu_sched_lock_map) seems unnecessary per-se
since !preemptible() will catch that? rcu_read_lock_sched() disables
preemption already, so lockdep's opinion of the matter seems redundant there.

Sorry I already sent out patches again before seeing your comment but I can
rework and resend them based on any other suggestions.

thanks,

 - Joel



^ permalink raw reply

* Re: [PATCH net] net: fix use-after-free in __netif_receive_skb_core
From: Jonathan Lemon @ 2019-07-12 17:06 UTC (permalink / raw)
  To: Edward Cree; +Cc: Sabrina Dubroca, netdev, Andreas Steinmetz
In-Reply-To: <8166b82f-8430-1441-32e7-540c1829754e@solarflare.com>



On 12 Jul 2019, at 8:29, Edward Cree wrote:

> On 10/07/2019 23:47, Sabrina Dubroca wrote:
>> 2019-07-10, 16:07:43 +0100, Edward Cree wrote:
>>> On 10/07/2019 14:52, Sabrina Dubroca wrote:
>>>> -static int __netif_receive_skb_core(struct sk_buff *skb, bool 
>>>> pfmemalloc,
>>>> +static int __netif_receive_skb_core(struct sk_buff **pskb, bool 
>>>> pfmemalloc,
>>>>  				    struct packet_type **ppt_prev)
>>>>  {
>>>>  	struct packet_type *ptype, *pt_prev;
>>>>  	rx_handler_func_t *rx_handler;
>>>> +	struct sk_buff *skb = *pskb;
>>> Would it not be simpler just to change all users of skb to *pskb?
>>> Then you avoid having to keep doing "*pskb = skb;" whenever skb 
>>> changes
>>>  (with concomitant risk of bugs if one gets missed).
>> Yes, that would be less risky. I wrote a version of the patch that 
>> did
>> exactly that, but found it really too ugly (both the patch and the
>> resulting code).
> If you've still got that version (or can dig it out of your reflog), 
> can
>  you post it so we can see just how ugly it turns out?
>
>>  We have more than 50 occurences of skb, including
>> things like:
>>
>>     atomic_long_inc(&skb->dev->rx_dropped);
> Ooh, yes, I can see why that ends up looking funny...
>
> Here's a thought, how about switching round the 
> return-vs-pass-by-pointer
>  and writing:
>
> static struct sk_buff *__netif_receive_skb_core(struct sk_buff *skb, 
> bool pfmemalloc,
>                                                 struct packet_type 
> **ppt_prev, int *ret)
> ?
> (Although, you might want to rename 'ret' in that case.)
>
> Does that make things any less ugly?

That was actually my first thought as well - this seems to fit well with 
the
other APIS which can return a different skb.
-- 
Jonathan


^ permalink raw reply

* [PATCH iproute2-next] tunnel: factorize printout of GRE key and flags
From: Andrea Claudi @ 2019-07-12 17:02 UTC (permalink / raw)
  To: netdev; +Cc: stephen, dsahern

print_tunnel() functions in ip6tunnel.c and iptunnel.c contains
the same code to print out GRE key and flags

This commit factorize the code in a helper function in tunnel.c

Signed-off-by: Andrea Claudi <aclaudi@redhat.com>
---
 ip/ip6tunnel.c | 22 ++--------------------
 ip/iptunnel.c  | 19 ++-----------------
 ip/tunnel.c    | 26 ++++++++++++++++++++++++++
 ip/tunnel.h    |  3 +++
 4 files changed, 33 insertions(+), 37 deletions(-)

diff --git a/ip/ip6tunnel.c b/ip/ip6tunnel.c
index 2e0f099cd7ced..d7684a673fdc4 100644
--- a/ip/ip6tunnel.c
+++ b/ip/ip6tunnel.c
@@ -120,26 +120,8 @@ static void print_tunnel(const void *t)
 	if (p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE)
 		printf(" allow-localremote");
 
-	if ((p->i_flags & GRE_KEY) && (p->o_flags & GRE_KEY) &&
-	    p->o_key == p->i_key)
-		printf(" key %u", ntohl(p->i_key));
-	else {
-		if (p->i_flags & GRE_KEY)
-			printf(" ikey %u", ntohl(p->i_key));
-		if (p->o_flags & GRE_KEY)
-			printf(" okey %u", ntohl(p->o_key));
-	}
-
-	if (p->proto == IPPROTO_GRE) {
-		if (p->i_flags & GRE_SEQ)
-			printf("%s  Drop packets out of sequence.", _SL_);
-		if (p->i_flags & GRE_CSUM)
-			printf("%s  Checksum in received packet is required.", _SL_);
-		if (p->o_flags & GRE_SEQ)
-			printf("%s  Sequence packets on output.", _SL_);
-		if (p->o_flags & GRE_CSUM)
-			printf("%s  Checksum output packets.", _SL_);
-	}
+	tnl_print_gre_flags(p->proto, p->i_flags, p->o_flags,
+			    p->i_key, p->o_key);
 }
 
 static int parse_args(int argc, char **argv, int cmd, struct ip6_tnl_parm2 *p)
diff --git a/ip/iptunnel.c b/ip/iptunnel.c
index 92a5cb923b6b0..66929e75c7448 100644
--- a/ip/iptunnel.c
+++ b/ip/iptunnel.c
@@ -354,23 +354,8 @@ static void print_tunnel(const void *t)
 		}
 	}
 
-	if ((p->i_flags & GRE_KEY) && (p->o_flags & GRE_KEY) && p->o_key == p->i_key)
-		printf(" key %u", ntohl(p->i_key));
-	else if ((p->i_flags | p->o_flags) & GRE_KEY) {
-		if (p->i_flags & GRE_KEY)
-			printf(" ikey %u", ntohl(p->i_key));
-		if (p->o_flags & GRE_KEY)
-			printf(" okey %u", ntohl(p->o_key));
-	}
-
-	if (p->i_flags & GRE_SEQ)
-		printf("%s  Drop packets out of sequence.", _SL_);
-	if (p->i_flags & GRE_CSUM)
-		printf("%s  Checksum in received packet is required.", _SL_);
-	if (p->o_flags & GRE_SEQ)
-		printf("%s  Sequence packets on output.", _SL_);
-	if (p->o_flags & GRE_CSUM)
-		printf("%s  Checksum output packets.", _SL_);
+	tnl_print_gre_flags(p->iph.protocol, p->i_flags, p->o_flags,
+			    p->i_key, p->o_key);
 }
 
 
diff --git a/ip/tunnel.c b/ip/tunnel.c
index d0d55f37169e9..41b0ef3165fdc 100644
--- a/ip/tunnel.c
+++ b/ip/tunnel.c
@@ -308,6 +308,32 @@ void tnl_print_endpoint(const char *name, const struct rtattr *rta, int family)
 	}
 }
 
+void tnl_print_gre_flags(__u8 proto,
+			 __be16 i_flags, __be16 o_flags,
+			 __be32 i_key, __be32 o_key)
+{
+	if ((i_flags & GRE_KEY) && (o_flags & GRE_KEY) &&
+	    o_key == i_key) {
+		printf(" key %u", ntohl(i_key));
+	} else {
+		if (i_flags & GRE_KEY)
+			printf(" ikey %u", ntohl(i_key));
+		if (o_flags & GRE_KEY)
+			printf(" okey %u", ntohl(o_key));
+	}
+
+	if (proto == IPPROTO_GRE) {
+		if (i_flags & GRE_SEQ)
+			printf("%s  Drop packets out of sequence.", _SL_);
+		if (i_flags & GRE_CSUM)
+			printf("%s  Checksum in received packet is required.", _SL_);
+		if (o_flags & GRE_SEQ)
+			printf("%s  Sequence packets on output.", _SL_);
+		if (o_flags & GRE_CSUM)
+			printf("%s  Checksum output packets.", _SL_);
+	}
+}
+
 static void tnl_print_stats(const struct rtnl_link_stats64 *s)
 {
 	printf("%s", _SL_);
diff --git a/ip/tunnel.h b/ip/tunnel.h
index e530d07cbf6a2..604f8cbfd6db2 100644
--- a/ip/tunnel.h
+++ b/ip/tunnel.h
@@ -55,5 +55,8 @@ void tnl_print_encap(struct rtattr *tb[],
 		     int encap_sport, int encap_dport);
 void tnl_print_endpoint(const char *name,
 			const struct rtattr *rta, int family);
+void tnl_print_gre_flags(__u8 proto,
+			 __be16 i_flags, __be16 o_flags,
+			 __be32 i_key, __be32 o_key);
 
 #endif
-- 
2.20.1


^ permalink raw reply related

* [PATCH v2 1/9] rcu/update: Remove useless check for debug_locks
From: Joel Fernandes (Google) @ 2019-07-12 17:00 UTC (permalink / raw)
  To: linux-kernel
  Cc: Joel Fernandes (Google), Alexey Kuznetsov, Bjorn Helgaas,
	Borislav Petkov, c0d1n61at3, David S. Miller, edumazet,
	Greg Kroah-Hartman, Hideaki YOSHIFUJI, H. Peter Anvin,
	Ingo Molnar, Jonathan Corbet, Josh Triplett, keescook,
	kernel-hardening, kernel-team, Lai Jiangshan, Len Brown,
	linux-acpi, linux-doc, linux-pci, linux-pm, Mathieu Desnoyers,
	neilb, netdev, Oleg Nesterov, Paul E. McKenney, Pavel Machek,
	peterz, Rafael J. Wysocki, Rasmus Villemoes, rcu, Steven Rostedt,
	Tejun Heo, Thomas Gleixner, will,
	maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT)
In-Reply-To: <20190712170024.111093-1-joel@joelfernandes.org>

In rcu_read_lock_sched_held(), debug_locks can never be true at the
point we check it because we already check debug_locks in
debug_lockdep_rcu_enabled() in the beginning. Remove the check.

Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
---
 kernel/rcu/update.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index c3bf44ba42e5..bb961cd89e76 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -93,17 +93,13 @@ module_param(rcu_normal_after_boot, int, 0);
  */
 int rcu_read_lock_sched_held(void)
 {
-	int lockdep_opinion = 0;
-
 	if (!debug_lockdep_rcu_enabled())
 		return 1;
 	if (!rcu_is_watching())
 		return 0;
 	if (!rcu_lockdep_current_cpu_online())
 		return 0;
-	if (debug_locks)
-		lockdep_opinion = lock_is_held(&rcu_sched_lock_map);
-	return lockdep_opinion || !preemptible();
+	return lock_is_held(&rcu_sched_lock_map) || !preemptible();
 }
 EXPORT_SYMBOL(rcu_read_lock_sched_held);
 #endif
-- 
2.22.0.510.g264f2c817a-goog


^ permalink raw reply related

* [PATCH v2 2/9] rcu: Add support for consolidated-RCU reader checking
From: Joel Fernandes (Google) @ 2019-07-12 17:00 UTC (permalink / raw)
  To: linux-kernel
  Cc: Joel Fernandes (Google), Alexey Kuznetsov, Bjorn Helgaas,
	Borislav Petkov, c0d1n61at3, David S. Miller, edumazet,
	Greg Kroah-Hartman, Hideaki YOSHIFUJI, H. Peter Anvin,
	Ingo Molnar, Jonathan Corbet, Josh Triplett, keescook,
	kernel-hardening, kernel-team, Lai Jiangshan, Len Brown,
	linux-acpi, linux-doc, linux-pci, linux-pm, Mathieu Desnoyers,
	neilb, netdev, Oleg Nesterov, Paul E. McKenney, Pavel Machek,
	peterz, Rafael J. Wysocki, Rasmus Villemoes, rcu, Steven Rostedt,
	Tejun Heo, Thomas Gleixner, will,
	maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT)
In-Reply-To: <20190712170024.111093-1-joel@joelfernandes.org>

This patch adds support for checking RCU reader sections in list
traversal macros. Optionally, if the list macro is called under SRCU or
other lock/mutex protection, then appropriate lockdep expressions can be
passed to make the checks pass.

Existing list_for_each_entry_rcu() invocations don't need to pass the
optional fourth argument (cond) unless they are under some non-RCU
protection and needs to make lockdep check pass.

Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
---
 include/linux/rculist.h  | 28 +++++++++++++++++++++++-----
 include/linux/rcupdate.h |  7 +++++++
 kernel/rcu/Kconfig.debug | 11 +++++++++++
 kernel/rcu/update.c      | 14 ++++++++++++++
 4 files changed, 55 insertions(+), 5 deletions(-)

diff --git a/include/linux/rculist.h b/include/linux/rculist.h
index e91ec9ddcd30..1048160625bb 100644
--- a/include/linux/rculist.h
+++ b/include/linux/rculist.h
@@ -40,6 +40,20 @@ static inline void INIT_LIST_HEAD_RCU(struct list_head *list)
  */
 #define list_next_rcu(list)	(*((struct list_head __rcu **)(&(list)->next)))
 
+/*
+ * Check during list traversal that we are within an RCU reader
+ */
+
+#ifdef CONFIG_PROVE_RCU_LIST
+#define __list_check_rcu(dummy, cond, ...)				\
+	({								\
+	RCU_LOCKDEP_WARN(!cond && !rcu_read_lock_any_held(),		\
+			 "RCU-list traversed in non-reader section!");	\
+	 })
+#else
+#define __list_check_rcu(dummy, cond, ...) ({})
+#endif
+
 /*
  * Insert a new entry between two known consecutive entries.
  *
@@ -343,14 +357,16 @@ static inline void list_splice_tail_init_rcu(struct list_head *list,
  * @pos:	the type * to use as a loop cursor.
  * @head:	the head for your list.
  * @member:	the name of the list_head within the struct.
+ * @cond:	optional lockdep expression if called from non-RCU protection.
  *
  * This list-traversal primitive may safely run concurrently with
  * the _rcu list-mutation primitives such as list_add_rcu()
  * as long as the traversal is guarded by rcu_read_lock().
  */
-#define list_for_each_entry_rcu(pos, head, member) \
-	for (pos = list_entry_rcu((head)->next, typeof(*pos), member); \
-		&pos->member != (head); \
+#define list_for_each_entry_rcu(pos, head, member, cond...)		\
+	for (__list_check_rcu(dummy, ## cond, 0),			\
+	     pos = list_entry_rcu((head)->next, typeof(*pos), member);	\
+		&pos->member != (head);					\
 		pos = list_entry_rcu(pos->member.next, typeof(*pos), member))
 
 /**
@@ -616,13 +632,15 @@ static inline void hlist_add_behind_rcu(struct hlist_node *n,
  * @pos:	the type * to use as a loop cursor.
  * @head:	the head for your list.
  * @member:	the name of the hlist_node within the struct.
+ * @cond:	optional lockdep expression if called from non-RCU protection.
  *
  * This list-traversal primitive may safely run concurrently with
  * the _rcu list-mutation primitives such as hlist_add_head_rcu()
  * as long as the traversal is guarded by rcu_read_lock().
  */
-#define hlist_for_each_entry_rcu(pos, head, member)			\
-	for (pos = hlist_entry_safe (rcu_dereference_raw(hlist_first_rcu(head)),\
+#define hlist_for_each_entry_rcu(pos, head, member, cond...)		\
+	for (__list_check_rcu(dummy, ## cond, 0),			\
+	     pos = hlist_entry_safe (rcu_dereference_raw(hlist_first_rcu(head)),\
 			typeof(*(pos)), member);			\
 		pos;							\
 		pos = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(\
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 922bb6848813..712b464ab960 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -223,6 +223,7 @@ int debug_lockdep_rcu_enabled(void);
 int rcu_read_lock_held(void);
 int rcu_read_lock_bh_held(void);
 int rcu_read_lock_sched_held(void);
+int rcu_read_lock_any_held(void);
 
 #else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
 
@@ -243,6 +244,12 @@ static inline int rcu_read_lock_sched_held(void)
 {
 	return !preemptible();
 }
+
+static inline int rcu_read_lock_any_held(void)
+{
+	return !preemptible();
+}
+
 #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
 
 #ifdef CONFIG_PROVE_RCU
diff --git a/kernel/rcu/Kconfig.debug b/kernel/rcu/Kconfig.debug
index 0ec7d1d33a14..b20d0e2903d1 100644
--- a/kernel/rcu/Kconfig.debug
+++ b/kernel/rcu/Kconfig.debug
@@ -7,6 +7,17 @@ menu "RCU Debugging"
 config PROVE_RCU
 	def_bool PROVE_LOCKING
 
+config PROVE_RCU_LIST
+	bool "RCU list lockdep debugging"
+	depends on PROVE_RCU
+	default n
+	help
+	  Enable RCU lockdep checking for list usages. By default it is
+	  turned off since there are several list RCU users that still
+	  need to be converted to pass a lockdep expression. To prevent
+	  false-positive splats, we keep it default disabled but once all
+	  users are converted, we can remove this config option.
+
 config TORTURE_TEST
 	tristate
 	default n
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index bb961cd89e76..0cc7be0fb6b5 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -294,6 +294,20 @@ int rcu_read_lock_bh_held(void)
 }
 EXPORT_SYMBOL_GPL(rcu_read_lock_bh_held);
 
+int rcu_read_lock_any_held(void)
+{
+	if (!debug_lockdep_rcu_enabled())
+		return 1;
+	if (!rcu_is_watching())
+		return 0;
+	if (!rcu_lockdep_current_cpu_online())
+		return 0;
+	if (lock_is_held(&rcu_lock_map) || lock_is_held(&rcu_sched_lock_map))
+		return 1;
+	return !preemptible();
+}
+EXPORT_SYMBOL_GPL(rcu_read_lock_any_held);
+
 #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
 
 /**
-- 
2.22.0.510.g264f2c817a-goog


^ permalink raw reply related

* [PATCH v2 4/9] ipv4: add lockdep condition to fix for_each_entry
From: Joel Fernandes (Google) @ 2019-07-12 17:00 UTC (permalink / raw)
  To: linux-kernel
  Cc: Joel Fernandes (Google), Alexey Kuznetsov, Bjorn Helgaas,
	Borislav Petkov, c0d1n61at3, David S. Miller, edumazet,
	Greg Kroah-Hartman, Hideaki YOSHIFUJI, H. Peter Anvin,
	Ingo Molnar, Jonathan Corbet, Josh Triplett, keescook,
	kernel-hardening, kernel-team, Lai Jiangshan, Len Brown,
	linux-acpi, linux-doc, linux-pci, linux-pm, Mathieu Desnoyers,
	neilb, netdev, Oleg Nesterov, Paul E. McKenney, Pavel Machek,
	peterz, Rafael J. Wysocki, Rasmus Villemoes, rcu, Steven Rostedt,
	Tejun Heo, Thomas Gleixner, will,
	maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT)
In-Reply-To: <20190712170024.111093-1-joel@joelfernandes.org>

Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
---
 net/ipv4/fib_frontend.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index b298255f6fdb..ef7c9f8e8682 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -127,7 +127,8 @@ struct fib_table *fib_get_table(struct net *net, u32 id)
 	h = id & (FIB_TABLE_HASHSZ - 1);
 
 	head = &net->ipv4.fib_table_hash[h];
-	hlist_for_each_entry_rcu(tb, head, tb_hlist) {
+	hlist_for_each_entry_rcu(tb, head, tb_hlist,
+				 lockdep_rtnl_is_held()) {
 		if (tb->tb_id == id)
 			return tb;
 	}
-- 
2.22.0.510.g264f2c817a-goog


^ permalink raw reply related

* [PATCH v2 3/9] rcu/sync: Remove custom check for reader-section
From: Joel Fernandes (Google) @ 2019-07-12 17:00 UTC (permalink / raw)
  To: linux-kernel
  Cc: Joel Fernandes (Google), Oleg Nesterov, Alexey Kuznetsov,
	Bjorn Helgaas, Borislav Petkov, c0d1n61at3, David S. Miller,
	edumazet, Greg Kroah-Hartman, Hideaki YOSHIFUJI, H. Peter Anvin,
	Ingo Molnar, Jonathan Corbet, Josh Triplett, keescook,
	kernel-hardening, kernel-team, Lai Jiangshan, Len Brown,
	linux-acpi, linux-doc, linux-pci, linux-pm, Mathieu Desnoyers,
	neilb, netdev, Paul E. McKenney, Pavel Machek, peterz,
	Rafael J. Wysocki, Rasmus Villemoes, rcu, Steven Rostedt,
	Tejun Heo, Thomas Gleixner, will,
	maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT)
In-Reply-To: <20190712170024.111093-1-joel@joelfernandes.org>

The rcu/sync code was doing its own check whether we are in a reader
section. With RCU consolidating flavors and the generic helper added in
this series, this is no longer need. We can just use the generic helper
and it results in a nice cleanup.

Cc: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
---
Please note: Only build and boot tested this particular patch so far.

 include/linux/rcu_sync.h |  5 ++---
 kernel/rcu/sync.c        | 22 ----------------------
 2 files changed, 2 insertions(+), 25 deletions(-)

diff --git a/include/linux/rcu_sync.h b/include/linux/rcu_sync.h
index 6fc53a1345b3..c954f1efc919 100644
--- a/include/linux/rcu_sync.h
+++ b/include/linux/rcu_sync.h
@@ -39,9 +39,8 @@ extern void rcu_sync_lockdep_assert(struct rcu_sync *);
  */
 static inline bool rcu_sync_is_idle(struct rcu_sync *rsp)
 {
-#ifdef CONFIG_PROVE_RCU
-	rcu_sync_lockdep_assert(rsp);
-#endif
+	RCU_LOCKDEP_WARN(!rcu_read_lock_any_held(),
+			 "suspicious rcu_sync_is_idle() usage");
 	return !rsp->gp_state; /* GP_IDLE */
 }
 
diff --git a/kernel/rcu/sync.c b/kernel/rcu/sync.c
index a8304d90573f..535e02601f56 100644
--- a/kernel/rcu/sync.c
+++ b/kernel/rcu/sync.c
@@ -10,37 +10,25 @@
 #include <linux/rcu_sync.h>
 #include <linux/sched.h>
 
-#ifdef CONFIG_PROVE_RCU
-#define __INIT_HELD(func)	.held = func,
-#else
-#define __INIT_HELD(func)
-#endif
-
 static const struct {
 	void (*sync)(void);
 	void (*call)(struct rcu_head *, void (*)(struct rcu_head *));
 	void (*wait)(void);
-#ifdef CONFIG_PROVE_RCU
-	int  (*held)(void);
-#endif
 } gp_ops[] = {
 	[RCU_SYNC] = {
 		.sync = synchronize_rcu,
 		.call = call_rcu,
 		.wait = rcu_barrier,
-		__INIT_HELD(rcu_read_lock_held)
 	},
 	[RCU_SCHED_SYNC] = {
 		.sync = synchronize_rcu,
 		.call = call_rcu,
 		.wait = rcu_barrier,
-		__INIT_HELD(rcu_read_lock_sched_held)
 	},
 	[RCU_BH_SYNC] = {
 		.sync = synchronize_rcu,
 		.call = call_rcu,
 		.wait = rcu_barrier,
-		__INIT_HELD(rcu_read_lock_bh_held)
 	},
 };
 
@@ -49,16 +37,6 @@ enum { CB_IDLE = 0, CB_PENDING, CB_REPLAY };
 
 #define	rss_lock	gp_wait.lock
 
-#ifdef CONFIG_PROVE_RCU
-void rcu_sync_lockdep_assert(struct rcu_sync *rsp)
-{
-	RCU_LOCKDEP_WARN(!gp_ops[rsp->gp_type].held(),
-			 "suspicious rcu_sync_is_idle() usage");
-}
-
-EXPORT_SYMBOL_GPL(rcu_sync_lockdep_assert);
-#endif
-
 /**
  * rcu_sync_init() - Initialize an rcu_sync structure
  * @rsp: Pointer to rcu_sync structure to be initialized
-- 
2.22.0.510.g264f2c817a-goog


^ permalink raw reply related

* [PATCH v2 7/9] x86/pci: Pass lockdep condition to pcm_mmcfg_list iterator
From: Joel Fernandes (Google) @ 2019-07-12 17:00 UTC (permalink / raw)
  To: linux-kernel
  Cc: Joel Fernandes (Google), Alexey Kuznetsov, Bjorn Helgaas,
	Borislav Petkov, c0d1n61at3, David S. Miller, edumazet,
	Greg Kroah-Hartman, Hideaki YOSHIFUJI, H. Peter Anvin,
	Ingo Molnar, Jonathan Corbet, Josh Triplett, keescook,
	kernel-hardening, kernel-team, Lai Jiangshan, Len Brown,
	linux-acpi, linux-doc, linux-pci, linux-pm, Mathieu Desnoyers,
	neilb, netdev, Oleg Nesterov, Paul E. McKenney, Pavel Machek,
	peterz, Rafael J. Wysocki, Rasmus Villemoes, rcu, Steven Rostedt,
	Tejun Heo, Thomas Gleixner, will,
	maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT)
In-Reply-To: <20190712170024.111093-1-joel@joelfernandes.org>

The pcm_mmcfg_list is traversed with list_for_each_entry_rcu without a
reader-lock held, because the pci_mmcfg_lock is already held. Make this
known to the list macro so that it fixes new lockdep warnings that
trigger due to lockdep checks added to list_for_each_entry_rcu().

Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
---
 arch/x86/pci/mmconfig-shared.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c
index 7389db538c30..6fa42e9c4e6f 100644
--- a/arch/x86/pci/mmconfig-shared.c
+++ b/arch/x86/pci/mmconfig-shared.c
@@ -29,6 +29,7 @@
 static bool pci_mmcfg_running_state;
 static bool pci_mmcfg_arch_init_failed;
 static DEFINE_MUTEX(pci_mmcfg_lock);
+#define pci_mmcfg_lock_held() lock_is_held(&(pci_mmcfg_lock).dep_map)
 
 LIST_HEAD(pci_mmcfg_list);
 
@@ -54,7 +55,7 @@ static void list_add_sorted(struct pci_mmcfg_region *new)
 	struct pci_mmcfg_region *cfg;
 
 	/* keep list sorted by segment and starting bus number */
-	list_for_each_entry_rcu(cfg, &pci_mmcfg_list, list) {
+	list_for_each_entry_rcu(cfg, &pci_mmcfg_list, list, pci_mmcfg_lock_held()) {
 		if (cfg->segment > new->segment ||
 		    (cfg->segment == new->segment &&
 		     cfg->start_bus >= new->start_bus)) {
@@ -118,7 +119,7 @@ struct pci_mmcfg_region *pci_mmconfig_lookup(int segment, int bus)
 {
 	struct pci_mmcfg_region *cfg;
 
-	list_for_each_entry_rcu(cfg, &pci_mmcfg_list, list)
+	list_for_each_entry_rcu(cfg, &pci_mmcfg_list, list, pci_mmcfg_lock_held())
 		if (cfg->segment == segment &&
 		    cfg->start_bus <= bus && bus <= cfg->end_bus)
 			return cfg;
-- 
2.22.0.510.g264f2c817a-goog


^ permalink raw reply related

* [PATCH v2 9/9] doc: Update documentation about list_for_each_entry_rcu
From: Joel Fernandes (Google) @ 2019-07-12 17:00 UTC (permalink / raw)
  To: linux-kernel
  Cc: Joel Fernandes (Google), Alexey Kuznetsov, Bjorn Helgaas,
	Borislav Petkov, c0d1n61at3, David S. Miller, edumazet,
	Greg Kroah-Hartman, Hideaki YOSHIFUJI, H. Peter Anvin,
	Ingo Molnar, Jonathan Corbet, Josh Triplett, keescook,
	kernel-hardening, kernel-team, Lai Jiangshan, Len Brown,
	linux-acpi, linux-doc, linux-pci, linux-pm, Mathieu Desnoyers,
	neilb, netdev, Oleg Nesterov, Paul E. McKenney, Pavel Machek,
	peterz, Rafael J. Wysocki, Rasmus Villemoes, rcu, Steven Rostedt,
	Tejun Heo, Thomas Gleixner, will,
	maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT)
In-Reply-To: <20190712170024.111093-1-joel@joelfernandes.org>

This patch updates the documentation with information about
usage of lockdep with list_for_each_entry_rcu().

Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
---
 Documentation/RCU/lockdep.txt   | 15 +++++++++++----
 Documentation/RCU/whatisRCU.txt |  9 ++++++++-
 2 files changed, 19 insertions(+), 5 deletions(-)

diff --git a/Documentation/RCU/lockdep.txt b/Documentation/RCU/lockdep.txt
index da51d3068850..3d967df3a801 100644
--- a/Documentation/RCU/lockdep.txt
+++ b/Documentation/RCU/lockdep.txt
@@ -96,7 +96,14 @@ other flavors of rcu_dereference().  On the other hand, it is illegal
 to use rcu_dereference_protected() if either the RCU-protected pointer
 or the RCU-protected data that it points to can change concurrently.
 
-There are currently only "universal" versions of the rcu_assign_pointer()
-and RCU list-/tree-traversal primitives, which do not (yet) check for
-being in an RCU read-side critical section.  In the future, separate
-versions of these primitives might be created.
+Similar to rcu_dereference_protected, The RCU list and hlist traversal
+primitives also check for whether there are called from within a reader
+section. However, an optional lockdep expression can be passed to them as
+the last argument in case they are called under other non-RCU protection.
+
+For example, the workqueue for_each_pwq() macro is implemented as follows.
+It is safe to call for_each_pwq() outside a reader section but under protection
+of wq->mutex:
+#define for_each_pwq(pwq, wq)
+	list_for_each_entry_rcu((pwq), &(wq)->pwqs, pwqs_node,
+				lock_is_held(&(wq->mutex).dep_map))
diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt
index 981651a8b65d..a08c03735963 100644
--- a/Documentation/RCU/whatisRCU.txt
+++ b/Documentation/RCU/whatisRCU.txt
@@ -290,7 +290,7 @@ rcu_dereference()
 	at any time, including immediately after the rcu_dereference().
 	And, again like rcu_assign_pointer(), rcu_dereference() is
 	typically used indirectly, via the _rcu list-manipulation
-	primitives, such as list_for_each_entry_rcu().
+	primitives, such as list_for_each_entry_rcu() [2].
 
 	[1] The variant rcu_dereference_protected() can be used outside
 	of an RCU read-side critical section as long as the usage is
@@ -305,6 +305,13 @@ rcu_dereference()
 	a lockdep splat is emitted.  See RCU/Design/Requirements/Requirements.html
 	and the API's code comments for more details and example usage.
 
+	[2] In case the list_for_each_entry_rcu() primitive is intended
+	to be used outside of an RCU reader section such as when
+	protected by a lock, then an additional lockdep expression can be
+	passed as the last argument to it so that RCU lockdep checking code
+	knows that the dereference of the list pointers are safe. If the
+	indicated protection is not provided, a lockdep splat is emitted.
+
 The following diagram shows how each API communicates among the
 reader, updater, and reclaimer.
 
-- 
2.22.0.510.g264f2c817a-goog


^ permalink raw reply related

* [PATCH v2 8/9] acpi: Use built-in RCU list checking for acpi_ioremaps list
From: Joel Fernandes (Google) @ 2019-07-12 17:00 UTC (permalink / raw)
  To: linux-kernel
  Cc: Joel Fernandes (Google), Alexey Kuznetsov, Bjorn Helgaas,
	Borislav Petkov, c0d1n61at3, David S. Miller, edumazet,
	Greg Kroah-Hartman, Hideaki YOSHIFUJI, H. Peter Anvin,
	Ingo Molnar, Jonathan Corbet, Josh Triplett, keescook,
	kernel-hardening, kernel-team, Lai Jiangshan, Len Brown,
	linux-acpi, linux-doc, linux-pci, linux-pm, Mathieu Desnoyers,
	neilb, netdev, Oleg Nesterov, Paul E. McKenney, Pavel Machek,
	peterz, Rafael J. Wysocki, Rasmus Villemoes, rcu, Steven Rostedt,
	Tejun Heo, Thomas Gleixner, will,
	maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT)
In-Reply-To: <20190712170024.111093-1-joel@joelfernandes.org>

list_for_each_entry_rcu has built-in RCU and lock checking. Make use of
it for acpi_ioremaps list traversal.

Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
---
 drivers/acpi/osl.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c
index f29e427d0d1d..c8b5d712c7ae 100644
--- a/drivers/acpi/osl.c
+++ b/drivers/acpi/osl.c
@@ -28,6 +28,7 @@
 #include <linux/slab.h>
 #include <linux/mm.h>
 #include <linux/highmem.h>
+#include <linux/lockdep.h>
 #include <linux/pci.h>
 #include <linux/interrupt.h>
 #include <linux/kmod.h>
@@ -94,6 +95,7 @@ struct acpi_ioremap {
 
 static LIST_HEAD(acpi_ioremaps);
 static DEFINE_MUTEX(acpi_ioremap_lock);
+#define acpi_ioremap_lock_held() lock_is_held(&acpi_ioremap_lock.dep_map)
 
 static void __init acpi_request_region (struct acpi_generic_address *gas,
 	unsigned int length, char *desc)
@@ -220,7 +222,7 @@ acpi_map_lookup(acpi_physical_address phys, acpi_size size)
 {
 	struct acpi_ioremap *map;
 
-	list_for_each_entry_rcu(map, &acpi_ioremaps, list)
+	list_for_each_entry_rcu(map, &acpi_ioremaps, list, acpi_ioremap_lock_held())
 		if (map->phys <= phys &&
 		    phys + size <= map->phys + map->size)
 			return map;
@@ -263,7 +265,7 @@ acpi_map_lookup_virt(void __iomem *virt, acpi_size size)
 {
 	struct acpi_ioremap *map;
 
-	list_for_each_entry_rcu(map, &acpi_ioremaps, list)
+	list_for_each_entry_rcu(map, &acpi_ioremaps, list, acpi_ioremap_lock_held())
 		if (map->virt <= virt &&
 		    virt + size <= map->virt + map->size)
 			return map;
-- 
2.22.0.510.g264f2c817a-goog


^ permalink raw reply related

* [PATCH v2 6/9] workqueue: Convert for_each_wq to use built-in list check
From: Joel Fernandes (Google) @ 2019-07-12 17:00 UTC (permalink / raw)
  To: linux-kernel
  Cc: Joel Fernandes (Google), Alexey Kuznetsov, Bjorn Helgaas,
	Borislav Petkov, c0d1n61at3, David S. Miller, edumazet,
	Greg Kroah-Hartman, Hideaki YOSHIFUJI, H. Peter Anvin,
	Ingo Molnar, Jonathan Corbet, Josh Triplett, keescook,
	kernel-hardening, kernel-team, Lai Jiangshan, Len Brown,
	linux-acpi, linux-doc, linux-pci, linux-pm, Mathieu Desnoyers,
	neilb, netdev, Oleg Nesterov, Paul E. McKenney, Pavel Machek,
	peterz, Rafael J. Wysocki, Rasmus Villemoes, rcu, Steven Rostedt,
	Tejun Heo, Thomas Gleixner, will,
	maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT)
In-Reply-To: <20190712170024.111093-1-joel@joelfernandes.org>

list_for_each_entry_rcu now has support to check for RCU reader sections
as well as lock. Just use the support in it, instead of explictly
checking in the caller.

Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
---
 kernel/workqueue.c | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 9657315405de..5e88449bdd83 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -363,11 +363,6 @@ static void workqueue_sysfs_unregister(struct workqueue_struct *wq);
 			 !lockdep_is_held(&wq_pool_mutex),		\
 			 "RCU or wq_pool_mutex should be held")
 
-#define assert_rcu_or_wq_mutex(wq)					\
-	RCU_LOCKDEP_WARN(!rcu_read_lock_held() &&			\
-			 !lockdep_is_held(&wq->mutex),			\
-			 "RCU or wq->mutex should be held")
-
 #define assert_rcu_or_wq_mutex_or_pool_mutex(wq)			\
 	RCU_LOCKDEP_WARN(!rcu_read_lock_held() &&			\
 			 !lockdep_is_held(&wq->mutex) &&		\
@@ -424,9 +419,8 @@ static void workqueue_sysfs_unregister(struct workqueue_struct *wq);
  * ignored.
  */
 #define for_each_pwq(pwq, wq)						\
-	list_for_each_entry_rcu((pwq), &(wq)->pwqs, pwqs_node)		\
-		if (({ assert_rcu_or_wq_mutex(wq); false; })) { }	\
-		else
+	list_for_each_entry_rcu((pwq), &(wq)->pwqs, pwqs_node,		\
+				 lock_is_held(&(wq->mutex).dep_map))
 
 #ifdef CONFIG_DEBUG_OBJECTS_WORK
 
-- 
2.22.0.510.g264f2c817a-goog


^ permalink raw reply related

* [PATCH v2 5/9] driver/core: Convert to use built-in RCU list checking
From: Joel Fernandes (Google) @ 2019-07-12 17:00 UTC (permalink / raw)
  To: linux-kernel
  Cc: Joel Fernandes (Google), Greg Kroah-Hartman, Alexey Kuznetsov,
	Bjorn Helgaas, Borislav Petkov, c0d1n61at3, David S. Miller,
	edumazet, Hideaki YOSHIFUJI, H. Peter Anvin, Ingo Molnar,
	Jonathan Corbet, Josh Triplett, keescook, kernel-hardening,
	kernel-team, Lai Jiangshan, Len Brown, linux-acpi, linux-doc,
	linux-pci, linux-pm, Mathieu Desnoyers, neilb, netdev,
	Oleg Nesterov, Paul E. McKenney, Pavel Machek, peterz,
	Rafael J. Wysocki, Rasmus Villemoes, rcu, Steven Rostedt,
	Tejun Heo, Thomas Gleixner, will,
	maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT)
In-Reply-To: <20190712170024.111093-1-joel@joelfernandes.org>

list_for_each_entry_rcu has built-in RCU and lock checking. Make use of
it in driver core.

Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
---
 drivers/base/base.h          |  1 +
 drivers/base/core.c          | 10 ++++++++++
 drivers/base/power/runtime.c | 15 ++++++++++-----
 3 files changed, 21 insertions(+), 5 deletions(-)

diff --git a/drivers/base/base.h b/drivers/base/base.h
index b405436ee28e..0d32544b6f91 100644
--- a/drivers/base/base.h
+++ b/drivers/base/base.h
@@ -165,6 +165,7 @@ static inline int devtmpfs_init(void) { return 0; }
 /* Device links support */
 extern int device_links_read_lock(void);
 extern void device_links_read_unlock(int idx);
+extern int device_links_read_lock_held(void);
 extern int device_links_check_suppliers(struct device *dev);
 extern void device_links_driver_bound(struct device *dev);
 extern void device_links_driver_cleanup(struct device *dev);
diff --git a/drivers/base/core.c b/drivers/base/core.c
index fd7511e04e62..6c5ca9685647 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -68,6 +68,11 @@ void device_links_read_unlock(int idx)
 {
 	srcu_read_unlock(&device_links_srcu, idx);
 }
+
+int device_links_read_lock_held(void)
+{
+	return srcu_read_lock_held(&device_links_srcu);
+}
 #else /* !CONFIG_SRCU */
 static DECLARE_RWSEM(device_links_lock);
 
@@ -91,6 +96,11 @@ void device_links_read_unlock(int not_used)
 {
 	up_read(&device_links_lock);
 }
+
+int device_links_read_lock_held(void)
+{
+	return lock_is_held(&device_links_lock);
+}
 #endif /* !CONFIG_SRCU */
 
 /**
diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index 952a1e7057c7..7a10e8379a70 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -287,7 +287,8 @@ static int rpm_get_suppliers(struct device *dev)
 {
 	struct device_link *link;
 
-	list_for_each_entry_rcu(link, &dev->links.suppliers, c_node) {
+	list_for_each_entry_rcu(link, &dev->links.suppliers, c_node,
+				device_links_read_lock_held()) {
 		int retval;
 
 		if (!(link->flags & DL_FLAG_PM_RUNTIME) ||
@@ -309,7 +310,8 @@ static void rpm_put_suppliers(struct device *dev)
 {
 	struct device_link *link;
 
-	list_for_each_entry_rcu(link, &dev->links.suppliers, c_node) {
+	list_for_each_entry_rcu(link, &dev->links.suppliers, c_node,
+				device_links_read_lock_held()) {
 		if (READ_ONCE(link->status) == DL_STATE_SUPPLIER_UNBIND)
 			continue;
 
@@ -1640,7 +1642,8 @@ void pm_runtime_clean_up_links(struct device *dev)
 
 	idx = device_links_read_lock();
 
-	list_for_each_entry_rcu(link, &dev->links.consumers, s_node) {
+	list_for_each_entry_rcu(link, &dev->links.consumers, s_node,
+				device_links_read_lock_held()) {
 		if (link->flags & DL_FLAG_STATELESS)
 			continue;
 
@@ -1662,7 +1665,8 @@ void pm_runtime_get_suppliers(struct device *dev)
 
 	idx = device_links_read_lock();
 
-	list_for_each_entry_rcu(link, &dev->links.suppliers, c_node)
+	list_for_each_entry_rcu(link, &dev->links.suppliers, c_node,
+				device_links_read_lock_held())
 		if (link->flags & DL_FLAG_PM_RUNTIME) {
 			link->supplier_preactivated = true;
 			refcount_inc(&link->rpm_active);
@@ -1683,7 +1687,8 @@ void pm_runtime_put_suppliers(struct device *dev)
 
 	idx = device_links_read_lock();
 
-	list_for_each_entry_rcu(link, &dev->links.suppliers, c_node)
+	list_for_each_entry_rcu(link, &dev->links.suppliers, c_node,
+				device_links_read_lock_held())
 		if (link->supplier_preactivated) {
 			link->supplier_preactivated = false;
 			if (refcount_dec_not_one(&link->rpm_active))
-- 
2.22.0.510.g264f2c817a-goog


^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox