Netdev List
 help / color / mirror / Atom feed
* [PATCH v2] tools: bpftool: fix reading from /proc/config.gz
From: Peter Wu @ 2019-08-06  1:07 UTC (permalink / raw)
  To: Alexei Starovoitov, Daniel Borkmann
  Cc: netdev, Stanislav Fomichev, Jakub Kicinski, Quentin Monnet

/proc/config has never existed as far as I can see, but /proc/config.gz
is present on Arch Linux. Execute an external gunzip program to avoid
linking to zlib and rework the option scanning code since a pipe is not
seekable. This also fixes a file handle leak on some error paths.

Fixes: 4567b983f78c ("tools: bpftool: add probes for kernel configuration options")
Cc: Quentin Monnet <quentin.monnet@netronome.com>
Signed-off-by: Peter Wu <peter@lekensteyn.nl>
---
 v2: fix style (reorder vars as reverse xmas tree, rename function,
     braces), fallback to /proc/config.gz if uname() fails.

Hi,

Although Stanislav and Jakub suggested to use zlib in v1, I have not
implemented that yet since the current patch is quite minimal.

Using zlib instead of executing an external gzip program would like add
another 100-150 lines. It likely requires a bigger rewrite to avoid
getline() assuming that no temporary file is used for the uncompressed
config. If zlib is desired, I would suggest doing it in another patch.

Thoughts?

Kind regards,
Peter
---
 tools/bpf/bpftool/feature.c | 104 +++++++++++++++++++++---------------
 1 file changed, 60 insertions(+), 44 deletions(-)

diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c
index d672d9086fff..b9ade5a8bc3c 100644
--- a/tools/bpf/bpftool/feature.c
+++ b/tools/bpf/bpftool/feature.c
@@ -284,34 +284,35 @@ static void probe_jit_limit(void)
 	}
 }
 
-static char *get_kernel_config_option(FILE *fd, const char *option)
+static bool read_next_kernel_config_option(FILE *fd, char **buf_p, size_t *n_p,
+					   char **value)
 {
-	size_t line_n = 0, optlen = strlen(option);
-	char *res, *strval, *line = NULL;
-	ssize_t n;
+	ssize_t linelen;
+	char *sep;
 
-	rewind(fd);
-	while ((n = getline(&line, &line_n, fd)) > 0) {
-		if (strncmp(line, option, optlen))
-			continue;
-		/* Check we have at least '=', value, and '\n' */
-		if (strlen(line) < optlen + 3)
+	while ((linelen = getline(buf_p, n_p, fd)) > 0) {
+		char *line = *buf_p;
+
+		if (strncmp(line, "CONFIG_", 7))
 			continue;
-		if (*(line + optlen) != '=')
+
+		sep = memchr(line, '=', linelen);
+		if (!sep)
 			continue;
 
 		/* Trim ending '\n' */
-		line[strlen(line) - 1] = '\0';
+		line[linelen - 1] = '\0';
+
+		/* Split on '=' and ensure that a value is present. */
+		*sep = '\0';
+		if (!sep[1])
+			continue;
 
-		/* Copy and return config option value */
-		strval = line + optlen + 1;
-		res = strdup(strval);
-		free(line);
-		return res;
+		*value = sep + 1;
+		return true;
 	}
-	free(line);
 
-	return NULL;
+	return false;
 }
 
 static void probe_kernel_image_config(void)
@@ -386,31 +387,36 @@ static void probe_kernel_image_config(void)
 		/* test_bpf module for BPF tests */
 		"CONFIG_TEST_BPF",
 	};
+	char *values[ARRAY_SIZE(options)] = { };
 	char *value, *buf = NULL;
 	struct utsname utsn;
 	char path[PATH_MAX];
+	bool is_pipe = false;
+	FILE *fd = NULL;
 	size_t i, n;
 	ssize_t ret;
-	FILE *fd;
 
-	if (uname(&utsn))
-		goto no_config;
+	if (!uname(&utsn)) {
+		snprintf(path, sizeof(path), "/boot/config-%s", utsn.release);
 
-	snprintf(path, sizeof(path), "/boot/config-%s", utsn.release);
+		fd = fopen(path, "r");
+		if (!fd && errno != ENOENT)
+			p_info("Cannot open %s: %s", path, strerror(errno));
+	}
 
-	fd = fopen(path, "r");
-	if (!fd && errno == ENOENT) {
-		/* Some distributions put the config file at /proc/config, give
-		 * it a try.
-		 * Sometimes it is also at /proc/config.gz but we do not try
-		 * this one for now, it would require linking against libz.
+	if (!fd) {
+		/* Some distributions build with CONFIG_IKCONFIG=y and put the
+		 * config file at /proc/config.gz. We try to invoke an external
+		 * gzip program to avoid linking to libz.
+		 * Hide stderr to avoid interference with the JSON output.
 		 */
-		fd = fopen("/proc/config", "r");
+		fd = popen("gunzip -c /proc/config.gz 2>/dev/null", "r");
+		is_pipe = true;
 	}
 	if (!fd) {
 		p_info("skipping kernel config, can't open file: %s",
 		       strerror(errno));
-		goto no_config;
+		goto end_parse;
 	}
 	/* Sanity checks */
 	ret = getline(&buf, &n, fd);
@@ -418,27 +424,37 @@ static void probe_kernel_image_config(void)
 	if (!buf || !ret) {
 		p_info("skipping kernel config, can't read from file: %s",
 		       strerror(errno));
-		free(buf);
-		goto no_config;
+		goto end_parse;
 	}
 	if (strcmp(buf, "# Automatically generated file; DO NOT EDIT.\n")) {
 		p_info("skipping kernel config, can't find correct file");
-		free(buf);
-		goto no_config;
+		goto end_parse;
+	}
+
+	while (read_next_kernel_config_option(fd, &buf, &n, &value)) {
+		for (i = 0; i < ARRAY_SIZE(options); i++) {
+			if (values[i] || strcmp(buf, options[i]))
+				continue;
+
+			values[i] = strdup(value);
+		}
+	}
+
+end_parse:
+	if (fd) {
+		if (is_pipe) {
+			if (pclose(fd))
+				p_info("failed to read /proc/config.gz");
+		} else {
+			fclose(fd);
+		}
 	}
 	free(buf);
 
 	for (i = 0; i < ARRAY_SIZE(options); i++) {
-		value = get_kernel_config_option(fd, options[i]);
-		print_kernel_option(options[i], value);
-		free(value);
+		print_kernel_option(options[i], values[i]);
+		free(values[i]);
 	}
-	fclose(fd);
-	return;
-
-no_config:
-	for (i = 0; i < ARRAY_SIZE(options); i++)
-		print_kernel_option(options[i], NULL);
 }
 
 static bool probe_bpf_syscall(const char *define_prefix)
-- 
2.22.0


^ permalink raw reply related

* Re: [PATCH v1 1/3] net: hisilicon: make hip04_tx_reclaim non-reentrant
From: Jakub Kicinski @ 2019-08-06  0:46 UTC (permalink / raw)
  To: Jiangfeng Xiao
  Cc: davem, yisen.zhuang, salil.mehta, netdev, linux-kernel, leeyou.li,
	xiaowei774, nixiaoming
In-Reply-To: <1564835501-90257-2-git-send-email-xiaojiangfeng@huawei.com>

On Sat, 3 Aug 2019 20:31:39 +0800, Jiangfeng Xiao wrote:
> If hip04_tx_reclaim is interrupted while it is running
> and then __napi_schedule continues to execute
> hip04_rx_poll->hip04_tx_reclaim, reentrancy occurs
> and oops is generated. So you need to mask the interrupt
> during the hip04_tx_reclaim run.

Napi poll method for the same napi instance can't be run concurrently.
Could you explain a little more what happens here?

Also looking at hip04_rx_poll() I don't think the interrupt re-enabling
logic guarantees the interrupt is not armed when NAPI is scheduled.
Please note that NAPI is no longer scheduled if napi_complete_done()
returned false.


^ permalink raw reply

* Re: [PATCH net-next v3] net: phy: broadcom: add 1000Base-X support for BCM54616S
From: Tao Ren @ 2019-08-06  0:11 UTC (permalink / raw)
  To: Heiner Kallweit, Vladimir Oltean
  Cc: Andrew Lunn, Florian Fainelli, David S . Miller,
	Arun Parameswaran, Justin Chen, netdev, lkml,
	openbmc@lists.ozlabs.org
In-Reply-To: <291a3c6e-ca8f-a9b8-a0b8-735a68dc04ea@gmail.com>

On 8/5/19 1:45 PM, Heiner Kallweit wrote:
> On 04.08.2019 21:22, Vladimir Oltean wrote:
>> On Sun, 4 Aug 2019 at 19:07, Heiner Kallweit <hkallweit1@gmail.com> wrote:
>>>
>>> On 04.08.2019 17:59, Vladimir Oltean wrote:
>>>> On Sun, 4 Aug 2019 at 17:52, Andrew Lunn <andrew@lunn.ch> wrote:
>>>>>
>>>>>>> The patchset looks better now. But is it ok, I wonder, to keep
>>>>>>> PHY_BCM_FLAGS_MODE_1000BX in phydev->dev_flags, considering that
>>>>>>> phy_attach_direct is overwriting it?
>>>>>>
>>>>>
>>>>>> I checked ftgmac100 driver (used on my machine) and it calls
>>>>>> phy_connect_direct which passes phydev->dev_flags when calling
>>>>>> phy_attach_direct: that explains why the flag is not cleared in my
>>>>>> case.
>>>>>
>>>>> Yes, that is the way it is intended to be used. The MAC driver can
>>>>> pass flags to the PHY. It is a fragile API, since the MAC needs to
>>>>> know what PHY is being used, since the flags are driver specific.
>>>>>
>>>>> One option would be to modify the assignment in phy_attach_direct() to
>>>>> OR in the flags passed to it with flags which are already in
>>>>> phydev->dev_flags.
>>>>>
>>>>>         Andrew
>>>>
>>>> Even if that were the case (patching phy_attach_direct to apply a
>>>> logical-or to dev_flags), it sounds fishy to me that the genphy code
>>>> is unable to determine that this PHY is running in 1000Base-X mode.
>>>>
>>>> In my opinion it all boils down to this warning:
>>>>
>>>> "PHY advertising (0,00000200,000062c0) more modes than genphy
>>>> supports, some modes not advertised".
>>>>
>>> The genphy code deals with Clause 22 + Gigabit BaseT only.
>>> Question is whether you want aneg at all in 1000Base-X mode and
>>> what you want the config_aneg callback to do.
>>> There may be some inspiration in the Marvel PHY drivers.
>>>
>>
>> AN for 1000Base-X still gives you duplex and pause frame settings. I
>> thought the base page format for exchanging that info is standardized
>> in clause 37.
>> Does genphy cover only copper media by design, or is it desirable to
>> augment genphy_read_status?
>>
> So far we care about copper only in phylib. Some constants needed for
> Clause 37 support are defined, but used by few drivers only.
> 
> ADVERTISE_1000XHALF
> ADVERTISE_1000XFULL
> ADVERTISE_1000XPAUSE
> ADVERTISE_1000XPSE_ASYM
> 
> I think it would make sense to have something like genphy_c37_config_aneg.
> Similar for read_status.

Thank you all for the inputs on this patch.

If I understand correctly, we are going to create a set of genphy_c37_* functions for 1000x support so it can be used by phy drivers? Or are we considering other options? What's your recommendation on this specific patch?


Thanks,

Tao

^ permalink raw reply

* Re: [PATCH 1/1] bpf: introduce new helper udp_flow_src_port
From: Jakub Kicinski @ 2019-08-06  0:10 UTC (permalink / raw)
  To: Y Song, Alexei Starovoitov; +Cc: Farid Zakaria, Daniel Borkmann, netdev, bpf
In-Reply-To: <CAH3MdRXTEN-Ra+61QA37hM2mkHx99K5NM7f+H6d8Em-bxvaenw@mail.gmail.com>

On Sat, 3 Aug 2019 23:52:16 -0700, Y Song wrote:
> >  include/uapi/linux/bpf.h                      | 21 +++++++--
> >  net/core/filter.c                             | 20 ++++++++
> >  tools/include/uapi/linux/bpf.h                | 21 +++++++--
> >  tools/testing/selftests/bpf/bpf_helpers.h     |  2 +
> >  .../bpf/prog_tests/udp_flow_src_port.c        | 28 +++++++++++
> >  .../bpf/progs/test_udp_flow_src_port_kern.c   | 47 +++++++++++++++++++
> >  6 files changed, 131 insertions(+), 8 deletions(-)
> >  create mode 100644 tools/testing/selftests/bpf/prog_tests/udp_flow_src_port.c
> >  create mode 100644 tools/testing/selftests/bpf/progs/test_udp_flow_src_port_kern.c  
> 
> First, for each review, backport and sync with libbpf repo, in the future,
> could you break the patch to two patches?
>    1. kernel changes (net/core/filter.c, include/uapi/linux/bpf.h)
>    2. tools/include/uapi/linux/bpf.h
>    3. tools/testing/ changes

A lot of people get caught off by this, could explain why this is
necessary?

git can deal with this scenario without missing a step, format-patch
takes paths:

$ git show --oneline -s
1002f3e955d7 (HEAD) bpf: introduce new helper udp_flow_src_port

$ git format-patch HEAD~ -- tools/include/uapi/linux/bpf.h
0001-bpf-introduce-new-helper-udp_flow_src_port.patch

$ grep -B1 changed 0001-bpf-introduce-new-helper-udp_flow_src_port.patch 
 tools/include/uapi/linux/bpf.h | 21 +++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

$ cd ../libbpf
$ git am -p2 ../linux/0001-bpf-introduce-new-helper-udp_flow_src_port.patch
Applying: bpf: introduce new helper udp_flow_src_port
error: patch failed: include/uapi/linux/bpf.h:2853
error: include/uapi/linux/bpf.h: patch does not apply
...

Well, the patch doesn't apply to libbpf right now, but git finds the
right paths and all that.

IMO it'd be good to not have this artificial process obstacle and all
the "sync headers" commits in the tree.

^ permalink raw reply

* hv_netvsc: WARNING: suspicious RCU usage?
From: Dexuan Cui @ 2019-08-05 23:55 UTC (permalink / raw)
  To: netdev@vger.kernel.org, Yidong Ren, Haiyang Zhang,
	Stephen Hemminger, David S. Miller
  Cc: linux-hyperv@vger.kernel.org

Hi,
After the VM boots up, I always get the below call-trace when I run "nload" for the first time:

[  113.910911] WARNING: suspicious RCU usage
[  113.913244] 5.2.0+ #19 Not tainted
[  113.915216] -----------------------------
[  113.917521] drivers/net/hyperv/netvsc_drv.c:1243 suspicious rcu_dereference_check() usage!
[  113.922191]
[  113.922191] other info that might help us debug this:
[  113.926573]
[  113.926573] rcu_scheduler_active = 2, debug_locks = 1
[  113.930052] 4 locks held by nload/1977:
[  113.932251]  #0: 0000000080b71e86 (&p->lock){+.+.}, at: seq_read+0x41/0x3d0
[  113.936115]  #1: 00000000cacff770 (&of->mutex){+.+.}, at: kernfs_seq_start+0x2a/0x90
[  113.940115]  #2: 00000000287c988f (kn->count#134){.+.+}, at: kernfs_seq_start+0x32/0x90
[  113.944292]  #3: 00000000996fa9cc (dev_base_lock){++.+}, at: netstat_show.isra.25+0x4a/0xb0
[  113.958076]
[  113.958076] stack backtrace:
[  113.958081] CPU: 3 PID: 1977 Comm: nload Not tainted 5.2.0+ #19
[  113.958083] Hardware name: Microsoft Corporation Virtual Machine/Virtual Machine, BIOS 090006  04/28/2016
[  113.958084] Call Trace:
[  113.958091]  dump_stack+0x67/0x90
[  113.973663]  netvsc_get_stats64+0x159/0x170 [hv_netvsc]
[  113.973663]  dev_get_stats+0x55/0xb0
[  113.973663]  netstat_show.isra.25+0x5b/0xb0
[  113.973663]  dev_attr_show+0x15/0x40
[  113.981661]  sysfs_kf_seq_show+0xad/0xf0
[  113.981661]  seq_read+0x146/0x3d0
[  113.981661]  vfs_read+0x9c/0x160
[  113.989025]  ksys_read+0x5c/0xd0
[  113.989025]  do_syscall_64+0x5e/0x220
[  113.989025]  entry_SYSCALL_64_after_hwframe+0x49/0xbe
[  113.989025] RIP: 0033:0x7f4485daaf31

nload is a console application which monitors network traffic and bandwidth usage in real time.

The warning is caused by the rcu_dereference_rtnl() :

1239 static void netvsc_get_stats64(struct net_device *net,
1240                                struct rtnl_link_stats64 *t)
1241 {
1242         struct net_device_context *ndev_ctx = netdev_priv(net);
1243         struct netvsc_device *nvdev = rcu_dereference_rtnl(ndev_ctx->nvdev);

I think here netvsc_get_stats64() neither holds rcu_read_lock() nor RTNL

IMO it should call rcu_read_lock()/unlock(), or get RTNL to fix the warning?

Thanks,
-- Dexuan


^ permalink raw reply

* Re: [PATCH] tools: bpftool: fix reading from /proc/config.gz
From: Peter Wu @ 2019-08-05 23:54 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: Stanislav Fomichev, Alexei Starovoitov, Daniel Borkmann, netdev,
	Stanislav Fomichev, Quentin Monnet
In-Reply-To: <20190805120649.421211da@cakuba.netronome.com>

Hi all,

Thank you for your quick feedback, I will address them in the next
revision.

On Mon, Aug 05, 2019 at 11:41:09AM +0100, Quentin Monnet wrote:

> As far as I understood (from examining Cilium [0]), /proc/config _is_
> used by some distributions, such as CoreOS. This is why we look at that
> location in bpftool.
> 
> [0] https://github.com/cilium/cilium/blob/master/bpf/run_probes.sh#L42

This comment[1] says "CoreOS uses /proc/config", but I think that is a
typo and is supposed to say "/proc/config.gz". The original feature
request[2] uses "/boot/config" as example.

 [1]: https://github.com/cilium/cilium/pull/1065
 [2]: https://github.com/cilium/cilium/issues/891

Given that "/proc/config.gz" is the standard since at least v2.6.12-rc2,
and the official kernel has no mention of "/proc/config", I would like
to skip the latter. If someone has a need for this and it is not covered
by either /boot/config-$(uname -r) or /proc/config.gz, they could submit
a patch for it with links to documentation. How about that?

> > -static char *get_kernel_config_option(FILE *fd, const char *option)
> > +static bool get_kernel_config_option(FILE *fd, char **buf_p, size_t *n_p,
> > +				     char **value)
> 
> Maybe we could rename this function, and have "next" appear in it
> somewhere? After your changes, it does not return the value for a
> specific option anymore.

I have changed it to "read_next_kernel_config_option", let me know if
you prefer an alternative.

> >  {
> > -	size_t line_n = 0, optlen = strlen(option);
> > -	char *res, *strval, *line = NULL;
> > -	ssize_t n;
> > +	char *sep;
> > +	ssize_t linelen;
> 
> Please order the declarations in reverse-Christmas tree style.

Does this refer to the type, name, or full line length? I did not find
this in CodingStyle, the closest I could get is:
https://lore.kernel.org/patchwork/patch/732076/

I will assume the line length for now.

> >  static void probe_kernel_image_config(void)
> > @@ -386,31 +386,34 @@ static void probe_kernel_image_config(void)
> >  		/* test_bpf module for BPF tests */
> >  		"CONFIG_TEST_BPF",
> >  	};
> > +	char *values[ARRAY_SIZE(options)] = { };
> >  	char *value, *buf = NULL;
> >  	struct utsname utsn;
> >  	char path[PATH_MAX];
> >  	size_t i, n;
> >  	ssize_t ret;
> > -	FILE *fd;
> > +	FILE *fd = NULL;
> > +	bool is_pipe = false;
> 
> Reverse-Christmas-tree style please.

Even if that means moving lines? Something like this?

        char path[PATH_MAX];
   +    bool is_pipe = false;
   +    FILE *fd = NULL;
        size_t i, n;
        ssize_t ret;
   -    FILE *fd;

> >  	if (uname(&utsn))
> > -		goto no_config;
> > +		goto end_parse;
> 
> Just thinking, maybe if uname() fails we can skip /boot/config-$(uname
> -r) but still attempt to parse /proc/config{,.gz} instead of printing
> only NULL options?

Good idea, I'll try a bit harder if uname falls for whatever reason.

> Because some distributions do use /proc/config, we should keep this. You
> can probably add /proc/config.gz as another attempt below (or even
> above) the current case?

I doubt it is actually in use, it looks like a typo in the original PR.
This post only lists /proc/config.gz, /boot/config and
/boot/config-$(uname -r): https://superuser.com/questions/287371

> > +	while (get_kernel_config_option(fd, &buf, &n, &value))> +		for (i = 0; i < ARRAY_SIZE(options); i++) {
> > +			if (values[i] || strcmp(buf, options[i]))
> 
> Can we have an option set multiple times in the config file? If so,
> maybe have a p_info() if values are different to warn users that
> conflicting values were found?

scripts/kconfig/merge_config.sh seems to apply a merge strategy,
overwriting earlier values and warning about it. However this should be
rare given that it ended up at /proc/config.gz. For now I will favor
simplicity over complexity and keep the old situation. Let me know if
you prefer otherwise.


On Mon, Aug 05, 2019 at 12:06:49PM -0700, Jakub Kicinski wrote:
> On Mon, 5 Aug 2019 08:29:36 -0700, Stanislav Fomichev wrote:
> > On 08/05, Peter Wu wrote:
> > > /proc/config has never existed as far as I can see, but /proc/config.gz
> > > is present on Arch Linux. Execute an external gunzip program to avoid
> > > linking to zlib and rework the option scanning code since a pipe is not
> > > seekable. This also fixes a file handle leak on some error paths.  
> > Thanks for doing that! One question: why not link against -lz instead?
> > With fork/execing gunzip you're just hiding this dependency.
> > 
> > You can add something like this to the Makefile:
> > ifeq ($(feature-zlib),1)
> > CLFAGS += -DHAVE_ZLIB
> > endif
> > 
> > And then conditionally add support for config.gz. Thoughts?
> 
> +1

Given that the old code did not have this library dependency I did not
add it (the program would otherwise fail to run). Executing an external
process is similar to what tar does. I will look into linking directly
to zlib, thanks!
-- 
Kind regards,
Peter Wu
https://lekensteyn.nl

^ permalink raw reply

* Re: [PATCH 00/12] block/bio, fs: convert put_page() to put_user_page*()
From: John Hubbard @ 2019-08-05 22:54 UTC (permalink / raw)
  To: Christoph Hellwig, john.hubbard
  Cc: Andrew Morton, Alexander Viro, Anna Schumaker, David S . Miller,
	Dominique Martinet, Eric Van Hensbergen, Jason Gunthorpe,
	Jason Wang, Jens Axboe, Latchesar Ionkov, Michael S . Tsirkin,
	Miklos Szeredi, Trond Myklebust, Christoph Hellwig,
	Matthew Wilcox, linux-mm, LKML, ceph-devel, kvm, linux-block,
	linux-cifs, linux-fsdevel, linux-nfs, linux-rdma, netdev,
	samba-technical, v9fs-developer, virtualization
In-Reply-To: <20190724061750.GA19397@infradead.org>

On 7/23/19 11:17 PM, Christoph Hellwig wrote:
> On Tue, Jul 23, 2019 at 09:25:06PM -0700, john.hubbard@gmail.com wrote:
>> * Store, in the iov_iter, a "came from gup (get_user_pages)" parameter.
>>   Then, use the new iov_iter_get_pages_use_gup() to retrieve it when
>>   it is time to release the pages. That allows choosing between put_page()
>>   and put_user_page*().
>>
>> * Pass in one more piece of information to bio_release_pages: a "from_gup"
>>   parameter. Similar use as above.
>>
>> * Change the block layer, and several file systems, to use
>>   put_user_page*().
> 
> I think we can do this in a simple and better way.  We have 5 ITER_*
> types.  Of those ITER_DISCARD as the name suggests never uses pages, so
> we can skip handling it.  ITER_PIPE is rejected іn the direct I/O path,
> which leaves us with three.
> 

Hi Christoph,

Are you working on anything like this? Or on the put_user_bvec() idea?
Please let me know, otherwise I'll go in and implement something here.


thanks,
-- 
John Hubbard
NVIDIA

> Out of those ITER_BVEC needs a user page reference, so we want to call
> put_user_page* on it.  ITER_BVEC always already has page reference,
> which means in the block direct I/O path path we alread don't take
> a page reference.  We should extent that handling to all other calls
> of iov_iter_get_pages / iov_iter_get_pages_alloc.  I think we should
> just reject ITER_KVEC for direct I/O as well as we have no users and
> it is rather pointless.  Alternatively if we see a use for it the
> callers should always have a life page reference anyway (or might
> be on kmalloc memory), so we really should not take a reference either.
> 
> In other words:  the only time we should ever have to put a page in
> this patch is when they are user pages.  We'll need to clean up
> various bits of code for that, but that can be done gradually before
> even getting to the actual put_user_pages conversion.
> 

^ permalink raw reply

* [PATCH v2 net-next] selftests: Add l2tp tests
From: David Ahern @ 2019-08-05 22:41 UTC (permalink / raw)
  To: davem; +Cc: netdev, David Ahern

From: David Ahern <dsahern@gmail.com>

Add IPv4 and IPv6 l2tp tests. Current set is over IP and with
IPsec.

v2
- add l2tp.sh to TEST_PROGS in Makefile

Signed-off-by: David Ahern <dsahern@gmail.com>
---
 tools/testing/selftests/net/Makefile |   2 +-
 tools/testing/selftests/net/l2tp.sh  | 382 +++++++++++++++++++++++++++++++++++
 2 files changed, 383 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/net/l2tp.sh

diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 70f2d6656170..0bd6b23c97ef 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -10,7 +10,7 @@ TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh udpgso.sh ip_defrag.sh
 TEST_PROGS += udpgso_bench.sh fib_rule_tests.sh msg_zerocopy.sh psock_snd.sh
 TEST_PROGS += udpgro_bench.sh udpgro.sh test_vxlan_under_vrf.sh reuseport_addr_any.sh
 TEST_PROGS += test_vxlan_fdb_changelink.sh so_txtime.sh ipv6_flowlabel.sh
-TEST_PROGS += tcp_fastopen_backup_key.sh fcnal-test.sh
+TEST_PROGS += tcp_fastopen_backup_key.sh fcnal-test.sh l2tp.sh
 TEST_PROGS_EXTENDED := in_netns.sh
 TEST_GEN_FILES =  socket nettest
 TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any
diff --git a/tools/testing/selftests/net/l2tp.sh b/tools/testing/selftests/net/l2tp.sh
new file mode 100644
index 000000000000..5782433886fc
--- /dev/null
+++ b/tools/testing/selftests/net/l2tp.sh
@@ -0,0 +1,382 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# L2TPv3 tunnel between 2 hosts
+#
+#            host-1          |   router   |     host-2
+#                            |            |
+#      lo          l2tp      |            |      l2tp           lo
+# 172.16.101.1  172.16.1.1   |            | 172.16.1.2    172.16.101.2
+#  fc00:101::1   fc00:1::1   |            |   fc00:1::2    fc00:101::2
+#                            |            |
+#                  eth0      |            |     eth0
+#                10.1.1.1    |            |   10.1.2.1
+#              2001:db8:1::1 |            | 2001:db8:2::1
+
+VERBOSE=0
+PAUSE_ON_FAIL=no
+
+which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping)
+
+################################################################################
+#
+log_test()
+{
+	local rc=$1
+	local expected=$2
+	local msg="$3"
+
+	if [ ${rc} -eq ${expected} ]; then
+		printf "TEST: %-60s  [ OK ]\n" "${msg}"
+		nsuccess=$((nsuccess+1))
+	else
+		ret=1
+		nfail=$((nfail+1))
+		printf "TEST: %-60s  [FAIL]\n" "${msg}"
+		if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+			echo
+			echo "hit enter to continue, 'q' to quit"
+			read a
+			[ "$a" = "q" ] && exit 1
+		fi
+	fi
+}
+
+run_cmd()
+{
+	local ns
+	local cmd
+	local out
+	local rc
+
+	ns="$1"
+	shift
+	cmd="$*"
+
+	if [ "$VERBOSE" = "1" ]; then
+		printf "    COMMAND: $cmd\n"
+	fi
+
+	out=$(eval ip netns exec ${ns} ${cmd} 2>&1)
+	rc=$?
+	if [ "$VERBOSE" = "1" -a -n "$out" ]; then
+		echo "    $out"
+	fi
+
+	[ "$VERBOSE" = "1" ] && echo
+
+	return $rc
+}
+
+################################################################################
+# create namespaces and interconnects
+
+create_ns()
+{
+	local ns=$1
+	local addr=$2
+	local addr6=$3
+
+	[ -z "${addr}" ] && addr="-"
+	[ -z "${addr6}" ] && addr6="-"
+
+	ip netns add ${ns}
+
+	ip -netns ${ns} link set lo up
+	if [ "${addr}" != "-" ]; then
+		ip -netns ${ns} addr add dev lo ${addr}
+	fi
+	if [ "${addr6}" != "-" ]; then
+		ip -netns ${ns} -6 addr add dev lo ${addr6}
+	fi
+
+	ip -netns ${ns} ro add unreachable default metric 8192
+	ip -netns ${ns} -6 ro add unreachable default metric 8192
+
+	ip netns exec ${ns} sysctl -qw net.ipv4.ip_forward=1
+	ip netns exec ${ns} sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1
+	ip netns exec ${ns} sysctl -qw net.ipv6.conf.all.forwarding=1
+	ip netns exec ${ns} sysctl -qw net.ipv6.conf.default.forwarding=1
+	ip netns exec ${ns} sysctl -qw net.ipv6.conf.default.accept_dad=0
+}
+
+# create veth pair to connect namespaces and apply addresses.
+connect_ns()
+{
+	local ns1=$1
+	local ns1_dev=$2
+	local ns1_addr=$3
+	local ns1_addr6=$4
+	local ns2=$5
+	local ns2_dev=$6
+	local ns2_addr=$7
+	local ns2_addr6=$8
+
+	ip -netns ${ns1} li add ${ns1_dev} type veth peer name tmp
+	ip -netns ${ns1} li set ${ns1_dev} up
+	ip -netns ${ns1} li set tmp netns ${ns2} name ${ns2_dev}
+	ip -netns ${ns2} li set ${ns2_dev} up
+
+	if [ "${ns1_addr}" != "-" ]; then
+		ip -netns ${ns1} addr add dev ${ns1_dev} ${ns1_addr}
+		ip -netns ${ns2} addr add dev ${ns2_dev} ${ns2_addr}
+	fi
+
+	if [ "${ns1_addr6}" != "-" ]; then
+		ip -netns ${ns1} addr add dev ${ns1_dev} ${ns1_addr6}
+		ip -netns ${ns2} addr add dev ${ns2_dev} ${ns2_addr6}
+	fi
+}
+
+################################################################################
+# test setup
+
+cleanup()
+{
+	local ns
+
+	for ns in host-1 host-2 router
+	do
+		ip netns del ${ns} 2>/dev/null
+	done
+}
+
+setup_l2tp_ipv4()
+{
+	#
+	# configure l2tpv3 tunnel on host-1
+	#
+	ip -netns host-1 l2tp add tunnel tunnel_id 1041 peer_tunnel_id 1042 \
+			 encap ip local 10.1.1.1 remote 10.1.2.1
+	ip -netns host-1 l2tp add session name l2tp4 tunnel_id 1041 \
+			 session_id 1041 peer_session_id 1042
+	ip -netns host-1 link set dev l2tp4 up
+	ip -netns host-1 addr add dev l2tp4 172.16.1.1 peer 172.16.1.2
+
+	#
+	# configure l2tpv3 tunnel on host-2
+	#
+	ip -netns host-2 l2tp add tunnel tunnel_id 1042 peer_tunnel_id 1041 \
+			 encap ip local 10.1.2.1 remote 10.1.1.1
+	ip -netns host-2 l2tp add session name l2tp4 tunnel_id 1042 \
+			 session_id 1042 peer_session_id 1041
+	ip -netns host-2 link set dev l2tp4 up
+	ip -netns host-2 addr add dev l2tp4 172.16.1.2 peer 172.16.1.1
+
+	#
+	# add routes to loopback addresses
+	#
+	ip -netns host-1 ro add 172.16.101.2/32 via 172.16.1.2
+	ip -netns host-2 ro add 172.16.101.1/32 via 172.16.1.1
+}
+
+setup_l2tp_ipv6()
+{
+	#
+	# configure l2tpv3 tunnel on host-1
+	#
+	ip -netns host-1 l2tp add tunnel tunnel_id 1061 peer_tunnel_id 1062 \
+			 encap ip local 2001:db8:1::1 remote 2001:db8:2::1
+	ip -netns host-1 l2tp add session name l2tp6 tunnel_id 1061 \
+			 session_id 1061 peer_session_id 1062
+	ip -netns host-1 link set dev l2tp6 up
+	ip -netns host-1 addr add dev l2tp6 fc00:1::1 peer fc00:1::2
+
+	#
+	# configure l2tpv3 tunnel on host-2
+	#
+	ip -netns host-2 l2tp add tunnel tunnel_id 1062 peer_tunnel_id 1061 \
+			 encap ip local 2001:db8:2::1 remote 2001:db8:1::1
+	ip -netns host-2 l2tp add session name l2tp6 tunnel_id 1062 \
+			 session_id 1062 peer_session_id 1061
+	ip -netns host-2 link set dev l2tp6 up
+	ip -netns host-2 addr add dev l2tp6 fc00:1::2 peer fc00:1::1
+
+	#
+	# add routes to loopback addresses
+	#
+	ip -netns host-1 -6 ro add fc00:101::2/128 via fc00:1::2
+	ip -netns host-2 -6 ro add fc00:101::1/128 via fc00:1::1
+}
+
+setup()
+{
+	# start clean
+	cleanup
+
+	set -e
+	create_ns host-1 172.16.101.1/32 fc00:101::1/128
+	create_ns host-2 172.16.101.2/32 fc00:101::2/128
+	create_ns router
+
+	connect_ns host-1 eth0 10.1.1.1/24 2001:db8:1::1/64 \
+	           router eth1 10.1.1.2/24 2001:db8:1::2/64
+
+	connect_ns host-2 eth0 10.1.2.1/24 2001:db8:2::1/64 \
+	           router eth2 10.1.2.2/24 2001:db8:2::2/64
+
+	ip -netns host-1 ro add 10.1.2.0/24 via 10.1.1.2
+	ip -netns host-1 -6 ro add 2001:db8:2::/64 via 2001:db8:1::2
+
+	ip -netns host-2 ro add 10.1.1.0/24 via 10.1.2.2
+	ip -netns host-2 -6 ro add 2001:db8:1::/64 via 2001:db8:2::2
+
+	setup_l2tp_ipv4
+	setup_l2tp_ipv6
+	set +e
+}
+
+setup_ipsec()
+{
+	#
+	# IPv4
+	#
+	run_cmd host-1 ip xfrm policy add \
+		src 10.1.1.1 dst 10.1.2.1 dir out \
+		tmpl proto esp mode transport
+
+	run_cmd host-1 ip xfrm policy add \
+		src 10.1.2.1 dst 10.1.1.1 dir in \
+		tmpl proto esp mode transport
+
+	run_cmd host-2 ip xfrm policy add \
+		src 10.1.1.1 dst 10.1.2.1 dir in \
+		tmpl proto esp mode transport
+
+	run_cmd host-2 ip xfrm policy add \
+		src 10.1.2.1 dst 10.1.1.1 dir out \
+		tmpl proto esp mode transport
+
+	ip -netns host-1 xfrm state add \
+		src 10.1.1.1 dst 10.1.2.1 \
+		spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' \
+		0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode transport
+
+	ip -netns host-1 xfrm state add \
+		src 10.1.2.1 dst 10.1.1.1 \
+		spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' \
+		0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode transport
+
+	ip -netns host-2 xfrm state add \
+		src 10.1.1.1 dst 10.1.2.1 \
+		spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' \
+		0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode transport
+
+	ip -netns host-2 xfrm state add \
+		src 10.1.2.1 dst 10.1.1.1 \
+		spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' \
+		0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode transport
+
+	#
+	# IPV6
+	#
+	run_cmd host-1 ip -6 xfrm policy add \
+		src 2001:db8:1::1 dst 2001:db8:2::1 dir out \
+		tmpl proto esp mode transport
+
+	run_cmd host-1 ip -6 xfrm policy add \
+		src 2001:db8:2::1 dst 2001:db8:1::1 dir in \
+		tmpl proto esp mode transport
+
+	run_cmd host-2 ip -6 xfrm policy add \
+		src 2001:db8:1::1 dst 2001:db8:2::1 dir in \
+		tmpl proto esp mode transport
+
+	run_cmd host-2 ip -6 xfrm policy add \
+		src 2001:db8:2::1 dst 2001:db8:1::1 dir out \
+		tmpl proto esp mode transport
+
+	ip -netns host-1 -6 xfrm state add \
+		src 2001:db8:1::1 dst 2001:db8:2::1 \
+		spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' \
+		0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode transport
+
+	ip -netns host-1 -6 xfrm state add \
+		src 2001:db8:2::1 dst 2001:db8:1::1 \
+		spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' \
+		0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode transport
+
+	ip -netns host-2 -6 xfrm state add \
+		src 2001:db8:1::1 dst 2001:db8:2::1 \
+		spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' \
+		0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode transport
+
+	ip -netns host-2 -6 xfrm state add \
+		src 2001:db8:2::1 dst 2001:db8:1::1 \
+		spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' \
+		0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode transport
+}
+
+teardown_ipsec()
+{
+	run_cmd host-1 ip xfrm state flush
+	run_cmd host-1 ip xfrm policy flush
+	run_cmd host-2 ip xfrm state flush
+	run_cmd host-2 ip xfrm policy flush
+}
+
+################################################################################
+# generate traffic through tunnel for various cases
+
+run_ping()
+{
+	local desc="$1"
+
+	run_cmd host-1 ping -c1 -w1 172.16.1.2
+	log_test $? 0 "IPv4 basic L2TP tunnel ${desc}"
+
+	run_cmd host-1 ping -c1 -w1 -I 172.16.101.1 172.16.101.2
+	log_test $? 0 "IPv4 route through L2TP tunnel ${desc}"
+
+	run_cmd host-1 ${ping6} -c1 -w1 fc00:1::2
+	log_test $? 0 "IPv6 basic L2TP tunnel ${desc}"
+
+	run_cmd host-1 ${ping6} -c1 -w1 -I fc00:101::1 fc00:101::2
+	log_test $? 0 "IPv6 route through L2TP tunnel ${desc}"
+}
+
+run_tests()
+{
+	local desc
+
+	setup
+	run_ping
+
+	setup_ipsec
+	run_ping "- with IPsec"
+	run_cmd host-1 ping -c1 -w1 172.16.1.2
+	log_test $? 0 "IPv4 basic L2TP tunnel ${desc}"
+
+	run_cmd host-1 ping -c1 -w1 -I 172.16.101.1 172.16.101.2
+	log_test $? 0 "IPv4 route through L2TP tunnel ${desc}"
+
+	run_cmd host-1 ${ping6} -c1 -w1 fc00:1::2
+	log_test $? 0 "IPv6 basic L2TP tunnel - with IPsec"
+
+	run_cmd host-1 ${ping6} -c1 -w1 -I fc00:101::1 fc00:101::2
+	log_test $? 0 "IPv6 route through L2TP tunnel - with IPsec"
+
+	teardown_ipsec
+	run_ping "- after IPsec teardown"
+}
+
+################################################################################
+# main
+
+declare -i nfail=0
+declare -i nsuccess=0
+
+while getopts :pv o
+do
+	case $o in
+		p) PAUSE_ON_FAIL=yes;;
+		v) VERBOSE=$(($VERBOSE + 1));;
+		*) exit 1;;
+	esac
+done
+
+run_tests
+cleanup
+
+printf "\nTests passed: %3d\n" ${nsuccess}
+printf "Tests failed: %3d\n"   ${nfail}
-- 
2.11.0


^ permalink raw reply related

* [PATCH net 2/2] net: docs: replace IPX in tuntap documentation
From: Stephen Hemminger @ 2019-08-05 22:30 UTC (permalink / raw)
  To: davem, corbet; +Cc: linux-dog, netdev, Stephen Hemminger
In-Reply-To: <20190805223003.13444-1-stephen@networkplumber.org>

IPX is no longer supported, but the example in the documentation
might useful. Replace it with IPv6.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 Documentation/networking/tuntap.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Documentation/networking/tuntap.txt b/Documentation/networking/tuntap.txt
index 949d5dcdd9a3..0104830d5075 100644
--- a/Documentation/networking/tuntap.txt
+++ b/Documentation/networking/tuntap.txt
@@ -204,8 +204,8 @@ Ethernet device, which instead of receiving packets from a physical
 media, receives them from user space program and instead of sending 
 packets via physical media sends them to the user space program. 
 
-Let's say that you configured IPX on the tap0, then whenever 
-the kernel sends an IPX packet to tap0, it is passed to the application
+Let's say that you configured IPv6 on the tap0, then whenever
+the kernel sends an IPv6 packet to tap0, it is passed to the application
 (VTun for example). The application encrypts, compresses and sends it to 
 the other side over TCP or UDP. The application on the other side decompresses
 and decrypts the data received and writes the packet to the TAP device, 
-- 
2.20.1


^ permalink raw reply related

* [PATCH net 1/2] docs: admin-guide: remove references to IPX and token-ring
From: Stephen Hemminger @ 2019-08-05 22:30 UTC (permalink / raw)
  To: davem, corbet; +Cc: linux-dog, netdev, Stephen Hemminger

Both IPX and TR have not been supported for a while now.
Remove them from the /proc/sys/net documentation.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 Documentation/admin-guide/sysctl/net.rst | 29 +-----------------------
 1 file changed, 1 insertion(+), 28 deletions(-)

diff --git a/Documentation/admin-guide/sysctl/net.rst b/Documentation/admin-guide/sysctl/net.rst
index a7d44e71019d..287b98708a40 100644
--- a/Documentation/admin-guide/sysctl/net.rst
+++ b/Documentation/admin-guide/sysctl/net.rst
@@ -39,7 +39,6 @@ Table : Subdirectories in /proc/sys/net
  802       E802 protocol         ax25       AX25
  ethernet  Ethernet protocol     rose       X.25 PLP layer
  ipv4      IP version 4          x25        X.25 protocol
- ipx       IPX                   token-ring IBM token ring
  bridge    Bridging              decnet     DEC net
  ipv6      IP version 6          tipc       TIPC
  ========= =================== = ========== ==================
@@ -401,33 +400,7 @@ interface.
 (network) that the route leads to, the router (may be directly connected), the
 route flags, and the device the route is using.
 
-
-5. IPX
-------
-
-The IPX protocol has no tunable values in proc/sys/net.
-
-The IPX  protocol  does,  however,  provide  proc/net/ipx. This lists each IPX
-socket giving  the  local  and  remote  addresses  in  Novell  format (that is
-network:node:port). In  accordance  with  the  strange  Novell  tradition,
-everything but the port is in hex. Not_Connected is displayed for sockets that
-are not  tied to a specific remote address. The Tx and Rx queue sizes indicate
-the number  of  bytes  pending  for  transmission  and  reception.  The  state
-indicates the  state  the  socket  is  in and the uid is the owning uid of the
-socket.
-
-The /proc/net/ipx_interface  file lists all IPX interfaces. For each interface
-it gives  the network number, the node number, and indicates if the network is
-the primary  network.  It  also  indicates  which  device  it  is bound to (or
-Internal for  internal  networks)  and  the  Frame  Type if appropriate. Linux
-supports 802.3,  802.2,  802.2  SNAP  and DIX (Blue Book) ethernet framing for
-IPX.
-
-The /proc/net/ipx_route  table  holds  a list of IPX routes. For each route it
-gives the  destination  network, the router node (or Directly) and the network
-address of the router (or Connected) for internal networks.
-
-6. TIPC
+5. TIPC
 -------
 
 tipc_rmem
-- 
2.20.1


^ permalink raw reply related

* Re: [PATCH v2 bpf-next 1/4] bpf: unprivileged BPF access via /dev/bpf
From: Andy Lutomirski @ 2019-08-05 22:21 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Alexei Starovoitov, Song Liu, Kees Cook, Networking, bpf,
	Alexei Starovoitov, Daniel Borkmann, Kernel Team, Lorenz Bauer,
	Jann Horn, Greg KH, Linux API, LSM List
In-Reply-To: <CALCETrVtPs8gY-H4gmzSqPboid3CB++n50SvYd6RU9YVde_-Ow@mail.gmail.com>



> On Aug 5, 2019, at 2:25 PM, Andy Lutomirski <luto@kernel.org> wrote:
> 
> On Mon, Aug 5, 2019 at 12:21 PM Alexei Starovoitov
> <alexei.starovoitov@gmail.com> wrote:


> 
>> What we need is to drop privileges sooner in daemons like systemd.
> 
> This is doable right now: systemd could fork off a subprocess and
> delegate its cgroup operations to it.  It would be maybe a couple
> hundred lines of code.  As an added benefit, that subprocess could
> verify that the bpf operations in question are reasonable.
> Alternatively, if there was a CAP_BPF_ADMIN, systemd could retain that
> capability and flip it on and off as needed.

I tried to look at the code and I couldn’t find it. Does systemd drop privileges at all?  Can you point me at the code you’re thinking of

^ permalink raw reply

* Re: [PATCH 10/16] net: phy: adin: add EEE translation layer for Clause 22
From: Andrew Lunn @ 2019-08-05 22:11 UTC (permalink / raw)
  To: Alexandru Ardelean
  Cc: netdev, devicetree, linux-kernel, davem, robh+dt, mark.rutland,
	f.fainelli, hkallweit1
In-Reply-To: <20190805165453.3989-11-alexandru.ardelean@analog.com>

> +static int adin_cl22_to_adin_reg(int devad, u16 cl22_regnum)
> +{
> +	struct clause22_mmd_map *m;
> +	int i;
> +
> +	if (devad == MDIO_MMD_VEND1)
> +		return cl22_regnum;
> +
> +	for (i = 0; i < ARRAY_SIZE(clause22_mmd_map); i++) {
> +		m = &clause22_mmd_map[i];
> +		if (m->devad == devad && m->cl22_regnum == cl22_regnum)
> +			return m->adin_regnum;
> +	}
> +
> +	pr_err("No translation available for devad: %d reg: %04x\n",
> +	       devad, cl22_regnum);

phydev_err(). 

	      Andrew

^ permalink raw reply

* Re: [PATCH] dt-bindings: net: meson-dwmac: convert to yaml
From: Rob Herring @ 2019-08-05 22:09 UTC (permalink / raw)
  To: Neil Armstrong
  Cc: Martin Blumenstingl, devicetree, netdev, linux-amlogic,
	moderated list:ARM/FREESCALE IMX / MXC ARM ARCHITECTURE,
	linux-kernel@vger.kernel.org
In-Reply-To: <20190805122558.5130-1-narmstrong@baylibre.com>

On Mon, Aug 5, 2019 at 6:26 AM Neil Armstrong <narmstrong@baylibre.com> wrote:
>
> Now that we have the DT validation in place, let's convert the device tree
> bindings for the Synopsys DWMAC Glue for Amlogic SoCs over to a YAML schemas.
>
> Signed-off-by: Neil Armstrong <narmstrong@baylibre.com>
> ---
> Rob,
>
> I keep getting :
> .../devicetree/bindings/net/amlogic,meson-dwmac.example.dt.yaml: ethernet@c9410000: reg: [[3376480256, 65536], [3364046144, 8]] is too long

Because snps,dwmac.yaml has:

  reg:
    maxItems: 1

The schemas are applied separately and all have to be valid. You'll
need to change snps,dwmac.yaml to:

reg:
  minItems: 1
  maxItems: 2


The schema error messages leave something to be desired. I wish the
error messages said which schema is throwing the error.

> for the example DT
>
> and for the board DT :
> ../amlogic/meson-gxl-s905x-libretech-cc.dt.yaml: ethernet@c9410000: reg: [[0, 3376480256, 0, 65536, 0, 3364046144, 0, 4]] is too short
> ../amlogic/meson-gxl-s905x-nexbox-a95x.dt.yaml: soc: ethernet@c9410000:reg:0: [0, 3376480256, 0, 65536, 0, 3364046144, 0, 4] is too long
>
> and I don't know how to get rid of it.

The first issue is the same as the above. The 2nd issue is the use of
<> in dts files becomes stricter with the schema. Each entry in an
array needs to be bracketed:

reg = <0x0 0xc9410000 0x0 0x10000>,
          <0x0 0xc8834540 0x0 0x4>;

Rob

^ permalink raw reply

* Re: [Intel-wired-lan] [PATCH 1/2] ixgbe: Explicitly initialize reference count to 0
From: Alexander Duyck @ 2019-08-05 22:03 UTC (permalink / raw)
  To: Bowers, AndrewX
  Cc: netdev@vger.kernel.org, linux-kernel@vger.kernel.org,
	intel-wired-lan@lists.osuosl.org
In-Reply-To: <26D9FDECA4FBDD4AADA65D8E2FC68A4A1D40F162@ORSMSX104.amr.corp.intel.com>

On Mon, Aug 5, 2019 at 2:42 PM Bowers, AndrewX <andrewx.bowers@intel.com> wrote:
>
> > -----Original Message-----
> > From: Intel-wired-lan [mailto:intel-wired-lan-bounces@osuosl.org] On
> > Behalf Of Chuhong Yuan
> > Sent: Friday, August 2, 2019 3:55 AM
> > Cc: netdev@vger.kernel.org; Chuhong Yuan <hslester96@gmail.com>; linux-
> > kernel@vger.kernel.org; intel-wired-lan@lists.osuosl.org; David S . Miller
> > <davem@davemloft.net>
> > Subject: [Intel-wired-lan] [PATCH 1/2] ixgbe: Explicitly initialize reference
> > count to 0
> >
> > The driver does not explicitly call atomic_set to initialize refcount to 0.
> > Add the call so that it will be more straight forward to convert refcount from
> > atomic_t to refcount_t.
> >
> > Signed-off-by: Chuhong Yuan <hslester96@gmail.com>
> > ---
> >  drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c | 1 +
> >  1 file changed, 1 insertion(+)
>
> Tested-by: Andrew Bowers <andrewx.bowers@intel.com>

NAK. This patch is badly broken. We should not be resetting the fcoe
refcnt in ixgbe_setup_fcoe_ddp_resources.

^ permalink raw reply

* Re: [Intel-wired-lan] [PATCH 2/2] ixgbe: Use refcount_t for refcount
From: Alexander Duyck @ 2019-08-05 22:02 UTC (permalink / raw)
  To: Willem de Bruijn
  Cc: Chuhong Yuan, Network Development, intel-wired-lan, linux-kernel,
	David S . Miller
In-Reply-To: <CA+FuTScLs-qJApj5Yw9OOjVk4++HSjn__Vdy+xX2V1rpWU8uLg@mail.gmail.com>

On Fri, Aug 2, 2019 at 6:47 AM Willem de Bruijn
<willemdebruijn.kernel@gmail.com> wrote:
>
> On Fri, Aug 2, 2019 at 6:55 AM Chuhong Yuan <hslester96@gmail.com> wrote:
> >
> > refcount_t is better for reference counters since its
> > implementation can prevent overflows.
> > So convert atomic_t ref counters to refcount_t.
> >
> > Also convert refcount from 0-based to 1-based.
> >
> > This patch depends on PATCH 1/2.
> >
> > Signed-off-by: Chuhong Yuan <hslester96@gmail.com>
> > ---
> >  drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c | 6 +++---
> >  drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.h | 2 +-
> >  2 files changed, 4 insertions(+), 4 deletions(-)
> >
> > diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c
> > index 00710f43cfd2..d313d00065cd 100644
> > --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c
> > +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c
> > @@ -773,7 +773,7 @@ int ixgbe_setup_fcoe_ddp_resources(struct ixgbe_adapter *adapter)
> >
> >         fcoe->extra_ddp_buffer = buffer;
> >         fcoe->extra_ddp_buffer_dma = dma;
> > -       atomic_set(&fcoe->refcnt, 0);
> > +       refcount_set(&fcoe->refcnt, 1);
>
> Same point as in the cxgb4 driver patch: how can you just change the
> initial value without modifying the condition for release?
>
> This is not a suggestion to resubmit all these changes again with a
> change to the release condition.

So I am pretty sure this patch is badly broken. It doesn't make any
sense to be resetting with the refcnt in
ixgbe_setup_fcoe_ddp_resources. The value is initialized to zero when
the adapter structure was allocated.

Consider this a NAK from me.

^ permalink raw reply

* Re: [Intel-wired-lan] [PATCH 2/2] ixgbe: Use refcount_t for refcount
From: Willem de Bruijn @ 2019-08-05 21:59 UTC (permalink / raw)
  To: Bowers, AndrewX
  Cc: netdev@vger.kernel.org, linux-kernel@vger.kernel.org,
	intel-wired-lan@lists.osuosl.org
In-Reply-To: <26D9FDECA4FBDD4AADA65D8E2FC68A4A1D40F174@ORSMSX104.amr.corp.intel.com>

On Mon, Aug 5, 2019 at 5:43 PM Bowers, AndrewX <andrewx.bowers@intel.com> wrote:
>
> > -----Original Message-----
> > From: Intel-wired-lan [mailto:intel-wired-lan-bounces@osuosl.org] On
> > Behalf Of Chuhong Yuan
> > Sent: Friday, August 2, 2019 3:55 AM
> > Cc: netdev@vger.kernel.org; Chuhong Yuan <hslester96@gmail.com>; linux-
> > kernel@vger.kernel.org; intel-wired-lan@lists.osuosl.org; David S . Miller
> > <davem@davemloft.net>
> > Subject: [Intel-wired-lan] [PATCH 2/2] ixgbe: Use refcount_t for refcount
> >
> > refcount_t is better for reference counters since its implementation can
> > prevent overflows.
> > So convert atomic_t ref counters to refcount_t.
> >
> > Also convert refcount from 0-based to 1-based.
> >
> > This patch depends on PATCH 1/2.
> >
> > Signed-off-by: Chuhong Yuan <hslester96@gmail.com>
> > ---
> >  drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c | 6 +++---
> > drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.h | 2 +-
> >  2 files changed, 4 insertions(+), 4 deletions(-)
>
> Tested-by: Andrew Bowers <andrewx.bowers@intel.com>

To reiterate, this patchset should not be applied as is. It is not
correct to simply change the initial refcount.

^ permalink raw reply

* RE: [Intel-wired-lan] [PATCH][net-next] ice: fix potential infinite loop
From: Bowers, AndrewX @ 2019-08-05 21:49 UTC (permalink / raw)
  To: intel-wired-lan@lists.osuosl.org, netdev@vger.kernel.org
  Cc: kernel-janitors@vger.kernel.org, linux-kernel@vger.kernel.org
In-Reply-To: <20190802155217.16996-1-colin.king@canonical.com>

> -----Original Message-----
> From: Intel-wired-lan [mailto:intel-wired-lan-bounces@osuosl.org] On
> Behalf Of Colin King
> Sent: Friday, August 2, 2019 8:52 AM
> To: Kirsher, Jeffrey T <jeffrey.t.kirsher@intel.com>; David S . Miller
> <davem@davemloft.net>; intel-wired-lan@lists.osuosl.org;
> netdev@vger.kernel.org
> Cc: kernel-janitors@vger.kernel.org; linux-kernel@vger.kernel.org
> Subject: [Intel-wired-lan] [PATCH][net-next] ice: fix potential infinite loop
> 
> From: Colin Ian King <colin.king@canonical.com>
> 
> The loop counter of a for-loop is a u8 however this is being compared to an
> int upper bound and this can lead to an infinite loop if the upper bound is
> greater than 255 since the loop counter will wrap back to zero. Fix this
> potential issue by making the loop counter an int.
> 
> Addresses-Coverity: ("Infinite loop")
> Fixes: c7aeb4d1b9bf ("ice: Disable VFs until reset is completed")
> Signed-off-by: Colin Ian King <colin.king@canonical.com>
> ---
>  drivers/net/ethernet/intel/ice/ice_main.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)

Tested-by: Andrew Bowers <andrewx.bowers@intel.com>



^ permalink raw reply

* Re: pull-request: can 2019-08-02
From: David Miller @ 2019-08-05 21:45 UTC (permalink / raw)
  To: mkl; +Cc: netdev, linux-can, kernel
In-Reply-To: <20190802120038.18154-1-mkl@pengutronix.de>

From: Marc Kleine-Budde <mkl@pengutronix.de>
Date: Fri,  2 Aug 2019 14:00:34 +0200

> this is a pull request of 4 patches for net/master.
> 
> The first two patches are by Wang Xiayang, they force that the string buffer
> during a dev_info() is properly NULL terminated.
> 
> The last two patches are by Tomas Bortoli and fix both a potential info leak of
> kernel memory to USB devices.

Pulled, thanks Marc.

^ permalink raw reply

* Re: linux-next: Signed-off-by missing for commit in the net tree
From: David Miller @ 2019-08-05 21:43 UTC (permalink / raw)
  To: sfr; +Cc: netdev, linux-next, linux-kernel
In-Reply-To: <20190806073825.6e6ba393@canb.auug.org.au>

From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Tue, 6 Aug 2019 07:38:25 +1000

> Commit
> 
>   c3953a3c2d31 ("NFC: nfcmrvl: fix gpio-handling regression")
> 
> is missing a Signed-off-by from its committer.

That has to be the first time that's ever happened to me :-)

And indeed as I check my command line history I forgot the --signoff
command line option.


^ permalink raw reply

* Re: [PATCH net-next v2] openvswitch: Print error when ovs_execute_actions() fails
From: Yifeng Sun @ 2019-08-05 21:43 UTC (permalink / raw)
  To: Pravin Shelar; +Cc: Linux Kernel Network Developers, Greg Rose
In-Reply-To: <CAOrHB_C758HjLJxb3jzAn0Wy1a_m4G2o4gsqMDdhJ9PRdT4GUg@mail.gmail.com>

Thanks Pravin!

Best,
Yifeng

On Mon, Aug 5, 2019 at 1:49 PM Pravin Shelar <pshelar@ovn.org> wrote:
>
> On Sun, Aug 4, 2019 at 7:56 PM Yifeng Sun <pkusunyifeng@gmail.com> wrote:
> >
> > Currently in function ovs_dp_process_packet(), return values of
> > ovs_execute_actions() are silently discarded. This patch prints out
> > an debug message when error happens so as to provide helpful hints
> > for debugging.
> > ---
> > v1->v2: Fixed according to Pravin's review.
> >
>
> Looks good.
> Acked-by: Pravin B Shelar <pshelar@ovn.org>
>
> Thanks,
> Pravin.

^ permalink raw reply

* RE: [Intel-wired-lan] [PATCH 2/2] ixgbe: Use refcount_t for refcount
From: Bowers, AndrewX @ 2019-08-05 21:43 UTC (permalink / raw)
  To: netdev@vger.kernel.org, linux-kernel@vger.kernel.org,
	intel-wired-lan@lists.osuosl.org
In-Reply-To: <20190802105507.16650-1-hslester96@gmail.com>

> -----Original Message-----
> From: Intel-wired-lan [mailto:intel-wired-lan-bounces@osuosl.org] On
> Behalf Of Chuhong Yuan
> Sent: Friday, August 2, 2019 3:55 AM
> Cc: netdev@vger.kernel.org; Chuhong Yuan <hslester96@gmail.com>; linux-
> kernel@vger.kernel.org; intel-wired-lan@lists.osuosl.org; David S . Miller
> <davem@davemloft.net>
> Subject: [Intel-wired-lan] [PATCH 2/2] ixgbe: Use refcount_t for refcount
> 
> refcount_t is better for reference counters since its implementation can
> prevent overflows.
> So convert atomic_t ref counters to refcount_t.
> 
> Also convert refcount from 0-based to 1-based.
> 
> This patch depends on PATCH 1/2.
> 
> Signed-off-by: Chuhong Yuan <hslester96@gmail.com>
> ---
>  drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c | 6 +++---
> drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.h | 2 +-
>  2 files changed, 4 insertions(+), 4 deletions(-)

Tested-by: Andrew Bowers <andrewx.bowers@intel.com>



^ permalink raw reply

* RE: [Intel-wired-lan] [PATCH 1/2] ixgbe: Explicitly initialize reference count to 0
From: Bowers, AndrewX @ 2019-08-05 21:42 UTC (permalink / raw)
  To: netdev@vger.kernel.org, linux-kernel@vger.kernel.org,
	intel-wired-lan@lists.osuosl.org
In-Reply-To: <20190802105457.16596-1-hslester96@gmail.com>

> -----Original Message-----
> From: Intel-wired-lan [mailto:intel-wired-lan-bounces@osuosl.org] On
> Behalf Of Chuhong Yuan
> Sent: Friday, August 2, 2019 3:55 AM
> Cc: netdev@vger.kernel.org; Chuhong Yuan <hslester96@gmail.com>; linux-
> kernel@vger.kernel.org; intel-wired-lan@lists.osuosl.org; David S . Miller
> <davem@davemloft.net>
> Subject: [Intel-wired-lan] [PATCH 1/2] ixgbe: Explicitly initialize reference
> count to 0
> 
> The driver does not explicitly call atomic_set to initialize refcount to 0.
> Add the call so that it will be more straight forward to convert refcount from
> atomic_t to refcount_t.
> 
> Signed-off-by: Chuhong Yuan <hslester96@gmail.com>
> ---
>  drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c | 1 +
>  1 file changed, 1 insertion(+)

Tested-by: Andrew Bowers <andrewx.bowers@intel.com>



^ permalink raw reply

* linux-next: Signed-off-by missing for commit in the net tree
From: Stephen Rothwell @ 2019-08-05 21:38 UTC (permalink / raw)
  To: David Miller, Networking
  Cc: Linux Next Mailing List, Linux Kernel Mailing List

[-- Attachment #1: Type: text/plain, Size: 168 bytes --]

Hi all,

Commit

  c3953a3c2d31 ("NFC: nfcmrvl: fix gpio-handling regression")

is missing a Signed-off-by from its committer.

-- 
Cheers,
Stephen Rothwell

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

^ permalink raw reply

* Re: [PATCH bpf-next 1/2] selftests/bpf: add loop test 4
From: Andrii Nakryiko @ 2019-08-05 21:37 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: Yonghong Song, Alexei Starovoitov, David S. Miller,
	Daniel Borkmann, Networking, bpf, Kernel Team
In-Reply-To: <f3ccc18f-7c25-a4e8-3d3d-c9f0bdf453ea@fb.com>

On Mon, Aug 5, 2019 at 1:53 PM Alexei Starovoitov <ast@fb.com> wrote:
>
> On 8/5/19 1:04 PM, Yonghong Song wrote:
> >
> >
> > On 8/5/19 12:45 PM, Andrii Nakryiko wrote:
> >> On Sat, Aug 3, 2019 at 8:19 PM Alexei Starovoitov <ast@kernel.org> wrote:
> >>>
> >>> Add a test that returns a 'random' number between [0, 2^20)
> >>> If state pruning is not working correctly for loop body the number of
> >>> processed insns will be 2^20 * num_of_insns_in_loop_body and the program
> >>> will be rejected.
> >>>
> >>> Signed-off-by: Alexei Starovoitov <ast@kernel.org>
> >>> ---
> >>>    .../bpf/prog_tests/bpf_verif_scale.c          |  1 +
> >>>    tools/testing/selftests/bpf/progs/loop4.c     | 23 +++++++++++++++++++
> >>>    2 files changed, 24 insertions(+)
> >>>    create mode 100644 tools/testing/selftests/bpf/progs/loop4.c
> >>>
> >>> diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
> >>> index b4be96162ff4..757e39540eda 100644
> >>> --- a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
> >>> +++ b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
> >>> @@ -71,6 +71,7 @@ void test_bpf_verif_scale(void)
> >>>
> >>>                   { "loop1.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
> >>>                   { "loop2.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
> >>> +               { "loop4.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
> >>>
> >>>                   /* partial unroll. 19k insn in a loop.
> >>>                    * Total program size 20.8k insn.
> >>> diff --git a/tools/testing/selftests/bpf/progs/loop4.c b/tools/testing/selftests/bpf/progs/loop4.c
> >>> new file mode 100644
> >>> index 000000000000..3e7ee14fddbd
> >>> --- /dev/null
> >>> +++ b/tools/testing/selftests/bpf/progs/loop4.c
> >>> @@ -0,0 +1,23 @@
> >>> +// SPDX-License-Identifier: GPL-2.0
> >>> +// Copyright (c) 2019 Facebook
> >>> +#include <linux/sched.h>
> >>> +#include <linux/ptrace.h>
> >>> +#include <stdint.h>
> >>> +#include <stddef.h>
> >>> +#include <stdbool.h>
> >>> +#include <linux/bpf.h>
> >>> +#include "bpf_helpers.h"
> >>> +
> >>> +char _license[] SEC("license") = "GPL";
> >>> +
> >>> +SEC("socket")
> >>> +int combinations(volatile struct __sk_buff* skb)
> >>> +{
> >>> +       int ret = 0, i;
> >>> +
> >>> +#pragma nounroll
> >>> +       for (i = 0; i < 20; i++)
> >>> +               if (skb->len)
> >>> +                       ret |= 1 << i;
> >>
> >> So I think the idea is that because verifier shouldn't know whether
> >> skb->len is zero or not, then you have two outcomes on every iteration
> >> leading to 2^20 states, right?
> >>
> >> But I'm afraid that verifier can eventually be smart enough (if it's
> >> not already, btw), to figure out that ret can be either 0 or ((1 <<
> >> 21) - 1), actually. If skb->len is put into separate register, then
> >> that register's bounds will be established on first loop iteration as
> >> either == 0 on one branch or (0, inf) on another branch, after which
> >> all subsequent iterations will not branch at all (one or the other
> >> branch will be always taken).
> >>
> >> It's also possible that LLVM/Clang is smart enough already to figure
> >> this out on its own and optimize loop into.
> >>
> >>
> >> if (skb->len) {
> >>       for (i = 0; i < 20; i++)
> >>           ret |= 1 << i;
> >> }
> >
> > We have
> >      volatile struct __sk_buff* skb
> >
> > So from the source code, skb->len could be different for each
> > iteration. The compiler cannot do the above optimization.
>
> yep.
> Without volatile llvm optimizes it even more than Andrii predicted :)

My bad, completely missed volatile.

>
> >>
> >>
> >> So two complains:
> >>
> >> 1. Let's obfuscate this a bit more, e.g., with testing (skb->len &
> >> (1<<i)) instead, so that result really depends on actual length of the
> >> packet.
> >> 2. Is it possible to somehow turn off this precision tracking (e.g.,
> >> running not under root, maybe?) and see that this same program fails
> >> in that case? That way we'll know test actually validates what we
> >> think it validates.
>
> that's on my todo list already.
> To do proper unit tests for all this stuff there should be a way
> to turn off not only precision, but heuristics too.
> All magic numbers in is_state_visited() need to be switchable.
> I'm still thinking on the way to expose it to tests infra.

Yep, that would be great.

I have nothing beyond what Yonghong suggested.

Acked-by: Andrii Nakryiko <andriin@fb.com>

^ permalink raw reply

* [WIP 0/4] bpf: A bit of progress toward unprivileged use
From: Andy Lutomirski @ 2019-08-05 21:29 UTC (permalink / raw)
  To: LKML, Alexei Starovoitov
  Cc: Song Liu, Kees Cook, Networking, bpf, Daniel Borkmann,
	Alexei Starovoitov, Kernel Team, Lorenz Bauer, Jann Horn, Greg KH,
	Linux API, LSM List, Andy Lutomirski

Other than the mknod() patch, this is not ready for prime time.  These
patches try to make progress toward making bpf() more useful without
privilege

Andy Lutomirski (4):
  bpf: Respect persistent map and prog access modes
  bpf: Don't require mknod() permission to pin an object
  bpf: Add a way to mark functions as requiring privilege
  bpf: Allow creating all program types without privilege

 include/linux/bpf.h          | 30 +++++++++++++++-----
 include/linux/bpf_verifier.h |  1 +
 kernel/bpf/arraymap.c        |  8 +++++-
 kernel/bpf/cgroup.c          |  6 +++-
 kernel/bpf/inode.c           | 29 +++++++++++--------
 kernel/bpf/syscall.c         | 54 +++++++++++++++++++++++++-----------
 kernel/bpf/verifier.c        |  8 ++++++
 kernel/events/core.c         |  5 ++--
 kernel/trace/bpf_trace.c     |  1 +
 net/core/dev.c               |  4 ++-
 net/core/filter.c            |  8 ++++--
 net/netfilter/xt_bpf.c       |  5 ++--
 net/packet/af_packet.c       |  2 +-
 13 files changed, 115 insertions(+), 46 deletions(-)

-- 
2.21.0


^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox