* [PATCH bpf-next v10 2/4] bpf: new helper to obtain namespace data from current task New bpf helper bpf_get_current_pidns_info.
From: Carlos Neira @ 2019-09-06 15:09 UTC (permalink / raw)
To: netdev; +Cc: yhs, ebiederm, brouer, cneirabustos, bpf
In-Reply-To: <20190906150952.23066-1-cneirabustos@gmail.com>
This helper(bpf_get_current_pidns_info) obtains the active namespace from
current and returns pid, tgid, device and namespace id as seen from that
namespace, allowing to instrument a process inside a container.
Signed-off-by: Carlos Neira <cneirabustos@gmail.com>
---
include/linux/bpf.h | 1 +
include/uapi/linux/bpf.h | 35 +++++++++++++++++++-
kernel/bpf/core.c | 1 +
kernel/bpf/helpers.c | 86 ++++++++++++++++++++++++++++++++++++++++++++++++
kernel/trace/bpf_trace.c | 2 ++
5 files changed, 124 insertions(+), 1 deletion(-)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 5b9d22338606..819cb1c84be0 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1055,6 +1055,7 @@ extern const struct bpf_func_proto bpf_get_local_storage_proto;
extern const struct bpf_func_proto bpf_strtol_proto;
extern const struct bpf_func_proto bpf_strtoul_proto;
extern const struct bpf_func_proto bpf_tcp_sock_proto;
+extern const struct bpf_func_proto bpf_get_current_pidns_info_proto;
/* Shared helpers among cBPF and eBPF. */
void bpf_user_rnd_init_once(void);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index b5889257cc33..3ec9aa1438b7 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2747,6 +2747,32 @@ union bpf_attr {
* **-EOPNOTSUPP** kernel configuration does not enable SYN cookies
*
* **-EPROTONOSUPPORT** IP packet version is not 4 or 6
+ *
+ * int bpf_get_current_pidns_info(struct bpf_pidns_info *pidns, u32 size_of_pidns)
+ * Description
+ * Get tgid, pid and namespace id as seen by the current namespace,
+ * and device major/minor numbers from /proc/self/ns/pid. Such
+ * information is stored in *pidns* of size *size*.
+ *
+ * This helper is used when pid filtering is needed inside a
+ * container as bpf_get_current_tgid() helper always returns the
+ * pid id as seen by the root namespace.
+ * Return
+ * 0 on success
+ *
+ * On failure, the returned value is one of the following:
+ *
+ * **-EINVAL** if *size_of_pidns* is not valid or unable to get ns, pid
+ * or tgid of the current task.
+ *
+ * **-ENOENT** if /proc/self/ns/pid does not exists.
+ *
+ * **-ENOENT** if /proc/self/ns does not exists.
+ *
+ * **-ENOMEM** if helper internal allocation fails.
+ *
+ * **-EPERM** if not able to call helper.
+ *
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -2859,7 +2885,8 @@ union bpf_attr {
FN(sk_storage_get), \
FN(sk_storage_delete), \
FN(send_signal), \
- FN(tcp_gen_syncookie),
+ FN(tcp_gen_syncookie), \
+ FN(get_current_pidns_info),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
@@ -3610,4 +3637,10 @@ struct bpf_sockopt {
__s32 retval;
};
+struct bpf_pidns_info {
+ __u32 dev; /* dev_t from /proc/self/ns/pid inode */
+ __u32 nsid;
+ __u32 tgid;
+ __u32 pid;
+};
#endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 8191a7db2777..3159f2a0188c 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -2038,6 +2038,7 @@ const struct bpf_func_proto bpf_get_current_uid_gid_proto __weak;
const struct bpf_func_proto bpf_get_current_comm_proto __weak;
const struct bpf_func_proto bpf_get_current_cgroup_id_proto __weak;
const struct bpf_func_proto bpf_get_local_storage_proto __weak;
+const struct bpf_func_proto bpf_get_current_pidns_info __weak;
const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void)
{
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 5e28718928ca..8dbe6347893c 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -11,6 +11,11 @@
#include <linux/uidgid.h>
#include <linux/filter.h>
#include <linux/ctype.h>
+#include <linux/pid_namespace.h>
+#include <linux/kdev_t.h>
+#include <linux/stat.h>
+#include <linux/namei.h>
+#include <linux/version.h>
#include "../../lib/kstrtox.h"
@@ -312,6 +317,87 @@ void copy_map_value_locked(struct bpf_map *map, void *dst, void *src,
preempt_enable();
}
+BPF_CALL_2(bpf_get_current_pidns_info, struct bpf_pidns_info *, pidns_info, u32,
+ size)
+{
+ const char *pidns_path = "/proc/self/ns/pid";
+ struct pid_namespace *pidns = NULL;
+ struct filename *fname = NULL;
+ struct inode *inode;
+ struct path kp;
+ pid_t tgid = 0;
+ pid_t pid = 0;
+ int ret = -EINVAL;
+ int len;
+
+ if (unlikely(in_interrupt() ||
+ current->flags & (PF_KTHREAD | PF_EXITING)))
+ return -EPERM;
+
+ if (unlikely(size != sizeof(struct bpf_pidns_info)))
+ return -EINVAL;
+
+ pidns = task_active_pid_ns(current);
+ if (unlikely(!pidns))
+ return -ENOENT;
+
+ pidns_info->nsid = pidns->ns.inum;
+ pid = task_pid_nr_ns(current, pidns);
+ if (unlikely(!pid))
+ goto clear;
+
+ tgid = task_tgid_nr_ns(current, pidns);
+ if (unlikely(!tgid))
+ goto clear;
+
+ pidns_info->tgid = (u32) tgid;
+ pidns_info->pid = (u32) pid;
+
+ fname = kmem_cache_alloc(names_cachep, GFP_ATOMIC);
+ if (unlikely(!fname)) {
+ ret = -ENOMEM;
+ goto clear;
+ }
+ const size_t fnamesize = offsetof(struct filename, iname[1]);
+ struct filename *tmp;
+
+ tmp = kmalloc(fnamesize, GFP_ATOMIC);
+ if (unlikely(!tmp)) {
+ __putname(fname);
+ ret = -ENOMEM;
+ goto clear;
+ }
+
+ tmp->name = (char *)fname;
+ fname = tmp;
+ len = strlen(pidns_path) + 1;
+ memcpy((char *)fname->name, pidns_path, len);
+ fname->uptr = NULL;
+ fname->aname = NULL;
+ fname->refcnt = 1;
+
+ ret = filename_lookup(AT_FDCWD, fname, 0, &kp, NULL);
+ if (ret)
+ goto clear;
+
+ inode = d_backing_inode(kp.dentry);
+ pidns_info->dev = (u32)inode->i_rdev;
+
+ return 0;
+
+clear:
+ memset((void *)pidns_info, 0, (size_t) size);
+ return ret;
+}
+
+const struct bpf_func_proto bpf_get_current_pidns_info_proto = {
+ .func = bpf_get_current_pidns_info,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_UNINIT_MEM,
+ .arg2_type = ARG_CONST_SIZE,
+};
+
#ifdef CONFIG_CGROUPS
BPF_CALL_0(bpf_get_current_cgroup_id)
{
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index ca1255d14576..5e1dc22765a5 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -709,6 +709,8 @@ tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
#endif
case BPF_FUNC_send_signal:
return &bpf_send_signal_proto;
+ case BPF_FUNC_get_current_pidns_info:
+ return &bpf_get_current_pidns_info_proto;
default:
return NULL;
}
--
2.11.0
^ permalink raw reply related
* [PATCH bpf-next v10 3/4] tools: Added bpf_get_current_pidns_info helper.
From: Carlos Neira @ 2019-09-06 15:09 UTC (permalink / raw)
To: netdev; +Cc: yhs, ebiederm, brouer, cneirabustos, bpf
In-Reply-To: <20190906150952.23066-1-cneirabustos@gmail.com>
Signed-off-by: Carlos Neira <cneirabustos@gmail.com>
---
tools/include/uapi/linux/bpf.h | 35 ++++++++++++++++++++++++++++++++++-
1 file changed, 34 insertions(+), 1 deletion(-)
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index b5889257cc33..3ec9aa1438b7 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -2747,6 +2747,32 @@ union bpf_attr {
* **-EOPNOTSUPP** kernel configuration does not enable SYN cookies
*
* **-EPROTONOSUPPORT** IP packet version is not 4 or 6
+ *
+ * int bpf_get_current_pidns_info(struct bpf_pidns_info *pidns, u32 size_of_pidns)
+ * Description
+ * Get tgid, pid and namespace id as seen by the current namespace,
+ * and device major/minor numbers from /proc/self/ns/pid. Such
+ * information is stored in *pidns* of size *size*.
+ *
+ * This helper is used when pid filtering is needed inside a
+ * container as bpf_get_current_tgid() helper always returns the
+ * pid id as seen by the root namespace.
+ * Return
+ * 0 on success
+ *
+ * On failure, the returned value is one of the following:
+ *
+ * **-EINVAL** if *size_of_pidns* is not valid or unable to get ns, pid
+ * or tgid of the current task.
+ *
+ * **-ENOENT** if /proc/self/ns/pid does not exists.
+ *
+ * **-ENOENT** if /proc/self/ns does not exists.
+ *
+ * **-ENOMEM** if helper internal allocation fails.
+ *
+ * **-EPERM** if not able to call helper.
+ *
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -2859,7 +2885,8 @@ union bpf_attr {
FN(sk_storage_get), \
FN(sk_storage_delete), \
FN(send_signal), \
- FN(tcp_gen_syncookie),
+ FN(tcp_gen_syncookie), \
+ FN(get_current_pidns_info),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
@@ -3610,4 +3637,10 @@ struct bpf_sockopt {
__s32 retval;
};
+struct bpf_pidns_info {
+ __u32 dev; /* dev_t from /proc/self/ns/pid inode */
+ __u32 nsid;
+ __u32 tgid;
+ __u32 pid;
+};
#endif /* _UAPI__LINUX_BPF_H__ */
--
2.11.0
^ permalink raw reply related
* [PATCH bpf-next v10 4/4] tools/testing/selftests/bpf: Add self-tests for helper bpf_get_pidns_info.
From: Carlos Neira @ 2019-09-06 15:09 UTC (permalink / raw)
To: netdev; +Cc: yhs, ebiederm, brouer, cneirabustos, bpf
In-Reply-To: <20190906150952.23066-1-cneirabustos@gmail.com>
Added 2 selftest:
bpf_get_current_pidns_info helper is called in an interrupt
context and also in a non interrupt context.
Signed-off-by: Carlos Neira <cneirabustos@gmail.com>
---
tools/testing/selftests/bpf/Makefile | 2 +-
tools/testing/selftests/bpf/bpf_helpers.h | 3 +
.../testing/selftests/bpf/progs/test_pidns_kern.c | 52 ++++++++
.../selftests/bpf/progs/test_pidns_nmi_kern.c | 52 ++++++++
tools/testing/selftests/bpf/test_pidns.c | 146 +++++++++++++++++++++
tools/testing/selftests/bpf/test_pidns_nmi.c | 139 ++++++++++++++++++++
6 files changed, 393 insertions(+), 1 deletion(-)
create mode 100644 tools/testing/selftests/bpf/progs/test_pidns_kern.c
create mode 100644 tools/testing/selftests/bpf/progs/test_pidns_nmi_kern.c
create mode 100644 tools/testing/selftests/bpf/test_pidns.c
create mode 100644 tools/testing/selftests/bpf/test_pidns_nmi.c
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 1faad0c3c3c9..8507c89141f5 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -29,7 +29,7 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test
test_cgroup_storage test_select_reuseport test_section_names \
test_netcnt test_tcpnotify_user test_sock_fields test_sysctl test_hashmap \
test_btf_dump test_cgroup_attach xdping test_sockopt test_sockopt_sk \
- test_sockopt_multi test_sockopt_inherit test_tcp_rtt
+ test_sockopt_multi test_sockopt_inherit test_tcp_rtt test_pidns test_pidns_nmi
BPF_OBJ_FILES = $(patsubst %.c,%.o, $(notdir $(wildcard progs/*.c)))
TEST_GEN_FILES = $(BPF_OBJ_FILES)
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
index 6c4930bc6e2e..30a70ebe583a 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -313,6 +313,9 @@ static unsigned int (*bpf_set_hash)(void *ctx, __u32 hash) =
static int (*bpf_skb_adjust_room)(void *ctx, __s32 len_diff, __u32 mode,
unsigned long long flags) =
(void *) BPF_FUNC_skb_adjust_room;
+static int (*bpf_get_current_pidns_info)(struct bpf_pidns_info *buf,
+ unsigned int buf_size) =
+ (void *) BPF_FUNC_get_current_pidns_info;
/* Scan the ARCH passed in from ARCH env variable (see Makefile) */
#if defined(__TARGET_ARCH_x86)
diff --git a/tools/testing/selftests/bpf/progs/test_pidns_kern.c b/tools/testing/selftests/bpf/progs/test_pidns_kern.c
new file mode 100644
index 000000000000..a4c0bde1608b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_pidns_kern.c
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018 Carlos Neira cneirabustos@gmail.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+
+#include <linux/bpf.h>
+#include <errno.h>
+#include "bpf_helpers.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, struct bpf_pidns_info);
+} nsidmap SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u32);
+} pidmap SEC(".maps");
+
+SEC("tracepoint/syscalls/sys_enter_nanosleep")
+int trace(void *ctx)
+{
+ struct bpf_pidns_info nsinfo;
+ __u32 key = 0, *expected_pid;
+ struct bpf_pidns_info *val;
+
+ if (bpf_get_current_pidns_info(&nsinfo, sizeof(nsinfo)))
+ return -EINVAL;
+
+ expected_pid = bpf_map_lookup_elem(&pidmap, &key);
+ __u64 tgid = bpf_get_current_pid_tgid();
+
+ if (!expected_pid || *expected_pid != nsinfo.pid ||
+ nsinfo.tgid != (__u32)tgid)
+ return 0;
+
+ val = bpf_map_lookup_elem(&nsidmap, &key);
+ if (val)
+ *val = nsinfo;
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+__u32 _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/test_pidns_nmi_kern.c b/tools/testing/selftests/bpf/progs/test_pidns_nmi_kern.c
new file mode 100644
index 000000000000..7f02e4e29021
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_pidns_nmi_kern.c
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018 Carlos Neira cneirabustos@gmail.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+
+#include <linux/bpf.h>
+#include <errno.h>
+#include "bpf_helpers.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, struct bpf_pidns_info);
+} nsidmap SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u32);
+} pidmap SEC(".maps");
+
+SEC("tracepoint/net/netif_receive_skb")
+int trace(void *ctx)
+{
+ struct bpf_pidns_info nsinfo;
+ __u32 key = 0, *expected_pid;
+ struct bpf_pidns_info *val;
+
+ if (bpf_get_current_pidns_info(&nsinfo, sizeof(nsinfo)))
+ return -EINVAL;
+
+ expected_pid = bpf_map_lookup_elem(&pidmap, &key);
+ __u64 tgid = bpf_get_current_pid_tgid();
+
+ if (!expected_pid || *expected_pid != nsinfo.pid ||
+ nsinfo.tgid != (__u32)tgid)
+ return 0;
+
+ val = bpf_map_lookup_elem(&nsidmap, &key);
+ if (val)
+ *val = nsinfo;
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+__u32 _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/test_pidns.c b/tools/testing/selftests/bpf/test_pidns.c
new file mode 100644
index 000000000000..0c579305da53
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_pidns.c
@@ -0,0 +1,146 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018 Carlos Neira cneirabustos@gmail.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <syscall.h>
+#include <unistd.h>
+#include <linux/perf_event.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <linux/bpf.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "cgroup_helpers.h"
+#include "bpf_rlimit.h"
+
+#define CHECK(condition, tag, format...) ({ \
+ int __ret = !!(condition); \
+ if (__ret) { \
+ printf("%s:FAIL:%s ", __func__, tag); \
+ printf(format); \
+ } else { \
+ printf("%s:PASS:%s\n", __func__, tag); \
+ } \
+ __ret; \
+})
+
+static int bpf_find_map(const char *test, struct bpf_object *obj,
+ const char *name)
+{
+ struct bpf_map *map;
+
+ map = bpf_object__find_map_by_name(obj, name);
+ if (!map)
+ return -1;
+ return bpf_map__fd(map);
+}
+
+
+int main(int argc, char **argv)
+{
+ const char *probe_name = "syscalls/sys_enter_nanosleep";
+ const char *file = "test_pidns_kern.o";
+ int err, bytes, efd, prog_fd, pmu_fd;
+ struct bpf_pidns_info knsid = {};
+ int pidmap_fd, nsidmap_fd;
+ struct perf_event_attr attr = {};
+ struct bpf_object *obj;
+ __u32 key = 0, pid;
+ int exit_code = 1;
+ struct stat st;
+ char buf[256];
+
+ err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
+ if (CHECK(err, "bpf_prog_load", "err %d errno %d\n", err, errno))
+ goto cleanup_cgroup_env;
+
+ nsidmap_fd = bpf_find_map(__func__, obj, "nsidmap");
+ if (CHECK(nsidmap_fd < 0, "bpf_find_map", "err %d errno %d\n",
+ nsidmap_fd, errno))
+ goto close_prog;
+
+ pidmap_fd = bpf_find_map(__func__, obj, "pidmap");
+ if (CHECK(pidmap_fd < 0, "bpf_find_map", "err %d errno %d\n",
+ pidmap_fd, errno))
+ goto close_prog;
+
+ pid = getpid();
+ bpf_map_update_elem(pidmap_fd, &key, &pid, 0);
+
+ snprintf(buf, sizeof(buf),
+ "/sys/kernel/debug/tracing/events/%s/id", probe_name);
+ efd = open(buf, O_RDONLY, 0);
+ if (CHECK(efd < 0, "open", "err %d errno %d\n", efd, errno))
+ goto close_prog;
+ bytes = read(efd, buf, sizeof(buf));
+ close(efd);
+ if (CHECK(bytes <= 0 || bytes >= sizeof(buf), "read",
+ "bytes %d errno %d\n", bytes, errno))
+ goto close_prog;
+
+ attr.config = strtol(buf, NULL, 0);
+ attr.type = PERF_TYPE_TRACEPOINT;
+ attr.sample_type = PERF_SAMPLE_RAW;
+ attr.sample_period = 1;
+ attr.wakeup_events = 1;
+
+ pmu_fd = syscall(__NR_perf_event_open, &attr, getpid(), -1, -1, 0);
+ if (CHECK(pmu_fd < 0, "perf_event_open", "err %d errno %d\n", pmu_fd,
+ errno))
+ goto close_prog;
+
+ err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0);
+ if (CHECK(err, "perf_event_ioc_enable", "err %d errno %d\n", err,
+ errno))
+ goto close_pmu;
+
+ err = ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd);
+ if (CHECK(err, "perf_event_ioc_set_bpf", "err %d errno %d\n", err,
+ errno))
+ goto close_pmu;
+
+ /* trigger some syscalls */
+ sleep(1);
+
+ err = bpf_map_lookup_elem(nsidmap_fd, &key, &knsid);
+ if (CHECK(err, "bpf_map_lookup_elem", "err %d errno %d\n", err, errno))
+ goto close_pmu;
+
+ if (stat("/proc/self/ns/pid", &st))
+ goto close_pmu;
+
+ if (CHECK(knsid.nsid != (__u32) st.st_ino, "namespace id",
+ "bpf %u user %u\n", knsid.nsid, (__u32) st.st_ino))
+ goto close_pmu;
+
+ if (CHECK(major(knsid.dev) != major(st.st_rdev), "dev major",
+ "bpf %u user %u\n", major(knsid.dev), major(st.st_rdev)))
+ goto close_pmu;
+
+ if (CHECK(minor(knsid.dev) != minor(st.st_rdev), "dev minor",
+ "bpf %u user %u\n", minor(knsid.dev), minor(st.st_rdev)))
+ goto close_pmu;
+
+ exit_code = 0;
+ printf("%s:PASS\n", argv[0]);
+
+close_pmu:
+ close(pmu_fd);
+close_prog:
+ bpf_object__close(obj);
+cleanup_cgroup_env:
+ return exit_code;
+}
diff --git a/tools/testing/selftests/bpf/test_pidns_nmi.c b/tools/testing/selftests/bpf/test_pidns_nmi.c
new file mode 100644
index 000000000000..bb8075bbe7d8
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_pidns_nmi.c
@@ -0,0 +1,139 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018 Carlos Neira cneirabustos@gmail.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <syscall.h>
+#include <unistd.h>
+#include <linux/perf_event.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <linux/bpf.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "cgroup_helpers.h"
+#include "bpf_rlimit.h"
+
+#define CHECK(condition, tag, format...) ({ \
+ int __ret = !!(condition); \
+ if (__ret) { \
+ printf("%s:FAIL:%s ", __func__, tag); \
+ printf(format); \
+ } else { \
+ printf("%s:PASS:%s\n", __func__, tag); \
+ } \
+ __ret; \
+})
+
+static int bpf_find_map(const char *test, struct bpf_object *obj,
+ const char *name)
+{
+ struct bpf_map *map;
+
+ map = bpf_object__find_map_by_name(obj, name);
+ if (!map)
+ return -1;
+ return bpf_map__fd(map);
+}
+
+
+int main(int argc, char **argv)
+{
+ const char *probe_name = "net/netif_receive_skb";
+ const char *file = "test_pidns_kern.o";
+ int err, bytes, efd, prog_fd, pmu_fd;
+ struct bpf_pidns_info knsid = {};
+ int pidmap_fd, nsidmap_fd;
+ struct perf_event_attr attr = {};
+ struct bpf_object *obj;
+ __u32 key = 0, pid;
+ int exit_code = 1;
+ struct stat st;
+ char buf[256];
+
+ err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
+ if (CHECK(err, "bpf_prog_load", "err %d errno %d\n", err, errno))
+ goto cleanup_cgroup_env;
+
+ nsidmap_fd = bpf_find_map(__func__, obj, "nsidmap");
+ if (CHECK(nsidmap_fd < 0, "bpf_find_map", "err %d errno %d\n",
+ nsidmap_fd, errno))
+ goto close_prog;
+
+ pidmap_fd = bpf_find_map(__func__, obj, "pidmap");
+ if (CHECK(pidmap_fd < 0, "bpf_find_map", "err %d errno %d\n",
+ pidmap_fd, errno))
+ goto close_prog;
+
+ pid = getpid();
+ bpf_map_update_elem(pidmap_fd, &key, &pid, 0);
+
+ snprintf(buf, sizeof(buf),
+ "/sys/kernel/debug/tracing/events/%s/id", probe_name);
+ efd = open(buf, O_RDONLY, 0);
+ if (CHECK(efd < 0, "open", "err %d errno %d\n", efd, errno))
+ goto close_prog;
+ bytes = read(efd, buf, sizeof(buf));
+ close(efd);
+ if (CHECK(bytes <= 0 || bytes >= sizeof(buf), "read",
+ "bytes %d errno %d\n", bytes, errno))
+ goto close_prog;
+
+ attr.config = strtol(buf, NULL, 0);
+ attr.type = PERF_TYPE_TRACEPOINT;
+ attr.sample_type = PERF_SAMPLE_RAW;
+ attr.sample_period = 1;
+ attr.wakeup_events = 1;
+
+ pmu_fd = syscall(__NR_perf_event_open, &attr, getpid(), -1, -1, 0);
+ if (CHECK(pmu_fd < 0, "perf_event_open", "err %d errno %d\n", pmu_fd,
+ errno))
+ goto close_prog;
+
+ err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0);
+ if (CHECK(err, "perf_event_ioc_enable", "err %d errno %d\n", err,
+ errno))
+ goto close_pmu;
+
+ err = ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd);
+ if (CHECK(err, "perf_event_ioc_set_bpf", "err %d errno %d\n", err,
+ errno))
+ goto close_pmu;
+
+ /* trigger some syscalls */
+ sleep(1);
+
+ err = bpf_map_lookup_elem(nsidmap_fd, &key, &knsid);
+ if (CHECK(err, "bpf_map_lookup_elem", "err %d errno %d\n", err, errno))
+ goto close_pmu;
+
+ if (stat("/proc/self/ns/pid", &st))
+ goto close_pmu;
+
+ if (CHECK(knsid.nsid != (__u32) st.st_ino, "namespace_id",
+ "Called in interrupt context bpf %u user %u\n",
+ knsid.nsid, (__u32) st.st_ino))
+ goto close_pmu;
+
+ exit_code = 0;
+ printf("%s:PASS\n", argv[0]);
+
+close_pmu:
+ close(pmu_fd);
+close_prog:
+ bpf_object__close(obj);
+cleanup_cgroup_env:
+ return exit_code;
+}
--
2.11.0
^ permalink raw reply related
* [PATCH] net/mlx5: reduce stack usage in FW tracer
From: Arnd Bergmann @ 2019-09-06 15:11 UTC (permalink / raw)
To: Saeed Mahameed, Leon Romanovsky, David S. Miller
Cc: Arnd Bergmann, Feras Daoud, Erez Shitrit, Moshe Shemesh,
Eran Ben Elisha, Qian Cai, netdev, linux-rdma, linux-kernel
It's generally not ok to put a 512 byte buffer on the stack, as kernel
stack is a scarce resource:
drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c:660:13: error: stack frame size of 1032 bytes in function 'mlx5_fw_tracer_handle_traces' [-Werror,-Wframe-larger-than=]
This is done in a context that is allowed to sleep, so using
dynamic allocation is ok as well. I'm not too worried about
runtime overhead, as this already contains an snprintf() and
other expensive functions.
Fixes: 70dd6fdb8987 ("net/mlx5: FW tracer, parse traces and kernel tracing support")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
.../mellanox/mlx5/core/diag/fw_tracer.c | 21 ++++++++++---------
1 file changed, 11 insertions(+), 10 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
index 2011eaf15cc5..d81e78060f9f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
@@ -557,16 +557,16 @@ static void mlx5_tracer_print_trace(struct tracer_string_format *str_frmt,
struct mlx5_core_dev *dev,
u64 trace_timestamp)
{
- char tmp[512];
-
- snprintf(tmp, sizeof(tmp), str_frmt->string,
- str_frmt->params[0],
- str_frmt->params[1],
- str_frmt->params[2],
- str_frmt->params[3],
- str_frmt->params[4],
- str_frmt->params[5],
- str_frmt->params[6]);
+ char *tmp = kasprintf(GFP_KERNEL, str_frmt->string,
+ str_frmt->params[0],
+ str_frmt->params[1],
+ str_frmt->params[2],
+ str_frmt->params[3],
+ str_frmt->params[4],
+ str_frmt->params[5],
+ str_frmt->params[6]);
+ if (!tmp)
+ return;
trace_mlx5_fw(dev->tracer, trace_timestamp, str_frmt->lost,
str_frmt->event_id, tmp);
@@ -576,6 +576,7 @@ static void mlx5_tracer_print_trace(struct tracer_string_format *str_frmt,
/* remove it from hash */
mlx5_tracer_clean_message(str_frmt);
+ kfree(tmp);
}
static int mlx5_tracer_handle_string_trace(struct mlx5_fw_tracer *tracer,
--
2.20.0
^ permalink raw reply related
* [PATCH net-next] netfilter: nf_tables: avoid excessive stack usage
From: Arnd Bergmann @ 2019-09-06 15:12 UTC (permalink / raw)
To: Pablo Neira Ayuso, Jozsef Kadlecsik, Florian Westphal,
David S. Miller
Cc: Arnd Bergmann, Jakub Kicinski, wenxu, netfilter-devel, coreteam,
netdev, linux-kernel
The nft_offload_ctx structure is much too large to put on the
stack:
net/netfilter/nf_tables_offload.c:31:23: error: stack frame size of 1200 bytes in function 'nft_flow_rule_create' [-Werror,-Wframe-larger-than=]
Use dynamic allocation here, as we do elsewhere in the same
function.
Fixes: c9626a2cbdb2 ("netfilter: nf_tables: add hardware offload support")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
Since we only really care about two members of the structure, an
alternative would be a larger rewrite, but that is probably too
late for v5.4.
---
net/netfilter/nf_tables_offload.c | 22 +++++++++++++++-------
1 file changed, 15 insertions(+), 7 deletions(-)
diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c
index 3c2725ade61b..c94331aae552 100644
--- a/net/netfilter/nf_tables_offload.c
+++ b/net/netfilter/nf_tables_offload.c
@@ -30,15 +30,13 @@ static struct nft_flow_rule *nft_flow_rule_alloc(int num_actions)
struct nft_flow_rule *nft_flow_rule_create(const struct nft_rule *rule)
{
- struct nft_offload_ctx ctx = {
- .dep = {
- .type = NFT_OFFLOAD_DEP_UNSPEC,
- },
- };
+ struct nft_offload_ctx *ctx;
+
struct nft_flow_rule *flow;
int num_actions = 0, err;
struct nft_expr *expr;
+
expr = nft_expr_first(rule);
while (expr->ops && expr != nft_expr_last(rule)) {
if (expr->ops->offload_flags & NFT_OFFLOAD_F_ACTION)
@@ -52,21 +50,31 @@ struct nft_flow_rule *nft_flow_rule_create(const struct nft_rule *rule)
return ERR_PTR(-ENOMEM);
expr = nft_expr_first(rule);
+
+ ctx = kzalloc(sizeof(struct nft_offload_ctx), GFP_KERNEL);
+ if (!ctx) {
+ err = -ENOMEM;
+ goto err_out;
+ }
+ ctx->dep.type = NFT_OFFLOAD_DEP_UNSPEC;
+
while (expr->ops && expr != nft_expr_last(rule)) {
if (!expr->ops->offload) {
err = -EOPNOTSUPP;
goto err_out;
}
- err = expr->ops->offload(&ctx, flow, expr);
+ err = expr->ops->offload(ctx, flow, expr);
if (err < 0)
goto err_out;
expr = nft_expr_next(expr);
}
- flow->proto = ctx.dep.l3num;
+ flow->proto = ctx->dep.l3num;
+ kfree(ctx);
return flow;
err_out:
+ kfree(ctx);
nft_flow_rule_destroy(flow);
return ERR_PTR(err);
--
2.20.0
^ permalink raw reply related
* Re: [PATCH net-next,v3 0/4] flow_offload: update mangle action representation
From: Edward Cree @ 2019-09-06 15:13 UTC (permalink / raw)
To: Pablo Neira Ayuso
Cc: netfilter-devel, davem, netdev, jakub.kicinski, jiri, saeedm,
vishal, vladbu
In-Reply-To: <20190906145019.2bggchaq43tcqdyc@salvia>
On 06/09/2019 15:50, Pablo Neira Ayuso wrote:
> On Fri, Sep 06, 2019 at 02:37:16PM +0100, Edward Cree wrote:
>> On 06/09/2019 14:14, Pablo Neira Ayuso wrote:
>>> OK, I can document this semantics, I need just _time_ to write that
>>> documentation. I was expecting this patch description is enough by now
>>> until I can get to finish that documentation.
>> I think for two structs with apparently the same contents but different
>> semantics (one has the mask bitwise complemented) it's best to hold up
>> the code change until the comment is ready to come with it, because
>> until then it's a dangerously confusing situation.
> The idea is that flow rule API != tc front-end anymore. So the flow
> rule API can evolve regardless the front-end requirements. Before this
> update there was no other choice than using the tc pedit layout since
> it was exposed to the drivers, this is not the case anymore.
I'm not saying that they have to be the same.
I'm saying that they're _almost_ the same, and having things that are
_almost_ the same but subtly different is a recipe for misunderstandings
and bugs, and so must must must have very visible comments in the code.
>> So an IPv6 address mangle only comes as a single action if it's from
>> netfilter, not if it's coming from TC pedit.
> Driver gets one single action from tc/netfilter (and potentially
> ethtool if it would support for this), it comes as a single action
> from both subsystems:
>
> front-end -----> flow_rule API -----> drivers
>
> Front-end need to translate their representation to this
> FLOW_ACTION_MANGLE layout.
>
> By front-end, I refer to ethtool/netfilter/tc.
In your patch, flow_action_mangle() looks only at the offset and type
(add vs. set) of each pedit, coalesces them if compatible (so, unless
I'm misreading the code, it _will_ coalesce adjacent pedits to
contiguous fields like UDP sport & dport, unlike what you said),
because it doesn't know whether two contiguous pedits are part of the
same field or not (it doesn't have any knowledge of 'fields' at all).
And for you to change that, while still coalescing IPv6 pedits, you
would need flow_action_mangle() to know what fields each pedit is in.
It is not possible for code that doesn't know about fields to both:
(a) not coalesce edits of contiguous fields, and
(b) coalesce edits of larger-than-u32 fields
>> Yes, but we don't add code/features to the kernel based on what we
>> *could* use it for later
> This is already useful: Look at the cxgb driver in particular, it's
> way easier to read.
Have you tested it? Again, I might be misreading, but it looks like
your flow_action_mangle() *will* coalesce sport & dport, which it
appears will break that cxgb code.
> Other existing drivers do not need to do things like:
>
> case round_down(offsetof(struct iphdr, tos), 4):
>
> and the play with masks to infer if the user wants to mangle the TOS
> field, they can just do:
>
> case offsetof(struct iphdr, tos):
>
> which is way more natural representation.
Proper thing to do is have helper functions available to drivers to test
the pedit, and not just switch on the offset. Why do I say that?
Well, consider a pedit on UDP dport, with mask 0x00ff (network endian).
Now as a u32 pedit that's 0x000000ff offset 0, so field-blind offset
calculation (ffs in flow_action_mangle_entry()) will turn that into
offset 3 mask 0xff. Now driver does
switch(offset) { /* 3 */
case offsetof(struct udphdr, dest): /* 2 */
/* Whoops, we never get here! */
}
Do you see the problem?
-Ed
^ permalink raw reply
* Re: [PATCH bpf-next 0/6] selftests/bpf: move sockopt tests under test_progs
From: Stanislav Fomichev @ 2019-09-06 15:18 UTC (permalink / raw)
To: Andrii Nakryiko
Cc: Alexei Starovoitov, Stanislav Fomichev, Networking, bpf,
David S. Miller, Alexei Starovoitov, Daniel Borkmann
In-Reply-To: <CAEf4Bzaoh0Ur6Ze0VLNYqhTJ21Vp6D2NBMkb7yAeseqom=TyKA@mail.gmail.com>
On 09/06, Andrii Nakryiko wrote:
> On Wed, Sep 4, 2019 at 4:03 PM Alexei Starovoitov
> <alexei.starovoitov@gmail.com> wrote:
> >
> > On Wed, Sep 04, 2019 at 09:25:03AM -0700, Stanislav Fomichev wrote:
> > > Now that test_progs is shaping into more generic test framework,
> > > let's convert sockopt tests to it. This requires adding
> > > a helper to create and join a cgroup first (test__join_cgroup).
> > > Since we already hijack stdout/stderr that shouldn't be
> > > a problem (cgroup helpers log to stderr).
> > >
> > > The rest of the patches just move sockopt tests files under prog_tests/
> > > and do the required small adjustments.
> >
> > Looks good. Thank you for working on it.
> > Could you de-verbose setsockopt test a bit?
> > #23/32 setsockopt: deny write ctx->retval:OK
> > #23/33 setsockopt: deny read ctx->retval:OK
> > #23/34 setsockopt: deny writing to ctx->optval:OK
> > #23/35 setsockopt: deny writing to ctx->optval_end:OK
> > #23/36 setsockopt: allow IP_TOS <= 128:OK
> > #23/37 setsockopt: deny IP_TOS > 128:OK
> > 37 subtests is a bit too much spam.
>
> If we merged test_btf into test_progs, we'd have >150 subtests, which
> would be pretty verbose as well. But the benefit of subtest is that
> you can run just that sub-test and debug/verify just it, without all
> the rest stuff.
>
> So I'm wondering, if too many lines of default output is the only
> problem, should we just not output per-subtest line by default?
Ack, we can output per-subtest line if it fails so it's easy to re-run;
otherwise, hiding by default sounds good. I'll prepare a v3 sometime
today; Alexei, let us know if you disagree.
^ permalink raw reply
* Re: [PATCH bpf-next v10 2/4] bpf: new helper to obtain namespace data from current task New bpf helper bpf_get_current_pidns_info.
From: Al Viro @ 2019-09-06 15:24 UTC (permalink / raw)
To: Carlos Neira; +Cc: netdev, yhs, ebiederm, brouer, bpf
In-Reply-To: <20190906150952.23066-3-cneirabustos@gmail.com>
On Fri, Sep 06, 2019 at 11:09:50AM -0400, Carlos Neira wrote:
> +BPF_CALL_2(bpf_get_current_pidns_info, struct bpf_pidns_info *, pidns_info, u32,
> + size)
> +{
> + const char *pidns_path = "/proc/self/ns/pid";
> + fname = kmem_cache_alloc(names_cachep, GFP_ATOMIC);
> + if (unlikely(!fname)) {
> + ret = -ENOMEM;
> + goto clear;
> + }
> + const size_t fnamesize = offsetof(struct filename, iname[1]);
> + struct filename *tmp;
> +
> + tmp = kmalloc(fnamesize, GFP_ATOMIC);
> + if (unlikely(!tmp)) {
> + __putname(fname);
> + ret = -ENOMEM;
> + goto clear;
> + }
> +
> + tmp->name = (char *)fname;
> + fname = tmp;
> + len = strlen(pidns_path) + 1;
> + memcpy((char *)fname->name, pidns_path, len);
> + fname->uptr = NULL;
> + fname->aname = NULL;
> + fname->refcnt = 1;
> +
> + ret = filename_lookup(AT_FDCWD, fname, 0, &kp, NULL);
> + if (ret)
> + goto clear;
Where do I begin?
* getname_kernel() is there for purpose
* so's kern_path(), damnit
> +
> + inode = d_backing_inode(kp.dentry);
> + pidns_info->dev = (u32)inode->i_rdev;
* ... and this is utter bollocks - userland doesn't
have to have procfs mounted anywhere; it doesn't have to
have it mounted on /proc; it can bloody well bind a symlink
to anywhere and anythin on top of /proc/self even if its
has procfs mounted there.
This is fundamentally wrong; nothing in the kernel
(bpf very much included) has any business assuming anything
about what's mounted where. And while we are at it,
how deep on kernel stack can that thing be called?
Because pathname resolution can bring all kinds of interesting
crap into the game - consider e.g. NFS4 referral traversal.
And it can occur - see above about the lack of warranties
that your pathwalk will go to procfs and will remain there.
NAKed-by: Al Viro <viro@zeniv.linux.org.uk>
^ permalink raw reply
* Re: [PATCH] net/skbuff: silence warnings under memory pressure
From: Petr Mladek @ 2019-09-06 15:32 UTC (permalink / raw)
To: Sergey Senozhatsky
Cc: Steven Rostedt, davem, Eric Dumazet, Sergey Senozhatsky,
Michal Hocko, linux-mm, Qian Cai, linux-kernel, netdev
In-Reply-To: <20190906033900.GB1253@jagdpanzerIV>
On Fri 2019-09-06 12:39:00, Sergey Senozhatsky wrote:
> On (09/05/19 13:23), Steven Rostedt wrote:
> > > I think we can queue significantly much less irq_work-s from printk().
> > >
> > > Petr, Steven, what do you think?
>
> [..]
> > I mean, really, do we need to keep calling wake up if it
> > probably never even executed?
>
> I guess ratelimiting you are talking about ("if it probably never even
> executed") would be to check if we have already called wake up on the
> log_wait ->head. For that we need to, at least, take log_wait spin_lock
> and check that ->head is still in TASK_INTERRUPTIBLE; which is (quite,
> but not exactly) close to what wake_up_interruptible() does - it doesn't
> wake up the same task twice, it bails out on `p->state & state' check.
I have just realized that only sleeping tasks are in the waitqueue.
It is already handled by waitqueue_active() check.
I am afraid that we could not ratelimit the wakeups. The userspace
loggers might then miss the last lines for a long.
We could move wake_up_klogd() back to console_unlock(). But it might
end up with a back-and-forth games according to who is currently
complaining.
Sigh, I still suggest to ratelimit the warning about failed
allocation.
Best Regards,
Petr
^ permalink raw reply
* Re: [PATCH net-next] ionic: Remove unused including <linux/version.h>
From: Shannon Nelson @ 2019-09-06 15:36 UTC (permalink / raw)
To: YueHaibing, Pensando Drivers, David S . Miller; +Cc: netdev, kernel-janitors
In-Reply-To: <20190906095410.107596-1-yuehaibing@huawei.com>
On 9/6/19 2:54 AM, YueHaibing wrote:
> Remove including <linux/version.h> that don't need it.
>
> Signed-off-by: YueHaibing <yuehaibing@huawei.com>
> ---
> drivers/net/ethernet/pensando/ionic/ionic_main.c | 1 -
> 1 file changed, 1 deletion(-)
>
> diff --git a/drivers/net/ethernet/pensando/ionic/ionic_main.c b/drivers/net/ethernet/pensando/ionic/ionic_main.c
> index 5ec67f3f1853..15e432386b35 100644
> --- a/drivers/net/ethernet/pensando/ionic/ionic_main.c
> +++ b/drivers/net/ethernet/pensando/ionic/ionic_main.c
> @@ -2,7 +2,6 @@
> /* Copyright(c) 2017 - 2019 Pensando Systems, Inc */
>
> #include <linux/module.h>
> -#include <linux/version.h>
> #include <linux/netdevice.h>
> #include <linux/utsname.h>
>
>
Acked-by: Shannon Nelson <snelson@pensando.io>
^ permalink raw reply
* Re: [PATCH net] net: gso: Fix skb_segment splat when splitting gso_size mangled skb having linear-headed frag_list
From: Shmulik Ladkani @ 2019-09-06 15:37 UTC (permalink / raw)
To: Willem de Bruijn, Eric Dumazet, Alexander Duyck
Cc: Daniel Borkmann, eyal, netdev, Shmulik Ladkani
In-Reply-To: <CAF=yD-JB6TMQuyaxzLX8=9CZZF+Zk5EmniSkx_F81bVc87XqJw@mail.gmail.com>
On Fri, 6 Sep 2019 10:49:55 -0400
Willem de Bruijn <willemdebruijn.kernel@gmail.com> wrote:
> But I wonder whether it is a given that head_skb has headlen.
This is what I observed for GRO packets that do have headlen frag_list
members: the 'head_skb' itself had a headlen too, and its head was
built using the original gso_size (similar to the frag_list members).
Maybe Eric can comment better.
> Btw, it seems slightly odd to me tot test head_frag before testing
> headlen in the v2 patch.
Requested by Alexander. I'm fine either way.
Thanks
Shmulik
^ permalink raw reply
* Re: [PATCH net] net: gso: Fix skb_segment splat when splitting gso_size mangled skb having linear-headed frag_list
From: Alexander Duyck @ 2019-09-06 15:42 UTC (permalink / raw)
To: Shmulik Ladkani
Cc: Willem de Bruijn, Eric Dumazet, Daniel Borkmann, eyal, netdev,
Shmulik Ladkani
In-Reply-To: <20190906183707.3eaacd79@pixies>
On Fri, Sep 6, 2019 at 8:37 AM Shmulik Ladkani <shmulik@metanetworks.com> wrote:
>
> On Fri, 6 Sep 2019 10:49:55 -0400
> Willem de Bruijn <willemdebruijn.kernel@gmail.com> wrote:
>
> > But I wonder whether it is a given that head_skb has headlen.
>
> This is what I observed for GRO packets that do have headlen frag_list
> members: the 'head_skb' itself had a headlen too, and its head was
> built using the original gso_size (similar to the frag_list members).
>
> Maybe Eric can comment better.
>
> > Btw, it seems slightly odd to me tot test head_frag before testing
> > headlen in the v2 patch.
>
> Requested by Alexander. I'm fine either way.
Yeah, my thought on that was "do we care about the length if the data
is stored in a head_frag?". I suppose you could flip the logic and
make it "do we care about it being a head_frag if there is no data
there?". The reason I had suggested the head_frag test first was
because it was a single test bit whereas the length requires reading
two fields and doing a comparison.
For either ordering it is fine by me. So if we need to feel free to
swap those two tests for a v3.
Reviewed-by: Alexander Duyck <alexander.h.duyck@linux.intel.com>
^ permalink raw reply
* Re: [PATCH bpf-next v10 2/4] bpf: new helper to obtain namespace data from current task New bpf helper bpf_get_current_pidns_info.
From: Al Viro @ 2019-09-06 15:46 UTC (permalink / raw)
To: Carlos Neira; +Cc: netdev, yhs, ebiederm, brouer, bpf
In-Reply-To: <20190906152435.GW1131@ZenIV.linux.org.uk>
On Fri, Sep 06, 2019 at 04:24:35PM +0100, Al Viro wrote:
> > + tmp = kmalloc(fnamesize, GFP_ATOMIC);
> > + if (unlikely(!tmp)) {
> > + __putname(fname);
> > + ret = -ENOMEM;
> > + goto clear;
> > + }
> > +
> > + tmp->name = (char *)fname;
> > + fname = tmp;
> > + len = strlen(pidns_path) + 1;
> > + memcpy((char *)fname->name, pidns_path, len);
> > + fname->uptr = NULL;
> > + fname->aname = NULL;
> > + fname->refcnt = 1;
> > +
> > + ret = filename_lookup(AT_FDCWD, fname, 0, &kp, NULL);
> > + if (ret)
> > + goto clear;
>
> Where do I begin?
> * getname_kernel() is there for purpose
> * so's kern_path(), damnit
Oh, and filename_lookup() *CAN* sleep, obviously. So that
GFP_ATOMIC above is completely pointless.
> > +
> > + inode = d_backing_inode(kp.dentry);
> > + pidns_info->dev = (u32)inode->i_rdev;
Why are plaing with device number, anyway? And why would it
be anything other than 0?
^ permalink raw reply
* Re: [PATCH net-next,v3 0/4] flow_offload: update mangle action representation
From: Pablo Neira Ayuso @ 2019-09-06 15:58 UTC (permalink / raw)
To: Edward Cree
Cc: netfilter-devel, davem, netdev, jakub.kicinski, jiri, saeedm,
vishal, vladbu
In-Reply-To: <be6eee6b-9d58-f0f7-571b-7e473612e2b3@solarflare.com>
Hi Edward,
On Fri, Sep 06, 2019 at 04:13:17PM +0100, Edward Cree wrote:
> On 06/09/2019 15:50, Pablo Neira Ayuso wrote:
> > On Fri, Sep 06, 2019 at 02:37:16PM +0100, Edward Cree wrote:
[...]
> >> So an IPv6 address mangle only comes as a single action if it's from
> >> netfilter, not if it's coming from TC pedit.
> > Driver gets one single action from tc/netfilter (and potentially
> > ethtool if it would support for this), it comes as a single action
> > from both subsystems:
> >
> > front-end -----> flow_rule API -----> drivers
> >
> > Front-end need to translate their representation to this
> > FLOW_ACTION_MANGLE layout.
> >
> > By front-end, I refer to ethtool/netfilter/tc.
>
> In your patch, flow_action_mangle() looks only at the offset and type
> (add vs. set) of each pedit, coalesces them if compatible (so, unless
> I'm misreading the code, it _will_ coalesce adjacent pedits to
> contiguous fields like UDP sport & dport, unlike what you said),
> because it doesn't know whether two contiguous pedits are part of the
> same field or not (it doesn't have any knowledge of 'fields' at all).
In tc pedit ex, those are _indeed_ two separated actions:
* One to mangle UDP sport.
* One to mangle UDP dport.
They are _not_ one as you describe above.
The coalesce routine I made does _not_ coalesce fields in two
different actions.
> And for you to change that, while still coalescing IPv6 pedits, you
> would need flow_action_mangle() to know what fields each pedit is in.
In the particular case of IPv6 address, 'tc pedit ex' generates one
single action with 4 nkeys. So this is:
* One action to mangle four 32-bits words (the number of words in tc
pedit is stored in the nkeys field).
The coalesce routine I made in this case merges the four 32-bits words
into one single action.
[...]
> >> Yes, but we don't add code/features to the kernel based on what we
> >> *could* use it for later
> > This is already useful: Look at the cxgb driver in particular, it's
> > way easier to read.
>
> Have you tested it? Again, I might be misreading, but it looks like
> your flow_action_mangle() *will* coalesce sport & dport, which it
> appears will break that cxgb code.
Because sport and dport are not place in the same action by tc pedit
ex, _that cannot happen_.
> > Other existing drivers do not need to do things like:
> >
> > case round_down(offsetof(struct iphdr, tos), 4):
> >
> > and the play with masks to infer if the user wants to mangle the TOS
> > field, they can just do:
> >
> > case offsetof(struct iphdr, tos):
> >
> > which is way more natural representation.
>
> Proper thing to do is have helper functions available to drivers to test
> the pedit, and not just switch on the offset. Why do I say that?
>
> Well, consider a pedit on UDP dport, with mask 0x00ff (network endian).
> Now as a u32 pedit that's 0x000000ff offset 0, so field-blind offset
> calculation (ffs in flow_action_mangle_entry()) will turn that into
> offset 3 mask 0xff. Now driver does
> switch(offset) { /* 3 */
> case offsetof(struct udphdr, dest): /* 2 */
> /* Whoops, we never get here! */
> }
>
> Do you see the problem?
This scenario you describe cannot _work_ right now, with the existing
code. Without my patchset, this scenario you describe does _not_ work,
The drivers in the tree need a mask of 0xffff to infer that this is
UDP dport.
The 'tc pedit ex' infrastructure does not allow for the scenario that
you describe above.
No driver in the tree allow for what you describe already.
^ permalink raw reply
* Re: [PATCH bpf-next v10 2/4] bpf: new helper to obtain namespace data from current task New bpf helper bpf_get_current_pidns_info.
From: Al Viro @ 2019-09-06 16:00 UTC (permalink / raw)
To: Carlos Neira; +Cc: netdev, yhs, ebiederm, brouer, bpf
In-Reply-To: <20190906154647.GA19707@ZenIV.linux.org.uk>
On Fri, Sep 06, 2019 at 04:46:47PM +0100, Al Viro wrote:
> > Where do I begin?
> > * getname_kernel() is there for purpose
> > * so's kern_path(), damnit
>
> Oh, and filename_lookup() *CAN* sleep, obviously. So that
> GFP_ATOMIC above is completely pointless.
>
> > > +
> > > + inode = d_backing_inode(kp.dentry);
> > > + pidns_info->dev = (u32)inode->i_rdev;
In the original variant of patchset it used to be ->i_sb->s_dev,
which is also bloody strange - you are not asking filename_lookup()
to follow symlinks, so you'd get that of whatever filesystem
/proc/self/ns resides on.
->i_rdev use makes no sense whatsoever - it's a symlink and
neither it nor its target are device nodes; ->i_rdev will be
left zero for both.
What data are you really trying to get there?
^ permalink raw reply
* [net-next 00/11] nfp: implement firmware loading policy
From: Simon Horman @ 2019-09-06 16:00 UTC (permalink / raw)
To: David Miller
Cc: Jakub Kicinski, netdev, oss-drivers, Dirk van der Merwe,
Simon Horman
Hi Dave,
I am handling maintenance of the nfp diver in Jakub's absence while he is
on vacation this week and next, and I am sending this patchset in that
capacity.
Regarding the patches, Dirk says:
This series adds configuration capabilities to the firmware loading policy of
the NFP driver.
NFP firmware loading is controlled via three HWinfo keys which can be set per
device: 'abi_drv_reset', 'abi_drv_load_ifc' and 'app_fw_from_flash'.
Refer to patch #11 for more detail on how these control the firmware loading.
In order to configure the full extend of FW loading policy, a new devlink
parameter has been introduced, 'reset_dev_on_drv_probe', which controls if the
driver should reset the device when it's probed. This, inconjunction with the
existing 'fw_load_policy' (extended to include a 'disk' option) provides the
means to tweak the NFP HWinfo keys as required by users.
Patches 1 and 2 adds the devlink modifications and patches 3 through 9 adds the
support into the NFP driver. Furthermore, the last 2 patches are documentation
only.
Dirk van der Merwe (11):
devlink: extend 'fw_load_policy' values
devlink: add 'reset_dev_on_drv_probe' param
nfp: nsp: add support for fw_loaded command
nfp: nsp: add support for optional hwinfo lookup
nfp: nsp: add support for hwinfo set operation
nfp: honor FW reset and loading policies
nfp: add devlink param infrastructure
nfp: devlink: add 'fw_load_policy' support
nfp: devlink: add 'reset_dev_on_drv_probe' support
kdoc: fix nfp_fw_load documentation
Documentation: nfp: add nfp driver specific notes
.../networking/device_drivers/netronome/nfp.rst | 133 +++++++++++
Documentation/networking/devlink-params-nfp.txt | 5 +
Documentation/networking/devlink-params.txt | 16 ++
drivers/net/ethernet/netronome/nfp/Makefile | 1 +
drivers/net/ethernet/netronome/nfp/devlink_param.c | 253 +++++++++++++++++++++
drivers/net/ethernet/netronome/nfp/nfp_main.c | 141 +++++++++---
drivers/net/ethernet/netronome/nfp/nfp_main.h | 5 +
drivers/net/ethernet/netronome/nfp/nfp_net_main.c | 7 +
.../net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c | 77 ++++++-
.../net/ethernet/netronome/nfp/nfpcore/nfp_nsp.h | 29 +++
include/net/devlink.h | 4 +
include/uapi/linux/devlink.h | 8 +
net/core/devlink.c | 5 +
13 files changed, 654 insertions(+), 30 deletions(-)
create mode 100644 Documentation/networking/device_drivers/netronome/nfp.rst
create mode 100644 Documentation/networking/devlink-params-nfp.txt
create mode 100644 drivers/net/ethernet/netronome/nfp/devlink_param.c
--
2.11.0
^ permalink raw reply
* [net-next 01/11] devlink: extend 'fw_load_policy' values
From: Simon Horman @ 2019-09-06 16:00 UTC (permalink / raw)
To: David Miller
Cc: Jakub Kicinski, netdev, oss-drivers, Dirk van der Merwe,
Simon Horman
In-Reply-To: <20190906160101.14866-1-simon.horman@netronome.com>
From: Dirk van der Merwe <dirk.vandermerwe@netronome.com>
Add the 'disk' value to the generic 'fw_load_policy' devlink parameter.
This value indicates that firmware should always be loaded from disk
only.
Signed-off-by: Dirk van der Merwe <dirk.vandermerwe@netronome.com>
Signed-off-by: Simon Horman <simon.horman@netronome.com>
---
Documentation/networking/devlink-params.txt | 2 ++
include/uapi/linux/devlink.h | 1 +
2 files changed, 3 insertions(+)
diff --git a/Documentation/networking/devlink-params.txt b/Documentation/networking/devlink-params.txt
index 2d26434ddcf8..fadb5436188d 100644
--- a/Documentation/networking/devlink-params.txt
+++ b/Documentation/networking/devlink-params.txt
@@ -48,4 +48,6 @@ fw_load_policy [DEVICE, GENERIC]
Load firmware version preferred by the driver.
* DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_FLASH (1)
Load firmware currently stored in flash.
+ * DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_DISK (2)
+ Load firmware currently available on host's disk.
Type: u8
diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index 546e75dd74ac..c25cc29a6647 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -202,6 +202,7 @@ enum devlink_param_cmode {
enum devlink_param_fw_load_policy_value {
DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_DRIVER,
DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_FLASH,
+ DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_DISK,
};
enum {
--
2.11.0
^ permalink raw reply related
* [net-next 02/11] devlink: add 'reset_dev_on_drv_probe' param
From: Simon Horman @ 2019-09-06 16:00 UTC (permalink / raw)
To: David Miller
Cc: Jakub Kicinski, netdev, oss-drivers, Dirk van der Merwe,
Simon Horman
In-Reply-To: <20190906160101.14866-1-simon.horman@netronome.com>
From: Dirk van der Merwe <dirk.vandermerwe@netronome.com>
Add the 'reset_dev_on_drv_probe' devlink parameter, controlling the
device reset policy on driver probe.
This parameter is useful in conjunction with the existing
'fw_load_policy' parameter.
Signed-off-by: Dirk van der Merwe <dirk.vandermerwe@netronome.com>
Signed-off-by: Simon Horman <simon.horman@netronome.com>
---
Documentation/networking/devlink-params.txt | 14 ++++++++++++++
include/net/devlink.h | 4 ++++
include/uapi/linux/devlink.h | 7 +++++++
net/core/devlink.c | 5 +++++
4 files changed, 30 insertions(+)
diff --git a/Documentation/networking/devlink-params.txt b/Documentation/networking/devlink-params.txt
index fadb5436188d..f9e30d686243 100644
--- a/Documentation/networking/devlink-params.txt
+++ b/Documentation/networking/devlink-params.txt
@@ -51,3 +51,17 @@ fw_load_policy [DEVICE, GENERIC]
* DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_DISK (2)
Load firmware currently available on host's disk.
Type: u8
+
+reset_dev_on_drv_probe [DEVICE, GENERIC]
+ Controls the device's reset policy on driver probe.
+ Valid values:
+ * DEVLINK_PARAM_RESET_DEV_VALUE_UNKNOWN (0)
+ Unknown or invalid value.
+ * DEVLINK_PARAM_RESET_DEV_VALUE_ALWAYS (1)
+ Always reset device on driver probe.
+ * DEVLINK_PARAM_RESET_DEV_VALUE_NEVER (2)
+ Never reset device on driver probe.
+ * DEVLINK_PARAM_RESET_DEV_VALUE_DISK (3)
+ Reset only if device firmware can be found in the
+ filesystem.
+ Type: u8
diff --git a/include/net/devlink.h b/include/net/devlink.h
index 460bc629d1a4..d880de5b8d3a 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -398,6 +398,7 @@ enum devlink_param_generic_id {
DEVLINK_PARAM_GENERIC_ID_MSIX_VEC_PER_PF_MAX,
DEVLINK_PARAM_GENERIC_ID_MSIX_VEC_PER_PF_MIN,
DEVLINK_PARAM_GENERIC_ID_FW_LOAD_POLICY,
+ DEVLINK_PARAM_GENERIC_ID_RESET_DEV,
/* add new param generic ids above here*/
__DEVLINK_PARAM_GENERIC_ID_MAX,
@@ -428,6 +429,9 @@ enum devlink_param_generic_id {
#define DEVLINK_PARAM_GENERIC_FW_LOAD_POLICY_NAME "fw_load_policy"
#define DEVLINK_PARAM_GENERIC_FW_LOAD_POLICY_TYPE DEVLINK_PARAM_TYPE_U8
+#define DEVLINK_PARAM_GENERIC_RESET_DEV_NAME "reset_dev_on_drv_probe"
+#define DEVLINK_PARAM_GENERIC_RESET_DEV_TYPE DEVLINK_PARAM_TYPE_U8
+
#define DEVLINK_PARAM_GENERIC(_id, _cmodes, _get, _set, _validate) \
{ \
.id = DEVLINK_PARAM_GENERIC_ID_##_id, \
diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index c25cc29a6647..3172d1b3329f 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -205,6 +205,13 @@ enum devlink_param_fw_load_policy_value {
DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_DISK,
};
+enum devlink_param_reset_dev_value {
+ DEVLINK_PARAM_RESET_DEV_VALUE_UNKNOWN,
+ DEVLINK_PARAM_RESET_DEV_VALUE_ALWAYS,
+ DEVLINK_PARAM_RESET_DEV_VALUE_NEVER,
+ DEVLINK_PARAM_RESET_DEV_VALUE_DISK,
+};
+
enum {
DEVLINK_ATTR_STATS_RX_PACKETS, /* u64 */
DEVLINK_ATTR_STATS_RX_BYTES, /* u64 */
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 6e52d639dac6..e8bc96f104a7 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -2852,6 +2852,11 @@ static const struct devlink_param devlink_param_generic[] = {
.name = DEVLINK_PARAM_GENERIC_FW_LOAD_POLICY_NAME,
.type = DEVLINK_PARAM_GENERIC_FW_LOAD_POLICY_TYPE,
},
+ {
+ .id = DEVLINK_PARAM_GENERIC_ID_RESET_DEV,
+ .name = DEVLINK_PARAM_GENERIC_RESET_DEV_NAME,
+ .type = DEVLINK_PARAM_GENERIC_RESET_DEV_TYPE,
+ },
};
static int devlink_param_generic_verify(const struct devlink_param *param)
--
2.11.0
^ permalink raw reply related
* [net-next 04/11] nfp: nsp: add support for optional hwinfo lookup
From: Simon Horman @ 2019-09-06 16:00 UTC (permalink / raw)
To: David Miller
Cc: Jakub Kicinski, netdev, oss-drivers, Dirk van der Merwe,
Simon Horman
In-Reply-To: <20190906160101.14866-1-simon.horman@netronome.com>
From: Dirk van der Merwe <dirk.vandermerwe@netronome.com>
There are cases where we want to read a hwinfo entry from the NFP, and
if it doesn't exist, use a default value instead.
To support this, we must silence warning/error messages when the hwinfo
entry doesn't exist since this is a valid use case. The NSP command
structure provides the ability to silence command errors, in which case
the caller should log any command errors appropriately. Protocol errors
are unaffected by this.
Signed-off-by: Dirk van der Merwe <dirk.vandermerwe@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Simon Horman <simon.horman@netronome.com>
---
.../net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c | 52 ++++++++++++++++++++--
.../net/ethernet/netronome/nfp/nfpcore/nfp_nsp.h | 2 +
2 files changed, 50 insertions(+), 4 deletions(-)
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c
index b4c5dc5f7404..deee91cbf1b2 100644
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c
@@ -144,6 +144,8 @@ struct nfp_nsp {
* @option: NFP SP Command Argument
* @buf: NFP SP Buffer Address
* @error_cb: Callback for interpreting option if error occurred
+ * @error_quiet:Don't print command error/warning. Protocol errors are still
+ * logged.
*/
struct nfp_nsp_command_arg {
u16 code;
@@ -152,6 +154,7 @@ struct nfp_nsp_command_arg {
u32 option;
u64 buf;
void (*error_cb)(struct nfp_nsp *state, u32 ret_val);
+ bool error_quiet;
};
/**
@@ -406,8 +409,10 @@ __nfp_nsp_command(struct nfp_nsp *state, const struct nfp_nsp_command_arg *arg)
err = FIELD_GET(NSP_STATUS_RESULT, reg);
if (err) {
- nfp_warn(cpp, "Result (error) code set: %d (%d) command: %d\n",
- -err, (int)ret_val, arg->code);
+ if (!arg->error_quiet)
+ nfp_warn(cpp, "Result (error) code set: %d (%d) command: %d\n",
+ -err, (int)ret_val, arg->code);
+
if (arg->error_cb)
arg->error_cb(state, ret_val);
else
@@ -892,12 +897,14 @@ int nfp_nsp_load_stored_fw(struct nfp_nsp *state)
}
static int
-__nfp_nsp_hwinfo_lookup(struct nfp_nsp *state, void *buf, unsigned int size)
+__nfp_nsp_hwinfo_lookup(struct nfp_nsp *state, void *buf, unsigned int size,
+ bool optional)
{
struct nfp_nsp_command_buf_arg hwinfo_lookup = {
{
.code = SPCODE_HWINFO_LOOKUP,
.option = size,
+ .error_quiet = optional,
},
.in_buf = buf,
.in_size = size,
@@ -914,7 +921,7 @@ int nfp_nsp_hwinfo_lookup(struct nfp_nsp *state, void *buf, unsigned int size)
size = min_t(u32, size, NFP_HWINFO_LOOKUP_SIZE);
- err = __nfp_nsp_hwinfo_lookup(state, buf, size);
+ err = __nfp_nsp_hwinfo_lookup(state, buf, size, false);
if (err)
return err;
@@ -926,6 +933,43 @@ int nfp_nsp_hwinfo_lookup(struct nfp_nsp *state, void *buf, unsigned int size)
return 0;
}
+int nfp_nsp_hwinfo_lookup_optional(struct nfp_nsp *state, void *buf,
+ unsigned int size, const char *default_val)
+{
+ int err;
+
+ /* Ensure that the default value is usable irrespective of whether
+ * it is actually going to be used.
+ */
+ if (strnlen(default_val, size) == size)
+ return -EINVAL;
+
+ if (!nfp_nsp_has_hwinfo_lookup(state)) {
+ strcpy(buf, default_val);
+ return 0;
+ }
+
+ size = min_t(u32, size, NFP_HWINFO_LOOKUP_SIZE);
+
+ err = __nfp_nsp_hwinfo_lookup(state, buf, size, true);
+ if (err) {
+ if (err == -ENOENT) {
+ strcpy(buf, default_val);
+ return 0;
+ }
+
+ nfp_err(state->cpp, "NSP HWinfo lookup failed: %d\n", err);
+ return err;
+ }
+
+ if (strnlen(buf, size) == size) {
+ nfp_err(state->cpp, "NSP HWinfo value not NULL-terminated\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
int nfp_nsp_fw_loaded(struct nfp_nsp *state)
{
const struct nfp_nsp_command_arg arg = {
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.h b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.h
index 4ceecff347c6..a8985c2eb1f1 100644
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.h
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.h
@@ -22,6 +22,8 @@ int nfp_nsp_write_flash(struct nfp_nsp *state, const struct firmware *fw);
int nfp_nsp_mac_reinit(struct nfp_nsp *state);
int nfp_nsp_load_stored_fw(struct nfp_nsp *state);
int nfp_nsp_hwinfo_lookup(struct nfp_nsp *state, void *buf, unsigned int size);
+int nfp_nsp_hwinfo_lookup_optional(struct nfp_nsp *state, void *buf,
+ unsigned int size, const char *default_val);
int nfp_nsp_fw_loaded(struct nfp_nsp *state);
int nfp_nsp_read_module_eeprom(struct nfp_nsp *state, int eth_index,
unsigned int offset, void *data,
--
2.11.0
^ permalink raw reply related
* [net-next 05/11] nfp: nsp: add support for hwinfo set operation
From: Simon Horman @ 2019-09-06 16:00 UTC (permalink / raw)
To: David Miller
Cc: Jakub Kicinski, netdev, oss-drivers, Dirk van der Merwe,
Simon Horman
In-Reply-To: <20190906160101.14866-1-simon.horman@netronome.com>
From: Dirk van der Merwe <dirk.vandermerwe@netronome.com>
Add support for the NSP HWinfo set command. This closely follows the
HWinfo lookup command.
Signed-off-by: Dirk van der Merwe <dirk.vandermerwe@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Simon Horman <simon.horman@netronome.com>
---
drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c | 15 +++++++++++++++
drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.h | 6 ++++++
2 files changed, 21 insertions(+)
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c
index deee91cbf1b2..f18e787fa9ad 100644
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c
@@ -96,6 +96,7 @@ enum nfp_nsp_cmd {
SPCODE_NSP_IDENTIFY = 13, /* Read NSP version */
SPCODE_FW_STORED = 16, /* If no FW loaded, load flash app FW */
SPCODE_HWINFO_LOOKUP = 17, /* Lookup HWinfo with overwrites etc. */
+ SPCODE_HWINFO_SET = 18, /* Set HWinfo entry */
SPCODE_FW_LOADED = 19, /* Is application firmware loaded */
SPCODE_VERSIONS = 21, /* Report FW versions */
SPCODE_READ_SFF_EEPROM = 22, /* Read module EEPROM */
@@ -970,6 +971,20 @@ int nfp_nsp_hwinfo_lookup_optional(struct nfp_nsp *state, void *buf,
return 0;
}
+int nfp_nsp_hwinfo_set(struct nfp_nsp *state, void *buf, unsigned int size)
+{
+ struct nfp_nsp_command_buf_arg hwinfo_set = {
+ {
+ .code = SPCODE_HWINFO_SET,
+ .option = size,
+ },
+ .in_buf = buf,
+ .in_size = size,
+ };
+
+ return nfp_nsp_command_buf(state, &hwinfo_set);
+}
+
int nfp_nsp_fw_loaded(struct nfp_nsp *state)
{
const struct nfp_nsp_command_arg arg = {
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.h b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.h
index a8985c2eb1f1..055fda05880d 100644
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.h
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.h
@@ -24,6 +24,7 @@ int nfp_nsp_load_stored_fw(struct nfp_nsp *state);
int nfp_nsp_hwinfo_lookup(struct nfp_nsp *state, void *buf, unsigned int size);
int nfp_nsp_hwinfo_lookup_optional(struct nfp_nsp *state, void *buf,
unsigned int size, const char *default_val);
+int nfp_nsp_hwinfo_set(struct nfp_nsp *state, void *buf, unsigned int size);
int nfp_nsp_fw_loaded(struct nfp_nsp *state);
int nfp_nsp_read_module_eeprom(struct nfp_nsp *state, int eth_index,
unsigned int offset, void *data,
@@ -44,6 +45,11 @@ static inline bool nfp_nsp_has_hwinfo_lookup(struct nfp_nsp *state)
return nfp_nsp_get_abi_ver_minor(state) > 24;
}
+static inline bool nfp_nsp_has_hwinfo_set(struct nfp_nsp *state)
+{
+ return nfp_nsp_get_abi_ver_minor(state) > 25;
+}
+
static inline bool nfp_nsp_has_fw_loaded(struct nfp_nsp *state)
{
return nfp_nsp_get_abi_ver_minor(state) > 25;
--
2.11.0
^ permalink raw reply related
* [net-next 03/11] nfp: nsp: add support for fw_loaded command
From: Simon Horman @ 2019-09-06 16:00 UTC (permalink / raw)
To: David Miller
Cc: Jakub Kicinski, netdev, oss-drivers, Dirk van der Merwe,
Simon Horman
In-Reply-To: <20190906160101.14866-1-simon.horman@netronome.com>
From: Dirk van der Merwe <dirk.vandermerwe@netronome.com>
Add support for the simple command that indicates whether application
firmware is loaded.
Signed-off-by: Dirk van der Merwe <dirk.vandermerwe@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Simon Horman <simon.horman@netronome.com>
---
drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c | 10 ++++++++++
drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.h | 6 ++++++
2 files changed, 16 insertions(+)
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c
index 9a08623c325d..b4c5dc5f7404 100644
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c
@@ -96,6 +96,7 @@ enum nfp_nsp_cmd {
SPCODE_NSP_IDENTIFY = 13, /* Read NSP version */
SPCODE_FW_STORED = 16, /* If no FW loaded, load flash app FW */
SPCODE_HWINFO_LOOKUP = 17, /* Lookup HWinfo with overwrites etc. */
+ SPCODE_FW_LOADED = 19, /* Is application firmware loaded */
SPCODE_VERSIONS = 21, /* Report FW versions */
SPCODE_READ_SFF_EEPROM = 22, /* Read module EEPROM */
};
@@ -925,6 +926,15 @@ int nfp_nsp_hwinfo_lookup(struct nfp_nsp *state, void *buf, unsigned int size)
return 0;
}
+int nfp_nsp_fw_loaded(struct nfp_nsp *state)
+{
+ const struct nfp_nsp_command_arg arg = {
+ .code = SPCODE_FW_LOADED,
+ };
+
+ return __nfp_nsp_command(state, &arg);
+}
+
int nfp_nsp_versions(struct nfp_nsp *state, void *buf, unsigned int size)
{
struct nfp_nsp_command_buf_arg versions = {
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.h b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.h
index 22ee6985ee1c..4ceecff347c6 100644
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.h
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.h
@@ -22,6 +22,7 @@ int nfp_nsp_write_flash(struct nfp_nsp *state, const struct firmware *fw);
int nfp_nsp_mac_reinit(struct nfp_nsp *state);
int nfp_nsp_load_stored_fw(struct nfp_nsp *state);
int nfp_nsp_hwinfo_lookup(struct nfp_nsp *state, void *buf, unsigned int size);
+int nfp_nsp_fw_loaded(struct nfp_nsp *state);
int nfp_nsp_read_module_eeprom(struct nfp_nsp *state, int eth_index,
unsigned int offset, void *data,
unsigned int len, unsigned int *read_len);
@@ -41,6 +42,11 @@ static inline bool nfp_nsp_has_hwinfo_lookup(struct nfp_nsp *state)
return nfp_nsp_get_abi_ver_minor(state) > 24;
}
+static inline bool nfp_nsp_has_fw_loaded(struct nfp_nsp *state)
+{
+ return nfp_nsp_get_abi_ver_minor(state) > 25;
+}
+
static inline bool nfp_nsp_has_versions(struct nfp_nsp *state)
{
return nfp_nsp_get_abi_ver_minor(state) > 27;
--
2.11.0
^ permalink raw reply related
* [net-next 06/11] nfp: honor FW reset and loading policies
From: Simon Horman @ 2019-09-06 16:00 UTC (permalink / raw)
To: David Miller
Cc: Jakub Kicinski, netdev, oss-drivers, Dirk van der Merwe,
Simon Horman
In-Reply-To: <20190906160101.14866-1-simon.horman@netronome.com>
From: Dirk van der Merwe <dirk.vandermerwe@netronome.com>
The firmware reset and loading policies can be controlled with the
combination of three hwinfo keys, 'abi_drv_reset', 'abi_drv_load_ifc'
and 'app_fw_from_flash'.
'app_fw_from_flash' defines which firmware should take precedence,
'Disk', 'Flash' or the 'Preferred' firmware. When 'Preferred'
is selected, the management firmware makes the decision on which
firmware will be loaded by comparing versions of the flash firmware
and the host supplied firmware.
'abi_drv_reset' defines when the driver should reset the firmware when
the driver is probed, either 'Disk' if firmware was found on disk,
'Always' reset or 'Never' reset. Note that the device is always reset
on driver unload if firmware was loaded when the driver was probed.
'abi_drv_load_ifc' defines a list of PF devices allowed to load FW on
the device.
Furthermore, we limit the cases to where the driver will unload firmware
again when the driver is removed to only when firmware was loaded by the
driver and only if this particular device was the only one that could
have loaded firmware. This is needed to avoid firmware being removed
while in use on multi-host platforms.
Signed-off-by: Dirk van der Merwe <dirk.vandermerwe@netronome.com>
Signed-off-by: Simon Horman <simon.horman@netronome.com>
---
drivers/net/ethernet/netronome/nfp/nfp_main.c | 139 +++++++++++++++++----
drivers/net/ethernet/netronome/nfp/nfp_main.h | 2 +
.../net/ethernet/netronome/nfp/nfpcore/nfp_nsp.h | 15 +++
3 files changed, 131 insertions(+), 25 deletions(-)
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_main.c b/drivers/net/ethernet/netronome/nfp/nfp_main.c
index 81679647e842..0d8649024505 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_main.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_main.c
@@ -352,7 +352,7 @@ nfp_net_fw_request(struct pci_dev *pdev, struct nfp_pf *pf, const char *name)
err = request_firmware_direct(&fw, name, &pdev->dev);
nfp_info(pf->cpp, " %s: %s\n",
- name, err ? "not found" : "found, loading...");
+ name, err ? "not found" : "found");
if (err)
return NULL;
@@ -430,6 +430,33 @@ nfp_net_fw_find(struct pci_dev *pdev, struct nfp_pf *pf)
return nfp_net_fw_request(pdev, pf, fw_name);
}
+static int
+nfp_get_fw_policy_value(struct pci_dev *pdev, struct nfp_nsp *nsp,
+ const char *key, const char *default_val, int max_val,
+ int *value)
+{
+ char hwinfo[64];
+ long hi_val;
+ int err;
+
+ snprintf(hwinfo, sizeof(hwinfo), key);
+ err = nfp_nsp_hwinfo_lookup_optional(nsp, hwinfo, sizeof(hwinfo),
+ default_val);
+ if (err)
+ return err;
+
+ err = kstrtol(hwinfo, 0, &hi_val);
+ if (err || hi_val < 0 || hi_val > max_val) {
+ dev_warn(&pdev->dev,
+ "Invalid value '%s' from '%s', ignoring\n",
+ hwinfo, key);
+ err = kstrtol(default_val, 0, &hi_val);
+ }
+
+ *value = hi_val;
+ return err;
+}
+
/**
* nfp_net_fw_load() - Load the firmware image
* @pdev: PCI Device structure
@@ -441,44 +468,106 @@ nfp_net_fw_find(struct pci_dev *pdev, struct nfp_pf *pf)
static int
nfp_fw_load(struct pci_dev *pdev, struct nfp_pf *pf, struct nfp_nsp *nsp)
{
- const struct firmware *fw;
+ bool do_reset, fw_loaded = false;
+ const struct firmware *fw = NULL;
+ int err, reset, policy, ifcs = 0;
+ char *token, *ptr;
+ char hwinfo[64];
u16 interface;
- int err;
+
+ snprintf(hwinfo, sizeof(hwinfo), "abi_drv_load_ifc");
+ err = nfp_nsp_hwinfo_lookup_optional(nsp, hwinfo, sizeof(hwinfo),
+ NFP_NSP_DRV_LOAD_IFC_DEFAULT);
+ if (err)
+ return err;
interface = nfp_cpp_interface(pf->cpp);
- if (NFP_CPP_INTERFACE_UNIT_of(interface) != 0) {
- /* Only Unit 0 should reset or load firmware */
+ ptr = hwinfo;
+ while ((token = strsep(&ptr, ","))) {
+ unsigned long interface_hi;
+
+ err = kstrtoul(token, 0, &interface_hi);
+ if (err) {
+ dev_err(&pdev->dev,
+ "Failed to parse interface '%s': %d\n",
+ token, err);
+ return err;
+ }
+
+ ifcs++;
+ if (interface == interface_hi)
+ break;
+ }
+
+ if (!token) {
dev_info(&pdev->dev, "Firmware will be loaded by partner\n");
return 0;
}
+ err = nfp_get_fw_policy_value(pdev, nsp, "abi_drv_reset",
+ NFP_NSP_DRV_RESET_DEFAULT,
+ NFP_NSP_DRV_RESET_NEVER, &reset);
+ if (err)
+ return err;
+
+ err = nfp_get_fw_policy_value(pdev, nsp, "app_fw_from_flash",
+ NFP_NSP_APP_FW_LOAD_DEFAULT,
+ NFP_NSP_APP_FW_LOAD_PREF, &policy);
+ if (err)
+ return err;
+
fw = nfp_net_fw_find(pdev, pf);
- if (!fw) {
- if (nfp_nsp_has_stored_fw_load(nsp))
- nfp_nsp_load_stored_fw(nsp);
- return 0;
+ do_reset = reset == NFP_NSP_DRV_RESET_ALWAYS ||
+ (fw && reset == NFP_NSP_DRV_RESET_DISK);
+
+ if (do_reset) {
+ dev_info(&pdev->dev, "Soft-resetting the NFP\n");
+ err = nfp_nsp_device_soft_reset(nsp);
+ if (err < 0) {
+ dev_err(&pdev->dev,
+ "Failed to soft reset the NFP: %d\n", err);
+ goto exit_release_fw;
+ }
}
- dev_info(&pdev->dev, "Soft-reset, loading FW image\n");
- err = nfp_nsp_device_soft_reset(nsp);
- if (err < 0) {
- dev_err(&pdev->dev, "Failed to soft reset the NFP: %d\n",
- err);
- goto exit_release_fw;
- }
+ if (fw && policy != NFP_NSP_APP_FW_LOAD_FLASH) {
+ if (nfp_nsp_has_fw_loaded(nsp) && nfp_nsp_fw_loaded(nsp))
+ goto exit_release_fw;
- err = nfp_nsp_load_fw(nsp, fw);
- if (err < 0) {
- dev_err(&pdev->dev, "FW loading failed: %d\n", err);
- goto exit_release_fw;
+ err = nfp_nsp_load_fw(nsp, fw);
+ if (err < 0) {
+ dev_err(&pdev->dev, "FW loading failed: %d\n",
+ err);
+ goto exit_release_fw;
+ }
+ dev_info(&pdev->dev, "Finished loading FW image\n");
+ fw_loaded = true;
+ } else if (policy != NFP_NSP_APP_FW_LOAD_DISK &&
+ nfp_nsp_has_stored_fw_load(nsp)) {
+ /* Don't propagate this error to stick with legacy driver
+ * behavior, failure will be detected later during init.
+ */
+ if (!nfp_nsp_load_stored_fw(nsp))
+ dev_info(&pdev->dev, "Finished loading stored FW image\n");
+
+ /* Don't flag the fw_loaded in this case since other devices
+ * may reuse the firmware when configured this way
+ */
+ } else {
+ dev_warn(&pdev->dev, "Didn't load firmware, please update flash or reconfigure card\n");
}
- dev_info(&pdev->dev, "Finished loading FW image\n");
-
exit_release_fw:
release_firmware(fw);
- return err < 0 ? err : 1;
+ /* We don't want to unload firmware when other devices may still be
+ * dependent on it, which could be the case if there are multiple
+ * devices that could load firmware.
+ */
+ if (fw_loaded && ifcs == 1)
+ pf->unload_fw_on_remove = true;
+
+ return err < 0 ? err : fw_loaded;
}
static void
@@ -704,7 +793,7 @@ static int nfp_pci_probe(struct pci_dev *pdev,
err_fw_unload:
kfree(pf->rtbl);
nfp_mip_close(pf->mip);
- if (pf->fw_loaded)
+ if (pf->unload_fw_on_remove)
nfp_fw_unload(pf);
kfree(pf->eth_tbl);
kfree(pf->nspi);
@@ -744,7 +833,7 @@ static void __nfp_pci_shutdown(struct pci_dev *pdev, bool unload_fw)
vfree(pf->dumpspec);
kfree(pf->rtbl);
nfp_mip_close(pf->mip);
- if (unload_fw && pf->fw_loaded)
+ if (unload_fw && pf->unload_fw_on_remove)
nfp_fw_unload(pf);
destroy_workqueue(pf->wq);
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_main.h b/drivers/net/ethernet/netronome/nfp/nfp_main.h
index b7211f200d22..bd6450b0f23f 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_main.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_main.h
@@ -64,6 +64,7 @@ struct nfp_dumpspec {
* @limit_vfs: Number of VFs supported by firmware (~0 for PCI limit)
* @num_vfs: Number of SR-IOV VFs enabled
* @fw_loaded: Is the firmware loaded?
+ * @unload_fw_on_remove:Do we need to unload firmware on driver removal?
* @ctrl_vnic: Pointer to the control vNIC if available
* @mip: MIP handle
* @rtbl: RTsym table
@@ -110,6 +111,7 @@ struct nfp_pf {
unsigned int num_vfs;
bool fw_loaded;
+ bool unload_fw_on_remove;
struct nfp_net *ctrl_vnic;
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.h b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.h
index 055fda05880d..1531c1870020 100644
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.h
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.h
@@ -102,6 +102,21 @@ enum nfp_eth_fec {
#define NFP_FEC_REED_SOLOMON BIT(NFP_FEC_REED_SOLOMON_BIT)
#define NFP_FEC_DISABLED BIT(NFP_FEC_DISABLED_BIT)
+/* Defines the valid values of the 'abi_drv_reset' hwinfo key */
+#define NFP_NSP_DRV_RESET_DISK 0
+#define NFP_NSP_DRV_RESET_ALWAYS 1
+#define NFP_NSP_DRV_RESET_NEVER 2
+#define NFP_NSP_DRV_RESET_DEFAULT "0"
+
+/* Defines the valid values of the 'app_fw_from_flash' hwinfo key */
+#define NFP_NSP_APP_FW_LOAD_DISK 0
+#define NFP_NSP_APP_FW_LOAD_FLASH 1
+#define NFP_NSP_APP_FW_LOAD_PREF 2
+#define NFP_NSP_APP_FW_LOAD_DEFAULT "2"
+
+/* Define the default value for the 'abi_drv_load_ifc' key */
+#define NFP_NSP_DRV_LOAD_IFC_DEFAULT "0x10ff"
+
/**
* struct nfp_eth_table - ETH table information
* @count: number of table entries
--
2.11.0
^ permalink raw reply related
* [net-next 08/11] nfp: devlink: add 'fw_load_policy' support
From: Simon Horman @ 2019-09-06 16:00 UTC (permalink / raw)
To: David Miller
Cc: Jakub Kicinski, netdev, oss-drivers, Dirk van der Merwe,
Simon Horman
In-Reply-To: <20190906160101.14866-1-simon.horman@netronome.com>
From: Dirk van der Merwe <dirk.vandermerwe@netronome.com>
Add support for the 'fw_load_policy' devlink parameter. The FW load
policy is controlled by the 'app_fw_from_flash' hwinfo key.
Remap the values from devlink to the hwinfo key and back.
Signed-off-by: Dirk van der Merwe <dirk.vandermerwe@netronome.com>
Signed-off-by: Simon Horman <simon.horman@netronome.com>
---
Documentation/networking/devlink-params-nfp.txt | 2 +
drivers/net/ethernet/netronome/nfp/devlink_param.c | 165 +++++++++++++++++++++
2 files changed, 167 insertions(+)
create mode 100644 Documentation/networking/devlink-params-nfp.txt
diff --git a/Documentation/networking/devlink-params-nfp.txt b/Documentation/networking/devlink-params-nfp.txt
new file mode 100644
index 000000000000..85b1e36f73a8
--- /dev/null
+++ b/Documentation/networking/devlink-params-nfp.txt
@@ -0,0 +1,2 @@
+fw_load_policy [DEVICE, GENERIC]
+ Configuration mode: permanent
diff --git a/drivers/net/ethernet/netronome/nfp/devlink_param.c b/drivers/net/ethernet/netronome/nfp/devlink_param.c
index aece98586e32..d9c74cfba560 100644
--- a/drivers/net/ethernet/netronome/nfp/devlink_param.c
+++ b/drivers/net/ethernet/netronome/nfp/devlink_param.c
@@ -3,10 +3,175 @@
#include <net/devlink.h>
+#include "nfpcore/nfp.h"
#include "nfpcore/nfp_nsp.h"
#include "nfp_main.h"
+/**
+ * struct nfp_devlink_param_u8_arg - Devlink u8 parameter get/set arguments
+ * @hwinfo_name: HWinfo key name
+ * @default_hi_val: Default HWinfo value if HWinfo doesn't exist
+ * @invalid_dl_val: Devlink value to use if HWinfo is unknown/invalid.
+ * -errno if there is no unknown/invalid value available
+ * @hi_to_dl: HWinfo to devlink value mapping
+ * @dl_to_hi: Devlink to hwinfo value mapping
+ * @max_dl_val: Maximum devlink value supported, for validation only
+ * @max_hi_val: Maximum HWinfo value supported, for validation only
+ */
+struct nfp_devlink_param_u8_arg {
+ const char *hwinfo_name;
+ const char *default_hi_val;
+ int invalid_dl_val;
+ u8 hi_to_dl[4];
+ u8 dl_to_hi[4];
+ u8 max_dl_val;
+ u8 max_hi_val;
+};
+
+static const struct nfp_devlink_param_u8_arg nfp_devlink_u8_args[] = {
+ [DEVLINK_PARAM_GENERIC_ID_FW_LOAD_POLICY] = {
+ .hwinfo_name = "app_fw_from_flash",
+ .default_hi_val = NFP_NSP_APP_FW_LOAD_DEFAULT,
+ .invalid_dl_val = -EINVAL,
+ .hi_to_dl = {
+ [NFP_NSP_APP_FW_LOAD_DISK] =
+ DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_DISK,
+ [NFP_NSP_APP_FW_LOAD_FLASH] =
+ DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_FLASH,
+ [NFP_NSP_APP_FW_LOAD_PREF] =
+ DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_DRIVER,
+ },
+ .dl_to_hi = {
+ [DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_DRIVER] =
+ NFP_NSP_APP_FW_LOAD_PREF,
+ [DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_FLASH] =
+ NFP_NSP_APP_FW_LOAD_FLASH,
+ [DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_DISK] =
+ NFP_NSP_APP_FW_LOAD_DISK,
+ },
+ .max_dl_val = DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_DISK,
+ .max_hi_val = NFP_NSP_APP_FW_LOAD_PREF,
+ },
+};
+
+static int
+nfp_devlink_param_u8_get(struct devlink *devlink, u32 id,
+ struct devlink_param_gset_ctx *ctx)
+{
+ const struct nfp_devlink_param_u8_arg *arg;
+ struct nfp_pf *pf = devlink_priv(devlink);
+ struct nfp_nsp *nsp;
+ char hwinfo[32];
+ long value;
+ int err;
+
+ if (id >= ARRAY_SIZE(nfp_devlink_u8_args))
+ return -EOPNOTSUPP;
+
+ arg = &nfp_devlink_u8_args[id];
+
+ nsp = nfp_nsp_open(pf->cpp);
+ if (IS_ERR(nsp)) {
+ err = PTR_ERR(nsp);
+ nfp_warn(pf->cpp, "can't access NSP: %d\n", err);
+ return err;
+ }
+
+ snprintf(hwinfo, sizeof(hwinfo), arg->hwinfo_name);
+ err = nfp_nsp_hwinfo_lookup_optional(nsp, hwinfo, sizeof(hwinfo),
+ arg->default_hi_val);
+ if (err) {
+ nfp_warn(pf->cpp, "HWinfo lookup failed: %d\n", err);
+ goto exit_close_nsp;
+ }
+
+ err = kstrtol(hwinfo, 0, &value);
+ if (err || value < 0 || value > arg->max_hi_val) {
+ nfp_warn(pf->cpp, "HWinfo '%s' value %li invalid\n",
+ arg->hwinfo_name, value);
+
+ if (arg->invalid_dl_val >= 0)
+ ctx->val.vu8 = arg->invalid_dl_val;
+ else
+ err = arg->invalid_dl_val;
+
+ goto exit_close_nsp;
+ }
+
+ ctx->val.vu8 = arg->hi_to_dl[value];
+
+exit_close_nsp:
+ nfp_nsp_close(nsp);
+ return err;
+}
+
+static int
+nfp_devlink_param_u8_set(struct devlink *devlink, u32 id,
+ struct devlink_param_gset_ctx *ctx)
+{
+ const struct nfp_devlink_param_u8_arg *arg;
+ struct nfp_pf *pf = devlink_priv(devlink);
+ struct nfp_nsp *nsp;
+ char hwinfo[32];
+ int err;
+
+ if (id >= ARRAY_SIZE(nfp_devlink_u8_args))
+ return -EOPNOTSUPP;
+
+ arg = &nfp_devlink_u8_args[id];
+
+ nsp = nfp_nsp_open(pf->cpp);
+ if (IS_ERR(nsp)) {
+ err = PTR_ERR(nsp);
+ nfp_warn(pf->cpp, "can't access NSP: %d\n", err);
+ return err;
+ }
+
+ /* Note the value has already been validated. */
+ snprintf(hwinfo, sizeof(hwinfo), "%s=%u",
+ arg->hwinfo_name, arg->dl_to_hi[ctx->val.vu8]);
+ err = nfp_nsp_hwinfo_set(nsp, hwinfo, sizeof(hwinfo));
+ if (err) {
+ nfp_warn(pf->cpp, "HWinfo set failed: %d\n", err);
+ goto exit_close_nsp;
+ }
+
+exit_close_nsp:
+ nfp_nsp_close(nsp);
+ return err;
+}
+
+static int
+nfp_devlink_param_u8_validate(struct devlink *devlink, u32 id,
+ union devlink_param_value val,
+ struct netlink_ext_ack *extack)
+{
+ const struct nfp_devlink_param_u8_arg *arg;
+
+ if (id >= ARRAY_SIZE(nfp_devlink_u8_args))
+ return -EOPNOTSUPP;
+
+ arg = &nfp_devlink_u8_args[id];
+
+ if (val.vu8 > arg->max_dl_val) {
+ NL_SET_ERR_MSG_MOD(extack, "parameter out of range");
+ return -EINVAL;
+ }
+
+ if (val.vu8 == arg->invalid_dl_val) {
+ NL_SET_ERR_MSG_MOD(extack, "unknown/invalid value specified");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static const struct devlink_param nfp_devlink_params[] = {
+ DEVLINK_PARAM_GENERIC(FW_LOAD_POLICY,
+ BIT(DEVLINK_PARAM_CMODE_PERMANENT),
+ nfp_devlink_param_u8_get,
+ nfp_devlink_param_u8_set,
+ nfp_devlink_param_u8_validate),
};
static int nfp_devlink_supports_params(struct nfp_pf *pf)
--
2.11.0
^ permalink raw reply related
* [net-next 11/11] Documentation: nfp: add nfp driver specific notes
From: Simon Horman @ 2019-09-06 16:01 UTC (permalink / raw)
To: David Miller
Cc: Jakub Kicinski, netdev, oss-drivers, Dirk van der Merwe,
Simon Horman
In-Reply-To: <20190906160101.14866-1-simon.horman@netronome.com>
From: Dirk van der Merwe <dirk.vandermerwe@netronome.com>
This adds the initial documentation for the NFP driver specific
documentation.
Right now, only basic information is provided about acquiring firmware
and configuring device firmware loading.
Original driver documentation can be found here:
https://github.com/Netronome/nfp-drv-kmods/blob/master/README.md
Signed-off-by: Dirk van der Merwe <dirk.vandermerwe@netronome.com>
Signed-off-by: Simon Horman <simon.horman@netronome.com>
---
.../networking/device_drivers/netronome/nfp.rst | 133 +++++++++++++++++++++
1 file changed, 133 insertions(+)
create mode 100644 Documentation/networking/device_drivers/netronome/nfp.rst
diff --git a/Documentation/networking/device_drivers/netronome/nfp.rst b/Documentation/networking/device_drivers/netronome/nfp.rst
new file mode 100644
index 000000000000..6c08ac8b5147
--- /dev/null
+++ b/Documentation/networking/device_drivers/netronome/nfp.rst
@@ -0,0 +1,133 @@
+.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+
+=============================================
+Netronome Flow Processor (NFP) Kernel Drivers
+=============================================
+
+Copyright (c) 2019, Netronome Systems, Inc.
+
+Contents
+========
+
+- `Overview`_
+- `Acquiring Firmware`_
+
+Overview
+========
+
+This driver supports Netronome's line of Flow Processor devices,
+including the NFP4000, NFP5000, and NFP6000 models, which are also
+incorporated in the company's family of Agilio SmartNICs. The SR-IOV
+physical and virtual functions for these devices are supported by
+the driver.
+
+Acquiring Firmware
+==================
+
+The NFP4000 and NFP6000 devices require application specific firmware
+to function. Application firmware can be located either on the host file system
+or in the device flash (if supported by management firmware).
+
+Firmware files on the host filesystem contain card type (`AMDA-*` string), media
+config etc. They should be placed in `/lib/firmware/netronome` directory to
+load firmware from the host file system.
+
+Firmware for basic NIC operation is available in the upstream
+`linux-firmware.git` repository.
+
+Firmware in NVRAM
+-----------------
+
+Recent versions of management firmware supports loading application
+firmware from flash when the host driver gets probed. The firmware loading
+policy configuration may be used to configure this feature appropriately.
+
+Devlink or ethtool can be used to update the application firmware on the device
+flash by providing the appropriate `nic_AMDA*.nffw` file to the respective
+command. Users need to take care to write the correct firmware image for the
+card and media configuration to flash.
+
+Available storage space in flash depends on the card being used.
+
+Dealing with multiple projects
+------------------------------
+
+NFP hardware is fully programmable therefore there can be different
+firmware images targeting different applications.
+
+When using application firmware from host, we recommend placing
+actual firmware files in application-named subdirectories in
+`/lib/firmware/netronome` and linking the desired files, e.g.::
+
+ $ tree /lib/firmware/netronome/
+ /lib/firmware/netronome/
+ ├── bpf
+ │ ├── nic_AMDA0081-0001_1x40.nffw
+ │ └── nic_AMDA0081-0001_4x10.nffw
+ ├── flower
+ │ ├── nic_AMDA0081-0001_1x40.nffw
+ │ └── nic_AMDA0081-0001_4x10.nffw
+ ├── nic
+ │ ├── nic_AMDA0081-0001_1x40.nffw
+ │ └── nic_AMDA0081-0001_4x10.nffw
+ ├── nic_AMDA0081-0001_1x40.nffw -> bpf/nic_AMDA0081-0001_1x40.nffw
+ └── nic_AMDA0081-0001_4x10.nffw -> bpf/nic_AMDA0081-0001_4x10.nffw
+
+ 3 directories, 8 files
+
+You may need to use hard instead of symbolic links on distributions
+which use old `mkinitrd` command instead of `dracut` (e.g. Ubuntu).
+
+After changing firmware files you may need to regenerate the initramfs
+image. Initramfs contains drivers and firmware files your system may
+need to boot. Refer to the documentation of your distribution to find
+out how to update initramfs. Good indication of stale initramfs
+is system loading wrong driver or firmware on boot, but when driver is
+later reloaded manually everything works correctly.
+
+Selecting firmware per device
+-----------------------------
+
+Most commonly all cards on the system use the same type of firmware.
+If you want to load specific firmware image for a specific card, you
+can use either the PCI bus address or serial number. Driver will print
+which files it's looking for when it recognizes a NFP device::
+
+ nfp: Looking for firmware file in order of priority:
+ nfp: netronome/serial-00-12-34-aa-bb-cc-10-ff.nffw: not found
+ nfp: netronome/pci-0000:02:00.0.nffw: not found
+ nfp: netronome/nic_AMDA0081-0001_1x40.nffw: found, loading...
+
+In this case if file (or link) called *serial-00-12-34-aa-bb-5d-10-ff.nffw*
+or *pci-0000:02:00.0.nffw* is present in `/lib/firmware/netronome` this
+firmware file will take precedence over `nic_AMDA*` files.
+
+Note that `serial-*` and `pci-*` files are **not** automatically included
+in initramfs, you will have to refer to documentation of appropriate tools
+to find out how to include them.
+
+Firmware loading policy
+-----------------------
+
+Firmware loading policy is controlled via three HWinfo parameters
+stored as key value pairs in the device flash:
+
+app_fw_from_flash
+ Defines which firmware should take precedence, 'Disk' (0), 'Flash' (1) or
+ the 'Preferred' (2) firmware. When 'Preferred' is selected, the management
+ firmware makes the decision over which firmware will be loaded by comparing
+ versions of the flash firmware and the host supplied firmware.
+ This variable is configurable using the 'fw_load_policy'
+ devlink parameter.
+
+abi_drv_reset
+ Defines if the driver should reset the firmware when
+ the driver is probed, either 'Disk' (0) if firmware was found on disk,
+ 'Always' (1) reset or 'Never' (2) reset. Note that the device is always
+ reset on driver unload if firmware was loaded when the driver was probed.
+ This variable is configurable using the 'reset_dev_on_drv_probe'
+ devlink parameter.
+
+abi_drv_load_ifc
+ Defines a list of PF devices allowed to load FW on the device.
+ This variable is not currently user configurable.
--
2.11.0
^ permalink raw reply related
* [net-next 07/11] nfp: add devlink param infrastructure
From: Simon Horman @ 2019-09-06 16:00 UTC (permalink / raw)
To: David Miller
Cc: Jakub Kicinski, netdev, oss-drivers, Dirk van der Merwe,
Simon Horman
In-Reply-To: <20190906160101.14866-1-simon.horman@netronome.com>
From: Dirk van der Merwe <dirk.vandermerwe@netronome.com>
Register devlink parameters for driver use. Subsequent patches will add
support for specific parameters.
In order to support devlink parameters, the management firmware needs to
be able to lookup and set hwinfo keys.
Signed-off-by: Dirk van der Merwe <dirk.vandermerwe@netronome.com>
Signed-off-by: Simon Horman <simon.horman@netronome.com>
---
drivers/net/ethernet/netronome/nfp/Makefile | 1 +
drivers/net/ethernet/netronome/nfp/devlink_param.c | 60 ++++++++++++++++++++++
drivers/net/ethernet/netronome/nfp/nfp_main.h | 3 ++
drivers/net/ethernet/netronome/nfp/nfp_net_main.c | 7 +++
4 files changed, 71 insertions(+)
create mode 100644 drivers/net/ethernet/netronome/nfp/devlink_param.c
diff --git a/drivers/net/ethernet/netronome/nfp/Makefile b/drivers/net/ethernet/netronome/nfp/Makefile
index 2805641965f3..d31772ae511d 100644
--- a/drivers/net/ethernet/netronome/nfp/Makefile
+++ b/drivers/net/ethernet/netronome/nfp/Makefile
@@ -17,6 +17,7 @@ nfp-objs := \
nfpcore/nfp_target.o \
ccm.o \
ccm_mbox.o \
+ devlink_param.o \
nfp_asm.o \
nfp_app.o \
nfp_app_nic.o \
diff --git a/drivers/net/ethernet/netronome/nfp/devlink_param.c b/drivers/net/ethernet/netronome/nfp/devlink_param.c
new file mode 100644
index 000000000000..aece98586e32
--- /dev/null
+++ b/drivers/net/ethernet/netronome/nfp/devlink_param.c
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2019 Netronome Systems, Inc. */
+
+#include <net/devlink.h>
+
+#include "nfpcore/nfp_nsp.h"
+#include "nfp_main.h"
+
+static const struct devlink_param nfp_devlink_params[] = {
+};
+
+static int nfp_devlink_supports_params(struct nfp_pf *pf)
+{
+ struct nfp_nsp *nsp;
+ bool supported;
+ int err;
+
+ nsp = nfp_nsp_open(pf->cpp);
+ if (IS_ERR(nsp)) {
+ err = PTR_ERR(nsp);
+ dev_err(&pf->pdev->dev, "Failed to access the NSP: %d\n", err);
+ return err;
+ }
+
+ supported = nfp_nsp_has_hwinfo_lookup(nsp) &&
+ nfp_nsp_has_hwinfo_set(nsp);
+
+ nfp_nsp_close(nsp);
+ return supported;
+}
+
+int nfp_devlink_params_register(struct nfp_pf *pf)
+{
+ struct devlink *devlink = priv_to_devlink(pf);
+ int err;
+
+ err = nfp_devlink_supports_params(pf);
+ if (err <= 0)
+ return err;
+
+ err = devlink_params_register(devlink, nfp_devlink_params,
+ ARRAY_SIZE(nfp_devlink_params));
+ if (err)
+ return err;
+
+ devlink_params_publish(devlink);
+ return 0;
+}
+
+void nfp_devlink_params_unregister(struct nfp_pf *pf)
+{
+ int err;
+
+ err = nfp_devlink_supports_params(pf);
+ if (err <= 0)
+ return;
+
+ devlink_params_unregister(priv_to_devlink(pf), nfp_devlink_params,
+ ARRAY_SIZE(nfp_devlink_params));
+}
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_main.h b/drivers/net/ethernet/netronome/nfp/nfp_main.h
index bd6450b0f23f..5d5812fd9317 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_main.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_main.h
@@ -187,4 +187,7 @@ int nfp_shared_buf_pool_get(struct nfp_pf *pf, unsigned int sb, u16 pool_index,
int nfp_shared_buf_pool_set(struct nfp_pf *pf, unsigned int sb,
u16 pool_index, u32 size,
enum devlink_sb_threshold_type threshold_type);
+
+int nfp_devlink_params_register(struct nfp_pf *pf);
+void nfp_devlink_params_unregister(struct nfp_pf *pf);
#endif /* NFP_MAIN_H */
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c
index 986464d4a206..47addac104fe 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c
@@ -711,6 +711,10 @@ int nfp_net_pci_probe(struct nfp_pf *pf)
if (err)
goto err_devlink_unreg;
+ err = nfp_devlink_params_register(pf);
+ if (err)
+ goto err_shared_buf_unreg;
+
mutex_lock(&pf->lock);
pf->ddir = nfp_net_debugfs_device_add(pf->pdev);
@@ -744,6 +748,8 @@ int nfp_net_pci_probe(struct nfp_pf *pf)
err_clean_ddir:
nfp_net_debugfs_dir_clean(&pf->ddir);
mutex_unlock(&pf->lock);
+ nfp_devlink_params_unregister(pf);
+err_shared_buf_unreg:
nfp_shared_buf_unregister(pf);
err_devlink_unreg:
cancel_work_sync(&pf->port_refresh_work);
@@ -773,6 +779,7 @@ void nfp_net_pci_remove(struct nfp_pf *pf)
mutex_unlock(&pf->lock);
+ nfp_devlink_params_unregister(pf);
nfp_shared_buf_unregister(pf);
devlink_unregister(priv_to_devlink(pf));
--
2.11.0
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox