* [PATCH net 1/3] devlink: Fix param set handling for string type
From: Moshe Shemesh @ 2018-10-10 13:09 UTC (permalink / raw)
To: David S. Miller; +Cc: Jiri Pirko, netdev, linux-kernel, Moshe Shemesh
In-Reply-To: <1539176967-22172-1-git-send-email-moshe@mellanox.com>
When the devlink param type is string, the set handler needs to copy the
string value it received from the input into devlink_param_value.
Fixes: e3b7ca18ad7b ("devlink: Add param set command")
Signed-off-by: Moshe Shemesh <moshe@mellanox.com>
---
include/net/devlink.h | 2 +-
net/core/devlink.c | 11 ++++++++---
2 files changed, 9 insertions(+), 4 deletions(-)
diff --git a/include/net/devlink.h b/include/net/devlink.h
index b9b89d6..b0e17c0 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -311,7 +311,7 @@ enum devlink_param_type {
u8 vu8;
u16 vu16;
u32 vu32;
- const char *vstr;
+ char vstr[DEVLINK_PARAM_MAX_STRING_VALUE];
bool vbool;
};
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 8c0ed22..d808af7 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -2995,6 +2995,8 @@ static int devlink_nl_cmd_param_get_dumpit(struct sk_buff *msg,
struct genl_info *info,
union devlink_param_value *value)
{
+ int len;
+
if (param->type != DEVLINK_PARAM_TYPE_BOOL &&
!info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA])
return -EINVAL;
@@ -3010,10 +3012,13 @@ static int devlink_nl_cmd_param_get_dumpit(struct sk_buff *msg,
value->vu32 = nla_get_u32(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]);
break;
case DEVLINK_PARAM_TYPE_STRING:
- if (nla_len(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]) >
- DEVLINK_PARAM_MAX_STRING_VALUE)
+ len = strnlen(nla_data(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]),
+ nla_len(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]));
+ if (len == nla_len(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]) ||
+ len >= DEVLINK_PARAM_MAX_STRING_VALUE)
return -EINVAL;
- value->vstr = nla_data(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]);
+ strcpy(value->vstr,
+ nla_data(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]));
break;
case DEVLINK_PARAM_TYPE_BOOL:
value->vbool = info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA] ?
--
1.8.3.1
^ permalink raw reply related
* [PATCH bpf-next v2 7/7] selftests/bpf: add test cases for queue and stack maps
From: Mauricio Vasquez B @ 2018-10-10 14:06 UTC (permalink / raw)
To: Alexei Starovoitov, Daniel Borkmann, netdev; +Cc: Song Liu
In-Reply-To: <153918035266.8915.12458090320382567494.stgit@kernel>
test_maps:
Tests that queue/stack maps are behaving correctly even in corner cases
test_progs:
Tests new ebpf helpers
Signed-off-by: Mauricio Vasquez B <mauricio.vasquez@polito.it>
---
tools/lib/bpf/bpf.c | 12 ++
tools/lib/bpf/bpf.h | 1
tools/testing/selftests/bpf/Makefile | 5 +
tools/testing/selftests/bpf/bpf_helpers.h | 7 +
tools/testing/selftests/bpf/test_maps.c | 122 ++++++++++++++++++++
tools/testing/selftests/bpf/test_progs.c | 99 ++++++++++++++++
tools/testing/selftests/bpf/test_queue_map.c | 4 +
tools/testing/selftests/bpf/test_queue_stack_map.h | 59 ++++++++++
tools/testing/selftests/bpf/test_stack_map.c | 4 +
9 files changed, 312 insertions(+), 1 deletion(-)
create mode 100644 tools/testing/selftests/bpf/test_queue_map.c
create mode 100644 tools/testing/selftests/bpf/test_queue_stack_map.h
create mode 100644 tools/testing/selftests/bpf/test_stack_map.c
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index d70a255cb05e..ad2d41a6e3dd 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -278,6 +278,18 @@ int bpf_map_lookup_elem(int fd, const void *key, void *value)
return sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
}
+int bpf_map_lookup_and_delete_elem(int fd, const void *key, const void *value)
+{
+ union bpf_attr attr;
+
+ bzero(&attr, sizeof(attr));
+ attr.map_fd = fd;
+ attr.key = ptr_to_u64(key);
+ attr.value = ptr_to_u64(value);
+
+ return sys_bpf(BPF_MAP_LOOKUP_AND_DELETE_ELEM, &attr, sizeof(attr));
+}
+
int bpf_map_delete_elem(int fd, const void *key)
{
union bpf_attr attr;
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 87520a87a75f..57497185afaa 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -86,6 +86,7 @@ int bpf_map_update_elem(int fd, const void *key, const void *value,
__u64 flags);
int bpf_map_lookup_elem(int fd, const void *key, void *value);
+int bpf_map_lookup_and_delete_elem(int fd, const void *key, const void *value);
int bpf_map_delete_elem(int fd, const void *key);
int bpf_map_get_next_key(int fd, const void *key, void *next_key);
int bpf_obj_pin(int fd, const char *pathname);
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index d24afe8b821d..710fc1356c87 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -37,7 +37,7 @@ TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test
test_lwt_seg6local.o sendmsg4_prog.o sendmsg6_prog.o test_lirc_mode2_kern.o \
get_cgroup_id_kern.o socket_cookie_prog.o test_select_reuseport_kern.o \
test_skb_cgroup_id_kern.o bpf_flow.o netcnt_prog.o \
- test_sk_lookup_kern.o test_xdp_vlan.o
+ test_sk_lookup_kern.o test_xdp_vlan.o test_queue_map.o test_stack_map.o
# Order correspond to 'make run_tests' order
TEST_PROGS := test_kmod.sh \
@@ -116,6 +116,9 @@ CLANG_FLAGS = -I. -I./include/uapi -I../../../include/uapi \
$(OUTPUT)/test_l4lb_noinline.o: CLANG_FLAGS += -fno-inline
$(OUTPUT)/test_xdp_noinline.o: CLANG_FLAGS += -fno-inline
+$(OUTPUT)/test_queue_map.o: test_queue_stack_map.h
+$(OUTPUT)/test_stack_map.o: test_queue_stack_map.h
+
BTF_LLC_PROBE := $(shell $(LLC) -march=bpf -mattr=help 2>&1 | grep dwarfris)
BTF_PAHOLE_PROBE := $(shell $(BTF_PAHOLE) --help 2>&1 | grep BTF)
BTF_OBJCOPY_PROBE := $(shell $(LLVM_OBJCOPY) --help 2>&1 | grep -i 'usage.*llvm')
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
index fda8c162d0df..6407a3df0f3b 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -16,6 +16,13 @@ static int (*bpf_map_update_elem)(void *map, void *key, void *value,
(void *) BPF_FUNC_map_update_elem;
static int (*bpf_map_delete_elem)(void *map, void *key) =
(void *) BPF_FUNC_map_delete_elem;
+static int (*bpf_map_push_elem)(void *map, void *value,
+ unsigned long long flags) =
+ (void *) BPF_FUNC_map_push_elem;
+static int (*bpf_map_pop_elem)(void *map, void *value) =
+ (void *) BPF_FUNC_map_pop_elem;
+static int (*bpf_map_peek_elem)(void *map, void *value) =
+ (void *) BPF_FUNC_map_peek_elem;
static int (*bpf_probe_read)(void *dst, int size, void *unsafe_ptr) =
(void *) BPF_FUNC_probe_read;
static unsigned long long (*bpf_ktime_get_ns)(void) =
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index 9b552c0fc47d..4db2116e52be 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -15,6 +15,7 @@
#include <string.h>
#include <assert.h>
#include <stdlib.h>
+#include <time.h>
#include <sys/wait.h>
#include <sys/socket.h>
@@ -471,6 +472,122 @@ static void test_devmap(int task, void *data)
close(fd);
}
+static void test_queuemap(int task, void *data)
+{
+ const int MAP_SIZE = 32;
+ __u32 vals[MAP_SIZE + MAP_SIZE/2], val;
+ int fd, i;
+
+ /* Fill test values to be used */
+ for (i = 0; i < MAP_SIZE + MAP_SIZE/2; i++)
+ vals[i] = rand();
+
+ /* Invalid key size */
+ fd = bpf_create_map(BPF_MAP_TYPE_QUEUE, 4, sizeof(val), MAP_SIZE,
+ map_flags);
+ assert(fd < 0 && errno == EINVAL);
+
+ fd = bpf_create_map(BPF_MAP_TYPE_QUEUE, 0, sizeof(val), MAP_SIZE,
+ map_flags);
+ /* Queue map does not support BPF_F_NO_PREALLOC */
+ if (map_flags & BPF_F_NO_PREALLOC) {
+ assert(fd < 0 && errno == EINVAL);
+ return;
+ }
+ if (fd < 0) {
+ printf("Failed to create queuemap '%s'!\n", strerror(errno));
+ exit(1);
+ }
+
+ /* Push MAP_SIZE elements */
+ for (i = 0; i < MAP_SIZE; i++)
+ assert(bpf_map_update_elem(fd, NULL, &vals[i], 0) == 0);
+
+ /* Check that element cannot be pushed due to max_entries limit */
+ assert(bpf_map_update_elem(fd, NULL, &val, 0) == -1 &&
+ errno == E2BIG);
+
+ /* Peek element */
+ assert(bpf_map_lookup_elem(fd, NULL, &val) == 0 && val == vals[0]);
+
+ /* Replace half elements */
+ for (i = MAP_SIZE; i < MAP_SIZE + MAP_SIZE/2; i++)
+ assert(bpf_map_update_elem(fd, NULL, &vals[i], BPF_EXIST) == 0);
+
+ /* Pop all elements */
+ for (i = MAP_SIZE/2; i < MAP_SIZE + MAP_SIZE/2; i++)
+ assert(bpf_map_lookup_and_delete_elem(fd, NULL, &val) == 0 &&
+ val == vals[i]);
+
+ /* Check that there are not elements left */
+ assert(bpf_map_lookup_and_delete_elem(fd, NULL, &val) == -1 &&
+ errno == ENOENT);
+
+ /* Check that non supported functions set errno to EINVAL */
+ assert(bpf_map_delete_elem(fd, NULL) == -1 && errno == EINVAL);
+ assert(bpf_map_get_next_key(fd, NULL, NULL) == -1 && errno == EINVAL);
+
+ close(fd);
+}
+
+static void test_stackmap(int task, void *data)
+{
+ const int MAP_SIZE = 32;
+ __u32 vals[MAP_SIZE + MAP_SIZE/2], val;
+ int fd, i;
+
+ /* Fill test values to be used */
+ for (i = 0; i < MAP_SIZE + MAP_SIZE/2; i++)
+ vals[i] = rand();
+
+ /* Invalid key size */
+ fd = bpf_create_map(BPF_MAP_TYPE_STACK, 4, sizeof(val), MAP_SIZE,
+ map_flags);
+ assert(fd < 0 && errno == EINVAL);
+
+ fd = bpf_create_map(BPF_MAP_TYPE_STACK, 0, sizeof(val), MAP_SIZE,
+ map_flags);
+ /* Stack map does not support BPF_F_NO_PREALLOC */
+ if (map_flags & BPF_F_NO_PREALLOC) {
+ assert(fd < 0 && errno == EINVAL);
+ return;
+ }
+ if (fd < 0) {
+ printf("Failed to create stackmap '%s'!\n", strerror(errno));
+ exit(1);
+ }
+
+ /* Push MAP_SIZE elements */
+ for (i = 0; i < MAP_SIZE; i++)
+ assert(bpf_map_update_elem(fd, NULL, &vals[i], 0) == 0);
+
+ /* Check that element cannot be pushed due to max_entries limit */
+ assert(bpf_map_update_elem(fd, NULL, &val, 0) == -1 &&
+ errno == E2BIG);
+
+ /* Peek element */
+ assert(bpf_map_lookup_elem(fd, NULL, &val) == 0 && val == vals[i - 1]);
+
+ /* Replace half elements */
+ for (i = MAP_SIZE; i < MAP_SIZE + MAP_SIZE/2; i++)
+ assert(bpf_map_update_elem(fd, NULL, &vals[i], BPF_EXIST) == 0);
+
+ /* Pop all elements */
+ for (i = MAP_SIZE + MAP_SIZE/2 - 1; i >= MAP_SIZE/2; i--)
+ assert(bpf_map_lookup_and_delete_elem(fd, NULL, &val) == 0 &&
+ val == vals[i]);
+
+ /* Check that there are not elements left */
+ assert(bpf_map_lookup_and_delete_elem(fd, NULL, &val) == -1 &&
+ errno == ENOENT);
+
+ /* Check that non supported functions set errno to EINVAL */
+ assert(bpf_map_delete_elem(fd, NULL) == -1 && errno == EINVAL);
+ assert(bpf_map_get_next_key(fd, NULL, NULL) == -1 && errno == EINVAL);
+
+ close(fd);
+}
+
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <arpa/inet.h>
@@ -1434,10 +1551,15 @@ static void run_all_tests(void)
test_map_wronly();
test_reuseport_array();
+
+ test_queuemap(0, NULL);
+ test_stackmap(0, NULL);
}
int main(void)
{
+ srand(time(NULL));
+
map_flags = 0;
run_all_tests();
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index e8becca9c521..2d3c04f45530 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -1735,8 +1735,105 @@ static void test_reference_tracking()
bpf_object__close(obj);
}
+enum {
+ QUEUE,
+ STACK,
+};
+
+static void test_queue_stack_map(int type)
+{
+ const int MAP_SIZE = 32;
+ __u32 vals[MAP_SIZE], duration, retval, size, val;
+ int i, err, prog_fd, map_in_fd, map_out_fd;
+ char file[32], buf[128];
+ struct bpf_object *obj;
+ struct iphdr *iph = (void *)buf + sizeof(struct ethhdr);
+
+ /* Fill test values to be used */
+ for (i = 0; i < MAP_SIZE; i++)
+ vals[i] = rand();
+
+ if (type == QUEUE)
+ strncpy(file, "./test_queue_map.o", sizeof(file));
+ else if (type == STACK)
+ strncpy(file, "./test_stack_map.o", sizeof(file));
+ else
+ return;
+
+ err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
+ if (err) {
+ error_cnt++;
+ return;
+ }
+
+ map_in_fd = bpf_find_map(__func__, obj, "map_in");
+ if (map_in_fd < 0)
+ goto out;
+
+ map_out_fd = bpf_find_map(__func__, obj, "map_out");
+ if (map_out_fd < 0)
+ goto out;
+
+ /* Push 32 elements to the input map */
+ for (i = 0; i < MAP_SIZE; i++) {
+ err = bpf_map_update_elem(map_in_fd, NULL, &vals[i], 0);
+ if (err) {
+ error_cnt++;
+ goto out;
+ }
+ }
+
+ /* The eBPF program pushes iph.saddr in the output map,
+ * pops the input map and saves this value in iph.daddr
+ */
+ for (i = 0; i < MAP_SIZE; i++) {
+ if (type == QUEUE) {
+ val = vals[i];
+ pkt_v4.iph.saddr = vals[i] * 5;
+ } else if (type == STACK) {
+ val = vals[MAP_SIZE - 1 - i];
+ pkt_v4.iph.saddr = vals[MAP_SIZE - 1 - i] * 5;
+ }
+
+ err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
+ buf, &size, &retval, &duration);
+ if (err || retval || size != sizeof(pkt_v4) ||
+ iph->daddr != val)
+ break;
+ }
+
+ CHECK(err || retval || size != sizeof(pkt_v4) || iph->daddr != val,
+ "bpf_map_pop_elem",
+ "err %d errno %d retval %d size %d iph->daddr %u\n",
+ err, errno, retval, size, iph->daddr);
+
+ /* Queue is empty, program should return TC_ACT_SHOT */
+ err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
+ buf, &size, &retval, &duration);
+ CHECK(err || retval != 2 /* TC_ACT_SHOT */|| size != sizeof(pkt_v4),
+ "check-queue-stack-map-empty",
+ "err %d errno %d retval %d size %d\n",
+ err, errno, retval, size);
+
+ /* Check that the program pushed elements correctly */
+ for (i = 0; i < MAP_SIZE; i++) {
+ err = bpf_map_lookup_and_delete_elem(map_out_fd, NULL, &val);
+ if (err || val != vals[i] * 5)
+ break;
+ }
+
+ CHECK(i != MAP_SIZE && (err || val != vals[i] * 5),
+ "bpf_map_push_elem", "err %d value %u\n", err, val);
+
+out:
+ pkt_v4.iph.saddr = 0;
+ bpf_object__close(obj);
+}
+
int main(void)
{
+ srand(time(NULL));
+
jit_enabled = is_jit_enabled();
test_pkt_access();
@@ -1757,6 +1854,8 @@ int main(void)
test_task_fd_query_rawtp();
test_task_fd_query_tp();
test_reference_tracking();
+ test_queue_stack_map(QUEUE);
+ test_queue_stack_map(STACK);
printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt);
return error_cnt ? EXIT_FAILURE : EXIT_SUCCESS;
diff --git a/tools/testing/selftests/bpf/test_queue_map.c b/tools/testing/selftests/bpf/test_queue_map.c
new file mode 100644
index 000000000000..87db1f9da33d
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_queue_map.c
@@ -0,0 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Politecnico di Torino
+#define MAP_TYPE BPF_MAP_TYPE_QUEUE
+#include "test_queue_stack_map.h"
diff --git a/tools/testing/selftests/bpf/test_queue_stack_map.h b/tools/testing/selftests/bpf/test_queue_stack_map.h
new file mode 100644
index 000000000000..295b9b3bc5c7
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_queue_stack_map.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+// Copyright (c) 2018 Politecnico di Torino
+#include <stddef.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <linux/pkt_cls.h>
+#include "bpf_helpers.h"
+
+int _version SEC("version") = 1;
+
+struct bpf_map_def __attribute__ ((section("maps"), used)) map_in = {
+ .type = MAP_TYPE,
+ .key_size = 0,
+ .value_size = sizeof(__u32),
+ .max_entries = 32,
+ .map_flags = 0,
+};
+
+struct bpf_map_def __attribute__ ((section("maps"), used)) map_out = {
+ .type = MAP_TYPE,
+ .key_size = 0,
+ .value_size = sizeof(__u32),
+ .max_entries = 32,
+ .map_flags = 0,
+};
+
+SEC("test")
+int _test(struct __sk_buff *skb)
+{
+ void *data_end = (void *)(long)skb->data_end;
+ void *data = (void *)(long)skb->data;
+ struct ethhdr *eth = (struct ethhdr *)(data);
+ __u32 value;
+ int err;
+
+ if (eth + 1 > data_end)
+ return TC_ACT_SHOT;
+
+ struct iphdr *iph = (struct iphdr *)(eth + 1);
+
+ if (iph + 1 > data_end)
+ return TC_ACT_SHOT;
+
+ err = bpf_map_pop_elem(&map_in, &value);
+ if (err)
+ return TC_ACT_SHOT;
+
+ iph->daddr = value;
+
+ err = bpf_map_push_elem(&map_out, &iph->saddr, 0);
+ if (err)
+ return TC_ACT_SHOT;
+
+ return TC_ACT_OK;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_stack_map.c b/tools/testing/selftests/bpf/test_stack_map.c
new file mode 100644
index 000000000000..31c3880e6da0
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_stack_map.c
@@ -0,0 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Politecnico di Torino
+#define MAP_TYPE BPF_MAP_TYPE_STACK
+#include "test_queue_stack_map.h"
^ permalink raw reply related
* [PATCH bpf-next v2 6/7] Sync uapi/bpf.h to tools/include
From: Mauricio Vasquez B @ 2018-10-10 14:06 UTC (permalink / raw)
To: Alexei Starovoitov, Daniel Borkmann, netdev; +Cc: Song Liu
In-Reply-To: <153918035266.8915.12458090320382567494.stgit@kernel>
Sync both files.
Signed-off-by: Mauricio Vasquez B <mauricio.vasquez@polito.it>
---
tools/include/uapi/linux/bpf.h | 30 +++++++++++++++++++++++++++++-
1 file changed, 29 insertions(+), 1 deletion(-)
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index f9187b41dff6..c8824d5364ff 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -103,6 +103,7 @@ enum bpf_cmd {
BPF_BTF_LOAD,
BPF_BTF_GET_FD_BY_ID,
BPF_TASK_FD_QUERY,
+ BPF_MAP_LOOKUP_AND_DELETE_ELEM,
};
enum bpf_map_type {
@@ -128,6 +129,8 @@ enum bpf_map_type {
BPF_MAP_TYPE_CGROUP_STORAGE,
BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
+ BPF_MAP_TYPE_QUEUE,
+ BPF_MAP_TYPE_STACK,
};
enum bpf_prog_type {
@@ -462,6 +465,28 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
+ * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags)
+ * Description
+ * Push an element *value* in *map*. *flags* is one of:
+ *
+ * **BPF_EXIST**
+ * If the queue/stack is full, the oldest element is removed to
+ * make room for this.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_map_pop_elem(struct bpf_map *map, void *value)
+ * Description
+ * Pop an element from *map*.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_map_peek_elem(struct bpf_map *map, void *value)
+ * Description
+ * Get an element from *map* without removing it.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
* int bpf_probe_read(void *dst, u32 size, const void *src)
* Description
* For tracing programs, safely attempt to read *size* bytes from
@@ -2303,7 +2328,10 @@ union bpf_attr {
FN(skb_ancestor_cgroup_id), \
FN(sk_lookup_tcp), \
FN(sk_lookup_udp), \
- FN(sk_release),
+ FN(sk_release), \
+ FN(map_push_elem), \
+ FN(map_pop_elem), \
+ FN(map_peek_elem),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
^ permalink raw reply related
* [PATCH bpf-next v2 5/7] bpf: add queue and stack maps
From: Mauricio Vasquez B @ 2018-10-10 14:06 UTC (permalink / raw)
To: Alexei Starovoitov, Daniel Borkmann, netdev; +Cc: Song Liu
In-Reply-To: <153918035266.8915.12458090320382567494.stgit@kernel>
Queue/stack maps implement FIFO/LIFO data storage for eBPF programs.
These maps support peek, pop and push operations that are exposed to eBPF
programs through the new bpf_map_{peek,pop,push}_elem helpers. Those operations
are exposed to userspace applications through the already existing
syscalls in the following way:
BPF_MAP_LOOKUP_ELEM -> peek
BPF_MAP_LOOKUP_AND_DELETE_ELEM -> pop
BPF_MAP_UPDATE_ELEM -> push
Queue/stack maps are implemented using a buffer, tail and head indexes,
hence BPF_F_NO_PREALLOC is not supported.
Unlike other maps, queue and stack maps do not use RCU to protect map
values. The bpf_map_{peek,pop}_elem helpers take an ARG_PTR_TO_UNINIT_MAP_VALUE
argument, which is a pointer to a memory area where the value read from the
map is saved. It is basically the same as ARG_PTR_TO_UNINIT_MEM, except that
the size does not have to be passed as an extra argument.
Our main motivation for implementing queue/stack maps was to keep track
of a pool of elements, like network ports in a SNAT; however, we foresee
other use cases, for example saving the last N kernel events in a map
and then analysing them from userspace.
Signed-off-by: Mauricio Vasquez B <mauricio.vasquez@polito.it>
---
include/linux/bpf.h | 6 +
include/linux/bpf_types.h | 2
include/uapi/linux/bpf.h | 29 ++++
kernel/bpf/Makefile | 2
kernel/bpf/core.c | 3
kernel/bpf/helpers.c | 43 ++++++
kernel/bpf/queue_stack_maps.c | 288 +++++++++++++++++++++++++++++++++++++++++
kernel/bpf/syscall.c | 11 +-
kernel/bpf/verifier.c | 19 +++
net/core/filter.c | 6 +
10 files changed, 405 insertions(+), 4 deletions(-)
create mode 100644 kernel/bpf/queue_stack_maps.c
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index e37b4986bb45..2c4854c2c2dc 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -40,6 +40,9 @@ struct bpf_map_ops {
int (*map_update_elem)(struct bpf_map *map, void *key, void *value, u64 flags);
int (*map_delete_elem)(struct bpf_map *map, void *key);
void *(*map_lookup_and_delete_elem)(struct bpf_map *map, void *key);
+ int (*map_push_elem)(struct bpf_map *map, void *value, u64 flags);
+ int (*map_pop_elem)(struct bpf_map *map, void *value);
+ int (*map_peek_elem)(struct bpf_map *map, void *value);
/* funcs called by prog_array and perf_event_array map */
void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file,
@@ -827,6 +830,9 @@ static inline int bpf_fd_reuseport_array_update_elem(struct bpf_map *map,
extern const struct bpf_func_proto bpf_map_lookup_elem_proto;
extern const struct bpf_func_proto bpf_map_update_elem_proto;
extern const struct bpf_func_proto bpf_map_delete_elem_proto;
+extern const struct bpf_func_proto bpf_map_push_elem_proto;
+extern const struct bpf_func_proto bpf_map_pop_elem_proto;
+extern const struct bpf_func_proto bpf_map_peek_elem_proto;
extern const struct bpf_func_proto bpf_get_prandom_u32_proto;
extern const struct bpf_func_proto bpf_get_smp_processor_id_proto;
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 658509daacd4..a2ec73aa1ec7 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -69,3 +69,5 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_XSKMAP, xsk_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, reuseport_array_ops)
#endif
#endif
+BPF_MAP_TYPE(BPF_MAP_TYPE_QUEUE, queue_map_ops)
+BPF_MAP_TYPE(BPF_MAP_TYPE_STACK, stack_map_ops)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 3bb94aa2d408..c8824d5364ff 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -129,6 +129,8 @@ enum bpf_map_type {
BPF_MAP_TYPE_CGROUP_STORAGE,
BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
+ BPF_MAP_TYPE_QUEUE,
+ BPF_MAP_TYPE_STACK,
};
enum bpf_prog_type {
@@ -463,6 +465,28 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
+ * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags)
+ * Description
+ * Push an element *value* in *map*. *flags* is one of:
+ *
+ * **BPF_EXIST**
+ * If the queue/stack is full, the oldest element is removed to
+ * make room for this.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_map_pop_elem(struct bpf_map *map, void *value)
+ * Description
+ * Pop an element from *map*.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_map_peek_elem(struct bpf_map *map, void *value)
+ * Description
+ * Get an element from *map* without removing it.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
* int bpf_probe_read(void *dst, u32 size, const void *src)
* Description
* For tracing programs, safely attempt to read *size* bytes from
@@ -2304,7 +2328,10 @@ union bpf_attr {
FN(skb_ancestor_cgroup_id), \
FN(sk_lookup_tcp), \
FN(sk_lookup_udp), \
- FN(sk_release),
+ FN(sk_release), \
+ FN(map_push_elem), \
+ FN(map_pop_elem), \
+ FN(map_peek_elem),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index 0488b8258321..17afae9e65f3 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -3,7 +3,7 @@ obj-y := core.o
obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o
obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o
-obj-$(CONFIG_BPF_SYSCALL) += local_storage.o
+obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o
obj-$(CONFIG_BPF_SYSCALL) += disasm.o
obj-$(CONFIG_BPF_SYSCALL) += btf.o
ifeq ($(CONFIG_NET),y)
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 3f5bf1af0826..8d2db076d123 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1783,6 +1783,9 @@ BPF_CALL_0(bpf_user_rnd_u32)
const struct bpf_func_proto bpf_map_lookup_elem_proto __weak;
const struct bpf_func_proto bpf_map_update_elem_proto __weak;
const struct bpf_func_proto bpf_map_delete_elem_proto __weak;
+const struct bpf_func_proto bpf_map_push_elem_proto __weak;
+const struct bpf_func_proto bpf_map_pop_elem_proto __weak;
+const struct bpf_func_proto bpf_map_peek_elem_proto __weak;
const struct bpf_func_proto bpf_get_prandom_u32_proto __weak;
const struct bpf_func_proto bpf_get_smp_processor_id_proto __weak;
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 6502115e8f55..ab0d5e3f9892 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -76,6 +76,49 @@ const struct bpf_func_proto bpf_map_delete_elem_proto = {
.arg2_type = ARG_PTR_TO_MAP_KEY,
};
+BPF_CALL_3(bpf_map_push_elem, struct bpf_map *, map, void *, value, u64, flags)
+{
+ return map->ops->map_push_elem(map, value, flags);
+}
+
+const struct bpf_func_proto bpf_map_push_elem_proto = {
+ .func = bpf_map_push_elem,
+ .gpl_only = false,
+ .pkt_access = true,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_CONST_MAP_PTR,
+ .arg2_type = ARG_PTR_TO_MAP_VALUE,
+ .arg3_type = ARG_ANYTHING,
+};
+
+BPF_CALL_2(bpf_map_pop_elem, struct bpf_map *, map, void *, value)
+{
+ return map->ops->map_pop_elem(map, value);
+}
+
+const struct bpf_func_proto bpf_map_pop_elem_proto = {
+ .func = bpf_map_pop_elem,
+ .gpl_only = false,
+ .pkt_access = true,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_CONST_MAP_PTR,
+ .arg2_type = ARG_PTR_TO_UNINIT_MAP_VALUE,
+};
+
+BPF_CALL_2(bpf_map_peek_elem, struct bpf_map *, map, void *, value)
+{
+ return map->ops->map_peek_elem(map, value);
+}
+
+/*
+ * Helper descriptor for bpf_map_peek_elem(): arg1 is the map, arg2 an
+ * uninitialised buffer that receives one map value. .func must be the peek
+ * wrapper; wiring it to bpf_map_pop_elem would make every peek issued by an
+ * eBPF program remove the element it reads.
+ */
+const struct bpf_func_proto bpf_map_peek_elem_proto = {
+	.func		= bpf_map_peek_elem,
+	.gpl_only	= false,
+	.pkt_access	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_CONST_MAP_PTR,
+	.arg2_type	= ARG_PTR_TO_UNINIT_MAP_VALUE,
+};
+
const struct bpf_func_proto bpf_get_prandom_u32_proto = {
.func = bpf_user_rnd_u32,
.gpl_only = false,
diff --git a/kernel/bpf/queue_stack_maps.c b/kernel/bpf/queue_stack_maps.c
new file mode 100644
index 000000000000..12a93fb37449
--- /dev/null
+++ b/kernel/bpf/queue_stack_maps.c
@@ -0,0 +1,288 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * queue_stack_maps.c: BPF queue and stack maps
+ *
+ * Copyright (c) 2018 Politecnico di Torino
+ */
+#include <linux/bpf.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+#include "percpu_freelist.h"
+
+#define QUEUE_STACK_CREATE_FLAG_MASK \
+ (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
+
+
+/*
+ * State shared by BPF_MAP_TYPE_QUEUE and BPF_MAP_TYPE_STACK maps: a circular
+ * buffer of fixed-size values stored directly after this header.
+ */
+struct bpf_queue_stack {
+	struct bpf_map map;
+	/* held around every access to head, tail and elements */
+	raw_spinlock_t lock;
+	/* head: next slot to be written; tail: oldest stored element */
+	u32 head, tail;
+	u32 size; /* max_entries + 1 */
+
+	/* size * map.value_size bytes; flexible array member, not [0] */
+	char elements[] __aligned(8);
+};
+
+static struct bpf_queue_stack *bpf_queue_stack(struct bpf_map *map)
+{
+ return container_of(map, struct bpf_queue_stack, map);
+}
+
+static bool queue_stack_map_is_empty(struct bpf_queue_stack *qs)
+{
+ return qs->head == qs->tail;
+}
+
+/*
+ * The buffer is full when advancing head by one slot (wrapping to 0 at
+ * qs->size) would make it collide with tail.
+ */
+static bool queue_stack_map_is_full(struct bpf_queue_stack *qs)
+{
+	u32 next_head = qs->head + 1;
+
+	return qs->tail == (unlikely(next_head >= qs->size) ? 0 : next_head);
+}
+
+/*
+ * Called from syscall. Validate the creation attributes of a queue/stack map.
+ *
+ * Returns 0 when the attributes are acceptable, -EINVAL for a zero
+ * max_entries, a non-zero key_size, a zero value_size or map_flags outside
+ * QUEUE_STACK_CREATE_FLAG_MASK, and -E2BIG when value_size exceeds
+ * KMALLOC_MAX_SIZE.
+ */
+static int queue_stack_map_alloc_check(union bpf_attr *attr)
+{
+	/* check sanity of attributes; a zero value_size map could not
+	 * store anything, so reject it like a zero max_entries
+	 */
+	if (attr->max_entries == 0 || attr->key_size != 0 ||
+	    attr->value_size == 0 ||
+	    attr->map_flags & ~QUEUE_STACK_CREATE_FLAG_MASK)
+		return -EINVAL;
+
+	if (attr->value_size > KMALLOC_MAX_SIZE)
+		/* if value_size is bigger, the user space won't be able to
+		 * access the elements.
+		 */
+		return -E2BIG;
+
+	return 0;
+}
+
+/*
+ * Allocate and initialise a queue/stack map from the creation attributes.
+ *
+ * The buffer gets max_entries + 1 slots: one slot always stays unused so
+ * that head == tail can mean "empty" while (head + 1) == tail means "full".
+ *
+ * Returns the embedded struct bpf_map on success, or an ERR_PTR():
+ * -E2BIG when the requested buffer is too large, the error reported by
+ * bpf_map_precharge_memlock(), or -ENOMEM when the allocation fails.
+ */
+static struct bpf_map *queue_stack_map_alloc(union bpf_attr *attr)
+{
+	int ret, numa_node = bpf_map_attr_numa_node(attr);
+	struct bpf_queue_stack *qs;
+	u64 size, queue_size, cost;
+	u32 value_size;
+
+	/* Compute the slot count in 64 bits: with max_entries == U32_MAX a
+	 * u32 sum wraps to 0, which would pass the cost check below and
+	 * leave push writing past an allocation with no element storage.
+	 */
+	size = (u64) attr->max_entries + 1;
+	value_size = attr->value_size;
+
+	queue_size = sizeof(*qs) + size * value_size;
+
+	cost = queue_size;
+	/* qs->size is a u32, so the slot count itself has to fit as well */
+	if (size > U32_MAX || cost >= U32_MAX - PAGE_SIZE)
+		return ERR_PTR(-E2BIG);
+
+	cost = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
+
+	ret = bpf_map_precharge_memlock(cost);
+	if (ret < 0)
+		return ERR_PTR(ret);
+
+	qs = bpf_map_area_alloc(queue_size, numa_node);
+	if (!qs)
+		return ERR_PTR(-ENOMEM);
+
+	memset(qs, 0, sizeof(*qs));
+
+	bpf_map_init_from_attr(&qs->map, attr);
+
+	qs->map.pages = cost;
+	qs->size = size;
+
+	raw_spin_lock_init(&qs->lock);
+
+	return &qs->map;
+}
+
+/* Called when map->refcnt goes to zero, either from workqueue or from syscall */
+static void queue_stack_map_free(struct bpf_map *map)
+{
+ struct bpf_queue_stack *qs = bpf_queue_stack(map);
+
+ /* at this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0,
+ * so the programs (can be more than one that used this map) were
+ * disconnected from events. Wait for outstanding critical sections in
+ * these programs to complete
+ */
+ synchronize_rcu();
+
+ bpf_map_area_free(qs);
+}
+
+/*
+ * Copy the oldest element (the one at tail) into *value*; when *delete* is
+ * true also remove it by advancing tail. Returns 0 on success or -ENOENT
+ * when the queue is empty.
+ */
+static int __queue_map_get(struct bpf_map *map, void *value, bool delete)
+{
+	struct bpf_queue_stack *qs = bpf_queue_stack(map);
+	unsigned long irq_flags;
+	int ret = -ENOENT;
+
+	raw_spin_lock_irqsave(&qs->lock, irq_flags);
+
+	if (!queue_stack_map_is_empty(qs)) {
+		void *oldest = &qs->elements[qs->tail * qs->map.value_size];
+
+		memcpy(value, oldest, qs->map.value_size);
+
+		/* pop: step tail forward, wrapping at the end of the buffer */
+		if (delete && unlikely(++qs->tail >= qs->size))
+			qs->tail = 0;
+
+		ret = 0;
+	}
+
+	raw_spin_unlock_irqrestore(&qs->lock, irq_flags);
+	return ret;
+}
+
+
+/*
+ * Copy the newest element (the slot just below head) into *value*; when
+ * *delete* is true also remove it by moving head back onto that slot.
+ * Returns 0 on success or -ENOENT when the stack is empty.
+ */
+static int __stack_map_get(struct bpf_map *map, void *value, bool delete)
+{
+	struct bpf_queue_stack *qs = bpf_queue_stack(map);
+	unsigned long irq_flags;
+	int ret = -ENOENT;
+	u32 top;
+
+	raw_spin_lock_irqsave(&qs->lock, irq_flags);
+
+	if (!queue_stack_map_is_empty(qs)) {
+		/* head - 1 underflows when head is 0; the unsigned
+		 * comparison catches that and wraps to the last slot
+		 */
+		top = qs->head - 1;
+		if (unlikely(top >= qs->size))
+			top = qs->size - 1;
+
+		memcpy(value, &qs->elements[top * qs->map.value_size],
+		       qs->map.value_size);
+
+		if (delete)
+			qs->head = top;
+
+		ret = 0;
+	}
+
+	raw_spin_unlock_irqrestore(&qs->lock, irq_flags);
+	return ret;
+}
+
+/* Called from syscall or from eBPF program */
+static int queue_map_peek_elem(struct bpf_map *map, void *value)
+{
+ return __queue_map_get(map, value, false);
+}
+
+/* Called from syscall or from eBPF program */
+static int stack_map_peek_elem(struct bpf_map *map, void *value)
+{
+ return __stack_map_get(map, value, false);
+}
+
+/* Called from syscall or from eBPF program */
+static int queue_map_pop_elem(struct bpf_map *map, void *value)
+{
+ return __queue_map_get(map, value, true);
+}
+
+/* Called from syscall or from eBPF program */
+static int stack_map_pop_elem(struct bpf_map *map, void *value)
+{
+ return __stack_map_get(map, value, true);
+}
+
+/*
+ * Called from syscall or from eBPF program.
+ *
+ * Store *value* in the slot at head and advance head. When the map is full
+ * the push fails with -E2BIG, unless BPF_EXIST is set in *flags*, in which
+ * case tail is advanced first so that the oldest element is dropped.
+ * Returns -EINVAL for unsupported flags, 0 on success.
+ */
+static int queue_stack_map_push_elem(struct bpf_map *map, void *value,
+				     u64 flags)
+{
+	struct bpf_queue_stack *qs = bpf_queue_stack(map);
+	unsigned long irq_flags;
+	bool replace;
+	int err = 0;
+
+	/* Reject BPF_NOEXIST and any flag value above BPF_EXIST */
+	if (flags & BPF_NOEXIST || flags > BPF_EXIST)
+		return -EINVAL;
+
+	/* BPF_EXIST forces making room for the new element in a full map */
+	replace = flags & BPF_EXIST;
+
+	raw_spin_lock_irqsave(&qs->lock, irq_flags);
+
+	if (queue_stack_map_is_full(qs)) {
+		if (!replace) {
+			err = -E2BIG;
+			goto unlock;
+		}
+		/* advance tail pointer to overwrite oldest element */
+		if (unlikely(++qs->tail >= qs->size))
+			qs->tail = 0;
+	}
+
+	memcpy(&qs->elements[qs->head * qs->map.value_size], value,
+	       qs->map.value_size);
+
+	if (unlikely(++qs->head >= qs->size))
+		qs->head = 0;
+
+unlock:
+	raw_spin_unlock_irqrestore(&qs->lock, irq_flags);
+	return err;
+}
+
+/* Called from syscall or from eBPF program */
+static void *queue_stack_map_lookup_elem(struct bpf_map *map, void *key)
+{
+ return NULL;
+}
+
+/* Called from syscall or from eBPF program */
+static int queue_stack_map_update_elem(struct bpf_map *map, void *key,
+ void *value, u64 flags)
+{
+ return -EINVAL;
+}
+
+/* Called from syscall or from eBPF program */
+static int queue_stack_map_delete_elem(struct bpf_map *map, void *key)
+{
+ return -EINVAL;
+}
+
+/* Called from syscall */
+static int queue_stack_map_get_next_key(struct bpf_map *map, void *key,
+ void *next_key)
+{
+ return -EINVAL;
+}
+
+const struct bpf_map_ops queue_map_ops = {
+ .map_alloc_check = queue_stack_map_alloc_check,
+ .map_alloc = queue_stack_map_alloc,
+ .map_free = queue_stack_map_free,
+ .map_lookup_elem = queue_stack_map_lookup_elem,
+ .map_update_elem = queue_stack_map_update_elem,
+ .map_delete_elem = queue_stack_map_delete_elem,
+ .map_push_elem = queue_stack_map_push_elem,
+ .map_pop_elem = queue_map_pop_elem,
+ .map_peek_elem = queue_map_peek_elem,
+ .map_get_next_key = queue_stack_map_get_next_key,
+};
+
+const struct bpf_map_ops stack_map_ops = {
+ .map_alloc_check = queue_stack_map_alloc_check,
+ .map_alloc = queue_stack_map_alloc,
+ .map_free = queue_stack_map_free,
+ .map_lookup_elem = queue_stack_map_lookup_elem,
+ .map_update_elem = queue_stack_map_update_elem,
+ .map_delete_elem = queue_stack_map_delete_elem,
+ .map_push_elem = queue_stack_map_push_elem,
+ .map_pop_elem = stack_map_pop_elem,
+ .map_peek_elem = stack_map_peek_elem,
+ .map_get_next_key = queue_stack_map_get_next_key,
+};
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 6907d661dea5..07fedc537e8e 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -727,6 +727,9 @@ static int map_lookup_elem(union bpf_attr *attr)
err = bpf_fd_htab_map_lookup_elem(map, key, value);
} else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) {
err = bpf_fd_reuseport_array_lookup_elem(map, key, value);
+ } else if (map->map_type == BPF_MAP_TYPE_QUEUE ||
+ map->map_type == BPF_MAP_TYPE_STACK) {
+ err = map->ops->map_peek_elem(map, value);
} else {
rcu_read_lock();
ptr = map->ops->map_lookup_elem(map, key);
@@ -846,6 +849,9 @@ static int map_update_elem(union bpf_attr *attr)
/* rcu_read_lock() is not needed */
err = bpf_fd_reuseport_array_update_elem(map, key, value,
attr->flags);
+ } else if (map->map_type == BPF_MAP_TYPE_QUEUE ||
+ map->map_type == BPF_MAP_TYPE_STACK) {
+ err = map->ops->map_push_elem(map, value, attr->flags);
} else {
rcu_read_lock();
err = map->ops->map_update_elem(map, key, value, attr->flags);
@@ -1028,7 +1034,10 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr)
*/
preempt_disable();
__this_cpu_inc(bpf_prog_active);
- if (map->ops->map_lookup_and_delete_elem) {
+ if (map->map_type == BPF_MAP_TYPE_QUEUE ||
+ map->map_type == BPF_MAP_TYPE_STACK) {
+ err = map->ops->map_pop_elem(map, value);
+ } else if (map->ops->map_lookup_and_delete_elem) {
rcu_read_lock();
ptr = map->ops->map_lookup_and_delete_elem(map, key);
if (ptr)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index d84c91ac3b70..7d6d9cf9ebd5 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -2324,6 +2324,13 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
if (func_id != BPF_FUNC_sk_select_reuseport)
goto error;
break;
+ case BPF_MAP_TYPE_QUEUE:
+ case BPF_MAP_TYPE_STACK:
+ if (func_id != BPF_FUNC_map_peek_elem &&
+ func_id != BPF_FUNC_map_pop_elem &&
+ func_id != BPF_FUNC_map_push_elem)
+ goto error;
+ break;
default:
break;
}
@@ -2380,6 +2387,13 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
if (map->map_type != BPF_MAP_TYPE_REUSEPORT_SOCKARRAY)
goto error;
break;
+ case BPF_FUNC_map_peek_elem:
+ case BPF_FUNC_map_pop_elem:
+ case BPF_FUNC_map_push_elem:
+ if (map->map_type != BPF_MAP_TYPE_QUEUE &&
+ map->map_type != BPF_MAP_TYPE_STACK)
+ goto error;
+ break;
default:
break;
}
@@ -2675,7 +2689,10 @@ record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
if (func_id != BPF_FUNC_tail_call &&
func_id != BPF_FUNC_map_lookup_elem &&
func_id != BPF_FUNC_map_update_elem &&
- func_id != BPF_FUNC_map_delete_elem)
+ func_id != BPF_FUNC_map_delete_elem &&
+ func_id != BPF_FUNC_map_push_elem &&
+ func_id != BPF_FUNC_map_pop_elem &&
+ func_id != BPF_FUNC_map_peek_elem)
return 0;
if (meta->map_ptr == NULL) {
diff --git a/net/core/filter.c b/net/core/filter.c
index 4bbc6567fcb8..6fb4f56ce500 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4995,6 +4995,12 @@ bpf_base_func_proto(enum bpf_func_id func_id)
return &bpf_map_update_elem_proto;
case BPF_FUNC_map_delete_elem:
return &bpf_map_delete_elem_proto;
+ case BPF_FUNC_map_push_elem:
+ return &bpf_map_push_elem_proto;
+ case BPF_FUNC_map_pop_elem:
+ return &bpf_map_pop_elem_proto;
+ case BPF_FUNC_map_peek_elem:
+ return &bpf_map_peek_elem_proto;
case BPF_FUNC_get_prandom_u32:
return &bpf_get_prandom_u32_proto;
case BPF_FUNC_get_smp_processor_id:
^ permalink raw reply related
* [PATCH bpf-next v2 4/7] bpf/verifier: add ARG_PTR_TO_UNINIT_MAP_VALUE
From: Mauricio Vasquez B @ 2018-10-10 14:06 UTC (permalink / raw)
To: Alexei Starovoitov, Daniel Borkmann, netdev; +Cc: Song Liu
In-Reply-To: <153918035266.8915.12458090320382567494.stgit@kernel>
ARG_PTR_TO_UNINIT_MAP_VALUE argument is a pointer to a memory zone
used to save the value of a map. Basically the same as
ARG_PTR_TO_UNINIT_MEM, but the size does not have to be passed as an extra
argument.
This will be used in the following patch that implements some new
helpers that receive a pointer to be filled with a map value.
Signed-off-by: Mauricio Vasquez B <mauricio.vasquez@polito.it>
---
include/linux/bpf.h | 1 +
kernel/bpf/verifier.c | 9 ++++++---
2 files changed, 7 insertions(+), 3 deletions(-)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 5793f0c7fbb5..e37b4986bb45 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -139,6 +139,7 @@ enum bpf_arg_type {
ARG_CONST_MAP_PTR, /* const argument used as pointer to bpf_map */
ARG_PTR_TO_MAP_KEY, /* pointer to stack used as map key */
ARG_PTR_TO_MAP_VALUE, /* pointer to stack used as map value */
+ ARG_PTR_TO_UNINIT_MAP_VALUE, /* pointer to valid memory used to store a map value */
/* the following constraints used to prototype bpf_memcmp() and other
* functions that access data on eBPF program stack
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 3f93a548a642..d84c91ac3b70 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -2117,7 +2117,8 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
}
if (arg_type == ARG_PTR_TO_MAP_KEY ||
- arg_type == ARG_PTR_TO_MAP_VALUE) {
+ arg_type == ARG_PTR_TO_MAP_VALUE ||
+ arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE) {
expected_type = PTR_TO_STACK;
if (!type_is_pkt_pointer(type) && type != PTR_TO_MAP_VALUE &&
type != expected_type)
@@ -2187,7 +2188,8 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
err = check_helper_mem_access(env, regno,
meta->map_ptr->key_size, false,
NULL);
- } else if (arg_type == ARG_PTR_TO_MAP_VALUE) {
+ } else if (arg_type == ARG_PTR_TO_MAP_VALUE ||
+ arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE) {
/* bpf_map_xxx(..., map_ptr, ..., value) call:
* check [value, value + map->value_size) validity
*/
@@ -2196,9 +2198,10 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
verbose(env, "invalid map_ptr to access map->value\n");
return -EACCES;
}
+ meta->raw_mode = (arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE);
err = check_helper_mem_access(env, regno,
meta->map_ptr->value_size, false,
- NULL);
+ meta);
} else if (arg_type_is_mem_size(arg_type)) {
bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO);
^ permalink raw reply related
* [PATCH bpf-next v2 3/7] bpf: add MAP_LOOKUP_AND_DELETE_ELEM syscall
From: Mauricio Vasquez B @ 2018-10-10 14:06 UTC (permalink / raw)
To: Alexei Starovoitov, Daniel Borkmann, netdev; +Cc: Song Liu
In-Reply-To: <153918035266.8915.12458090320382567494.stgit@kernel>
The following patch implements bpf queue/stack maps that
provide the peek/pop/push functions. There is no direct
relationship between those functions and the current map
syscalls, hence a new MAP_LOOKUP_AND_DELETE_ELEM syscall is added;
it is mapped to the pop operation in the queue/stack maps
and is still to be implemented for other kinds of maps.
Signed-off-by: Mauricio Vasquez B <mauricio.vasquez@polito.it>
---
include/linux/bpf.h | 1 +
include/uapi/linux/bpf.h | 1 +
kernel/bpf/syscall.c | 82 ++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 84 insertions(+)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 9b558713447f..5793f0c7fbb5 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -39,6 +39,7 @@ struct bpf_map_ops {
void *(*map_lookup_elem)(struct bpf_map *map, void *key);
int (*map_update_elem)(struct bpf_map *map, void *key, void *value, u64 flags);
int (*map_delete_elem)(struct bpf_map *map, void *key);
+ void *(*map_lookup_and_delete_elem)(struct bpf_map *map, void *key);
/* funcs called by prog_array and perf_event_array map */
void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file,
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index f9187b41dff6..3bb94aa2d408 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -103,6 +103,7 @@ enum bpf_cmd {
BPF_BTF_LOAD,
BPF_BTF_GET_FD_BY_ID,
BPF_TASK_FD_QUERY,
+ BPF_MAP_LOOKUP_AND_DELETE_ELEM,
};
enum bpf_map_type {
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index f36c080ad356..6907d661dea5 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -980,6 +980,85 @@ static int map_get_next_key(union bpf_attr *attr)
return err;
}
+#define BPF_MAP_LOOKUP_AND_DELETE_ELEM_LAST_FIELD value
+
+/*
+ * Handle the BPF_MAP_LOOKUP_AND_DELETE_ELEM command: pass attr->key to the
+ * map_lookup_and_delete_elem op of the map referred to by attr->map_fd and
+ * copy the value it returns to the user buffer at attr->value.
+ *
+ * Returns 0 on success or a negative errno:
+ *   -EINVAL   CHECK_ATTR() rejected the attributes
+ *   -EPERM    the map fd does not have FMODE_CAN_WRITE
+ *   -ENOMEM   the kernel-side value buffer could not be allocated
+ *   -EFAULT   the user value buffer could not be read or written
+ *   -ENOENT   the map op returned no element
+ *   -ENOTSUPP the map does not implement map_lookup_and_delete_elem
+ * Errors from __bpf_map_get() and __bpf_copy_key() are passed through.
+ */
+static int map_lookup_and_delete_elem(union bpf_attr *attr)
+{
+ void __user *ukey = u64_to_user_ptr(attr->key);
+ void __user *uvalue = u64_to_user_ptr(attr->value);
+ int ufd = attr->map_fd;
+ struct bpf_map *map;
+ void *key, *value, *ptr;
+ u32 value_size;
+ struct fd f;
+ int err;
+
+ if (CHECK_ATTR(BPF_MAP_LOOKUP_AND_DELETE_ELEM))
+ return -EINVAL;
+
+ f = fdget(ufd);
+ map = __bpf_map_get(f);
+ if (IS_ERR(map))
+ return PTR_ERR(map);
+
+ /* the operation removes an element, so write access is required */
+ if (!(f.file->f_mode & FMODE_CAN_WRITE)) {
+ err = -EPERM;
+ goto err_put;
+ }
+
+ key = __bpf_copy_key(ukey, map->key_size);
+ if (IS_ERR(key)) {
+ err = PTR_ERR(key);
+ goto err_put;
+ }
+
+ value_size = map->value_size;
+
+ err = -ENOMEM;
+ value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
+ if (!value)
+ goto free_key;
+
+ /* NOTE(review): on success 'value' is overwritten by the memcpy() below,
+ * so this pre-copy only acts as a readability check of uvalue - confirm
+ * it is intended.
+ */
+ err = -EFAULT;
+ if (copy_from_user(value, uvalue, value_size) != 0)
+ goto free_value;
+
+ /* must increment bpf_prog_active to avoid kprobe+bpf triggering from
+ * inside bpf map update or delete otherwise deadlocks are possible
+ */
+ preempt_disable();
+ __this_cpu_inc(bpf_prog_active);
+ if (map->ops->map_lookup_and_delete_elem) {
+ rcu_read_lock();
+ ptr = map->ops->map_lookup_and_delete_elem(map, key);
+ if (ptr)
+ memcpy(value, ptr, value_size);
+ rcu_read_unlock();
+ err = ptr ? 0 : -ENOENT;
+ } else {
+ err = -ENOTSUPP;
+ }
+
+ __this_cpu_dec(bpf_prog_active);
+ preempt_enable();
+
+ if (err)
+ goto free_value;
+
+ /* err is 0 at this point; set it so a failed copy to userspace is
+ * reported as -EFAULT instead of silently returning success
+ */
+ err = -EFAULT;
+ if (copy_to_user(uvalue, value, value_size) != 0)
+ goto free_value;
+
+ err = 0;
+
+free_value:
+ kfree(value);
+free_key:
+ kfree(key);
+err_put:
+ fdput(f);
+ return err;
+}
+
static const struct bpf_prog_ops * const bpf_prog_types[] = {
#define BPF_PROG_TYPE(_id, _name) \
[_id] = & _name ## _prog_ops,
@@ -2453,6 +2532,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
case BPF_TASK_FD_QUERY:
err = bpf_task_fd_query(&attr, uattr);
break;
+ case BPF_MAP_LOOKUP_AND_DELETE_ELEM:
+ err = map_lookup_and_delete_elem(&attr);
+ break;
default:
err = -EINVAL;
break;
^ permalink raw reply related
* [PATCH bpf-next v2 2/7] bpf/syscall: allow key to be null in map functions
From: Mauricio Vasquez B @ 2018-10-10 14:06 UTC (permalink / raw)
To: Alexei Starovoitov, Daniel Borkmann, netdev; +Cc: Song Liu
In-Reply-To: <153918035266.8915.12458090320382567494.stgit@kernel>
This commit adds the required logic to allow key being NULL
in case the key_size of the map is 0.
A new __bpf_copy_key function helper only copies the key from
userspace when key_size != 0, otherwise it enforces that key must be
null.
Signed-off-by: Mauricio Vasquez B <mauricio.vasquez@polito.it>
---
kernel/bpf/syscall.c | 19 +++++++++++++++----
1 file changed, 15 insertions(+), 4 deletions(-)
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 4f416234251f..f36c080ad356 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -651,6 +651,17 @@ int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
return -ENOTSUPP;
}
+static void *__bpf_copy_key(void __user *ukey, u64 key_size)
+{
+ if (key_size) /* non-zero key size: duplicate key_size bytes from ukey */
+ return memdup_user(ukey, key_size);
+
+ if (ukey) /* zero key size: a non-NULL user key pointer is rejected */
+ return ERR_PTR(-EINVAL);
+
+ return NULL; /* zero key size and NULL ukey: the caller gets a NULL key */
+}
+
/* last field in 'union bpf_attr' used by this command */
#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value
@@ -678,7 +689,7 @@ static int map_lookup_elem(union bpf_attr *attr)
goto err_put;
}
- key = memdup_user(ukey, map->key_size);
+ key = __bpf_copy_key(ukey, map->key_size);
if (IS_ERR(key)) {
err = PTR_ERR(key);
goto err_put;
@@ -774,7 +785,7 @@ static int map_update_elem(union bpf_attr *attr)
goto err_put;
}
- key = memdup_user(ukey, map->key_size);
+ key = __bpf_copy_key(ukey, map->key_size);
if (IS_ERR(key)) {
err = PTR_ERR(key);
goto err_put;
@@ -876,7 +887,7 @@ static int map_delete_elem(union bpf_attr *attr)
goto err_put;
}
- key = memdup_user(ukey, map->key_size);
+ key = __bpf_copy_key(ukey, map->key_size);
if (IS_ERR(key)) {
err = PTR_ERR(key);
goto err_put;
@@ -928,7 +939,7 @@ static int map_get_next_key(union bpf_attr *attr)
}
if (ukey) {
- key = memdup_user(ukey, map->key_size);
+ key = __bpf_copy_key(ukey, map->key_size);
if (IS_ERR(key)) {
err = PTR_ERR(key);
goto err_put;
^ permalink raw reply related
* [PATCH bpf-next v2 1/7] bpf: rename stack trace map operations
From: Mauricio Vasquez B @ 2018-10-10 14:05 UTC (permalink / raw)
To: Alexei Starovoitov, Daniel Borkmann, netdev; +Cc: Song Liu
In-Reply-To: <153918035266.8915.12458090320382567494.stgit@kernel>
In the following patches queue and stack maps (FIFO and LIFO
datastructures) will be implemented. In order to avoid confusion and
a possible name clash rename stack_map_ops to stack_trace_map_ops
Signed-off-by: Mauricio Vasquez B <mauricio.vasquez@polito.it>
---
include/linux/bpf_types.h | 2 +-
kernel/bpf/stackmap.c | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 5432f4c9f50e..658509daacd4 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -51,7 +51,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_LRU_HASH, htab_lru_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_LRU_PERCPU_HASH, htab_lru_percpu_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_LPM_TRIE, trie_map_ops)
#ifdef CONFIG_PERF_EVENTS
-BPF_MAP_TYPE(BPF_MAP_TYPE_STACK_TRACE, stack_map_ops)
+BPF_MAP_TYPE(BPF_MAP_TYPE_STACK_TRACE, stack_trace_map_ops)
#endif
BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY_OF_MAPS, array_of_maps_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, htab_of_maps_map_ops)
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index b2ade10f7ec3..90daf285de03 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -600,7 +600,7 @@ static void stack_map_free(struct bpf_map *map)
put_callchain_buffers();
}
-const struct bpf_map_ops stack_map_ops = {
+const struct bpf_map_ops stack_trace_map_ops = {
.map_alloc = stack_map_alloc,
.map_free = stack_map_free,
.map_get_next_key = stack_map_get_next_key,
^ permalink raw reply related
* [PATCH bpf-next v2 0/7] Implement queue/stack maps
From: Mauricio Vasquez B @ 2018-10-10 14:05 UTC (permalink / raw)
To: Alexei Starovoitov, Daniel Borkmann, netdev; +Cc: Song Liu
Some applications need a pool of free elements, for
example the list of free L4 ports in a SNAT. None of the current maps allows
this, as it is not possible to get an element without having the key
it is associated with; even if it were possible, the lack of locking mechanisms in
eBPF would make it almost impossible to implement without data races.
This patchset implements two new kinds of eBPF maps: queue and stack.
Those maps provide the peek, push and pop operations to eBPF programs, and
a new bpf_map_lookup_and_delete_elem() is added for userspace applications.
Signed-off-by: Mauricio Vasquez B <mauricio.vasquez@polito.it>
v1 -> v2:
- Put ARG_PTR_TO_UNINIT_MAP_VALUE logic into a separated patch
- Fix missing __this_cpu_dec & preempt_enable calls in kernel/bpf/syscall.c
RFC v4 -> v1:
- Remove roundup to power of 2 in memory allocation
- Remove count and use a free slot to check if queue/stack is empty
- Use if + assignment for wrapping indexes
- Fix some minor style issues
- Squash two patches together
RFC v3 -> RFC v4:
- Revert renaming of kernel/bpf/stackmap.c
- Remove restriction on value size
- Remove len arguments from peek/pop helpers
- Add new ARG_PTR_TO_UNINIT_MAP_VALUE
RFC v2 -> RFC v3:
- Return elements by value instead of by reference
- Implement queue/stack based on an array and head + tail indexes
- Rename stack trace related files to avoid confusion and conflicts
RFC v1 -> RFC v2:
- Create two separate maps instead of single one + flags
- Implement bpf_map_lookup_and_delete syscall
- Support peek operation
- Define replacement policy through flags in the update() method
- Add eBPF side tests
---
Mauricio Vasquez B (7):
bpf: rename stack trace map operations
bpf/syscall: allow key to be null in map functions
bpf: add MAP_LOOKUP_AND_DELETE_ELEM syscall
bpf/verifier: add ARG_PTR_TO_UNINIT_MAP_VALUE
bpf: add queue and stack maps
Sync uapi/bpf.h to tools/include
selftests/bpf: add test cases for queue and stack maps
include/linux/bpf.h | 8 +
include/linux/bpf_types.h | 4
include/uapi/linux/bpf.h | 30 ++
kernel/bpf/Makefile | 2
kernel/bpf/core.c | 3
kernel/bpf/helpers.c | 43 +++
kernel/bpf/queue_stack_maps.c | 288 ++++++++++++++++++++
kernel/bpf/stackmap.c | 2
kernel/bpf/syscall.c | 110 +++++++-
kernel/bpf/verifier.c | 28 ++
net/core/filter.c | 6
tools/include/uapi/linux/bpf.h | 30 ++
tools/lib/bpf/bpf.c | 12 +
tools/lib/bpf/bpf.h | 1
tools/testing/selftests/bpf/Makefile | 5
tools/testing/selftests/bpf/bpf_helpers.h | 7
tools/testing/selftests/bpf/test_maps.c | 122 ++++++++
tools/testing/selftests/bpf/test_progs.c | 99 +++++++
tools/testing/selftests/bpf/test_queue_map.c | 4
tools/testing/selftests/bpf/test_queue_stack_map.h | 59 ++++
tools/testing/selftests/bpf/test_stack_map.c | 4
21 files changed, 853 insertions(+), 14 deletions(-)
create mode 100644 kernel/bpf/queue_stack_maps.c
create mode 100644 tools/testing/selftests/bpf/test_queue_map.c
create mode 100644 tools/testing/selftests/bpf/test_queue_stack_map.h
create mode 100644 tools/testing/selftests/bpf/test_stack_map.c
^ permalink raw reply
* Re: BUG: corrupted list in p9_read_work
From: Dmitry Vyukov @ 2018-10-10 14:03 UTC (permalink / raw)
To: Dominique Martinet, Leon Romanovsky
Cc: syzbot, David Miller, Eric Van Hensbergen, LKML, Latchesar Ionkov,
netdev, Ron Minnich, syzkaller-bugs, v9fs-developer
In-Reply-To: <20181009020949.GA29622@nautica>
On Tue, Oct 9, 2018 at 4:09 AM, Dominique Martinet
<asmadeus@codewreck.org> wrote:
> syzbot wrote on Mon, Oct 08, 2018:
>> syzbot has found a reproducer for the following crash on:
>>
>> HEAD commit: 0854ba5ff5c9 Merge git://git.kernel.org/pub/scm/linux/kern..
>> git tree: upstream
>> console output: https://syzkaller.appspot.com/x/log.txt?x=1514ec06400000
>> kernel config: https://syzkaller.appspot.com/x/.config?x=88e9a8a39dc0be2d
>> dashboard link: https://syzkaller.appspot.com/bug?extid=2222c34dc40b515f30dc
>> compiler: gcc (GCC) 8.0.1 20180413 (experimental)
>> syz repro: https://syzkaller.appspot.com/x/repro.syz?x=10b91685400000
>>
>> IMPORTANT: if you fix the bug, please add the following tag to the commit:
>> Reported-by: syzbot+2222c34dc40b515f30dc@syzkaller.appspotmail.com
>>
>> list_del corruption, ffff88019ae36ee8->next is LIST_POISON1
>> (dead000000000100)
>> ------------[ cut here ]------------
>> [...]
>> list_del include/linux/list.h:125 [inline]
>> p9_read_work+0xab6/0x10e0 net/9p/trans_fd.c:379
>
> Hmm this looks very much like the report from
> syzbot+735d926e9d1317c3310c@syzkaller.appspotmail.com
> which should have been fixed by Tomas in 9f476d7c540cb
> ("net/9p/trans_fd.c: fix race by holding the lock")...
>
> It looks like another double list_del, looking at the code again there
> actually are other ways this could happen around connection errors.
> For example,
> - p9_read_work receives something and lookup works... meanwhile
> - p9_write_work fails to write and calls p9_conn_cancel, which deletes
> from the req_list without waiting for other works to finish (could also
> happen in p9_poll_mux)
> - p9_read_work finishes processing the read and deletes from list again
>
> For this one the simplest fix would probably be to just not
> list_del/call p9_client_cb at all if m->r?req->status isn't
> REQ_STATUS_ERROR in p9_read_work after the "got new packet" debug print,
> and frankly I think that's saner so I'll send a patch shortly doing
> that, but I have zero confidence there aren't similar bugs around, the
> tcp code is so messy... Most of the syzbot reports recently have been
> around trans_fd which I don't think is used much in real life, and this
> is not really motivating (i.e. I think it would probably need a more
> extensive rewrite but nobody cares) :/
>
>
> Dmitry, on that note, do you think syzbot could possibly test other
> transports somehow? rdma or virtio cannot be faked as easily as passing
> a fd around, but I'd be very interested in seeing these flayed a bit.
Hi Dominique,
How can they be faked?
If we could create a private rdma/virtio stub instance per test
process, then we could I think easily use that instance for 9p. But is
it possible?
Testing on real hardware is mostly outside of our priorities at the
moment. I mean syzkaller itself can be run on anything, and one could
extend descriptions to use a known rdma interface and run on a real
hardware. But we can't afford this at the moment.
As far as I understand RDMA maintainers run syzkaller on real
hardware, but I don't know if they are up to including 9p into
testing. +Leon
^ permalink raw reply
* [PATCH net] net: make skb_partial_csum_set() more robust against overflows
From: Eric Dumazet @ 2018-10-10 13:59 UTC (permalink / raw)
To: David S . Miller; +Cc: netdev, Eric Dumazet, Eric Dumazet, Herbert Xu
syzbot managed to crash in skb_checksum_help() [1] :
BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb));
Root cause is the following check in skb_partial_csum_set()
if (unlikely(start > skb_headlen(skb)) ||
unlikely((int)start + off > skb_headlen(skb) - 2))
return false;
If skb_headlen(skb) is 1, then (skb_headlen(skb) - 2) becomes 0xffffffff
and the check fails to detect that ((int)start + off) is off the limit,
since the compare is unsigned.
When we fix that, then the first condition (start > skb_headlen(skb))
becomes obsolete.
Then we should also check that (skb_headroom(skb) + start) won't
overflow 16bit field.
[1]
kernel BUG at net/core/dev.c:2880!
invalid opcode: 0000 [#1] PREEMPT SMP KASAN
CPU: 1 PID: 7330 Comm: syz-executor4 Not tainted 4.19.0-rc6+ #253
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
RIP: 0010:skb_checksum_help+0x9e3/0xbb0 net/core/dev.c:2880
Code: 85 00 ff ff ff 48 c1 e8 03 42 80 3c 28 00 0f 84 09 fb ff ff 48 8b bd 00 ff ff ff e8 97 a8 b9 fb e9 f8 fa ff ff e8 2d 09 76 fb <0f> 0b 48 8b bd 28 ff ff ff e8 1f a8 b9 fb e9 b1 f6 ff ff 48 89 cf
RSP: 0018:ffff8801d83a6f60 EFLAGS: 00010293
RAX: ffff8801b9834380 RBX: ffff8801b9f8d8c0 RCX: ffffffff8608c6d7
RDX: 0000000000000000 RSI: ffffffff8608cc63 RDI: 0000000000000006
RBP: ffff8801d83a7068 R08: ffff8801b9834380 R09: 0000000000000000
R10: ffff8801d83a76d8 R11: 0000000000000000 R12: 0000000000000001
R13: 0000000000010001 R14: 000000000000ffff R15: 00000000000000a8
FS: 00007f1a66db5700(0000) GS:ffff8801daf00000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007f7d77f091b0 CR3: 00000001ba252000 CR4: 00000000001406e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
skb_csum_hwoffload_help+0x8f/0xe0 net/core/dev.c:3269
validate_xmit_skb+0xa2a/0xf30 net/core/dev.c:3312
__dev_queue_xmit+0xc2f/0x3950 net/core/dev.c:3797
dev_queue_xmit+0x17/0x20 net/core/dev.c:3838
packet_snd net/packet/af_packet.c:2928 [inline]
packet_sendmsg+0x422d/0x64c0 net/packet/af_packet.c:2953
Fixes: 5ff8dda3035d ("net: Ensure partial checksum offset is inside the skb head")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Reported-by: syzbot <syzkaller@googlegroups.com>
---
net/core/skbuff.c | 12 +++++++-----
1 file changed, 7 insertions(+), 5 deletions(-)
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index b2c807f67aba5847fa0c9f07adabbff7cf1afd22..428094b577fc96f5f93ab3f93d27997935de35e3 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4452,14 +4452,16 @@ EXPORT_SYMBOL_GPL(skb_complete_wifi_ack);
*/
bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off)
{
- if (unlikely(start > skb_headlen(skb)) ||
- unlikely((int)start + off > skb_headlen(skb) - 2)) {
- net_warn_ratelimited("bad partial csum: csum=%u/%u len=%u\n",
- start, off, skb_headlen(skb));
+ u32 csum_end = (u32)start + (u32)off + sizeof(__sum16);
+ u32 csum_start = skb_headroom(skb) + (u32)start;
+
+ if (unlikely(csum_start > U16_MAX || csum_end > skb_headlen(skb))) {
+ net_warn_ratelimited("bad partial csum: csum=%u/%u headroom=%u headlen=%u\n",
+ start, off, skb_headroom(skb), skb_headlen(skb));
return false;
}
skb->ip_summed = CHECKSUM_PARTIAL;
- skb->csum_start = skb_headroom(skb) + start;
+ skb->csum_start = csum_start;
skb->csum_offset = off;
skb_set_transport_header(skb, start);
return true;
--
2.19.0.605.g01d371f741-goog
^ permalink raw reply related
* Re: [PATCH v9 00/15] octeontx2-af: Add RVU Admin Function driver
From: Arnd Bergmann @ 2018-10-10 13:41 UTC (permalink / raw)
To: sunil.kovvuri; +Cc: netdev, davem, linux-soc, Sunil Goutham
In-Reply-To: <1539175475-5351-1-git-send-email-sunil.kovvuri@gmail.com>
On 10/10/18, sunil.kovvuri@gmail.com <sunil.kovvuri@gmail.com> wrote:
> From: Sunil Goutham <sgoutham@marvell.com>
> This is the first set of patches out of 80+ patches.
>
> Changes from v8:
> 1 Removed unnecessary typecasts in entire series
> - Suggested by David Miller
> 2 Added COMPILE_TEST to AF driver
> - Suggested by Arnd Bergmann
> 3 Changed udelay() to usleep_range() in rvu_poll_reg
> - Suggested by Arnd Bergmann
> 4 MSIX vector base IOMMU mapping is done using dma_map_resource()
> API instead of dma_map_single() as it accepts physical address.
> - Issue pointed by Arnd Bergmann
With those changes, I have no more review comments (other than
a single suggestion to improve the usleep_range() loop again).
Reviewed-by: Arnd Bergmann <arnd@arndb.de>
Arnd
^ permalink raw reply
* Re: [PATCH net-next 0/3] nfp: flower: speed up stats update loop
From: Or Gerlitz @ 2018-10-10 13:40 UTC (permalink / raw)
To: Jakub Kicinski; +Cc: David Miller, Linux Netdev List, oss-drivers
In-Reply-To: <20181009015736.30268-1-jakub.kicinski@netronome.com>
On Tue, Oct 9, 2018 at 4:58 AM Jakub Kicinski
<jakub.kicinski@netronome.com> wrote:
> Given that our statistic IDs are already array indices, and considering
> each statistic is only 24B in size, we decided to forego the use
8B packet + 8B bytes --> 16B -- does your FW/HW provide last use? how do
you express this lastuse value in host jiffies?
^ permalink raw reply
* Re: [PATCH v9 02/15] octeontx2-af: Reset all RVU blocks
From: Arnd Bergmann @ 2018-10-10 13:39 UTC (permalink / raw)
To: sunil.kovvuri; +Cc: netdev, davem, linux-soc, Sunil Goutham
In-Reply-To: <1539175475-5351-3-git-send-email-sunil.kovvuri@gmail.com>
> + void __iomem *reg;
> + int timeout = 100;
> + u64 reg_val;
> +
> + reg = rvu->afreg_base + ((block << 28) | offset);
> + while (timeout) {
> + reg_val = readq(reg);
> + if (zero && !(reg_val & mask))
> + return 0;
> + if (!zero && (reg_val & mask))
> + return 0;
> + usleep_range(1, 2);
> + timeout--;
> + }
One more clarification here: if you loop around a usleep_range(),
I would suggest using 'while (time_before(jiffies, end))'
or 'ktime_before(ktime_get(), end)' as the loop condition, otherwise
the maximum timeout can be fairly large depending on the
actual configuration and presence of timers. Then you can
also make the range much wider, e.g. 'usleep_range(1, 10)'
Aside from that, this looks much better than the delay loop
you had before.
Arnd
^ permalink raw reply
* Re: [RFC 0/2] net: sched: indirect/remote setup tc block cb registering
From: Or Gerlitz @ 2018-10-10 13:38 UTC (permalink / raw)
To: Jakub Kicinski
Cc: John Hurley, Linux Netdev List, Jiri Pirko, oss-drivers,
Oz Shlomo, Aviv Heller, Simon Horman
In-Reply-To: <20181004101941.509d04b9@cakuba.netronome.com>
On Thu, Oct 4, 2018 at 8:19 PM Jakub Kicinski
<jakub.kicinski@netronome.com> wrote:
> On Thu, 4 Oct 2018 17:20:43 +0100, John Hurley wrote:
> > > > In this case the hw driver will receive the rules from the tunnel device directly.
> > > > The driver can then offload them as it sees fit.
> > >
> > > if both instances of the hw drivers (uplink0, uplink1) register to get
> > > the rules installed on the block of the tunnel device we have exactly
> > > what we want, isn't that?
> > >
> >
> > The design here is that each hw driver should only need to register
> > for callbacks on a 'higher level' device's block once.
> > When a callback is triggered the driver receives one instance of the
> > rule and can make its own decision about what to do.
> > This is slightly different from registering ingress devs where each
> > uplink registers for its own block.
> > It is probably more akin to the egdev setup in that if a rule on a
> > block egresses to an uplink, the driver receives 1 callback on the
> > rule, irrespective of how many underlying netdevs are on the block.
>
> Right, though nothing stops the driver from registering multiple
> callbacks for the same device, if it's somehow useful.
I must be missing something.. put uplink bonding aside. If the user
is setting tc ingress rule
on a tunnel device (vxlan0/gre0) over a system with multiple unrelated
NICs/uplinks that support
TC decap offload, wouldn't each of these netdevs want to install the
rule into HW? why do we want
the HW driver to duplicate the rule between the potential candidates
among the netdev instances they created?
and not each of them to get the callback and decide??
we want each netdev instance of these NIC
^ permalink raw reply
* RE: [PATCH] qed: fix memory leak of pent on error exit paths
From: Bolotin, Denis @ 2018-10-10 13:37 UTC (permalink / raw)
To: Colin King, Elior, Ariel, Dept-Eng Everest Linux L2,
David S . Miller
Cc: kernel-janitors@vger.kernel.org, netdev@vger.kernel.org
In-Reply-To: <20181008141703.21159-1-colin.king@canonical.com>
> +err:
> + qed_spq_return_entry(p_hwfn, *pp_ent);
> + *pp_ent = NULL;
> +
> + return rc;
Hi Colin,
This leak is a known issue and can be found in several locations in the code. We are working on fixing it globally and it is currently being tested.
Thank you for your fix but we would rather prepare a fix that would also cover the other leaks in the code.
To comment on your fix, qed_spq_return_entry() may not be the API needed to prevent the leak. If you look at qed_spq_get_entry(), you’ll see that an entry can be taken from the free_pool but also can be kzalloc’ed.
The proper solution would be the solution below, but like I said, we are working on a complete patch that will be submitted soon.
+ if (p_ent->queue == &p_hwfn->p_spq->unlimited_pending
+ kfree(p_ent);
+ else
+ qed_spq_return_entry(p_hwfn, *pp_ent);
Thanks,
Denis
^ permalink raw reply
* Re: [PATCH v9 10/15] octeontx2-af: Reconfig MSIX base with IOVA
From: Arnd Bergmann @ 2018-10-10 13:31 UTC (permalink / raw)
To: sunil.kovvuri; +Cc: netdev, davem, linux-soc, Geetha sowjanya, Sunil Goutham
In-Reply-To: <1539175475-5351-11-git-send-email-sunil.kovvuri@gmail.com>
On 10/10/18, sunil.kovvuri@gmail.com <sunil.kovvuri@gmail.com> wrote:
> From: Geetha sowjanya <gakula@marvell.com>
>
> HW interprets RVU_AF_MSIXTR_BASE address as an IOVA, hence
> create an IOMMU mapping for the physical address configured by
> firmware and reconfig RVU_AF_MSIXTR_BASE with IOVA.
>
> Signed-off-by: Geetha sowjanya <gakula@marvell.com>
> Signed-off-by: Sunil Goutham <sgoutham@marvell.com>
Looks good to me now,
Reviewed-by: Arnd Bergmann <arnd@arndb.de>
^ permalink raw reply
* sparc64 mystery with Cheetah+ D-cache parity error (n_tty_set_termios, bpf_check, cheetah_copy_page_insn)
From: Meelis Roos @ 2018-10-10 13:24 UTC (permalink / raw)
To: sparclinux, netdev; +Cc: Greg Kroah-Hartman, Jiri Slaby
I have seen multiple strange messages like this, on multiple sparc64 machines:
[ 55.523882] CPU[1]: Cheetah+ D-cache parity error at TPC[0000000000707e8c]
[ 55.626033] TPC<n_tty_set_termios+0x2c/0x3c0>
This specific one is from n_tty_set_termios and it is currently repeatable on a Sun V210.
I have seen these on V245 and V445 too, with different addresses. On V445, the same address caused
errors on multiple CPUs so it does not seem like a hardware problem, rather something software related,
that's why I am reporting it here.
On V445 it is gone with my current custom kernels but was there with 4.16.0-1-sparc64-smp Debian kernel package,
probably because I do not have bpfilter compiled in:
Sep 27 22:51:52 v445 kernel: [ 51.635141] CPU[3]: Cheetah+ D-cache parity error at TPC[000000000057bc70]
Sep 27 22:51:52 v445 kernel: [ 51.733940] TPC<bpf_check+0x30/0x1780>
Sep 27 23:13:45 v445 kernel: [ 59.192005] CPU[1]: Cheetah+ D-cache parity error at TPC[000000000057bc70]
Sep 27 23:13:45 v445 kernel: [ 59.290762] TPC<bpf_check+0x30/0x1780>
Sep 27 23:22:14 v445 kernel: [ 336.892797] CPU[3]: Cheetah+ D-cache parity error at TPC[000000000057bc70]
Sep 27 23:22:14 v445 kernel: [ 336.991586] TPC<bpf_check+0x30/0x1780>
Sep 27 23:22:15 v445 kernel: [ 337.546927] CPU[0]: Cheetah+ D-cache parity error at TPC[000000000057bc70]
Sep 27 23:22:15 v445 kernel: [ 337.645689] TPC<bpf_check+0x30/0x1780>
Sep 27 23:44:40 v445 kernel: [ 1682.966655] CPU[1]: Cheetah+ D-cache parity error at TPC[000000000057bc70]
Sep 27 23:44:40 v445 kernel: [ 1683.065442] TPC<bpf_check+0x30/0x1780>
Sep 27 23:47:05 v445 kernel: [ 1828.099386] CPU[1]: Cheetah+ D-cache parity error at TPC[000000000057bc70]
Sep 27 23:47:05 v445 kernel: [ 1828.198145] TPC<bpf_check+0x30/0x1780>
Sep 27 23:47:37 v445 kernel: [ 1859.857710] CPU[0]: Cheetah+ D-cache parity error at TPC[000000000057bc70]
Sep 27 23:47:37 v445 kernel: [ 1859.956441] TPC<bpf_check+0x30/0x1780>
Sep 27 23:47:47 v445 kernel: [ 1869.732962] CPU[0]: Cheetah+ D-cache parity error at TPC[000000000057bc70]
Sep 27 23:47:47 v445 kernel: [ 1869.831721] TPC<bpf_check+0x30/0x1780>
On V245, it is also gone with current kernel. Here are logs from 4.18.0-rc7-00090-gc1d61e7 to 4.19.0-rc7-00009-g0854ba5:
Aug 13 09:18:13 v245 kernel: [ 50.639758] CPU[0]: Cheetah+ D-cache parity error at TPC[00000000006cf28c]
Aug 13 09:18:13 v245 kernel: [ 50.743561] TPC<n_tty_set_termios+0x2c/0x3c0>
Aug 13 09:18:13 v245 kernel: [ 62.151683] CPU[0]: Cheetah+ D-cache parity error at TPC[00000000006cf28c]
Aug 13 09:18:13 v245 kernel: [ 62.252996] TPC<n_tty_set_termios+0x2c/0x3c0>
Aug 26 11:35:37 v245 kernel: [ 55.341794] CPU[0]: Cheetah+ D-cache parity error at TPC[00000000006cdc8c]
Aug 26 11:35:37 v245 kernel: [ 55.437571] TPC<n_tty_set_termios+0x2c/0x3c0>
Aug 26 13:47:03 v245 kernel: [ 7941.436962] CPU[1]: Cheetah+ D-cache parity error at TPC[00000000006cdc8c]
Aug 26 13:47:03 v245 kernel: [ 7941.531823] TPC<n_tty_set_termios+0x2c/0x3c0>
Aug 26 13:49:07 v245 kernel: [ 65.179371] CPU[1]: Cheetah+ D-cache parity error at TPC[00000000006d340c]
Aug 26 13:49:07 v245 kernel: [ 65.285928] TPC<n_tty_set_termios+0x2c/0x3c0>
Sep 17 15:45:37 v245 kernel: [ 67.099122] CPU[1]: Cheetah+ D-cache parity error at TPC[00000000006d340c]
Sep 17 15:45:37 v245 kernel: [ 67.205581] TPC<n_tty_set_termios+0x2c/0x3c0>
Sep 17 20:49:06 v245 kernel: [ 47.923877] CPU[1]: Cheetah+ D-cache parity error at TPC[00000000006d352c]
Sep 17 20:49:06 v245 kernel: [ 48.021575] TPC<n_tty_set_termios+0x2c/0x3c0>
Sep 23 23:03:07 v245 kernel: [ 56.136992] CPU[0]: Cheetah+ D-cache parity error at TPC[00000000006d364c]
Sep 23 23:03:07 v245 kernel: [ 56.243463] TPC<n_tty_set_termios+0x2c/0x3c0>
Oct 8 19:02:41 v245 kernel: [ 55.197360] CPU[0]: Cheetah+ D-cache parity error at TPC[0000000000860e10]
Oct 8 19:02:41 v245 kernel: [ 55.303284] TPC<cheetah_copy_page_insn+0xa4/0x274>
The last one is from 4.19.0-rc7-00009-g0854ba5 and it has happened only on one boot of this kernel, out of 4,
so it is somewhat nondeterministic. This is the same kernel that made me think it does not happen with current
kernels - well, it did with the same image once.
On V210, it has happened since 4.19.0-rc4-00022-gad3273d and is still happening with 4.19.0-rc7-00009-g0854ba5.
Grep from logs:
Sep 23 22:11:23 v210 kernel: [ 61.080357] CPU[1]: Cheetah+ D-cache parity error at TPC[0000000000707b2c]
Sep 23 22:11:23 v210 kernel: [ 61.184336] TPC<n_tty_set_termios+0x2c/0x3c0>
Sep 23 22:11:24 v210 kernel: [ 65.234758] CPU[1]: Cheetah+ D-cache parity error at TPC[0000000000707b2c]
Sep 23 22:11:24 v210 kernel: [ 65.341131] TPC<n_tty_set_termios+0x2c/0x3c0>
Oct 8 19:21:08 v210 kernel: [ 55.303418] CPU[1]: Cheetah+ D-cache parity error at TPC[0000000000707e8c]
Oct 8 19:21:08 v210 kernel: [ 55.406668] TPC<n_tty_set_termios+0x2c/0x3c0>
TPC is different, address relative to n_tty_set_termios is the same.
Disassembly of n_tty_set_termios on V210:
0000000000707e60 <n_tty_set_termios>:
707e60: 9d e3 bf 50 save %sp, -176, %sp
707e64: 02 c6 40 55 brz,pn %i1, 707fb8 <n_tty_set_termios+0x158>
707e68: fa 5e 22 58 ldx [ %i0 + 0x258 ], %i5
707e6c: c2 06 21 1c ld [ %i0 + 0x11c ], %g1
707e70: 05 00 00 40 sethi %hi(0x10000), %g2
707e74: c6 06 60 0c ld [ %i1 + 0xc ], %g3
707e78: 84 10 a0 02 or %g2, 2, %g2
707e7c: 86 18 40 03 xor %g1, %g3, %g3
707e80: 80 88 c0 02 btst %g3, %g2
707e84: 12 48 00 4e bne %icc, 707fbc <n_tty_set_termios+0x15c>
707e88: b8 10 20 83 mov 0x83, %i4
707e8c: c6 0f 60 5d ldub [ %i5 + 0x5d ], %g3
707e90: 83 28 60 02 sll %g1, 2, %g1
707e94: 84 08 60 08 and %g1, 8, %g2
707e98: 82 08 ff f7 and %g3, -9, %g1
707e9c: 82 10 40 02 or %g1, %g2, %g1
707ea0: c2 2f 60 5d stb %g1, [ %i5 + 0x5d ]
707ea4: 84 10 20 1f mov 0x1f, %g2
707ea8: c6 5e 21 10 ldx [ %i0 + 0x110 ], %g3
707eac: 85 28 b0 25 sllx %g2, 0x25, %g2
707eb0: 84 08 c0 02 and %g3, %g2, %g2
707eb4: 0a c0 80 07 brnz,pn %g2, 707ed0 <n_tty_set_termios+0x70>
707eb8: b8 07 60 30 add %i5, 0x30, %i4
707ebc: c4 06 21 1c ld [ %i0 + 0x11c ], %g2
707ec0: 80 88 a0 02 btst 2, %g2
707ec4: 22 68 00 58 be,a %xcc, 708024 <n_tty_set_termios+0x1c4>
707ec8: c4 06 21 10 ld [ %i0 + 0x110 ], %g2
707ecc: b8 07 60 30 add %i5, 0x30, %i4
707ed0: 92 10 20 00 clr %o1
707ed4: 90 10 00 1c mov %i4, %o0
707ed8: 40 07 dc 08 call 8feef8 <memset>
707edc: 94 10 20 20 mov 0x20, %o2
707ee0: c4 5e 21 10 ldx [ %i0 + 0x110 ], %g2
707ee4: 82 10 20 03 mov 3, %g1
707ee8: 83 28 70 27 sllx %g1, 0x27, %g1
707eec: 82 08 80 01 and %g2, %g1, %g1
707ef0: 0a c0 40 b8 brnz,pn %g1, 7081d0 <n_tty_set_termios+0x370>
707ef4: 90 10 20 0d mov 0xd, %o0
707ef8: c2 06 21 10 ld [ %i0 + 0x110 ], %g1
707efc: 80 88 60 40 btst 0x40, %g1
707f00: 12 60 00 b0 bne,pn %xcc, 7081c0 <n_tty_set_termios+0x360>
707f04: 90 10 20 0a mov 0xa, %o0
707f08: c2 06 21 1c ld [ %i0 + 0x11c ], %g1
707f0c: 80 88 60 02 btst 2, %g1
707f10: 32 60 00 87 bne,a,pn %xcc, 70812c <n_tty_set_termios+0x2cc>
707f14: d0 0e 21 23 ldub [ %i0 + 0x123 ], %o0
707f18: c2 06 21 10 ld [ %i0 + 0x110 ], %g1
707f1c: 80 88 64 00 btst 0x400, %g1
707f20: 32 60 00 7c bne,a,pn %xcc, 708110 <n_tty_set_termios+0x2b0>
707f24: d0 0e 21 29 ldub [ %i0 + 0x129 ], %o0
707f28: c2 06 21 1c ld [ %i0 + 0x11c ], %g1
707f2c: 80 88 60 01 btst 1, %g1
707f30: 32 60 00 6e bne,a,pn %xcc, 7080e8 <n_tty_set_termios+0x288>
707f34: d0 0e 21 21 ldub [ %i0 + 0x121 ], %o0
707f38: 90 10 20 00 clr %o0
707f3c: 40 07 db b9 call 8fee20 <clear_bit>
707f40: 92 10 00 1c mov %i4, %o1
707f44: c2 0f 60 5d ldub [ %i5 + 0x5d ], %g1
707f48: 82 08 7f cf and %g1, -49, %g1
707f4c: c2 2f 60 5d stb %g1, [ %i5 + 0x5d ]
707f50: c2 06 21 10 ld [ %i0 + 0x110 ], %g1
707f54: 84 10 20 00 clr %g2
707f58: 85 7e 74 01 movrne %i1, 1, %g2
707f5c: 83 30 60 0a srl %g1, 0xa, %g1
707f60: 82 18 60 01 xor %g1, 1, %g1
707f64: 80 88 40 02 btst %g1, %g2
707f68: 02 40 00 0b be,pn %icc, 707f94 <n_tty_set_termios+0x134>
707f6c: 90 06 22 08 add %i0, 0x208, %o0
707f70: c2 06 40 00 ld [ %i1 ], %g1
707f74: 80 88 64 00 btst 0x400, %g1
707f78: 02 60 00 07 be,pn %xcc, 707f94 <n_tty_set_termios+0x134>
707f7c: 03 10 00 00 sethi %hi(0x40000000), %g1
707f80: c4 5e 21 d0 ldx [ %i0 + 0x1d0 ], %g2
707f84: 82 08 80 01 and %g2, %g1, %g1
707f88: 22 c0 40 96 brz,a,pn %g1, 7081e0 <n_tty_set_termios+0x380>
707f8c: 90 10 00 18 mov %i0, %o0
707f90: 90 06 22 08 add %i0, 0x208, %o0
707f94: 92 10 20 01 mov 1, %o1
707f98: 94 10 20 01 mov 1, %o2
707f9c: 7f f6 5c a9 call 49f240 <__wake_up>
707fa0: 96 10 20 00 clr %o3
707fa4: b0 06 22 20 add %i0, 0x220, %i0
707fa8: b2 10 20 01 mov 1, %i1
707fac: b4 10 20 01 mov 1, %i2
707fb0: 7f f6 5c a4 call 49f240 <__wake_up>
707fb4: 97 e8 20 00 restore %g0, 0, %o3
707fb8: b8 10 20 83 mov 0x83, %i4
707fbc: 92 10 20 00 clr %o1
707fc0: b9 2f 30 05 sllx %i4, 5, %i4
707fc4: 94 10 22 00 mov 0x200, %o2
707fc8: b8 07 40 1c add %i5, %i4, %i4
707fcc: 40 07 db cb call 8feef8 <memset>
707fd0: 90 10 00 1c mov %i4, %o0
707fd4: 82 10 21 13 mov 0x113, %g1
707fd8: 83 28 70 05 sllx %g1, 5, %g1
707fdc: c4 5f 40 01 ldx [ %i5 + %g1 ], %g2
707fe0: 82 10 24 4d mov 0x44d, %g1
707fe4: 83 28 70 03 sllx %g1, 3, %g1
707fe8: c4 77 40 01 stx %g2, [ %i5 + %g1 ]
707fec: c2 06 21 1c ld [ %i0 + 0x11c ], %g1
707ff0: 80 88 60 02 btst 2, %g1
707ff4: 12 68 00 31 bne %xcc, 7080b8 <n_tty_set_termios+0x258>
707ff8: d0 5f 40 00 ldx [ %i5 ], %o0
707ffc: c2 0f 60 5d ldub [ %i5 + 0x5d ], %g1
708000: c4 77 60 10 stx %g2, [ %i5 + 0x10 ]
708004: 82 08 7f fb and %g1, -5, %g1
708008: c2 2f 60 5d stb %g1, [ %i5 + 0x5d ]
70800c: c2 0f 60 5d ldub [ %i5 + 0x5d ], %g1
708010: d0 77 60 08 stx %o0, [ %i5 + 8 ]
708014: 82 08 60 3f and %g1, 0x3f, %g1
708018: c2 2f 60 5d stb %g1, [ %i5 + 0x5d ]
70801c: 10 6f ff 9c b %xcc, 707e8c <n_tty_set_termios+0x2c>
708020: c2 06 21 1c ld [ %i0 + 0x11c ], %g1
708024: 80 88 a4 00 btst 0x400, %g2
708028: 12 6f ff aa bne %xcc, 707ed0 <n_tty_set_termios+0x70>
70802c: b8 07 60 30 add %i5, 0x30, %i4
708030: c6 5e 21 18 ldx [ %i0 + 0x118 ], %g3
708034: 86 08 e0 09 and %g3, 9, %g3
708038: 0a f8 ff a7 brnz %g3, 707ed4 <n_tty_set_termios+0x74>
70803c: 92 10 20 00 clr %o1
708040: 80 88 a0 08 btst 8, %g2
708044: 12 6f ff a5 bne %xcc, 707ed8 <n_tty_set_termios+0x78>
708048: 90 10 00 1c mov %i4, %o0
70804c: 82 10 60 20 or %g1, 0x20, %g1
708050: c2 2f 60 5d stb %g1, [ %i5 + 0x5d ]
708054: c2 06 21 10 ld [ %i0 + 0x110 ], %g1
708058: 80 88 60 01 btst 1, %g1
70805c: 12 68 00 07 bne %xcc, 708078 <n_tty_set_termios+0x218>
708060: c6 5e 21 10 ldx [ %i0 + 0x110 ], %g3
708064: 84 10 20 05 mov 5, %g2
708068: 85 28 b0 21 sllx %g2, 0x21, %g2
70806c: 84 08 c0 02 and %g3, %g2, %g2
708070: 2a c8 80 69 brnz,a %g2, 708214 <n_tty_set_termios+0x3b4>
708074: c2 0f 60 5d ldub [ %i5 + 0x5d ], %g1
708078: 82 10 20 05 mov 5, %g1
70807c: 84 10 20 01 mov 1, %g2
708080: 83 28 70 22 sllx %g1, 0x22, %g1
708084: 85 28 b0 24 sllx %g2, 0x24, %g2
708088: 82 08 c0 01 and %g3, %g1, %g1
70808c: 80 a0 40 02 cmp %g1, %g2
708090: 22 60 00 61 be,a,pn %xcc, 708214 <n_tty_set_termios+0x3b4>
708094: c2 0f 60 5d ldub [ %i5 + 0x5d ], %g1
708098: c2 5e 20 10 ldx [ %i0 + 0x10 ], %g1
70809c: c2 58 60 68 ldx [ %g1 + 0x68 ], %g1
7080a0: 82 08 60 04 and %g1, 4, %g1
7080a4: 02 c8 40 5c brz %g1, 708214 <n_tty_set_termios+0x3b4>
7080a8: c2 0f 60 5d ldub [ %i5 + 0x5d ], %g1
7080ac: 82 10 60 10 or %g1, 0x10, %g1
7080b0: 10 6f ff a8 b %xcc, 707f50 <n_tty_set_termios+0xf0>
7080b4: c2 2f 60 5d stb %g1, [ %i5 + 0x5d ]
7080b8: 80 a0 80 08 cmp %g2, %o0
7080bc: 02 67 ff d0 be,pn %xcc, 707ffc <n_tty_set_termios+0x19c>
7080c0: 92 10 00 1c mov %i4, %o1
7080c4: 90 02 3f ff add %o0, -1, %o0
7080c8: 40 07 db 3b call 8fedb4 <set_bit>
7080cc: 90 0a 2f ff and %o0, 0xfff, %o0
7080d0: c2 0f 60 5d ldub [ %i5 + 0x5d ], %g1
7080d4: d0 5f 40 00 ldx [ %i5 ], %o0
7080d8: 82 10 60 04 or %g1, 4, %g1
7080dc: c2 2f 60 5d stb %g1, [ %i5 + 0x5d ]
7080e0: 10 6f ff cb b %xcc, 70800c <n_tty_set_termios+0x1ac>
7080e4: d0 77 60 10 stx %o0, [ %i5 + 0x10 ]
7080e8: 40 07 db 33 call 8fedb4 <set_bit>
7080ec: 92 10 00 1c mov %i4, %o1
7080f0: d0 0e 21 22 ldub [ %i0 + 0x122 ], %o0
7080f4: 40 07 db 30 call 8fedb4 <set_bit>
7080f8: 92 10 00 1c mov %i4, %o1
7080fc: d0 0e 21 2b ldub [ %i0 + 0x12b ], %o0
708100: 40 07 db 2d call 8fedb4 <set_bit>
708104: 92 10 00 1c mov %i4, %o1
708108: 10 6f ff 8d b %xcc, 707f3c <n_tty_set_termios+0xdc>
70810c: 90 10 20 00 clr %o0
708110: 40 07 db 29 call 8fedb4 <set_bit>
708114: 92 10 00 1c mov %i4, %o1
708118: d0 0e 21 2a ldub [ %i0 + 0x12a ], %o0
70811c: 40 07 db 26 call 8fedb4 <set_bit>
708120: 92 10 00 1c mov %i4, %o1
708124: 10 6f ff 82 b %xcc, 707f2c <n_tty_set_termios+0xcc>
708128: c2 06 21 1c ld [ %i0 + 0x11c ], %g1
70812c: 40 07 db 22 call 8fedb4 <set_bit>
708130: 92 10 00 1c mov %i4, %o1
708134: d0 0e 21 24 ldub [ %i0 + 0x124 ], %o0
708138: 40 07 db 1f call 8fedb4 <set_bit>
70813c: 92 10 00 1c mov %i4, %o1
708140: d0 0e 21 25 ldub [ %i0 + 0x125 ], %o0
708144: 40 07 db 1c call 8fedb4 <set_bit>
708148: 92 10 00 1c mov %i4, %o1
70814c: 90 10 20 0a mov 0xa, %o0
708150: 40 07 db 19 call 8fedb4 <set_bit>
708154: 92 10 00 1c mov %i4, %o1
708158: d0 0e 21 26 ldub [ %i0 + 0x126 ], %o0
70815c: 40 07 db 16 call 8fedb4 <set_bit>
708160: 92 10 00 1c mov %i4, %o1
708164: c4 06 21 1c ld [ %i0 + 0x11c ], %g2
708168: 03 00 00 20 sethi %hi(0x8000), %g1
70816c: 80 88 80 01 btst %g2, %g1
708170: 22 4f ff 6b be,a %icc, 707f1c <n_tty_set_termios+0xbc>
708174: c2 06 21 10 ld [ %i0 + 0x110 ], %g1
708178: d0 0e 21 2f ldub [ %i0 + 0x12f ], %o0
70817c: 40 07 db 0e call 8fedb4 <set_bit>
708180: 92 10 00 1c mov %i4, %o1
708184: d0 0e 21 30 ldub [ %i0 + 0x130 ], %o0
708188: 40 07 db 0b call 8fedb4 <set_bit>
70818c: 92 10 00 1c mov %i4, %o1
708190: d0 0e 21 27 ldub [ %i0 + 0x127 ], %o0
708194: 40 07 db 08 call 8fedb4 <set_bit>
708198: 92 10 00 1c mov %i4, %o1
70819c: c2 06 21 1c ld [ %i0 + 0x11c ], %g1
7081a0: 80 88 60 08 btst 8, %g1
7081a4: 22 6f ff 5e be,a %xcc, 707f1c <n_tty_set_termios+0xbc>
7081a8: c2 06 21 10 ld [ %i0 + 0x110 ], %g1
7081ac: d0 0e 21 2d ldub [ %i0 + 0x12d ], %o0
7081b0: 40 07 db 01 call 8fedb4 <set_bit>
7081b4: 92 10 00 1c mov %i4, %o1
7081b8: 10 6f ff 59 b %xcc, 707f1c <n_tty_set_termios+0xbc>
7081bc: c2 06 21 10 ld [ %i0 + 0x110 ], %g1
7081c0: 40 07 da fd call 8fedb4 <set_bit>
7081c4: 92 10 00 1c mov %i4, %o1
7081c8: 10 6f ff 51 b %xcc, 707f0c <n_tty_set_termios+0xac>
7081cc: c2 06 21 1c ld [ %i0 + 0x11c ], %g1
7081d0: 40 07 da f9 call 8fedb4 <set_bit>
7081d4: 92 10 00 1c mov %i4, %o1
7081d8: 10 6f ff 49 b %xcc, 707efc <n_tty_set_termios+0x9c>
7081dc: c2 06 21 10 ld [ %i0 + 0x110 ], %g1
7081e0: 7f ff f7 20 call 705e60 <start_tty>
7081e4: b2 10 20 01 mov 1, %i1
7081e8: b4 10 20 01 mov 1, %i2
7081ec: b6 10 20 00 clr %i3
7081f0: 7f ff fe fc call 707de0 <process_echoes>
7081f4: 90 10 00 18 mov %i0, %o0
7081f8: 90 06 22 08 add %i0, 0x208, %o0
7081fc: 92 10 20 01 mov 1, %o1
708200: 94 10 20 01 mov 1, %o2
708204: 7f f6 5c 0f call 49f240 <__wake_up>
708208: 96 10 20 00 clr %o3
70820c: 7f f6 5c 0d call 49f240 <__wake_up>
708210: 91 ee 22 20 restore %i0, 0x220, %o0
708214: 82 08 7f ef and %g1, -17, %g1
708218: 10 6f ff 4e b %xcc, 707f50 <n_tty_set_termios+0xf0>
70821c: c2 2f 60 5d stb %g1, [ %i5 + 0x5d ]
Disassembly of cheetah_copy_page_insn from V245:
000000000860d6c <cheetah_copy_page_insn>:
860d6c: 10 68 00 52 b %xcc, 860eb4 <cheetah_copy_page_insn+0x148>
860d70: 01 00 00 00 nop
860d74: 9b 41 80 00 rd %fprs, %o5
860d78: 80 8b 60 06 btst 6, %o5
860d7c: 02 48 00 05 be %icc, 860d90 <cheetah_copy_page_insn+0x24>
860d80: 0f 00 21 83 sethi %hi(0x860c00), %g7
860d84: 03 00 21 7e sethi %hi(0x85f800), %g1
860d88: 81 c0 63 80 jmp %g1 + 0x380 ! 85fb80 <VISenter>
860d8c: 8e 11 e1 90 or %g7, 0x190, %g7
860d90: 8d 80 20 04 wr %g0, 4, %fprs
860d94: 81 43 e0 0e membar #StoreStore|#LoadStore|#StoreLoad
860d98: 15 00 00 00 sethi %hi(0), %o2
860d9c: 82 10 00 08 mov %o0, %g1
860da0: c3 6a 60 00 prefetch [ %o1 ], #one_read
860da4: 94 12 a0 7e or %o2, 0x7e, %o2
860da8: c3 6a 60 40 prefetch [ %o1 + 0x40 ], #one_read
860dac: c3 6a 60 80 prefetch [ %o1 + 0x80 ], #one_read
860db0: c3 6a 60 c0 prefetch [ %o1 + 0xc0 ], #one_read
860db4: c1 1a 60 00 ldd [ %o1 ], %f0
860db8: c3 6a 61 00 prefetch [ %o1 + 0x100 ], #one_read
860dbc: c5 1a 60 08 ldd [ %o1 + 8 ], %f2
860dc0: c3 6a 61 40 prefetch [ %o1 + 0x140 ], #one_read
860dc4: c9 1a 60 10 ldd [ %o1 + 0x10 ], %f4
860dc8: c3 6a 61 80 prefetch [ %o1 + 0x180 ], #one_read
860dcc: a1 b0 0f 00 fsrc2d %f0, %f16
860dd0: cd 1a 60 18 ldd [ %o1 + 0x18 ], %f6
860dd4: a5 b0 0f 02 fsrc2d %f2, %f18
860dd8: d1 1a 60 20 ldd [ %o1 + 0x20 ], %f8
860ddc: a9 b0 0f 04 fsrc2d %f4, %f20
860de0: d5 1a 60 28 ldd [ %o1 + 0x28 ], %f10
860de4: ad b0 0f 06 fsrc2d %f6, %f22
860de8: d9 1a 60 30 ldd [ %o1 + 0x30 ], %f12
860dec: b1 b0 0f 08 fsrc2d %f8, %f24
860df0: dd 1a 60 38 ldd [ %o1 + 0x38 ], %f14
860df4: b5 b0 0f 0a fsrc2d %f10, %f26
860df8: c1 1a 60 40 ldd [ %o1 + 0x40 ], %f0
860dfc: c5 1a 60 48 ldd [ %o1 + 0x48 ], %f2
860e00: b9 b0 0f 0c fsrc2d %f12, %f28
860e04: c9 1a 60 50 ldd [ %o1 + 0x50 ], %f4
860e08: bd b0 0f 0e fsrc2d %f14, %f30
860e0c: e1 ba 1e 00 stda %f16, [ %o0 ] #ASI_BLK_P
860e10: cd 1a 60 58 ldd [ %o1 + 0x58 ], %f6
860e14: a1 b0 0f 00 fsrc2d %f0, %f16
860e18: d1 1a 60 60 ldd [ %o1 + 0x60 ], %f8
860e1c: a5 b0 0f 02 fsrc2d %f2, %f18
860e20: d5 1a 60 68 ldd [ %o1 + 0x68 ], %f10
860e24: a9 b0 0f 04 fsrc2d %f4, %f20
860e28: d9 1a 60 70 ldd [ %o1 + 0x70 ], %f12
860e2c: ad b0 0f 06 fsrc2d %f6, %f22
860e30: dd 1a 60 78 ldd [ %o1 + 0x78 ], %f14
860e34: b1 b0 0f 08 fsrc2d %f8, %f24
860e38: c1 1a 60 80 ldd [ %o1 + 0x80 ], %f0
860e3c: c3 6a 61 80 prefetch [ %o1 + 0x180 ], #one_read
860e40: b5 b0 0f 0a fsrc2d %f10, %f26
860e44: 94 a2 a0 01 deccc %o2
860e48: 90 02 20 40 add %o0, 0x40, %o0
860e4c: 12 6f ff ec bne %xcc, 860dfc <cheetah_copy_page_insn+0x90>
860e50: 92 02 60 40 add %o1, 0x40, %o1
860e54: c5 1a 60 48 ldd [ %o1 + 0x48 ], %f2
860e58: b9 b0 0f 0c fsrc2d %f12, %f28
860e5c: c9 1a 60 50 ldd [ %o1 + 0x50 ], %f4
860e60: bd b0 0f 0e fsrc2d %f14, %f30
860e64: e1 ba 1e 00 stda %f16, [ %o0 ] #ASI_BLK_P
860e68: cd 1a 60 58 ldd [ %o1 + 0x58 ], %f6
860e6c: a1 b0 0f 00 fsrc2d %f0, %f16
860e70: d1 1a 60 60 ldd [ %o1 + 0x60 ], %f8
860e74: a5 b0 0f 02 fsrc2d %f2, %f18
860e78: d5 1a 60 68 ldd [ %o1 + 0x68 ], %f10
860e7c: a9 b0 0f 04 fsrc2d %f4, %f20
860e80: d9 1a 60 70 ldd [ %o1 + 0x70 ], %f12
860e84: ad b0 0f 06 fsrc2d %f6, %f22
860e88: 90 02 20 40 add %o0, 0x40, %o0
860e8c: dd 1a 60 78 ldd [ %o1 + 0x78 ], %f14
860e90: b1 b0 0f 08 fsrc2d %f8, %f24
860e94: b5 b0 0f 0a fsrc2d %f10, %f26
860e98: b9 b0 0f 0c fsrc2d %f12, %f28
860e9c: bd b0 0f 0e fsrc2d %f14, %f30
860ea0: e1 ba 1e 00 stda %f16, [ %o0 ] #ASI_BLK_P
860ea4: 81 43 e0 40 membar #Sync
860ea8: 8d 80 20 00 wr %g0, 0, %fprs
860eac: 10 68 00 46 b %xcc, 860fc4 <cheetah_copy_page_insn+0x258>
860eb0: 01 00 00 00 nop
860eb4: 9b 41 80 00 rd %fprs, %o5
860eb8: 80 8b 60 06 btst 6, %o5
860ebc: 02 48 00 05 be %icc, 860ed0 <cheetah_copy_page_insn+0x164>
860ec0: 0f 00 21 83 sethi %hi(0x860c00), %g7
860ec4: 03 00 21 7e sethi %hi(0x85f800), %g1
860ec8: 81 c0 63 80 jmp %g1 + 0x380 ! 85fb80 <VISenter>
860ecc: 8e 11 e2 d0 or %g7, 0x2d0, %g7
860ed0: 8d 80 20 04 wr %g0, 4, %fprs
860ed4: c6 09 a0 08 ldub [ %g6 + 8 ], %g3
860ed8: 82 10 00 08 mov %o0, %g1
860edc: 80 a0 e0 00 cmp %g3, 0
860ee0: 87 40 c0 00 rd %asi, %g3
860ee4: 22 48 00 03 be,a %icc, 860ef0 <cheetah_copy_page_insn+0x184>
860ee8: 87 80 20 f0 wr %g0, 0xf0, %asi
860eec: 87 80 20 e0 wr %g0, 0xe0, %asi
860ef0: c1 9a 5e 00 ldda [ %o1 ] #ASI_BLK_P, %f0
860ef4: 92 02 60 40 add %o1, 0x40, %o1
860ef8: e1 9a 5e 00 ldda [ %o1 ] #ASI_BLK_P, %f16
860efc: 92 02 60 40 add %o1, 0x40, %o1
860f00: 15 00 00 08 sethi %hi(0x2000), %o2
860f04: a3 b0 0f 00 fsrc2d %f0, %f48
860f08: a7 b0 0f 02 fsrc2d %f2, %f50
860f0c: ab b0 0f 04 fsrc2d %f4, %f52
860f10: af b0 0f 06 fsrc2d %f6, %f54
860f14: b3 b0 0f 08 fsrc2d %f8, %f56
860f18: b7 b0 0f 0a fsrc2d %f10, %f58
860f1c: bb b0 0f 0c fsrc2d %f12, %f60
860f20: bf b0 0f 0e fsrc2d %f14, %f62
860f24: c3 9a 5e 00 ldda [ %o1 ] #ASI_BLK_P, %f32
860f28: e3 ba 20 00 stda %f48, [ %o0 ] %asi
860f2c: 92 02 60 40 add %o1, 0x40, %o1
860f30: 94 22 a0 40 sub %o2, 0x40, %o2
860f34: 90 02 20 40 add %o0, 0x40, %o0
860f38: a3 b0 0f 10 fsrc2d %f16, %f48
860f3c: a7 b0 0f 12 fsrc2d %f18, %f50
860f40: ab b0 0f 14 fsrc2d %f20, %f52
860f44: af b0 0f 16 fsrc2d %f22, %f54
860f48: b3 b0 0f 18 fsrc2d %f24, %f56
860f4c: b7 b0 0f 1a fsrc2d %f26, %f58
860f50: bb b0 0f 1c fsrc2d %f28, %f60
860f54: bf b0 0f 1e fsrc2d %f30, %f62
860f58: c1 9a 5e 00 ldda [ %o1 ] #ASI_BLK_P, %f0
860f5c: e3 ba 20 00 stda %f48, [ %o0 ] %asi
860f60: 92 02 60 40 add %o1, 0x40, %o1
860f64: 94 22 a0 40 sub %o2, 0x40, %o2
860f68: 90 02 20 40 add %o0, 0x40, %o0
860f6c: a3 b0 0f 01 fsrc2d %f32, %f48
860f70: a7 b0 0f 03 fsrc2d %f34, %f50
860f74: ab b0 0f 05 fsrc2d %f36, %f52
860f78: af b0 0f 07 fsrc2d %f38, %f54
860f7c: b3 b0 0f 09 fsrc2d %f40, %f56
860f80: b7 b0 0f 0b fsrc2d %f42, %f58
860f84: bb b0 0f 0d fsrc2d %f44, %f60
860f88: bf b0 0f 0f fsrc2d %f46, %f62
860f8c: e1 9a 5e 00 ldda [ %o1 ] #ASI_BLK_P, %f16
860f90: e3 ba 20 00 stda %f48, [ %o0 ] %asi
860f94: 94 22 a0 40 sub %o2, 0x40, %o2
860f98: 92 02 60 40 add %o1, 0x40, %o1
860f9c: 80 a2 a0 80 cmp %o2, 0x80
860fa0: 12 6f ff d9 bne %xcc, 860f04 <cheetah_copy_page_insn+0x198>
860fa4: 90 02 20 40 add %o0, 0x40, %o0
860fa8: 81 43 e0 40 membar #Sync
860fac: c1 ba 20 00 stda %f0, [ %o0 ] %asi
860fb0: 90 02 20 40 add %o0, 0x40, %o0
860fb4: e1 ba 20 00 stda %f16, [ %o0 ] %asi
860fb8: 81 43 e0 40 membar #Sync
860fbc: 87 80 e0 00 wr %g3, 0, %asi
860fc0: 8d 80 20 00 wr %g0, 0, %fprs
860fc4: c0 f0 4b e0 stxa %g0, [ %g1 ] #ASI_DMMU_DEMAP
860fc8: 81 43 e0 40 membar #Sync
860fcc: 05 00 00 10 sethi %hi(0x4000), %g2
860fd0: c0 f0 4b e2 stxa %g0, [ %g1 + %g2 ] #ASI_DMMU_DEMAP
860fd4: 81 43 e0 40 membar #Sync
860fd8: 81 c3 e0 08 retl
860fdc: d8 21 a0 30 st %o4, [ %g6 + 0x30 ]
--
Meelis Roos <mroos@linux.ee>
^ permalink raw reply
* Re: [PATCH net-next v7 28/28] net: WireGuard secure network tunnel
From: Jason A. Donenfeld @ 2018-10-10 20:27 UTC (permalink / raw)
To: Jiří Pírko; +Cc: LKML, Netdev, David Miller, Greg Kroah-Hartman
In-Reply-To: <20181010091358.GB5027@nanopsycho.orion>
Hey Jiri,
Actually, in the end I went with the suggestion from Andrew and Lukas,
which is to follow Dan's guideline:
https://lkml.org/lkml/2016/8/22/374 . It looks like this:
https://git.kernel.org/pub/scm/linux/kernel/git/zx2c4/linux.git/tree/drivers/net/wireguard/device.c?h=jd/wireguard#n280
Jason
^ permalink raw reply
* Re: [PATCH 06/31] netfilter: nf_tables: add xfrm expression
From: Florian Westphal @ 2018-10-10 12:53 UTC (permalink / raw)
To: Eyal Birger; +Cc: fw, Pablo Neira Ayuso, netfilter-devel, netdev
In-Reply-To: <20181010143947.44a8f385@jimi>
Eyal Birger <eyal.birger@gmail.com> wrote:
> > + state = sp->xvec[priv->spnum];
> > + nft_xfrm_state_get_key(priv, regs, state, nft_pf(pkt));
>
> I'm not familiar enough with nftables to be sure, but doesn't the use
> of nft_pf(pkt) in this context limit the matching of encapsulated
> packets to the same family?
Good point. I'll test this fix:
diff --git a/net/netfilter/nft_xfrm.c b/net/netfilter/nft_xfrm.c
--- a/net/netfilter/nft_xfrm.c
+++ b/net/netfilter/nft_xfrm.c
@@ -118,12 +118,13 @@ static bool xfrm_state_addr_ok(enum nft_xfrm_keys k, u8 family, u8 mode)
static void nft_xfrm_state_get_key(const struct nft_xfrm *priv,
struct nft_regs *regs,
- const struct xfrm_state *state,
- u8 family)
+ const struct xfrm_state *state)
{
u32 *dest = &regs->data[priv->dreg];
- if (!xfrm_state_addr_ok(priv->key, family, state->props.mode)) {
+ if (!xfrm_state_addr_ok(priv->key,
+ state->props.family,
+ state->props.mode)) {
regs->verdict.code = NFT_BREAK;
return;
}
@@ -169,7 +170,7 @@ static void nft_xfrm_get_eval_in(const struct nft_xfrm *priv,
}
state = sp->xvec[priv->spnum];
- nft_xfrm_state_get_key(priv, regs, state, nft_pf(pkt));
+ nft_xfrm_state_get_key(priv, regs, state);
}
static void nft_xfrm_get_eval_out(const struct nft_xfrm *priv,
@@ -184,7 +185,7 @@ static void nft_xfrm_get_eval_out(const struct nft_xfrm *priv,
if (i < priv->spnum)
continue;
- nft_xfrm_state_get_key(priv, regs, dst->xfrm, nft_pf(pkt));
+ nft_xfrm_state_get_key(priv, regs, dst->xfrm);
return;
}
^ permalink raw reply
* [PATCH v9 15/15] MAINTAINERS: Add entry for Marvell OcteonTX2 Admin Function driver
From: sunil.kovvuri @ 2018-10-10 12:44 UTC (permalink / raw)
To: netdev, davem; +Cc: arnd, linux-soc, Sunil Goutham
In-Reply-To: <1539175475-5351-1-git-send-email-sunil.kovvuri@gmail.com>
From: Sunil Goutham <sgoutham@marvell.com>
Added maintainers entry for Marvell OcteonTX2 SOC's RVU
admin function driver.
Signed-off-by: Sunil Goutham <sgoutham@marvell.com>
---
MAINTAINERS | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/MAINTAINERS b/MAINTAINERS
index 54e719d..fe223e6 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8847,6 +8847,15 @@ S: Supported
F: drivers/mmc/host/sdhci-xenon*
F: Documentation/devicetree/bindings/mmc/marvell,xenon-sdhci.txt
+MARVELL OCTEONTX2 RVU ADMIN FUNCTION DRIVER
+M: Sunil Goutham <sgoutham@marvell.com>
+M: Linu Cherian <lcherian@marvell.com>
+M: Geetha sowjanya <gakula@marvell.com>
+M: Jerin Jacob <jerinj@marvell.com>
+L: netdev@vger.kernel.org
+S: Supported
+F: drivers/net/ethernet/marvell/octeontx2/af/
+
MATROX FRAMEBUFFER DRIVER
L: linux-fbdev@vger.kernel.org
S: Orphan
--
2.7.4
^ permalink raw reply related
* [PATCH v9 14/15] octeontx2-af: Register for CGX lmac events
From: sunil.kovvuri @ 2018-10-10 12:44 UTC (permalink / raw)
To: netdev, davem; +Cc: arnd, linux-soc, Linu Cherian, Sunil Goutham
In-Reply-To: <1539175475-5351-1-git-send-email-sunil.kovvuri@gmail.com>
From: Linu Cherian <lcherian@marvell.com>
Added support in RVU AF driver to register for
CGX LMAC link status change events from firmware
and manage them. Processing part will be added
in followup patches.
- Introduced eventqueue for posting events from cgx lmac.
Queueing mechanism will ensure that events can be posted
and firmware can be acked immediately and hence event
reception and processing are decoupled.
- Events get added to the queue by notification callback.
Notification callback is expected to be atomic, since it
is called from interrupt context.
- Events are dequeued and processed in a worker thread.
Signed-off-by: Linu Cherian <lcherian@marvell.com>
Signed-off-by: Sunil Goutham <sgoutham@marvell.com>
---
drivers/net/ethernet/marvell/octeontx2/af/rvu.c | 6 +-
drivers/net/ethernet/marvell/octeontx2/af/rvu.h | 5 +
.../net/ethernet/marvell/octeontx2/af/rvu_cgx.c | 101 ++++++++++++++++++++-
3 files changed, 108 insertions(+), 4 deletions(-)
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
index e9021a8..2033f42 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
@@ -1564,10 +1564,11 @@ static int rvu_probe(struct pci_dev *pdev, const struct pci_device_id *id)
err = rvu_register_interrupts(rvu);
if (err)
- goto err_mbox;
+ goto err_cgx;
return 0;
-
+err_cgx:
+ rvu_cgx_wq_destroy(rvu);
err_mbox:
rvu_mbox_destroy(rvu);
err_hwsetup:
@@ -1589,6 +1590,7 @@ static void rvu_remove(struct pci_dev *pdev)
struct rvu *rvu = pci_get_drvdata(pdev);
rvu_unregister_interrupts(rvu);
+ rvu_cgx_wq_destroy(rvu);
rvu_mbox_destroy(rvu);
rvu_reset_all_blocks(rvu);
rvu_free_hw_resources(rvu);
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
index 385f597..d169fa9 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
@@ -110,6 +110,10 @@ struct rvu {
* every cgx lmac port
*/
void **cgx_idmap; /* cgx id to cgx data map table */
+ struct work_struct cgx_evh_work;
+ struct workqueue_struct *cgx_evh_wq;
+ spinlock_t cgx_evq_lock; /* cgx event queue lock */
+ struct list_head cgx_evq_head; /* cgx event queue head */
};
static inline void rvu_write64(struct rvu *rvu, u64 block, u64 offset, u64 val)
@@ -150,4 +154,5 @@ int rvu_poll_reg(struct rvu *rvu, u64 block, u64 offset, u64 mask, bool zero);
/* CGX APIs */
int rvu_cgx_probe(struct rvu *rvu);
+void rvu_cgx_wq_destroy(struct rvu *rvu);
#endif /* RVU_H */
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c
index bf81507..5ecc223 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c
@@ -15,6 +15,11 @@
#include "rvu.h"
#include "cgx.h"
+struct cgx_evq_entry {
+ struct list_head evq_node;
+ struct cgx_link_event link_event;
+};
+
static inline u8 cgxlmac_id_to_bmap(u8 cgx_id, u8 lmac_id)
{
return ((cgx_id & 0xF) << 4) | (lmac_id & 0xF);
@@ -72,9 +77,95 @@ static int rvu_map_cgx_lmac_pf(struct rvu *rvu)
return 0;
}
+/* This is called from interrupt context and is expected to be atomic */
+static int cgx_lmac_postevent(struct cgx_link_event *event, void *data)
+{
+ struct cgx_evq_entry *qentry;
+ struct rvu *rvu = data;
+
+ /* post event to the event queue */
+ qentry = kmalloc(sizeof(*qentry), GFP_ATOMIC);
+ if (!qentry)
+ return -ENOMEM;
+ qentry->link_event = *event;
+ spin_lock(&rvu->cgx_evq_lock);
+ list_add_tail(&qentry->evq_node, &rvu->cgx_evq_head);
+ spin_unlock(&rvu->cgx_evq_lock);
+
+ /* start worker to process the events */
+ queue_work(rvu->cgx_evh_wq, &rvu->cgx_evh_work);
+
+ return 0;
+}
+
+static void cgx_evhandler_task(struct work_struct *work)
+{
+ struct rvu *rvu = container_of(work, struct rvu, cgx_evh_work);
+ struct cgx_evq_entry *qentry;
+ struct cgx_link_event *event;
+ unsigned long flags;
+
+ do {
+ /* Dequeue an event */
+ spin_lock_irqsave(&rvu->cgx_evq_lock, flags);
+ qentry = list_first_entry_or_null(&rvu->cgx_evq_head,
+ struct cgx_evq_entry,
+ evq_node);
+ if (qentry)
+ list_del(&qentry->evq_node);
+ spin_unlock_irqrestore(&rvu->cgx_evq_lock, flags);
+ if (!qentry)
+ break; /* nothing more to process */
+
+ event = &qentry->link_event;
+
+ /* Do nothing for now */
+ kfree(qentry);
+ } while (1);
+}
+
+static void cgx_lmac_event_handler_init(struct rvu *rvu)
+{
+ struct cgx_event_cb cb;
+ int cgx, lmac, err;
+ void *cgxd;
+
+ spin_lock_init(&rvu->cgx_evq_lock);
+ INIT_LIST_HEAD(&rvu->cgx_evq_head);
+ INIT_WORK(&rvu->cgx_evh_work, cgx_evhandler_task);
+ rvu->cgx_evh_wq = alloc_workqueue("rvu_evh_wq", 0, 0);
+ if (!rvu->cgx_evh_wq) {
+ dev_err(rvu->dev, "alloc workqueue failed");
+ return;
+ }
+
+ cb.notify_link_chg = cgx_lmac_postevent; /* link change call back */
+ cb.data = rvu;
+
+ for (cgx = 0; cgx < rvu->cgx_cnt; cgx++) {
+ cgxd = rvu_cgx_pdata(cgx, rvu);
+ for (lmac = 0; lmac < cgx_get_lmac_cnt(cgxd); lmac++) {
+ err = cgx_lmac_evh_register(&cb, cgxd, lmac);
+ if (err)
+ dev_err(rvu->dev,
+ "%d:%d handler register failed\n",
+ cgx, lmac);
+ }
+ }
+}
+
+void rvu_cgx_wq_destroy(struct rvu *rvu)
+{
+ if (rvu->cgx_evh_wq) {
+ flush_workqueue(rvu->cgx_evh_wq);
+ destroy_workqueue(rvu->cgx_evh_wq);
+ rvu->cgx_evh_wq = NULL;
+ }
+}
+
int rvu_cgx_probe(struct rvu *rvu)
{
- int i;
+ int i, err;
/* find available cgx ports */
rvu->cgx_cnt = cgx_get_cgx_cnt();
@@ -93,5 +184,11 @@ int rvu_cgx_probe(struct rvu *rvu)
rvu->cgx_idmap[i] = cgx_get_pdata(i);
/* Map CGX LMAC interfaces to RVU PFs */
- return rvu_map_cgx_lmac_pf(rvu);
+ err = rvu_map_cgx_lmac_pf(rvu);
+ if (err)
+ return err;
+
+ /* Register for CGX events */
+ cgx_lmac_event_handler_init(rvu);
+ return 0;
}
--
2.7.4
^ permalink raw reply related
* [PATCH v9 13/15] octeontx2-af: Add support for CGX link management
From: sunil.kovvuri @ 2018-10-10 12:44 UTC (permalink / raw)
To: netdev, davem; +Cc: arnd, linux-soc, Linu Cherian, Nithya Mani, Sunil Goutham
In-Reply-To: <1539175475-5351-1-git-send-email-sunil.kovvuri@gmail.com>
From: Linu Cherian <lcherian@marvell.com>
CGX LMAC initialization, link status polling etc is done
by low level secure firmware. For link management this patch
adds an interface or communication mechanism between firmware
and this kernel CGX driver.
- Firmware interface specification is defined in cgx_fw_if.h.
- Support to send/receive commands/events to/from firmware.
- events/commands implemented
* link up
* link down
* reading firmware version
Signed-off-by: Linu Cherian <lcherian@marvell.com>
Signed-off-by: Nithya Mani <nmani@marvell.com>
Signed-off-by: Sunil Goutham <sgoutham@marvell.com>
---
drivers/net/ethernet/marvell/octeontx2/af/cgx.c | 357 ++++++++++++++++++++-
drivers/net/ethernet/marvell/octeontx2/af/cgx.h | 32 ++
.../net/ethernet/marvell/octeontx2/af/cgx_fw_if.h | 186 +++++++++++
.../net/ethernet/marvell/octeontx2/af/rvu_cgx.c | 97 ++++++
4 files changed, 668 insertions(+), 4 deletions(-)
create mode 100644 drivers/net/ethernet/marvell/octeontx2/af/cgx_fw_if.h
create mode 100644 drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c
index 6ecae80..f290b1d 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c
@@ -24,16 +24,43 @@
#define DRV_NAME "octeontx2-cgx"
#define DRV_STRING "Marvell OcteonTX2 CGX/MAC Driver"
+/**
+ * struct lmac
+ * @wq_cmd_cmplt: waitq to keep the process blocked until cmd completion
+ * @cmd_lock: Lock to serialize the command interface
+ * @resp: command response
+ * @event_cb: callback for linkchange events
+ * @cmd_pend: flag set before new command is started
+ * flag cleared after command response is received
+ * @cgx: parent cgx port
+ * @lmac_id: lmac port id
+ * @name: lmac port name
+ */
+struct lmac {
+ wait_queue_head_t wq_cmd_cmplt;
+ struct mutex cmd_lock;
+ u64 resp;
+ struct cgx_event_cb event_cb;
+ bool cmd_pend;
+ struct cgx *cgx;
+ u8 lmac_id;
+ char *name;
+};
+
struct cgx {
void __iomem *reg_base;
struct pci_dev *pdev;
u8 cgx_id;
u8 lmac_count;
+ struct lmac *lmac_idmap[MAX_LMAC_PER_CGX];
struct list_head cgx_list;
};
static LIST_HEAD(cgx_list);
+/* CGX PHY management internal APIs */
+static int cgx_fwi_link_change(struct cgx *cgx, int lmac_id, bool en);
+
/* Supported devices */
static const struct pci_device_id cgx_id_table[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_OCTEONTX2_CGX) },
@@ -42,11 +69,24 @@ static const struct pci_device_id cgx_id_table[] = {
MODULE_DEVICE_TABLE(pci, cgx_id_table);
+static void cgx_write(struct cgx *cgx, u64 lmac, u64 offset, u64 val)
+{
+ writeq(val, cgx->reg_base + (lmac << 18) + offset);
+}
+
static u64 cgx_read(struct cgx *cgx, u64 lmac, u64 offset)
{
return readq(cgx->reg_base + (lmac << 18) + offset);
}
+static inline struct lmac *lmac_pdata(u8 lmac_id, struct cgx *cgx)
+{
+ if (!cgx || lmac_id >= MAX_LMAC_PER_CGX)
+ return NULL;
+
+ return cgx->lmac_idmap[lmac_id];
+}
+
int cgx_get_cgx_cnt(void)
{
struct cgx *cgx_dev;
@@ -82,18 +122,312 @@ void *cgx_get_pdata(int cgx_id)
}
EXPORT_SYMBOL(cgx_get_pdata);
-static void cgx_lmac_init(struct cgx *cgx)
+/* CGX Firmware interface low level support */
+static int cgx_fwi_cmd_send(u64 req, u64 *resp, struct lmac *lmac)
+{
+ struct cgx *cgx = lmac->cgx;
+ struct device *dev;
+ int err = 0;
+ u64 cmd;
+
+ /* Ensure no other command is in progress */
+ err = mutex_lock_interruptible(&lmac->cmd_lock);
+ if (err)
+ return err;
+
+ /* Ensure command register is free */
+ cmd = cgx_read(cgx, lmac->lmac_id, CGX_COMMAND_REG);
+ if (FIELD_GET(CMDREG_OWN, cmd) != CGX_CMD_OWN_NS) {
+ err = -EBUSY;
+ goto unlock;
+ }
+
+ /* Update ownership in command request */
+ req = FIELD_SET(CMDREG_OWN, CGX_CMD_OWN_FIRMWARE, req);
+
+ /* Mark this lmac as pending, before we start */
+ lmac->cmd_pend = true;
+
+ /* Start command in hardware */
+ cgx_write(cgx, lmac->lmac_id, CGX_COMMAND_REG, req);
+
+ /* Ensure command is completed without errors */
+ if (!wait_event_timeout(lmac->wq_cmd_cmplt, !lmac->cmd_pend,
+ msecs_to_jiffies(CGX_CMD_TIMEOUT))) {
+ dev = &cgx->pdev->dev;
+ dev_err(dev, "cgx port %d:%d cmd timeout\n",
+ cgx->cgx_id, lmac->lmac_id);
+ err = -EIO;
+ goto unlock;
+ }
+
+ /* we have a valid command response */
+ smp_rmb(); /* Ensure the latest updates are visible */
+ *resp = lmac->resp;
+
+unlock:
+ mutex_unlock(&lmac->cmd_lock);
+
+ return err;
+}
+
+static inline int cgx_fwi_cmd_generic(u64 req, u64 *resp,
+ struct cgx *cgx, int lmac_id)
+{
+ struct lmac *lmac;
+ int err;
+
+ lmac = lmac_pdata(lmac_id, cgx);
+ if (!lmac)
+ return -ENODEV;
+
+ err = cgx_fwi_cmd_send(req, resp, lmac);
+
+ /* Check for valid response */
+ if (!err) {
+ if (FIELD_GET(EVTREG_STAT, *resp) == CGX_STAT_FAIL)
+ return -EIO;
+ else
+ return 0;
+ }
+
+ return err;
+}
+
+/* Hardware event handlers */
+static inline void cgx_link_change_handler(u64 lstat,
+ struct lmac *lmac)
+{
+ struct cgx *cgx = lmac->cgx;
+ struct cgx_link_event event;
+ struct device *dev;
+
+ dev = &cgx->pdev->dev;
+
+ event.lstat.link_up = FIELD_GET(RESP_LINKSTAT_UP, lstat);
+ event.lstat.full_duplex = FIELD_GET(RESP_LINKSTAT_FDUPLEX, lstat);
+ event.lstat.speed = FIELD_GET(RESP_LINKSTAT_SPEED, lstat);
+ event.lstat.err_type = FIELD_GET(RESP_LINKSTAT_ERRTYPE, lstat);
+
+ event.cgx_id = cgx->cgx_id;
+ event.lmac_id = lmac->lmac_id;
+
+ if (!lmac->event_cb.notify_link_chg) {
+ dev_dbg(dev, "cgx port %d:%d Link change handler null",
+ cgx->cgx_id, lmac->lmac_id);
+ if (event.lstat.err_type != CGX_ERR_NONE) {
+ dev_err(dev, "cgx port %d:%d Link error %d\n",
+ cgx->cgx_id, lmac->lmac_id,
+ event.lstat.err_type);
+ }
+ dev_info(dev, "cgx port %d:%d Link status %s, speed %x\n",
+ cgx->cgx_id, lmac->lmac_id,
+ event.lstat.link_up ? "UP" : "DOWN",
+ event.lstat.speed);
+ return;
+ }
+
+ if (lmac->event_cb.notify_link_chg(&event, lmac->event_cb.data))
+ dev_err(dev, "event notification failure\n");
+}
+
+static inline bool cgx_cmdresp_is_linkevent(u64 event)
+{
+ u8 id;
+
+ id = FIELD_GET(EVTREG_ID, event);
+ if (id == CGX_CMD_LINK_BRING_UP ||
+ id == CGX_CMD_LINK_BRING_DOWN)
+ return true;
+ else
+ return false;
+}
+
+static inline bool cgx_event_is_linkevent(u64 event)
+{
+ if (FIELD_GET(EVTREG_ID, event) == CGX_EVT_LINK_CHANGE)
+ return true;
+ else
+ return false;
+}
+
+static irqreturn_t cgx_fwi_event_handler(int irq, void *data)
+{
+ struct lmac *lmac = data;
+ struct device *dev;
+ struct cgx *cgx;
+ u64 event;
+
+ cgx = lmac->cgx;
+
+ event = cgx_read(cgx, lmac->lmac_id, CGX_EVENT_REG);
+
+ if (!FIELD_GET(EVTREG_ACK, event))
+ return IRQ_NONE;
+
+ dev = &cgx->pdev->dev;
+
+ switch (FIELD_GET(EVTREG_EVT_TYPE, event)) {
+ case CGX_EVT_CMD_RESP:
+ /* Copy the response. Since only one command is active at a
+ * time, there is no way a response can get overwritten
+ */
+ lmac->resp = event;
+ /* Ensure response is updated before thread context starts */
+ smp_wmb();
+
+ /* There wont be separate events for link change initiated from
+ * software; Hence report the command responses as events
+ */
+ if (cgx_cmdresp_is_linkevent(event))
+ cgx_link_change_handler(event, lmac);
+
+ /* Release thread waiting for completion */
+ lmac->cmd_pend = false;
+ wake_up_interruptible(&lmac->wq_cmd_cmplt);
+ break;
+ case CGX_EVT_ASYNC:
+ if (cgx_event_is_linkevent(event))
+ cgx_link_change_handler(event, lmac);
+ break;
+ }
+
+ /* Any new event or command response will be posted by firmware
+ * only after the current status is acked.
+ * Ack the interrupt register as well.
+ */
+ cgx_write(lmac->cgx, lmac->lmac_id, CGX_EVENT_REG, 0);
+ cgx_write(lmac->cgx, lmac->lmac_id, CGXX_CMRX_INT, FW_CGX_INT);
+
+ return IRQ_HANDLED;
+}
+
+/* APIs for PHY management using CGX firmware interface */
+
+/* callback registration for hardware events like link change */
+int cgx_lmac_evh_register(struct cgx_event_cb *cb, void *cgxd, int lmac_id)
+{
+ struct cgx *cgx = cgxd;
+ struct lmac *lmac;
+
+ lmac = lmac_pdata(lmac_id, cgx);
+ if (!lmac)
+ return -ENODEV;
+
+ lmac->event_cb = *cb;
+
+ return 0;
+}
+EXPORT_SYMBOL(cgx_lmac_evh_register);
+
+static int cgx_fwi_link_change(struct cgx *cgx, int lmac_id, bool enable)
+{
+ u64 req = 0;
+ u64 resp;
+
+ if (enable)
+ req = FIELD_SET(CMDREG_ID, CGX_CMD_LINK_BRING_UP, req);
+ else
+ req = FIELD_SET(CMDREG_ID, CGX_CMD_LINK_BRING_DOWN, req);
+
+ return cgx_fwi_cmd_generic(req, &resp, cgx, lmac_id);
+}
+EXPORT_SYMBOL(cgx_fwi_link_change);
+
+static inline int cgx_fwi_read_version(u64 *resp, struct cgx *cgx)
+{
+ u64 req = 0;
+
+ req = FIELD_SET(CMDREG_ID, CGX_CMD_GET_FW_VER, req);
+ return cgx_fwi_cmd_generic(req, resp, cgx, 0);
+}
+
+static int cgx_lmac_verify_fwi_version(struct cgx *cgx)
{
+ struct device *dev = &cgx->pdev->dev;
+ int major_ver, minor_ver;
+ u64 resp;
+ int err;
+
+ if (!cgx->lmac_count)
+ return 0;
+
+ err = cgx_fwi_read_version(&resp, cgx);
+ if (err)
+ return err;
+
+ major_ver = FIELD_GET(RESP_MAJOR_VER, resp);
+ minor_ver = FIELD_GET(RESP_MINOR_VER, resp);
+ dev_dbg(dev, "Firmware command interface version = %d.%d\n",
+ major_ver, minor_ver);
+ if (major_ver != CGX_FIRMWARE_MAJOR_VER ||
+ minor_ver != CGX_FIRMWARE_MINOR_VER)
+ return -EIO;
+ else
+ return 0;
+}
+
+static int cgx_lmac_init(struct cgx *cgx)
+{
+ struct lmac *lmac;
+ int i, err;
+
cgx->lmac_count = cgx_read(cgx, 0, CGXX_CMRX_RX_LMACS) & 0x7;
if (cgx->lmac_count > MAX_LMAC_PER_CGX)
cgx->lmac_count = MAX_LMAC_PER_CGX;
+
+ for (i = 0; i < cgx->lmac_count; i++) {
+ lmac = kcalloc(1, sizeof(struct lmac), GFP_KERNEL);
+ if (!lmac)
+ return -ENOMEM;
+ lmac->name = kcalloc(1, sizeof("cgx_fwi_xxx_yyy"), GFP_KERNEL);
+ if (!lmac->name)
+ return -ENOMEM;
+ sprintf(lmac->name, "cgx_fwi_%d_%d", cgx->cgx_id, i);
+ lmac->lmac_id = i;
+ lmac->cgx = cgx;
+ init_waitqueue_head(&lmac->wq_cmd_cmplt);
+ mutex_init(&lmac->cmd_lock);
+ err = request_irq(pci_irq_vector(cgx->pdev,
+ CGX_LMAC_FWI + i * 9),
+ cgx_fwi_event_handler, 0, lmac->name, lmac);
+ if (err)
+ return err;
+
+ /* Enable interrupt */
+ cgx_write(cgx, lmac->lmac_id, CGXX_CMRX_INT_ENA_W1S,
+ FW_CGX_INT);
+
+ /* Add reference */
+ cgx->lmac_idmap[i] = lmac;
+ }
+
+ return cgx_lmac_verify_fwi_version(cgx);
+}
+
+static int cgx_lmac_exit(struct cgx *cgx)
+{
+ struct lmac *lmac;
+ int i;
+
+ /* Free all lmac related resources */
+ for (i = 0; i < cgx->lmac_count; i++) {
+ lmac = cgx->lmac_idmap[i];
+ if (!lmac)
+ continue;
+ free_irq(pci_irq_vector(cgx->pdev, CGX_LMAC_FWI + i * 9), lmac);
+ kfree(lmac->name);
+ kfree(lmac);
+ }
+
+ return 0;
}
static int cgx_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
struct device *dev = &pdev->dev;
struct cgx *cgx;
- int err;
+ int err, nvec;
cgx = devm_kzalloc(dev, sizeof(*cgx), GFP_KERNEL);
if (!cgx)
@@ -123,14 +457,27 @@ static int cgx_probe(struct pci_dev *pdev, const struct pci_device_id *id)
goto err_release_regions;
}
+ nvec = CGX_NVEC;
+ err = pci_alloc_irq_vectors(pdev, nvec, nvec, PCI_IRQ_MSIX);
+ if (err < 0 || err != nvec) {
+ dev_err(dev, "Request for %d msix vectors failed, err %d\n",
+ nvec, err);
+ goto err_release_regions;
+ }
+
list_add(&cgx->cgx_list, &cgx_list);
cgx->cgx_id = cgx_get_cgx_cnt() - 1;
- cgx_lmac_init(cgx);
+
+ err = cgx_lmac_init(cgx);
+ if (err)
+ goto err_release_lmac;
return 0;
-err_release_regions:
+err_release_lmac:
+ cgx_lmac_exit(cgx);
list_del(&cgx->cgx_list);
+err_release_regions:
pci_release_regions(pdev);
err_disable_device:
pci_disable_device(pdev);
@@ -142,7 +489,9 @@ static void cgx_remove(struct pci_dev *pdev)
{
struct cgx *cgx = pci_get_drvdata(pdev);
+ cgx_lmac_exit(cgx);
list_del(&cgx->cgx_list);
+ pci_free_irq_vectors(pdev);
pci_release_regions(pdev);
pci_disable_device(pdev);
pci_set_drvdata(pdev, NULL);
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.h b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h
index acdc16e..a2a7a6d 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h
@@ -11,6 +11,8 @@
#ifndef CGX_H
#define CGX_H
+#include "cgx_fw_if.h"
+
/* PCI device IDs */
#define PCI_DEVID_OCTEONTX2_CGX 0xA059
@@ -22,12 +24,42 @@
#define CGX_OFFSET(x) ((x) * MAX_LMAC_PER_CGX)
/* Registers */
+#define CGXX_CMRX_INT 0x040
+#define FW_CGX_INT BIT_ULL(1)
+#define CGXX_CMRX_INT_ENA_W1S 0x058
#define CGXX_CMRX_RX_ID_MAP 0x060
#define CGXX_CMRX_RX_LMACS 0x128
+#define CGXX_SCRATCH0_REG 0x1050
+#define CGXX_SCRATCH1_REG 0x1058
+#define CGX_CONST 0x2000
+
+#define CGX_COMMAND_REG CGXX_SCRATCH1_REG
+#define CGX_EVENT_REG CGXX_SCRATCH0_REG
+#define CGX_CMD_TIMEOUT 2200 /* msecs */
+
+#define CGX_NVEC 37
+#define CGX_LMAC_FWI 0
+
+struct cgx_link_event {
+ struct cgx_lnk_sts lstat;
+ u8 cgx_id;
+ u8 lmac_id;
+};
+
+/**
+ * struct cgx_event_cb
+ * @notify_link_chg: callback for link change notification
+ * @data: data passed to callback function
+ */
+struct cgx_event_cb {
+ int (*notify_link_chg)(struct cgx_link_event *event, void *data);
+ void *data;
+};
extern struct pci_driver cgx_driver;
int cgx_get_cgx_cnt(void);
int cgx_get_lmac_cnt(void *cgxd);
void *cgx_get_pdata(int cgx_id);
+int cgx_lmac_evh_register(struct cgx_event_cb *cb, void *cgxd, int lmac_id);
#endif /* CGX_H */
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx_fw_if.h b/drivers/net/ethernet/marvell/octeontx2/af/cgx_fw_if.h
new file mode 100644
index 0000000..fa17af3
--- /dev/null
+++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx_fw_if.h
@@ -0,0 +1,186 @@
+/* SPDX-License-Identifier: GPL-2.0
+ * Marvell OcteonTx2 CGX driver
+ *
+ * Copyright (C) 2018 Marvell International Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __CGX_FW_INTF_H__
+#define __CGX_FW_INTF_H__
+
+#include <linux/bitops.h>
+#include <linux/bitfield.h>
+
+#define CGX_FIRMWARE_MAJOR_VER 1
+#define CGX_FIRMWARE_MINOR_VER 0
+
+#define CGX_EVENT_ACK 1UL
+
+/* CGX error types. set for cmd response status as CGX_STAT_FAIL */
+enum cgx_error_type {
+ CGX_ERR_NONE,
+ CGX_ERR_LMAC_NOT_ENABLED,
+ CGX_ERR_LMAC_MODE_INVALID,
+ CGX_ERR_REQUEST_ID_INVALID,
+ CGX_ERR_PREV_ACK_NOT_CLEAR,
+ CGX_ERR_PHY_LINK_DOWN,
+ CGX_ERR_PCS_RESET_FAIL,
+ CGX_ERR_AN_CPT_FAIL,
+ CGX_ERR_TX_NOT_IDLE,
+ CGX_ERR_RX_NOT_IDLE,
+ CGX_ERR_SPUX_BR_BLKLOCK_FAIL,
+ CGX_ERR_SPUX_RX_ALIGN_FAIL,
+ CGX_ERR_SPUX_TX_FAULT,
+ CGX_ERR_SPUX_RX_FAULT,
+ CGX_ERR_SPUX_RESET_FAIL,
+ CGX_ERR_SPUX_AN_RESET_FAIL,
+ CGX_ERR_SPUX_USX_AN_RESET_FAIL,
+ CGX_ERR_SMUX_RX_LINK_NOT_OK,
+ CGX_ERR_PCS_RECV_LINK_FAIL,
+ CGX_ERR_TRAINING_FAIL,
+ CGX_ERR_RX_EQU_FAIL,
+ CGX_ERR_SPUX_BER_FAIL,
+ CGX_ERR_SPUX_RSFEC_ALGN_FAIL, /* = 22 */
+};
+
+/* LINK speed types */
+enum cgx_link_speed {
+ CGX_LINK_NONE,
+ CGX_LINK_10M,
+ CGX_LINK_100M,
+ CGX_LINK_1G,
+ CGX_LINK_2HG,
+ CGX_LINK_5G,
+ CGX_LINK_10G,
+ CGX_LINK_20G,
+ CGX_LINK_25G,
+ CGX_LINK_40G,
+ CGX_LINK_50G,
+ CGX_LINK_100G,
+ CGX_LINK_SPEED_MAX,
+};
+
+/* REQUEST ID types. Input to firmware */
+enum cgx_cmd_id {
+ CGX_CMD_NONE,
+ CGX_CMD_GET_FW_VER,
+ CGX_CMD_GET_MAC_ADDR,
+ CGX_CMD_SET_MTU,
+ CGX_CMD_GET_LINK_STS, /* optional to user */
+ CGX_CMD_LINK_BRING_UP,
+ CGX_CMD_LINK_BRING_DOWN,
+ CGX_CMD_INTERNAL_LBK,
+ CGX_CMD_EXTERNAL_LBK,
+ CGX_CMD_HIGIG,
+ CGX_CMD_LINK_STATE_CHANGE,
+ CGX_CMD_MODE_CHANGE, /* hot plug support */
+ CGX_CMD_INTF_SHUTDOWN,
+ CGX_CMD_IRQ_ENABLE,
+ CGX_CMD_IRQ_DISABLE,
+};
+
+/* async event ids */
+enum cgx_evt_id {
+ CGX_EVT_NONE,
+ CGX_EVT_LINK_CHANGE,
+};
+
+/* event types - cause of interrupt */
+enum cgx_evt_type {
+ CGX_EVT_ASYNC,
+ CGX_EVT_CMD_RESP
+};
+
+enum cgx_stat {
+ CGX_STAT_SUCCESS,
+ CGX_STAT_FAIL
+};
+
+enum cgx_cmd_own {
+ CGX_CMD_OWN_NS,
+ CGX_CMD_OWN_FIRMWARE,
+};
+
+/* m - bit mask
+ * y - value to be written in the bitrange
+ * x - input value whose bitrange to be modified
+ */
+#define FIELD_SET(m, y, x) \
+ (((x) & ~(m)) | \
+ FIELD_PREP((m), (y)))
+
+/* scratchx(0) CSR used for ATF->non-secure SW communication.
+ * This acts as the status register
+ * Provides details on command ack/status, command response, error details
+ */
+#define EVTREG_ACK BIT_ULL(0)
+#define EVTREG_EVT_TYPE BIT_ULL(1)
+#define EVTREG_STAT BIT_ULL(2)
+#define EVTREG_ID GENMASK_ULL(8, 3)
+
+/* Response to command IDs with command status as CGX_STAT_FAIL
+ *
+ * Not applicable for commands :
+ * CGX_CMD_LINK_BRING_UP/DOWN/CGX_EVT_LINK_CHANGE
+ */
+#define EVTREG_ERRTYPE GENMASK_ULL(18, 9)
+
+/* Response to cmd ID as CGX_CMD_GET_FW_VER with cmd status as
+ * CGX_STAT_SUCCESS
+ */
+#define RESP_MAJOR_VER GENMASK_ULL(12, 9)
+#define RESP_MINOR_VER GENMASK_ULL(16, 13)
+
+/* Response to cmd ID as CGX_CMD_GET_MAC_ADDR with cmd status as
+ * CGX_STAT_SUCCESS
+ */
+#define RESP_MAC_ADDR GENMASK_ULL(56, 9)
+
+/* Response to cmd ID - CGX_CMD_LINK_BRING_UP/DOWN, event ID CGX_EVT_LINK_CHANGE
+ * status can be either CGX_STAT_FAIL or CGX_STAT_SUCCESS
+ *
+ * In case of CGX_STAT_FAIL, it indicates CGX configuration failed
+ * when processing link up/down/change command.
+ * Both err_type and current link status will be updated
+ *
+ * In case of CGX_STAT_SUCCESS, err_type will be CGX_ERR_NONE and current
+ * link status will be updated
+ */
+struct cgx_lnk_sts {
+ uint64_t reserved1:9;
+ uint64_t link_up:1;
+ uint64_t full_duplex:1;
+ uint64_t speed:4; /* cgx_link_speed */
+ uint64_t err_type:10;
+ uint64_t reserved2:39;
+};
+
+#define RESP_LINKSTAT_UP GENMASK_ULL(9, 9)
+#define RESP_LINKSTAT_FDUPLEX GENMASK_ULL(10, 10)
+#define RESP_LINKSTAT_SPEED GENMASK_ULL(14, 11)
+#define RESP_LINKSTAT_ERRTYPE GENMASK_ULL(24, 15)
+
+/* scratchx(1) CSR used for non-secure SW->ATF communication
+ * This CSR acts as a command register
+ */
+#define CMDREG_OWN BIT_ULL(0)
+#define CMDREG_ID GENMASK_ULL(7, 2)
+
+/* Any command using enable/disable as an argument need
+ * to set this bitfield.
+ * Ex: Loopback, HiGig...
+ */
+#define CMDREG_ENABLE BIT_ULL(8)
+
+/* command argument to be passed for cmd ID - CGX_CMD_SET_MTU */
+#define CMDMTU_SIZE GENMASK_ULL(23, 8)
+
+/* command argument to be passed for cmd ID - CGX_CMD_LINK_STATE_CHANGE */
+#define CMDLINKCHANGE_LINKUP BIT_ULL(8)
+#define CMDLINKCHANGE_FULLDPLX BIT_ULL(9)
+#define CMDLINKCHANGE_SPEED GENMASK_ULL(13, 10)
+
+#endif /* __CGX_FW_INTF_H__ */
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c
new file mode 100644
index 0000000..bf81507
--- /dev/null
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c
@@ -0,0 +1,97 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Marvell OcteonTx2 RVU Admin Function driver
+ *
+ * Copyright (C) 2018 Marvell International Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
+#include "rvu.h"
+#include "cgx.h"
+
+static inline u8 cgxlmac_id_to_bmap(u8 cgx_id, u8 lmac_id)
+{
+ return ((cgx_id & 0xF) << 4) | (lmac_id & 0xF);
+}
+
+static void *rvu_cgx_pdata(u8 cgx_id, struct rvu *rvu)
+{
+ if (cgx_id >= rvu->cgx_cnt)
+ return NULL;
+
+ return rvu->cgx_idmap[cgx_id];
+}
+
+static int rvu_map_cgx_lmac_pf(struct rvu *rvu)
+{
+ int cgx_cnt = rvu->cgx_cnt;
+ int cgx, lmac_cnt, lmac;
+ int pf = PF_CGXMAP_BASE;
+ int size;
+
+ if (!cgx_cnt)
+ return 0;
+
+ if (cgx_cnt > 0xF || MAX_LMAC_PER_CGX > 0xF)
+ return -EINVAL;
+
+ /* Alloc map table
+ * An additional entry is required since PF id starts from 1 and
+ * hence entry at offset 0 is invalid.
+ */
+ size = (cgx_cnt * MAX_LMAC_PER_CGX + 1) * sizeof(u8);
+ rvu->pf2cgxlmac_map = devm_kzalloc(rvu->dev, size, GFP_KERNEL);
+ if (!rvu->pf2cgxlmac_map)
+ return -ENOMEM;
+
+ /* Initialize offset 0 with an invalid cgx and lmac id */
+ rvu->pf2cgxlmac_map[0] = 0xFF;
+
+ /* Reverse map table */
+ rvu->cgxlmac2pf_map = devm_kzalloc(rvu->dev,
+ cgx_cnt * MAX_LMAC_PER_CGX * sizeof(u16),
+ GFP_KERNEL);
+ if (!rvu->cgxlmac2pf_map)
+ return -ENOMEM;
+
+ rvu->cgx_mapped_pfs = 0;
+ for (cgx = 0; cgx < cgx_cnt; cgx++) {
+ lmac_cnt = cgx_get_lmac_cnt(rvu_cgx_pdata(cgx, rvu));
+ for (lmac = 0; lmac < lmac_cnt; lmac++, pf++) {
+ rvu->pf2cgxlmac_map[pf] = cgxlmac_id_to_bmap(cgx, lmac);
+ rvu->cgxlmac2pf_map[CGX_OFFSET(cgx) + lmac] = 1 << pf;
+ rvu->cgx_mapped_pfs++;
+ }
+ }
+ return 0;
+}
+
+int rvu_cgx_probe(struct rvu *rvu)
+{
+ int i;
+
+ /* find available cgx ports */
+ rvu->cgx_cnt = cgx_get_cgx_cnt();
+ if (!rvu->cgx_cnt) {
+ dev_info(rvu->dev, "No CGX devices found!\n");
+ return -ENODEV;
+ }
+
+ rvu->cgx_idmap = devm_kzalloc(rvu->dev, rvu->cgx_cnt * sizeof(void *),
+ GFP_KERNEL);
+ if (!rvu->cgx_idmap)
+ return -ENOMEM;
+
+ /* Initialize the cgxdata table */
+ for (i = 0; i < rvu->cgx_cnt; i++)
+ rvu->cgx_idmap[i] = cgx_get_pdata(i);
+
+ /* Map CGX LMAC interfaces to RVU PFs */
+ return rvu_map_cgx_lmac_pf(rvu);
+}
--
2.7.4
^ permalink raw reply related
* [PATCH v9 12/15] octeontx2-af: Set RVU PFs to CGX LMACs mapping
From: sunil.kovvuri @ 2018-10-10 12:44 UTC (permalink / raw)
To: netdev, davem
Cc: arnd, linux-soc, Linu Cherian, Geetha sowjanya, Sunil Goutham
In-Reply-To: <1539175475-5351-1-git-send-email-sunil.kovvuri@gmail.com>
From: Linu Cherian <lcherian@marvell.com>
Each of the enabled CGX LMAC is considered a physical
interface and RVU PFs are mapped to these. VFs of these
SRIOV PFs will be virtual interfaces and share CGX LMAC
along with PF.
This mapping info will be used later on for Rx/Tx pkt steering.
Signed-off-by: Linu Cherian <lcherian@marvell.com>
Signed-off-by: Geetha sowjanya <gakula@marvell.com>
Signed-off-by: Sunil Goutham <sgoutham@marvell.com>
---
drivers/net/ethernet/marvell/octeontx2/af/Makefile | 2 +-
drivers/net/ethernet/marvell/octeontx2/af/cgx.c | 59 ++++++++++++++++++++++
drivers/net/ethernet/marvell/octeontx2/af/cgx.h | 15 +++++-
drivers/net/ethernet/marvell/octeontx2/af/rvu.c | 4 ++
drivers/net/ethernet/marvell/octeontx2/af/rvu.h | 12 +++++
5 files changed, 89 insertions(+), 3 deletions(-)
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/Makefile b/drivers/net/ethernet/marvell/octeontx2/af/Makefile
index 8646421..eaac264 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/Makefile
+++ b/drivers/net/ethernet/marvell/octeontx2/af/Makefile
@@ -7,4 +7,4 @@ obj-$(CONFIG_OCTEONTX2_MBOX) += octeontx2_mbox.o
obj-$(CONFIG_OCTEONTX2_AF) += octeontx2_af.o
octeontx2_mbox-y := mbox.o
-octeontx2_af-y := cgx.o rvu.o
+octeontx2_af-y := cgx.o rvu.o rvu_cgx.o
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c
index c41d23f..6ecae80 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c
@@ -28,8 +28,12 @@ struct cgx {
void __iomem *reg_base;
struct pci_dev *pdev;
u8 cgx_id;
+ u8 lmac_count;
+ struct list_head cgx_list;
};
+static LIST_HEAD(cgx_list);
+
/* Supported devices */
static const struct pci_device_id cgx_id_table[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_OCTEONTX2_CGX) },
@@ -38,6 +42,53 @@ static const struct pci_device_id cgx_id_table[] = {
MODULE_DEVICE_TABLE(pci, cgx_id_table);
+static u64 cgx_read(struct cgx *cgx, u64 lmac, u64 offset)
+{
+ return readq(cgx->reg_base + (lmac << 18) + offset);
+}
+
+int cgx_get_cgx_cnt(void)
+{
+ struct cgx *cgx_dev;
+ int count = 0;
+
+ list_for_each_entry(cgx_dev, &cgx_list, cgx_list)
+ count++;
+
+ return count;
+}
+EXPORT_SYMBOL(cgx_get_cgx_cnt);
+
+int cgx_get_lmac_cnt(void *cgxd)
+{
+ struct cgx *cgx = cgxd;
+
+ if (!cgx)
+ return -ENODEV;
+
+ return cgx->lmac_count;
+}
+EXPORT_SYMBOL(cgx_get_lmac_cnt);
+
+void *cgx_get_pdata(int cgx_id)
+{
+ struct cgx *cgx_dev;
+
+ list_for_each_entry(cgx_dev, &cgx_list, cgx_list) {
+ if (cgx_dev->cgx_id == cgx_id)
+ return cgx_dev;
+ }
+ return NULL;
+}
+EXPORT_SYMBOL(cgx_get_pdata);
+
+static void cgx_lmac_init(struct cgx *cgx)
+{
+ cgx->lmac_count = cgx_read(cgx, 0, CGXX_CMRX_RX_LMACS) & 0x7;
+ if (cgx->lmac_count > MAX_LMAC_PER_CGX)
+ cgx->lmac_count = MAX_LMAC_PER_CGX;
+}
+
static int cgx_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
struct device *dev = &pdev->dev;
@@ -72,9 +123,14 @@ static int cgx_probe(struct pci_dev *pdev, const struct pci_device_id *id)
goto err_release_regions;
}
+ list_add(&cgx->cgx_list, &cgx_list);
+ cgx->cgx_id = cgx_get_cgx_cnt() - 1;
+ cgx_lmac_init(cgx);
+
return 0;
err_release_regions:
+ list_del(&cgx->cgx_list);
pci_release_regions(pdev);
err_disable_device:
pci_disable_device(pdev);
@@ -84,6 +140,9 @@ static int cgx_probe(struct pci_dev *pdev, const struct pci_device_id *id)
static void cgx_remove(struct pci_dev *pdev)
{
+ struct cgx *cgx = pci_get_drvdata(pdev);
+
+ list_del(&cgx->cgx_list);
pci_release_regions(pdev);
pci_disable_device(pdev);
pci_set_drvdata(pdev, NULL);
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.h b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h
index a7d4b39..acdc16e 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h
@@ -12,11 +12,22 @@
#define CGX_H
/* PCI device IDs */
-#define PCI_DEVID_OCTEONTX2_CGX 0xA059
+#define PCI_DEVID_OCTEONTX2_CGX 0xA059
/* PCI BAR nos */
-#define PCI_CFG_REG_BAR_NUM 0
+#define PCI_CFG_REG_BAR_NUM 0
+
+#define MAX_CGX 3
+#define MAX_LMAC_PER_CGX 4
+#define CGX_OFFSET(x) ((x) * MAX_LMAC_PER_CGX)
+
+/* Registers */
+#define CGXX_CMRX_RX_ID_MAP 0x060
+#define CGXX_CMRX_RX_LMACS 0x128
extern struct pci_driver cgx_driver;
+int cgx_get_cgx_cnt(void);
+int cgx_get_lmac_cnt(void *cgxd);
+void *cgx_get_pdata(int cgx_id);
#endif /* CGX_H */
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
index 72cb202..e9021a8 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
@@ -1558,6 +1558,10 @@ static int rvu_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (err)
goto err_hwsetup;
+ err = rvu_cgx_probe(rvu);
+ if (err)
+ goto err_mbox;
+
err = rvu_register_interrupts(rvu);
if (err)
goto err_mbox;
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
index 92c2022..385f597 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
@@ -100,6 +100,16 @@ struct rvu {
char *irq_name;
bool *irq_allocated;
dma_addr_t msix_base_iova;
+
+ /* CGX */
+#define PF_CGXMAP_BASE 1 /* PF 0 is reserved for RVU PF */
+ u8 cgx_mapped_pfs;
+ u8 cgx_cnt; /* available cgx ports */
+ u8 *pf2cgxlmac_map; /* pf to cgx_lmac map */
+ u16 *cgxlmac2pf_map; /* bitmap of mapped pfs for
+ * every cgx lmac port
+ */
+ void **cgx_idmap; /* cgx id to cgx data map table */
};
static inline void rvu_write64(struct rvu *rvu, u64 block, u64 offset, u64 val)
@@ -138,4 +148,6 @@ int rvu_get_lf(struct rvu *rvu, struct rvu_block *block, u16 pcifunc, u16 slot);
int rvu_get_blkaddr(struct rvu *rvu, int blktype, u16 pcifunc);
int rvu_poll_reg(struct rvu *rvu, u64 block, u64 offset, u64 mask, bool zero);
+/* CGX APIs */
+int rvu_cgx_probe(struct rvu *rvu);
#endif /* RVU_H */
--
2.7.4
^ permalink raw reply related
* [PATCH v9 11/15] octeontx2-af: Add Marvell OcteonTX2 CGX driver
From: sunil.kovvuri @ 2018-10-10 12:44 UTC (permalink / raw)
To: netdev, davem; +Cc: arnd, linux-soc, Sunil Goutham
In-Reply-To: <1539175475-5351-1-git-send-email-sunil.kovvuri@gmail.com>
From: Sunil Goutham <sgoutham@marvell.com>
This patch adds a basic template for Marvell OcteonTX2's
CGX ethernet interface driver. Only the probe is implemented.
The RVU AF driver will use APIs exported by this driver
for various things like PF to physical interface mapping,
loopback mode, interface stats, etc. Hence both drivers
are merged into a single module.
Signed-off-by: Sunil Goutham <sgoutham@marvell.com>
---
drivers/net/ethernet/marvell/octeontx2/af/Makefile | 2 +-
drivers/net/ethernet/marvell/octeontx2/af/cgx.c | 97 ++++++++++++++++++++++
drivers/net/ethernet/marvell/octeontx2/af/cgx.h | 22 +++++
drivers/net/ethernet/marvell/octeontx2/af/rvu.c | 14 +++-
4 files changed, 133 insertions(+), 2 deletions(-)
create mode 100644 drivers/net/ethernet/marvell/octeontx2/af/cgx.c
create mode 100644 drivers/net/ethernet/marvell/octeontx2/af/cgx.h
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/Makefile b/drivers/net/ethernet/marvell/octeontx2/af/Makefile
index ac17cb9..8646421 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/Makefile
+++ b/drivers/net/ethernet/marvell/octeontx2/af/Makefile
@@ -7,4 +7,4 @@ obj-$(CONFIG_OCTEONTX2_MBOX) += octeontx2_mbox.o
obj-$(CONFIG_OCTEONTX2_AF) += octeontx2_af.o
octeontx2_mbox-y := mbox.o
-octeontx2_af-y := rvu.o
+octeontx2_af-y := cgx.o rvu.o
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c
new file mode 100644
index 0000000..c41d23f
--- /dev/null
+++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c
@@ -0,0 +1,97 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Marvell OcteonTx2 CGX driver
+ *
+ * Copyright (C) 2018 Marvell International Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/acpi.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/phy.h>
+#include <linux/of.h>
+#include <linux/of_mdio.h>
+#include <linux/of_net.h>
+
+#include "cgx.h"
+
+#define DRV_NAME "octeontx2-cgx"
+#define DRV_STRING "Marvell OcteonTX2 CGX/MAC Driver"
+
+struct cgx {
+ void __iomem *reg_base;
+ struct pci_dev *pdev;
+ u8 cgx_id;
+};
+
+/* Supported devices */
+static const struct pci_device_id cgx_id_table[] = {
+ { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_OCTEONTX2_CGX) },
+ { 0, } /* end of table */
+};
+
+MODULE_DEVICE_TABLE(pci, cgx_id_table);
+
+static int cgx_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+ struct device *dev = &pdev->dev;
+ struct cgx *cgx;
+ int err;
+
+ cgx = devm_kzalloc(dev, sizeof(*cgx), GFP_KERNEL);
+ if (!cgx)
+ return -ENOMEM;
+ cgx->pdev = pdev;
+
+ pci_set_drvdata(pdev, cgx);
+
+ err = pci_enable_device(pdev);
+ if (err) {
+ dev_err(dev, "Failed to enable PCI device\n");
+ pci_set_drvdata(pdev, NULL);
+ return err;
+ }
+
+ err = pci_request_regions(pdev, DRV_NAME);
+ if (err) {
+ dev_err(dev, "PCI request regions failed 0x%x\n", err);
+ goto err_disable_device;
+ }
+
+ /* MAP configuration registers */
+ cgx->reg_base = pcim_iomap(pdev, PCI_CFG_REG_BAR_NUM, 0);
+ if (!cgx->reg_base) {
+ dev_err(dev, "CGX: Cannot map CSR memory space, aborting\n");
+ err = -ENOMEM;
+ goto err_release_regions;
+ }
+
+ return 0;
+
+err_release_regions:
+ pci_release_regions(pdev);
+err_disable_device:
+ pci_disable_device(pdev);
+ pci_set_drvdata(pdev, NULL);
+ return err;
+}
+
+static void cgx_remove(struct pci_dev *pdev)
+{
+ pci_release_regions(pdev);
+ pci_disable_device(pdev);
+ pci_set_drvdata(pdev, NULL);
+}
+
+struct pci_driver cgx_driver = {
+ .name = DRV_NAME,
+ .id_table = cgx_id_table,
+ .probe = cgx_probe,
+ .remove = cgx_remove,
+};
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.h b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h
new file mode 100644
index 0000000..a7d4b39
--- /dev/null
+++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0
+ * Marvell OcteonTx2 CGX driver
+ *
+ * Copyright (C) 2018 Marvell International Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef CGX_H
+#define CGX_H
+
+ /* PCI device IDs */
+#define PCI_DEVID_OCTEONTX2_CGX 0xA059
+
+/* PCI BAR nos */
+#define PCI_CFG_REG_BAR_NUM 0
+
+extern struct pci_driver cgx_driver;
+
+#endif /* CGX_H */
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
index 7bcc1fd..72cb202 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
@@ -15,6 +15,7 @@
#include <linux/pci.h>
#include <linux/sysfs.h>
+#include "cgx.h"
#include "rvu.h"
#include "rvu_reg.h"
@@ -1605,14 +1606,25 @@ static struct pci_driver rvu_driver = {
static int __init rvu_init_module(void)
{
+ int err;
+
pr_info("%s: %s\n", DRV_NAME, DRV_STRING);
- return pci_register_driver(&rvu_driver);
+ err = pci_register_driver(&cgx_driver);
+ if (err < 0)
+ return err;
+
+ err = pci_register_driver(&rvu_driver);
+ if (err < 0)
+ pci_unregister_driver(&cgx_driver);
+
+ return err;
}
static void __exit rvu_cleanup_module(void)
{
pci_unregister_driver(&rvu_driver);
+ pci_unregister_driver(&cgx_driver);
}
module_init(rvu_init_module);
--
2.7.4
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox