* [PATCH bpf-next 5/6] selftests/bpf: test BPF_MAP_DUMP command on a bpf hashmap
From: Brian Vazquez @ 2019-07-24 16:58 UTC (permalink / raw)
To: Brian Vazquez, Alexei Starovoitov, Daniel Borkmann,
David S . Miller
Cc: Stanislav Fomichev, Willem de Bruijn, Petar Penkov, linux-kernel,
netdev, bpf, Brian Vazquez
In-Reply-To: <20190724165803.87470-1-brianvv@google.com>
This tests exercise the new command on a bpf hashmap and make sure it
works as expected.
Signed-off-by: Brian Vazquez <brianvv@google.com>
---
tools/testing/selftests/bpf/test_maps.c | 83 ++++++++++++++++++++++++-
1 file changed, 81 insertions(+), 2 deletions(-)
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index 5443b9bd75ed7..f7ab401399d40 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -309,6 +309,86 @@ static void test_hashmap_walk(unsigned int task, void *data)
close(fd);
}
+static void test_hashmap_dump(void)
+{
+ int fd, i, max_entries = 5;
+ uint64_t keys[max_entries], values[max_entries];
+ uint64_t key, value, next_key, prev_key;
+ bool next_key_valid = true;
+ void *buf, *elem;
+ u32 buf_len;
+ const int elem_size = sizeof(key) + sizeof(value);
+
+ fd = helper_fill_hashmap(max_entries);
+
+ // Get the elements in the hashmap, and store them in that order
+ assert(bpf_map_get_next_key(fd, NULL, &key) == 0);
+ i = 0;
+ keys[i] = key;
+ for (i = 1; next_key_valid; i++) {
+ next_key_valid = bpf_map_get_next_key(fd, &key, &next_key) == 0;
+ assert(bpf_map_lookup_elem(fd, &key, &values[i - 1]) == 0);
+ keys[i-1] = key;
+ key = next_key;
+ }
+
+ // Alloc memory for the whole table
+ buf = malloc(elem_size * max_entries);
+ assert(buf != NULL);
+
+ // Check that buf_len < elem_size returns EINVAL
+ buf_len = elem_size-1;
+ errno = 0;
+ assert(bpf_map_dump(fd, NULL, buf, &buf_len) == -1 && errno == EINVAL);
+
+ // Check that it returns the first two elements
+ errno = 0;
+ buf_len = elem_size * 2;
+ i = 0;
+ assert(bpf_map_dump(fd, NULL, buf, &buf_len) == 0 &&
+ buf_len == 2*elem_size);
+ elem = buf;
+ assert((*(uint64_t *)elem) == keys[i] &&
+ (*(uint64_t *)(elem + sizeof(key))) == values[i]);
+ elem = buf + elem_size;
+ i++;
+ assert((*(uint64_t *)elem) == keys[i] &&
+ (*(uint64_t *)(elem + sizeof(key))) == values[i]);
+ i++;
+
+ /* Check that prev_key contains key from last_elem retrieved in previous
+ * call
+ */
+ prev_key = *((uint64_t *)elem);
+ assert(bpf_map_dump(fd, &prev_key, buf, &buf_len) == 0 &&
+ buf_len == elem_size*2);
+ elem = buf;
+ assert((*(uint64_t *)elem) == keys[i] &&
+ (*(uint64_t *)(elem + sizeof(key))) == values[i]);
+ elem = buf + elem_size;
+ i++;
+ assert((*(uint64_t *)elem) == keys[i] &&
+ (*(uint64_t *)(elem + sizeof(key))) == values[i]);
+ i++;
+ assert(prev_key == (*(uint64_t *)elem));
+
+ /* Continue reading from map and verify buf_len only contains 1 element
+ * even though buf_len is 2 elem_size and it returns err = 0.
+ */
+ assert(bpf_map_dump(fd, &prev_key, buf, &buf_len) == 0 &&
+ buf_len == elem_size);
+ elem = buf;
+ assert((*(uint64_t *)elem) == keys[i] &&
+ (*(uint64_t *)(elem + sizeof(key))) == values[i]);
+
+ // Verify there's no more entries and err = ENOENT
+ assert(bpf_map_dump(fd, &prev_key, buf, &buf_len) == -1 &&
+ errno == ENOENT);
+
+ free(buf);
+ close(fd);
+}
+
static void test_hashmap_zero_seed(void)
{
int i, first, second, old_flags;
@@ -1677,6 +1757,7 @@ static void run_all_tests(void)
test_hashmap_percpu(0, NULL);
test_hashmap_walk(0, NULL);
test_hashmap_zero_seed();
+ test_hashmap_dump();
test_arraymap(0, NULL);
test_arraymap_percpu(0, NULL);
@@ -1714,11 +1795,9 @@ int main(void)
map_flags = BPF_F_NO_PREALLOC;
run_all_tests();
-
#define CALL
#include <map_tests/tests.h>
#undef CALL
-
printf("test_maps: OK, %d SKIPPED\n", skips);
return 0;
}
--
2.22.0.657.g960e92d24f-goog
^ permalink raw reply related
* [PATCH bpf-next 4/6] libbpf: support BPF_MAP_DUMP command
From: Brian Vazquez @ 2019-07-24 16:58 UTC (permalink / raw)
To: Brian Vazquez, Alexei Starovoitov, Daniel Borkmann,
David S . Miller
Cc: Stanislav Fomichev, Willem de Bruijn, Petar Penkov, linux-kernel,
netdev, bpf, Brian Vazquez
In-Reply-To: <20190724165803.87470-1-brianvv@google.com>
Make libbpf aware of new BPF_MAP_DUMP command and add bpf_map_dump and
bpf_map_dump_flags to use them from the library.
Suggested-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Brian Vazquez <brianvv@google.com>
---
tools/lib/bpf/bpf.c | 28 ++++++++++++++++++++++++++++
tools/lib/bpf/bpf.h | 4 ++++
2 files changed, 32 insertions(+)
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index c7d7993c44bb0..c1139b7db756a 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -368,6 +368,34 @@ int bpf_map_update_elem(int fd, const void *key, const void *value,
return sys_bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
}
+int bpf_map_dump(int fd, const void *prev_key, void *buf, void *buf_len)
+{
+ union bpf_attr attr;
+
+ memset(&attr, 0, sizeof(attr));
+ attr.dump.map_fd = fd;
+ attr.dump.prev_key = ptr_to_u64(prev_key);
+ attr.dump.buf = ptr_to_u64(buf);
+ attr.dump.buf_len = ptr_to_u64(buf_len);
+
+ return sys_bpf(BPF_MAP_DUMP, &attr, sizeof(attr));
+}
+
+int bpf_map_dump_flags(int fd, const void *prev_key, void *buf, void *buf_len,
+ __u64 flags)
+{
+ union bpf_attr attr;
+
+ memset(&attr, 0, sizeof(attr));
+ attr.dump.map_fd = fd;
+ attr.dump.prev_key = ptr_to_u64(prev_key);
+ attr.dump.buf = ptr_to_u64(buf);
+ attr.dump.buf_len = ptr_to_u64(buf_len);
+ attr.dump.flags = flags;
+
+ return sys_bpf(BPF_MAP_DUMP, &attr, sizeof(attr));
+}
+
int bpf_map_lookup_elem(int fd, const void *key, void *value)
{
union bpf_attr attr;
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index ff42ca043dc8f..86496443440e9 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -112,6 +112,10 @@ LIBBPF_API int bpf_verify_program(enum bpf_prog_type type,
LIBBPF_API int bpf_map_update_elem(int fd, const void *key, const void *value,
__u64 flags);
+LIBBPF_API int bpf_map_dump(int fd, const void *prev_key, void *buf,
+ void *buf_len);
+LIBBPF_API int bpf_map_dump_flags(int fd, const void *prev_key, void *buf,
+ void *buf_len, __u64 flags);
LIBBPF_API int bpf_map_lookup_elem(int fd, const void *key, void *value);
LIBBPF_API int bpf_map_lookup_elem_flags(int fd, const void *key, void *value,
__u64 flags);
--
2.22.0.657.g960e92d24f-goog
^ permalink raw reply related
* [PATCH bpf-next 6/6] selftests/bpf: add test to measure performance of BPF_MAP_DUMP
From: Brian Vazquez @ 2019-07-24 16:58 UTC (permalink / raw)
To: Brian Vazquez, Alexei Starovoitov, Daniel Borkmann,
David S . Miller
Cc: Stanislav Fomichev, Willem de Bruijn, Petar Penkov, linux-kernel,
netdev, bpf, Brian Vazquez
In-Reply-To: <20190724165803.87470-1-brianvv@google.com>
This tests compares the amount of time that takes to read an entire
table of 100K elements on a bpf hashmap using both BPF_MAP_DUMP and
BPF_MAP_GET_NEXT_KEY + BPF_MAP_LOOKUP_ELEM.
Signed-off-by: Brian Vazquez <brianvv@google.com>
---
tools/testing/selftests/bpf/test_maps.c | 65 +++++++++++++++++++++++++
1 file changed, 65 insertions(+)
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index f7ab401399d40..c4593a8904ca6 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -18,6 +18,7 @@
#include <sys/socket.h>
#include <netinet/in.h>
#include <linux/bpf.h>
+#include <linux/time64.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
@@ -389,6 +390,69 @@ static void test_hashmap_dump(void)
close(fd);
}
+static void test_hashmap_dump_perf(void)
+{
+ int fd, i, max_entries = 100000;
+ uint64_t key, value, next_key;
+ bool next_key_valid = true;
+ void *buf;
+ u32 buf_len, entries;
+ int j = 0;
+ int clk_id = CLOCK_MONOTONIC;
+ struct timespec begin, end;
+ long long time_spent, dump_time_spent;
+ double res;
+ int tests[] = {1, 2, 230, 5000, 73000, 100000, 234567};
+ int test_len = ARRAY_SIZE(tests);
+ const int elem_size = sizeof(key) + sizeof(value);
+
+ fd = helper_fill_hashmap(max_entries);
+ // Alloc memory considering the largest buffer
+ buf = malloc(elem_size * tests[test_len-1]);
+ assert(buf != NULL);
+
+test:
+ entries = tests[j];
+ buf_len = elem_size*tests[j];
+ j++;
+ clock_gettime(clk_id, &begin);
+ errno = 0;
+ i = 0;
+ while (errno == 0) {
+ bpf_map_dump(fd, !i ? NULL : &key,
+ buf, &buf_len);
+ if (errno)
+ break;
+ if (!i)
+ key = *((uint64_t *)(buf + buf_len - elem_size));
+ i += buf_len / elem_size;
+ }
+ clock_gettime(clk_id, &end);
+ assert(i == max_entries);
+ dump_time_spent = NSEC_PER_SEC * (end.tv_sec - begin.tv_sec) +
+ end.tv_nsec - begin.tv_nsec;
+ next_key_valid = true;
+ clock_gettime(clk_id, &begin);
+ assert(bpf_map_get_next_key(fd, NULL, &key) == 0);
+ for (i = 0; next_key_valid; i++) {
+ next_key_valid = bpf_map_get_next_key(fd, &key, &next_key) == 0;
+ assert(bpf_map_lookup_elem(fd, &key, &value) == 0);
+ key = next_key;
+ }
+ clock_gettime(clk_id, &end);
+ time_spent = NSEC_PER_SEC * (end.tv_sec - begin.tv_sec) +
+ end.tv_nsec - begin.tv_nsec;
+ res = (1-((double)dump_time_spent/time_spent))*100;
+ printf("buf_len_%u:\t %llu entry-by-entry: %llu improvement %lf\n",
+ entries, dump_time_spent, time_spent, res);
+ assert(i == max_entries);
+
+ if (j < test_len)
+ goto test;
+ free(buf);
+ close(fd);
+}
+
static void test_hashmap_zero_seed(void)
{
int i, first, second, old_flags;
@@ -1758,6 +1822,7 @@ static void run_all_tests(void)
test_hashmap_walk(0, NULL);
test_hashmap_zero_seed();
test_hashmap_dump();
+ test_hashmap_dump_perf();
test_arraymap(0, NULL);
test_arraymap_percpu(0, NULL);
--
2.22.0.657.g960e92d24f-goog
^ permalink raw reply related
* [PATCH bpf-next 2/6] bpf: add BPF_MAP_DUMP command to dump more than one entry per call
From: Brian Vazquez @ 2019-07-24 16:57 UTC (permalink / raw)
To: Brian Vazquez, Alexei Starovoitov, Daniel Borkmann,
David S . Miller
Cc: Stanislav Fomichev, Willem de Bruijn, Petar Penkov, linux-kernel,
netdev, bpf, Brian Vazquez
In-Reply-To: <20190724165803.87470-1-brianvv@google.com>
This introduces a new command to retrieve multiple number of entries
from a bpf map, wrapping the existing bpf methods:
map_get_next_key and map_lookup_elem
To start dumping the map from the beginning you must specify NULL as
the prev_key.
The new API returns 0 when it successfully copied all the elements
requested or it copied less because there weren't more elements to
retrieved (i.e err == -ENOENT). In last scenario err will be masked to 0.
On a successful call buf and buf_len will contain correct data and in
case prev_key was provided (not for the first walk, since prev_key is
NULL) it will contain the last_key copied into the prev_key which will
simplify next call.
Only when it can't find a single element it will return -ENOENT meaning
that the map has been entirely walked. When an error is return buf,
buf_len and prev_key shouldn't be read nor used.
Because maps can be called from userspace and kernel code, this function
can have a scenario where the next_key was found but by the time we
try to retrieve the value the element is not there, in this case the
function continues and tries to get a new next_key value, skipping the
deleted key. If at some point the function find itself trap in a loop,
it will return -EINTR.
The function will try to fit as much as possible in the buf provided and
will return -EINVAL if buf_len is smaller than elem_size.
QUEUE and STACK maps are not supported.
Note that map_dump doesn't guarantee that reading the entire table is
consistent since this function is always racing with kernel and user code
but the same behaviour is found when the entire table is walked using
the current interfaces: map_get_next_key + map_lookup_elem.
It is also important to note that with a locked map, the lock is grabbed
for 1 entry at the time, meaning that the returned buf might or might not
be consistent.
Suggested-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Brian Vazquez <brianvv@google.com>
---
include/uapi/linux/bpf.h | 9 +++
kernel/bpf/syscall.c | 117 +++++++++++++++++++++++++++++++++++++++
2 files changed, 126 insertions(+)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index fa1c753dcdbc7..66dab5385170d 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -106,6 +106,7 @@ enum bpf_cmd {
BPF_TASK_FD_QUERY,
BPF_MAP_LOOKUP_AND_DELETE_ELEM,
BPF_MAP_FREEZE,
+ BPF_MAP_DUMP,
};
enum bpf_map_type {
@@ -388,6 +389,14 @@ union bpf_attr {
__u64 flags;
};
+ struct { /* struct used by BPF_MAP_DUMP command */
+ __aligned_u64 prev_key;
+ __aligned_u64 buf;
+ __aligned_u64 buf_len; /* input/output: len of buf */
+ __u64 flags;
+ __u32 map_fd;
+ } dump;
+
struct { /* anonymous struct used by BPF_PROG_LOAD command */
__u32 prog_type; /* one of enum bpf_prog_type */
__u32 insn_cnt;
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 86cdc2f7bb56e..0c35505aa219f 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1097,6 +1097,120 @@ static int map_get_next_key(union bpf_attr *attr)
return err;
}
+/* last field in 'union bpf_attr' used by this command */
+#define BPF_MAP_DUMP_LAST_FIELD dump.map_fd
+
+static int map_dump(union bpf_attr *attr)
+{
+ void __user *ukey = u64_to_user_ptr(attr->dump.prev_key);
+ void __user *ubuf = u64_to_user_ptr(attr->dump.buf);
+ u32 __user *ubuf_len = u64_to_user_ptr(attr->dump.buf_len);
+ int ufd = attr->dump.map_fd;
+ struct bpf_map *map;
+ void *buf, *prev_key, *key, *value;
+ u32 value_size, elem_size, buf_len, cp_len;
+ struct fd f;
+ int err;
+ bool first_key = false;
+
+ if (CHECK_ATTR(BPF_MAP_DUMP))
+ return -EINVAL;
+
+ if (attr->dump.flags & ~BPF_F_LOCK)
+ return -EINVAL;
+
+ f = fdget(ufd);
+ map = __bpf_map_get(f);
+ if (IS_ERR(map))
+ return PTR_ERR(map);
+ if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ)) {
+ err = -EPERM;
+ goto err_put;
+ }
+
+ if ((attr->dump.flags & BPF_F_LOCK) &&
+ !map_value_has_spin_lock(map)) {
+ err = -EINVAL;
+ goto err_put;
+ }
+
+ if (map->map_type == BPF_MAP_TYPE_QUEUE ||
+ map->map_type == BPF_MAP_TYPE_STACK) {
+ err = -ENOTSUPP;
+ goto err_put;
+ }
+
+ value_size = bpf_map_value_size(map);
+
+ err = get_user(buf_len, ubuf_len);
+ if (err)
+ goto err_put;
+
+ elem_size = map->key_size + value_size;
+ if (buf_len < elem_size) {
+ err = -EINVAL;
+ goto err_put;
+ }
+
+ if (ukey) {
+ prev_key = __bpf_copy_key(ukey, map->key_size);
+ if (IS_ERR(prev_key)) {
+ err = PTR_ERR(prev_key);
+ goto err_put;
+ }
+ } else {
+ prev_key = NULL;
+ first_key = true;
+ }
+
+ err = -ENOMEM;
+ buf = kmalloc(elem_size, GFP_USER | __GFP_NOWARN);
+ if (!buf)
+ goto err_put;
+
+ key = buf;
+ value = key + map->key_size;
+ for (cp_len = 0; cp_len + elem_size <= buf_len;) {
+ if (signal_pending(current)) {
+ err = -EINTR;
+ break;
+ }
+
+ rcu_read_lock();
+ err = map->ops->map_get_next_key(map, prev_key, key);
+ rcu_read_unlock();
+
+ if (err)
+ break;
+
+ err = bpf_map_copy_value(map, key, value, attr->dump.flags);
+
+ if (err == -ENOENT)
+ continue;
+ if (err)
+ goto free_buf;
+
+ if (copy_to_user(ubuf + cp_len, buf, elem_size)) {
+ err = -EFAULT;
+ goto free_buf;
+ }
+
+ prev_key = key;
+ cp_len += elem_size;
+ }
+
+ if (err == -ENOENT && cp_len)
+ err = 0;
+ if (!err && (copy_to_user(ubuf_len, &cp_len, sizeof(cp_len)) ||
+ (!first_key && copy_to_user(ukey, key, map->key_size))))
+ err = -EFAULT;
+free_buf:
+ kfree(buf);
+err_put:
+ fdput(f);
+ return err;
+}
+
#define BPF_MAP_LOOKUP_AND_DELETE_ELEM_LAST_FIELD value
static int map_lookup_and_delete_elem(union bpf_attr *attr)
@@ -2910,6 +3024,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
case BPF_MAP_LOOKUP_AND_DELETE_ELEM:
err = map_lookup_and_delete_elem(&attr);
break;
+ case BPF_MAP_DUMP:
+ err = map_dump(&attr);
+ break;
default:
err = -EINVAL;
break;
--
2.22.0.657.g960e92d24f-goog
^ permalink raw reply related
* [PATCH bpf-next 0/6] bpf: add BPF_MAP_DUMP command to dump more than one entry per call
From: Brian Vazquez @ 2019-07-24 16:57 UTC (permalink / raw)
To: Brian Vazquez, Alexei Starovoitov, Daniel Borkmann,
David S . Miller
Cc: Stanislav Fomichev, Willem de Bruijn, Petar Penkov, linux-kernel,
netdev, bpf, Brian Vazquez
This introduces a new command to retrieve multiple number of entries
from a bpf map.
This new command can be executed from the existing BPF syscall as
follows:
err = bpf(BPF_MAP_DUMP, union bpf_attr *attr, u32 size)
using attr->dump.map_fd, attr->dump.prev_key, attr->dump.buf,
attr->dump.buf_len
returns zero or negative error, and populates buf and buf_len on
succees
This implementation is wrapping the existing bpf methods:
map_get_next_key and map_lookup_elem
Note that this implementation can be extended later to do dump and
delete by extending map_lookup_and_delete_elem (currently it only works
for bpf queue/stack maps) and either use a new flag in map_dump or a new
command map_dump_and_delete.
Results show that even with a 1-elem_size buffer, it runs ~40 faster
than the current implementation, improvements of ~85% are reported when
the buffer size is increased, although, after the buffer size is around
5% of the total number of entries there's no huge difference in
increasing it.
Tested:
Tried different size buffers to handle case where the bulk is bigger, or
the elements to retrieve are less than the existing ones, all runs read
a map of 100K entries. Below are the results(in ns) from the different
runs:
buf_len_1: 69038725 entry-by-entry: 112384424 improvement
38.569134
buf_len_2: 40897447 entry-by-entry: 111030546 improvement
63.165590
buf_len_230: 13652714 entry-by-entry: 111694058 improvement
87.776687
buf_len_5000: 13576271 entry-by-entry: 111101169 improvement
87.780263
buf_len_73000: 14694343 entry-by-entry: 111740162 improvement
86.849542
buf_len_100000: 13745969 entry-by-entry: 114151991 improvement
87.958187
buf_len_234567: 14329834 entry-by-entry: 114427589 improvement
87.476941
The series of patches are split as follows:
- First patch move some map_lookup_elem logic into 2 fucntions to
deduplicate code: bpf_map_value_size and bpf_map_copy_value
- Second patch introduce map_dump function
- Third patch syncs tools linux headers
- Fourth patch adds libbpf support
- Last two patches adds tests
RFC Changelog:
- remove wrong usage of attr.flags
- move map_fd to remove hole after it
v3:
- add explanation of the API in the commit message
- fix masked errors and return them to user
- copy last_key from return buf into prev_key if it was provided
- run perf test with kpti and retpoline mitigations
v2:
- use proper bpf-next tag
Brian Vazquez (6):
bpf: add bpf_map_value_size and bp_map_copy_value helper functions
bpf: add BPF_MAP_DUMP command to dump more than one entry per call
bpf: keep bpf.h in sync with tools/
libbpf: support BPF_MAP_DUMP command
selftests/bpf: test BPF_MAP_DUMP command on a bpf hashmap
selftests/bpf: add test to measure performance of BPF_MAP_DUMP
include/uapi/linux/bpf.h | 9 +
kernel/bpf/syscall.c | 251 ++++++++++++++++++------
tools/include/uapi/linux/bpf.h | 9 +
tools/lib/bpf/bpf.c | 28 +++
tools/lib/bpf/bpf.h | 4 +
tools/lib/bpf/libbpf.map | 2 +
tools/testing/selftests/bpf/test_maps.c | 148 +++++++++++++-
7 files changed, 388 insertions(+), 63 deletions(-)
--
2.22.0.657.g960e92d24f-goog
^ permalink raw reply
* Re: [RFC PATCH net-next 10/12] drop_monitor: Add packet alert mode
From: Ido Schimmel @ 2019-07-24 16:57 UTC (permalink / raw)
To: Jiri Pirko
Cc: netdev, davem, nhorman, dsahern, roopa, nikolay, jakub.kicinski,
toke, andy, f.fainelli, andrew, vivien.didelot, mlxsw,
Ido Schimmel
In-Reply-To: <20190724125341.GB2225@nanopsycho>
On Wed, Jul 24, 2019 at 02:53:41PM +0200, Jiri Pirko wrote:
> Mon, Jul 22, 2019 at 08:31:32PM CEST, idosch@idosch.org wrote:
> >+static const struct net_dm_alert_ops *net_dm_alert_ops_arr[] = {
> >+ [NET_DM_ALERT_MODE_SUMMARY] = &net_dm_alert_summary_ops,
> >+ [NET_DM_ALERT_MODE_PACKET] = &net_dm_alert_packet_ops,
> >+};
>
> Please split this patch into 2:
> 1) introducing the ops and modes (only summary)
> 2) introducing the packet mode
Ack
...
> >+static int net_dm_alert_mode_set(struct genl_info *info)
> >+{
> >+ struct netlink_ext_ack *extack = info->extack;
> >+ enum net_dm_alert_mode alert_mode;
> >+ int rc;
> >+
> >+ if (!info->attrs[NET_DM_ATTR_ALERT_MODE])
> >+ return 0;
> >+
> >+ rc = net_dm_alert_mode_get_from_info(info, &alert_mode);
> >+ if (rc) {
> >+ NL_SET_ERR_MSG_MOD(extack, "Invalid alert mode");
> >+ return -EINVAL;
> >+ }
> >+
> >+ net_dm_alert_mode = alert_mode;
>
> 2 things:
> 1) Shouldn't you check if the tracing is on and return -EBUSY in case it is?
I'm doing it below in net_dm_cmd_config() :)
But I'm returning '-EOPNOTSUPP'. I guess '-EBUSY' is more appropriate.
Will change.
> 2) You setup the mode globally. I guess it is fine and it does not make
> sense to do it otherwise, right? Like per-net or something.
Yes, it's global. I didn't change that aspect of drop monitor and I
don't really see a use case for that.
>
>
> >+
> >+ return 0;
> >+}
> >+
> > static int net_dm_cmd_config(struct sk_buff *skb,
> > struct genl_info *info)
> > {
> >- NL_SET_ERR_MSG_MOD(info->extack, "Command not supported");
> >+ struct netlink_ext_ack *extack = info->extack;
> >+ int rc;
> >
> >- return -EOPNOTSUPP;
> >+ if (trace_state == TRACE_ON) {
> >+ NL_SET_ERR_MSG_MOD(extack, "Cannot configure drop monitor while tracing is on");
> >+ return -EOPNOTSUPP;
> >+ }
> >+
> >+ rc = net_dm_alert_mode_set(info);
> >+ if (rc)
> >+ return rc;
> >+
> >+ return 0;
> > }
^ permalink raw reply
* Re: [RFC PATCH net-next 11/12] drop_monitor: Allow truncation of dropped packets
From: Ido Schimmel @ 2019-07-24 16:49 UTC (permalink / raw)
To: Jiri Pirko
Cc: netdev, davem, nhorman, dsahern, roopa, nikolay, jakub.kicinski,
toke, andy, f.fainelli, andrew, vivien.didelot, mlxsw,
Ido Schimmel
In-Reply-To: <20190724125537.GC2225@nanopsycho>
On Wed, Jul 24, 2019 at 02:55:37PM +0200, Jiri Pirko wrote:
> Mon, Jul 22, 2019 at 08:31:33PM CEST, idosch@idosch.org wrote:
> >+static int net_dm_trunc_len_set(struct genl_info *info)
>
> void.
Ack, will change.
>
>
> >+{
> >+ if (!info->attrs[NET_DM_ATTR_TRUNC_LEN])
> >+ return 0;
> >+
> >+ net_dm_trunc_len = nla_get_u32(info->attrs[NET_DM_ATTR_TRUNC_LEN]);
> >+
> >+ return 0;
> >+}
^ permalink raw reply
* Re: [RFC PATCH net-next 00/12] drop_monitor: Capture dropped packets and metadata
From: Ido Schimmel @ 2019-07-24 16:48 UTC (permalink / raw)
To: Jiri Pirko
Cc: Toke Høiland-Jørgensen, netdev, davem, nhorman, dsahern,
roopa, nikolay, jakub.kicinski, andy, f.fainelli, andrew,
vivien.didelot, mlxsw, Ido Schimmel
In-Reply-To: <20190724125851.GD2225@nanopsycho>
On Wed, Jul 24, 2019 at 02:58:51PM +0200, Jiri Pirko wrote:
> Shouldn't the queue len be configurable?
Yes, it will be configurable in v1. I will use a sane limit as default.
^ permalink raw reply
* Re: [PATCH net-next 03/10] sfc: Use dev_get_drvdata where possible
From: Edward Cree @ 2019-07-24 16:44 UTC (permalink / raw)
To: Chuhong Yuan
Cc: Solarflare linux maintainers, Martin Habets, David S . Miller,
netdev, linux-kernel
In-Reply-To: <20190724112658.13241-1-hslester96@gmail.com>
On 24/07/2019 12:26, Chuhong Yuan wrote:
> Instead of using to_pci_dev + pci_get_drvdata,
> use dev_get_drvdata to make code simpler.
>
> Signed-off-by: Chuhong Yuan <hslester96@gmail.com>
Acked-by: Edward Cree <ecree@solarflare.com>
> ---
> drivers/net/ethernet/sfc/ef10.c | 4 ++--
> drivers/net/ethernet/sfc/efx.c | 10 +++++-----
> 2 files changed, 7 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c
> index 16d6952c312a..0ec13f520e90 100644
> --- a/drivers/net/ethernet/sfc/ef10.c
> +++ b/drivers/net/ethernet/sfc/ef10.c
> @@ -508,7 +508,7 @@ static ssize_t efx_ef10_show_link_control_flag(struct device *dev,
> struct device_attribute *attr,
> char *buf)
> {
> - struct efx_nic *efx = pci_get_drvdata(to_pci_dev(dev));
> + struct efx_nic *efx = dev_get_drvdata(dev);
>
> return sprintf(buf, "%d\n",
> ((efx->mcdi->fn_flags) &
> @@ -520,7 +520,7 @@ static ssize_t efx_ef10_show_primary_flag(struct device *dev,
> struct device_attribute *attr,
> char *buf)
> {
> - struct efx_nic *efx = pci_get_drvdata(to_pci_dev(dev));
> + struct efx_nic *efx = dev_get_drvdata(dev);
>
> return sprintf(buf, "%d\n",
> ((efx->mcdi->fn_flags) &
> diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c
> index ab58b837df47..2fef7402233e 100644
> --- a/drivers/net/ethernet/sfc/efx.c
> +++ b/drivers/net/ethernet/sfc/efx.c
> @@ -2517,7 +2517,7 @@ static struct notifier_block efx_netdev_notifier = {
> static ssize_t
> show_phy_type(struct device *dev, struct device_attribute *attr, char *buf)
> {
> - struct efx_nic *efx = pci_get_drvdata(to_pci_dev(dev));
> + struct efx_nic *efx = dev_get_drvdata(dev);
> return sprintf(buf, "%d\n", efx->phy_type);
> }
> static DEVICE_ATTR(phy_type, 0444, show_phy_type, NULL);
> @@ -2526,7 +2526,7 @@ static DEVICE_ATTR(phy_type, 0444, show_phy_type, NULL);
> static ssize_t show_mcdi_log(struct device *dev, struct device_attribute *attr,
> char *buf)
> {
> - struct efx_nic *efx = pci_get_drvdata(to_pci_dev(dev));
> + struct efx_nic *efx = dev_get_drvdata(dev);
> struct efx_mcdi_iface *mcdi = efx_mcdi(efx);
>
> return scnprintf(buf, PAGE_SIZE, "%d\n", mcdi->logging_enabled);
> @@ -2534,7 +2534,7 @@ static ssize_t show_mcdi_log(struct device *dev, struct device_attribute *attr,
> static ssize_t set_mcdi_log(struct device *dev, struct device_attribute *attr,
> const char *buf, size_t count)
> {
> - struct efx_nic *efx = pci_get_drvdata(to_pci_dev(dev));
> + struct efx_nic *efx = dev_get_drvdata(dev);
> struct efx_mcdi_iface *mcdi = efx_mcdi(efx);
> bool enable = count > 0 && *buf != '0';
>
> @@ -3654,7 +3654,7 @@ static int efx_pci_sriov_configure(struct pci_dev *dev, int num_vfs)
>
> static int efx_pm_freeze(struct device *dev)
> {
> - struct efx_nic *efx = pci_get_drvdata(to_pci_dev(dev));
> + struct efx_nic *efx = dev_get_drvdata(dev);
>
> rtnl_lock();
>
> @@ -3675,7 +3675,7 @@ static int efx_pm_freeze(struct device *dev)
> static int efx_pm_thaw(struct device *dev)
> {
> int rc;
> - struct efx_nic *efx = pci_get_drvdata(to_pci_dev(dev));
> + struct efx_nic *efx = dev_get_drvdata(dev);
>
> rtnl_lock();
>
^ permalink raw reply
* Re: [PATCH -next v2] net/ixgbevf: fix a compilation error of skb_frag_t
From: Jeff Kirsher @ 2019-07-24 16:39 UTC (permalink / raw)
To: Qian Cai, davem; +Cc: netdev, linux-kernel
In-Reply-To: <1563985079-12888-1-git-send-email-cai@lca.pw>
[-- Attachment #1: Type: text/plain, Size: 1727 bytes --]
On Wed, 2019-07-24 at 12:17 -0400, Qian Cai wrote:
> The linux-next commit "net: Rename skb_frag_t size to bv_len" [1]
> introduced a compilation error on powerpc as it forgot to deal with
> the
> renaming from "size" to "bv_len" for ixgbevf.
>
> [1]
> https://lore.kernel.org/netdev/20190723030831.11879-1-willy@infradead.org/T/#md052f1c7de965ccd1bdcb6f92e1990a52298eac5
>
> In file included from ./include/linux/cache.h:5,
> from ./include/linux/printk.h:9,
> from ./include/linux/kernel.h:15,
> from ./include/linux/list.h:9,
> from ./include/linux/module.h:9,
> from
> drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c:12:
> drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c: In function
> 'ixgbevf_xmit_frame_ring':
> drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c:4138:51: error:
> 'skb_frag_t' {aka 'struct bio_vec'} has no member named 'size'
> count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size);
> ^
> ./include/uapi/linux/kernel.h:13:40: note: in definition of macro
> '__KERNEL_DIV_ROUND_UP'
> #define __KERNEL_DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
> ^
> drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c:4138:12: note: in
> expansion of macro 'TXD_USE_COUNT'
> count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size);
>
> Signed-off-by: Qian Cai <cai@lca.pw>
> ---
>
> v2: Use the fine accessor per Matthew.
>
> drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 7 +++++--
> 1 file changed, 5 insertions(+), 2 deletions(-)
Dave I will pick this up and add it to my queue.
[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 833 bytes --]
^ permalink raw reply
* Re: [PATCH net-next v1 1/4] enetc: Clean up local mdio bus allocation
From: Andrew Lunn @ 2019-07-24 16:39 UTC (permalink / raw)
To: Claudiu Manoil
Cc: David S . Miller, Rob Herring, Leo Li, Alexandru Marginean,
netdev@vger.kernel.org, devicetree@vger.kernel.org,
linux-arm-kernel@lists.infradead.org,
linux-kernel@vger.kernel.org
In-Reply-To: <VI1PR04MB4880CD977A5D58DA0A7EE56696C60@VI1PR04MB4880.eurprd04.prod.outlook.com>
> >All the horrible casts go away, the driver is structured like every
> >other driver, sparse is probably happy, etc.
> >
>
> This looks more like a matter cosmetic preferences. I mean, I didn't
> notice anything "horrible" in the code so far.
#define bus_to_enetc_regs(bus) (struct enetc_mdio_regs __iomem *)((bus)->priv)
You should not need a cast here, bus->priv is a void *. But bus->priv
is being abused to hold a __iomem pointer.
enetc_wr_reg(®s->mdio_cfg, mdio_cfg);
This is also rather odd, passing the address of something to an IO
operator? I also don't know the C standard well enough to know if it
is guaranteed that:
struct enetc_mdio_regs {
u32 mdio_cfg; /* MDIO configuration and status */
u32 mdio_ctl; /* MDIO control */
u32 mdio_data; /* MDIO data */
u32 mdio_addr; /* MDIO address */
};
actually works. On a 64bit system is the compiler allowed to put in
padding to keep the u32 64 bit aligned?
> I actually find it more
> ugly to define a new structure with only one element inside, like:
> struct enetc_mdio_priv {
> struct enetc_hw *hw;
> }
One advantage of this is that struct enetc_hw correctly has all the
__iomem attributes. All the casts to __iomem go away, and sparse is
happy.
> Anyway, if others already did this in the kernel, what can I do?
Clean it up. Make the code more readable and easy to maintain.
Andrew
^ permalink raw reply
* Re: Reminder: 99 open syzbot bugs in net subsystem
From: Eric Biggers @ 2019-07-24 16:30 UTC (permalink / raw)
To: Eric Dumazet
Cc: Dmitry Vyukov, netdev, David S. Miller, Florian Westphal,
Ilya Maximets, Eric Dumazet, David Ahern, linux-kernel,
syzkaller-bugs
In-Reply-To: <63f12327-dd4b-5210-4de2-705af6bc4ba4@gmail.com>
On Wed, Jul 24, 2019 at 08:39:05AM +0200, Eric Dumazet wrote:
>
>
> On 7/24/19 3:38 AM, Eric Biggers wrote:
> > [This email was generated by a script. Let me know if you have any suggestions
> > to make it better, or if you want it re-generated with the latest status.]
> >
> > Of the currently open syzbot reports against the upstream kernel, I've manually
> > marked 99 of them as possibly being bugs in the net subsystem. This category
> > only includes the networking bugs that I couldn't assign to a more specific
> > component (bpf, xfrm, bluetooth, tls, tipc, sctp, wireless, etc.). I've listed
> > these reports below, sorted by an algorithm that tries to list first the reports
> > most likely to be still valid, important, and actionable.
> >
> > Of these 99 bugs, 17 were seen in mainline in the last week.
> >
> > Of these 99 bugs, 4 were bisected to commits from the following people:
> >
> > Florian Westphal <fw@strlen.de>
> > Ilya Maximets <i.maximets@samsung.com>
> > Eric Dumazet <edumazet@google.com>
> > David Ahern <dsahern@gmail.com>
> >
> > If you believe a bug is no longer valid, please close the syzbot report by
> > sending a '#syz fix', '#syz dup', or '#syz invalid' command in reply to the
> > original thread, as explained at https://goo.gl/tpsmEJ#status
> >
> > If you believe I misattributed a bug to the net subsystem, please let me know,
> > and if possible forward the report to the correct people or mailing list.
> >
>
> Some of the bugs have been fixed already, before syzbot found them.
>
> Why force human to be gentle to bots and actually replying to them ?
>
> I usually simply wait that syzbot is finding the bug does not repro anymore,
> but now if you send these emails, we will have even more pressure on us.
>
First, based on experience, I'd guess about 30-45 of these are still valid. 17
were seen in mainline in the last week, but some others are valid too. The ones
most likely to still be valid are at the beginning of the list. So let's try
not use the presence of outdated bugs as an excuse not to fix current bugs.
Second, all these bug reports are still open, regardless of whether reminders
are sent or not. I think you're really suggesting that possibly outdated bug
reports should be automatically invalidated by syzbot.
syzbot already does that for bugs with no reproducer. However, that still
leaves a lot of outdated bugs with reproducers.
Since the kernel community is basically in continuous bug bankruptcy and lots of
syzbot reports are being ignored anyway, I'm in favor of making the invalidation
criteria more aggressive, so we can best focus people's efforts. I understand
that Dmitry has been against this though, since a significant fraction of bugs
that syzbot stopped hitting for some reason actually turn out to be still valid.
But we probably have no choice. So I suggest we agree on new criteria for
invalidating bugs. I'd suggest assigning a timeout to each bug, based on
attributes like "seen in mainline?", "reproducer type", "bisected?", "does it
look like a 'bad' crash (e.g. use-after-free)"; similar to the algorithm I'm
using to sort the bugs when sorting these reminders. I.e., bugs most likely to
still be valid, important, and actionable get longest timeouts.
Then if no crash or activity was seen in the timeout, the bug is closed.
Any thoughts from anyone?
- Eric
^ permalink raw reply
* Re: kernel panic: stack is corrupted in pointer
From: John Fastabend @ 2019-07-24 16:22 UTC (permalink / raw)
To: Dmitry Vyukov, John Fastabend
Cc: syzbot, bpf, David Airlie, alexander.deucher, amd-gfx,
Alexei Starovoitov, christian.koenig, Daniel Borkmann,
david1.zhou, DRI, leo.liu, LKML, netdev, syzkaller-bugs,
Marco Elver
In-Reply-To: <CACT4Y+ZbPmRB9T9ZzhE79VnKKD3+ieHeLpaDGRkcQ72nADKH_g@mail.gmail.com>
Dmitry Vyukov wrote:
> On Tue, Jul 23, 2019 at 7:26 PM John Fastabend <john.fastabend@gmail.com> wrote:
> >
> > Dmitry Vyukov wrote:
> > > On Wed, Jul 17, 2019 at 10:58 AM syzbot
> > > <syzbot+79f5f028005a77ecb6bb@syzkaller.appspotmail.com> wrote:
> > > >
> > > > Hello,
> > > >
> > > > syzbot found the following crash on:
> > > >
> > > > HEAD commit: 1438cde7 Add linux-next specific files for 20190716
> > > > git tree: linux-next
> > > > console output: https://syzkaller.appspot.com/x/log.txt?x=13988058600000
> > > > kernel config: https://syzkaller.appspot.com/x/.config?x=3430a151e1452331
> > > > dashboard link: https://syzkaller.appspot.com/bug?extid=79f5f028005a77ecb6bb
> > > > compiler: gcc (GCC) 9.0.0 20181231 (experimental)
> > > > syz repro: https://syzkaller.appspot.com/x/repro.syz?x=111fc8afa00000
> > >
> > > From the repro it looks like the same bpf stack overflow bug. +John
> > > We need to dup them onto some canonical report for this bug, or this
> > > becomes unmanageable.
> >
> > Fixes in bpf tree should fix this. Hopefully, we will squash this once fixes
> > percolate up.
> >
> > #syz test: git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf.git
>
> Cool! What is the fix?
It took a series of patches here,
https://www.spinics.net/lists/netdev/msg586986.html
The fix commits from bpf tree are,
(git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf.git)
318892ac068397f40ff81d9155898da01493b1d2
ac78fc148d8249dbf382c2127456dd08ec5b161c
f87e62d45e51b12d48d2cb46b5cde8f83b866bc4
313ab004805cf52a42673b15852b3842474ccd87
32857cf57f920cdc03b5095f08febec94cf9c36b
45a4521dcbd92e71c9e53031b40e34211d3b4feb
2bb90e5cc90e1d09f631aeab041a9cf913a5bbe5
0e858739c2d2eedeeac1d35bfa0ec3cc2a7190d8
95fa145479fbc0a0c1fd3274ceb42ec03c042a4a
The last commit fixes this paticular syzbot issue,
commit 95fa145479fbc0a0c1fd3274ceb42ec03c042a4a
Author: John Fastabend <john.fastabend@gmail.com>
Date: Fri Jul 19 10:29:22 2019 -0700
bpf: sockmap/tls, close can race with map free
The other commits address some other issues found while testing.
> We don't need to wait for the fix to percolate up (and then down
> too!). syzbot gracefully handles when a patch is not yet present
> everywhere (it happens all the time).
Great. By the way the above should fix many of the outstanding
reports against bpf sockmap and tls side. I'll have to walk through
each one individually to double check though. I guess we can mark
them as dup reports and syzbot should sort it out?
>
> Btw, this was due to a stack overflow, right? Or something else?
Right, stack overflow due to race in updating sock ops where build a
circular call chain.
> We are trying to make KASAN configuration detect stack overflows too,
> so that it does not cause havoc next time. But it turns out to be
> non-trivial and our current attempt seems to fail:
> https://groups.google.com/forum/#!topic/kasan-dev/IhYv7QYhLfY
>
>
^ permalink raw reply
* [PATCH -next v2] net/ixgbevf: fix a compilation error of skb_frag_t
From: Qian Cai @ 2019-07-24 16:17 UTC (permalink / raw)
To: davem; +Cc: jeffrey.t.kirsher, netdev, linux-kernel, Qian Cai
The linux-next commit "net: Rename skb_frag_t size to bv_len" [1]
introduced a compilation error on powerpc as it forgot to deal with the
renaming from "size" to "bv_len" for ixgbevf.
[1] https://lore.kernel.org/netdev/20190723030831.11879-1-willy@infradead.org/T/#md052f1c7de965ccd1bdcb6f92e1990a52298eac5
In file included from ./include/linux/cache.h:5,
from ./include/linux/printk.h:9,
from ./include/linux/kernel.h:15,
from ./include/linux/list.h:9,
from ./include/linux/module.h:9,
from
drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c:12:
drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c: In function
'ixgbevf_xmit_frame_ring':
drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c:4138:51: error:
'skb_frag_t' {aka 'struct bio_vec'} has no member named 'size'
count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size);
^
./include/uapi/linux/kernel.h:13:40: note: in definition of macro
'__KERNEL_DIV_ROUND_UP'
#define __KERNEL_DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
^
drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c:4138:12: note: in
expansion of macro 'TXD_USE_COUNT'
count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size);
Signed-off-by: Qian Cai <cai@lca.pw>
---
v2: Use the fine accessor per Matthew.
drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index bdfccaf38edd..8c011d4ce7a9 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -4134,8 +4134,11 @@ static int ixgbevf_xmit_frame_ring(struct sk_buff *skb,
* otherwise try next time
*/
#if PAGE_SIZE > IXGBE_MAX_DATA_PER_TXD
- for (f = 0; f < skb_shinfo(skb)->nr_frags; f++)
- count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size);
+ for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[f];
+
+ count += TXD_USE_COUNT(skb_frag_size(frag));
+ }
#else
count += skb_shinfo(skb)->nr_frags;
#endif
--
1.8.3.1
^ permalink raw reply related
* Re: [PATCH] net: phy: mscc: initialize stats array
From: Andrew Lunn @ 2019-07-24 16:16 UTC (permalink / raw)
To: Andreas Schwab
Cc: netdev, Florian Fainelli, Heiner Kallweit, David S. Miller,
linux-kernel
In-Reply-To: <mvmh87bih1y.fsf@suse.de>
On Wed, Jul 24, 2019 at 05:32:57PM +0200, Andreas Schwab wrote:
> The memory allocated for the stats array may contain arbitrary data.
>
> Signed-off-by: Andreas Schwab <schwab@suse.de>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Fixes: e4f9ba642f0b ("net: phy: mscc: add support for VSC8514 PHY.")
Fixes: 00d70d8e0e78 ("net: phy: mscc: add support for VSC8574 PHY")
Fixes: a5afc1678044 ("net: phy: mscc: add support for VSC8584 PHY")
Fixes: f76178dc5218 ("net: phy: mscc: add ethtool statistics counters")
Andrew
^ permalink raw reply
* Problem using skb_cow_data()
From: David Howells @ 2019-07-24 16:11 UTC (permalink / raw)
To: netdev; +Cc: dhowells, linux-afs
Hi,
I have a problem using skb_cow_data() in rxkad_verify_packet{,_1,_2}() and was
wondering if anyone can suggest a better way.
The problem is that the rxrpc packet receive routine, rxrpc_input_data(),
receives an skb from the udp socket, makes it its own and then, if it's a data
packet, stores one or more[*] pointers to the skb, each with its own usage
ref, in the rx ring.
[*] The Rx protocol allows combinable packets (jumbo packet) that need to be
split and each segment treated as a separate data packet.
rxrpc_input_data() then drops its own ref to the packet. Possibly
simultaneously, the receiving process wakes up and starts processing the
packets earmarked for it. This involves decrypting the packets if necessary
(which is done in place in the skb). To do this, the rxkad_verify_packet*()
functions call skb_cow_data() on the skb. This, however:
(a) Can race with the input which may not have released its ref yet.
(b) Could be a jumbo packet with multiple refs on it in the rx ring.
This can result in an assertion failure in pskb_expand_head():
BUG_ON(skb_shared(skb));
In this particular case it shouldn't be an issue since the input path merely
has to release a ref and the subsequent attachment points in the ring buffer
if it's a jumbo packet will not be looked at until the current attachment
point is finished with (data delivery has to proceed delivery).
So, some questions:
(1) Do I actually have to call skb_cow_data()?
(2) Can the assertion be relaxed?
(3) Is it feasible to do decryption directly from an skb to the target
iov_iter, thereby avoiding the need to modify the skb content and saving
a copy and potentially a bunch of allocations? This would seem to
require calling something like skb_copy_and_hash_datagram_iter(), but
with a bespoke cb function to copy from the skb to the iov_iter.
This gets interesting if the target iov_iter is smaller than the content
in the skb as decryption would have to be suspended until a new iov_iter
comes along.
David
^ permalink raw reply
* RE: [PATCH net-next v1 1/4] enetc: Clean up local mdio bus allocation
From: Claudiu Manoil @ 2019-07-24 16:03 UTC (permalink / raw)
To: Andrew Lunn
Cc: David S . Miller, Rob Herring, Leo Li, Alexandru Marginean,
netdev@vger.kernel.org, devicetree@vger.kernel.org,
linux-arm-kernel@lists.infradead.org,
linux-kernel@vger.kernel.org
In-Reply-To: <20190724151803.GR25635@lunn.ch>
>-----Original Message-----
>From: Andrew Lunn <andrew@lunn.ch>
>Sent: Wednesday, July 24, 2019 6:18 PM
>To: Claudiu Manoil <claudiu.manoil@nxp.com>
>Cc: David S . Miller <davem@davemloft.net>; Rob Herring
><robh+dt@kernel.org>; Leo Li <leoyang.li@nxp.com>; Alexandru Marginean
><alexandru.marginean@nxp.com>; netdev@vger.kernel.org;
>devicetree@vger.kernel.org; linux-arm-kernel@lists.infradead.org; linux-
>kernel@vger.kernel.org
>Subject: Re: [PATCH net-next v1 1/4] enetc: Clean up local mdio bus allocation
>
>On Wed, Jul 24, 2019 at 05:41:38PM +0300, Claudiu Manoil wrote:
>> Though it works, this is not how it should have been.
>> What's needed is a pointer to the mdio registers.
>> Store it properly inside bus->priv allocated space.
>> Use devm_* variant to further clean up the init error /
>> remove paths.
>>
>> Fixes following sparse warning:
>> warning: incorrect type in assignment (different address spaces)
>> expected void *priv
>> got struct enetc_mdio_regs [noderef] <asn:2>*[assigned] regs
>>
>> Fixes: ebfcb23d62ab ("enetc: Add ENETC PF level external MDIO support")
>>
>> Signed-off-by: Claudiu Manoil <claudiu.manoil@nxp.com>
>> ---
>> v1 - added this patch
>>
>> .../net/ethernet/freescale/enetc/enetc_mdio.c | 31 +++++++------------
>> 1 file changed, 12 insertions(+), 19 deletions(-)
>>
>> diff --git a/drivers/net/ethernet/freescale/enetc/enetc_mdio.c
>b/drivers/net/ethernet/freescale/enetc/enetc_mdio.c
>> index 77b9cd10ba2b..1e3cd21c13ee 100644
>> --- a/drivers/net/ethernet/freescale/enetc/enetc_mdio.c
>> +++ b/drivers/net/ethernet/freescale/enetc/enetc_mdio.c
>> @@ -15,7 +15,8 @@ struct enetc_mdio_regs {
>> u32 mdio_addr; /* MDIO address */
>> };
>>
>> -#define bus_to_enetc_regs(bus) (struct enetc_mdio_regs __iomem
>*)((bus)->priv)
>> +#define bus_to_enetc_regs(bus) (*(struct enetc_mdio_regs __iomem
>**) \
>> + ((bus)->priv))
>>
>> #define ENETC_MDIO_REG_OFFSET 0x1c00
>> #define ENETC_MDC_DIV 258
>> @@ -146,12 +147,12 @@ static int enetc_mdio_read(struct mii_bus *bus, int
>phy_id, int regnum)
>> int enetc_mdio_probe(struct enetc_pf *pf)
>> {
>> struct device *dev = &pf->si->pdev->dev;
>> - struct enetc_mdio_regs __iomem *regs;
>> + struct enetc_mdio_regs __iomem **regsp;
>> struct device_node *np;
>> struct mii_bus *bus;
>> - int ret;
>> + int err;
>>
>> - bus = mdiobus_alloc_size(sizeof(regs));
>> + bus = devm_mdiobus_alloc_size(dev, sizeof(*regsp));
>> if (!bus)
>> return -ENOMEM;
>>
>> @@ -159,41 +160,33 @@ int enetc_mdio_probe(struct enetc_pf *pf)
>> bus->read = enetc_mdio_read;
>> bus->write = enetc_mdio_write;
>> bus->parent = dev;
>> + regsp = bus->priv;
>> snprintf(bus->id, MII_BUS_ID_SIZE, "%s", dev_name(dev));
>>
>> /* store the enetc mdio base address for this bus */
>> - regs = pf->si->hw.port + ENETC_MDIO_REG_OFFSET;
>> - bus->priv = regs;
>> + *regsp = pf->si->hw.port + ENETC_MDIO_REG_OFFSET;
>
>This is all very odd and different to every other driver.
>
>If i get the code write, there are 4 registers, each u32 in size,
>starting at pf->si->hw.port + ENETC_MDIO_REG_OFFSET?
>
>There are macros like enetc_port_wr() and enetc_global_wr(). It think
>it would be much cleaner to add a macro enet_mdio_wr() which takes
>hw, off, val.
>
>#define enet_mdio_wr(hw, off, val) enet_port_wr(hw, off +
>ENETC_MDIO_REG_OFFSET, val)
>
>struct enetc_mdio_priv {
> struct enetc_hw *hw;
>}
>
> struct enetc_mdio_priv *mdio_priv;
>
> bus = devm_mdiobus_alloc_size(dev, sizeof(*mdio_priv));
>
> mdio_priv = bus->priv;
> mdio_priv->hw = pf->si->hw;
>
>
>static int enetc_mdio_write(struct mii_bus *bus, int phy_id, int regnum,
> u16 value)
>{
> struct enetc_mdio_priv *mdio_priv = bus->priv;
>...
> enet_mdio_wr(priv->hw, ENETC_MDIO_CFG, mdio_cfg);
>}
>
>All the horrible casts go away, the driver is structured like every
>other driver, sparse is probably happy, etc.
>
This looks more like a matter cosmetic preferences. I mean, I didn't
notice anything "horrible" in the code so far. I actually find it more
ugly to define a new structure with only one element inside, like:
struct enetc_mdio_priv {
struct enetc_hw *hw;
}
What is this technique called? Looks like a second type definition for
another type.
Anyway, if others already did this in the kernel, what can I do?
^ permalink raw reply
* Re: [PATCH] ip6_gre: reload ipv6h in prepare_ip6gre_xmit_ipv6
From: William Tu @ 2019-07-24 15:44 UTC (permalink / raw)
To: Haishuang Yan; +Cc: David S. Miller, Alexey Kuznetsov, netdev, linux-kernel
In-Reply-To: <1563969642-11843-1-git-send-email-yanhaishuang@cmss.chinamobile.com>
On Wed, Jul 24, 2019 at 08:00:42PM +0800, Haishuang Yan wrote:
> Since ip6_tnl_parse_tlv_enc_lim() can call pskb_may_pull()
> which may change skb->data, so we need to re-load ipv6h at
> the right place.
>
> Fixes: 898b29798e36 ("ip6_gre: Refactor ip6gre xmit codes")
> Cc: William Tu <u9012063@gmail.com>
> Signed-off-by: Haishuang Yan <yanhaishuang@cmss.chinamobile.com>
LGTM, thanks for the fix
Acked-by: William Tu <u9012063@gmail.com>
> ---
> net/ipv6/ip6_gre.c | 3 ++-
> 1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
> index c2049c7..dd2d0b96 100644
> --- a/net/ipv6/ip6_gre.c
> +++ b/net/ipv6/ip6_gre.c
> @@ -660,12 +660,13 @@ static int prepare_ip6gre_xmit_ipv6(struct sk_buff *skb,
> struct flowi6 *fl6, __u8 *dsfield,
> int *encap_limit)
> {
> - struct ipv6hdr *ipv6h = ipv6_hdr(skb);
> + struct ipv6hdr *ipv6h;
> struct ip6_tnl *t = netdev_priv(dev);
> __u16 offset;
>
> offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb));
> /* ip6_tnl_parse_tlv_enc_lim() might have reallocated skb->head */
> + ipv6h = ipv6_hdr(skb);
>
> if (offset > 0) {
> struct ipv6_tlv_tnl_enc_lim *tel;
> --
> 1.8.3.1
>
>
>
^ permalink raw reply
* Re: [PATCH net-next] selftests: mlxsw: Fix typo in qos_mc_aware.sh
From: Ido Schimmel @ 2019-07-24 15:39 UTC (permalink / raw)
To: Masanari Iida
Cc: shuah@kernel.org, linux-kernel@vger.kernel.org, Jiri Pirko,
linux-kselftest@vger.kernel.org, rdunlap@infradead.org,
netdev@vger.kernel.org
In-Reply-To: <20190724152951.4618-1-standby24x7@gmail.com>
On Thu, Jul 25, 2019 at 12:29:51AM +0900, Masanari Iida wrote:
> This patch fix some spelling typo in qos_mc_aware.sh
>
> Signed-off-by: Masanari Iida <standby24x7@gmail.com>
> Acked-by: Randy Dunlap <rdunlap@infradead.org>
Reviewed-by: Ido Schimmel <idosch@mellanox.com>
^ permalink raw reply
* Re: [PATCH -next] iwlwifi: dbg: work around clang bug by marking debug strings static
From: Sedat Dilek @ 2019-07-24 15:38 UTC (permalink / raw)
To: Luca Coelho
Cc: Nick Desaulniers, Joe Perches, Kalle Valo, Arnd Bergmann,
Nathan Chancellor, Johannes Berg, Emmanuel Grumbach,
Intel Linux Wireless, David S. Miller, Shahar S Matityahu,
Sara Sharon, linux-wireless, netdev, LKML, clang-built-linux
In-Reply-To: <da053a97d771eff0ad8db37e644108ed2fad25a3.camel@coelho.fi>
On Tue, Jul 16, 2019 at 11:15 PM Luca Coelho <luca@coelho.fi> wrote:
>
> On Tue, 2019-07-16 at 10:28 -0700, Nick Desaulniers wrote:
> > On Thu, Jul 11, 2019 at 7:15 PM Joe Perches <joe@perches.com> wrote:
> > > On Thu, 2019-07-11 at 17:17 -0700, Nick Desaulniers wrote:
> > > > Commit r353569 in prerelease Clang-9 is producing a linkage failure:
> > > >
> > > > ld: drivers/net/wireless/intel/iwlwifi/fw/dbg.o:
> > > > in function `_iwl_fw_dbg_apply_point':
> > > > dbg.c:(.text+0x827a): undefined reference to `__compiletime_assert_2387'
> > > >
> > > > when the following configs are enabled:
> > > > - CONFIG_IWLWIFI
> > > > - CONFIG_IWLMVM
> > > > - CONFIG_KASAN
> > > >
> > > > Work around the issue for now by marking the debug strings as `static`,
> > > > which they probably should be any ways.
> > > >
> > > > Link: https://bugs.llvm.org/show_bug.cgi?id=42580
> > > > Link: https://github.com/ClangBuiltLinux/linux/issues/580
> > > > Reported-by: Arnd Bergmann <arnd@arndb.de>
> > > > Reported-by: Nathan Chancellor <natechancellor@gmail.com>
> > > > Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
> > > > ---
> > > > drivers/net/wireless/intel/iwlwifi/fw/dbg.c | 4 ++--
> > > > 1 file changed, 2 insertions(+), 2 deletions(-)
> > > >
> > > > diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c
> > > > index e411ac98290d..f8c90ea4e9b4 100644
> > > > --- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c
> > > > +++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c
> > > > @@ -2438,7 +2438,7 @@ static void iwl_fw_dbg_info_apply(struct iwl_fw_runtime *fwrt,
> > > > {
> > > > u32 img_name_len = le32_to_cpu(dbg_info->img_name_len);
> > > > u32 dbg_cfg_name_len = le32_to_cpu(dbg_info->dbg_cfg_name_len);
> > > > - const char err_str[] =
> > > > + static const char err_str[] =
> > > > "WRT: ext=%d. Invalid %s name length %d, expected %d\n";
> > >
> > > Better still would be to use the format string directly
> > > in both locations instead of trying to deduplicate it
> > > via storing it into a separate pointer.
> > >
> > > Let the compiler/linker consolidate the format.
> > > It's smaller object code, allows format/argument verification,
> > > and is simpler for humans to understand.
> >
> > Whichever Kalle prefers, I just want my CI green again.
>
> We already changed this in a later internal patch, which will reach the
> mainline (-next) soon. So let's skip this for now.
>
Do you have a link to your internal Git or patchwork for this?
I am interested in testing it.
- Sedat -
^ permalink raw reply
* Re: [PATCH v12 1/5] can: m_can: Create a m_can platform framework
From: Dan Murphy @ 2019-07-24 15:36 UTC (permalink / raw)
To: Greg KH; +Cc: wg, mkl, davem, linux-can, netdev, linux-kernel
In-Reply-To: <20190724064754.GC22447@kroah.com>
Hello
On 7/24/19 1:47 AM, Greg KH wrote:
> On Tue, Jul 23, 2019 at 10:14:14AM -0500, Dan Murphy wrote:
>> Hello
>>
>> On 7/10/19 7:08 AM, Dan Murphy wrote:
>>> Hello
>>>
>>> On 6/17/19 10:09 AM, Dan Murphy wrote:
>>>> Marc
>>>>
>>>> On 6/10/19 11:35 AM, Dan Murphy wrote:
>>>>> Bump
>>>>>
>>>>> On 6/6/19 8:16 AM, Dan Murphy wrote:
>>>>>> Marc
>>>>>>
>>>>>> Bump
>>>>>>
>>>>>> On 5/31/19 6:51 AM, Dan Murphy wrote:
>>>>>>> Marc
>>>>>>>
>>>>>>> On 5/15/19 3:54 PM, Dan Murphy wrote:
>>>>>>>> Marc
>>>>>>>>
>>>>>>>> On 5/9/19 11:11 AM, Dan Murphy wrote:
>>>>>>>>> Create a m_can platform framework that peripheral
>>>>>>>>> devices can register to and use common code and register sets.
>>>>>>>>> The peripheral devices may provide read/write and configuration
>>>>>>>>> support of the IP.
>>>>>>>>>
>>>>>>>>> Acked-by: Wolfgang Grandegger <wg@grandegger.com>
>>>>>>>>> Signed-off-by: Dan Murphy <dmurphy@ti.com>
>>>>>>>>> ---
>>>>>>>>>
>>>>>>>>> v12 - Update the m_can_read/write functions to
>>>>>>>>> create a backtrace if the callback
>>>>>>>>> pointer is NULL. - https://lore.kernel.org/patchwork/patch/1052302/
>>>>>>>>>
>>>>>>>> Is this able to be merged now?
>>>>>>> ping
>>>> Wondering if there is anything else we need to do?
>>>>
>>>> The part has officially shipped and we had hoped to have driver
>>>> support in Linux as part of the announcement.
>>>>
>>> Is this being sent in a PR for 5.3?
>>>
>>> Dan
>>>
>> Adding Greg to this thread as I have no idea what is going on with this.
> Why me? What am I supposed to do here? I see no patches at all to do
> anything with :(
I am not sure who to email. The maintainer seems to be on hiatus or
super busy with other work.
So I added you to see if you know how to handle this. Wolfgang Acked it
but he said Marc needs to pull
it in. We have quite a few users of this patchset. I have been hosting
the patchset in a different tree.
These users keep pinging us for upstream status and all we can do is
point them to the
LKML to show we are continuing to pursue inclusion.
https://lore.kernel.org/patchwork/project/lkml/list/?series=393454
Thanks
Dan
>
> thanks,
>
> greg "not a miracle worker" k-h
^ permalink raw reply
* [PATCH] net: phy: mscc: initialize stats array
From: Andreas Schwab @ 2019-07-24 15:32 UTC (permalink / raw)
To: netdev
Cc: Andrew Lunn, Florian Fainelli, Heiner Kallweit, David S. Miller,
linux-kernel
The memory allocated for the stats array may contain arbitrary data.
Signed-off-by: Andreas Schwab <schwab@suse.de>
---
drivers/net/phy/mscc.c | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/drivers/net/phy/mscc.c b/drivers/net/phy/mscc.c
index 28676af97b42..645d354ffb48 100644
--- a/drivers/net/phy/mscc.c
+++ b/drivers/net/phy/mscc.c
@@ -2226,8 +2226,8 @@ static int vsc8514_probe(struct phy_device *phydev)
vsc8531->supp_led_modes = VSC85XX_SUPP_LED_MODES;
vsc8531->hw_stats = vsc85xx_hw_stats;
vsc8531->nstats = ARRAY_SIZE(vsc85xx_hw_stats);
- vsc8531->stats = devm_kmalloc_array(&phydev->mdio.dev, vsc8531->nstats,
- sizeof(u64), GFP_KERNEL);
+ vsc8531->stats = devm_kcalloc(&phydev->mdio.dev, vsc8531->nstats,
+ sizeof(u64), GFP_KERNEL);
if (!vsc8531->stats)
return -ENOMEM;
@@ -2251,8 +2251,8 @@ static int vsc8574_probe(struct phy_device *phydev)
vsc8531->supp_led_modes = VSC8584_SUPP_LED_MODES;
vsc8531->hw_stats = vsc8584_hw_stats;
vsc8531->nstats = ARRAY_SIZE(vsc8584_hw_stats);
- vsc8531->stats = devm_kmalloc_array(&phydev->mdio.dev, vsc8531->nstats,
- sizeof(u64), GFP_KERNEL);
+ vsc8531->stats = devm_kcalloc(&phydev->mdio.dev, vsc8531->nstats,
+ sizeof(u64), GFP_KERNEL);
if (!vsc8531->stats)
return -ENOMEM;
@@ -2281,8 +2281,8 @@ static int vsc8584_probe(struct phy_device *phydev)
vsc8531->supp_led_modes = VSC8584_SUPP_LED_MODES;
vsc8531->hw_stats = vsc8584_hw_stats;
vsc8531->nstats = ARRAY_SIZE(vsc8584_hw_stats);
- vsc8531->stats = devm_kmalloc_array(&phydev->mdio.dev, vsc8531->nstats,
- sizeof(u64), GFP_KERNEL);
+ vsc8531->stats = devm_kcalloc(&phydev->mdio.dev, vsc8531->nstats,
+ sizeof(u64), GFP_KERNEL);
if (!vsc8531->stats)
return -ENOMEM;
@@ -2311,8 +2311,8 @@ static int vsc85xx_probe(struct phy_device *phydev)
vsc8531->supp_led_modes = VSC85XX_SUPP_LED_MODES;
vsc8531->hw_stats = vsc85xx_hw_stats;
vsc8531->nstats = ARRAY_SIZE(vsc85xx_hw_stats);
- vsc8531->stats = devm_kmalloc_array(&phydev->mdio.dev, vsc8531->nstats,
- sizeof(u64), GFP_KERNEL);
+ vsc8531->stats = devm_kcalloc(&phydev->mdio.dev, vsc8531->nstats,
+ sizeof(u64), GFP_KERNEL);
if (!vsc8531->stats)
return -ENOMEM;
--
2.22.0
--
Andreas Schwab, SUSE Labs, schwab@suse.de
GPG Key fingerprint = 0196 BAD8 1CE9 1970 F4BE 1748 E4D4 88E3 0EEA B9D7
"And now for something completely different."
^ permalink raw reply related
* [PATCH net-next] selftests: mlxsw: Fix typo in qos_mc_aware.sh
From: Masanari Iida @ 2019-07-24 15:29 UTC (permalink / raw)
To: shuah, linux-kernel, jiri, idosch, linux-kselftest, rdunlap,
netdev
Cc: Masanari Iida
This patch fix some spelling typo in qos_mc_aware.sh
Signed-off-by: Masanari Iida <standby24x7@gmail.com>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
---
tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh
index 71231ad2dbfb..47315fe48d5a 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh
@@ -262,7 +262,7 @@ test_mc_aware()
stop_traffic
- log_test "UC performace under MC overload"
+ log_test "UC performance under MC overload"
echo "UC-only throughput $(humanize $ucth1)"
echo "UC+MC throughput $(humanize $ucth2)"
@@ -316,7 +316,7 @@ test_uc_aware()
stop_traffic
- log_test "MC performace under UC overload"
+ log_test "MC performance under UC overload"
echo " ingress UC throughput $(humanize ${uc_ir})"
echo " egress UC throughput $(humanize ${uc_er})"
echo " sent $attempts BC ARPs, got $passes responses"
--
2.22.0.545.g9c9b961d7eb1
^ permalink raw reply related
* Re: [PATCH net-next v1 1/4] enetc: Clean up local mdio bus allocation
From: Andrew Lunn @ 2019-07-24 15:18 UTC (permalink / raw)
To: Claudiu Manoil
Cc: David S . Miller, Rob Herring, Li Yang, alexandru.marginean,
netdev, devicetree, linux-arm-kernel, linux-kernel
In-Reply-To: <1563979301-596-2-git-send-email-claudiu.manoil@nxp.com>
On Wed, Jul 24, 2019 at 05:41:38PM +0300, Claudiu Manoil wrote:
> Though it works, this is not how it should have been.
> What's needed is a pointer to the mdio registers.
> Store it properly inside bus->priv allocated space.
> Use devm_* variant to further clean up the init error /
> remove paths.
>
> Fixes following sparse warning:
> warning: incorrect type in assignment (different address spaces)
> expected void *priv
> got struct enetc_mdio_regs [noderef] <asn:2>*[assigned] regs
>
> Fixes: ebfcb23d62ab ("enetc: Add ENETC PF level external MDIO support")
>
> Signed-off-by: Claudiu Manoil <claudiu.manoil@nxp.com>
> ---
> v1 - added this patch
>
> .../net/ethernet/freescale/enetc/enetc_mdio.c | 31 +++++++------------
> 1 file changed, 12 insertions(+), 19 deletions(-)
>
> diff --git a/drivers/net/ethernet/freescale/enetc/enetc_mdio.c b/drivers/net/ethernet/freescale/enetc/enetc_mdio.c
> index 77b9cd10ba2b..1e3cd21c13ee 100644
> --- a/drivers/net/ethernet/freescale/enetc/enetc_mdio.c
> +++ b/drivers/net/ethernet/freescale/enetc/enetc_mdio.c
> @@ -15,7 +15,8 @@ struct enetc_mdio_regs {
> u32 mdio_addr; /* MDIO address */
> };
>
> -#define bus_to_enetc_regs(bus) (struct enetc_mdio_regs __iomem *)((bus)->priv)
> +#define bus_to_enetc_regs(bus) (*(struct enetc_mdio_regs __iomem **) \
> + ((bus)->priv))
>
> #define ENETC_MDIO_REG_OFFSET 0x1c00
> #define ENETC_MDC_DIV 258
> @@ -146,12 +147,12 @@ static int enetc_mdio_read(struct mii_bus *bus, int phy_id, int regnum)
> int enetc_mdio_probe(struct enetc_pf *pf)
> {
> struct device *dev = &pf->si->pdev->dev;
> - struct enetc_mdio_regs __iomem *regs;
> + struct enetc_mdio_regs __iomem **regsp;
> struct device_node *np;
> struct mii_bus *bus;
> - int ret;
> + int err;
>
> - bus = mdiobus_alloc_size(sizeof(regs));
> + bus = devm_mdiobus_alloc_size(dev, sizeof(*regsp));
> if (!bus)
> return -ENOMEM;
>
> @@ -159,41 +160,33 @@ int enetc_mdio_probe(struct enetc_pf *pf)
> bus->read = enetc_mdio_read;
> bus->write = enetc_mdio_write;
> bus->parent = dev;
> + regsp = bus->priv;
> snprintf(bus->id, MII_BUS_ID_SIZE, "%s", dev_name(dev));
>
> /* store the enetc mdio base address for this bus */
> - regs = pf->si->hw.port + ENETC_MDIO_REG_OFFSET;
> - bus->priv = regs;
> + *regsp = pf->si->hw.port + ENETC_MDIO_REG_OFFSET;
This is all very odd and different to every other driver.
If i get the code write, there are 4 registers, each u32 in size,
starting at pf->si->hw.port + ENETC_MDIO_REG_OFFSET?
There are macros like enetc_port_wr() and enetc_global_wr(). It think
it would be much cleaner to add a macro enet_mdio_wr() which takes
hw, off, val.
#define enet_mdio_wr(hw, off, val) enet_port_wr(hw, off + ENETC_MDIO_REG_OFFSET, val)
struct enetc_mdio_priv {
struct enetc_hw *hw;
}
struct enetc_mdio_priv *mdio_priv;
bus = devm_mdiobus_alloc_size(dev, sizeof(*mdio_priv));
mdio_priv = bus->priv;
mdio_priv->hw = pf->si->hw;
static int enetc_mdio_write(struct mii_bus *bus, int phy_id, int regnum,
u16 value)
{
struct enetc_mdio_priv *mdio_priv = bus->priv;
...
enet_mdio_wr(priv->hw, ENETC_MDIO_CFG, mdio_cfg);
}
All the horrible casts go away, the driver is structured like every
other driver, sparse is probably happy, etc.
Andrew
^ permalink raw reply
* Re: [RFC PATCH net-next 00/12] drop_monitor: Capture dropped packets and metadata
From: Jiri Pirko @ 2019-07-24 15:15 UTC (permalink / raw)
To: Ido Schimmel
Cc: netdev, davem, nhorman, dsahern, roopa, nikolay, jakub.kicinski,
toke, andy, f.fainelli, andrew, vivien.didelot, mlxsw,
Ido Schimmel
In-Reply-To: <20190722183134.14516-1-idosch@idosch.org>
Mon, Jul 22, 2019 at 08:31:22PM CEST, idosch@idosch.org wrote:
>From: Ido Schimmel <idosch@mellanox.com>
>
>So far drop monitor supported only one mode of operation in which a
>summary of recent packet drops is periodically sent to user space as a
>netlink event. The event only includes the drop location (program
>counter) and number of drops in the last interval.
>
>While this mode of operation allows one to understand if the system is
>dropping packets, it is not sufficient if a more detailed analysis is
>required. Both the packet itself and related metadata are missing.
>
>This patchset extends drop monitor with another mode of operation where
>the packet - potentially truncated - and metadata (e.g., drop location,
>timestamp, netdev) are sent to user space as a netlink event. Thanks to
>the extensible nature of netlink, more metadata can be added in the
>future.
>
>To avoid performing expensive operations in the context in which
>kfree_skb() is called, the dropped skbs are cloned and queued on per-CPU
>skb drop list. The list is then processed in process context (using a
>workqueue), where the netlink messages are allocated, prepared and
>finally sent to user space.
>
>As a follow-up, I plan to integrate drop monitor with devlink and allow
>the latter to call into drop monitor to report hardware drops. In the
>future, XDP drops can be added as well, thereby making drop monitor the
>go-to netlink channel for diagnosing all packet drops.
>
>Example usage with patched dropwatch [1] can be found here [2]. Example
>dissection of drop monitor netlink events with patched wireshark [3] can
>be found here [4]. I will submit both changes upstream after the kernel
>changes are accepted.
>
>Patches #1-#6 are just cleanups with no functional changes intended.
>
>Patches #7-#8 perform small refactoring before the actual changes are
>introduced in the last four patches.
In general, this looks very good to me. Thanks!
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox