Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCH bpf-next v4 2/4] bpf: support cloning sk storage on accept()
From: Stanislav Fomichev @ 2019-08-14 17:37 UTC (permalink / raw)
  To: netdev, bpf
  Cc: davem, ast, daniel, Stanislav Fomichev, Martin KaFai Lau,
	Yonghong Song
In-Reply-To: <20190814173751.31806-1-sdf@google.com>

Add a new helper, bpf_sk_storage_clone(), which clones the sk storage
elements whose map was created with BPF_F_CLONE, and call it from
sk_clone_lock().

Cc: Martin KaFai Lau <kafai@fb.com>
Cc: Yonghong Song <yhs@fb.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Stanislav Fomichev <sdf@google.com>
---
 include/net/bpf_sk_storage.h |  10 ++++
 include/uapi/linux/bpf.h     |   3 +
 net/core/bpf_sk_storage.c    | 104 ++++++++++++++++++++++++++++++++++-
 net/core/sock.c              |   9 ++-
 4 files changed, 120 insertions(+), 6 deletions(-)

diff --git a/include/net/bpf_sk_storage.h b/include/net/bpf_sk_storage.h
index b9dcb02e756b..8e4f831d2e52 100644
--- a/include/net/bpf_sk_storage.h
+++ b/include/net/bpf_sk_storage.h
@@ -10,4 +10,14 @@ void bpf_sk_storage_free(struct sock *sk);
 extern const struct bpf_func_proto bpf_sk_storage_get_proto;
 extern const struct bpf_func_proto bpf_sk_storage_delete_proto;
 
+#ifdef CONFIG_BPF_SYSCALL
+int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk);
+#else
+static inline int bpf_sk_storage_clone(const struct sock *sk,
+				       struct sock *newsk)
+{
+	return 0;
+}
+#endif
+
 #endif /* _BPF_SK_STORAGE_H */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 4393bd4b2419..0ef594ac3899 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -337,6 +337,9 @@ enum bpf_attach_type {
 #define BPF_F_RDONLY_PROG	(1U << 7)
 #define BPF_F_WRONLY_PROG	(1U << 8)
 
+/* Clone map from listener for newly accepted socket */
+#define BPF_F_CLONE		(1U << 9)
+
 /* flags for BPF_PROG_QUERY */
 #define BPF_F_QUERY_EFFECTIVE	(1U << 0)
 
diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
index 94c7f77ecb6b..da5639a5bd3b 100644
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -12,6 +12,9 @@
 
 static atomic_t cache_idx;
 
+#define SK_STORAGE_CREATE_FLAG_MASK					\
+	(BPF_F_NO_PREALLOC | BPF_F_CLONE)
+
 struct bucket {
 	struct hlist_head list;
 	raw_spinlock_t lock;
@@ -209,7 +212,6 @@ static void selem_unlink_sk(struct bpf_sk_storage_elem *selem)
 		kfree_rcu(sk_storage, rcu);
 }
 
-/* sk_storage->lock must be held and sk_storage->list cannot be empty */
 static void __selem_link_sk(struct bpf_sk_storage *sk_storage,
 			    struct bpf_sk_storage_elem *selem)
 {
@@ -509,7 +511,7 @@ static int sk_storage_delete(struct sock *sk, struct bpf_map *map)
 	return 0;
 }
 
-/* Called by __sk_destruct() */
+/* Called by __sk_destruct() & bpf_sk_storage_clone() */
 void bpf_sk_storage_free(struct sock *sk)
 {
 	struct bpf_sk_storage_elem *selem;
@@ -557,6 +559,11 @@ static void bpf_sk_storage_map_free(struct bpf_map *map)
 
 	smap = (struct bpf_sk_storage_map *)map;
 
+	/* Note that this map might be concurrently cloned from
+	 * bpf_sk_storage_clone. Wait for any existing bpf_sk_storage_clone
+	 * RCU read section to finish before proceeding. New RCU
+	 * read sections should be prevented via bpf_map_inc_not_zero.
+	 */
 	synchronize_rcu();
 
 	/* bpf prog and the userspace can no longer access this map
@@ -601,7 +608,9 @@ static void bpf_sk_storage_map_free(struct bpf_map *map)
 
 static int bpf_sk_storage_map_alloc_check(union bpf_attr *attr)
 {
-	if (attr->map_flags != BPF_F_NO_PREALLOC || attr->max_entries ||
+	if (attr->map_flags & ~SK_STORAGE_CREATE_FLAG_MASK ||
+	    !(attr->map_flags & BPF_F_NO_PREALLOC) ||
+	    attr->max_entries ||
 	    attr->key_size != sizeof(int) || !attr->value_size ||
 	    /* Enforce BTF for userspace sk dumping */
 	    !attr->btf_key_type_id || !attr->btf_value_type_id)
@@ -739,6 +748,95 @@ static int bpf_fd_sk_storage_delete_elem(struct bpf_map *map, void *key)
 	return err;
 }
 
+static struct bpf_sk_storage_elem *
+bpf_sk_storage_clone_elem(struct sock *newsk,
+			  struct bpf_sk_storage_map *smap,
+			  struct bpf_sk_storage_elem *selem)
+{
+	struct bpf_sk_storage_elem *copy_selem;
+
+	copy_selem = selem_alloc(smap, newsk, NULL, true);
+	if (!copy_selem)
+		return NULL;
+
+	if (map_value_has_spin_lock(&smap->map))
+		copy_map_value_locked(&smap->map, SDATA(copy_selem)->data,
+				      SDATA(selem)->data, true);
+	else
+		copy_map_value(&smap->map, SDATA(copy_selem)->data,
+			       SDATA(selem)->data);
+
+	return copy_selem;
+}
+
+int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk)
+{
+	struct bpf_sk_storage *new_sk_storage = NULL;
+	struct bpf_sk_storage *sk_storage;
+	struct bpf_sk_storage_elem *selem;
+	int ret = 0;
+
+	RCU_INIT_POINTER(newsk->sk_bpf_storage, NULL);
+
+	rcu_read_lock();
+	sk_storage = rcu_dereference(sk->sk_bpf_storage);
+
+	if (!sk_storage || hlist_empty(&sk_storage->list))
+		goto out;
+
+	hlist_for_each_entry_rcu(selem, &sk_storage->list, snode) {
+		struct bpf_sk_storage_elem *copy_selem;
+		struct bpf_sk_storage_map *smap;
+		struct bpf_map *map;
+
+		smap = rcu_dereference(SDATA(selem)->smap);
+		if (!(smap->map.map_flags & BPF_F_CLONE))
+			continue;
+
+		/* Note that for lockless listeners adding new element
+		 * here can race with cleanup in bpf_sk_storage_map_free.
+		 * Try to grab map refcnt to make sure that it's still
+		 * alive and prevent concurrent removal.
+		 */
+		map = bpf_map_inc_not_zero(&smap->map, false);
+		if (IS_ERR(map))
+			continue;
+
+		copy_selem = bpf_sk_storage_clone_elem(newsk, smap, selem);
+		if (!copy_selem) {
+			ret = -ENOMEM;
+			bpf_map_put(map);
+			goto out;
+		}
+
+		if (new_sk_storage) {
+			selem_link_map(smap, copy_selem);
+			__selem_link_sk(new_sk_storage, copy_selem);
+		} else {
+			ret = sk_storage_alloc(newsk, smap, copy_selem);
+			if (ret) {
+				kfree(copy_selem);
+				atomic_sub(smap->elem_size,
+					   &newsk->sk_omem_alloc);
+				bpf_map_put(map);
+				goto out;
+			}
+
+			new_sk_storage = rcu_dereference(copy_selem->sk_storage);
+		}
+		bpf_map_put(map);
+	}
+
+out:
+	rcu_read_unlock();
+
+	/* In case of an error, don't free anything explicitly here, the
+	 * caller is responsible to call bpf_sk_storage_free.
+	 */
+
+	return ret;
+}
+
 BPF_CALL_4(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk,
 	   void *, value, u64, flags)
 {
diff --git a/net/core/sock.c b/net/core/sock.c
index d57b0cc995a0..f5e801a9cea4 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1851,9 +1851,12 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
 			goto out;
 		}
 		RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL);
-#ifdef CONFIG_BPF_SYSCALL
-		RCU_INIT_POINTER(newsk->sk_bpf_storage, NULL);
-#endif
+
+		if (bpf_sk_storage_clone(sk, newsk)) {
+			sk_free_unlock_clone(newsk);
+			newsk = NULL;
+			goto out;
+		}
 
 		newsk->sk_err	   = 0;
 		newsk->sk_err_soft = 0;
-- 
2.23.0.rc1.153.gdeed80330f-goog


^ permalink raw reply related

* [PATCH bpf-next v4 3/4] bpf: sync bpf.h to tools/
From: Stanislav Fomichev @ 2019-08-14 17:37 UTC (permalink / raw)
  To: netdev, bpf
  Cc: davem, ast, daniel, Stanislav Fomichev, Martin KaFai Lau,
	Yonghong Song
In-Reply-To: <20190814173751.31806-1-sdf@google.com>

Sync the new sk storage clone flag (BPF_F_CLONE) to tools/include/uapi/linux/bpf.h.

Cc: Martin KaFai Lau <kafai@fb.com>
Cc: Yonghong Song <yhs@fb.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Stanislav Fomichev <sdf@google.com>
---
 tools/include/uapi/linux/bpf.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 4393bd4b2419..0ef594ac3899 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -337,6 +337,9 @@ enum bpf_attach_type {
 #define BPF_F_RDONLY_PROG	(1U << 7)
 #define BPF_F_WRONLY_PROG	(1U << 8)
 
+/* Clone map from listener for newly accepted socket */
+#define BPF_F_CLONE		(1U << 9)
+
 /* flags for BPF_PROG_QUERY */
 #define BPF_F_QUERY_EFFECTIVE	(1U << 0)
 
-- 
2.23.0.rc1.153.gdeed80330f-goog


^ permalink raw reply related

* [PATCH bpf-next v4 4/4] selftests/bpf: add sockopt clone/inheritance test
From: Stanislav Fomichev @ 2019-08-14 17:37 UTC (permalink / raw)
  To: netdev, bpf
  Cc: davem, ast, daniel, Stanislav Fomichev, Martin KaFai Lau,
	Yonghong Song
In-Reply-To: <20190814173751.31806-1-sdf@google.com>

Add a test that calls setsockopt on the listener socket, which triggers
a BPF program. This BPF program writes to the sk storage and sets
the clone flag. Make sure that sk storage is cloned for a newly
accepted connection.

We have two cloned maps in the tests to make sure we hit both cases
in bpf_sk_storage_clone: first element (sk_storage_alloc) and
non-first element(s) (selem_link_map).

Cc: Martin KaFai Lau <kafai@fb.com>
Cc: Yonghong Song <yhs@fb.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Stanislav Fomichev <sdf@google.com>
---
 tools/testing/selftests/bpf/.gitignore        |   1 +
 tools/testing/selftests/bpf/Makefile          |   3 +-
 .../selftests/bpf/progs/sockopt_inherit.c     |  97 +++++++
 .../selftests/bpf/test_sockopt_inherit.c      | 253 ++++++++++++++++++
 4 files changed, 353 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/bpf/progs/sockopt_inherit.c
 create mode 100644 tools/testing/selftests/bpf/test_sockopt_inherit.c

diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index 90f70d2c7c22..60c9338cd9b4 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -42,4 +42,5 @@ xdping
 test_sockopt
 test_sockopt_sk
 test_sockopt_multi
+test_sockopt_inherit
 test_tcp_rtt
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 29001f944db7..1faad0c3c3c9 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -29,7 +29,7 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test
 	test_cgroup_storage test_select_reuseport test_section_names \
 	test_netcnt test_tcpnotify_user test_sock_fields test_sysctl test_hashmap \
 	test_btf_dump test_cgroup_attach xdping test_sockopt test_sockopt_sk \
-	test_sockopt_multi test_tcp_rtt
+	test_sockopt_multi test_sockopt_inherit test_tcp_rtt
 
 BPF_OBJ_FILES = $(patsubst %.c,%.o, $(notdir $(wildcard progs/*.c)))
 TEST_GEN_FILES = $(BPF_OBJ_FILES)
@@ -111,6 +111,7 @@ $(OUTPUT)/test_cgroup_attach: cgroup_helpers.c
 $(OUTPUT)/test_sockopt: cgroup_helpers.c
 $(OUTPUT)/test_sockopt_sk: cgroup_helpers.c
 $(OUTPUT)/test_sockopt_multi: cgroup_helpers.c
+$(OUTPUT)/test_sockopt_inherit: cgroup_helpers.c
 $(OUTPUT)/test_tcp_rtt: cgroup_helpers.c
 
 .PHONY: force
diff --git a/tools/testing/selftests/bpf/progs/sockopt_inherit.c b/tools/testing/selftests/bpf/progs/sockopt_inherit.c
new file mode 100644
index 000000000000..dede0fcd6102
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/sockopt_inherit.c
@@ -0,0 +1,97 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+char _license[] SEC("license") = "GPL";
+__u32 _version SEC("version") = 1;
+
+#define SOL_CUSTOM			0xdeadbeef
+#define CUSTOM_INHERIT1			0
+#define CUSTOM_INHERIT2			1
+#define CUSTOM_LISTENER			2
+
+struct sockopt_inherit {
+	__u8 val;
+};
+
+struct {
+	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
+	__uint(map_flags, BPF_F_NO_PREALLOC | BPF_F_CLONE);
+	__type(key, int);
+	__type(value, struct sockopt_inherit);
+} cloned1_map SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
+	__uint(map_flags, BPF_F_NO_PREALLOC | BPF_F_CLONE);
+	__type(key, int);
+	__type(value, struct sockopt_inherit);
+} cloned2_map SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
+	__uint(map_flags, BPF_F_NO_PREALLOC);
+	__type(key, int);
+	__type(value, struct sockopt_inherit);
+} listener_only_map SEC(".maps");
+
+static __inline struct sockopt_inherit *get_storage(struct bpf_sockopt *ctx)
+{
+	if (ctx->optname == CUSTOM_INHERIT1)
+		return bpf_sk_storage_get(&cloned1_map, ctx->sk, 0,
+					  BPF_SK_STORAGE_GET_F_CREATE);
+	else if (ctx->optname == CUSTOM_INHERIT2)
+		return bpf_sk_storage_get(&cloned2_map, ctx->sk, 0,
+					  BPF_SK_STORAGE_GET_F_CREATE);
+	else
+		return bpf_sk_storage_get(&listener_only_map, ctx->sk, 0,
+					  BPF_SK_STORAGE_GET_F_CREATE);
+}
+
+SEC("cgroup/getsockopt")
+int _getsockopt(struct bpf_sockopt *ctx)
+{
+	__u8 *optval_end = ctx->optval_end;
+	struct sockopt_inherit *storage;
+	__u8 *optval = ctx->optval;
+
+	if (ctx->level != SOL_CUSTOM)
+		return 1; /* only interested in SOL_CUSTOM */
+
+	if (optval + 1 > optval_end)
+		return 0; /* EPERM, bounds check */
+
+	storage = get_storage(ctx);
+	if (!storage)
+		return 0; /* EPERM, couldn't get sk storage */
+
+	ctx->retval = 0; /* Reset system call return value to zero */
+
+	optval[0] = storage->val;
+	ctx->optlen = 1;
+
+	return 1;
+}
+
+SEC("cgroup/setsockopt")
+int _setsockopt(struct bpf_sockopt *ctx)
+{
+	__u8 *optval_end = ctx->optval_end;
+	struct sockopt_inherit *storage;
+	__u8 *optval = ctx->optval;
+
+	if (ctx->level != SOL_CUSTOM)
+		return 1; /* only interested in SOL_CUSTOM */
+
+	if (optval + 1 > optval_end)
+		return 0; /* EPERM, bounds check */
+
+	storage = get_storage(ctx);
+	if (!storage)
+		return 0; /* EPERM, couldn't get sk storage */
+
+	storage->val = optval[0];
+	ctx->optlen = -1;
+
+	return 1;
+}
diff --git a/tools/testing/selftests/bpf/test_sockopt_inherit.c b/tools/testing/selftests/bpf/test_sockopt_inherit.c
new file mode 100644
index 000000000000..1bf699815b9b
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_sockopt_inherit.c
@@ -0,0 +1,253 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <error.h>
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <pthread.h>
+
+#include <linux/filter.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "bpf_rlimit.h"
+#include "bpf_util.h"
+#include "cgroup_helpers.h"
+
+#define CG_PATH				"/sockopt_inherit"
+#define SOL_CUSTOM			0xdeadbeef
+#define CUSTOM_INHERIT1			0
+#define CUSTOM_INHERIT2			1
+#define CUSTOM_LISTENER			2
+
+static int connect_to_server(int server_fd)
+{
+	struct sockaddr_storage addr;
+	socklen_t len = sizeof(addr);
+	int fd;
+
+	fd = socket(AF_INET, SOCK_STREAM, 0);
+	if (fd < 0) {
+		log_err("Failed to create client socket");
+		return -1;
+	}
+
+	if (getsockname(server_fd, (struct sockaddr *)&addr, &len)) {
+		log_err("Failed to get server addr");
+		goto out;
+	}
+
+	if (connect(fd, (const struct sockaddr *)&addr, len) < 0) {
+		log_err("Fail to connect to server");
+		goto out;
+	}
+
+	return fd;
+
+out:
+	close(fd);
+	return -1;
+}
+
+static int verify_sockopt(int fd, int optname, const char *msg, char expected)
+{
+	socklen_t optlen = 1;
+	char buf = 0;
+	int err;
+
+	err = getsockopt(fd, SOL_CUSTOM, optname, &buf, &optlen);
+	if (err) {
+		log_err("%s: failed to call getsockopt", msg);
+		return 1;
+	}
+
+	printf("%s %d: got=0x%x ? expected=0x%x\n", msg, optname, buf, expected);
+
+	if (buf != expected) {
+		log_err("%s: unexpected getsockopt value %d != %d", msg,
+			buf, expected);
+		return 1;
+	}
+
+	return 0;
+}
+
+static void *server_thread(void *arg)
+{
+	struct sockaddr_storage addr;
+	socklen_t len = sizeof(addr);
+	int fd = *(int *)arg;
+	int client_fd;
+	int err = 0;
+
+	if (listen(fd, 1) < 0)
+		error(1, errno, "Failed to listed on socket");
+
+	err += verify_sockopt(fd, CUSTOM_INHERIT1, "listen", 1);
+	err += verify_sockopt(fd, CUSTOM_INHERIT2, "listen", 1);
+	err += verify_sockopt(fd, CUSTOM_LISTENER, "listen", 1);
+
+	client_fd = accept(fd, (struct sockaddr *)&addr, &len);
+	if (client_fd < 0)
+		error(1, errno, "Failed to accept client");
+
+	err += verify_sockopt(client_fd, CUSTOM_INHERIT1, "accept", 1);
+	err += verify_sockopt(client_fd, CUSTOM_INHERIT2, "accept", 1);
+	err += verify_sockopt(client_fd, CUSTOM_LISTENER, "accept", 0);
+
+	close(client_fd);
+
+	return (void *)(long)err;
+}
+
+static int start_server(void)
+{
+	struct sockaddr_in addr = {
+		.sin_family = AF_INET,
+		.sin_addr.s_addr = htonl(INADDR_LOOPBACK),
+	};
+	char buf;
+	int err;
+	int fd;
+	int i;
+
+	fd = socket(AF_INET, SOCK_STREAM, 0);
+	if (fd < 0) {
+		log_err("Failed to create server socket");
+		return -1;
+	}
+
+	for (i = CUSTOM_INHERIT1; i <= CUSTOM_LISTENER; i++) {
+		buf = 0x01;
+		err = setsockopt(fd, SOL_CUSTOM, i, &buf, 1);
+		if (err) {
+			log_err("Failed to call setsockopt(%d)", i);
+			close(fd);
+			return -1;
+		}
+	}
+
+	if (bind(fd, (const struct sockaddr *)&addr, sizeof(addr)) < 0) {
+		log_err("Failed to bind socket");
+		close(fd);
+		return -1;
+	}
+
+	return fd;
+}
+
+static int prog_attach(struct bpf_object *obj, int cgroup_fd, const char *title)
+{
+	enum bpf_attach_type attach_type;
+	enum bpf_prog_type prog_type;
+	struct bpf_program *prog;
+	int err;
+
+	err = libbpf_prog_type_by_name(title, &prog_type, &attach_type);
+	if (err) {
+		log_err("Failed to deduct types for %s BPF program", title);
+		return -1;
+	}
+
+	prog = bpf_object__find_program_by_title(obj, title);
+	if (!prog) {
+		log_err("Failed to find %s BPF program", title);
+		return -1;
+	}
+
+	err = bpf_prog_attach(bpf_program__fd(prog), cgroup_fd,
+			      attach_type, 0);
+	if (err) {
+		log_err("Failed to attach %s BPF program", title);
+		return -1;
+	}
+
+	return 0;
+}
+
+static int run_test(int cgroup_fd)
+{
+	struct bpf_prog_load_attr attr = {
+		.file = "./sockopt_inherit.o",
+	};
+	int server_fd = -1, client_fd;
+	struct bpf_object *obj;
+	void *server_err;
+	pthread_t tid;
+	int ignored;
+	int err;
+
+	err = bpf_prog_load_xattr(&attr, &obj, &ignored);
+	if (err) {
+		log_err("Failed to load BPF object");
+		return -1;
+	}
+
+	err = prog_attach(obj, cgroup_fd, "cgroup/getsockopt");
+	if (err)
+		goto close_bpf_object;
+
+	err = prog_attach(obj, cgroup_fd, "cgroup/setsockopt");
+	if (err)
+		goto close_bpf_object;
+
+	server_fd = start_server();
+	if (server_fd < 0) {
+		err = -1;
+		goto close_bpf_object;
+	}
+
+	pthread_create(&tid, NULL, server_thread, (void *)&server_fd);
+
+	client_fd = connect_to_server(server_fd);
+	if (client_fd < 0) {
+		err = -1;
+		goto close_server_fd;
+	}
+
+	err += verify_sockopt(client_fd, CUSTOM_INHERIT1, "connect", 0);
+	err += verify_sockopt(client_fd, CUSTOM_INHERIT2, "connect", 0);
+	err += verify_sockopt(client_fd, CUSTOM_LISTENER, "connect", 0);
+
+	pthread_join(tid, &server_err);
+
+	err += (int)(long)server_err;
+
+	close(client_fd);
+
+close_server_fd:
+	close(server_fd);
+close_bpf_object:
+	bpf_object__close(obj);
+	return err;
+}
+
+int main(int args, char **argv)
+{
+	int cgroup_fd;
+	int err = EXIT_SUCCESS;
+
+	if (setup_cgroup_environment())
+		return err;
+
+	cgroup_fd = create_and_get_cgroup(CG_PATH);
+	if (cgroup_fd < 0)
+		goto cleanup_cgroup_env;
+
+	if (join_cgroup(CG_PATH))
+		goto cleanup_cgroup;
+
+	if (run_test(cgroup_fd))
+		err = EXIT_FAILURE;
+
+	printf("test_sockopt_inherit: %s\n",
+	       err == EXIT_SUCCESS ? "PASSED" : "FAILED");
+
+cleanup_cgroup:
+	close(cgroup_fd);
+cleanup_cgroup_env:
+	cleanup_cgroup_environment();
+	return err;
+}
-- 
2.23.0.rc1.153.gdeed80330f-goog


^ permalink raw reply related

* [PATCH] net: usbnet: fix a memory leak bug
From: Wenwen Wang @ 2019-08-14 17:41 UTC (permalink / raw)
  To: Wenwen Wang
  Cc: Oliver Neukum, David S. Miller,
	open list:USB "USBNET" DRIVER FRAMEWORK,
	open list:USB NETWORKING DRIVERS, open list

In usbnet_start_xmit(), 'urb->sg' is allocated through kmalloc_array() by
invoking build_dma_sg(). Later on, if 'CONFIG_PM' is defined and the if
branch is taken, the execution will go to the label 'deferred'. However,
'urb->sg' is not deallocated on this execution path, leading to a memory
leak bug.

Signed-off-by: Wenwen Wang <wenwen@cs.uga.edu>
---
 drivers/net/usb/usbnet.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c
index 72514c4..f17fafa 100644
--- a/drivers/net/usb/usbnet.c
+++ b/drivers/net/usb/usbnet.c
@@ -1433,6 +1433,7 @@ netdev_tx_t usbnet_start_xmit (struct sk_buff *skb,
 		usb_anchor_urb(urb, &dev->deferred);
 		/* no use to process more packets */
 		netif_stop_queue(net);
+		kfree(urb->sg);
 		usb_put_urb(urb);
 		spin_unlock_irqrestore(&dev->txq.lock, flags);
 		netdev_dbg(dev->net, "Delaying transmission for resumption\n");
-- 
2.7.4


^ permalink raw reply related

* Re: [PATCH net-next 1/5] RDS: Re-add pf/sol access via sysctl
From: Gerd Rausch @ 2019-08-14 17:41 UTC (permalink / raw)
  To: Doug Ledford, Santosh Shilimkar, netdev, linux-rdma, rds-devel
  Cc: David Miller
In-Reply-To: <53b40b359d18dd73a6cf264aa8013d33547b593f.camel@redhat.com>

Hi Doug,

On 14/08/2019 08.56, Doug Ledford wrote:
> Good Lord...RDS was taken into the kernel in Feb of 2009, so over 10
> years ago.  The patch to put PF_RDS/AF_RDS/SOL_RDS was taken into
> include/linux/socket.h Feb 26, 2009.  The RDS ports were allocated by
> IANA on Feb 27 and May 20, 2009.  And you *still* have software that
> needs this?

I'll let Santosh elaborate on this, but it looks like we (i.e. Oracle) do:

From our Gerrit, posted on Aug 08, 2019, 10:39:29 AM UTC-07:00:
--------%<--------%<--------%<--------%<--------%<--------%<--------
Santosh Shilimkar Acked-by +1
Patch Set 1: Acked-by+1
Unfortunately we need to keep these around.
--------%<--------%<--------%<--------%<--------%<--------%<--------

> As of today, does your current build of Oracle software still require this,
> or have you at least fixed it up in your modern builds?
> 

I'll let Santosh answer that question as well.

Thanks,

  Gerd

^ permalink raw reply

* Re: [PATCH v4 01/14] net: phy: adin: add support for Analog Devices PHYs
From: Florian Fainelli @ 2019-08-14 17:47 UTC (permalink / raw)
  To: Alexandru Ardelean, netdev, devicetree, linux-kernel
  Cc: davem, robh+dt, mark.rutland, hkallweit1, andrew
In-Reply-To: <20190812112350.15242-2-alexandru.ardelean@analog.com>



On 8/12/2019 4:23 AM, Alexandru Ardelean wrote:
> This change adds support for Analog Devices Industrial Ethernet PHYs.
> Particularly the PHYs this driver adds support for:
>  * ADIN1200 - Robust, Industrial, Low Power 10/100 Ethernet PHY
>  * ADIN1300 - Robust, Industrial, Low Latency 10/100/1000 Gigabit
>    Ethernet PHY
> 
> The 2 chips are register compatible with one another. The main difference
> being that ADIN1200 doesn't operate in gigabit mode.
> 
> The chips can be operated by the Generic PHY driver as well via the
> standard IEEE PHY registers (0x0000 - 0x000F) which are supported by the
> kernel as well. This assumes that configuration of the PHY has been done
> completely in HW, according to spec.
> 
> Configuration can also be done via registers, which will be supported by
> this driver.
> 
> Datasheets:
>   https://www.analog.com/media/en/technical-documentation/data-sheets/ADIN1300.pdf
>   https://www.analog.com/media/en/technical-documentation/data-sheets/ADIN1200.pdf
> 
> Reviewed-by: Andrew Lunn <andrew@lunn.ch>
> Signed-off-by: Alexandru Ardelean <alexandru.ardelean@analog.com>

Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
-- 
Florian

^ permalink raw reply

* Re: [PATCH v4 02/14] net: phy: adin: hook genphy_{suspend,resume} into the driver
From: Florian Fainelli @ 2019-08-14 17:47 UTC (permalink / raw)
  To: Alexandru Ardelean, netdev, devicetree, linux-kernel
  Cc: davem, robh+dt, mark.rutland, hkallweit1, andrew
In-Reply-To: <20190812112350.15242-3-alexandru.ardelean@analog.com>



On 8/12/2019 4:23 AM, Alexandru Ardelean wrote:
> The chip supports standard suspend/resume via BMCR reg.
> Hook these functions into the `adin` driver.
> 
> Reviewed-by: Andrew Lunn <andrew@lunn.ch>
> Signed-off-by: Alexandru Ardelean <alexandru.ardelean@analog.com>

Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
-- 
Florian

^ permalink raw reply

* Re: [PATCH v4 03/14] net: phy: adin: add support for interrupts
From: Florian Fainelli @ 2019-08-14 17:48 UTC (permalink / raw)
  To: Alexandru Ardelean, netdev, devicetree, linux-kernel
  Cc: davem, robh+dt, mark.rutland, hkallweit1, andrew
In-Reply-To: <20190812112350.15242-4-alexandru.ardelean@analog.com>



On 8/12/2019 4:23 AM, Alexandru Ardelean wrote:
> This change hooks link-status-change interrupts to phylib.
> 
> Reviewed-by: Andrew Lunn <andrew@lunn.ch>
> Signed-off-by: Alexandru Ardelean <alexandru.ardelean@analog.com>

Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
-- 
Florian

^ permalink raw reply

* Re: [PATCH v4 04/14] net: phy: adin: add {write,read}_mmd hooks
From: Florian Fainelli @ 2019-08-14 17:49 UTC (permalink / raw)
  To: Alexandru Ardelean, netdev, devicetree, linux-kernel
  Cc: davem, robh+dt, mark.rutland, hkallweit1, andrew
In-Reply-To: <20190812112350.15242-5-alexandru.ardelean@analog.com>



On 8/12/2019 4:23 AM, Alexandru Ardelean wrote:
> Both ADIN1200 & ADIN1300 support Clause 45 access for some registers.
> The Extended Management Interface (EMI) registers are accessible via both
> Clause 45 (at register MDIO_MMD_VEND1) and using Clause 22.
> 
> The Clause 22 access for MMD regs differs from the standard one defined by
> 802.3. The ADIN PHYs  use registers ExtRegPtr (0x0010) and ExtRegData
> (0x0011) to access Clause 45 & EMI registers.
> 
> The indirect access is done via the following mechanism (for both R/W):
> 1. Write the address of the register in the ExtRegPtr
> 2. Read/write the value of the register via reg ExtRegData
> 
> This mechanism is needed to manage configuration of chip settings and to
> access EEE registers via Clause 22.
> 
> Since Clause 45 access will likely never be used, it is not implemented via
> this hook.
> 
> Signed-off-by: Alexandru Ardelean <alexandru.ardelean@analog.com>

Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
-- 
Florian

^ permalink raw reply

* Re: [PATCH v4 05/14] net: phy: adin: configure RGMII/RMII/MII modes on config
From: Florian Fainelli @ 2019-08-14 17:50 UTC (permalink / raw)
  To: Alexandru Ardelean, netdev, devicetree, linux-kernel
  Cc: davem, robh+dt, mark.rutland, hkallweit1, andrew
In-Reply-To: <20190812112350.15242-6-alexandru.ardelean@analog.com>



On 8/12/2019 4:23 AM, Alexandru Ardelean wrote:
> The ADIN1300 chip supports RGMII, RMII & MII modes. Default (if
> unconfigured) is RGMII.
> This change adds support for configuring these modes via the device
> registers.
> 
> For RGMII with internal delays (modes RGMII_ID, RGMII_TXID, RGMII_RXID),
> the default delay is 2 ns. The delay is configurable; support for
> configuring it will be added in a subsequent change.
> 
> Reviewed-by: Andrew Lunn <andrew@lunn.ch>
> Signed-off-by: Alexandru Ardelean <alexandru.ardelean@analog.com>

Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
-- 
Florian

^ permalink raw reply

* Re: [PATCH v2 bpf-next 1/4] bpf: unprivileged BPF access via /dev/bpf
From: Andy Lutomirski @ 2019-08-14 17:51 UTC (permalink / raw)
  To: Alexei Starovoitov, Daniel Colascione
  Cc: Andy Lutomirski, Song Liu, Kees Cook, Networking, bpf,
	Alexei Starovoitov, Daniel Borkmann, Kernel Team, Lorenz Bauer,
	Jann Horn, Greg KH, Linux API, LSM List
In-Reply-To: <20190814005737.4qg6wh4a53vmso2v@ast-mbp>

On Tue, Aug 13, 2019 at 5:57 PM Alexei Starovoitov
<alexei.starovoitov@gmail.com> wrote:

> hmm. No. Kernel developers should not make any assumptions.
> They should guide their design by real use cases instead. That includes studying
> what people do now and the hacks they use to work around the lack of interfaces.
> Effectively bpf is root only. There are no unpriv users.
> These root applications go out of their way to reduce privileges
> while they still want to use bpf. That is the need that /dev/bpf is solving.
>
> >
> > > Containers are not providing the level of security that is enough
> > > to run arbitrary code. VMs can do it better, but cpu bugs don't make it easy.
> > > Containers are used to make production systems safer.
> > > Some people call it more 'secure', but it's clearly not secure for
> > > arbitrary code and that is what kernel.unprivileged_bpf_disabled allows.
> > > When we say 'unprivileged bpf' we really mean arbitrary malicious bpf program.
> > > It's been a constant source of pain. The constant blinding, randomization,
> > > verifier speculative analysis, all spectre v1, v2, v4 mitigations
> > > are simply not worth it. It's a lot of complex kernel code without users.
> >
> > Seccomp really will want eBPF some day, and it should work without
> > privilege.  Maybe it should be a restricted subset of eBPF, and
> > Spectre will always be an issue until dramatically better hardware
> > shows up, but I think people will want the ability for regular
> > programs to load eBPF seccomp programs.
>
> I'm absolutely against using eBPF in seccomp.
> Precisely due to discussions like the current one.

I still think I don't really agree with your overall premise.

If eBPF is genuinely not usable by programs that are not fully trusted
by the admin, then no kernel changes at all are needed.  Programs that
want to reduce their own privileges can easily fork() a privileged
subprocess or run a little helper to which they delegate BPF
operations.  This is far more flexible than anything that will ever be
in the kernel because it allows the helper to verify that the rest of
the program is doing exactly what it's supposed to and restrict eBPF
operations to exactly the subset that is needed.  So a container
manager or network manager that drops some privilege could have a
little bpf-helper that manages its BPF XDP, firewalling, etc
configuration.  The two processes would talk over a socketpair.

The interesting cases you're talking about really *do* involve
unprivileged or less privileged eBPF, though.  Let's see:

systemd --user: systemd --user *is not privileged at all*.  There's no
issue of reducing privilege, since systemd --user doesn't have any
privilege to begin with.  But systemd supports some eBPF features, and
presumably it would like to support them in the systemd --user case.
This is unprivileged eBPF.

Seccomp.  Seccomp already uses cBPF, which is a form of BPF although
it doesn't involve the bpf() syscall.  There are some seccomp
proposals in the works that will want some stuff from eBPF.  In
particular, the ability to call seccomp-specific bpf functions from a
seccomp program could be very nice. Similarly, the ability to use the
enhanced instruction set and maybe even *read* maps would be nice.  I
do think that seccomp will continue to want its programs to be
stateless.

So it's a bit of a chicken-and-egg situation.  There aren't major
unprivileged eBPF users because the kernel support isn't there.

>
> >
> > > Hence I prefer this /dev/bpf mechanism to be as simple as possible.
> > > The applications that will use it are going to be just as trusted as systemd.
> >
> > I still don't understand your systemd example.  systemd --users is not
> > trusted systemwide in any respect.  The main PID 1 systemd is root.
> > No matter how you dice it, granting a user systemd instance extra bpf
> > access is tantamount to granting the user extra bpf access in general.
>
> People use systemd --user while their kernels have 'undef CONFIG_USER_NS'.

I don't know what you're getting at.  I'm typing this email in a
browser running under a systemd --user instance, and there are no user
namespaces involved.

$ ps -u luto |grep systemd
 1944 ?        00:00:02 systemd
$ stat /proc/1944
...
Access: (0555/dr-xr-xr-x)  Uid: ( 1000/    luto)   Gid: ( 1000/    luto)
Context: unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023
$ gdb -p 1944
[snipped tons of output, but gdb works fine like this]

systemd --user is not privileged.  Giving it /dev/bpf as imagined by
the current set of patches would be a gaping security hole.

>
> I think there should be no unprivileged bpf at all,
> because over all these years we've seen zero use cases.
> Hence all new features are root only.

You're the maintainer.  If you feel this way, then I think you should
just drop the /dev/bpf idea entirely and have userspace manage all of
this by itself.  It will remain extremely awkward for containers and
especially nested containers to use eBPF.

--Andy

^ permalink raw reply

* Re: [PATCH v4 06/14] net: phy: adin: make RGMII internal delays configurable
From: Florian Fainelli @ 2019-08-14 17:52 UTC (permalink / raw)
  To: Alexandru Ardelean, netdev, devicetree, linux-kernel
  Cc: davem, robh+dt, mark.rutland, hkallweit1, andrew
In-Reply-To: <20190812112350.15242-7-alexandru.ardelean@analog.com>



On 8/12/2019 4:23 AM, Alexandru Ardelean wrote:
> The internal delays for the RGMII are configurable for both RX & TX. This
> change adds support for configuring them via device-tree (or ACPI).
> 
> Reviewed-by: Andrew Lunn <andrew@lunn.ch>
> Signed-off-by: Alexandru Ardelean <alexandru.ardelean@analog.com>

Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
-- 
Florian

^ permalink raw reply

* Re: [PATCH v4 07/14] net: phy: adin: make RMII fifo depth configurable
From: Florian Fainelli @ 2019-08-14 17:53 UTC (permalink / raw)
  To: Alexandru Ardelean, netdev, devicetree, linux-kernel
  Cc: davem, robh+dt, mark.rutland, hkallweit1, andrew
In-Reply-To: <20190812112350.15242-8-alexandru.ardelean@analog.com>



On 8/12/2019 4:23 AM, Alexandru Ardelean wrote:
> The FIFO depth can be configured for the RMII mode. This change adds
> support for doing this via device-tree (or ACPI).
> 
> Reviewed-by: Andrew Lunn <andrew@lunn.ch>
> Signed-off-by: Alexandru Ardelean <alexandru.ardelean@analog.com>

Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
-- 
Florian

^ permalink raw reply

* Re: [PATCH v4 08/14] net: phy: adin: add support MDI/MDIX/Auto-MDI selection
From: Florian Fainelli @ 2019-08-14 17:54 UTC (permalink / raw)
  To: Alexandru Ardelean, netdev, devicetree, linux-kernel
  Cc: davem, robh+dt, mark.rutland, hkallweit1, andrew
In-Reply-To: <20190812112350.15242-9-alexandru.ardelean@analog.com>



On 8/12/2019 4:23 AM, Alexandru Ardelean wrote:
> The ADIN PHYs support automatic MDI/MDIX negotiation. By default this is
> disabled, so this is enabled at `config_init`.
> 
> This is controlled via the PHY Control 1 register.
> The supported modes are:
>   1. Manual MDI
>   2. Manual MDIX
>   3. Auto MDIX - prefer MDIX
>   4. Auto MDIX - prefer MDI
> 
> The phydev mdix & mdix_ctrl fields include modes 3 & 4 into a single
> auto-mode. So, the default mode this driver enables is 4 when Auto-MDI mode
> is used.
> 
> When detecting MDI/MDIX mode, a combination of the PHY Control 1 register
> and PHY Status 1 register is used to determine the correct MDI/MDIX mode.
> 
> If Auto-MDI mode is not set, then the manual MDI/MDIX mode is returned.
> If Auto-MDI mode is set, then MDIX mode is returned differs from the
> preferred MDI/MDIX mode.
> This covers all cases where:
>   1. MDI preferred  & Pair01Swapped   == MDIX
>   2. MDIX preferred & Pair01Swapped   == MDI
>   3. MDI preferred  & ! Pair01Swapped == MDIX
>   4. MDIX preferred & ! Pair01Swapped == MDI
> 
> The preferred MDI/MDIX mode is not configured via SW, but can be configured
> via HW pins. Note that the `Pair01Swapped` is the Green-Yellow physical
> pairs.
> 
> Signed-off-by: Alexandru Ardelean <alexandru.ardelean@analog.com>

Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
-- 
Florian

^ permalink raw reply

* Re: [PATCH v4 09/14] net: phy: adin: add EEE translation layer from Clause 45 to Clause 22
From: Florian Fainelli @ 2019-08-14 17:55 UTC (permalink / raw)
  To: Alexandru Ardelean, netdev, devicetree, linux-kernel
  Cc: davem, robh+dt, mark.rutland, hkallweit1, andrew
In-Reply-To: <20190812112350.15242-10-alexandru.ardelean@analog.com>



On 8/12/2019 4:23 AM, Alexandru Ardelean wrote:
> The ADIN1200 & ADIN1300 PHYs support EEE by using standard Clause 45 access
> to access MMD registers for EEE.
> 
> The EEE register addresses (when using Clause 22) are available at
> different addresses (than Clause 45), and since accessing these regs (via
> Clause 22) needs a special mechanism, a translation table is required to
> convert these addresses.
> 
> For Clause 45, this is not needed since the driver will likely never use
> this access mode.
> 
> Signed-off-by: Alexandru Ardelean <alexandru.ardelean@analog.com>

Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
-- 
Florian

^ permalink raw reply

* Re: [PATCH v4 11/14] net: phy: adin: implement Energy Detect Powerdown mode
From: Florian Fainelli @ 2019-08-14 17:57 UTC (permalink / raw)
  To: Alexandru Ardelean, netdev, devicetree, linux-kernel
  Cc: davem, robh+dt, mark.rutland, hkallweit1, andrew
In-Reply-To: <20190812112350.15242-12-alexandru.ardelean@analog.com>



On 8/12/2019 4:23 AM, Alexandru Ardelean wrote:
> The ADIN PHYs support Energy Detect Powerdown mode, which puts the PHY into
> a low power mode when there is no signal on the wire (typically cable
> unplugged).
> This behavior is enabled by default, but can be disabled via device
> property.

We could consider adding a PHY tunable, having this as a Device Tree
property amounts to putting a policy inside DT, which is frowned upon.

> 
> Signed-off-by: Alexandru Ardelean <alexandru.ardelean@analog.com>

Other than that, the code looks fine:

Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
-- 
Florian

^ permalink raw reply

* Re: [PATCH bpf-next v2] net: Don't call XDP_SETUP_PROG when nothing is changed
From: Jonathan Lemon @ 2019-08-14 17:57 UTC (permalink / raw)
  To: Maxim Mikityanskiy
  Cc: Alexei Starovoitov, Daniel Borkmann, Jakub Kicinski, bpf, netdev,
	David S. Miller, Björn Töpel, Saeed Mahameed,
	Jesper Dangaard Brouer, John Fastabend, Martin KaFai Lau,
	Song Liu, Yonghong Song
In-Reply-To: <20190814143352.3759-1-maximmi@mellanox.com>



On 14 Aug 2019, at 7:34, Maxim Mikityanskiy wrote:

> Don't uninstall an XDP program when none is installed, and don't install
> an XDP program that has the same ID as the one already installed.
>
> dev_change_xdp_fd doesn't perform any checks in case it uninstalls an
> XDP program. It means that the driver's ndo_bpf can be called with
> XDP_SETUP_PROG asking to set it to NULL even if it's already NULL. This
> case happens if the user runs `ip link set eth0 xdp off` when there is
> no XDP program attached.
>
> The symmetrical case is possible when the user tries to set the program
> that is already set.
>
> The drivers typically perform some heavy operations on XDP_SETUP_PROG,
> so they all have to handle these cases internally to return early if
> they happen. This patch puts this check into the kernel code, so that
> all drivers will benefit from it.
>
> Signed-off-by: Maxim Mikityanskiy <maximmi@mellanox.com>

Acked-by: Jonathan Lemon <jonathan.lemon@gmail.com>

^ permalink raw reply

* Re: [PATCH v4 12/14] net: phy: adin: implement downshift configuration via phy-tunable
From: Florian Fainelli @ 2019-08-14 17:58 UTC (permalink / raw)
  To: Alexandru Ardelean, netdev, devicetree, linux-kernel
  Cc: davem, robh+dt, mark.rutland, hkallweit1, andrew
In-Reply-To: <20190812112350.15242-13-alexandru.ardelean@analog.com>



On 8/12/2019 4:23 AM, Alexandru Ardelean wrote:
> Down-speed auto-negotiation may not always be enabled, in which case the
> PHY won't down-shift to 100 or 10 during auto-negotiation.
> 
> This change enables downshift and configures the number of retries to
> default 4 (which is also in the datasheet).
> 
> The downshift control mechanism can also be controlled via the phy-tunable
> interface (ETHTOOL_PHY_DOWNSHIFT control).
> 
> The change has been adapted from the Aquantia PHY driver.
> 
> Signed-off-by: Alexandru Ardelean <alexandru.ardelean@analog.com>

Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
-- 
Florian

^ permalink raw reply

* Re: [PATCH net-next 1/5] RDS: Re-add pf/sol access via sysctl
From: santosh.shilimkar @ 2019-08-14 18:01 UTC (permalink / raw)
  To: Doug Ledford, Gerd Rausch, netdev, linux-rdma, rds-devel; +Cc: David Miller
In-Reply-To: <53b40b359d18dd73a6cf264aa8013d33547b593f.camel@redhat.com>



On 8/14/19 8:56 AM, Doug Ledford wrote:
> On Tue, 2019-08-13 at 11:20 -0700, Gerd Rausch wrote:
>> From: Andy Grover <andy.grover@oracle.com>
>> Date: Tue, 24 Nov 2009 15:35:51 -0800
>>
>> Although RDS has an official PF_RDS value now, existing software
>> expects to look for rds sysctls to determine it. We need to maintain
>> these for now, for backwards compatibility.
>>
>> Signed-off-by: Andy Grover <andy.grover@oracle.com>
>> Signed-off-by: Gerd Rausch <gerd.rausch@oracle.com>
>> ---
>>   net/rds/sysctl.c | 21 +++++++++++++++++++++
>>   1 file changed, 21 insertions(+)
>>
>> diff --git a/net/rds/sysctl.c b/net/rds/sysctl.c
>> index e381bbcd9cc1..9760292a0af4 100644
>> --- a/net/rds/sysctl.c
>> +++ b/net/rds/sysctl.c
>> @@ -49,6 +49,13 @@ unsigned int  rds_sysctl_max_unacked_bytes = (16 <<
>> 20);
>>   
>>   unsigned int rds_sysctl_ping_enable = 1;
>>   
>> +/*
>> + * We have official values, but must maintain the sysctl interface
>> for existing
>> + * software that expects to find these values here.
>> + */
>> +static int rds_sysctl_pf_rds = PF_RDS;
>> +static int rds_sysctl_sol_rds = SOL_RDS;
>> +
>>   static struct ctl_table rds_sysctl_rds_table[] = {
>>   	{
>>   		.procname       = "reconnect_min_delay_ms",
>> @@ -68,6 +75,20 @@ static struct ctl_table rds_sysctl_rds_table[] = {
>>   		.extra1		= &rds_sysctl_reconnect_min_jiffies,
>>   		.extra2		= &rds_sysctl_reconnect_max,
>>   	},
>> +	{
>> +		.procname       = "pf_rds",
>> +		.data		= &rds_sysctl_pf_rds,
>> +		.maxlen         = sizeof(int),
>> +		.mode           = 0444,
>> +		.proc_handler   = &proc_dointvec,
>> +	},
>> +	{
>> +		.procname       = "sol_rds",
>> +		.data		= &rds_sysctl_sol_rds,
>> +		.maxlen         = sizeof(int),
>> +		.mode           = 0444,
>> +		.proc_handler   = &proc_dointvec,
>> +	},
>>   	{
>>   		.procname	= "max_unacked_packets",
>>   		.data		= &rds_sysctl_max_unacked_packets,
> 
> Good Lord...RDS was taken into the kernel in Feb of 2009, so over 10
> years ago.  The patch to put PF_RDS/AF_RDS/SOL_RDS was taken into
> include/linux/socket.h Feb 26, 2009.  The RDS ports were allocated by
> IANA on Feb 27 and May 20, 2009.  And you *still* have software that
> needs this?  The only software that has ever used RDS was Oracle
> software.  I would have expected you guys to update your source code to
> do the right thing long before now.  In fact, I would expect you were
> ready to retire all of the legacy software that needs this by now.  As
> of today, does your current build of Oracle software still require this,
> or have you at least fixed it up in your modern builds?
> 
Some of the application software was released before 2009 and ended up
using these proc entries from downstream kernel. The newer lib/app
using RDS don't use these. Unfortunately a lot of customers still use
Oracle 9, 10, 11 which were released before 2007 and run these apps
on modern kernels.

Regards,
Santosh

^ permalink raw reply

* [PATCH] cx82310_eth: fix a memory leak bug
From: Wenwen Wang @ 2019-08-14 18:03 UTC (permalink / raw)
  To: Wenwen Wang
  Cc: David S. Miller, Thomas Gleixner, Steve Winslow, Jilayne Lovejoy,
	Kate Stewart, open list:USB NETWORKING DRIVERS,
	open list:NETWORKING DRIVERS, open list

In cx82310_bind(), 'dev->partial_data' is allocated through kmalloc().
Then, the execution waits for the firmware to become ready. If the firmware
is not ready in time, the execution is terminated. However, the allocated
'dev->partial_data' is not deallocated on this path, leading to a memory
leak bug. To fix this issue, free 'dev->partial_data' before returning the
error.

Signed-off-by: Wenwen Wang <wenwen@cs.uga.edu>
---
 drivers/net/usb/cx82310_eth.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/usb/cx82310_eth.c b/drivers/net/usb/cx82310_eth.c
index 5519248..32b08b1 100644
--- a/drivers/net/usb/cx82310_eth.c
+++ b/drivers/net/usb/cx82310_eth.c
@@ -163,7 +163,8 @@ static int cx82310_bind(struct usbnet *dev, struct usb_interface *intf)
 	}
 	if (!timeout) {
 		dev_err(&udev->dev, "firmware not ready in time\n");
-		return -ETIMEDOUT;
+		ret = -ETIMEDOUT;
+		goto err;
 	}
 
 	/* enable ethernet mode (?) */
-- 
2.7.4


^ permalink raw reply related

* Re: [PATCH v4 9/9] Input: add IOC3 serio driver
From: Dmitry Torokhov @ 2019-08-14 18:04 UTC (permalink / raw)
  To: Jonas Gorski
  Cc: Thomas Bogendoerfer, Ralf Baechle, Paul Burton, James Hogan,
	Lee Jones, David S. Miller, Srinivas Kandagatla, Alessandro Zummo,
	Alexandre Belloni, Greg Kroah-Hartman, Jiri Slaby,
	Evgeniy Polyakov, linux-mips, linux-kernel, linux-input,
	Network Development, linux-rtc, linux-serial
In-Reply-To: <CAOiHx=mjLpLg9r=mE25T7RQFNRT8wEPkRcy2ZkfT7H=Y5RT-vw@mail.gmail.com>

On Wed, Aug 14, 2019 at 06:57:55PM +0200, Jonas Gorski wrote:
> On Wed, 14 Aug 2019 at 16:37, Thomas Bogendoerfer <tbogendoerfer@suse.de> wrote:
> >
> > On Wed, 14 Aug 2019 15:20:14 +0200
> > Jonas Gorski <jonas.gorski@gmail.com> wrote:
> >
> > > > +       d = devm_kzalloc(&pdev->dev, sizeof(*d), GFP_KERNEL);
> > >
> > > &pdev->dev => dev
> >
> > will change.
> >
> > >
> > > > +       if (!d)
> > > > +               return -ENOMEM;
> > > > +
> > > > +       sk = kzalloc(sizeof(*sk), GFP_KERNEL);
> > >
> > > any reason not to devm_kzalloc this as well? Then you won't need to
> > > manually free it in the error cases.
> >
> > it has a different lifetime than the device, so it may not be allocated
> > via devm_kzalloc
> >
> > > > +static int ioc3kbd_remove(struct platform_device *pdev)
> > > > +{
> > > > +       struct ioc3kbd_data *d = platform_get_drvdata(pdev);
> > > > +
> > > > +       devm_free_irq(&pdev->dev, d->irq, d);
> > > > +       serio_unregister_port(d->kbd);
> > > > +       serio_unregister_port(d->aux);
> > > > +       return 0;
> > > > +}
> > >
> > > and on that topic, won't you need to kfree d->kbd and d->aux here?
> >
> > that's done in serio_release_port() by the serio core.
> 
> i see. But in that case, don't the kfree's after the
> serio_unregister_port's in the error path of the .probe function cause
> a double free?

Yes they do, we need to drop kfree()s from there. Nicely spotted.

Thanks.

-- 
Dmitry

^ permalink raw reply

* Re: [PATCH net-next 1/5] RDS: Re-add pf/sol access via sysctl
From: David Miller @ 2019-08-14 18:21 UTC (permalink / raw)
  To: santosh.shilimkar; +Cc: dledford, gerd.rausch, netdev, linux-rdma, rds-devel
In-Reply-To: <a7d09f3a-d01e-7cdb-98ec-8165b6312ffe@oracle.com>

From: santosh.shilimkar@oracle.com
Date: Wed, 14 Aug 2019 11:01:36 -0700

> Some of the application software was released before 2009 and ended up
> using these proc entries from downstream kernel. The newer lib/app
> using RDS don't use these. Unfortunately a lot of customers still use
> Oracle 9, 10, 11 which were released before 2007 and run these apps
> on modern kernels.

So those apps are using proc entries that were never upstream...

Sorry, this is completely and utterly inappropriate.

^ permalink raw reply

* Re: [PATCH] netfilter: nft_bitwise: Adjust parentheses to fix memcmp size argument
From: Nick Desaulniers @ 2019-08-14 18:25 UTC (permalink / raw)
  To: Nathan Chancellor
  Cc: Pablo Neira Ayuso, Jozsef Kadlecsik, Florian Westphal,
	David S. Miller, netfilter-devel, coreteam, netdev, LKML,
	clang-built-linux, kbuild test robot
In-Reply-To: <20190814165809.46421-1-natechancellor@gmail.com>

On Wed, Aug 14, 2019 at 9:58 AM Nathan Chancellor
<natechancellor@gmail.com> wrote:
>
> clang warns:
>
> net/netfilter/nft_bitwise.c:138:50: error: size argument in 'memcmp'
> call is a comparison [-Werror,-Wmemsize-comparison]
>         if (memcmp(&priv->xor, &zero, sizeof(priv->xor) ||
>                                       ~~~~~~~~~~~~~~~~~~^~
> net/netfilter/nft_bitwise.c:138:6: note: did you mean to compare the
> result of 'memcmp' instead?
>         if (memcmp(&priv->xor, &zero, sizeof(priv->xor) ||
>             ^
>                                                        )
> net/netfilter/nft_bitwise.c:138:32: note: explicitly cast the argument
> to size_t to silence this warning
>         if (memcmp(&priv->xor, &zero, sizeof(priv->xor) ||
>                                       ^
>                                       (size_t)(
> 1 error generated.
>
> Adjust the parentheses so that the result of the sizeof is used for the
> size argument in memcmp, rather than the result of the comparison (which
> would always be true because sizeof is a non-zero number).
>
> Fixes: bd8699e9e292 ("netfilter: nft_bitwise: add offload support")
> Link: https://github.com/ClangBuiltLinux/linux/issues/638
> Reported-by: kbuild test robot <lkp@intel.com>
> Signed-off-by: Nathan Chancellor <natechancellor@gmail.com>

oh no! thanks for the patch.
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>

> ---
>  net/netfilter/nft_bitwise.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c
> index 1f04ed5c518c..974300178fa9 100644
> --- a/net/netfilter/nft_bitwise.c
> +++ b/net/netfilter/nft_bitwise.c
> @@ -135,8 +135,8 @@ static int nft_bitwise_offload(struct nft_offload_ctx *ctx,
>  {
>         const struct nft_bitwise *priv = nft_expr_priv(expr);
>
> -       if (memcmp(&priv->xor, &zero, sizeof(priv->xor) ||
> -           priv->sreg != priv->dreg))
> +       if (memcmp(&priv->xor, &zero, sizeof(priv->xor)) ||
> +           priv->sreg != priv->dreg)
>                 return -EOPNOTSUPP;
>
>         memcpy(&ctx->regs[priv->dreg].mask, &priv->mask, sizeof(priv->mask));
> --
> 2.23.0.rc2
>

-- 
Thanks,
~Nick Desaulniers

^ permalink raw reply

* Re: [PATCH net] ipv6: Fix return value of ipv6_mc_may_pull() for malformed packets
From: Linus Lüssing @ 2019-08-14 18:26 UTC (permalink / raw)
  To: David Miller; +Cc: bridge, sbrivio, gnault, haliu, edumazet, netdev
In-Reply-To: <20190814.125858.37782529545578263.davem@davemloft.net>

On Wed, Aug 14, 2019 at 12:58:58PM -0400, David Miller wrote:
> From: Stefano Brivio <sbrivio@redhat.com>
> Date: Tue, 13 Aug 2019 00:46:01 +0200
> 
> > Commit ba5ea614622d ("bridge: simplify ip_mc_check_igmp() and
> > ipv6_mc_check_mld() calls") replaces direct calls to pskb_may_pull()
> > in br_ipv6_multicast_mld2_report() with calls to ipv6_mc_may_pull(),
> > that returns -EINVAL on buffers too short to be valid IPv6 packets,
> > while maintaining the previous handling of the return code.
> > 
> > This leads to the direct opposite of the intended effect: if the
> > packet is malformed, -EINVAL evaluates as true, and we'll happily
> > proceed with the processing.
> > 
> > Return 0 if the packet is too short, in the same way as this was
> > fixed for IPv4 by commit 083b78a9ed64 ("ip: fix ip_mc_may_pull()
> > return value").
> > 
> > I don't have a reproducer for this, unlike the one referred to by
> > the IPv4 commit, but this is clearly broken.
> > 
> > Fixes: ba5ea614622d ("bridge: simplify ip_mc_check_igmp() and ipv6_mc_check_mld() calls")
> > Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
> 
> Applied and queued up for -stable.

Urgh, sorry... and thanks for the fix(es), absolutely right...

^ permalink raw reply

* Re: [PATCH] MAINTAINERS: r8169: Update path to the driver
From: Heiner Kallweit @ 2019-08-14 18:33 UTC (permalink / raw)
  To: Denis Efremov; +Cc: joe, linux-kernel, nic_swsd, David S . Miller, netdev
In-Reply-To: <20190814121209.3364-1-efremov@linux.com>

On 14.08.2019 14:12, Denis Efremov wrote:
> Update MAINTAINERS record to reflect the filename change.
> The file was moved in commit 25e992a4603c ("r8169: rename
> r8169.c to r8169_main.c")
> 
> Cc: Heiner Kallweit <hkallweit1@gmail.com>
> Cc: nic_swsd@realtek.com
> Cc: David S. Miller <davem@davemloft.net>
> Cc: netdev@vger.kernel.org
> Signed-off-by: Denis Efremov <efremov@linux.com>
> ---
>  MAINTAINERS | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/MAINTAINERS b/MAINTAINERS
> index a43a1f0be49f..905efeda56fb 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -183,7 +183,7 @@ M:	Realtek linux nic maintainers <nic_swsd@realtek.com>
>  M:	Heiner Kallweit <hkallweit1@gmail.com>
>  L:	netdev@vger.kernel.org
>  S:	Maintained
> -F:	drivers/net/ethernet/realtek/r8169.c
> +F:	drivers/net/ethernet/realtek/r8169*
>  
>  8250/16?50 (AND CLONE UARTS) SERIAL DRIVER
>  M:	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
> 
For net-next.
Reviewed-by: Heiner Kallweit <hkallweit1@gmail.com>


^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox